Add test to ensure the blaze archive is not extracted if the `install_base` already exists.

Move `ExtractData` and associated data structures to `archive_utils`.

PiperOrigin-RevId: 535333531
Change-Id: Ia2b7112b86cda1126948121b89b09718ec689142
diff --git a/src/main/cpp/archive_utils.cc b/src/main/cpp/archive_utils.cc
index b24e7b0..a86d803 100644
--- a/src/main/cpp/archive_utils.cc
+++ b/src/main/cpp/archive_utils.cc
@@ -17,9 +17,11 @@
 #include <memory>
 #include <set>
 #include <string>
+#include <thread>  // NOLINT
 #include <vector>
 
 #include "src/main/cpp/blaze_util_platform.h"
+#include "src/main/cpp/startup_options.h"
 #include "src/main/cpp/util/errors.h"
 #include "src/main/cpp/util/exit_code.h"
 #include "src/main/cpp/util/file.h"
@@ -102,6 +104,105 @@
   bool done_ = false;
 };
 
+// Installs Blaze by extracting the embedded data files, iff necessary.
+// If install_base does not exist, the archive is extracted into a tmp dir
+// that is then renamed into place, where it becomes visible atomically at
+// the new path. If install_base already exists, nothing is extracted: the
+// directory is only checked (bless timestamps and install_base_key) and the
+// returned duration has archive_extracted == false.
+ExtractionDurationMillis ExtractData(const string &self_path,
+                                     const vector<string> &archive_contents,
+                                     const string &expected_install_md5,
+                                     const StartupOptions &startup_options,
+                                     LoggingInfo *logging_info) {
+  const string &install_base = startup_options.install_base;
+  // If the install dir doesn't exist, create it; if it does, verify it below.
+  if (!blaze_util::PathExists(install_base)) {
+    uint64_t st = GetMillisecondsMonotonic();
+    // Work in a temp dir to avoid races.
+    string tmp_install = blaze_util::CreateTempDir(install_base + ".tmp.");
+    ExtractArchiveOrDie(self_path, startup_options.product_name,
+                        expected_install_md5, tmp_install);
+    BlessFiles(tmp_install);
+
+    uint64_t et = GetMillisecondsMonotonic();
+    const ExtractionDurationMillis extract_data_duration(
+        et - st, /*archive_extracted=*/true);
+
+    // Now rename the completed installation to its final name.
+    int attempts = 0;
+    string rename_error;  // Error text of the most recent failed rename.
+    while (attempts < 120) {
+      int result = blaze_util::RenameDirectory(tmp_install, install_base);
+      if (result == blaze_util::kRenameDirectorySuccess ||
+          result == blaze_util::kRenameDirectoryFailureNotEmpty) {
+        // If renaming fails because the directory already exists and is not
+        // empty, then we assume another good installation snuck in before us.
+        blaze_util::RemoveRecursively(tmp_install);
+        break;
+      } else {
+        // Otherwise the directory may still be held by an antivirus scan (on
+        // Windows): record why the rename failed, then wait and try again.
+        rename_error = blaze_util::GetLastErrorString();
+        ++attempts;
+        BAZEL_LOG(USER) << "install base directory '" << tmp_install
+                        << "' could not be renamed into place after "
+                        << attempts << " second(s), trying again\r";
+        std::this_thread::sleep_for(std::chrono::seconds(1));
+      }
+    }
+
+    // Give up renaming after 120 failed attempts / 2 minutes.
+    if (attempts == 120) {
+      blaze_util::RemoveRecursively(tmp_install);
+      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+          << "install base directory '" << tmp_install
+          << "' could not be renamed into place: " << rename_error;
+    }
+    return extract_data_duration;
+  } else {
+    // This would be detected implicitly below, but checking explicitly lets
+    // us give a better error message.
+    if (!blaze_util::IsDirectory(install_base)) {
+      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+          << "install base directory '" << install_base
+          << "' could not be created. It exists but is not a directory.";
+    }
+    blaze_util::Path install_dir(install_base);
+    // Check that all files are present and have timestamps from BlessFiles().
+    std::unique_ptr<blaze_util::IFileMtime> mtime(
+        blaze_util::CreateFileMtime());
+    for (const auto &it : archive_contents) {
+      blaze_util::Path path = install_dir.GetRelative(it);
+      if (!mtime->IsUntampered(path)) {
+        BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+            << "corrupt installation: file '" << path.AsPrintablePath()
+            << "' is missing or modified.  Please remove '" << install_base
+            << "' and try again.";
+      }
+    }
+    // Also check that the installed files claim to match this binary.
+    // We check this afterward because the above diagnostic is better
+    // for a missing install_base_key file.
+    blaze_util::Path key_path = install_dir.GetRelative("install_base_key");
+    string on_disk_key;
+    if (!blaze_util::ReadFile(key_path, &on_disk_key)) {
+      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+          << "cannot read '" << key_path.AsPrintablePath()
+          << "': " << blaze_util::GetLastErrorString();
+    }
+    if (on_disk_key != expected_install_md5) {
+      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+          << "The install_base directory '" << install_base
+          << "' contains a different " << startup_options.product_name
+          << " version (found " << on_disk_key << " but this binary is "
+          << expected_install_md5
+          << ").  Remove it or specify a different --install_base.";
+    }
+    return ExtractionDurationMillis();
+  }
+}
+
 void DetermineArchiveContents(const string &archive_path, vector<string> *files,
                               string *install_md5) {
   PartialZipExtractor pze;
diff --git a/src/main/cpp/archive_utils.h b/src/main/cpp/archive_utils.h
index e74ae67..18856ab 100644
--- a/src/main/cpp/archive_utils.h
+++ b/src/main/cpp/archive_utils.h
@@ -18,6 +18,9 @@
 #include <string>
 #include <vector>
 
+#include "src/main/cpp/startup_options.h"
+#include "src/main/cpp/util/logging.h"
+
 namespace blaze {
 
 // Determines the contents of the archive, storing the names of the contained
@@ -26,6 +29,77 @@
                               std::vector<std::string> *files,
                               std::string *install_md5);
 
+struct DurationMillis {
+ public:
+  const uint64_t millis;
+  // Default-constructed durations are "unknown" (millis == kUnknownDuration).
+  DurationMillis() : millis(kUnknownDuration) {}
+  DurationMillis(const uint64_t ms) : millis(ms) {}
+  // NOTE(review): a measured duration of exactly 0 ms also reads as unknown.
+  bool IsUnknown() const { return millis == kUnknownDuration; }
+
+ private:
+  // Value representing that a timing event never occurred or is unknown.
+  static constexpr uint64_t kUnknownDuration = 0;
+};
+
+// DurationMillis that also records whether the archive was extracted.
+struct ExtractionDurationMillis : DurationMillis {
+  const bool archive_extracted;
+  ExtractionDurationMillis() : DurationMillis(), archive_extracted(false) {}
+  ExtractionDurationMillis(const uint64_t ms, const bool archive_extracted)
+      : DurationMillis(ms), archive_extracted(archive_extracted) {}
+};
+
+// The reason for a blaze server restart. NO_RESTART (0) means "not set".
+// Keep in sync with logging.proto.
+enum RestartReason {
+  NO_RESTART = 0,
+  NO_DAEMON,
+  NEW_VERSION,
+  NEW_OPTIONS,
+  PID_FILE_BUT_NO_SERVER,
+  SERVER_VANISHED,
+  SERVER_UNRESPONSIVE
+};
+
+// Miscellaneous information reported to the server for logging and profiling.
+// restart_reason starts as NO_RESTART; the setter keeps the first reason.
+struct LoggingInfo {
+ public:
+  explicit LoggingInfo(const std::string &binary_path_,
+                       const uint64_t start_time_ms_)
+      : binary_path(binary_path_),
+        start_time_ms(start_time_ms_),
+        restart_reason(NO_RESTART) {}
+
+  void SetRestartReasonIfNotSet(const RestartReason restart_reason_) {
+    if (restart_reason == NO_RESTART) {
+      restart_reason = restart_reason_;
+    }
+  }
+
+  // Path of this binary.
+  const std::string binary_path;
+
+  // The time in ms the binary started up, measured from approximately the time
+  // that "main" was called.
+  const uint64_t start_time_ms;
+
+  // The reason the server was restarted.
+  RestartReason restart_reason;
+};
+
+// Extracts the archive into the install base via ExtractArchiveOrDie and
+// BlessFiles. If the install base already exists, extraction is skipped and
+// the existing installation is checked instead; the result then reports
+// archive_extracted == false. Kills the client if an error is encountered.
+ExtractionDurationMillis ExtractData(
+    const std::string &self_path,
+    const std::vector<std::string> &archive_contents,
+    const std::string &expected_install_md5,
+    const StartupOptions &startup_options, LoggingInfo *logging_info);
+
 // Extracts the embedded data files in `archive_path` into `output_dir`.
 // It's expected that `output_dir` already exists and that it's a directory.
 // Fails if `expected_install_md5` doesn't match that contained in the archive,
diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc
index a8ae099..63f2e22 100644
--- a/src/main/cpp/blaze.cc
+++ b/src/main/cpp/blaze.cc
@@ -166,18 +166,6 @@
 //   connections. It would also not be resilient against a dead server that
 //   left a PID file around.
 
-// The reason for a blaze server restart.
-// Keep in sync with logging.proto.
-enum RestartReason {
-  NO_RESTART = 0,
-  NO_DAEMON,
-  NEW_VERSION,
-  NEW_OPTIONS,
-  PID_FILE_BUT_NO_SERVER,
-  SERVER_VANISHED,
-  SERVER_UNRESPONSIVE
-};
-
 // String string representation of RestartReason.
 static const char *ReasonString(RestartReason reason) {
   switch (reason) {
@@ -203,45 +191,6 @@
   return "unknown";
 }
 
-struct DurationMillis {
-  const uint64_t millis;
-
-  DurationMillis() : millis(kUnknownDuration) {}
-  DurationMillis(const uint64_t ms) : millis(ms) {}
-
-  bool IsKnown() const { return millis == kUnknownDuration; }
-
- private:
-  // Value representing that a timing event never occurred or is unknown.
-  static constexpr uint64_t kUnknownDuration = 0;
-};
-
-// Encapsulates miscellaneous information reported to the server for logging and
-// profiling purposes.
-struct LoggingInfo {
-  explicit LoggingInfo(const string &binary_path_,
-                       const uint64_t start_time_ms_)
-      : binary_path(binary_path_),
-        start_time_ms(start_time_ms_),
-        restart_reason(NO_RESTART) {}
-
-  void SetRestartReasonIfNotSet(const RestartReason restart_reason_) {
-    if (restart_reason == NO_RESTART) {
-      restart_reason = restart_reason_;
-    }
-  }
-
-  // Path of this binary.
-  const string binary_path;
-
-  // The time in ms the binary started up, measured from approximately the time
-  // that "main" was called.
-  const uint64_t start_time_ms;
-
-  // The reason the server was restarted.
-  RestartReason restart_reason;
-};
-
 class BlazeServer final {
  public:
   explicit BlazeServer(const StartupOptions &startup_options);
@@ -589,14 +538,14 @@
 
   // The time in ms a command had to wait on a busy Blaze server process.
   // This is part of startup_time.
-  if (command_wait_duration_ms.IsKnown()) {
+  if (!command_wait_duration_ms.IsUnknown()) {
     args->push_back("--command_wait_time=" +
                     blaze_util::ToString(command_wait_duration_ms.millis));
   }
 
   // The time in ms spent on extracting the new blaze version.
   // This is part of startup_time.
-  if (extract_data_duration.IsKnown()) {
+  if (!extract_data_duration.IsUnknown()) {
     args->push_back("--extract_data_time=" +
                     blaze_util::ToString(extract_data_duration.millis));
   }
@@ -901,103 +850,6 @@
   delete server_startup;
 }
 
-// Installs Blaze by extracting the embedded data files, iff necessary.
-// The MD5-named install_base directory on disk is trusted; we assume
-// no-one has modified the extracted files beneath this directory once
-// it is in place. Concurrency during extraction is handled by
-// extracting in a tmp dir and then renaming it into place where it
-// becomes visible atomically at the new path.
-static DurationMillis ExtractData(const string &self_path,
-                                  const vector<string> &archive_contents,
-                                  const string &expected_install_md5,
-                                  const StartupOptions &startup_options,
-                                  LoggingInfo *logging_info) {
-  const string &install_base = startup_options.install_base;
-  // If the install dir doesn't exist, create it, if it does, we know it's good.
-  if (!blaze_util::PathExists(install_base)) {
-    uint64_t st = GetMillisecondsMonotonic();
-    // Work in a temp dir to avoid races.
-    string tmp_install = blaze_util::CreateTempDir(install_base + ".tmp.");
-    ExtractArchiveOrDie(self_path, startup_options.product_name,
-                        expected_install_md5, tmp_install);
-    BlessFiles(tmp_install);
-
-    uint64_t et = GetMillisecondsMonotonic();
-    const DurationMillis extract_data_duration(et - st);
-
-    // Now rename the completed installation to its final name.
-    int attempts = 0;
-    while (attempts < 120) {
-      int result = blaze_util::RenameDirectory(tmp_install, install_base);
-      if (result == blaze_util::kRenameDirectorySuccess ||
-          result == blaze_util::kRenameDirectoryFailureNotEmpty) {
-        // If renaming fails because the directory already exists and is not
-        // empty, then we assume another good installation snuck in before us.
-        blaze_util::RemoveRecursively(tmp_install);
-        break;
-      } else {
-        // Otherwise the install directory may still be scanned by the antivirus
-        // (in case we're running on Windows) so we need to wait for that to
-        // finish and try renaming again.
-        ++attempts;
-        BAZEL_LOG(USER) << "install base directory '" << tmp_install
-                        << "' could not be renamed into place after "
-                        << attempts << " second(s), trying again\r";
-        std::this_thread::sleep_for(std::chrono::seconds(1));
-      }
-    }
-
-    // Give up renaming after 120 failed attempts / 2 minutes.
-    if (attempts == 120) {
-      blaze_util::RemoveRecursively(tmp_install);
-      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
-          << "install base directory '" << tmp_install
-          << "' could not be renamed into place: " << GetLastErrorString();
-    }
-    return extract_data_duration;
-  } else {
-    // This would be detected implicitly below, but checking explicitly lets
-    // us give a better error message.
-    if (!blaze_util::IsDirectory(install_base)) {
-      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
-          << "install base directory '" << install_base
-          << "' could not be created. It exists but is not a directory.";
-    }
-    blaze_util::Path install_dir(install_base);
-    // Check that all files are present and have timestamps from BlessFiles().
-    std::unique_ptr<blaze_util::IFileMtime> mtime(
-        blaze_util::CreateFileMtime());
-    for (const auto &it : archive_contents) {
-      blaze_util::Path path = install_dir.GetRelative(it);
-      if (!mtime->IsUntampered(path)) {
-        BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
-            << "corrupt installation: file '" << path.AsPrintablePath()
-            << "' is missing or modified.  Please remove '" << install_base
-            << "' and try again.";
-      }
-    }
-    // Also check that the installed files claim to match this binary.
-    // We check this afterward because the above diagnostic is better
-    // for a missing install_base_key file.
-    blaze_util::Path key_path = install_dir.GetRelative("install_base_key");
-    string on_disk_key;
-    if (!blaze_util::ReadFile(key_path, &on_disk_key)) {
-      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
-          << "cannot read '" << key_path.AsPrintablePath()
-          << "': " << GetLastErrorString();
-    }
-    if (on_disk_key != expected_install_md5) {
-      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
-          << "The install_base directory '" << install_base
-          << "' contains a different " << startup_options.product_name
-          << " version (found " << on_disk_key << " but this binary is "
-          << expected_install_md5
-          << ").  Remove it or specify a different --install_base.";
-    }
-    return DurationMillis();
-  }
-}
-
 static bool IsVolatileArg(const string &arg) {
   // TODO(ccalvarin) when --batch is gone and the startup_options field in the
   // gRPC message is always set, there is no reason for client options that are
@@ -1529,7 +1381,7 @@
 
   WarnFilesystemType(startup_options.output_base);
 
-  const DurationMillis extract_data_duration = ExtractData(
+  const ExtractionDurationMillis extract_data_duration = ExtractData(
       self_path, archive_contents, install_md5, startup_options, logging_info);
 
   blaze_server->Connect();