Add test to ensure the blaze archive is not extracted if the `install_base` already exists.
Move `ExtractData` and associated data structures to `archive_utils`.
PiperOrigin-RevId: 535333531
Change-Id: Ia2b7112b86cda1126948121b89b09718ec689142
diff --git a/src/main/cpp/archive_utils.cc b/src/main/cpp/archive_utils.cc
index b24e7b0..a86d803 100644
--- a/src/main/cpp/archive_utils.cc
+++ b/src/main/cpp/archive_utils.cc
@@ -17,9 +17,11 @@
#include <memory>
#include <set>
#include <string>
+#include <thread> // NOLINT
#include <vector>
#include "src/main/cpp/blaze_util_platform.h"
+#include "src/main/cpp/startup_options.h"
#include "src/main/cpp/util/errors.h"
#include "src/main/cpp/util/exit_code.h"
#include "src/main/cpp/util/file.h"
@@ -102,6 +104,105 @@
bool done_ = false;
};
+// Installs Blaze by extracting the embedded data files, iff necessary.
+// The MD5-named install_base directory on disk is trusted: if it already
+// exists, nothing is extracted and it is only checked for blessed mtimes and
+// a matching install_base_key. Concurrency during extraction is handled by
+// extracting in a tmp dir and then renaming it into place where it becomes
+// visible atomically at the new path. `logging_info` is currently unused.
+ExtractionDurationMillis ExtractData(const string &self_path,
+                                     const vector<string> &archive_contents,
+                                     const string &expected_install_md5,
+                                     const StartupOptions &startup_options,
+                                     LoggingInfo *logging_info) {
+  const string &install_base = startup_options.install_base;
+  // Extract only if the install dir doesn't exist; otherwise validate it.
+  if (!blaze_util::PathExists(install_base)) {
+    uint64_t st = GetMillisecondsMonotonic();
+    // Work in a temp dir to avoid races.
+    string tmp_install = blaze_util::CreateTempDir(install_base + ".tmp.");
+    ExtractArchiveOrDie(self_path, startup_options.product_name,
+                        expected_install_md5, tmp_install);
+    BlessFiles(tmp_install);
+
+    uint64_t et = GetMillisecondsMonotonic();
+    const ExtractionDurationMillis extract_data_duration(
+        et - st, /*archive_extracted=*/true);
+
+    // Now rename the completed installation to its final name.
+    constexpr int kMaxRenameAttempts = 120;  // One attempt per second.
+    string rename_error;
+    int attempts = 0;
+    while (attempts < kMaxRenameAttempts) {
+      int result = blaze_util::RenameDirectory(tmp_install, install_base);
+      if (result == blaze_util::kRenameDirectorySuccess ||
+          result == blaze_util::kRenameDirectoryFailureNotEmpty) {
+        // If renaming fails because the directory already exists and is not
+        // empty, then we assume another good installation snuck in before us.
+        blaze_util::RemoveRecursively(tmp_install);
+        break;
+      }
+      // Save the cause before logging, sleeping or cleanup can overwrite it.
+      rename_error = blaze_util::GetLastErrorString();
+      // The directory may still be scanned by the antivirus (on Windows), so
+      // wait for that to finish and try renaming again.
+      ++attempts;
+      BAZEL_LOG(USER) << "install base directory '" << tmp_install
+                      << "' could not be renamed into place after "
+                      << attempts << " second(s), trying again\r";
+      std::this_thread::sleep_for(std::chrono::seconds(1));
+    }
+
+    // Give up renaming after kMaxRenameAttempts failed attempts / 2 minutes.
+    if (attempts == kMaxRenameAttempts) {
+      blaze_util::RemoveRecursively(tmp_install);
+      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+          << "install base directory '" << tmp_install
+          << "' could not be renamed into place: " << rename_error;
+    }
+    return extract_data_duration;
+  } else {
+    // Checked explicitly for a clearer error than the checks below would give.
+    if (!blaze_util::IsDirectory(install_base)) {
+      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+          << "install base directory '" << install_base
+          << "' could not be created. It exists but is not a directory.";
+    }
+    blaze_util::Path install_dir(install_base);
+    // Check that all files are present and have timestamps from BlessFiles().
+    std::unique_ptr<blaze_util::IFileMtime> mtime(
+        blaze_util::CreateFileMtime());
+    for (const auto &it : archive_contents) {
+      blaze_util::Path path = install_dir.GetRelative(it);
+      if (!mtime->IsUntampered(path)) {
+        BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+            << "corrupt installation: file '" << path.AsPrintablePath()
+            << "' is missing or modified. Please remove '" << install_base
+            << "' and try again.";
+      }
+    }
+    // Also check that the installed files claim to match this binary.
+    // We check this afterward because the above diagnostic is better
+    // for a missing install_base_key file.
+    blaze_util::Path key_path = install_dir.GetRelative("install_base_key");
+    string on_disk_key;
+    if (!blaze_util::ReadFile(key_path, &on_disk_key)) {
+      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+          << "cannot read '" << key_path.AsPrintablePath()
+          << "': " << blaze_util::GetLastErrorString();
+    }
+    if (on_disk_key != expected_install_md5) {
+      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+          << "The install_base directory '" << install_base
+          << "' contains a different " << startup_options.product_name
+          << " version (found " << on_disk_key << " but this binary is "
+          << expected_install_md5
+          << "). Remove it or specify a different --install_base.";
+    }
+    return ExtractionDurationMillis();
+  }
+}
+
void DetermineArchiveContents(const string &archive_path, vector<string> *files,
string *install_md5) {
PartialZipExtractor pze;
diff --git a/src/main/cpp/archive_utils.h b/src/main/cpp/archive_utils.h
index e74ae67..18856ab 100644
--- a/src/main/cpp/archive_utils.h
+++ b/src/main/cpp/archive_utils.h
@@ -18,6 +18,9 @@
#include <string>
#include <vector>
+#include "src/main/cpp/startup_options.h"
+#include "src/main/cpp/util/logging.h"
+
namespace blaze {
// Determines the contents of the archive, storing the names of the contained
@@ -26,6 +29,77 @@
std::vector<std::string> *files,
std::string *install_md5);
+struct DurationMillis {
+ private:
+  // Sentinel meaning that a timing event never occurred or is unknown.
+  static constexpr uint64_t kUnknownDuration = 0;
+
+ public:
+  const uint64_t millis;
+
+  DurationMillis() : millis{kUnknownDuration} {}
+  DurationMillis(const uint64_t ms) : millis{ms} {}
+  // True iff `millis` equals the sentinel, i.e. no duration was recorded.
+  bool IsUnknown() const { return kUnknownDuration == millis; }
+};
+
+// A DurationMillis that additionally records whether an archive was extracted.
+struct ExtractionDurationMillis : DurationMillis {
+  const bool archive_extracted;
+  ExtractionDurationMillis() : DurationMillis{}, archive_extracted{false} {}
+  ExtractionDurationMillis(const uint64_t ms, const bool archive_extracted)
+      : DurationMillis{ms}, archive_extracted{archive_extracted} {}
+};
+
+// Why the blaze server was restarted (NO_RESTART if it was not).
+// Keep in sync with logging.proto.
+enum RestartReason {
+  NO_RESTART = 0,
+  NO_DAEMON = 1,
+  NEW_VERSION = 2,
+  NEW_OPTIONS = 3,
+  PID_FILE_BUT_NO_SERVER = 4,
+  SERVER_VANISHED = 5,
+  SERVER_UNRESPONSIVE = 6
+};
+
+// Bundle of miscellaneous information that is reported to the server for
+// logging and profiling purposes.
+struct LoggingInfo {
+ public:
+  explicit LoggingInfo(const std::string &binary_path_,
+                       const uint64_t start_time_ms_)
+      : binary_path(binary_path_),
+        start_time_ms(start_time_ms_),
+        restart_reason(NO_RESTART) {}
+
+  // Records `restart_reason_` only while no reason has been recorded yet.
+  void SetRestartReasonIfNotSet(const RestartReason restart_reason_) {
+    if (restart_reason != NO_RESTART) return;
+    restart_reason = restart_reason_;
+  }
+
+  // Path of this binary.
+  const std::string binary_path;
+
+  // Start-up time of the binary in ms, taken at approximately the moment
+  // "main" was called.
+  const uint64_t start_time_ms;
+
+  // Why the server was restarted; NO_RESTART until a reason is recorded.
+  RestartReason restart_reason;
+};
+
+// Ensures the archive is installed at `startup_options.install_base`. If that
+// path does not exist, the archive is extracted via ExtractArchiveOrDie and
+// BlessFiles; if it already exists, extraction is skipped. Kills the client
+// if an error is encountered.
+ExtractionDurationMillis ExtractData(
+ const std::string &self_path,
+ const std::vector<std::string> &archive_contents,
+ const std::string &expected_install_md5,
+ const StartupOptions &startup_options, LoggingInfo *logging_info);
+
// Extracts the embedded data files in `archive_path` into `output_dir`.
// It's expected that `output_dir` already exists and that it's a directory.
// Fails if `expected_install_md5` doesn't match that contained in the archive,
diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc
index a8ae099..63f2e22 100644
--- a/src/main/cpp/blaze.cc
+++ b/src/main/cpp/blaze.cc
@@ -166,18 +166,6 @@
// connections. It would also not be resilient against a dead server that
// left a PID file around.
-// The reason for a blaze server restart.
-// Keep in sync with logging.proto.
-enum RestartReason {
- NO_RESTART = 0,
- NO_DAEMON,
- NEW_VERSION,
- NEW_OPTIONS,
- PID_FILE_BUT_NO_SERVER,
- SERVER_VANISHED,
- SERVER_UNRESPONSIVE
-};
-
// String string representation of RestartReason.
static const char *ReasonString(RestartReason reason) {
switch (reason) {
@@ -203,45 +191,6 @@
return "unknown";
}
-struct DurationMillis {
- const uint64_t millis;
-
- DurationMillis() : millis(kUnknownDuration) {}
- DurationMillis(const uint64_t ms) : millis(ms) {}
-
- bool IsKnown() const { return millis == kUnknownDuration; }
-
- private:
- // Value representing that a timing event never occurred or is unknown.
- static constexpr uint64_t kUnknownDuration = 0;
-};
-
-// Encapsulates miscellaneous information reported to the server for logging and
-// profiling purposes.
-struct LoggingInfo {
- explicit LoggingInfo(const string &binary_path_,
- const uint64_t start_time_ms_)
- : binary_path(binary_path_),
- start_time_ms(start_time_ms_),
- restart_reason(NO_RESTART) {}
-
- void SetRestartReasonIfNotSet(const RestartReason restart_reason_) {
- if (restart_reason == NO_RESTART) {
- restart_reason = restart_reason_;
- }
- }
-
- // Path of this binary.
- const string binary_path;
-
- // The time in ms the binary started up, measured from approximately the time
- // that "main" was called.
- const uint64_t start_time_ms;
-
- // The reason the server was restarted.
- RestartReason restart_reason;
-};
-
class BlazeServer final {
public:
explicit BlazeServer(const StartupOptions &startup_options);
@@ -589,14 +538,14 @@
// The time in ms a command had to wait on a busy Blaze server process.
// This is part of startup_time.
- if (command_wait_duration_ms.IsKnown()) {
+  if (!command_wait_duration_ms.IsUnknown()) {  // Report measured waits only.
args->push_back("--command_wait_time=" +
blaze_util::ToString(command_wait_duration_ms.millis));
}
// The time in ms spent on extracting the new blaze version.
// This is part of startup_time.
- if (extract_data_duration.IsKnown()) {
+  if (!extract_data_duration.IsUnknown()) {  // Report measured times only.
args->push_back("--extract_data_time=" +
blaze_util::ToString(extract_data_duration.millis));
}
@@ -901,103 +850,6 @@
delete server_startup;
}
-// Installs Blaze by extracting the embedded data files, iff necessary.
-// The MD5-named install_base directory on disk is trusted; we assume
-// no-one has modified the extracted files beneath this directory once
-// it is in place. Concurrency during extraction is handled by
-// extracting in a tmp dir and then renaming it into place where it
-// becomes visible atomically at the new path.
-static DurationMillis ExtractData(const string &self_path,
- const vector<string> &archive_contents,
- const string &expected_install_md5,
- const StartupOptions &startup_options,
- LoggingInfo *logging_info) {
- const string &install_base = startup_options.install_base;
- // If the install dir doesn't exist, create it, if it does, we know it's good.
- if (!blaze_util::PathExists(install_base)) {
- uint64_t st = GetMillisecondsMonotonic();
- // Work in a temp dir to avoid races.
- string tmp_install = blaze_util::CreateTempDir(install_base + ".tmp.");
- ExtractArchiveOrDie(self_path, startup_options.product_name,
- expected_install_md5, tmp_install);
- BlessFiles(tmp_install);
-
- uint64_t et = GetMillisecondsMonotonic();
- const DurationMillis extract_data_duration(et - st);
-
- // Now rename the completed installation to its final name.
- int attempts = 0;
- while (attempts < 120) {
- int result = blaze_util::RenameDirectory(tmp_install, install_base);
- if (result == blaze_util::kRenameDirectorySuccess ||
- result == blaze_util::kRenameDirectoryFailureNotEmpty) {
- // If renaming fails because the directory already exists and is not
- // empty, then we assume another good installation snuck in before us.
- blaze_util::RemoveRecursively(tmp_install);
- break;
- } else {
- // Otherwise the install directory may still be scanned by the antivirus
- // (in case we're running on Windows) so we need to wait for that to
- // finish and try renaming again.
- ++attempts;
- BAZEL_LOG(USER) << "install base directory '" << tmp_install
- << "' could not be renamed into place after "
- << attempts << " second(s), trying again\r";
- std::this_thread::sleep_for(std::chrono::seconds(1));
- }
- }
-
- // Give up renaming after 120 failed attempts / 2 minutes.
- if (attempts == 120) {
- blaze_util::RemoveRecursively(tmp_install);
- BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
- << "install base directory '" << tmp_install
- << "' could not be renamed into place: " << GetLastErrorString();
- }
- return extract_data_duration;
- } else {
- // This would be detected implicitly below, but checking explicitly lets
- // us give a better error message.
- if (!blaze_util::IsDirectory(install_base)) {
- BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
- << "install base directory '" << install_base
- << "' could not be created. It exists but is not a directory.";
- }
- blaze_util::Path install_dir(install_base);
- // Check that all files are present and have timestamps from BlessFiles().
- std::unique_ptr<blaze_util::IFileMtime> mtime(
- blaze_util::CreateFileMtime());
- for (const auto &it : archive_contents) {
- blaze_util::Path path = install_dir.GetRelative(it);
- if (!mtime->IsUntampered(path)) {
- BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
- << "corrupt installation: file '" << path.AsPrintablePath()
- << "' is missing or modified. Please remove '" << install_base
- << "' and try again.";
- }
- }
- // Also check that the installed files claim to match this binary.
- // We check this afterward because the above diagnostic is better
- // for a missing install_base_key file.
- blaze_util::Path key_path = install_dir.GetRelative("install_base_key");
- string on_disk_key;
- if (!blaze_util::ReadFile(key_path, &on_disk_key)) {
- BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
- << "cannot read '" << key_path.AsPrintablePath()
- << "': " << GetLastErrorString();
- }
- if (on_disk_key != expected_install_md5) {
- BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
- << "The install_base directory '" << install_base
- << "' contains a different " << startup_options.product_name
- << " version (found " << on_disk_key << " but this binary is "
- << expected_install_md5
- << "). Remove it or specify a different --install_base.";
- }
- return DurationMillis();
- }
-}
-
static bool IsVolatileArg(const string &arg) {
// TODO(ccalvarin) when --batch is gone and the startup_options field in the
// gRPC message is always set, there is no reason for client options that are
@@ -1529,7 +1381,7 @@
WarnFilesystemType(startup_options.output_base);
- const DurationMillis extract_data_duration = ExtractData(
+ const ExtractionDurationMillis extract_data_duration = ExtractData(
self_path, archive_contents, install_md5, startup_options, logging_info);
blaze_server->Connect();