Move functionality directly related to the dist archive into archive_utils

Hides all of the implementation details of how we interact with the archive
inside the .cc file, exposing a nice, simple API.

I tried to limit this change to moving code around, but I did end up renaming
some methods and variables so that their names express their functionality more
directly.

PiperOrigin-RevId: 249491141
diff --git a/src/main/cpp/BUILD b/src/main/cpp/BUILD
index c61e459..fe9bd85 100644
--- a/src/main/cpp/BUILD
+++ b/src/main/cpp/BUILD
@@ -82,6 +82,7 @@
         ":blaze_util",
         "//src/main/cpp/util",
         "//src/main/cpp/util:blaze_exit_code",
+        "//src/main/cpp/util:errors",
         "//src/main/cpp/util:logging",
         "//third_party/ijar:zip",
     ],
diff --git a/src/main/cpp/archive_utils.cc b/src/main/cpp/archive_utils.cc
index b73b1a0..2e0cd40 100644
--- a/src/main/cpp/archive_utils.cc
+++ b/src/main/cpp/archive_utils.cc
@@ -15,6 +15,9 @@
 
 #include <vector>
 
+#include "src/main/cpp/blaze_util_platform.h"
+#include "src/main/cpp/util/errors.h"
+#include "src/main/cpp/util/file.h"
 #include "src/main/cpp/util/exit_code.h"
 #include "src/main/cpp/util/logging.h"
 #include "src/main/cpp/util/path.h"
@@ -24,98 +27,226 @@
 namespace blaze {
 
 using std::vector;
+using std::string;
+
+// A devtools_ijar::ZipExtractorProcessor that has a pure version of Accept.
+class PureZipExtractorProcessor : public devtools_ijar::ZipExtractorProcessor {
+ public:
+  virtual ~PureZipExtractorProcessor() {}
+
+  // Like devtools_ijar::ZipExtractorProcessor::Accept, but is guaranteed to not
+  // have side-effects.
+  virtual bool AcceptPure(const char *filename,
+                          const devtools_ijar::u4 attr) const = 0;
+};
 
 // A devtools_ijar::ZipExtractorProcessor that processes the ZIP entries using
 // the given PureZipExtractorProcessors.
-CompoundZipProcessor::CompoundZipProcessor(
-    const vector<PureZipExtractorProcessor *> &processors)
-    : processors_(processors) {}
+class CompoundZipProcessor : public devtools_ijar::ZipExtractorProcessor {
+ public:
+  explicit CompoundZipProcessor(
+      const vector<PureZipExtractorProcessor*>& processors)
+      : processors_(processors) {}
 
-bool CompoundZipProcessor::Accept(const char *filename,
-                                  const devtools_ijar::u4 attr) {
-  bool should_accept = false;
-  for (auto *processor : processors_) {
-    if (processor->Accept(filename, attr)) {
-      // ZipExtractorProcessor::Accept is allowed to be side-effectful, so
-      // we don't want to break out on the first true here.
-      should_accept = true;
+  bool Accept(const char *filename, const devtools_ijar::u4 attr) override {
+    bool should_accept = false;
+    for (auto *processor : processors_) {
+      if (processor->Accept(filename, attr)) {
+        // ZipExtractorProcessor::Accept is allowed to be side-effectful, so
+        // we don't want to break out on the first true here.
+        should_accept = true;
+      }
+    }
+    return should_accept;
+  }
+
+  void Process(const char *filename, const devtools_ijar::u4 attr,
+               const devtools_ijar::u1 *data, const size_t size) override {
+    for (auto *processor : processors_) {
+      if (processor->AcceptPure(filename, attr)) {
+        processor->Process(filename, attr, data, size);
+      }
     }
   }
-  return should_accept;
-}
 
-void CompoundZipProcessor::Process(const char *filename,
-                                   const devtools_ijar::u4 attr,
-                                   const devtools_ijar::u1 *data,
-                                   const size_t size) {
-  for (auto *processor : processors_) {
-    if (processor->AcceptPure(filename, attr)) {
-      processor->Process(filename, attr, data, size);
-    }
-  }
-}
+ private:
+  const vector<PureZipExtractorProcessor*> processors_;
+};
 
 // A PureZipExtractorProcessor to extract the InstallKeyFile
-GetInstallKeyFileProcessor::GetInstallKeyFileProcessor(string *install_base_key)
+class GetInstallKeyFileProcessor : public PureZipExtractorProcessor {
+ public:
+  explicit GetInstallKeyFileProcessor(string *install_base_key)
     : install_base_key_(install_base_key) {}
 
-bool GetInstallKeyFileProcessor::AcceptPure(
-    const char *filename, const devtools_ijar::u4 attr) const {
-  return strcmp(filename, "install_base_key") == 0;
-}
-
-void GetInstallKeyFileProcessor::Process(const char *filename,
-                                         const devtools_ijar::u4 attr,
-                                         const devtools_ijar::u1 *data,
-                                         const size_t size) {
-  string str(reinterpret_cast<const char *>(data), size);
-  blaze_util::StripWhitespace(&str);
-  if (str.size() != 32) {
-    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
-        << "Failed to extract install_base_key: file size mismatch "
-           "(should be 32, is "
-        << str.size() << ")";
+  bool AcceptPure(const char *filename,
+                  const devtools_ijar::u4 attr) const override {
+    return strcmp(filename, "install_base_key") == 0;
   }
-  *install_base_key_ = str;
+
+  bool Accept(const char *filename, const devtools_ijar::u4 attr) override {
+    return AcceptPure(filename, attr);
+  }
+
+  void Process(const char *filename,
+               const devtools_ijar::u4 attr,
+               const devtools_ijar::u1 *data,
+               const size_t size) override {
+    string str(reinterpret_cast<const char *>(data), size);
+    blaze_util::StripWhitespace(&str);
+    if (str.size() != 32) {
+      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+          << "Failed to extract install_base_key: file size mismatch "
+             "(should be 32, is "
+          << str.size() << ")";
+    }
+    *install_base_key_ = str;
+  }
+
+ private:
+  string *install_base_key_;
+};
+
+// A PureZipExtractorProcessor that adds the names of all the files zipped up
+// in the Blaze binary to the given vector.
+class NoteAllFilesZipProcessor : public PureZipExtractorProcessor {
+ public:
+  explicit NoteAllFilesZipProcessor(std::vector<std::string>* files)
+      : files_(files) {}
+
+  bool AcceptPure(const char *filename,
+                  const devtools_ijar::u4 attr) const override {
+    return false;
+  }
+
+  bool Accept(const char *filename,
+              const devtools_ijar::u4 attr) override {
+    files_->push_back(filename);
+    return false;
+  }
+
+  void Process(const char *filename,
+               const devtools_ijar::u4 attr,
+               const devtools_ijar::u1 *data,
+               const size_t size) override {
+    BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
+        << "NoteAllFilesZipProcessor::Process shouldn't be called";
+  }
+
+ private:
+  std::vector<std::string>* files_;
+};
+
+// A PureZipExtractorProcessor to extract the files from the blaze zip.
+class ExtractBlazeZipProcessor : public PureZipExtractorProcessor {
+ public:
+  explicit ExtractBlazeZipProcessor(const string &output_dir,
+                                    blaze::embedded_binaries::Dumper *dumper)
+      : output_dir_(output_dir), dumper_(dumper) {}
+
+  bool AcceptPure(const char *filename,
+                  const devtools_ijar::u4 attr) const override {
+    return !devtools_ijar::zipattr_is_dir(attr);
+  }
+
+  bool Accept(const char *filename, const devtools_ijar::u4 attr) override {
+    return AcceptPure(filename, attr);
+  }
+
+  void Process(const char *filename,
+               const devtools_ijar::u4 attr,
+               const devtools_ijar::u1 *data,
+               const size_t size) override {
+    dumper_->Dump(data, size, blaze_util::JoinPath(output_dir_, filename));
+  }
+
+ private:
+  const string output_dir_;
+  blaze::embedded_binaries::Dumper *dumper_;
+};
+
+void DetermineArchiveContents(
+    const string &archive_path,
+    const string &product_name,
+    std::vector<std::string>* files,
+    string *install_md5) {
+  NoteAllFilesZipProcessor note_all_files_processor(files);
+  GetInstallKeyFileProcessor install_key_processor(install_md5);
+  CompoundZipProcessor processor({&note_all_files_processor,
+                                  &install_key_processor});
+  std::unique_ptr<devtools_ijar::ZipExtractor> extractor(
+      devtools_ijar::ZipExtractor::Create(archive_path.c_str(), &processor));
+  if (extractor == NULL) {
+    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+        << "Failed to open " << product_name
+        << " as a zip file: " << blaze_util::GetLastErrorString();
+  }
+  if (extractor->ProcessAll() < 0) {
+    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+        << "Failed to extract install_base_key: " << extractor->GetError();
+  }
+
+  if (install_md5->empty()) {
+    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+        << "Failed to find install_base_key's in zip file";
+  }
 }
 
-NoteAllFilesZipProcessor::NoteAllFilesZipProcessor(
-    std::vector<std::string> *files)
-    : files_(files) {}
+void ExtractArchiveOrDie(const string &archive_path,
+                         const string &product_name,
+                         const string &expected_install_md5,
+                         const string &output_dir) {
+  std::string install_md5;
+  GetInstallKeyFileProcessor install_key_processor(&install_md5);
 
-bool NoteAllFilesZipProcessor::AcceptPure(const char *filename,
-                                          const devtools_ijar::u4 attr) const {
-  return false;
-}
+  std::string error;
+  std::unique_ptr<blaze::embedded_binaries::Dumper> dumper(
+      blaze::embedded_binaries::Create(&error));
+  if (dumper == nullptr) {
+    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) << error;
+  }
+  ExtractBlazeZipProcessor extract_blaze_processor(output_dir,
+                                                   dumper.get());
 
-bool NoteAllFilesZipProcessor::Accept(const char *filename,
-                                      const devtools_ijar::u4 attr) {
-  files_->push_back(filename);
-  return false;
-}
+  CompoundZipProcessor processor({&extract_blaze_processor,
+                                  &install_key_processor});
+  if (!blaze_util::MakeDirectories(output_dir, 0777)) {
+    BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
+        << "couldn't create '" << output_dir
+        << "': " << blaze_util::GetLastErrorString();
+  }
 
-void NoteAllFilesZipProcessor::Process(const char *filename,
-                                       const devtools_ijar::u4 attr,
-                                       const devtools_ijar::u1 *data,
-                                       const size_t size) {
-  BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
-      << "NoteAllFilesZipProcessor::Process shouldn't be called";
-}
+  BAZEL_LOG(USER) << "Extracting " << product_name
+                  << " installation...";
 
-ExtractBlazeZipProcessor::ExtractBlazeZipProcessor(
-    const string &embedded_binaries, blaze::embedded_binaries::Dumper *dumper)
-    : embedded_binaries_(embedded_binaries), dumper_(dumper) {}
+  std::unique_ptr<devtools_ijar::ZipExtractor> extractor(
+      devtools_ijar::ZipExtractor::Create(archive_path.c_str(), &processor));
+  if (extractor == NULL) {
+    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+        << "Failed to open " << product_name
+        << " as a zip file: " << blaze_util::GetLastErrorString();
+  }
+  if (extractor->ProcessAll() < 0) {
+    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+        << "Failed to extract " << product_name
+        << " as a zip file: " << extractor->GetError();
+  }
 
-bool ExtractBlazeZipProcessor::AcceptPure(const char *filename,
-                                          const devtools_ijar::u4 attr) const {
-  return !devtools_ijar::zipattr_is_dir(attr);
-}
+  if (!dumper->Finish(&error)) {
+    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+        << "Failed to extract embedded binaries: " << error;
+  }
 
-void ExtractBlazeZipProcessor::Process(const char *filename,
-                                       const devtools_ijar::u4 attr,
-                                       const devtools_ijar::u1 *data,
-                                       const size_t size) {
-  dumper_->Dump(data, size, blaze_util::JoinPath(embedded_binaries_, filename));
+  if (install_md5 != expected_install_md5) {
+    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+        << "The " << product_name << " binary at " << archive_path
+        << " was replaced during the client's self-extraction (old md5: "
+        << expected_install_md5 << " new md5: " << install_md5
+        << "). If you expected this then you should simply re-run "
+        << product_name
+        << " in order to pick up the different version. If you didn't expect "
+           "this then you should investigate what happened.";
+  }
 }
 
 }  // namespace blaze
diff --git a/src/main/cpp/archive_utils.h b/src/main/cpp/archive_utils.h
index ae25461..694900b 100644
--- a/src/main/cpp/archive_utils.h
+++ b/src/main/cpp/archive_utils.h
@@ -18,99 +18,22 @@
 #include <string>
 #include <vector>
 
-#include "src/main/cpp/blaze_util_platform.h"
-#include "src/main/cpp/util/strings.h"
-#include "third_party/ijar/zip.h"
-
 namespace blaze {
 
-using std::vector;
-using std::string;
+// Determines the contents of the archive, storing the names of the contained
+// files into `files` and the install md5 key into `install_md5`.
+void DetermineArchiveContents(const std::string &archive_path,
+                              const std::string &product_name,
+                              std::vector<std::string> *files,
+                              std::string *install_md5);
 
-// A devtools_ijar::ZipExtractorProcessor that has a pure version of Accept.
-class PureZipExtractorProcessor : public devtools_ijar::ZipExtractorProcessor {
- public:
-  virtual ~PureZipExtractorProcessor() {}
-
-  // Like devtools_ijar::ZipExtractorProcessor::Accept, but is guaranteed to not
-  // have side-effects.
-  virtual bool AcceptPure(const char *filename,
-                          const devtools_ijar::u4 attr) const = 0;
-};
-
-// A devtools_ijar::ZipExtractorProcessor that processes the ZIP entries using
-// the given PureZipExtractorProcessors.
-class CompoundZipProcessor : public devtools_ijar::ZipExtractorProcessor {
- public:
-  explicit CompoundZipProcessor(
-      const vector<PureZipExtractorProcessor *> &processors);
-
-  bool Accept(const char *filename, const devtools_ijar::u4 attr) override;
-
-  void Process(const char *filename, const devtools_ijar::u4 attr,
-               const devtools_ijar::u1 *data, const size_t size) override;
-
- private:
-  const vector<PureZipExtractorProcessor *> processors_;
-};
-
-// A PureZipExtractorProcessor to extract the InstallKeyFile
-class GetInstallKeyFileProcessor : public PureZipExtractorProcessor {
- public:
-  explicit GetInstallKeyFileProcessor(string *install_base_key);
-
-  bool Accept(const char *filename, const devtools_ijar::u4 attr) override {
-    return AcceptPure(filename, attr);
-  }
-
-  bool AcceptPure(const char *filename,
-                  const devtools_ijar::u4 attr) const override;
-
-  void Process(const char *filename, const devtools_ijar::u4 attr,
-               const devtools_ijar::u1 *data, const size_t size) override;
-
- private:
-  string *install_base_key_;
-};
-
-// A PureZipExtractorProcessor that adds the names of all the files ZIP up in
-// the Blaze binary to the given vector.
-class NoteAllFilesZipProcessor : public PureZipExtractorProcessor {
- public:
-  explicit NoteAllFilesZipProcessor(std::vector<std::string> *files);
-
-  bool AcceptPure(const char *filename,
-                  const devtools_ijar::u4 attr) const override;
-
-  bool Accept(const char *filename, const devtools_ijar::u4 attr) override;
-
-  void Process(const char *filename, const devtools_ijar::u4 attr,
-               const devtools_ijar::u1 *data, const size_t size) override;
-
- private:
-  std::vector<std::string> *files_;
-};
-
-// A PureZipExtractorProcessor to extract the files from the blaze zip.
-class ExtractBlazeZipProcessor : public PureZipExtractorProcessor {
- public:
-  explicit ExtractBlazeZipProcessor(const string &embedded_binaries,
-                                    blaze::embedded_binaries::Dumper *dumper);
-
-  bool AcceptPure(const char *filename,
-                  const devtools_ijar::u4 attr) const override;
-
-  bool Accept(const char *filename, const devtools_ijar::u4 attr) override {
-    return AcceptPure(filename, attr);
-  }
-
-  void Process(const char *filename, const devtools_ijar::u4 attr,
-               const devtools_ijar::u1 *data, const size_t size) override;
-
- private:
-  const string embedded_binaries_;
-  blaze::embedded_binaries::Dumper *dumper_;
-};
+// Extracts the embedded data files in `archive_path` into `output_dir`.
+// Fails if `expected_install_md5` doesn't match the key contained in the
+// archive, as this could indicate that the contents have unexpectedly changed.
+void ExtractArchiveOrDie(const std::string &archive_path,
+                         const std::string &product_name,
+                         const std::string &expected_install_md5,
+                         const std::string &output_dir);
 
 }  // namespace blaze
 
diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc
index 23ee05c..3114fab 100644
--- a/src/main/cpp/blaze.cc
+++ b/src/main/cpp/blaze.cc
@@ -259,32 +259,6 @@
 static map<string, EnvVarValue> PrepareEnvironmentForJvm();
 
 
-// Populates globals->install_md5 and globals->extracted_binaries by reading the
-// ZIP entries in the Blaze binary.
-static void ComputeInstallMd5AndNoteAllFiles(const string &self_path) {
-  NoteAllFilesZipProcessor note_all_files_processor(
-      &globals->extracted_binaries);
-  GetInstallKeyFileProcessor install_key_processor(&globals->install_md5);
-  CompoundZipProcessor processor({&note_all_files_processor,
-                                  &install_key_processor});
-  std::unique_ptr<devtools_ijar::ZipExtractor> extractor(
-      devtools_ijar::ZipExtractor::Create(self_path.c_str(), &processor));
-  if (extractor == NULL) {
-    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
-        << "Failed to open " << globals->options->product_name
-        << " as a zip file: " << GetLastErrorString();
-  }
-  if (extractor->ProcessAll() < 0) {
-    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
-        << "Failed to extract install_base_key: " << extractor->GetError();
-  }
-
-  if (globals->install_md5.empty()) {
-    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
-        << "Failed to find install_base_key's in zip file";
-  }
-}
-
 // Escapes colons by replacing them with '_C' and underscores by replacing them
 // with '_U'. E.g. "name:foo_bar" becomes "name_Cfoo_Ubar"
 static string EscapeForOptionSource(const string &input) {
@@ -804,62 +778,7 @@
       << "couldn't connect to server (" << server_pid << ") after 120 seconds.";
 }
 
-// Actually extracts the embedded data files into the tree whose root
-// is 'embedded_binaries'.
-static void ActuallyExtractData(const string &argv0,
-                                const string &embedded_binaries) {
-  std::string install_md5;
-  GetInstallKeyFileProcessor install_key_processor(&install_md5);
-
-  std::string error;
-  std::unique_ptr<blaze::embedded_binaries::Dumper> dumper(
-      blaze::embedded_binaries::Create(&error));
-  if (dumper == nullptr) {
-    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) << error;
-  }
-  ExtractBlazeZipProcessor extract_blaze_processor(embedded_binaries,
-                                                   dumper.get());
-
-  CompoundZipProcessor processor({&extract_blaze_processor,
-                                  &install_key_processor});
-  if (!blaze_util::MakeDirectories(embedded_binaries, 0777)) {
-    BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
-        << "couldn't create '" << embedded_binaries
-        << "': " << GetLastErrorString();
-  }
-
-  BAZEL_LOG(USER) << "Extracting " << globals->options->product_name
-                  << " installation...";
-
-  std::unique_ptr<devtools_ijar::ZipExtractor> extractor(
-      devtools_ijar::ZipExtractor::Create(argv0.c_str(), &processor));
-  if (extractor == NULL) {
-    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
-        << "Failed to open " << globals->options->product_name
-        << " as a zip file: " << GetLastErrorString();
-  }
-  if (extractor->ProcessAll() < 0) {
-    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
-        << "Failed to extract " << globals->options->product_name
-        << " as a zip file: " << extractor->GetError();
-  }
-
-  if (!dumper->Finish(&error)) {
-    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
-        << "Failed to extract embedded binaries: " << error;
-  }
-
-  if (install_md5 != globals->install_md5) {
-    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
-        << "The " << globals->options->product_name << " binary at " << argv0
-        << " was replaced during the client's self-extraction (old md5: "
-        << globals->install_md5 << " new md5: " << install_md5
-        << "). If you expected this then you should simply re-run "
-        << globals->options->product_name
-        << " in order to pick up the different version. If you didn't expect "
-           "this then you should investigate what happened.";
-  }
-
+static void MoveFiles(const string &embedded_binaries) {
   // Set the timestamps of the extracted files to the future and make sure (or
   // at least as sure as we can...) that the files we have written are actually
   // on the disk.
@@ -909,6 +828,7 @@
   blaze_util::SyncFile(embedded_binaries);
 }
 
+
 // Installs Blaze by extracting the embedded data files, iff necessary.
 // The MD5-named install_base directory on disk is trusted; we assume
 // no-one has modified the extracted files beneath this directory once
@@ -924,7 +844,12 @@
                          blaze::GetProcessIdAsString();
     string tmp_binaries =
         blaze_util::JoinPath(tmp_install, "_embedded_binaries");
-    ActuallyExtractData(self_path, tmp_binaries);
+    ExtractArchiveOrDie(
+        self_path,
+        globals->options->product_name,
+        globals->install_md5,
+        tmp_binaries);
+    MoveFiles(tmp_binaries);
 
     uint64_t et = GetMillisecondsMonotonic();
     globals->extract_data_time = et - st;
@@ -1228,19 +1153,20 @@
     globals->options->batch = true;
   }
 
+  DetermineArchiveContents(
+      self_path,
+      globals->options->product_name,
+      &globals->extracted_binaries,
+      &globals->install_md5);
+
   // The default install_base is <output_user_root>/install/<md5(blaze)>
   // but if an install_base is specified on the command line, we use that as
   // the base instead.
   if (globals->options->install_base.empty()) {
     string install_user_root =
         blaze_util::JoinPath(globals->options->output_user_root, "install");
-    ComputeInstallMd5AndNoteAllFiles(self_path);
     globals->options->install_base = blaze_util::JoinPath(install_user_root,
                                                           globals->install_md5);
-  } else {
-    // We still need to populate globals->install_md5 and
-    // globals->extracted_binaries.
-    ComputeInstallMd5AndNoteAllFiles(self_path);
   }
 
   if (globals->options->output_base.empty()) {