Add a basic test for blaze archive extraction.
Blaze expects every extracted archive file to have an `mtime` more than 9 years in the future; see `blaze_util::IFileMtime::IsUntampered`.
The new test checks that an archive passed to `ExtractArchiveOrDie` is extracted, and that after `BlessFiles` runs on the output directory each extracted file's `mtime` adheres to this requirement.
Also moved `BlessFiles` to `archive_utils`.
PiperOrigin-RevId: 534233490
Change-Id: I03ef4277e6cfe58112d5cc8b0d929e161f778133
diff --git a/src/main/cpp/archive_utils.cc b/src/main/cpp/archive_utils.cc
index b20b6cb..b24e7b0 100644
--- a/src/main/cpp/archive_utils.cc
+++ b/src/main/cpp/archive_utils.cc
@@ -11,12 +11,14 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+#include "src/main/cpp/archive_utils.h"
+
#include <functional>
#include <memory>
+#include <set>
#include <string>
#include <vector>
-#include "src/main/cpp/archive_utils.h"
#include "src/main/cpp/blaze_util_platform.h"
#include "src/main/cpp/util/errors.h"
#include "src/main/cpp/util/exit_code.h"
@@ -28,6 +30,7 @@
namespace blaze {
+using std::set;
using std::string;
using std::vector;
@@ -146,6 +149,58 @@
}
}
+void BlessFiles(const string &embedded_binaries) {
+ blaze_util::Path embedded_binaries_(embedded_binaries);
+
+ // Give every file under embedded_binaries a distant-future mtime, then sync
+ // each file and its enclosing directories so that (or at least as sure as we
+ // can be...) the files we have written are actually on the disk.
+
+ vector<string> extracted_files;
+
+ // Walks embedded_binaries recursively and collects full file paths.
+ blaze_util::GetAllFilesUnder(embedded_binaries, &extracted_files);
+
+ std::unique_ptr<blaze_util::IFileMtime> mtime(blaze_util::CreateFileMtime());
+ set<blaze_util::Path> synced_directories;
+ for (const auto &f : extracted_files) {
+ blaze_util::Path it(f);
+
+ // Set the time to a distantly futuristic value so we can observe tampering.
+ // Note that keeping a static, deterministic timestamp, such as the default
+ // timestamp set by unzip (1970-01-01) and using that to detect tampering is
+ // not enough, because we also need the timestamp to change between Bazel
+ // releases so that the metadata cache knows that the files may have
+ // changed. This is essential for the correctness of actions that use
+ // embedded binaries as artifacts.
+ if (!mtime->SetToDistantFuture(it)) {
+ string err = blaze_util::GetLastErrorString();
+ BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+ << "failed to set timestamp on '" << it.AsPrintablePath()
+ << "': " << err;
+ }
+
+ blaze_util::SyncFile(it);
+
+ blaze_util::Path directory = it.GetParent();
+
+ // Now walk up until embedded_binaries and sync every directory in between.
+ // synced_directories is used to avoid syncing the same directory twice.
+ // The !directory.IsEmpty() and !blaze_util::IsRootDirectory(directory)
+ // conditions are not strictly needed, but they make this loop more robust,
+ // because otherwise, if due to some glitch, directory was not under
+ // embedded_binaries, it would get into an infinite loop.
+ while (directory != embedded_binaries_ && !directory.IsEmpty() &&
+ !blaze_util::IsRootDirectory(directory) &&
+ synced_directories.insert(directory).second) {
+ blaze_util::SyncFile(directory);
+ directory = directory.GetParent();
+ }
+ }
+
+ blaze_util::SyncFile(embedded_binaries_);
+}
+
void ExtractBuildLabel(const string &archive_path, string *build_label) {
PartialZipExtractor pze;
*build_label = pze.UnzipUntil(archive_path, "build-label.txt");
diff --git a/src/main/cpp/archive_utils.h b/src/main/cpp/archive_utils.h
index f970080..e74ae67 100644
--- a/src/main/cpp/archive_utils.h
+++ b/src/main/cpp/archive_utils.h
@@ -35,6 +35,13 @@
const std::string &expected_install_md5,
const std::string &output_dir);
+// Sets the timestamps of the extracted files to the future via
+// blaze_util::IFileMtime::SetToDistantFuture and ensures that the files we
+// have written are actually on the disk. Later, the blaze client calls
+// blaze_util::IFileMtime::IsUntampered to ensure the files were "blessed" with
+// these distant mtimes.
+void BlessFiles(const std::string &embedded_binaries);
+
// Retrieves the build label (version string) from `archive_path` into
// `build_label`.
void ExtractBuildLabel(const std::string &archive_path,
diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc
index 1ef38f8..a8ae099 100644
--- a/src/main/cpp/blaze.cc
+++ b/src/main/cpp/blaze.cc
@@ -901,58 +901,6 @@
delete server_startup;
}
-static void BlessFiles(const string &embedded_binaries) {
- blaze_util::Path embedded_binaries_(embedded_binaries);
-
- // Set the timestamps of the extracted files to the future and make sure (or
- // at least as sure as we can...) that the files we have written are actually
- // on the disk.
-
- vector<string> extracted_files;
-
- // Walks the temporary directory recursively and collects full file paths.
- blaze_util::GetAllFilesUnder(embedded_binaries, &extracted_files);
-
- std::unique_ptr<blaze_util::IFileMtime> mtime(blaze_util::CreateFileMtime());
- set<blaze_util::Path> synced_directories;
- for (const auto &f : extracted_files) {
- blaze_util::Path it(f);
-
- // Set the time to a distantly futuristic value so we can observe tampering.
- // Note that keeping a static, deterministic timestamp, such as the default
- // timestamp set by unzip (1970-01-01) and using that to detect tampering is
- // not enough, because we also need the timestamp to change between Bazel
- // releases so that the metadata cache knows that the files may have
- // changed. This is essential for the correctness of actions that use
- // embedded binaries as artifacts.
- if (!mtime->SetToDistantFuture(it)) {
- string err = GetLastErrorString();
- BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
- << "failed to set timestamp on '" << it.AsPrintablePath()
- << "': " << err;
- }
-
- blaze_util::SyncFile(it);
-
- blaze_util::Path directory = it.GetParent();
-
- // Now walk up until embedded_binaries and sync every directory in between.
- // synced_directories is used to avoid syncing the same directory twice.
- // The !directory.empty() and !blaze_util::IsRootDirectory(directory)
- // conditions are not strictly needed, but it makes this loop more robust,
- // because otherwise, if due to some glitch, directory was not under
- // embedded_binaries, it would get into an infinite loop.
- while (directory != embedded_binaries_ && !directory.IsEmpty() &&
- !blaze_util::IsRootDirectory(directory) &&
- synced_directories.insert(directory).second) {
- blaze_util::SyncFile(directory);
- directory = directory.GetParent();
- }
- }
-
- blaze_util::SyncFile(embedded_binaries_);
-}
-
// Installs Blaze by extracting the embedded data files, iff necessary.
// The MD5-named install_base directory on disk is trusted; we assume
// no-one has modified the extracted files beneath this directory once
diff --git a/src/test/cpp/blaze_archive_test.cc b/src/test/cpp/blaze_archive_test.cc
new file mode 100644
index 0000000..c7fe579
--- /dev/null
+++ b/src/test/cpp/blaze_archive_test.cc
@@ -0,0 +1,102 @@
+// Copyright 2023 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdlib.h>
+
+#include "file/base/helpers.h"
+#include "file/base/path.h"
+#include "file/util/temp_path.h"
+#include "file/zipfile/zipfilewriter.h"
+#include "src/main/cpp/archive_utils.h"
+#include "googlemock/include/gmock/gmock.h"
+#include "googletest/include/gtest/gtest.h"
+#include "third_party/absl/strings/escaping.h"
+#include "src/main/cpp/blaze.h"
+#include "src/main/cpp/util/file_platform.h"
+
+using ::testing::Gt;
+using ::testing::status::IsOkAndHolds;
+
+namespace blaze {
+
+// Writes a zip archive to `path` with one entry per (name, contents) pair in
+// `blaze_zip_file_to_contents`, in the order given. Returns the install-base
+// directory name ("install_base") once the writer has been closed; returns
+// early through ASSIGN_OR_RETURN / RETURN_IF_ERROR if creating or closing the
+// writer fails.
+static absl::StatusOr<std::string> MakeZipAndReturnInstallBase(
+    absl::string_view path, std::vector<std::pair<std::string, std::string>>
+                                blaze_zip_file_to_contents) {
+  ASSIGN_OR_RETURN(
+      auto writer,
+      file_zipfile::ZipfileWriter::Create(path, file::Defaults()));
+  // Bind by reference: a by-value loop variable would copy both strings of
+  // every entry on each iteration.
+  for (const auto &file_and_contents : blaze_zip_file_to_contents) {
+    // NOTE(review): the result of AddFileFromString is not inspected here;
+    // confirm whether it can report a failure that should be propagated.
+    writer->AddFileFromString(file_and_contents.first,
+                              file_and_contents.second);
+  }
+  RETURN_IF_ERROR(writer->CloseWithStatus(file::Defaults()));
+  return "install_base";
+}
+
+// Looks up the modification time of `path` via file::Stat, requesting only
+// STAT_MTIME_NSECS, and converts the nanosecond value to an absl::Time. A
+// failed stat is handed back to the caller through ASSIGN_OR_RETURN.
+auto get_mtime = [](absl::string_view path) -> absl::StatusOr<absl::Time> {
+  const auto mtime_only = file::StatMask(tech::file::STAT_MTIME_NSECS);
+  ASSIGN_OR_RETURN(const auto file_stat, file::Stat(path, mtime_only));
+  return absl::FromUnixNanos(file_stat.mtime_nsecs());
+};
+
+// TODO(b/269617634) - add more tests to formalize archive unpacking.
+// Fixture for the archive extraction tests. It owns a TempPath (constructed
+// with TempPath::Local) under which a test places the archive it builds and
+// the directory it extracts into.
+class BlazeArchiveTest : public ::testing::Test {
+ protected:
+  BlazeArchiveTest() = default;
+  ~BlazeArchiveTest() override = default;
+
+  const TempPath temp_{TempPath::Local};
+};
+
+// Builds a small archive, extracts it with ExtractArchiveOrDie, runs
+// BlessFiles on the output directory, and then checks both the extracted
+// contents and that every file's mtime is more than 9 years in the future.
+TEST_F(BlazeArchiveTest, TestZipExtractionAndFarOutMTimes) {
+  const std::string blaze_path = file::JoinPath(temp_.path(), "blaze");
+  // Archive entries as (name, contents). The install_base_key entry holds the
+  // same md5 string that is handed to ExtractArchiveOrDie below.
+  const std::vector<std::pair<std::string, std::string>>
+      blaze_zip_file_to_contents = {
+          {"foo", "foo content"},
+          {"bar", "bar content"},
+          {"path/to/subdir/baz", "baz content"},
+          {"install_base_key", "expected_install_md5"},
+      };
+  ASSERT_OK_AND_ASSIGN(
+      const std::string install_base,
+      MakeZipAndReturnInstallBase(blaze_path, blaze_zip_file_to_contents));
+  const std::string install_dir = file::JoinPath(temp_.path(), install_base);
+  ASSERT_OK(file::RecursivelyCreateDir(install_dir, file::CreationMode(0750)));
+
+  // Extract first, then bless: BlessFiles operates on whatever is already
+  // present under the directory it is given.
+  ExtractArchiveOrDie(blaze_path, "blaze", "expected_install_md5", install_dir);
+  BlessFiles(install_dir);
+
+  const std::string foo_file = file::JoinPath(install_dir, "foo");
+  const std::string bar_file = file::JoinPath(install_dir, "bar");
+  const std::string baz_file =
+      file::JoinPath(install_dir, "path/to/subdir/baz");
+
+  // Every regular entry must have been extracted with its contents intact.
+  EXPECT_THAT(file::GetContents(foo_file, file::Defaults()),
+              IsOkAndHolds("foo content"));
+  EXPECT_THAT(file::GetContents(bar_file, file::Defaults()),
+              IsOkAndHolds("bar content"));
+  EXPECT_THAT(file::GetContents(baz_file, file::Defaults()),
+              IsOkAndHolds("baz content"));
+
+  // The same check the blaze client performs on extracted files.
+  std::unique_ptr<blaze_util::IFileMtime> file_mtime(
+      blaze_util::CreateFileMtime());
+  EXPECT_TRUE(file_mtime->IsUntampered(blaze_util::Path(foo_file)));
+  EXPECT_TRUE(file_mtime->IsUntampered(blaze_util::Path(bar_file)));
+  EXPECT_TRUE(file_mtime->IsUntampered(blaze_util::Path(baz_file)));
+
+  // Independently confirm the raw mtimes lie more than 9 years ahead of now.
+  const auto nine_years_from_now = absl::Now() + absl::Hours(24 * 365 * 9);
+  EXPECT_THAT(get_mtime(foo_file), IsOkAndHolds(Gt(nine_years_from_now)));
+  EXPECT_THAT(get_mtime(bar_file), IsOkAndHolds(Gt(nine_years_from_now)));
+  EXPECT_THAT(get_mtime(baz_file), IsOkAndHolds(Gt(nine_years_from_now)));
+}
+} // namespace blaze