Add a basic test for blaze archive extraction.

Blaze expects every extracted archive file to have an `mtime` more than 9 years in the future (see `blaze_util::IFileMtime::IsUntampered`).

The new test checks that archives passed to `ExtractArchiveOrDie` and `BlessFiles` are extracted and that `mtime` adheres to this requirement.

Also move `BlessFiles` into `archive_utils`.

PiperOrigin-RevId: 534233490
Change-Id: I03ef4277e6cfe58112d5cc8b0d929e161f778133
diff --git a/src/main/cpp/archive_utils.cc b/src/main/cpp/archive_utils.cc
index b20b6cb..b24e7b0 100644
--- a/src/main/cpp/archive_utils.cc
+++ b/src/main/cpp/archive_utils.cc
@@ -11,12 +11,14 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "src/main/cpp/archive_utils.h"
+
 #include <functional>
 #include <memory>
+#include <set>
 #include <string>
 #include <vector>
 
-#include "src/main/cpp/archive_utils.h"
 #include "src/main/cpp/blaze_util_platform.h"
 #include "src/main/cpp/util/errors.h"
 #include "src/main/cpp/util/exit_code.h"
@@ -28,6 +30,7 @@
 
 namespace blaze {
 
+using std::set;
 using std::string;
 using std::vector;
 
@@ -146,6 +149,58 @@
   }
 }
 
+void BlessFiles(const string &embedded_binaries) {
+  blaze_util::Path embedded_binaries_(embedded_binaries);
+
+  // Set the timestamps of the extracted files to the future and make sure (or
+  // at least as sure as we can...) that the files we have written are actually
+  // on the disk.
+
+  vector<string> extracted_files;
+
+  // Walks embedded_binaries recursively and collects full file paths.
+  blaze_util::GetAllFilesUnder(embedded_binaries, &extracted_files);
+
+  std::unique_ptr<blaze_util::IFileMtime> mtime(blaze_util::CreateFileMtime());
+  set<blaze_util::Path> synced_directories;
+  for (const auto &f : extracted_files) {
+    blaze_util::Path it(f);
+
+    // Set the time to a distantly futuristic value so we can observe tampering.
+    // Note that keeping a static, deterministic timestamp, such as the default
+    // timestamp set by unzip (1970-01-01) and using that to detect tampering is
+    // not enough, because we also need the timestamp to change between Bazel
+    // releases so that the metadata cache knows that the files may have
+    // changed. This is essential for the correctness of actions that use
+    // embedded binaries as artifacts.
+    if (!mtime->SetToDistantFuture(it)) {
+      string err = blaze_util::GetLastErrorString();
+      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+          << "failed to set timestamp on '" << it.AsPrintablePath()
+          << "': " << err;
+    }
+
+    blaze_util::SyncFile(it);
+
+    blaze_util::Path directory = it.GetParent();
+
+    // Now walk up until embedded_binaries and sync every directory in between.
+    // synced_directories is used to avoid syncing the same directory twice.
+    // The !directory.IsEmpty() and !blaze_util::IsRootDirectory(directory)
+    // conditions are not strictly needed, but they make this loop more robust:
+    // if, due to some glitch, directory were not under embedded_binaries, the
+    // loop could otherwise run forever.
+    while (directory != embedded_binaries_ && !directory.IsEmpty() &&
+           !blaze_util::IsRootDirectory(directory) &&
+           synced_directories.insert(directory).second) {
+      blaze_util::SyncFile(directory);
+      directory = directory.GetParent();
+    }
+  }
+
+  blaze_util::SyncFile(embedded_binaries_);
+}
+
 void ExtractBuildLabel(const string &archive_path, string *build_label) {
   PartialZipExtractor pze;
   *build_label = pze.UnzipUntil(archive_path, "build-label.txt");