Add a basic test for blaze archive extraction.
Blaze expects all extracted archive files to have an `mtime` more than 9 years in the future - see `blaze_util::IFileMtime::IsUntampered`.
The new test checks that archives passed to `ExtractArchiveOrDie` and `BlessFiles` are extracted and that `mtime` adheres to this requirement.
Also move `BlessFiles` to `archive_utils`.
PiperOrigin-RevId: 534233490
Change-Id: I03ef4277e6cfe58112d5cc8b0d929e161f778133
diff --git a/src/main/cpp/archive_utils.cc b/src/main/cpp/archive_utils.cc
index b20b6cb..b24e7b0 100644
--- a/src/main/cpp/archive_utils.cc
+++ b/src/main/cpp/archive_utils.cc
@@ -11,12 +11,14 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+#include "src/main/cpp/archive_utils.h"
+
#include <functional>
#include <memory>
+#include <set>
#include <string>
#include <vector>
-#include "src/main/cpp/archive_utils.h"
#include "src/main/cpp/blaze_util_platform.h"
#include "src/main/cpp/util/errors.h"
#include "src/main/cpp/util/exit_code.h"
@@ -28,6 +30,7 @@
namespace blaze {
+using std::set;
using std::string;
using std::vector;
@@ -146,6 +149,58 @@
}
}
+void BlessFiles(const string &embedded_binaries) {
+ blaze_util::Path embedded_binaries_(embedded_binaries);
+
+ // Set the timestamps of the extracted files to the distant future (dying if
+ // that fails) and make sure, as far as we can, that the files we have
+ // written are actually on the disk.
+
+ vector<string> extracted_files;
+
+ // Walks embedded_binaries recursively and collects full file paths.
+ blaze_util::GetAllFilesUnder(embedded_binaries, &extracted_files);
+
+ std::unique_ptr<blaze_util::IFileMtime> mtime(blaze_util::CreateFileMtime());
+ set<blaze_util::Path> synced_directories;
+ for (const auto &f : extracted_files) {
+ blaze_util::Path it(f);
+
+ // Set the time to a distantly futuristic value so we can observe tampering.
+ // Note that keeping a static, deterministic timestamp, such as the default
+ // timestamp set by unzip (1970-01-01) and using that to detect tampering is
+ // not enough, because we also need the timestamp to change between Bazel
+ // releases so that the metadata cache knows that the files may have
+ // changed. This is essential for the correctness of actions that use
+ // embedded binaries as artifacts.
+ if (!mtime->SetToDistantFuture(it)) {
+ string err = blaze_util::GetLastErrorString();
+ BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
+ << "failed to set timestamp on '" << it.AsPrintablePath()
+ << "': " << err;
+ }
+
+ blaze_util::SyncFile(it);
+
+ blaze_util::Path directory = it.GetParent();
+
+ // Now walk up until embedded_binaries and sync every directory in between.
+ // synced_directories is used to avoid syncing the same directory twice.
+ // The !directory.IsEmpty() and !blaze_util::IsRootDirectory(directory)
+ // conditions are not strictly needed, but they make this loop more robust,
+ // because otherwise, if due to some glitch, directory was not under
+ // embedded_binaries, it would get into an infinite loop.
+ while (directory != embedded_binaries_ && !directory.IsEmpty() &&
+ !blaze_util::IsRootDirectory(directory) &&
+ synced_directories.insert(directory).second) {
+ blaze_util::SyncFile(directory);
+ directory = directory.GetParent();
+ }
+ }
+
+ blaze_util::SyncFile(embedded_binaries_);
+}
+
void ExtractBuildLabel(const string &archive_path, string *build_label) {
PartialZipExtractor pze;
*build_label = pze.UnzipUntil(archive_path, "build-label.txt");