blob: a86d803d7b5084e3636af9b3eba4e0d25afadc01 [file] [log] [blame]
// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "src/main/cpp/archive_utils.h"
#include <functional>
#include <memory>
#include <set>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "src/main/cpp/blaze_util_platform.h"
#include "src/main/cpp/startup_options.h"
#include "src/main/cpp/util/errors.h"
#include "src/main/cpp/util/exit_code.h"
#include "src/main/cpp/util/file.h"
#include "src/main/cpp/util/logging.h"
#include "src/main/cpp/util/path.h"
#include "src/main/cpp/util/strings.h"
#include "third_party/ijar/zip.h"
namespace blaze {
using std::set;
using std::string;
using std::vector;
// Zip processor that walks an archive member by member and stops as soon as
// a designated member has been read. Directory entries are skipped. The
// object keeps per-scan state in its members, so one instance must not be
// used for two scans at the same time.
struct PartialZipExtractor : public devtools_ijar::ZipExtractorProcessor {
  using CallbackType =
      std::function<void(const char *name, const char *data, size_t size)>;

  // Scan the zip file "archive_path" until a file named "stop_entry" is seen,
  // then stop.
  // If entry_names is not nullptr, it receives a list of all file members
  // up to and including "stop_entry".
  // If a callback is given, it is run with the name and contents of
  // each such member.
  // Returns the contents of the "stop_entry" member.
  // Dies (BAZEL_DIE) if the archive cannot be opened, if the extractor
  // reports an error, or if "stop_entry" is never found.
  string UnzipUntil(const string &archive_path, const string &stop_entry,
                    vector<string> *entry_names = nullptr,
                    CallbackType &&callback = {}) {
    std::unique_ptr<devtools_ijar::ZipExtractor> extractor(
        devtools_ijar::ZipExtractor::Create(archive_path.c_str(), this));
    if (!extractor) {
      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
          << "Failed to open '" << archive_path
          << "' as a zip file: " << blaze_util::GetLastErrorString();
    }

    // Reset all per-scan state so nothing leaks over from an earlier scan.
    stop_name_ = stop_entry;
    stop_value_.clear();
    seen_names_.clear();
    // The parameter is an rvalue reference: take ownership instead of copying.
    callback_ = std::move(callback);
    done_ = false;

    while (!done_ && extractor->ProcessNext()) {
      // Scan zip until EOF, an error, or Accept() has seen stop_entry.
    }

    // The callback may capture caller-owned state by reference; do not keep
    // it alive in this object once the scan is over.
    callback_ = nullptr;

    if (const char *err = extractor->GetError()) {
      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
          << "Error reading zip file '" << archive_path << "': " << err;
    }
    if (!done_) {
      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
          << "Failed to find member '" << stop_entry << "' in zip file '"
          << archive_path << "'";
    }

    if (entry_names) *entry_names = std::move(seen_names_);
    // Hand the buffer to the caller rather than copying it.
    return std::move(stop_value_);
  }

  // Decides whether the contents of "filename" should be handed to Process().
  // Directories are rejected. Every other member name is recorded, and the
  // member is accepted when it is the stop entry or when a callback exists.
  bool Accept(const char *filename, devtools_ijar::u4 attr) override {
    if (devtools_ijar::zipattr_is_dir(attr)) return false;
    // Sometimes that fails to detect directories. Check the name too.
    string fn = filename;
    if (fn.empty() || fn.back() == '/') return false;
    if (stop_name_ == fn) done_ = true;
    seen_names_.push_back(std::move(fn));
    return done_ || !!callback_;  // true if a callback was supplied
  }

  // Receives the contents of an accepted member. Once the stop entry has been
  // seen (done_ is set by Accept()), the data is kept as the scan result; the
  // callback, if any, is invoked for every accepted member.
  void Process(const char *filename, devtools_ijar::u4 attr,
               const devtools_ijar::u1 *data, size_t size) override {
    if (done_) {
      stop_value_.assign(reinterpret_cast<const char *>(data), size);
    }
    if (callback_) {
      callback_(filename, reinterpret_cast<const char *>(data), size);
    }
  }

  string stop_name_;           // Member name that ends the scan.
  string stop_value_;          // Contents of the stop member.
  vector<string> seen_names_;  // Non-directory members seen so far.
  CallbackType callback_;      // Optional per-member consumer.
  bool done_ = false;          // Set once the stop member has been accepted.
};
// Installs Blaze by extracting the embedded data files, iff necessary.
// The MD5-named install_base directory on disk is trusted; we assume
// no-one has modified the extracted files beneath this directory once
// it is in place. Concurrency during extraction is handled by
// extracting in a tmp dir and then renaming it into place where it
// becomes visible atomically at the new path.
//
// If startup_options.install_base does not exist, the archive at self_path
// is extracted and the measured extraction time is returned. Otherwise the
// existing directory is validated against archive_contents and
// expected_install_md5, and a default-constructed duration is returned.
// Every failure terminates the process via BAZEL_DIE.
// logging_info is not used by this function.
ExtractionDurationMillis ExtractData(const string &self_path,
                                     const vector<string> &archive_contents,
                                     const string &expected_install_md5,
                                     const StartupOptions &startup_options,
                                     LoggingInfo *logging_info) {
  const string &install_base = startup_options.install_base;
  // If the install dir doesn't exist, create it, if it does, we know it's good.
  if (!blaze_util::PathExists(install_base)) {
    uint64_t st = GetMillisecondsMonotonic();
    // Work in a temp dir to avoid races.
    string tmp_install = blaze_util::CreateTempDir(install_base + ".tmp.");
    ExtractArchiveOrDie(self_path, startup_options.product_name,
                        expected_install_md5, tmp_install);
    BlessFiles(tmp_install);
    uint64_t et = GetMillisecondsMonotonic();
    const ExtractionDurationMillis extract_data_duration(
        et - st, /*archived_extracted=*/true);

    // Now rename the completed installation to its final name.
    // One attempt per second, so this is also the timeout in seconds.
    constexpr int kMaxRenameAttempts = 120;
    int attempts = 0;
    // Error text of the most recent failed rename. It is captured right after
    // the failure because the logging, sleeping and cleanup that follow may
    // overwrite the thread's last-error state.
    string last_rename_error;
    while (attempts < kMaxRenameAttempts) {
      int result = blaze_util::RenameDirectory(tmp_install, install_base);
      if (result == blaze_util::kRenameDirectorySuccess ||
          result == blaze_util::kRenameDirectoryFailureNotEmpty) {
        // If renaming fails because the directory already exists and is not
        // empty, then we assume another good installation snuck in before us.
        blaze_util::RemoveRecursively(tmp_install);
        break;
      }
      last_rename_error = blaze_util::GetLastErrorString();
      // Otherwise the install directory may still be scanned by the antivirus
      // (in case we're running on Windows) so we need to wait for that to
      // finish and try renaming again.
      ++attempts;
      BAZEL_LOG(USER) << "install base directory '" << tmp_install
                      << "' could not be renamed into place after "
                      << attempts << " second(s), trying again\r";
      std::this_thread::sleep_for(std::chrono::seconds(1));
    }

    // Give up renaming after 120 failed attempts / 2 minutes.
    if (attempts == kMaxRenameAttempts) {
      blaze_util::RemoveRecursively(tmp_install);
      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
          << "install base directory '" << tmp_install
          << "' could not be renamed into place: " << last_rename_error;
    }
    return extract_data_duration;
  } else {
    // This would be detected implicitly below, but checking explicitly lets
    // us give a better error message.
    if (!blaze_util::IsDirectory(install_base)) {
      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
          << "install base directory '" << install_base
          << "' could not be created. It exists but is not a directory.";
    }

    blaze_util::Path install_dir(install_base);
    // Check that all files are present and have timestamps from BlessFiles().
    std::unique_ptr<blaze_util::IFileMtime> mtime(
        blaze_util::CreateFileMtime());
    for (const auto &it : archive_contents) {
      blaze_util::Path path = install_dir.GetRelative(it);
      if (!mtime->IsUntampered(path)) {
        BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
            << "corrupt installation: file '" << path.AsPrintablePath()
            << "' is missing or modified.  Please remove '" << install_base
            << "' and try again.";
      }
    }

    // Also check that the installed files claim to match this binary.
    // We check this afterward because the above diagnostic is better
    // for a missing install_base_key file.
    blaze_util::Path key_path = install_dir.GetRelative("install_base_key");
    string on_disk_key;
    if (!blaze_util::ReadFile(key_path, &on_disk_key)) {
      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
          << "cannot read '" << key_path.AsPrintablePath()
          << "': " << blaze_util::GetLastErrorString();
    }
    if (on_disk_key != expected_install_md5) {
      BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
          << "The install_base directory '" << install_base
          << "' contains a different " << startup_options.product_name
          << " version (found " << on_disk_key << " but this binary is "
          << expected_install_md5
          << ").  Remove it or specify a different --install_base.";
    }
    return ExtractionDurationMillis();
  }
}
void DetermineArchiveContents(const string &archive_path, vector<string> *files,
string *install_md5) {
PartialZipExtractor pze;
*install_md5 = pze.UnzipUntil(archive_path, "install_base_key", files);
}
void ExtractArchiveOrDie(const string &archive_path, const string &product_name,
const string &expected_install_md5,
const string &output_dir) {
string error;
std::unique_ptr<blaze::embedded_binaries::Dumper> dumper(
blaze::embedded_binaries::Create(&error));
if (dumper == nullptr) {
BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) << error;
}
if (!blaze_util::PathExists(output_dir)) {
BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
<< "Archive output directory didn't exist: " << output_dir;
}
BAZEL_LOG(USER) << "Extracting " << product_name << " installation...";
PartialZipExtractor pze;
string install_md5 = pze.UnzipUntil(
archive_path, "install_base_key", nullptr,
[&](const char *name, const char *data, size_t size) {
dumper->Dump(data, size, blaze_util::JoinPath(output_dir, name));
});
if (!dumper->Finish(&error)) {
BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
<< "Failed to extract embedded binaries: " << error;
}
if (install_md5 != expected_install_md5) {
BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
<< "The " << product_name << " binary at " << archive_path
<< " was replaced during the client's self-extraction (old md5: "
<< expected_install_md5 << " new md5: " << install_md5
<< "). If you expected this then you should simply re-run "
<< product_name
<< " in order to pick up the different version. If you didn't expect "
"this then you should investigate what happened.";
}
}
void BlessFiles(const string &embedded_binaries) {
blaze_util::Path embedded_binaries_(embedded_binaries);
// Set the timestamps of the extracted files to the future and make sure (or
// at least as sure as we can...) that the files we have written are actually
// on the disk.
vector<string> extracted_files;
// Walks the temporary directory recursively and collects full file paths.
blaze_util::GetAllFilesUnder(embedded_binaries, &extracted_files);
std::unique_ptr<blaze_util::IFileMtime> mtime(blaze_util::CreateFileMtime());
set<blaze_util::Path> synced_directories;
for (const auto &f : extracted_files) {
blaze_util::Path it(f);
// Set the time to a distantly futuristic value so we can observe tampering.
// Note that keeping a static, deterministic timestamp, such as the default
// timestamp set by unzip (1970-01-01) and using that to detect tampering is
// not enough, because we also need the timestamp to change between Bazel
// releases so that the metadata cache knows that the files may have
// changed. This is essential for the correctness of actions that use
// embedded binaries as artifacts.
if (!mtime->SetToDistantFuture(it)) {
string err = blaze_util::GetLastErrorString();
BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
<< "failed to set timestamp on '" << it.AsPrintablePath()
<< "': " << err;
}
blaze_util::SyncFile(it);
blaze_util::Path directory = it.GetParent();
// Now walk up until embedded_binaries and sync every directory in between.
// synced_directories is used to avoid syncing the same directory twice.
// The !directory.empty() and !blaze_util::IsRootDirectory(directory)
// conditions are not strictly needed, but it makes this loop more robust,
// because otherwise, if due to some glitch, directory was not under
// embedded_binaries, it would get into an infinite loop.
while (directory != embedded_binaries_ && !directory.IsEmpty() &&
!blaze_util::IsRootDirectory(directory) &&
synced_directories.insert(directory).second) {
blaze_util::SyncFile(directory);
directory = directory.GetParent();
}
}
blaze_util::SyncFile(embedded_binaries_);
}
void ExtractBuildLabel(const string &archive_path, string *build_label) {
PartialZipExtractor pze;
*build_label = pze.UnzipUntil(archive_path, "build-label.txt");
}
// Returns the first entry of archive_contents, which this function treats as
// the server jar. Dies if archive_contents is empty.
string GetServerJarPath(const vector<string> &archive_contents) {
  const bool has_no_entries = archive_contents.empty();
  if (has_no_entries) {
    BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
        << "Couldn't find server jar in archive";
  }
  return archive_contents.front();
}
} // namespace blaze