blob: a86d803d7b5084e3636af9b3eba4e0d25afadc01 [file] [log] [blame]
// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
Googler9b7b4cc2023-05-22 17:18:04 -070014#include "src/main/cpp/archive_utils.h"
15
Googler9cc03462019-11-05 00:22:26 -080016#include <functional>
17#include <memory>
Googler9b7b4cc2023-05-22 17:18:04 -070018#include <set>
Googler9cc03462019-11-05 00:22:26 -080019#include <string>
Googler057b6b72023-05-25 11:57:59 -070020#include <thread> // NOLINT
michajlo3d8925d2019-05-20 16:10:41 -070021#include <vector>
22
michajlo6f38f342019-05-22 11:53:22 -070023#include "src/main/cpp/blaze_util_platform.h"
Googler057b6b72023-05-25 11:57:59 -070024#include "src/main/cpp/startup_options.h"
michajlo6f38f342019-05-22 11:53:22 -070025#include "src/main/cpp/util/errors.h"
michajlo3d8925d2019-05-20 16:10:41 -070026#include "src/main/cpp/util/exit_code.h"
Googler9cc03462019-11-05 00:22:26 -080027#include "src/main/cpp/util/file.h"
michajlo3d8925d2019-05-20 16:10:41 -070028#include "src/main/cpp/util/logging.h"
29#include "src/main/cpp/util/path.h"
30#include "src/main/cpp/util/strings.h"
31#include "third_party/ijar/zip.h"
32
33namespace blaze {
34
Googler9b7b4cc2023-05-22 17:18:04 -070035using std::set;
michajlo6f38f342019-05-22 11:53:22 -070036using std::string;
Googler9cc03462019-11-05 00:22:26 -080037using std::vector;
michajlo6f38f342019-05-22 11:53:22 -070038
Googler9cc03462019-11-05 00:22:26 -080039struct PartialZipExtractor : public devtools_ijar::ZipExtractorProcessor {
40 using CallbackType =
41 std::function<void(const char *name, const char *data, size_t size)>;
michajlo6f38f342019-05-22 11:53:22 -070042
Googler9cc03462019-11-05 00:22:26 -080043 // Scan the zip file "archive_path" until a file named "stop_entry" is seen,
44 // then stop.
Vertexwahn26c7e102021-03-10 07:25:59 -080045 // If entry_names is not nullptr, it receives a list of all file members
Googlerf8050542019-11-09 13:25:35 -080046 // up to and including "stop_entry".
Googler9cc03462019-11-05 00:22:26 -080047 // If a callback is given, it is run with the name and contents of
48 // each such member.
49 // Returns the contents of the "stop_entry" member.
50 string UnzipUntil(const string &archive_path, const string &stop_entry,
51 vector<string> *entry_names = nullptr,
52 CallbackType &&callback = {}) {
53 std::unique_ptr<devtools_ijar::ZipExtractor> extractor(
54 devtools_ijar::ZipExtractor::Create(archive_path.c_str(), this));
55 if (!extractor) {
michajlo6f38f342019-05-22 11:53:22 -070056 BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
Googler9cc03462019-11-05 00:22:26 -080057 << "Failed to open '" << archive_path
58 << "' as a zip file: " << blaze_util::GetLastErrorString();
michajlo6f38f342019-05-22 11:53:22 -070059 }
Googler9cc03462019-11-05 00:22:26 -080060 stop_name_ = stop_entry;
61 seen_names_.clear();
62 callback_ = callback;
63 done_ = false;
64 while (!done_ && extractor->ProcessNext()) {
65 // Scan zip until EOF, an error, or Accept() has seen stop_entry.
66 }
67 if (const char *err = extractor->GetError()) {
68 BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
69 << "Error reading zip file '" << archive_path << "': " << err;
70 }
71 if (!done_) {
72 BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
73 << "Failed to find member '" << stop_entry << "' in zip file '"
74 << archive_path << "'";
75 }
76 if (entry_names) *entry_names = std::move(seen_names_);
77 return stop_value_;
michajlo6f38f342019-05-22 11:53:22 -070078 }
79
Googler9cc03462019-11-05 00:22:26 -080080 bool Accept(const char *filename, devtools_ijar::u4 attr) override {
Googler9cc03462019-11-05 00:22:26 -080081 if (devtools_ijar::zipattr_is_dir(attr)) return false;
82 // Sometimes that fails to detect directories. Check the name too.
Googlerf8050542019-11-09 13:25:35 -080083 string fn = filename;
Googler9cc03462019-11-05 00:22:26 -080084 if (fn.empty() || fn.back() == '/') return false;
Googlerf8050542019-11-09 13:25:35 -080085 if (stop_name_ == fn) done_ = true;
Googler9cc03462019-11-05 00:22:26 -080086 seen_names_.push_back(std::move(fn));
Googlerf8050542019-11-09 13:25:35 -080087 return done_ || !!callback_; // true if a callback was supplied
Googler9cc03462019-11-05 00:22:26 -080088 }
89
90 void Process(const char *filename, devtools_ijar::u4 attr,
91 const devtools_ijar::u1 *data, size_t size) override {
92 if (done_) {
93 stop_value_.assign(reinterpret_cast<const char *>(data), size);
Googlerf8050542019-11-09 13:25:35 -080094 }
95 if (callback_) {
Googler9cc03462019-11-05 00:22:26 -080096 callback_(filename, reinterpret_cast<const char *>(data), size);
97 }
98 }
99
100 string stop_name_;
101 string stop_value_;
102 vector<string> seen_names_;
103 CallbackType callback_;
104 bool done_ = false;
michajlo6f38f342019-05-22 11:53:22 -0700105};
106
Googler057b6b72023-05-25 11:57:59 -0700107// Installs Blaze by extracting the embedded data files, iff necessary.
108// The MD5-named install_base directory on disk is trusted; we assume
109// no-one has modified the extracted files beneath this directory once
110// it is in place. Concurrency during extraction is handled by
111// extracting in a tmp dir and then renaming it into place where it
112// becomes visible atomically at the new path.
113ExtractionDurationMillis ExtractData(const string &self_path,
114 const vector<string> &archive_contents,
115 const string &expected_install_md5,
116 const StartupOptions &startup_options,
117 LoggingInfo *logging_info) {
118 const string &install_base = startup_options.install_base;
119 // If the install dir doesn't exist, create it, if it does, we know it's good.
120 if (!blaze_util::PathExists(install_base)) {
121 uint64_t st = GetMillisecondsMonotonic();
122 // Work in a temp dir to avoid races.
123 string tmp_install = blaze_util::CreateTempDir(install_base + ".tmp.");
124 ExtractArchiveOrDie(self_path, startup_options.product_name,
125 expected_install_md5, tmp_install);
126 BlessFiles(tmp_install);
127
128 uint64_t et = GetMillisecondsMonotonic();
129 const ExtractionDurationMillis extract_data_duration(
130 et - st, /*archived_extracted=*/true);
131
132 // Now rename the completed installation to its final name.
133 int attempts = 0;
134 while (attempts < 120) {
135 int result = blaze_util::RenameDirectory(tmp_install, install_base);
136 if (result == blaze_util::kRenameDirectorySuccess ||
137 result == blaze_util::kRenameDirectoryFailureNotEmpty) {
138 // If renaming fails because the directory already exists and is not
139 // empty, then we assume another good installation snuck in before us.
140 blaze_util::RemoveRecursively(tmp_install);
141 break;
142 } else {
143 // Otherwise the install directory may still be scanned by the antivirus
144 // (in case we're running on Windows) so we need to wait for that to
145 // finish and try renaming again.
146 ++attempts;
147 BAZEL_LOG(USER) << "install base directory '" << tmp_install
148 << "' could not be renamed into place after "
149 << attempts << " second(s), trying again\r";
150 std::this_thread::sleep_for(std::chrono::seconds(1));
151 }
152 }
153
154 // Give up renaming after 120 failed attempts / 2 minutes.
155 if (attempts == 120) {
156 blaze_util::RemoveRecursively(tmp_install);
157 BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
158 << "install base directory '" << tmp_install
159 << "' could not be renamed into place: "
160 << blaze_util::GetLastErrorString();
161 }
162 return extract_data_duration;
163 } else {
164 // This would be detected implicitly below, but checking explicitly lets
165 // us give a better error message.
166 if (!blaze_util::IsDirectory(install_base)) {
167 BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
168 << "install base directory '" << install_base
169 << "' could not be created. It exists but is not a directory.";
170 }
171 blaze_util::Path install_dir(install_base);
172 // Check that all files are present and have timestamps from BlessFiles().
173 std::unique_ptr<blaze_util::IFileMtime> mtime(
174 blaze_util::CreateFileMtime());
175 for (const auto &it : archive_contents) {
176 blaze_util::Path path = install_dir.GetRelative(it);
177 if (!mtime->IsUntampered(path)) {
178 BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
179 << "corrupt installation: file '" << path.AsPrintablePath()
180 << "' is missing or modified. Please remove '" << install_base
181 << "' and try again.";
182 }
183 }
184 // Also check that the installed files claim to match this binary.
185 // We check this afterward because the above diagnostic is better
186 // for a missing install_base_key file.
187 blaze_util::Path key_path = install_dir.GetRelative("install_base_key");
188 string on_disk_key;
189 if (!blaze_util::ReadFile(key_path, &on_disk_key)) {
190 BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
191 << "cannot read '" << key_path.AsPrintablePath()
192 << "': " << blaze_util::GetLastErrorString();
193 }
194 if (on_disk_key != expected_install_md5) {
195 BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
196 << "The install_base directory '" << install_base
197 << "' contains a different " << startup_options.product_name
198 << " version (found " << on_disk_key << " but this binary is "
199 << expected_install_md5
200 << "). Remove it or specify a different --install_base.";
201 }
202 return ExtractionDurationMillis();
203 }
204}
205
Googler9cc03462019-11-05 00:22:26 -0800206void DetermineArchiveContents(const string &archive_path, vector<string> *files,
207 string *install_md5) {
208 PartialZipExtractor pze;
209 *install_md5 = pze.UnzipUntil(archive_path, "install_base_key", files);
michajlo371a2e32019-05-23 13:14:39 -0700210}
211
Googler9cc03462019-11-05 00:22:26 -0800212void ExtractArchiveOrDie(const string &archive_path, const string &product_name,
michajlo6f38f342019-05-22 11:53:22 -0700213 const string &expected_install_md5,
214 const string &output_dir) {
Googler9cc03462019-11-05 00:22:26 -0800215 string error;
michajlo6f38f342019-05-22 11:53:22 -0700216 std::unique_ptr<blaze::embedded_binaries::Dumper> dumper(
217 blaze::embedded_binaries::Create(&error));
218 if (dumper == nullptr) {
219 BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) << error;
220 }
michajloaeae59a2020-03-27 12:21:25 -0700221
222 if (!blaze_util::PathExists(output_dir)) {
michajlo6f38f342019-05-22 11:53:22 -0700223 BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
michajloaeae59a2020-03-27 12:21:25 -0700224 << "Archive output directory didn't exist: " << output_dir;
michajlo6f38f342019-05-22 11:53:22 -0700225 }
michajlo3d8925d2019-05-20 16:10:41 -0700226
Googler9cc03462019-11-05 00:22:26 -0800227 BAZEL_LOG(USER) << "Extracting " << product_name << " installation...";
michajlo3d8925d2019-05-20 16:10:41 -0700228
Googler9cc03462019-11-05 00:22:26 -0800229 PartialZipExtractor pze;
230 string install_md5 = pze.UnzipUntil(
231 archive_path, "install_base_key", nullptr,
232 [&](const char *name, const char *data, size_t size) {
233 dumper->Dump(data, size, blaze_util::JoinPath(output_dir, name));
234 });
michajlo3d8925d2019-05-20 16:10:41 -0700235
michajlo6f38f342019-05-22 11:53:22 -0700236 if (!dumper->Finish(&error)) {
237 BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
238 << "Failed to extract embedded binaries: " << error;
239 }
michajlo3d8925d2019-05-20 16:10:41 -0700240
michajlo6f38f342019-05-22 11:53:22 -0700241 if (install_md5 != expected_install_md5) {
242 BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
243 << "The " << product_name << " binary at " << archive_path
244 << " was replaced during the client's self-extraction (old md5: "
245 << expected_install_md5 << " new md5: " << install_md5
246 << "). If you expected this then you should simply re-run "
247 << product_name
248 << " in order to pick up the different version. If you didn't expect "
249 "this then you should investigate what happened.";
250 }
michajlo3d8925d2019-05-20 16:10:41 -0700251}
252
Googler9b7b4cc2023-05-22 17:18:04 -0700253void BlessFiles(const string &embedded_binaries) {
254 blaze_util::Path embedded_binaries_(embedded_binaries);
255
256 // Set the timestamps of the extracted files to the future and make sure (or
257 // at least as sure as we can...) that the files we have written are actually
258 // on the disk.
259
260 vector<string> extracted_files;
261
262 // Walks the temporary directory recursively and collects full file paths.
263 blaze_util::GetAllFilesUnder(embedded_binaries, &extracted_files);
264
265 std::unique_ptr<blaze_util::IFileMtime> mtime(blaze_util::CreateFileMtime());
266 set<blaze_util::Path> synced_directories;
267 for (const auto &f : extracted_files) {
268 blaze_util::Path it(f);
269
270 // Set the time to a distantly futuristic value so we can observe tampering.
271 // Note that keeping a static, deterministic timestamp, such as the default
272 // timestamp set by unzip (1970-01-01) and using that to detect tampering is
273 // not enough, because we also need the timestamp to change between Bazel
274 // releases so that the metadata cache knows that the files may have
275 // changed. This is essential for the correctness of actions that use
276 // embedded binaries as artifacts.
277 if (!mtime->SetToDistantFuture(it)) {
278 string err = blaze_util::GetLastErrorString();
279 BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
280 << "failed to set timestamp on '" << it.AsPrintablePath()
281 << "': " << err;
282 }
283
284 blaze_util::SyncFile(it);
285
286 blaze_util::Path directory = it.GetParent();
287
288 // Now walk up until embedded_binaries and sync every directory in between.
289 // synced_directories is used to avoid syncing the same directory twice.
290 // The !directory.empty() and !blaze_util::IsRootDirectory(directory)
291 // conditions are not strictly needed, but it makes this loop more robust,
292 // because otherwise, if due to some glitch, directory was not under
293 // embedded_binaries, it would get into an infinite loop.
294 while (directory != embedded_binaries_ && !directory.IsEmpty() &&
295 !blaze_util::IsRootDirectory(directory) &&
296 synced_directories.insert(directory).second) {
297 blaze_util::SyncFile(directory);
298 directory = directory.GetParent();
299 }
300 }
301
302 blaze_util::SyncFile(embedded_binaries_);
303}
304
Googler9cc03462019-11-05 00:22:26 -0800305void ExtractBuildLabel(const string &archive_path, string *build_label) {
306 PartialZipExtractor pze;
307 *build_label = pze.UnzipUntil(archive_path, "build-label.txt");
michajlo371a2e32019-05-23 13:14:39 -0700308}
309
Googler9cc03462019-11-05 00:22:26 -0800310string GetServerJarPath(const vector<string> &archive_contents) {
michajlo97559ba2019-06-03 14:14:22 -0700311 if (archive_contents.empty()) {
312 BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
313 << "Couldn't find server jar in archive";
314 }
315 return archive_contents[0];
316}
317
michajlo3d8925d2019-05-20 16:10:41 -0700318} // namespace blaze