michajlo | 3d8925d | 2019-05-20 16:10:41 -0700 | [diff] [blame] | 1 | // Copyright 2014 The Bazel Authors. All rights reserved. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
Googler | 9b7b4cc | 2023-05-22 17:18:04 -0700 | [diff] [blame] | 14 | #include "src/main/cpp/archive_utils.h" |
| 15 | |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 16 | #include <functional> |
| 17 | #include <memory> |
Googler | 9b7b4cc | 2023-05-22 17:18:04 -0700 | [diff] [blame] | 18 | #include <set> |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 19 | #include <string> |
Googler | 057b6b7 | 2023-05-25 11:57:59 -0700 | [diff] [blame] | 20 | #include <thread> // NOLINT |
michajlo | 3d8925d | 2019-05-20 16:10:41 -0700 | [diff] [blame] | 21 | #include <vector> |
| 22 | |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 23 | #include "src/main/cpp/blaze_util_platform.h" |
Googler | 057b6b7 | 2023-05-25 11:57:59 -0700 | [diff] [blame] | 24 | #include "src/main/cpp/startup_options.h" |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 25 | #include "src/main/cpp/util/errors.h" |
michajlo | 3d8925d | 2019-05-20 16:10:41 -0700 | [diff] [blame] | 26 | #include "src/main/cpp/util/exit_code.h" |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 27 | #include "src/main/cpp/util/file.h" |
michajlo | 3d8925d | 2019-05-20 16:10:41 -0700 | [diff] [blame] | 28 | #include "src/main/cpp/util/logging.h" |
| 29 | #include "src/main/cpp/util/path.h" |
| 30 | #include "src/main/cpp/util/strings.h" |
| 31 | #include "third_party/ijar/zip.h" |
| 32 | |
| 33 | namespace blaze { |
| 34 | |
Googler | 9b7b4cc | 2023-05-22 17:18:04 -0700 | [diff] [blame] | 35 | using std::set; |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 36 | using std::string; |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 37 | using std::vector; |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 38 | |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 39 | struct PartialZipExtractor : public devtools_ijar::ZipExtractorProcessor { |
| 40 | using CallbackType = |
| 41 | std::function<void(const char *name, const char *data, size_t size)>; |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 42 | |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 43 | // Scan the zip file "archive_path" until a file named "stop_entry" is seen, |
| 44 | // then stop. |
Vertexwahn | 26c7e10 | 2021-03-10 07:25:59 -0800 | [diff] [blame] | 45 | // If entry_names is not nullptr, it receives a list of all file members |
Googler | f805054 | 2019-11-09 13:25:35 -0800 | [diff] [blame] | 46 | // up to and including "stop_entry". |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 47 | // If a callback is given, it is run with the name and contents of |
| 48 | // each such member. |
| 49 | // Returns the contents of the "stop_entry" member. |
| 50 | string UnzipUntil(const string &archive_path, const string &stop_entry, |
| 51 | vector<string> *entry_names = nullptr, |
| 52 | CallbackType &&callback = {}) { |
| 53 | std::unique_ptr<devtools_ijar::ZipExtractor> extractor( |
| 54 | devtools_ijar::ZipExtractor::Create(archive_path.c_str(), this)); |
| 55 | if (!extractor) { |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 56 | BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 57 | << "Failed to open '" << archive_path |
| 58 | << "' as a zip file: " << blaze_util::GetLastErrorString(); |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 59 | } |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 60 | stop_name_ = stop_entry; |
| 61 | seen_names_.clear(); |
| 62 | callback_ = callback; |
| 63 | done_ = false; |
| 64 | while (!done_ && extractor->ProcessNext()) { |
| 65 | // Scan zip until EOF, an error, or Accept() has seen stop_entry. |
| 66 | } |
| 67 | if (const char *err = extractor->GetError()) { |
| 68 | BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) |
| 69 | << "Error reading zip file '" << archive_path << "': " << err; |
| 70 | } |
| 71 | if (!done_) { |
| 72 | BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) |
| 73 | << "Failed to find member '" << stop_entry << "' in zip file '" |
| 74 | << archive_path << "'"; |
| 75 | } |
| 76 | if (entry_names) *entry_names = std::move(seen_names_); |
| 77 | return stop_value_; |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 78 | } |
| 79 | |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 80 | bool Accept(const char *filename, devtools_ijar::u4 attr) override { |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 81 | if (devtools_ijar::zipattr_is_dir(attr)) return false; |
| 82 | // Sometimes that fails to detect directories. Check the name too. |
Googler | f805054 | 2019-11-09 13:25:35 -0800 | [diff] [blame] | 83 | string fn = filename; |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 84 | if (fn.empty() || fn.back() == '/') return false; |
Googler | f805054 | 2019-11-09 13:25:35 -0800 | [diff] [blame] | 85 | if (stop_name_ == fn) done_ = true; |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 86 | seen_names_.push_back(std::move(fn)); |
Googler | f805054 | 2019-11-09 13:25:35 -0800 | [diff] [blame] | 87 | return done_ || !!callback_; // true if a callback was supplied |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 88 | } |
| 89 | |
| 90 | void Process(const char *filename, devtools_ijar::u4 attr, |
| 91 | const devtools_ijar::u1 *data, size_t size) override { |
| 92 | if (done_) { |
| 93 | stop_value_.assign(reinterpret_cast<const char *>(data), size); |
Googler | f805054 | 2019-11-09 13:25:35 -0800 | [diff] [blame] | 94 | } |
| 95 | if (callback_) { |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 96 | callback_(filename, reinterpret_cast<const char *>(data), size); |
| 97 | } |
| 98 | } |
| 99 | |
| 100 | string stop_name_; |
| 101 | string stop_value_; |
| 102 | vector<string> seen_names_; |
| 103 | CallbackType callback_; |
| 104 | bool done_ = false; |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 105 | }; |
| 106 | |
Googler | 057b6b7 | 2023-05-25 11:57:59 -0700 | [diff] [blame] | 107 | // Installs Blaze by extracting the embedded data files, iff necessary. |
| 108 | // The MD5-named install_base directory on disk is trusted; we assume |
| 109 | // no-one has modified the extracted files beneath this directory once |
| 110 | // it is in place. Concurrency during extraction is handled by |
| 111 | // extracting in a tmp dir and then renaming it into place where it |
| 112 | // becomes visible atomically at the new path. |
| 113 | ExtractionDurationMillis ExtractData(const string &self_path, |
| 114 | const vector<string> &archive_contents, |
| 115 | const string &expected_install_md5, |
| 116 | const StartupOptions &startup_options, |
| 117 | LoggingInfo *logging_info) { |
| 118 | const string &install_base = startup_options.install_base; |
| 119 | // If the install dir doesn't exist, create it, if it does, we know it's good. |
| 120 | if (!blaze_util::PathExists(install_base)) { |
| 121 | uint64_t st = GetMillisecondsMonotonic(); |
| 122 | // Work in a temp dir to avoid races. |
| 123 | string tmp_install = blaze_util::CreateTempDir(install_base + ".tmp."); |
| 124 | ExtractArchiveOrDie(self_path, startup_options.product_name, |
| 125 | expected_install_md5, tmp_install); |
| 126 | BlessFiles(tmp_install); |
| 127 | |
| 128 | uint64_t et = GetMillisecondsMonotonic(); |
| 129 | const ExtractionDurationMillis extract_data_duration( |
| 130 | et - st, /*archived_extracted=*/true); |
| 131 | |
| 132 | // Now rename the completed installation to its final name. |
| 133 | int attempts = 0; |
| 134 | while (attempts < 120) { |
| 135 | int result = blaze_util::RenameDirectory(tmp_install, install_base); |
| 136 | if (result == blaze_util::kRenameDirectorySuccess || |
| 137 | result == blaze_util::kRenameDirectoryFailureNotEmpty) { |
| 138 | // If renaming fails because the directory already exists and is not |
| 139 | // empty, then we assume another good installation snuck in before us. |
| 140 | blaze_util::RemoveRecursively(tmp_install); |
| 141 | break; |
| 142 | } else { |
| 143 | // Otherwise the install directory may still be scanned by the antivirus |
| 144 | // (in case we're running on Windows) so we need to wait for that to |
| 145 | // finish and try renaming again. |
| 146 | ++attempts; |
| 147 | BAZEL_LOG(USER) << "install base directory '" << tmp_install |
| 148 | << "' could not be renamed into place after " |
| 149 | << attempts << " second(s), trying again\r"; |
| 150 | std::this_thread::sleep_for(std::chrono::seconds(1)); |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | // Give up renaming after 120 failed attempts / 2 minutes. |
| 155 | if (attempts == 120) { |
| 156 | blaze_util::RemoveRecursively(tmp_install); |
| 157 | BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) |
| 158 | << "install base directory '" << tmp_install |
| 159 | << "' could not be renamed into place: " |
| 160 | << blaze_util::GetLastErrorString(); |
| 161 | } |
| 162 | return extract_data_duration; |
| 163 | } else { |
| 164 | // This would be detected implicitly below, but checking explicitly lets |
| 165 | // us give a better error message. |
| 166 | if (!blaze_util::IsDirectory(install_base)) { |
| 167 | BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) |
| 168 | << "install base directory '" << install_base |
| 169 | << "' could not be created. It exists but is not a directory."; |
| 170 | } |
| 171 | blaze_util::Path install_dir(install_base); |
| 172 | // Check that all files are present and have timestamps from BlessFiles(). |
| 173 | std::unique_ptr<blaze_util::IFileMtime> mtime( |
| 174 | blaze_util::CreateFileMtime()); |
| 175 | for (const auto &it : archive_contents) { |
| 176 | blaze_util::Path path = install_dir.GetRelative(it); |
| 177 | if (!mtime->IsUntampered(path)) { |
| 178 | BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) |
| 179 | << "corrupt installation: file '" << path.AsPrintablePath() |
| 180 | << "' is missing or modified. Please remove '" << install_base |
| 181 | << "' and try again."; |
| 182 | } |
| 183 | } |
| 184 | // Also check that the installed files claim to match this binary. |
| 185 | // We check this afterward because the above diagnostic is better |
| 186 | // for a missing install_base_key file. |
| 187 | blaze_util::Path key_path = install_dir.GetRelative("install_base_key"); |
| 188 | string on_disk_key; |
| 189 | if (!blaze_util::ReadFile(key_path, &on_disk_key)) { |
| 190 | BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) |
| 191 | << "cannot read '" << key_path.AsPrintablePath() |
| 192 | << "': " << blaze_util::GetLastErrorString(); |
| 193 | } |
| 194 | if (on_disk_key != expected_install_md5) { |
| 195 | BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) |
| 196 | << "The install_base directory '" << install_base |
| 197 | << "' contains a different " << startup_options.product_name |
| 198 | << " version (found " << on_disk_key << " but this binary is " |
| 199 | << expected_install_md5 |
| 200 | << "). Remove it or specify a different --install_base."; |
| 201 | } |
| 202 | return ExtractionDurationMillis(); |
| 203 | } |
| 204 | } |
| 205 | |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 206 | void DetermineArchiveContents(const string &archive_path, vector<string> *files, |
| 207 | string *install_md5) { |
| 208 | PartialZipExtractor pze; |
| 209 | *install_md5 = pze.UnzipUntil(archive_path, "install_base_key", files); |
michajlo | 371a2e3 | 2019-05-23 13:14:39 -0700 | [diff] [blame] | 210 | } |
| 211 | |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 212 | void ExtractArchiveOrDie(const string &archive_path, const string &product_name, |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 213 | const string &expected_install_md5, |
| 214 | const string &output_dir) { |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 215 | string error; |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 216 | std::unique_ptr<blaze::embedded_binaries::Dumper> dumper( |
| 217 | blaze::embedded_binaries::Create(&error)); |
| 218 | if (dumper == nullptr) { |
| 219 | BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) << error; |
| 220 | } |
michajlo | aeae59a | 2020-03-27 12:21:25 -0700 | [diff] [blame] | 221 | |
| 222 | if (!blaze_util::PathExists(output_dir)) { |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 223 | BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR) |
michajlo | aeae59a | 2020-03-27 12:21:25 -0700 | [diff] [blame] | 224 | << "Archive output directory didn't exist: " << output_dir; |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 225 | } |
michajlo | 3d8925d | 2019-05-20 16:10:41 -0700 | [diff] [blame] | 226 | |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 227 | BAZEL_LOG(USER) << "Extracting " << product_name << " installation..."; |
michajlo | 3d8925d | 2019-05-20 16:10:41 -0700 | [diff] [blame] | 228 | |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 229 | PartialZipExtractor pze; |
| 230 | string install_md5 = pze.UnzipUntil( |
| 231 | archive_path, "install_base_key", nullptr, |
| 232 | [&](const char *name, const char *data, size_t size) { |
| 233 | dumper->Dump(data, size, blaze_util::JoinPath(output_dir, name)); |
| 234 | }); |
michajlo | 3d8925d | 2019-05-20 16:10:41 -0700 | [diff] [blame] | 235 | |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 236 | if (!dumper->Finish(&error)) { |
| 237 | BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) |
| 238 | << "Failed to extract embedded binaries: " << error; |
| 239 | } |
michajlo | 3d8925d | 2019-05-20 16:10:41 -0700 | [diff] [blame] | 240 | |
michajlo | 6f38f34 | 2019-05-22 11:53:22 -0700 | [diff] [blame] | 241 | if (install_md5 != expected_install_md5) { |
| 242 | BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) |
| 243 | << "The " << product_name << " binary at " << archive_path |
| 244 | << " was replaced during the client's self-extraction (old md5: " |
| 245 | << expected_install_md5 << " new md5: " << install_md5 |
| 246 | << "). If you expected this then you should simply re-run " |
| 247 | << product_name |
| 248 | << " in order to pick up the different version. If you didn't expect " |
| 249 | "this then you should investigate what happened."; |
| 250 | } |
michajlo | 3d8925d | 2019-05-20 16:10:41 -0700 | [diff] [blame] | 251 | } |
| 252 | |
Googler | 9b7b4cc | 2023-05-22 17:18:04 -0700 | [diff] [blame] | 253 | void BlessFiles(const string &embedded_binaries) { |
| 254 | blaze_util::Path embedded_binaries_(embedded_binaries); |
| 255 | |
| 256 | // Set the timestamps of the extracted files to the future and make sure (or |
| 257 | // at least as sure as we can...) that the files we have written are actually |
| 258 | // on the disk. |
| 259 | |
| 260 | vector<string> extracted_files; |
| 261 | |
| 262 | // Walks the temporary directory recursively and collects full file paths. |
| 263 | blaze_util::GetAllFilesUnder(embedded_binaries, &extracted_files); |
| 264 | |
| 265 | std::unique_ptr<blaze_util::IFileMtime> mtime(blaze_util::CreateFileMtime()); |
| 266 | set<blaze_util::Path> synced_directories; |
| 267 | for (const auto &f : extracted_files) { |
| 268 | blaze_util::Path it(f); |
| 269 | |
| 270 | // Set the time to a distantly futuristic value so we can observe tampering. |
| 271 | // Note that keeping a static, deterministic timestamp, such as the default |
| 272 | // timestamp set by unzip (1970-01-01) and using that to detect tampering is |
| 273 | // not enough, because we also need the timestamp to change between Bazel |
| 274 | // releases so that the metadata cache knows that the files may have |
| 275 | // changed. This is essential for the correctness of actions that use |
| 276 | // embedded binaries as artifacts. |
| 277 | if (!mtime->SetToDistantFuture(it)) { |
| 278 | string err = blaze_util::GetLastErrorString(); |
| 279 | BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) |
| 280 | << "failed to set timestamp on '" << it.AsPrintablePath() |
| 281 | << "': " << err; |
| 282 | } |
| 283 | |
| 284 | blaze_util::SyncFile(it); |
| 285 | |
| 286 | blaze_util::Path directory = it.GetParent(); |
| 287 | |
| 288 | // Now walk up until embedded_binaries and sync every directory in between. |
| 289 | // synced_directories is used to avoid syncing the same directory twice. |
| 290 | // The !directory.empty() and !blaze_util::IsRootDirectory(directory) |
| 291 | // conditions are not strictly needed, but it makes this loop more robust, |
| 292 | // because otherwise, if due to some glitch, directory was not under |
| 293 | // embedded_binaries, it would get into an infinite loop. |
| 294 | while (directory != embedded_binaries_ && !directory.IsEmpty() && |
| 295 | !blaze_util::IsRootDirectory(directory) && |
| 296 | synced_directories.insert(directory).second) { |
| 297 | blaze_util::SyncFile(directory); |
| 298 | directory = directory.GetParent(); |
| 299 | } |
| 300 | } |
| 301 | |
| 302 | blaze_util::SyncFile(embedded_binaries_); |
| 303 | } |
| 304 | |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 305 | void ExtractBuildLabel(const string &archive_path, string *build_label) { |
| 306 | PartialZipExtractor pze; |
| 307 | *build_label = pze.UnzipUntil(archive_path, "build-label.txt"); |
michajlo | 371a2e3 | 2019-05-23 13:14:39 -0700 | [diff] [blame] | 308 | } |
| 309 | |
Googler | 9cc0346 | 2019-11-05 00:22:26 -0800 | [diff] [blame] | 310 | string GetServerJarPath(const vector<string> &archive_contents) { |
michajlo | 97559ba | 2019-06-03 14:14:22 -0700 | [diff] [blame] | 311 | if (archive_contents.empty()) { |
| 312 | BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR) |
| 313 | << "Couldn't find server jar in archive"; |
| 314 | } |
| 315 | return archive_contents[0]; |
| 316 | } |
| 317 | |
michajlo | 3d8925d | 2019-05-20 16:10:41 -0700 | [diff] [blame] | 318 | } // namespace blaze |