| // Copyright 2016 The Bazel Authors. All rights reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| /* |
| * The implementation of the OutputJar methods. |
| */ |
| #include "src/tools/singlejar/output_jar.h" |
| |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <sys/stat.h> |
| #include <time.h> |
| |
| #ifndef _WIN32 |
| #include <unistd.h> |
| #else |
| |
| #ifndef WIN32_LEAN_AND_MEAN |
| #define WIN32_LEAN_AND_MEAN |
| #endif // WIN32_LEAN_AND_MEAN |
| #include <windows.h> |
| |
| #endif // _WIN32 |
| |
| #include "src/main/cpp/util/path_platform.h" |
| #include "src/tools/singlejar/combiners.h" |
| #include "src/tools/singlejar/diag.h" |
| #include "src/tools/singlejar/input_jar.h" |
| #include "src/tools/singlejar/mapped_file.h" |
| #include "src/tools/singlejar/options.h" |
| #include "src/tools/singlejar/zip_headers.h" |
| |
| #include <zlib.h> |
| |
// Marks functionality that is not implemented yet: when `cond` is false,
// reports `msg` (a string literal) together with the source location through
// diag_errx, passing 2 as its first argument.
// The body is wrapped in do { } while (0) so that `TODO(...);` expands to
// exactly one statement and cannot capture a following `else`.
#define TODO(cond, msg)                                                \
  do {                                                                 \
    if (!(cond)) {                                                     \
      diag_errx(2, "%s:%d: TODO(asmundak): " msg, __FILE__, __LINE__); \
    }                                                                  \
  } while (0)
| |
| OutputJar::OutputJar() |
| : options_(nullptr), |
| file_(nullptr), |
| outpos_(0), |
| buffer_(nullptr), |
| entries_(0), |
| duplicate_entries_(0), |
| cen_(nullptr), |
| cen_size_(0), |
| cen_capacity_(0), |
| spring_handlers_("META-INF/spring.handlers"), |
| spring_schemas_("META-INF/spring.schemas"), |
| protobuf_meta_handler_("protobuf.meta", false), |
| manifest_("META-INF/MANIFEST.MF"), |
| build_properties_("build-data.properties") { |
| known_members_.emplace(spring_handlers_.filename(), |
| EntryInfo{&spring_handlers_}); |
| known_members_.emplace(spring_schemas_.filename(), |
| EntryInfo{&spring_schemas_}); |
| known_members_.emplace(manifest_.filename(), EntryInfo{&manifest_}); |
| known_members_.emplace(protobuf_meta_handler_.filename(), |
| EntryInfo{&protobuf_meta_handler_}); |
| manifest_.Append( |
| "Manifest-Version: 1.0\r\n" |
| "Created-By: singlejar\r\n"); |
| } |
| |
// Returns the part of `path` that follows its last '/'. A path without any
// '/' is returned unchanged; a path ending in '/' yields an empty string.
static std::string Basename(const std::string &path) {
  const size_t last_slash = path.rfind('/');
  return last_slash == std::string::npos ? path : path.substr(last_slash + 1);
}
| |
| int OutputJar::Doit(Options *options) { |
| if (nullptr != options_) { |
| diag_errx(1, "%s:%d: Doit() can be called only once.", __FILE__, __LINE__); |
| } |
| options_ = options; |
| |
| // Register the handler for the build-data.properties file unless |
| // --exclude_build_data is present. Otherwise we do not generate this file, |
| // and it will be copied from the first source archive containing it. |
| if (!options_->exclude_build_data) { |
| known_members_.emplace(build_properties_.filename(), |
| EntryInfo{&build_properties_}); |
| } |
| |
| // TODO(b/28294322): do we need to resolve the path to be absolute or |
| // canonical? |
| build_properties_.AddProperty("build.target", options_->output_jar.c_str()); |
| if (options_->verbose) { |
| fprintf(stderr, "combined_file_name=%s\n", options_->output_jar.c_str()); |
| if (!options_->main_class.empty()) { |
| fprintf(stderr, "main_class=%s\n", options_->main_class.c_str()); |
| } |
| if (!options_->java_launcher.empty()) { |
| fprintf(stderr, "java_launcher_file=%s\n", |
| options_->java_launcher.c_str()); |
| } |
| fprintf(stderr, "%zu source files\n", options_->input_jars.size()); |
| fprintf(stderr, "%zu manifest lines\n", options_->manifest_lines.size()); |
| } |
| |
| if (!Open()) { |
| exit(1); |
| } |
| |
| // Copy launcher if it is set. |
| if (!options_->java_launcher.empty()) { |
| const char *const launcher_path = options_->java_launcher.c_str(); |
| int in_fd = open(launcher_path, O_RDONLY); |
| struct stat statbuf; |
| if (file_ == nullptr || fstat(in_fd, &statbuf)) { |
| diag_err(1, "%s", launcher_path); |
| } |
| // TODO(asmundak): Consider going back to sendfile() or reflink |
| // (BTRFS_IOC_CLONE/XFS_IOC_CLONE) here. The launcher preamble can |
| // be very large for targets with many native deps. |
| ssize_t byte_count = AppendFile(in_fd, 0, statbuf.st_size); |
| if (byte_count < 0) { |
| diag_err(1, "%s:%d: Cannot copy %s to %s", __FILE__, __LINE__, |
| launcher_path, options_->output_jar.c_str()); |
| } else if (byte_count != statbuf.st_size) { |
| diag_err(1, "%s:%d: Copied only %zu bytes out of %" PRIu64 " from %s", |
| __FILE__, __LINE__, byte_count, statbuf.st_size, launcher_path); |
| } |
| close(in_fd); |
| if (options_->verbose) { |
| fprintf(stderr, "Prepended %s (%" PRIu64 " bytes)\n", launcher_path, |
| statbuf.st_size); |
| } |
| } |
| |
| if (!options_->main_class.empty()) { |
| build_properties_.AddProperty("main.class", options_->main_class); |
| manifest_.Append("Main-Class: "); |
| manifest_.Append(options_->main_class); |
| manifest_.Append("\r\n"); |
| } |
| |
| for (auto &manifest_line : options_->manifest_lines) { |
| if (!manifest_line.empty()) { |
| manifest_.Append(manifest_line); |
| if (manifest_line[manifest_line.size() - 1] != '\n') { |
| manifest_.Append("\r\n"); |
| } |
| } |
| } |
| |
| for (auto &build_info_line : options_->build_info_lines) { |
| build_properties_.Append(build_info_line); |
| build_properties_.Append("\n"); |
| } |
| |
| for (auto &build_info_file : options_->build_info_files) { |
| MappedFile mapped_file; |
| if (!mapped_file.Open(build_info_file)) { |
| diag_err(1, "%s:%d: Bad build info file %s", __FILE__, __LINE__, |
| build_info_file.c_str()); |
| } |
| const char *data = reinterpret_cast<const char *>(mapped_file.start()); |
| const char *data_end = reinterpret_cast<const char *>(mapped_file.end()); |
| // TODO(asmundak): this isn't right, we should parse properties file. |
| while (data < data_end) { |
| const char *next_data = strchr(static_cast<const char *>(data), '\n'); |
| if (next_data) { |
| ++next_data; |
| } else { |
| next_data = data_end; |
| } |
| build_properties_.Append(data, next_data - data); |
| data = next_data; |
| } |
| mapped_file.Close(); |
| } |
| |
| for (auto &rpath : options_->classpath_resources) { |
| ClasspathResource(Basename(rpath), rpath); |
| } |
| |
| for (auto &rdesc : options_->resources) { |
| // A resource description is either NAME or PATH:NAME |
| // Find the last ':' instead of the first because Windows uses ':' as volume |
| // separator in absolute path. |
| std::size_t colon = rdesc.find_last_of(':'); |
| if (0 == colon) { |
| diag_errx(1, "%s:%d: Bad resource description %s", __FILE__, __LINE__, |
| rdesc.c_str()); |
| } |
| bool shouldSplit = colon != std::string::npos; |
| #ifdef _WIN32 |
| // If colon points to volume separator, don't split. |
| if (colon == 1 && blaze_util::IsAbsolute(rdesc)) { |
| shouldSplit = false; |
| } |
| #endif |
| if (shouldSplit) { |
| ClasspathResource(rdesc.substr(colon + 1), rdesc.substr(0, colon)); |
| } else { |
| ClasspathResource(rdesc, rdesc); |
| } |
| } |
| |
| // Ready to write zip entries. Decide whether created entries should be |
| // compressed. |
| bool compress = options_->force_compression || options_->preserve_compression; |
| // First, write a directory entry for the META-INF, followed by the manifest |
| // file, followed by the build properties file. |
| WriteMetaInf(); |
| manifest_.Append("\r\n"); |
| WriteEntry(manifest_.OutputEntry(compress)); |
| if (!options_->exclude_build_data) { |
| WriteEntry(build_properties_.OutputEntry(compress)); |
| } |
| |
| // Then classpath resources. |
| for (auto &classpath_resource : classpath_resources_) { |
| bool do_compress = compress; |
| if (do_compress && !options_->nocompress_suffixes.empty()) { |
| for (auto &suffix : options_->nocompress_suffixes) { |
| auto entry_name = classpath_resource->filename(); |
| if (entry_name.length() >= suffix.size() && |
| !entry_name.compare(entry_name.length() - suffix.size(), |
| suffix.size(), suffix)) { |
| do_compress = false; |
| break; |
| } |
| } |
| } |
| |
| // Add parent directory entries. |
| size_t pos = classpath_resource->filename().find('/'); |
| while (pos != std::string::npos) { |
| std::string dir(classpath_resource->filename(), 0, pos + 1); |
| if (NewEntry(dir)) { |
| WriteDirEntry(dir, nullptr, 0); |
| } |
| pos = classpath_resource->filename().find('/', pos + 1); |
| } |
| |
| WriteEntry(classpath_resource->OutputEntry(do_compress)); |
| } |
| |
| // Then copy source files' contents. |
| for (size_t ix = 0; ix < options_->input_jars.size(); ++ix) { |
| if (!AddJar(ix)) { |
| exit(1); |
| } |
| } |
| |
| // All entries written, write Central Directory and close. |
| Close(); |
| return 0; |
| } |
| |
| OutputJar::~OutputJar() { |
| if (file_) { |
| diag_warnx("%s:%d: Close() should be called first", __FILE__, __LINE__); |
| } |
| } |
| |
// Size of the stdio buffer installed on the output file, i.e. the unit in
// which we try to perform I/O: 128KB, the default max request size for fuse
// filesystems.
static constexpr size_t kBufferSize = 128 * 1024;
| |
| bool OutputJar::Open() { |
| if (file_) { |
| diag_errx(1, "%s:%d: Cannot open output archive twice", __FILE__, __LINE__); |
| } |
| |
| int mode = O_CREAT | O_WRONLY | O_TRUNC; |
| |
| #ifdef _WIN32 |
| std::wstring wpath; |
| std::string error; |
| if (!blaze_util::AsAbsoluteWindowsPath(path(), &wpath, &error)) { |
| diag_warn("%s:%d: AsAbsoluteWindowsPath failed: %s", __FILE__, __LINE__, |
| error.c_str()); |
| return false; |
| } |
| |
| HANDLE hFile = CreateFileW(wpath.c_str(), GENERIC_READ | GENERIC_WRITE, 0, |
| NULL, CREATE_ALWAYS, 0, NULL); |
| if (hFile == INVALID_HANDLE_VALUE) { |
| diag_warn("%s:%d: CreateFileW failed for %S", __FILE__, __LINE__, |
| wpath.c_str()); |
| return false; |
| } |
| |
| // Make sure output file is in binary mode, or \r\n will be converted to \n. |
| mode |= _O_BINARY; |
| int fd = _open_osfhandle(reinterpret_cast<intptr_t>(hFile), mode); |
| #else |
| // Set execute bits since we may produce an executable output file. |
| int fd = open(path(), mode, 0777); |
| #endif |
| |
| if (fd < 0) { |
| diag_warn("%s:%d: %s", __FILE__, __LINE__, path()); |
| return false; |
| } |
| file_ = fdopen(fd, "w"); |
| if (file_ == nullptr) { |
| diag_warn("%s:%d: fdopen of %s", __FILE__, __LINE__, path()); |
| close(fd); |
| return false; |
| } |
| outpos_ = 0; |
| buffer_.reset(new char[kBufferSize]); |
| setvbuf(file_, buffer_.get(), _IOFBF, kBufferSize); |
| if (options_->verbose) { |
| fprintf(stderr, "Writing to %s\n", path()); |
| } |
| return true; |
| } |
| |
// January 1, 2010 as a DOS date: years since 1980 shifted left by 9, the
// month shifted left by 5, and the day of the month in the low bits.
static const uint16_t kDefaultDate = (30 << 9) | (1 << 5) | 1;
| |
| bool OutputJar::AddJar(int jar_path_index) { |
| const std::string &input_jar_path = |
| options_->input_jars[jar_path_index].first; |
| const std::string &input_jar_aux_label = |
| options_->input_jars[jar_path_index].second; |
| |
| InputJar input_jar; |
| if (!input_jar.Open(input_jar_path)) { |
| return false; |
| } |
| const CDH *jar_entry; |
| const LH *lh; |
| while ((jar_entry = input_jar.NextEntry(&lh))) { |
| const char *file_name = jar_entry->file_name(); |
| auto file_name_length = jar_entry->file_name_length(); |
| if (!file_name_length) { |
| diag_errx( |
| 1, "%s:%d: Bad central directory record in %s at offset 0x%" PRIx64, |
| __FILE__, __LINE__, input_jar_path.c_str(), |
| input_jar.CentralDirectoryRecordOffset(jar_entry)); |
| } |
| // Special files that cannot be handled by looking up known_members_ map: |
| // * ignore *.SF, *.RSA, *.DSA |
| // (TODO(asmundak): should this be done only in META-INF? |
| // |
| if (ends_with(file_name, file_name_length, ".SF") || |
| ends_with(file_name, file_name_length, ".RSA") || |
| ends_with(file_name, file_name_length, ".DSA")) { |
| continue; |
| } |
| |
| bool include_entry = true; |
| if (!options_->include_prefixes.empty()) { |
| for (auto &prefix : options_->include_prefixes) { |
| if ((include_entry = |
| (prefix.size() <= file_name_length && |
| 0 == strncmp(file_name, prefix.c_str(), prefix.size())))) { |
| break; |
| } |
| } |
| } |
| if (!include_entry) { |
| continue; |
| } |
| |
| bool is_file = (file_name[file_name_length - 1] != '/'); |
| if (is_file && |
| begins_with(file_name, file_name_length, "META-INF/services/")) { |
| // The contents of the META-INF/services/<SERVICE> on the output is the |
| // concatenation of the META-INF/services/<SERVICE> files from all inputs. |
| std::string service_path(file_name, file_name_length); |
| if (NewEntry(service_path)) { |
| // Create a concatenator and add it to the known_members_ map. |
| // The call to Merge() below will then take care of the rest. |
| Concatenator *service_handler = new Concatenator(service_path); |
| service_handlers_.emplace_back(service_handler); |
| known_members_.emplace(service_path, EntryInfo{service_handler}); |
| } |
| } else { |
| ExtraHandler(input_jar_path, jar_entry, &input_jar_aux_label); |
| } |
| |
| if (options_->check_desugar_deps && |
| begins_with(file_name, file_name_length, "j$/")) { |
| diag_errx(1, "%s:%d: desugar_jdk_libs file %.*s unexpectedly found in %s", |
| __FILE__, __LINE__, file_name_length, file_name, |
| input_jar_path.c_str()); |
| } |
| |
| // Install a new entry unless it is already present. All the plain (non-dir) |
| // entries that require a combiner have been already installed, so the call |
| // will add either a directory entry whose handler will ignore subsequent |
| // duplicates, or an ordinary plain entry, for which we save the index of |
| // the first input jar (in order to provide diagnostics on duplicate). |
| auto got = |
| known_members_.emplace(std::string(file_name, file_name_length), |
| EntryInfo{is_file ? nullptr : &null_combiner_, |
| is_file ? jar_path_index : -1}); |
| if (!got.second) { |
| auto &entry_info = got.first->second; |
| // Handle special entries (the ones that have a combiner). |
| if (entry_info.combiner_ != nullptr) { |
| // TODO(kmb,asmundak): Should be checking Merge() return value but fails |
| // for build-data.properties when merging deploy jars into deploy jars. |
| entry_info.combiner_->Merge(jar_entry, lh); |
| continue; |
| } |
| |
| // Plain file entry. If duplicates are not allowed, bail out. Otherwise |
| // just ignore this entry. |
| if (options_->no_duplicates || |
| (options_->no_duplicate_classes && |
| ends_with(file_name, file_name_length, ".class"))) { |
| diag_errx( |
| 1, "%s:%d: %.*s is present both in %s and %s", __FILE__, __LINE__, |
| file_name_length, file_name, |
| options_->input_jars[entry_info.input_jar_index_].first.c_str(), |
| input_jar_path.c_str()); |
| } else { |
| duplicate_entries_++; |
| continue; |
| } |
| } |
| |
| // For the file entries, decide whether output should be compressed. |
| if (is_file) { |
| bool input_compressed = |
| jar_entry->compression_method() != Z_NO_COMPRESSION; |
| bool output_compressed = |
| options_->force_compression || |
| (options_->preserve_compression && input_compressed); |
| if (output_compressed && !options_->nocompress_suffixes.empty()) { |
| for (auto &suffix : options_->nocompress_suffixes) { |
| if (file_name_length >= suffix.size() && |
| !strncmp(file_name + file_name_length - suffix.size(), |
| suffix.c_str(), suffix.size())) { |
| output_compressed = false; |
| break; |
| } |
| } |
| } |
| if (input_compressed != output_compressed) { |
| Concatenator combiner(jar_entry->file_name_string()); |
| if (!combiner.Merge(jar_entry, lh)) { |
| diag_err(1, "%s:%d: cannot add %.*s", __FILE__, __LINE__, |
| jar_entry->file_name_length(), jar_entry->file_name()); |
| } |
| WriteEntry(combiner.OutputEntry(output_compressed)); |
| continue; |
| } |
| } |
| |
| // Now we have to copy: |
| // local header |
| // file data |
| // data descriptor, if present. |
| off64_t copy_from = jar_entry->local_header_offset(); |
| size_t num_bytes = lh->size(); |
| if (jar_entry->no_size_in_local_header()) { |
| const DDR *ddr = reinterpret_cast<const DDR *>( |
| lh->data() + jar_entry->compressed_file_size()); |
| num_bytes += |
| jar_entry->compressed_file_size() + |
| ddr->size( |
| ziph::zfield_has_ext64(jar_entry->compressed_file_size32()), |
| ziph::zfield_has_ext64(jar_entry->uncompressed_file_size32())); |
| } else { |
| num_bytes += lh->compressed_file_size(); |
| } |
| off64_t local_header_offset = Position(); |
| |
| // When normalize_timestamps is set, entry's timestamp is to be set to |
| // 01/01/2010 00:00:00 (or to 01/01/2010 00:00:02, if an entry is a .class |
| // file). This is somewhat expensive because we have to copy the local |
| // header to memory as input jar is memory mapped as read-only. Try to copy |
| // as little as possible. |
| uint16_t normalized_time = 0; |
| const UnixTimeExtraField *lh_field_to_remove = nullptr; |
| bool fix_timestamp = false; |
| if (options_->normalize_timestamps) { |
| if (ends_with(file_name, file_name_length, ".class")) { |
| normalized_time = 1; |
| } |
| lh_field_to_remove = lh->unix_time_extra_field(); |
| fix_timestamp = jar_entry->last_mod_file_date() != kDefaultDate || |
| jar_entry->last_mod_file_time() != normalized_time || |
| lh_field_to_remove != nullptr; |
| } |
| if (fix_timestamp) { |
| uint8_t lh_buffer[512]; |
| size_t lh_size = lh->size(); |
| LH *lh_new = lh_size > sizeof(lh_buffer) |
| ? reinterpret_cast<LH *>(malloc(lh_size)) |
| : reinterpret_cast<LH *>(lh_buffer); |
| // Remove Unix timestamp field. |
| if (lh_field_to_remove != nullptr) { |
| auto from_end = ziph::byte_ptr(lh) + lh->size(); |
| size_t removed_size = lh_field_to_remove->size(); |
| size_t chunk1_size = |
| ziph::byte_ptr(lh_field_to_remove) - ziph::byte_ptr(lh); |
| size_t chunk2_size = lh->size() - (chunk1_size + removed_size); |
| memcpy(lh_new, lh, chunk1_size); |
| if (chunk2_size) { |
| memcpy(reinterpret_cast<uint8_t *>(lh_new) + chunk1_size, |
| from_end - chunk2_size, chunk2_size); |
| } |
| lh_new->extra_fields(lh_new->extra_fields(), |
| lh->extra_fields_length() - removed_size); |
| } else { |
| memcpy(lh_new, lh, lh_size); |
| } |
| lh_new->last_mod_file_date(kDefaultDate); |
| lh_new->last_mod_file_time(normalized_time); |
| // Now write these few bytes and adjust read/write positions accordingly. |
| if (!WriteBytes(lh_new, lh_new->size())) { |
| diag_err(1, "%s:%d: Cannot copy modified local header for %.*s", |
| __FILE__, __LINE__, file_name_length, file_name); |
| } |
| copy_from += lh_size; |
| num_bytes -= lh_size; |
| if (reinterpret_cast<uint8_t *>(lh_new) != lh_buffer) { |
| free(lh_new); |
| } |
| } |
| |
| // Do the actual copy. |
| if (!WriteBytes(input_jar.mapped_start() + copy_from, num_bytes)) { |
| diag_err(1, "%s:%d: Cannot write %zu bytes of %.*s from %s", __FILE__, |
| __LINE__, num_bytes, file_name_length, file_name, |
| input_jar_path.c_str()); |
| } |
| |
| AppendToDirectoryBuffer(jar_entry, local_header_offset, normalized_time, |
| fix_timestamp); |
| ++entries_; |
| } |
| return input_jar.Close(); |
| } |
| |
| off64_t OutputJar::Position() { |
| if (file_ == nullptr) { |
| diag_err(1, "%s:%d: output file is not open", __FILE__, __LINE__); |
| } |
| // You'd think this could be "return ftell(file_);", but that |
| // generates a needless call to lseek. So instead we cache our |
| // current position in the output. |
| return outpos_; |
| } |
| |
| // Writes an entry. The argument is the pointer to the contiguous block of |
| // memory containing Local Header for the entry, immediately followed by |
| // the data. The memory is freed after the data has been written. |
| void OutputJar::WriteEntry(void *buffer) { |
| if (buffer == nullptr) { |
| return; |
| } |
| LH *entry = reinterpret_cast<LH *>(buffer); |
| if (options_->verbose) { |
| fprintf(stderr, "%-.*s combiner has %zu bytes, %s to %zu\n", |
| entry->file_name_length(), entry->file_name(), |
| entry->uncompressed_file_size(), |
| entry->compression_method() == Z_NO_COMPRESSION ? "copied" |
| : "compressed", |
| entry->compressed_file_size()); |
| } |
| |
| // Set this entry's timestamp. |
| // MSDOS file timestamp format that Zip uses is described here: |
| // https://msdn.microsoft.com/en-us/library/9kkf9tah.aspx |
| // ("32-Bit Windows Time/Date Formats") |
| if (options_->normalize_timestamps) { |
| // Regular "normalized" timestamp is 01/01/2010 00:00:00, while for the |
| // .class file it is 01/01/2010 00:00:02 |
| entry->last_mod_file_date(kDefaultDate); |
| entry->last_mod_file_time( |
| ends_with(entry->file_name(), entry->file_name_length(), ".class") ? 1 |
| : 0); |
| } else { |
| struct tm tm; |
| // Time has 2-second resolution, so round up: |
| time_t t_adjusted = (time(nullptr) + 1) & ~1; |
| localtime_r(&t_adjusted, &tm); |
| uint16_t dos_date = |
| ((tm.tm_year - 80) << 9) | ((tm.tm_mon + 1) << 5) | tm.tm_mday; |
| uint16_t dos_time = |
| (tm.tm_hour << 11) | (tm.tm_min << 5) | (tm.tm_sec >> 1); |
| entry->last_mod_file_time(dos_time); |
| entry->last_mod_file_date(dos_date); |
| } |
| |
| uint8_t *data = reinterpret_cast<uint8_t *>(entry); |
| off64_t output_position = Position(); |
| if (!WriteBytes(data, entry->data() + entry->in_zip_size() - data)) { |
| diag_err(1, "%s:%d: write", __FILE__, __LINE__); |
| } |
| // Data written, allocate CDH space and populate CDH. |
| // Space needed for the CDH varies depending on whether output position field |
| // fits into 32 bits (we do not handle compressed/uncompressed entry sizes |
| // exceeding 32 bits at the moment). |
| uint16_t zip64_size = ziph::zfield_needs_ext64(output_position) |
| ? Zip64ExtraField::space_needed(1) |
| : 0; |
| CDH *cdh = reinterpret_cast<CDH *>( |
| ReserveCdh(sizeof(CDH) + entry->file_name_length() + |
| entry->extra_fields_length() + zip64_size)); |
| cdh->signature(); |
| // Note: do not set the version to Unix 3.0 spec, otherwise |
| // unzip will think that 'external_attributes' field contains access mode |
| cdh->version(20); |
| cdh->version_to_extract(20); // 2.0 |
| cdh->bit_flag(0x0); |
| cdh->compression_method(entry->compression_method()); |
| cdh->last_mod_file_time(entry->last_mod_file_time()); |
| cdh->last_mod_file_date(entry->last_mod_file_date()); |
| cdh->crc32(entry->crc32()); |
| TODO(entry->compressed_file_size32() != 0xFFFFFFFF, "Handle Zip64"); |
| cdh->compressed_file_size32(entry->compressed_file_size32()); |
| TODO(entry->uncompressed_file_size32() != 0xFFFFFFFF, "Handle Zip64"); |
| cdh->uncompressed_file_size32(entry->uncompressed_file_size32()); |
| cdh->file_name(entry->file_name(), entry->file_name_length()); |
| cdh->extra_fields(entry->extra_fields(), entry->extra_fields_length()); |
| if (zip64_size > 0) { |
| Zip64ExtraField *zip64_ef = reinterpret_cast<Zip64ExtraField *>( |
| cdh->extra_fields() + cdh->extra_fields_length()); |
| zip64_ef->signature(); |
| zip64_ef->attr_count(1); |
| zip64_ef->attr64(0, output_position); |
| cdh->local_header_offset32(0xFFFFFFFF); |
| // Field address argument points to the already existing field, |
| // so the call just updates the length. |
| cdh->extra_fields(cdh->extra_fields(), |
| cdh->extra_fields_length() + zip64_size); |
| } else { |
| cdh->local_header_offset32(output_position); |
| } |
| cdh->comment_length(0); |
| cdh->start_disk_nr(0); |
| cdh->internal_attributes(0); |
| cdh->external_attributes(0); |
| ++entries_; |
| free(reinterpret_cast<void *>(entry)); |
| } |
| |
| void OutputJar::WriteMetaInf() { |
| std::string path("META-INF/"); |
| |
| // META_INF/ is always the first entry, and as such it should have an extra |
| // field with the tag 0xCAFE and zero bytes of data. This is not the part of |
| // the jar file spec, but Unix 'file' utility relies on it to distiguish jar |
| // file from zip file. See https://bugs.openjdk.java.net/browse/JDK-6808540 |
| const uint8_t extra_fields[] = {0xFE, 0xCA, 0, 0}; |
| const uint16_t n_extra_fields = |
| sizeof(extra_fields) / sizeof(extra_fields[0]); |
| WriteDirEntry(path, extra_fields, n_extra_fields); |
| } |
| |
| // Writes a directory entry with the given name and extra fields. |
| void OutputJar::WriteDirEntry(const std::string &name, |
| const uint8_t *extra_fields, |
| const uint16_t n_extra_fields) { |
| size_t lh_size = sizeof(LH) + name.size() + n_extra_fields; |
| LH *lh = reinterpret_cast<LH *>(malloc(lh_size)); |
| lh->signature(); |
| lh->version(20); // 2.0 |
| lh->bit_flag(0); // TODO(asmundak): should I set UTF8 flag? |
| lh->compression_method(Z_NO_COMPRESSION); |
| lh->crc32(0); |
| lh->compressed_file_size32(0); |
| lh->uncompressed_file_size32(0); |
| lh->file_name(name.c_str(), name.size()); |
| lh->extra_fields(extra_fields, n_extra_fields); |
| known_members_.emplace(name, EntryInfo{&null_combiner_}); |
| WriteEntry(lh); |
| } |
| |
| // Create output Central Directory entry for the input jar entry. |
| void OutputJar::AppendToDirectoryBuffer(const CDH *cdh, off64_t lh_pos, |
| uint16_t normalized_time, |
| bool fix_timestamp) { |
| // While copying from the input CDH pointed to by 'cdh', we may need to drop |
| // Unix timestamp extra field, and we might need to change the number of |
| // attributes of the Zip64 extra field, or create it, or destroy it if entry's |
| // position relative to 4G boundary changes. |
| // The rest of the input CDH is copied. |
| |
| // 1. Decide if we need to drop UnixTime. |
| size_t removed_unix_time_field_size = 0; |
| if (fix_timestamp) { |
| auto unix_time_field = cdh->unix_time_extra_field(); |
| if (unix_time_field != nullptr) { |
| removed_unix_time_field_size = unix_time_field->size(); |
| } |
| } |
| |
| // 2. Figure out how many attributes input entry has and how many |
| // the output entry is going to have. |
| const Zip64ExtraField *zip64_ef = cdh->zip64_extra_field(); |
| const int zip64_attr_count = zip64_ef == nullptr ? 0 : zip64_ef->attr_count(); |
| const bool lh_pos_needs64 = ziph::zfield_needs_ext64(lh_pos); |
| int out_zip64_attr_count; |
| if (zip64_attr_count > 0) { |
| out_zip64_attr_count = zip64_attr_count; |
| // The number of attributes may remain the same, or it may increase or |
| // decrease by 1, depending on local_header_offset value. |
| if (ziph::zfield_has_ext64(cdh->local_header_offset32()) != |
| lh_pos_needs64) { |
| if (lh_pos_needs64) { |
| out_zip64_attr_count += 1; |
| } else { |
| out_zip64_attr_count -= 1; |
| } |
| } |
| } else { |
| out_zip64_attr_count = lh_pos_needs64 ? 1 : 0; |
| } |
| const uint16_t zip64_size = Zip64ExtraField::space_needed(zip64_attr_count); |
| const uint16_t out_zip64_size = |
| Zip64ExtraField::space_needed(out_zip64_attr_count); |
| |
| // Allocate output CDH and copy everything but extra fields. |
| const uint16_t ef_size = cdh->extra_fields_length(); |
| const uint16_t out_ef_size = |
| (ef_size + out_zip64_size) - (removed_unix_time_field_size + zip64_size); |
| |
| const size_t out_cdh_size = cdh->size() + out_ef_size - ef_size; |
| CDH *out_cdh = reinterpret_cast<CDH *>(ReserveCdr(out_cdh_size)); |
| |
| // Calculate ExtraFields boundaries in the input and output entries. |
| auto ef_begin = reinterpret_cast<const ExtraField *>(cdh->extra_fields()); |
| auto ef_end = |
| reinterpret_cast<const ExtraField *>(ziph::byte_ptr(ef_begin) + ef_size); |
| // Copy [cdh..ef_begin) -> [out_cdh..out_ef_begin) |
| memcpy(out_cdh, cdh, ziph::byte_ptr(ef_begin) - ziph::byte_ptr(cdh)); |
| |
| auto out_ef_begin = reinterpret_cast<ExtraField *>( |
| const_cast<uint8_t *>(out_cdh->extra_fields())); |
| auto out_ef_end = reinterpret_cast<ExtraField *>( |
| reinterpret_cast<uint8_t *>(out_ef_begin) + out_ef_size); |
| |
| // Copy [ef_end..cdh_end) -> [out_ef_end..out_cdh_end) |
| memcpy(out_ef_end, ef_end, |
| ziph::byte_ptr(cdh) + cdh->size() - ziph::byte_ptr(ef_end)); |
| |
| // Copy extra fields, dropping Zip64 and possibly UnixTime fields. |
| ExtraField *out_ef = out_ef_begin; |
| for (const ExtraField *ef = ef_begin; ef < ef_end; ef = ef->next()) { |
| if ((fix_timestamp && ef->is_unix_time()) || ef->is_zip64()) { |
| // Skip this one. |
| } else { |
| memcpy(out_ef, ef, ef->size()); |
| out_ef = reinterpret_cast<ExtraField *>( |
| reinterpret_cast<uint8_t *>(out_ef) + ef->size()); |
| } |
| } |
| |
| // Set up Zip64 extra field if necessary. |
| if (out_zip64_size > 0) { |
| Zip64ExtraField *out_zip64_ef = reinterpret_cast<Zip64ExtraField *>(out_ef); |
| out_zip64_ef->signature(); |
| out_zip64_ef->attr_count(out_zip64_attr_count); |
| int copy_count = out_zip64_attr_count < zip64_attr_count |
| ? out_zip64_attr_count |
| : zip64_attr_count; |
| if (copy_count > 0) { |
| out_zip64_ef->attr64(0, zip64_ef->attr64(0)); |
| if (copy_count > 1) { |
| out_zip64_ef->attr64(1, zip64_ef->attr64(1)); |
| } |
| } |
| // Set 64-bit local_header_offset if necessary. It's always the last |
| // attribute. |
| if (lh_pos_needs64) { |
| out_zip64_ef->attr64(out_zip64_attr_count - 1, lh_pos); |
| } |
| } |
| out_cdh->extra_fields(ziph::byte_ptr(out_ef_begin), out_ef_size); |
| out_cdh->local_header_offset32(lh_pos_needs64 ? 0xFFFFFFFF : lh_pos); |
| if (fix_timestamp) { |
| out_cdh->last_mod_file_time(normalized_time); |
| out_cdh->last_mod_file_date(kDefaultDate); |
| } |
| } |
| |
| uint8_t *OutputJar::ReserveCdr(size_t chunk_size) { |
| if (cen_size_ + chunk_size > cen_capacity_) { |
| cen_capacity_ += 1000000; |
| cen_ = reinterpret_cast<uint8_t *>(realloc(cen_, cen_capacity_)); |
| if (!cen_) { |
| diag_errx(1, "%s:%d: Cannot allocate %zu bytes for the directory", |
| __FILE__, __LINE__, cen_capacity_); |
| } |
| } |
| uint8_t *entry = cen_ + cen_size_; |
| cen_size_ += chunk_size; |
| return entry; |
| } |
| |
| uint8_t *OutputJar::ReserveCdh(size_t size) { |
| return static_cast<uint8_t *>(memset(ReserveCdr(size), 0, size)); |
| } |
| |
| // Write out combined jar. |
| bool OutputJar::Close() { |
| if (file_ == nullptr) { |
| return true; |
| } |
| |
| for (auto &service_handler : service_handlers_) { |
| WriteEntry(service_handler->OutputEntry(options_->force_compression)); |
| } |
| for (auto &extra_combiner : extra_combiners_) { |
| WriteEntry(extra_combiner->OutputEntry(options_->force_compression)); |
| } |
| WriteEntry(spring_handlers_.OutputEntry(options_->force_compression)); |
| WriteEntry(spring_schemas_.OutputEntry(options_->force_compression)); |
| WriteEntry(protobuf_meta_handler_.OutputEntry(options_->force_compression)); |
| // TODO(asmundak): handle manifest; |
| off64_t output_position = Position(); |
| bool write_zip64_ecd = output_position >= 0xFFFFFFFF || entries_ >= 0xFFFF || |
| cen_size_ >= 0xFFFFFFFF; |
| |
| size_t cen_size = cen_size_; // Save it before ReserveCdh updates it. |
| if (write_zip64_ecd) { |
| { |
| ECD64 *ecd64 = reinterpret_cast<ECD64 *>(ReserveCdh(sizeof(ECD64))); |
| ecd64->signature(); |
| ecd64->remaining_size(sizeof(ECD64) - 12); |
| ecd64->version(0x031E); // Unix, version 3.0 |
| ecd64->version_to_extract(45); // 4.5 (Zip64 support) |
| ecd64->this_disk_entries(entries_); |
| ecd64->total_entries(entries_); |
| ecd64->cen_size(cen_size); |
| ecd64->cen_offset(output_position); |
| } |
| { |
| ECD64Locator *ecd64_locator = |
| reinterpret_cast<ECD64Locator *>(ReserveCdh(sizeof(ECD64Locator))); |
| ecd64_locator->signature(); |
| ecd64_locator->ecd64_offset(output_position + cen_size); |
| ecd64_locator->total_disks(1); |
| } |
| { |
| ECD *ecd = reinterpret_cast<ECD *>(ReserveCdh(sizeof(ECD))); |
| ecd->signature(); |
| ecd->this_disk_entries16(0xFFFF); |
| ecd->total_entries16(0xFFFF); |
| // Java Compiler (javac) uses its own "optimized" Zip handler (see |
| // https://bugs.openjdk.java.net/browse/JDK-7018859) which may fail |
| // to handle 0xFFFFFFFF in the CEN size and CEN offset fields. Try |
| // to use 32-bit values here, too. Hopefully by the time we need to |
| // handle really large archives, this is fixes upstream. Note that this |
| // affects javac and javah only, 'jar' experiences no problems. |
| ecd->cen_size32(std::min(cen_size, static_cast<size_t>(0xFFFFFFFFUL))); |
| ecd->cen_offset32( |
| std::min(output_position, static_cast<off64_t>(0x0FFFFFFFFL))); |
| } |
| } else { |
| ECD *ecd = reinterpret_cast<ECD *>(ReserveCdh(sizeof(ECD))); |
| ecd->signature(); |
| ecd->this_disk_entries16((uint16_t)entries_); |
| ecd->total_entries16((uint16_t)entries_); |
| ecd->cen_size32(cen_size); |
| ecd->cen_offset32(output_position); |
| } |
| |
| // Save Central Directory and wrap up. |
| if (!WriteBytes(cen_, cen_size_)) { |
| diag_err(1, "%s:%d: Cannot write central directory", __FILE__, __LINE__); |
| } |
| free(cen_); |
| |
| if (fclose(file_)) { |
| diag_err(1, "%s:%d: %s", __FILE__, __LINE__, path()); |
| } |
| file_ = nullptr; |
| // Free the buffer only after fclose(); stdio may flush data from the |
| // buffer on close. |
| buffer_.reset(); |
| |
| if (options_->verbose) { |
| fprintf(stderr, "Wrote %s with %d entries", path(), entries_); |
| if (duplicate_entries_) { |
| fprintf(stderr, ", skipped %d entries", duplicate_entries_); |
| } |
| fprintf(stderr, "\n"); |
| } |
| return true; |
| } |
| |
| bool IsDir(const std::string &path) { |
| struct stat st; |
| if (stat(path.c_str(), &st)) { |
| diag_warn("%s:%d: stat %s:", __FILE__, __LINE__, path.c_str()); |
| return false; |
| } |
| return (st.st_mode & S_IFDIR) == S_IFDIR; |
| } |
| |
| void OutputJar::ClasspathResource(const std::string &resource_name, |
| const std::string &resource_path) { |
| if (known_members_.count(resource_name)) { |
| if (options_->warn_duplicate_resources) { |
| diag_warnx( |
| "%s:%d: Duplicate resource name %s in the --classpath_resource or " |
| "--resource option", |
| __FILE__, __LINE__, resource_name.c_str()); |
| // TODO(asmundak): this mimics old behaviour. Confirm that unless |
| // we run with --warn_duplicate_resources, the output zip file contains |
| // the concatenated contents of the all the resources with the same name. |
| return; |
| } |
| } |
| MappedFile mapped_file; |
| if (mapped_file.Open(resource_path)) { |
| Concatenator *classpath_resource = new Concatenator(resource_name); |
| classpath_resource->Append( |
| reinterpret_cast<const char *>(mapped_file.start()), |
| mapped_file.size()); |
| classpath_resources_.emplace_back(classpath_resource); |
| known_members_.emplace(resource_name, EntryInfo{classpath_resource}); |
| } else if (IsDir(resource_path)) { |
| // add an empty entry for the directory so its path ends up in the |
| // manifest |
| classpath_resources_.emplace_back(new Concatenator(resource_name + "/")); |
| known_members_.emplace(resource_name, EntryInfo{&null_combiner_}); |
| } else { |
| diag_err(1, "%s:%d: %s", __FILE__, __LINE__, resource_path.c_str()); |
| } |
| } |
| |
| ssize_t OutputJar::AppendFile(int in_fd, off64_t offset, size_t count) { |
| if (count == 0) { |
| return 0; |
| } |
| std::unique_ptr<void, decltype(free) *> buffer(malloc(kBufferSize), free); |
| if (buffer == nullptr) { |
| diag_err(1, "%s:%d: malloc", __FILE__, __LINE__); |
| } |
| ssize_t total_written = 0; |
| |
| #ifdef _WIN32 |
| HANDLE hFile = reinterpret_cast<HANDLE>(_get_osfhandle(in_fd)); |
| while (static_cast<size_t>(total_written) < count) { |
| ssize_t len = std::min(kBufferSize, count - total_written); |
| DWORD n_read; |
| if (!::ReadFile(hFile, buffer.get(), len, &n_read, NULL)) { |
| return -1; |
| } |
| if (n_read == 0) { |
| break; |
| } |
| if (!WriteBytes(buffer.get(), n_read)) { |
| return -1; |
| } |
| total_written += n_read; |
| } |
| #else |
| while (static_cast<size_t>(total_written) < count) { |
| size_t len = std::min(kBufferSize, count - total_written); |
| ssize_t n_read = pread(in_fd, buffer.get(), len, offset + total_written); |
| if (n_read > 0) { |
| if (!WriteBytes(buffer.get(), n_read)) { |
| return -1; |
| } |
| total_written += n_read; |
| } else if (n_read == 0) { |
| break; |
| } else { |
| return -1; |
| } |
| } |
| #endif // _WIN32 |
| |
| return total_written; |
| } |
| |
// Registers `combiner` for the entry named `entry_name`.
//
// The combiner is appended to extra_combiners_ (Close() writes the
// OutputEntry() of every member of that list to the archive) and
// `entry_name` is associated with it in known_members_.
// NOTE(review): emplace() is used, so an entry already registered under
// `entry_name` is presumably left untouched, and extra_combiners_ appears to
// take ownership of `combiner` -- confirm both against the member
// declarations in output_jar.h.
void OutputJar::ExtraCombiner(const std::string &entry_name,
                              Combiner *combiner) {
  extra_combiners_.emplace_back(combiner);
  known_members_.emplace(entry_name, EntryInfo{combiner});
}
| |
| bool OutputJar::WriteBytes(const void *buffer, size_t count) { |
| size_t written = fwrite(buffer, 1, count, file_); |
| outpos_ += written; |
| return written == count; |
| } |
| |
// Intentionally empty: this implementation ignores all of its arguments.
// NOTE(review): presumably a hook that derived classes override to process
// additional entries of an input jar -- confirm against the declaration in
// output_jar.h.
void OutputJar::ExtraHandler(const std::string &input_jar_path, const CDH *,
                             const std::string *) {}