src/tools/singlejar/output_jar.cc - bazel - Git at Google

 // Copyright 2016 The Bazel Authors. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 /*
  * The implementation of the OutputJar methods.
  */
 #include "src/tools/singlejar/output_jar.h"

 #include <errno.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/stat.h>
 #include <time.h>

 #ifndef _WIN32
 #include <unistd.h>
 #else

 #ifndef WIN32_LEAN_AND_MEAN
 #define WIN32_LEAN_AND_MEAN
 #endif  // WIN32_LEAN_AND_MEAN
 #include <windows.h>

 #endif  // _WIN32

 #include "src/main/cpp/util/path_platform.h"
 #include "src/tools/singlejar/combiners.h"
 #include "src/tools/singlejar/diag.h"
 #include "src/tools/singlejar/input_jar.h"
 #include "src/tools/singlejar/mapped_file.h"
 #include "src/tools/singlejar/options.h"
 #include "src/tools/singlejar/zip_headers.h"

 #include <zlib.h>

 // Marks a not-yet-implemented case: when `cond` is false, calls
 // diag_errx(2, ...) with the current file and line followed by `msg`.
 // `msg` must be a string literal, because it is concatenated onto the format
 // string at compile time.
 // NOTE(review): the macro expands to a bare `if` statement, so do not use it
 // as the unbraced body of an if/else.
 #define TODO(cond, msg)                                              \
   if (!(cond)) {                                                     \
     diag_errx(2, "%s:%d: TODO(asmundak): " msg, __FILE__, __LINE__); \
   }

 // Constructs an OutputJar with no output file open and an empty Central
 // Directory buffer. The built-in combiners for the Spring handler/schema
 // files, the protobuf metadata file and the manifest are registered in
 // known_members_ under their file names. The build properties combiner is
 // constructed here but is not registered; Doit() registers it depending on
 // the options.
 OutputJar::OutputJar()
     : options_(nullptr),
       file_(nullptr),
       outpos_(0),
       buffer_(nullptr),
       entries_(0),
       duplicate_entries_(0),
       cen_(nullptr),
       cen_size_(0),
       cen_capacity_(0),
       spring_handlers_("META-INF/spring.handlers"),
       spring_schemas_("META-INF/spring.schemas"),
       protobuf_meta_handler_("protobuf.meta", false),
       manifest_("META-INF/MANIFEST.MF"),
       build_properties_("build-data.properties") {
   known_members_.emplace(spring_handlers_.filename(),
                          EntryInfo{&spring_handlers_});
   known_members_.emplace(spring_schemas_.filename(),
                          EntryInfo{&spring_schemas_});
   known_members_.emplace(manifest_.filename(), EntryInfo{&manifest_});
   known_members_.emplace(protobuf_meta_handler_.filename(),
                          EntryInfo{&protobuf_meta_handler_});
   // Every output manifest starts with these two lines; Doit() appends the
   // rest.
   manifest_.Append(
       "Manifest-Version: 1.0\r\n"
       "Created-By: singlejar\r\n");
 }

 // Returns the part of `path` that follows its last '/'. A path without any
 // '/' is returned unchanged; a path ending in '/' yields an empty string.
 static std::string Basename(const std::string &path) {
   const size_t last_slash = path.rfind('/');
   return last_slash == std::string::npos ? path
                                          : path.substr(last_slash + 1);
 }

 // Builds the output jar described by `options`. May be called only once per
 // OutputJar instance.
 //
 // The steps, in order:
 //   1. Open the output file and, if requested, prepend the Java launcher.
 //   2. Assemble the manifest and the build properties from the options.
 //   3. Register classpath resources and explicit resources.
 //   4. Write META-INF/, the manifest, the build properties (unless
 //      excluded), and then the classpath resources with their parent
 //      directories.
 //   5. Copy the entries of every input jar.
 //   6. Close the archive.
 //
 // Returns 0 on success. Failures are reported through the diag_* functions
 // or terminate the process with exit(1).
 int OutputJar::Doit(Options *options) {
   if (nullptr != options_) {
     diag_errx(1, "%s:%d: Doit() can be called only once.", __FILE__, __LINE__);
   }
   options_ = options;

   // Register the handler for the build-data.properties file unless
   // --exclude_build_data is present. Otherwise we do not generate this file,
   // and it will be copied from the first source archive containing it.
   if (!options_->exclude_build_data) {
     known_members_.emplace(build_properties_.filename(),
                            EntryInfo{&build_properties_});
   }

   // TODO(b/28294322): do we need to resolve the path to be absolute or
   // canonical?
   build_properties_.AddProperty("build.target", options_->output_jar.c_str());
   if (options_->verbose) {
     fprintf(stderr, "combined_file_name=%s\n", options_->output_jar.c_str());
     if (!options_->main_class.empty()) {
       fprintf(stderr, "main_class=%s\n", options_->main_class.c_str());
     }
     if (!options_->java_launcher.empty()) {
       fprintf(stderr, "java_launcher_file=%s\n",
               options_->java_launcher.c_str());
     }
     fprintf(stderr, "%zu source files\n", options_->input_jars.size());
     fprintf(stderr, "%zu manifest lines\n", options_->manifest_lines.size());
   }

   if (!Open()) {
     exit(1);
   }

   // Copy launcher if it is set.
   if (!options_->java_launcher.empty()) {
     const char *const launcher_path = options_->java_launcher.c_str();
     int in_fd = open(launcher_path, O_RDONLY);
     struct stat statbuf;
     // Test the descriptor before calling fstat() so that errno still
     // describes the failed open() when the error is reported, instead of the
     // error fstat() would set for an invalid descriptor.
     if (file_ == nullptr || in_fd < 0 || fstat(in_fd, &statbuf)) {
       diag_err(1, "%s", launcher_path);
     }
     // TODO(asmundak):  Consider going back to sendfile() or reflink
     // (BTRFS_IOC_CLONE/XFS_IOC_CLONE) here.  The launcher preamble can
     // be very large for targets with many native deps.
     ssize_t byte_count = AppendFile(in_fd, 0, statbuf.st_size);
     if (byte_count < 0) {
       diag_err(1, "%s:%d: Cannot copy %s to %s", __FILE__, __LINE__,
                launcher_path, options_->output_jar.c_str());
     } else if (byte_count != statbuf.st_size) {
       diag_err(1, "%s:%d: Copied only %zu bytes out of %" PRIu64 " from %s",
                __FILE__, __LINE__, byte_count, statbuf.st_size, launcher_path);
     }
     close(in_fd);
     if (options_->verbose) {
       fprintf(stderr, "Prepended %s (%" PRIu64 " bytes)\n", launcher_path,
               statbuf.st_size);
     }
   }

   if (!options_->main_class.empty()) {
     build_properties_.AddProperty("main.class", options_->main_class);
     manifest_.Append("Main-Class: ");
     manifest_.Append(options_->main_class);
     manifest_.Append("\r\n");
   }

   // Append user-supplied manifest lines, terminating each one that does not
   // already end with a newline.
   for (auto &manifest_line : options_->manifest_lines) {
     if (!manifest_line.empty()) {
       manifest_.Append(manifest_line);
       if (manifest_line[manifest_line.size() - 1] != '\n') {
         manifest_.Append("\r\n");
       }
     }
   }

   for (auto &build_info_line : options_->build_info_lines) {
     build_properties_.Append(build_info_line);
     build_properties_.Append("\n");
   }

   for (auto &build_info_file : options_->build_info_files) {
     MappedFile mapped_file;
     if (!mapped_file.Open(build_info_file)) {
       diag_err(1, "%s:%d: Bad build info file %s", __FILE__, __LINE__,
                build_info_file.c_str());
     }
     const char *data = reinterpret_cast<const char *>(mapped_file.start());
     const char *data_end = reinterpret_cast<const char *>(mapped_file.end());
     // TODO(asmundak): this isn't right, we should parse properties file.
     while (data < data_end) {
       // The mapped data is not NUL-terminated, so the search for the end of
       // the line has to be bounded by the end of the mapping.
       const char *next_data = static_cast<const char *>(
           memchr(data, '\n', data_end - data));
       if (next_data) {
         ++next_data;
       } else {
         next_data = data_end;
       }
       build_properties_.Append(data, next_data - data);
       data = next_data;
     }
     mapped_file.Close();
   }

   for (auto &rpath : options_->classpath_resources) {
     ClasspathResource(Basename(rpath), rpath);
   }

   for (auto &rdesc : options_->resources) {
     // A resource description is either NAME or PATH:NAME
     // Find the last ':' instead of the first because Windows uses ':' as volume
     // separator in absolute path.
     std::size_t colon = rdesc.find_last_of(':');
     if (0 == colon) {
       diag_errx(1, "%s:%d: Bad resource description %s", __FILE__, __LINE__,
                 rdesc.c_str());
     }
     bool shouldSplit = colon != std::string::npos;
 #ifdef _WIN32
     // If colon points to volume separator, don't split.
     if (colon == 1 && blaze_util::IsAbsolute(rdesc)) {
       shouldSplit = false;
     }
 #endif
     if (shouldSplit) {
       ClasspathResource(rdesc.substr(colon + 1), rdesc.substr(0, colon));
     } else {
       ClasspathResource(rdesc, rdesc);
     }
   }

   // Ready to write zip entries. Decide whether created entries should be
   // compressed.
   bool compress = options_->force_compression || options_->preserve_compression;
   // First, write a directory entry for the META-INF, followed by the manifest
   // file, followed by the build properties file.
   WriteMetaInf();
   manifest_.Append("\r\n");
   WriteEntry(manifest_.OutputEntry(compress));
   if (!options_->exclude_build_data) {
     WriteEntry(build_properties_.OutputEntry(compress));
   }

   // Then classpath resources.
   for (auto &classpath_resource : classpath_resources_) {
     // A resource whose name ends with one of the nocompress suffixes is
     // stored uncompressed even when compression is requested.
     bool do_compress = compress;
     if (do_compress && !options_->nocompress_suffixes.empty()) {
       for (auto &suffix : options_->nocompress_suffixes) {
         auto entry_name = classpath_resource->filename();
         if (entry_name.length() >= suffix.size() &&
             !entry_name.compare(entry_name.length() - suffix.size(),
                                 suffix.size(), suffix)) {
           do_compress = false;
           break;
         }
       }
     }

     // Add parent directory entries.
     size_t pos = classpath_resource->filename().find('/');
     while (pos != std::string::npos) {
       std::string dir(classpath_resource->filename(), 0, pos + 1);
       if (NewEntry(dir)) {
         WriteDirEntry(dir, nullptr, 0);
       }
       pos = classpath_resource->filename().find('/', pos + 1);
     }

     WriteEntry(classpath_resource->OutputEntry(do_compress));
   }

   // Then copy source files' contents.
   for (size_t ix = 0; ix < options_->input_jars.size(); ++ix) {
     if (!AddJar(ix)) {
       exit(1);
     }
   }

   // All entries written, write Central Directory and close.
   Close();
   return 0;
 }

 // Emits a warning when the object is destroyed while file_ is still set,
 // i.e. before Close() has been called.
 OutputJar::~OutputJar() {
   if (file_ == nullptr) {
     return;
   }
   diag_warnx("%s:%d: Close() should be called first", __FILE__, __LINE__);
 }

 // Preferred I/O unit size, in bytes: 128KB, which is the default max request
 // size for fuse filesystems.
 static constexpr size_t kBufferSize = 128 * 1024;

 // Creates (or truncates) the output file named by path() and attaches a
 // fully buffered stdio stream with a kBufferSize buffer to it.
 //
 // Returns true on success. On failure a warning is issued and false is
 // returned. Calling Open() while a file is already open is a fatal error.
 bool OutputJar::Open() {
   if (file_) {
     diag_errx(1, "%s:%d: Cannot open output archive twice", __FILE__, __LINE__);
   }

   int mode = O_CREAT | O_WRONLY | O_TRUNC;

 #ifdef _WIN32
   // On Windows the file is created with CreateFileW from the absolute wide
   // path, and the resulting handle is then wrapped in a C runtime file
   // descriptor.
   std::wstring wpath;
   std::string error;
   if (!blaze_util::AsAbsoluteWindowsPath(path(), &wpath, &error)) {
     diag_warn("%s:%d: AsAbsoluteWindowsPath failed: %s", __FILE__, __LINE__,
               error.c_str());
     return false;
   }

   HANDLE hFile = CreateFileW(wpath.c_str(), GENERIC_READ | GENERIC_WRITE, 0,
                              NULL, CREATE_ALWAYS, 0, NULL);
   if (hFile == INVALID_HANDLE_VALUE) {
     diag_warn("%s:%d: CreateFileW failed for %S", __FILE__, __LINE__,
               wpath.c_str());
     return false;
   }

   // Make sure output file is in binary mode, or \r\n will be converted to \n.
   mode |= _O_BINARY;
   int fd = _open_osfhandle(reinterpret_cast<intptr_t>(hFile), mode);
 #else
   // Set execute bits since we may produce an executable output file.
   int fd = open(path(), mode, 0777);
 #endif

   if (fd < 0) {
     diag_warn("%s:%d: %s", __FILE__, __LINE__, path());
     return false;
   }
   file_ = fdopen(fd, "w");
   if (file_ == nullptr) {
     diag_warn("%s:%d: fdopen of %s", __FILE__, __LINE__, path());
     // The descriptor is not owned by any stream, so close it here.
     close(fd);
     return false;
   }
   // Start tracking the output position (see Position()) and switch the
   // stream to full buffering with our own kBufferSize buffer.
   outpos_ = 0;
   buffer_.reset(new char[kBufferSize]);
   setvbuf(file_, buffer_.get(), _IOFBF, kBufferSize);
   if (options_->verbose) {
     fprintf(stderr, "Writing to %s\n", path());
   }
   return true;
 }

 // January 1, 2010 encoded as a DOS date: years since 1980 in bits 9 and up,
 // the month in bits 5-8 and the day of the month in bits 0-4.
 static const uint16_t kDefaultDate = (30 << 9) | (1 << 5) | 1;

 // Adds the contents of the input jar options_->input_jars[jar_path_index] to
 // the output archive.
 //
 // For each entry of the input jar:
 //   * signature files (*.SF, *.RSA, *.DSA) and entries not matching any of
 //     the include prefixes (if any are given) are skipped;
 //   * entries that have a combiner registered in known_members_ are merged
 //     into that combiner instead of being written;
 //   * duplicates of plain entries are either fatal or counted and skipped,
 //     depending on the options;
 //   * the remaining entries are written to the output, recompressed or with
 //     a normalized timestamp when the options require it, and get a Central
 //     Directory record.
 //
 // Returns false if the input jar cannot be opened; otherwise returns the
 // result of closing it.
 bool OutputJar::AddJar(int jar_path_index) {
   const std::string &input_jar_path =
       options_->input_jars[jar_path_index].first;
   const std::string &input_jar_aux_label =
       options_->input_jars[jar_path_index].second;

   InputJar input_jar;
   if (!input_jar.Open(input_jar_path)) {
     return false;
   }
   const CDH *jar_entry;
   const LH *lh;
   while ((jar_entry = input_jar.NextEntry(&lh))) {
     const char *file_name = jar_entry->file_name();
     auto file_name_length = jar_entry->file_name_length();
     if (!file_name_length) {
       diag_errx(
           1, "%s:%d: Bad central directory record in %s at offset 0x%" PRIx64,
           __FILE__, __LINE__, input_jar_path.c_str(),
           input_jar.CentralDirectoryRecordOffset(jar_entry));
     }
     // Special files that cannot be handled by looking up known_members_ map:
     // * ignore *.SF, *.RSA, *.DSA
     //   (TODO(asmundak): should this be done only in META-INF?)
     //
     if (ends_with(file_name, file_name_length, ".SF") ||
         ends_with(file_name, file_name_length, ".RSA") ||
         ends_with(file_name, file_name_length, ".DSA")) {
       continue;
     }

     // When include prefixes are given, keep only the entries whose name
     // starts with at least one of them.
     bool include_entry = true;
     if (!options_->include_prefixes.empty()) {
       for (auto &prefix : options_->include_prefixes) {
         if ((include_entry =
                  (prefix.size() <= file_name_length &&
                   0 == strncmp(file_name, prefix.c_str(), prefix.size())))) {
           break;
         }
       }
     }
     if (!include_entry) {
       continue;
     }

     // A name ending with '/' denotes a directory entry.
     bool is_file = (file_name[file_name_length - 1] != '/');
     if (is_file &&
         begins_with(file_name, file_name_length, "META-INF/services/")) {
       // The contents of the META-INF/services/<SERVICE> on the output is the
       // concatenation of the META-INF/services/<SERVICE> files from all inputs.
       std::string service_path(file_name, file_name_length);
       if (NewEntry(service_path)) {
         // Create a concatenator and add it to the known_members_ map.
         // The call to Merge() below will then take care of the rest.
         Concatenator *service_handler = new Concatenator(service_path);
         service_handlers_.emplace_back(service_handler);
         known_members_.emplace(service_path, EntryInfo{service_handler});
       }
     } else {
       ExtraHandler(input_jar_path, jar_entry, &input_jar_aux_label);
     }

     if (options_->check_desugar_deps &&
         begins_with(file_name, file_name_length, "j$/")) {
       diag_errx(1, "%s:%d: desugar_jdk_libs file %.*s unexpectedly found in %s",
                 __FILE__, __LINE__, file_name_length, file_name,
                 input_jar_path.c_str());
     }

     // Install a new entry unless it is already present. All the plain (non-dir)
     // entries that require a combiner have been already installed, so the call
     // will add either a directory entry whose handler will ignore subsequent
     // duplicates, or an ordinary plain entry, for which we save the index of
     // the first input jar (in order to provide diagnostics on duplicate).
     auto got =
         known_members_.emplace(std::string(file_name, file_name_length),
                                EntryInfo{is_file ? nullptr : &null_combiner_,
                                          is_file ? jar_path_index : -1});
     if (!got.second) {
       // The name was already known: either it has a combiner, or it is a
       // duplicate of a plain entry seen earlier.
       auto &entry_info = got.first->second;
       // Handle special entries (the ones that have a combiner).
       if (entry_info.combiner_ != nullptr) {
         // TODO(kmb,asmundak): Should be checking Merge() return value but fails
         // for build-data.properties when merging deploy jars into deploy jars.
         entry_info.combiner_->Merge(jar_entry, lh);
         continue;
       }

       // Plain file entry. If duplicates are not allowed, bail out. Otherwise
       // just ignore this entry.
       if (options_->no_duplicates ||
           (options_->no_duplicate_classes &&
            ends_with(file_name, file_name_length, ".class"))) {
         diag_errx(
             1, "%s:%d: %.*s is present both in %s and %s", __FILE__, __LINE__,
             file_name_length, file_name,
             options_->input_jars[entry_info.input_jar_index_].first.c_str(),
             input_jar_path.c_str());
       } else {
         duplicate_entries_++;
         continue;
       }
     }

     // For the file entries, decide whether output should be compressed.
     if (is_file) {
       bool input_compressed =
           jar_entry->compression_method() != Z_NO_COMPRESSION;
       bool output_compressed =
           options_->force_compression ||
           (options_->preserve_compression && input_compressed);
       if (output_compressed && !options_->nocompress_suffixes.empty()) {
         for (auto &suffix : options_->nocompress_suffixes) {
           if (file_name_length >= suffix.size() &&
               !strncmp(file_name + file_name_length - suffix.size(),
                        suffix.c_str(), suffix.size())) {
             output_compressed = false;
             break;
           }
         }
       }
       // If the compression state has to change, the entry cannot be copied
       // verbatim: route it through a temporary Concatenator, which produces
       // the entry in the requested form.
       if (input_compressed != output_compressed) {
         Concatenator combiner(jar_entry->file_name_string());
         if (!combiner.Merge(jar_entry, lh)) {
           diag_err(1, "%s:%d: cannot add %.*s", __FILE__, __LINE__,
                    jar_entry->file_name_length(), jar_entry->file_name());
         }
         WriteEntry(combiner.OutputEntry(output_compressed));
         continue;
       }
     }

     // Now we have to copy:
     //  local header
     //  file data
     //  data descriptor, if present.
     off64_t copy_from = jar_entry->local_header_offset();
     size_t num_bytes = lh->size();
     if (jar_entry->no_size_in_local_header()) {
       // The local header carries no sizes, so take the compressed size from
       // the Central Directory record and account for the data descriptor
       // that follows the data.
       const DDR *ddr = reinterpret_cast<const DDR *>(
           lh->data() + jar_entry->compressed_file_size());
       num_bytes +=
           jar_entry->compressed_file_size() +
           ddr->size(
               ziph::zfield_has_ext64(jar_entry->compressed_file_size32()),
               ziph::zfield_has_ext64(jar_entry->uncompressed_file_size32()));
     } else {
       num_bytes += lh->compressed_file_size();
     }
     off64_t local_header_offset = Position();

     // When normalize_timestamps is set, entry's timestamp is to be set to
     // 01/01/2010 00:00:00 (or to 01/01/2010 00:00:02, if an entry is a .class
     // file). This is somewhat expensive because we have to copy the local
     // header to memory as input jar is memory mapped as read-only. Try to copy
     // as little as possible.
     uint16_t normalized_time = 0;
     const UnixTimeExtraField *lh_field_to_remove = nullptr;
     bool fix_timestamp = false;
     if (options_->normalize_timestamps) {
       if (ends_with(file_name, file_name_length, ".class")) {
         normalized_time = 1;
       }
       lh_field_to_remove = lh->unix_time_extra_field();
       fix_timestamp = jar_entry->last_mod_file_date() != kDefaultDate ||
                       jar_entry->last_mod_file_time() != normalized_time ||
                       lh_field_to_remove != nullptr;
     }
     if (fix_timestamp) {
       // Build the modified header on the stack when it fits, on the heap
       // otherwise.
       uint8_t lh_buffer[512];
       size_t lh_size = lh->size();
       LH *lh_new = lh_size > sizeof(lh_buffer)
                        ? reinterpret_cast<LH *>(malloc(lh_size))
                        : reinterpret_cast<LH *>(lh_buffer);
       // Remove Unix timestamp field.
       if (lh_field_to_remove != nullptr) {
         // Copy the header in two chunks: everything before the field to be
         // removed, then everything after it.
         auto from_end = ziph::byte_ptr(lh) + lh->size();
         size_t removed_size = lh_field_to_remove->size();
         size_t chunk1_size =
             ziph::byte_ptr(lh_field_to_remove) - ziph::byte_ptr(lh);
         size_t chunk2_size = lh->size() - (chunk1_size + removed_size);
         memcpy(lh_new, lh, chunk1_size);
         if (chunk2_size) {
           memcpy(reinterpret_cast<uint8_t *>(lh_new) + chunk1_size,
                  from_end - chunk2_size, chunk2_size);
         }
         lh_new->extra_fields(lh_new->extra_fields(),
                              lh->extra_fields_length() - removed_size);
       } else {
         memcpy(lh_new, lh, lh_size);
       }
       lh_new->last_mod_file_date(kDefaultDate);
       lh_new->last_mod_file_time(normalized_time);
       // Now write these few bytes and adjust read/write positions accordingly.
       if (!WriteBytes(lh_new, lh_new->size())) {
         diag_err(1, "%s:%d: Cannot copy modified local header for %.*s",
                  __FILE__, __LINE__, file_name_length, file_name);
       }
       // The original header has been replaced, so skip it in the input.
       copy_from += lh_size;
       num_bytes -= lh_size;
       if (reinterpret_cast<uint8_t *>(lh_new) != lh_buffer) {
         free(lh_new);
       }
     }

     // Do the actual copy.
     if (!WriteBytes(input_jar.mapped_start() + copy_from, num_bytes)) {
       diag_err(1, "%s:%d: Cannot write %zu bytes of %.*s from %s", __FILE__,
                __LINE__, num_bytes, file_name_length, file_name,
                input_jar_path.c_str());
     }

     AppendToDirectoryBuffer(jar_entry, local_header_offset, normalized_time,
                             fix_timestamp);
     ++entries_;
   }
   return input_jar.Close();
 }

 // Returns the current offset in the output file. Reports an error through
 // diag_err() when no output file is open.
 off64_t OutputJar::Position() {
   if (!file_) {
     diag_err(1, "%s:%d: output file is not open", __FILE__, __LINE__);
   }
   // ftell(file_) would give the same answer at the cost of a needless lseek
   // call, so the offset is tracked in outpos_ instead.
   return outpos_;
 }

 // Writes an entry. The argument is the pointer to the contiguous block of
 // memory containing Local Header for the entry, immediately followed by
 // the data. The memory is freed after the data has been written.
 //
 // A null `buffer` is ignored. Before writing, the entry's timestamp is set:
 // to the normalized value if options_->normalize_timestamps is set, to the
 // current local time otherwise. After writing, a Central Directory record
 // for the entry is appended to the in-memory directory buffer and the entry
 // count is incremented.
 void OutputJar::WriteEntry(void *buffer) {
   if (buffer == nullptr) {
     return;
   }
   LH *entry = reinterpret_cast<LH *>(buffer);
   if (options_->verbose) {
     fprintf(stderr, "%-.*s combiner has %zu bytes, %s to %zu\n",
             entry->file_name_length(), entry->file_name(),
             entry->uncompressed_file_size(),
             entry->compression_method() == Z_NO_COMPRESSION ? "copied"
                                                             : "compressed",
             entry->compressed_file_size());
   }

   // Set this entry's timestamp.
   // MSDOS file timestamp format that Zip uses is described here:
   // https://msdn.microsoft.com/en-us/library/9kkf9tah.aspx
   // ("32-Bit Windows Time/Date Formats")
   if (options_->normalize_timestamps) {
     // Regular "normalized" timestamp is 01/01/2010 00:00:00, while for the
     // .class file it is 01/01/2010 00:00:02
     entry->last_mod_file_date(kDefaultDate);
     entry->last_mod_file_time(
         ends_with(entry->file_name(), entry->file_name_length(), ".class") ? 1
                                                                            : 0);
   } else {
     struct tm tm;
     // Time has 2-second resolution, so round up:
     time_t t_adjusted = (time(nullptr) + 1) & ~1;
     localtime_r(&t_adjusted, &tm);
     // tm_year counts from 1900 and the DOS date from 1980, hence the -80;
     // tm_mon is zero-based, hence the +1.
     uint16_t dos_date =
         ((tm.tm_year - 80) << 9) | ((tm.tm_mon + 1) << 5) | tm.tm_mday;
     uint16_t dos_time =
         (tm.tm_hour << 11) | (tm.tm_min << 5) | (tm.tm_sec >> 1);
     entry->last_mod_file_time(dos_time);
     entry->last_mod_file_date(dos_date);
   }

   // Write the local header and the data that follows it in one go,
   // remembering where the entry starts for the Central Directory record.
   uint8_t *data = reinterpret_cast<uint8_t *>(entry);
   off64_t output_position = Position();
   if (!WriteBytes(data, entry->data() + entry->in_zip_size() - data)) {
     diag_err(1, "%s:%d: write", __FILE__, __LINE__);
   }
   // Data written, allocate CDH space and populate CDH.
   // Space needed for the CDH varies depending on whether output position field
   // fits into 32 bits (we do not handle compressed/uncompressed entry sizes
   // exceeding 32 bits at the moment).
   uint16_t zip64_size = ziph::zfield_needs_ext64(output_position)
                             ? Zip64ExtraField::space_needed(1)
                             : 0;
   CDH *cdh = reinterpret_cast<CDH *>(
       ReserveCdh(sizeof(CDH) + entry->file_name_length() +
                  entry->extra_fields_length() + zip64_size));
   cdh->signature();
   // Note: do not set the version to Unix 3.0 spec, otherwise
   // unzip will think that 'external_attributes' field contains access mode
   cdh->version(20);
   cdh->version_to_extract(20);  // 2.0
   cdh->bit_flag(0x0);
   cdh->compression_method(entry->compression_method());
   cdh->last_mod_file_time(entry->last_mod_file_time());
   cdh->last_mod_file_date(entry->last_mod_file_date());
   cdh->crc32(entry->crc32());
   // A 32-bit size field of 0xFFFFFFFF would require Zip64 sizes, which are
   // not supported here.
   TODO(entry->compressed_file_size32() != 0xFFFFFFFF, "Handle Zip64");
   cdh->compressed_file_size32(entry->compressed_file_size32());
   TODO(entry->uncompressed_file_size32() != 0xFFFFFFFF, "Handle Zip64");
   cdh->uncompressed_file_size32(entry->uncompressed_file_size32());
   cdh->file_name(entry->file_name(), entry->file_name_length());
   cdh->extra_fields(entry->extra_fields(), entry->extra_fields_length());
   if (zip64_size > 0) {
     // The offset does not fit into 32 bits: store it in a Zip64 extra field
     // placed after the copied extra fields, and put the 0xFFFFFFFF marker
     // into the 32-bit offset field.
     Zip64ExtraField *zip64_ef = reinterpret_cast<Zip64ExtraField *>(
         cdh->extra_fields() + cdh->extra_fields_length());
     zip64_ef->signature();
     zip64_ef->attr_count(1);
     zip64_ef->attr64(0, output_position);
     cdh->local_header_offset32(0xFFFFFFFF);
     // Field address argument points to the already existing field,
     // so the call just updates the length.
     cdh->extra_fields(cdh->extra_fields(),
                       cdh->extra_fields_length() + zip64_size);
   } else {
     cdh->local_header_offset32(output_position);
   }
   cdh->comment_length(0);
   cdh->start_disk_nr(0);
   cdh->internal_attributes(0);
   cdh->external_attributes(0);
   ++entries_;
   free(reinterpret_cast<void *>(entry));
 }

 // Writes the "META-INF/" directory entry.
 void OutputJar::WriteMetaInf() {
   // META-INF/ is always the first entry, and as such it carries an extra
   // field with the tag 0xCAFE and zero bytes of data. The jar file spec does
   // not require it, but the Unix 'file' utility relies on it to distinguish
   // a jar file from a zip file.
   // See https://bugs.openjdk.java.net/browse/JDK-6808540
   const uint8_t jar_marker[] = {0xFE, 0xCA, 0, 0};
   const uint16_t jar_marker_size = static_cast<uint16_t>(sizeof(jar_marker));
   const std::string dir_name("META-INF/");
   WriteDirEntry(dir_name, jar_marker, jar_marker_size);
 }

 // Writes a directory entry with the given name and extra fields.
 void OutputJar::WriteDirEntry(const std::string &name,
                               const uint8_t *extra_fields,
                               const uint16_t n_extra_fields) {
   size_t lh_size = sizeof(LH) + name.size() + n_extra_fields;
   LH *lh = reinterpret_cast<LH *>(malloc(lh_size));
   lh->signature();
   lh->version(20);  // 2.0
   lh->bit_flag(0);  // TODO(asmundak): should I set UTF8 flag?
   lh->compression_method(Z_NO_COMPRESSION);
   lh->crc32(0);
   lh->compressed_file_size32(0);
   lh->uncompressed_file_size32(0);
   lh->file_name(name.c_str(), name.size());
   lh->extra_fields(extra_fields, n_extra_fields);
   known_members_.emplace(name, EntryInfo{&null_combiner_});
   WriteEntry(lh);
 }

 // Create output Central Directory entry for the input jar entry.
 //
 // `cdh` is the Central Directory record of the entry in the input jar,
 // `lh_pos` is the offset of the entry's local header in the output file.
 // When `fix_timestamp` is true, the output record gets the date
 // kDefaultDate and the time `normalized_time`, and its Unix time extra
 // field (if any) is dropped.
 void OutputJar::AppendToDirectoryBuffer(const CDH *cdh, off64_t lh_pos,
                                         uint16_t normalized_time,
                                         bool fix_timestamp) {
   // While copying from the input CDH pointed to by 'cdh', we may need to drop
   // Unix timestamp extra field, and we might need to change the number of
   // attributes of the Zip64 extra field, or create it, or destroy it if entry's
   // position relative to 4G boundary changes.
   // The rest of the input CDH is copied.

   // 1. Decide if we need to drop UnixTime.
   size_t removed_unix_time_field_size = 0;
   if (fix_timestamp) {
     auto unix_time_field = cdh->unix_time_extra_field();
     if (unix_time_field != nullptr) {
       removed_unix_time_field_size = unix_time_field->size();
     }
   }

   // 2. Figure out how many attributes input entry has and how many
   // the output entry is going to have.
   const Zip64ExtraField *zip64_ef = cdh->zip64_extra_field();
   const int zip64_attr_count = zip64_ef == nullptr ? 0 : zip64_ef->attr_count();
   const bool lh_pos_needs64 = ziph::zfield_needs_ext64(lh_pos);
   int out_zip64_attr_count;
   if (zip64_attr_count > 0) {
     out_zip64_attr_count = zip64_attr_count;
     // The number of attributes may remain the same, or it may increase or
     // decrease by 1, depending on local_header_offset value.
     if (ziph::zfield_has_ext64(cdh->local_header_offset32()) !=
         lh_pos_needs64) {
       if (lh_pos_needs64) {
         out_zip64_attr_count += 1;
       } else {
         out_zip64_attr_count -= 1;
       }
     }
   } else {
     out_zip64_attr_count = lh_pos_needs64 ? 1 : 0;
   }
   const uint16_t zip64_size = Zip64ExtraField::space_needed(zip64_attr_count);
   const uint16_t out_zip64_size =
       Zip64ExtraField::space_needed(out_zip64_attr_count);

   // Allocate output CDH and copy everything but extra fields.
   // The output extra fields are the input ones, minus the dropped Unix time
   // field and the input Zip64 field, plus the output Zip64 field.
   const uint16_t ef_size = cdh->extra_fields_length();
   const uint16_t out_ef_size =
       (ef_size + out_zip64_size) - (removed_unix_time_field_size + zip64_size);

   const size_t out_cdh_size = cdh->size() + out_ef_size - ef_size;
   CDH *out_cdh = reinterpret_cast<CDH *>(ReserveCdr(out_cdh_size));

   // Calculate ExtraFields boundaries in the input and output entries.
   auto ef_begin = reinterpret_cast<const ExtraField *>(cdh->extra_fields());
   auto ef_end =
       reinterpret_cast<const ExtraField *>(ziph::byte_ptr(ef_begin) + ef_size);
   // Copy [cdh..ef_begin) -> [out_cdh..out_ef_begin)
   memcpy(out_cdh, cdh, ziph::byte_ptr(ef_begin) - ziph::byte_ptr(cdh));

   auto out_ef_begin = reinterpret_cast<ExtraField *>(
       const_cast<uint8_t *>(out_cdh->extra_fields()));
   auto out_ef_end = reinterpret_cast<ExtraField *>(
       reinterpret_cast<uint8_t *>(out_ef_begin) + out_ef_size);

   // Copy [ef_end..cdh_end) -> [out_ef_end..out_cdh_end)
   memcpy(out_ef_end, ef_end,
          ziph::byte_ptr(cdh) + cdh->size() - ziph::byte_ptr(ef_end));

   // Copy extra fields, dropping Zip64 and possibly UnixTime fields.
   ExtraField *out_ef = out_ef_begin;
   for (const ExtraField *ef = ef_begin; ef < ef_end; ef = ef->next()) {
     if ((fix_timestamp && ef->is_unix_time()) || ef->is_zip64()) {
       // Skip this one.
     } else {
       memcpy(out_ef, ef, ef->size());
       out_ef = reinterpret_cast<ExtraField *>(
           reinterpret_cast<uint8_t *>(out_ef) + ef->size());
     }
   }

   // Set up Zip64 extra field if necessary.
   // It is placed after the extra fields copied above.
   if (out_zip64_size > 0) {
     Zip64ExtraField *out_zip64_ef = reinterpret_cast<Zip64ExtraField *>(out_ef);
     out_zip64_ef->signature();
     out_zip64_ef->attr_count(out_zip64_attr_count);
     // Carry over at most the first two attributes of the input field.
     int copy_count = out_zip64_attr_count < zip64_attr_count
                          ? out_zip64_attr_count
                          : zip64_attr_count;
     if (copy_count > 0) {
       out_zip64_ef->attr64(0, zip64_ef->attr64(0));
       if (copy_count > 1) {
         out_zip64_ef->attr64(1, zip64_ef->attr64(1));
       }
     }
     // Set 64-bit local_header_offset if necessary. It's always the last
     // attribute.
     if (lh_pos_needs64) {
       out_zip64_ef->attr64(out_zip64_attr_count - 1, lh_pos);
     }
   }
   out_cdh->extra_fields(ziph::byte_ptr(out_ef_begin), out_ef_size);
   out_cdh->local_header_offset32(lh_pos_needs64 ? 0xFFFFFFFF : lh_pos);
   if (fix_timestamp) {
     out_cdh->last_mod_file_time(normalized_time);
     out_cdh->last_mod_file_date(kDefaultDate);
   }
 }

 // Reserves `chunk_size` bytes at the end of the in-memory Central Directory
 // buffer and returns a pointer to them. The reserved bytes are not
 // initialized. The buffer is reallocated when it is too small, so pointers
 // returned by earlier calls must not be used after a later call.
 uint8_t *OutputJar::ReserveCdr(size_t chunk_size) {
   if (cen_size_ + chunk_size > cen_capacity_) {
     // Grow in fixed steps until the request fits: a single step would not be
     // enough for a chunk larger than the step itself.
     do {
       cen_capacity_ += 1000000;
     } while (cen_size_ + chunk_size > cen_capacity_);
     cen_ = reinterpret_cast<uint8_t *>(realloc(cen_, cen_capacity_));
     if (!cen_) {
       diag_errx(1, "%s:%d: Cannot allocate %zu bytes for the directory",
                 __FILE__, __LINE__, cen_capacity_);
     }
   }
   uint8_t *entry = cen_ + cen_size_;
   cen_size_ += chunk_size;
   return entry;
 }

 // Reserves `size` bytes through ReserveCdr() and fills them with zeros.
 uint8_t *OutputJar::ReserveCdh(size_t size) {
   uint8_t *const chunk = ReserveCdr(size);
   memset(chunk, 0, size);
   return chunk;
 }

 // Write out combined jar: emits the entries still held by the combiners,
 // appends the End of Central Directory record(s) to the in-memory Central
 // Directory, writes that buffer to the output and closes the output file.
 //
 // Returns true, including when the output file is not open (file_ is null),
 // in which case nothing is done. Write and close failures are reported
 // through diag_err(1, ...) and not through the return value.
 bool OutputJar::Close() {
   if (file_ == nullptr) {
     return true;
   }

   // Flush the entries accumulated by the combiners.
   for (auto &service_handler : service_handlers_) {
     WriteEntry(service_handler->OutputEntry(options_->force_compression));
   }
   for (auto &extra_combiner : extra_combiners_) {
     WriteEntry(extra_combiner->OutputEntry(options_->force_compression));
   }
   WriteEntry(spring_handlers_.OutputEntry(options_->force_compression));
   WriteEntry(spring_schemas_.OutputEntry(options_->force_compression));
   WriteEntry(protobuf_meta_handler_.OutputEntry(options_->force_compression));
   // TODO(asmundak): handle manifest;

   // Everything written so far precedes the Central Directory, so the current
   // position is the Central Directory offset.
   off64_t output_position = Position();
   // Zip64 records are required as soon as any value reaches the all-ones
   // sentinel of its 16/32-bit field in the classic ECD record.
   bool write_zip64_ecd = output_position >= 0xFFFFFFFF || entries_ >= 0xFFFF ||
                          cen_size_ >= 0xFFFFFFFF;

   size_t cen_size = cen_size_;  // Save it before ReserveCdh updates it.
   if (write_zip64_ecd) {
     {
       ECD64 *ecd64 = reinterpret_cast<ECD64 *>(ReserveCdh(sizeof(ECD64)));
       ecd64->signature();
       // The recorded size excludes the 4-byte signature and the 8-byte size
       // field itself.
       ecd64->remaining_size(sizeof(ECD64) - 12);
       ecd64->version(0x031E);         // Unix, version 3.0
       ecd64->version_to_extract(45);  // 4.5 (Zip64 support)
       ecd64->this_disk_entries(entries_);
       ecd64->total_entries(entries_);
       ecd64->cen_size(cen_size);
       ecd64->cen_offset(output_position);
     }
     {
       ECD64Locator *ecd64_locator =
           reinterpret_cast<ECD64Locator *>(ReserveCdh(sizeof(ECD64Locator)));
       ecd64_locator->signature();
       // The ECD64 record was reserved right after the directory entries.
       ecd64_locator->ecd64_offset(output_position + cen_size);
       ecd64_locator->total_disks(1);
     }
     {
       ECD *ecd = reinterpret_cast<ECD *>(ReserveCdh(sizeof(ECD)));
       ecd->signature();
       ecd->this_disk_entries16(0xFFFF);
       ecd->total_entries16(0xFFFF);
       // Java Compiler (javac) uses its own "optimized" Zip handler (see
       // https://bugs.openjdk.java.net/browse/JDK-7018859) which may fail
       // to handle 0xFFFFFFFF in the CEN size and CEN offset fields. Try
       // to use 32-bit values here, too. Hopefully by the time we need to
       // handle really large archives, this is fixed upstream. Note that this
       // affects javac and javah only, 'jar' experiences no problems.
       ecd->cen_size32(std::min(cen_size, static_cast<size_t>(0xFFFFFFFFUL)));
       ecd->cen_offset32(
           std::min(output_position, static_cast<off64_t>(0x0FFFFFFFFL)));
     }
   } else {
     ECD *ecd = reinterpret_cast<ECD *>(ReserveCdh(sizeof(ECD)));
     ecd->signature();
     ecd->this_disk_entries16(static_cast<uint16_t>(entries_));
     ecd->total_entries16(static_cast<uint16_t>(entries_));
     ecd->cen_size32(cen_size);
     ecd->cen_offset32(output_position);
   }

   // Save Central Directory and wrap up. cen_size_ (unlike the saved cen_size)
   // now also covers the trailing record(s) reserved above.
   if (!WriteBytes(cen_, cen_size_)) {
     diag_err(1, "%s:%d: Cannot write central directory", __FILE__, __LINE__);
   }
   free(cen_);
   // Do not leave a dangling pointer (and bookkeeping describing a freed
   // buffer) behind: any later use of the directory buffer starts from a
   // clean, empty state instead of touching freed memory.
   cen_ = nullptr;
   cen_size_ = 0;
   cen_capacity_ = 0;

   if (fclose(file_)) {
     diag_err(1, "%s:%d: %s", __FILE__, __LINE__, path());
   }
   file_ = nullptr;
   // Free the buffer only after fclose(); stdio may flush data from the
   // buffer on close.
   buffer_.reset();

   if (options_->verbose) {
     fprintf(stderr, "Wrote %s with %d entries", path(), entries_);
     if (duplicate_entries_) {
       fprintf(stderr, ", skipped %d entries", duplicate_entries_);
     }
     fprintf(stderr, "\n");
   }
   return true;
 }

 // Returns true if `path` names a directory. If stat() fails, a warning is
 // issued through diag_warn and false is returned.
 bool IsDir(const std::string &path) {
   struct stat st;
   if (stat(path.c_str(), &st)) {
     diag_warn("%s:%d: stat %s:", __FILE__, __LINE__, path.c_str());
     return false;
   }
   // The file type is an enumerated value inside the S_IFMT bits, not a set
   // of independent flags: extract it with the mask before comparing. Merely
   // testing the S_IFDIR bit also matches other types whose encoding contains
   // that bit (e.g. block devices on common POSIX systems).
   return (st.st_mode & S_IFMT) == S_IFDIR;
 }

 void OutputJar::ClasspathResource(const std::string &resource_name,
                                   const std::string &resource_path) {
   if (known_members_.count(resource_name)) {
     if (options_->warn_duplicate_resources) {
       diag_warnx(
           "%s:%d: Duplicate resource name %s in the --classpath_resource or "
           "--resource option",
           __FILE__, __LINE__, resource_name.c_str());
       // TODO(asmundak): this mimics old behaviour. Confirm that unless
       // we run with --warn_duplicate_resources, the output zip file contains
       // the concatenated contents of the all the resources with the same name.
       return;
     }
   }
   MappedFile mapped_file;
   if (mapped_file.Open(resource_path)) {
     Concatenator *classpath_resource = new Concatenator(resource_name);
     classpath_resource->Append(
         reinterpret_cast<const char *>(mapped_file.start()),
         mapped_file.size());
     classpath_resources_.emplace_back(classpath_resource);
     known_members_.emplace(resource_name, EntryInfo{classpath_resource});
   } else if (IsDir(resource_path)) {
     // add an empty entry for the directory so its path ends up in the
     // manifest
     classpath_resources_.emplace_back(new Concatenator(resource_name + "/"));
     known_members_.emplace(resource_name, EntryInfo{&null_combiner_});
   } else {
     diag_err(1, "%s:%d: %s", __FILE__, __LINE__, resource_path.c_str());
   }
 }

 // Copies up to `count` bytes, starting at byte `offset` of the file open on
 // descriptor `in_fd`, to the output via WriteBytes().
 //
 // Returns the number of bytes copied, which is less than `count` if the end
 // of the input is reached first, or -1 if reading or writing fails. A failure
 // to allocate the copy buffer is reported through diag_err(1, ...).
 ssize_t OutputJar::AppendFile(int in_fd, off64_t offset, size_t count) {
   if (count == 0) {
     return 0;
   }
   std::unique_ptr<void, decltype(free) *> buffer(malloc(kBufferSize), free);
   if (buffer == nullptr) {
     diag_err(1, "%s:%d: malloc", __FILE__, __LINE__);
   }
   ssize_t total_written = 0;

 #ifdef _WIN32
   HANDLE hFile = reinterpret_cast<HANDLE>(_get_osfhandle(in_fd));
   while (static_cast<size_t>(total_written) < count) {
     ssize_t len = std::min(kBufferSize, count - total_written);
     // Read at an explicit position, like pread() in the POSIX branch, rather
     // than at whatever the handle's current file pointer happens to be: the
     // OVERLAPPED structure carries the 64-bit offset of this read.
     const uint64_t read_pos =
         static_cast<uint64_t>(offset) + static_cast<uint64_t>(total_written);
     OVERLAPPED overlapped = {};
     overlapped.Offset = static_cast<DWORD>(read_pos & 0xFFFFFFFFu);
     overlapped.OffsetHigh = static_cast<DWORD>(read_pos >> 32);
     DWORD n_read = 0;
     if (!::ReadFile(hFile, buffer.get(), static_cast<DWORD>(len), &n_read,
                     &overlapped)) {
       // With an explicit offset, hitting the end of the file is reported as
       // a failure with ERROR_HANDLE_EOF; treat it as a short read.
       if (::GetLastError() == ERROR_HANDLE_EOF) {
         break;
       }
       return -1;
     }
     if (n_read == 0) {
       break;
     }
     if (!WriteBytes(buffer.get(), n_read)) {
       return -1;
     }
     total_written += n_read;
   }
 #else
   while (static_cast<size_t>(total_written) < count) {
     size_t len = std::min(kBufferSize, count - total_written);
     ssize_t n_read = pread(in_fd, buffer.get(), len, offset + total_written);
     if (n_read > 0) {
       if (!WriteBytes(buffer.get(), n_read)) {
         return -1;
       }
       total_written += n_read;
     } else if (n_read == 0) {
       // End of input before `count` bytes were available.
       break;
     } else {
       return -1;
     }
   }
 #endif  // _WIN32

   return total_written;
 }

 // Registers `combiner` for the entry named `entry_name`: the combiner is
 // appended to extra_combiners_ (whose elements have their OutputEntry()
 // written by Close()) and recorded in known_members_ under `entry_name`.
 // NOTE(review): presumably extra_combiners_ takes ownership of `combiner` --
 // confirm against the member's declaration.
 void OutputJar::ExtraCombiner(const std::string &entry_name,
                               Combiner *combiner) {
   extra_combiners_.emplace_back(combiner);
   // NOTE(review): assuming known_members_ is a standard associative
   // container, emplace() keeps an already present mapping for entry_name.
   known_members_.emplace(entry_name, EntryInfo{combiner});
 }

 // Writes `count` bytes from `buffer` to the output file and advances the
 // tracked output position by the number of bytes fwrite() reports as written
 // (also on a short write). Returns true only if all `count` bytes were
 // written.
 bool OutputJar::WriteBytes(const void *buffer, size_t count) {
   const size_t n_written = fwrite(buffer, 1, count, file_);
   outpos_ += n_written;
   return n_written == count;
 }

 // This implementation intentionally does nothing and ignores all of its
 // arguments.
 // NOTE(review): presumably a hook that derived classes override to process
 // additional entries -- confirm against the declaration in output_jar.h.
 void OutputJar::ExtraHandler(const std::string &input_jar_path, const CDH *,
                              const std::string *) {}