Use WriteBytes instead of AppendFile to write entries.
This improves wall-clock time for building deploy jars by ~5%.
Also, compute the exact length of the Data Descriptor Record (optional signature, 32- or 64-bit size fields) instead of assuming it is always four 32-bit words.
--
MOS_MIGRATED_REVID=133849413
diff --git a/src/tools/singlejar/input_jar.h b/src/tools/singlejar/input_jar.h
index 265a392..3905245 100644
--- a/src/tools/singlejar/input_jar.h
+++ b/src/tools/singlejar/input_jar.h
@@ -86,6 +86,10 @@
return mapped_file_.offset(lh);
}
+ const uint8_t *mapped_start() const {
+ return mapped_file_.address(0);
+ }
+
private:
std::string path_;
MappedFile mapped_file_;
diff --git a/src/tools/singlejar/output_jar.cc b/src/tools/singlejar/output_jar.cc
index 222919b..855afe6 100644
--- a/src/tools/singlejar/output_jar.cc
+++ b/src/tools/singlejar/output_jar.cc
@@ -373,14 +373,12 @@
off_t copy_from = jar_entry->local_header_offset();
size_t num_bytes = lh->size();
if (jar_entry->no_size_in_local_header()) {
- // The size of the data descriptor varies. The actual data in it is three
- // uint32's (crc32, compressed size, uncompressed size), but these can be
- // preceded by the "PK\x7\x8" signature word (alas, 'jar' has it).
- // Reading the descriptor just to figure out whether we need to copy four
- // or three words will cost us another page read, let us assume the data
- // description is always 4 words long at the cost of having an occasional
- // one word gap between the entries.
- num_bytes += jar_entry->compressed_file_size() + 4 * sizeof(uint32_t);
+ const DDR *ddr = reinterpret_cast<const DDR *>(
+ lh->data() + jar_entry->compressed_file_size());
+ num_bytes +=
+ jar_entry->compressed_file_size() +
+ ddr->size(0xFFFFFFFF == jar_entry->compressed_file_size32(),
+ 0xFFFFFFFF == jar_entry->uncompressed_file_size32());
} else {
num_bytes += lh->compressed_file_size();
}
@@ -440,14 +438,10 @@
}
// Do the actual copy.
- ssize_t n_copied = AppendFile(input_jar.fd(), copy_from, num_bytes);
- if (n_copied < 0) {
- diag_err(1, "%s:%d: Cannot copy %ld bytes of %.*s from %s", __FILE__,
+ if (!WriteBytes(input_jar.mapped_start() + copy_from, num_bytes)) {
+ diag_err(1, "%s:%d: Cannot write %ld bytes of %.*s from %s", __FILE__,
__LINE__, num_bytes, file_name_length, file_name,
input_jar_path.c_str());
- } else if (static_cast<size_t>(n_copied) != num_bytes) {
- diag_err(1, "%s:%d: Copied only %ld bytes out of %ld from %s", __FILE__,
- __LINE__, n_copied, num_bytes, input_jar_path.c_str());
}
// Append central directory header for this file to the output central
@@ -756,7 +750,7 @@
known_members_.emplace(entry_name, EntryInfo{combiner});
}
-bool OutputJar::WriteBytes(void *buffer, size_t count) {
+bool OutputJar::WriteBytes(const void *buffer, size_t count) {
size_t written = fwrite(buffer, 1, count, file_);
outpos_ += written;
return written == count;
diff --git a/src/tools/singlejar/output_jar.h b/src/tools/singlejar/output_jar.h
index ce7ff0e..082bf7a 100644
--- a/src/tools/singlejar/output_jar.h
+++ b/src/tools/singlejar/output_jar.h
@@ -87,7 +87,7 @@
// Copy 'count' bytes starting at 'offset' from the given file.
ssize_t AppendFile(int in_fd, off_t offset, size_t count);
// Write bytes to the output file, return true on success.
- bool WriteBytes(void *buffer, size_t count);
+ bool WriteBytes(const void *buffer, size_t count);
Options *options_;
diff --git a/src/tools/singlejar/zip_headers.h b/src/tools/singlejar/zip_headers.h
index 7698d9c..02f8167 100644
--- a/src/tools/singlejar/zip_headers.h
+++ b/src/tools/singlejar/zip_headers.h
@@ -288,6 +288,46 @@
} __attribute__((packed));
static_assert(30 == sizeof(LH), "The fields layout for class LH is incorrect");
+/* Data descriptor Record:
+ * 4.3.9 Data descriptor:
+ *
+ * crc-32 4 bytes
+ * compressed size 4 bytes
+ * uncompressed size 4 bytes
+ *
+ * 4.3.9.1 This descriptor MUST exist if bit 3 of the general purpose bit
+ * flag is set (see below). It is byte aligned and immediately follows the
+ * last byte of compressed data. This descriptor SHOULD be used only when it
+ * was not possible to seek in the output .ZIP file, e.g., when the output
+ * .ZIP file was standard output or a non-seekable device. For ZIP64(tm)
+ * format archives, the compressed and uncompressed sizes are 8 bytes each.
+ *
+ * 4.3.9.2 When compressing files, compressed and uncompressed sizes should
+ * be stored in ZIP64 format (as 8 byte values) when a file's size exceeds
+ * 0xFFFFFFFF. However ZIP64 format may be used regardless of the size of a
+ * file. When extracting, if the zip64 extended information extra field is
+ * present for the file the compressed and uncompressed sizes will be 8 byte
+ * values.
+ *
+ * 4.3.9.3 Although not originally assigned a signature, the value 0x08074b50
+ * has commonly been adopted as a signature value for the data descriptor
+ * record. Implementers should be aware that ZIP files may be encountered
+ * with or without this signature marking data descriptors and SHOULD account
+ * for either case when reading ZIP files to ensure compatibility.
+ */
+class DDR {
+ public:
+ size_t size(bool compressed_size_is_64bits,
+ bool original_size_is_64bits) const {
+ return (0x08074b50 == le32toh(optional_signature_) ? 8 : 4) +
+ (compressed_size_is_64bits ? 8 : 4) +
+ (original_size_is_64bits ? 8 : 4);
+ }
+
+ private:
+ uint32_t optional_signature_;
+} __attribute__((packed));
+
/* Central Directory Header. */
class CDH {
public: