Add transient_bytes.h and test for it.

--
MOS_MIGRATED_REVID=127863019
diff --git a/src/tools/singlejar/BUILD b/src/tools/singlejar/BUILD
index c70af41..de0e0d6 100644
--- a/src/tools/singlejar/BUILD
+++ b/src/tools/singlejar/BUILD
@@ -57,6 +57,22 @@
 )
 
 cc_test(
+    name = "transient_bytes_test",
+    size = "large",
+    srcs = [
+        "transient_bytes_test.cc",
+        ":input_jar",
+        ":test_util",
+        ":transient_bytes",
+        ":zlib_interface",
+    ],
+    deps = [
+        "//third_party:gtest",
+        "//third_party/zlib",
+    ],
+)
+
+cc_test(
     name = "zlib_interface_test",
     srcs = [
         "zlib_interface_test.cc",
@@ -92,6 +108,19 @@
 )
 
 filegroup(
+    name = "transient_bytes",
+    srcs = [
+        "diag.h",
+        "transient_bytes.h",
+        "zip_headers.h",
+        "zlib_interface.h",
+    ],
+)
+
+filegroup(
     name = "zlib_interface",
-    srcs = ["zlib_interface.h"],
+    srcs = [
+        "diag.h",
+        "zlib_interface.h",
+    ],
 )
diff --git a/src/tools/singlejar/transient_bytes.h b/src/tools/singlejar/transient_bytes.h
new file mode 100644
index 0000000..af97ac2
--- /dev/null
+++ b/src/tools/singlejar/transient_bytes.h
@@ -0,0 +1,302 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_
+#define SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_
+
+#include <inttypes.h>
+#include <algorithm>
+#include <ostream>
+
+#include "src/tools/singlejar/diag.h"
+#include "src/tools/singlejar/zip_headers.h"
+#include "src/tools/singlejar/zlib_interface.h"
+
+/*
+ * An instance of this class holds decompressed data in a list of chunks,
+ * to be eventually compressed to the output buffer.
+ * Use DecompressEntryContents() or ReadEntryContents() (depending on whether
+ * an entry is compressed or not) to append the contents of a Zip entry.
+ * Use Append() to append a sequence of bytes or a string.
+ * Use Write() to write out the contents; it will compress the entry if
+ * necessary.
+ */
+class TransientBytes {
+ public:
+  TransientBytes()
+      : allocated_(0), data_size_(0), first_block_(nullptr),
+        last_block_(nullptr) {}
+  TransientBytes(const TransientBytes &) = delete;  // Owns the block list.
+  TransientBytes &operator=(const TransientBytes &) = delete;  // Ditto.
+
+  ~TransientBytes() {
+    // Release every block; grab the successor before deleting its owner.
+    for (DataBlock *next; first_block_; first_block_ = next) {
+      next = first_block_->next_block_;
+      delete first_block_;
+    }
+    last_block_ = nullptr;
+  }
+
+  // Appends raw bytes.
+  void Append(const uint8_t *data, uint64_t data_size) {
+    uint64_t chunk_size;
+    auto data_end = data + data_size;
+    for (; data < data_end; data += chunk_size) {
+      chunk_size =
+          std::min(static_cast<uint64_t>(data_end - data), ensure_space());
+      copy(data, chunk_size);
+    }
+  }
+
+  // Same, but for a string.
+  void Append(const char *str) {
+    Append(reinterpret_cast<const uint8_t *>(str), strlen(str));
+  }
+
+  // Appends the contents of the uncompressed Zip entry.
+  void ReadEntryContents(const LH *lh) {
+    Append(lh->data(), lh->uncompressed_file_size());
+  }
+
+  // Appends the inflated contents of the compressed Zip entry. The sizes are
+  // taken from the Central Directory header when it reports that the Local
+  // header has none. Resets the inflater used to decompress.
+  void DecompressEntryContents(const CDH *cdh, const LH *lh,
+                               Inflater *inflater) {
+    uint64_t old_total_out = inflater->total_out();
+    uint64_t in_bytes;
+    uint64_t out_bytes;
+    const uint8_t *data = lh->data();
+
+    if (cdh->no_size_in_local_header()) {
+      in_bytes = cdh->compressed_file_size();
+      out_bytes = cdh->uncompressed_file_size();
+    } else {
+      in_bytes = lh->compressed_file_size();
+      out_bytes = lh->uncompressed_file_size();
+    }
+
+    while (in_bytes > 0) {
+      // A single region to inflate cannot exceed 4GB-1.
+      uint32_t in_bytes_chunk = 0xFFFFFFFF;
+      if (in_bytes_chunk > in_bytes) {
+        in_bytes_chunk = in_bytes;
+      }
+      inflater->DataToInflate(data, in_bytes_chunk);
+      for (;;) {
+        uint32_t available_out = ensure_space();
+        int ret = inflater->Inflate(append_position(), available_out);
+        uint32_t inflated = available_out - inflater->available_out();
+        if (Z_STREAM_END == ret) {
+          // No more data to decompress. Update write position and we are done
+          // for this input chunk.
+          advance(inflated);
+          break;
+        } else if (Z_OK == ret) {
+          // No more space in the output buffer. Advance write position, update
+          // the number of remaining bytes.
+          if (inflater->available_out()) {
+            diag_errx(2,
+                      "%s:%d: Internal error inflating %.*s: Inflate reported "
+                      "Z_OK but there are still %" PRIu32
+                      " bytes available in the output buffer",
+                      __FILE__, __LINE__, lh->file_name_length(),
+                      lh->file_name(), inflater->available_out());
+          }
+          advance(inflated);
+        } else {
+          diag_errx(2,
+                    "%s:%d: Internal error inflating %.*s: inflate() call "
+                    "returned %d (%s)",
+                    __FILE__, __LINE__, lh->file_name_length(), lh->file_name(),
+                    ret, inflater->error_message());
+        }
+      }
+      data += in_bytes_chunk;
+      in_bytes -= in_bytes_chunk;
+    }
+
+    // Sanity check: the inflater must have produced exactly out_bytes.
+    if (inflater->total_out() - old_total_out != out_bytes) {
+      diag_errx(2,
+                "%s:%d: Internal error inflating %.*s: inflater wrote %" PRIu64
+                " bytes, but the uncompressed entry should be %" PRIu64
+                " bytes long",
+                __FILE__, __LINE__, lh->file_name_length(), lh->file_name(),
+                inflater->total_out() - old_total_out, out_bytes);
+    }
+    inflater->reset();
+  }
+
+  // Writes the contents bytes to the given buffer in an optimal way, i.e., the
+  // shorter of compressed or uncompressed. Sets the checksum and number of
+  // bytes written and returns Z_DEFLATED if compression took place or
+  // Z_NO_COMPRESSION otherwise.
+  uint16_t Write(uint8_t *buffer, uint32_t *checksum, uint64_t *bytes_written) {
+    Deflater deflater;
+    deflater.next_out = buffer;
+    *checksum = 0;
+    uint16_t compression_method = Z_DEFLATED;
+    uint64_t to_compress = data_size();
+
+    // Feed data blocks to the deflater one by one, but break if the compressed
+    // size exceeds the original size.
+    for (auto data_block = first_block_;
+         data_block && compression_method != Z_NO_COMPRESSION;
+         data_block = data_block->next_block_) {
+      // The compressed size should not exceed the original size less the number
+      // of bytes already compressed. And, it should not exceed 4GB-1.
+      deflater.avail_out = std::min(data_size() - deflater.total_out,
+                                    static_cast<uint64_t>(0xFFFFFFFF));
+      // Out of the total number of bytes that remain to be compressed, we
+      // can compress no more than this block.
+      uint32_t chunk_size = static_cast<uint32_t>(std::min(
+          static_cast<uint64_t>(sizeof(data_block->data_)), to_compress));
+      *checksum = crc32(*checksum, data_block->data_, chunk_size);
+      deflater.avail_in = chunk_size;
+      to_compress -= chunk_size;
+      int ret = deflater.Deflate(data_block->data_, chunk_size,
+                                 to_compress ? Z_NO_FLUSH : Z_FINISH);
+      if (ret == Z_OK) {
+        if (!deflater.avail_out) {
+          // We ran out of space in the output buffer, which means
+          // that deflated size exceeds original size. Leave the loop
+          // and just copy the data.
+          compression_method = Z_NO_COMPRESSION;
+        }
+      } else if (ret == Z_BUF_ERROR && !deflater.avail_in) {
+        // We ran out of data block, this is not a error.
+      } else if (ret == Z_STREAM_END) {
+        if (data_block->next_block_ || to_compress) {
+          diag_errx(2,
+                    "%s:%d: Internal error: deflate() call at the end, but "
+                    "there is more data to compress!",
+                    __FILE__, __LINE__);
+        }
+      } else {
+        diag_errx(2, "%s:%d: deflate error %d(%s)", __FILE__, __LINE__, ret,
+                  deflater.msg);
+      }
+    }
+    if (compression_method != Z_NO_COMPRESSION) {
+      *bytes_written = deflater.total_out;
+      return compression_method;
+    }
+
+    // Compression does not help, just copy the bytes to the output buffer.
+    uint64_t to_copy = data_size();
+    uint8_t *buffer_end = buffer + to_copy;
+    *checksum = 0;
+    for (auto data_block = first_block_; data_block;
+         data_block = data_block->next_block_) {
+      size_t chunk_size =
+          std::min(static_cast<uint64_t>(sizeof(data_block->data_)), to_copy);
+      *checksum = crc32(*checksum, data_block->data_, chunk_size);
+      memcpy(buffer_end - to_copy, data_block->data_, chunk_size);
+      to_copy -= chunk_size;
+    }
+    *bytes_written = data_size();
+    return Z_NO_COMPRESSION;
+  }
+
+  // Number of data bytes.
+  uint64_t data_size() const { return data_size_; }
+
+  // Feeds the stored bytes to a Sink instance, one block at a time (mostly
+  // for testing). The class Sink has to have
+  //     void operator()(const void *chunk, uint64_t chunk_size) const;
+  // The chunk size is clamped so that the sizes add up to data_size().
+  template <class Sink>
+  void stream_out(const Sink &sink) const {
+    uint64_t remaining = data_size();
+    auto *block = first_block_;
+    while (block) {
+      // Only as many bytes as are still unaccounted for belong to the data.
+      const uint64_t block_capacity = sizeof(block->data_);
+      const uint64_t length = std::min(block_capacity, remaining);
+      sink(block->data_, length);
+      remaining -= length;
+      block = block->next_block_;
+    }
+  }
+
+ private:
+  // Ensures there is some space to write to, returns the amount available.
+  uint64_t ensure_space() {
+    if (!free_size()) {
+      auto *data_block = new DataBlock();
+      if (last_block_) {
+        last_block_->next_block_ = data_block;
+      }
+      last_block_ = data_block;
+      if (!first_block_) {
+        first_block_ = data_block;
+      }
+      allocated_ += sizeof(data_block->data_);
+    }
+    return free_size();
+  }
+
+  // Records that 'amount' bytes at the write position are now in use (it is
+  // an error to exceed free_size()). Returns the old write position.
+  uint8_t *advance(size_t amount) {
+    if (amount > free_size()) {
+      diag_errx(2, "%s: %d: Cannot advance %zu bytes, only %" PRIu64
+                   " is available",
+                __FILE__, __LINE__, amount, free_size());
+    }
+    uint8_t *pos = append_position();
+    data_size_ += amount;
+    return pos;
+  }
+
+  void copy(const uint8_t *from, size_t count) {
+    memcpy(advance(count), from, count);
+  }
+
+  uint8_t *append_position() {
+    return last_block_ ? last_block_->End() - free_size() : nullptr;
+  }
+
+  // Returns the amount of free space.
+  uint64_t free_size() const { return allocated_ - data_size_; }
+
+  // The bytes are kept in an linked list of the DataBlock instances.
+  // TODO(asmundak): perhaps use mmap to allocate these?
+  struct DataBlock {
+    struct DataBlock *next_block_;
+    uint8_t data_[0x40000 - 8];
+    DataBlock() : next_block_(nullptr) {}
+    uint8_t *End() { return data_ + sizeof(data_); }
+  };
+
+  uint64_t allocated_;
+  uint64_t data_size_;
+  struct DataBlock *first_block_;
+  struct DataBlock *last_block_;
+};
+
+// Streams the bytes held by 'bytes' to 'out' chunk by chunk and returns 'out'.
+// Declared inline: this function is defined in a header, so without it every
+// translation unit including the header would emit its own definition and
+// violate the one-definition rule at link time.
+inline std::ostream &operator<<(std::ostream &out,
+                                TransientBytes const &bytes) {
+  bytes.stream_out([&out](const void *chunk, uint64_t chunk_size) {
+    out.write(reinterpret_cast<const char *>(chunk), chunk_size);
+  });
+  return out;
+}
+
+#endif  // SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_
diff --git a/src/tools/singlejar/transient_bytes_test.cc b/src/tools/singlejar/transient_bytes_test.cc
new file mode 100644
index 0000000..e52a7f8
--- /dev/null
+++ b/src/tools/singlejar/transient_bytes_test.cc
@@ -0,0 +1,295 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <fstream>
+#include <ios>
+#include <iostream>
+#include <memory>
+#include <sstream>
+
+#include "src/tools/singlejar/input_jar.h"
+#include "src/tools/singlejar/test_util.h"
+#include "src/tools/singlejar/transient_bytes.h"
+#include "gtest/gtest.h"
+
+namespace {
+const char kStoredJar[] = "stored.zip";
+const char kCompressedJar[] = "compressed.zip";
+const char kBytesSmall[] =
+    "0123456789012345678901234567890123456789"
+    "0123456789012345678901234567890123456789"
+    "0123456789012345678901234567890123456789"
+    "0123456789012345678901234567890123456789"
+    "0123456789012345678901234567890123456789"
+    "0123456789012345678901234567890123456789"
+    "0123456789012345678901234567890123456789"
+    "0123456789012345678901234567890123456789"
+    "0123456789012345678901234567890123456789"
+    "0123456789012345678901234567890123456789";
+
+class TransientBytesTest : public ::testing::Test {
+ protected:
+  static void SetUpTestCase() {
+    ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR")));
+    CreateCompressedJar();
+  }
+
+  static void TearDownTestCase() { unlink(kCompressedJar); }
+
+  void SetUp() override { transient_bytes_.reset(new TransientBytes); }
+
+  // The value of the byte at a given position in a file created by the
+  // CreateFile method below.
+  static __attribute__((always_inline)) uint8_t file_byte_at(uint64_t offset) {
+    // return offset >> (8 * (offset & 7));
+    return offset & 255;
+  }
+
+  // Create file with given name and size and contents.
+  static bool CreateFile(const char *filename, uint64_t size) {
+    FILE *fp = fopen(filename, "wb");
+    if (fp == nullptr) {
+      perror(filename);
+      return false;
+    }
+    const uint64_t buffer_size = 4096;
+    uint8_t buffer[buffer_size];
+    uint64_t offset = 0;
+    while (offset < size) {
+      uint64_t offset_end = std::min(size, offset + buffer_size);
+      uint64_t to_write = 0;
+      while (offset < offset_end) {
+        buffer[to_write++] = file_byte_at(offset++);
+      }
+      if (fwrite(buffer, to_write, 1, fp) != 1) {
+        perror(filename);
+        fclose(fp);
+        return false;
+      }
+    }
+    if (0 == fclose(fp)) {
+      return true;
+    }
+    perror(filename);
+    return false;
+  }
+
+  static void CreateStoredJar() {
+    ASSERT_TRUE(TestUtil::AllocateFile("small1", 100));
+    ASSERT_TRUE(TestUtil::AllocateFile("huge", 0x100000001));
+    ASSERT_TRUE(TestUtil::AllocateFile("small2", 100));
+    unlink(kStoredJar);
+    ASSERT_EQ(0, system("zip -0qm stored.zip small1 huge small2"));
+#if !defined(__APPLE__)
+    ASSERT_EQ(0, system("unzip -v stored.zip"));
+#endif
+  }
+
+  static void CreateCompressedJar() {
+    unlink(kCompressedJar);
+    ASSERT_TRUE(CreateFile("511", 511));
+    ASSERT_TRUE(CreateFile("huge", 0x100000001));
+    ASSERT_TRUE(CreateFile("1K", 1024));
+    ASSERT_EQ(0, system("zip -qm compressed.zip 511 huge 1K"));
+#if !defined(__APPLE__)
+    ASSERT_EQ(0, system("unzip -v compressed.zip"));
+#endif
+  }
+  std::unique_ptr<TransientBytes> transient_bytes_;
+};
+
+TEST_F(TransientBytesTest, AppendBytes) {
+  int const kIter = 10000;
+  transient_bytes_->Append(kBytesSmall);
+  EXPECT_EQ(strlen(kBytesSmall), transient_bytes_->data_size());
+  std::ostringstream out;
+  out << *transient_bytes_.get();
+  EXPECT_STREQ(kBytesSmall, out.str().c_str());
+  out.flush();
+
+  for (int i = 1; i < kIter; ++i) {
+    transient_bytes_->Append(kBytesSmall);
+    ASSERT_EQ((i + 1) * strlen(kBytesSmall), transient_bytes_->data_size());
+  }
+
+  out << *transient_bytes_.get();
+  std::string out_string = out.str();
+  size_t size = strlen(kBytesSmall);
+  for (size_t pos = 0; pos < kIter * size; pos += size) {
+    ASSERT_STREQ(kBytesSmall, out_string.substr(pos, size).c_str())
+        << (pos / size) << "-th chunk does not match";
+  }
+}
+
+TEST_F(TransientBytesTest, ReadEntryContents) {
+  ASSERT_EQ(0, chdir(getenv("TEST_TMPDIR")));
+  CreateStoredJar();
+  std::unique_ptr<InputJar> input_jar(new InputJar);
+  ASSERT_TRUE(input_jar->Open(kStoredJar));
+  const LH *lh;
+  const CDH *cdh;
+  while ((cdh = input_jar->NextEntry(&lh))) {
+    transient_bytes_.reset(new TransientBytes);
+    if (!cdh->uncompressed_file_size()) {
+      continue;
+    }
+    ASSERT_EQ(Z_NO_COMPRESSION, lh->compression_method());
+    transient_bytes_->ReadEntryContents(lh);
+    ASSERT_EQ(cdh->uncompressed_file_size(), transient_bytes_->data_size());
+    struct Sink {
+      Sink(const LH *lh)
+          : data_start_(lh->data()),
+            data_(lh->data()),
+            entry_name_(lh->file_name(), lh->file_name_length()) {}
+      void operator()(const void *chunk, uint64_t chunk_size) const {
+        ASSERT_EQ(0, memcmp(chunk, data_, chunk_size))
+            << "Entry " << entry_name_ << "The chunk [" << data_ - data_start_
+            << ".." << data_ + chunk_size - data_start_ << ") differs";
+        data_ += chunk_size;
+      }
+      const uint8_t *data_start_;
+      mutable const uint8_t *data_;
+      std::string entry_name_;
+    };
+    Sink sink(lh);
+    transient_bytes_->stream_out(sink);
+  }
+  input_jar->Close();
+  unlink(kStoredJar);
+}
+
+TEST_F(TransientBytesTest, DecompressEntryContents) {
+  std::unique_ptr<InputJar> input_jar(new InputJar);
+  ASSERT_TRUE(input_jar->Open(kCompressedJar));
+  const LH *lh;
+  const CDH *cdh;
+  std::unique_ptr<Inflater> inflater;
+  while ((cdh = input_jar->NextEntry(&lh))) {
+    transient_bytes_.reset(new TransientBytes);
+    inflater.reset(new Inflater);
+    if (!cdh->uncompressed_file_size()) {
+      continue;
+    }
+    ASSERT_EQ(Z_DEFLATED, lh->compression_method());
+    transient_bytes_->DecompressEntryContents(cdh, lh, inflater.get());
+
+    ASSERT_EQ(cdh->uncompressed_file_size(), transient_bytes_->data_size());
+    // A sink that verifies decompressed entry contents.
+    struct Sink {
+      Sink(const LH *lh)
+          : offset_(0), entry_name_(lh->file_name(), lh->file_name_length()) {}
+      void operator()(const void *chunk, uint64_t chunk_size) const {
+        for (uint64_t i = 0; i < chunk_size; ++i) {
+          // ASSERT_EQ is quite slow in the non-optimized build, avoid calling
+          // it 4 billion times on a 4GB file: only bytes that differ reach it.
+          if (file_byte_at(offset_ + i) ==
+              reinterpret_cast<const uint8_t *>(chunk)[i]) {
+            continue;
+          }
+          ASSERT_EQ(file_byte_at(offset_ + i),
+                    reinterpret_cast<const uint8_t *>(chunk)[i])
+              << "Entry " << entry_name_ << ": mismatch at offset "
+              << (offset_ + i);
+        }
+        offset_ += chunk_size;
+      }
+      mutable uint64_t offset_;
+      std::string entry_name_;
+    };
+    Sink sink(lh);
+    transient_bytes_->stream_out(sink);
+  }
+  input_jar->Close();
+}
+
+TEST_F(TransientBytesTest, WriteCompress) {
+  std::unique_ptr<InputJar> input_jar(new InputJar);
+  ASSERT_TRUE(input_jar->Open(kCompressedJar));
+  const LH *lh;
+  const CDH *cdh;
+  std::unique_ptr<Inflater> inflater;
+  while ((cdh = input_jar->NextEntry(&lh))) {
+    transient_bytes_.reset(new TransientBytes);
+    inflater.reset(new Inflater);
+    if (!cdh->uncompressed_file_size()) {
+      continue;
+    }
+    ASSERT_EQ(Z_DEFLATED, lh->compression_method());
+    transient_bytes_->DecompressEntryContents(cdh, lh, inflater.get());
+    ASSERT_EQ(cdh->uncompressed_file_size(), transient_bytes_->data_size());
+    // Now let us compress it back.
+    uint8_t *buffer =
+        reinterpret_cast<uint8_t *>(malloc(cdh->uncompressed_file_size()));
+    ASSERT_NE(nullptr, buffer);
+    uint32_t crc32 = 0;
+    uint64_t bytes_written;
+    uint16_t rc = transient_bytes_->Write(buffer, &crc32, &bytes_written);
+
+    EXPECT_EQ(Z_DEFLATED, rc) << "TransientBytes::Write did not compress "
+                              << cdh->file_name_string();
+    EXPECT_EQ(cdh->crc32(), crc32)
+        << "TransientBytes::Write has wrong crc32 for "
+        << cdh->file_name_string();
+
+    // Verify contents.
+    Inflater inf2;
+    inf2.DataToInflate(buffer, 0);  // Just to save the position.
+    uint64_t to_inflate = bytes_written;
+    uint64_t position = 0;
+    while (to_inflate > 0) {
+      uint32_t to_inflate_chunk =
+          std::min(to_inflate, static_cast<uint64_t>(0xFFFFFFFF));
+      inf2.DataToInflate(inf2.next_in(), to_inflate_chunk);
+      to_inflate -= to_inflate_chunk;
+      for (;;) {
+        uint8_t decomp_buf[1024];
+        int rc = inf2.Inflate(decomp_buf, sizeof(decomp_buf));
+        ASSERT_TRUE(Z_STREAM_END == rc || Z_OK == rc)
+            << "Decompressiong contents of " << cdh->file_name_string()
+            << " at offset " << position << " returned " << rc;
+        for (uint32_t i = 0; i < sizeof(decomp_buf) - inf2.available_out();
+             ++i) {
+          if (file_byte_at(position) != decomp_buf[i]) {
+            EXPECT_EQ(file_byte_at(position), decomp_buf[i])
+                << "Decompressed contents of " << cdh->file_name_string()
+                << " at offset " << position << " is wrong";
+          }
+          ++position;
+        }
+        if (Z_STREAM_END == rc) {
+          // Input buffer done.
+          break;
+        } else {
+          EXPECT_EQ(0, inf2.available_out());
+        }
+      }
+    }
+    free(buffer);
+  }
+  input_jar->Close();
+}
+
+TEST_F(TransientBytesTest, WriteStore) {
+  transient_bytes_->Append("a");
+  uint8_t buffer[400] = {0xfe, 0xfb};
+  uint32_t crc32 = 0;
+  uint64_t bytes_written;
+  uint16_t rc = transient_bytes_->Write(buffer, &crc32, &bytes_written);
+  ASSERT_EQ(Z_NO_COMPRESSION, rc);
+  ASSERT_EQ(1, bytes_written);
+  ASSERT_EQ(0xfb, buffer[1]);
+}
+
+}  // namespace
diff --git a/src/tools/singlejar/zip_headers.h b/src/tools/singlejar/zip_headers.h
index d1018d3..35509d5 100644
--- a/src/tools/singlejar/zip_headers.h
+++ b/src/tools/singlejar/zip_headers.h
@@ -38,6 +38,7 @@
 #error "This platform is not supported."
 #endif
 
+#include <string>
 #include <type_traits>
 
 static const uint8_t *byte_ptr(const void *ptr) {
@@ -180,6 +181,9 @@
     return file_name_length() == name_len &&
            0 == strncmp(file_name(), name, name_len);
   }
+  std::string file_name_string() const {
+    return std::string(file_name(), file_name_length());
+  }
 
   uint16_t extra_fields_length() const { return le16toh(extra_fields_length_); }
   void extra_fields_length(uint16_t v) {
@@ -240,6 +244,7 @@
   void last_mod_file_date(uint16_t v) { last_mod_file_date_ = htole16(v); }
 
   void crc32(uint32_t v) { crc32_ = htole32(v); }
+  uint32_t crc32() const { return le32toh(crc32_); }
 
   size_t compressed_file_size() const {
     size_t size32 = compressed_file_size32();
@@ -287,6 +292,9 @@
     return file_name_length() == name_len &&
            0 == strncmp(file_name(), name, name_len);
   }
+  std::string file_name_string() const {
+    return std::string(file_name(), file_name_length());
+  }
 
   uint16_t extra_fields_length() const { return le16toh(extra_fields_length_); }
   const uint8_t *extra_fields() const {
diff --git a/src/tools/singlejar/zlib_interface.h b/src/tools/singlejar/zlib_interface.h
index 1a06b8e..b9bc0c5 100644
--- a/src/tools/singlejar/zlib_interface.h
+++ b/src/tools/singlejar/zlib_interface.h
@@ -15,11 +15,11 @@
 #ifndef BAZEL_SRC_TOOLS_SINGLEJAR_ZLIB_INTERFACE_H_
 #define BAZEL_SRC_TOOLS_SINGLEJAR_ZLIB_INTERFACE_H_
 
-#include <zlib.h>
-
-#include <err.h>
 #include <stdint.h>
 
+#include "src/tools/singlejar/diag.h"
+#include <zlib.h>
+
 // An interface to zlib's inflater. Usage:
 //   Inflater inflater;
 //   inflater.DataToInflate(data, data_size);
@@ -30,8 +30,10 @@
 //       }
 //       // If we ran out of out_buffer, create a new one
 //   }
-//   inflater.Reset();
-//
+//   inflater.reset();
+// NOTE that the sizes of the input/output buffers in zlib are 32-bit entities.
+// Call Inflater::DataToInflate multiple times if 'data_size' in the usage
+// example exceeds 4GB-1.
 class Inflater {
  public:
   Inflater() {
@@ -42,7 +44,7 @@
     zstream_.next_in = nullptr;
     int ret = inflateInit2(&zstream_, -MAX_WBITS);
     if (ret != Z_OK) {
-      errx(2, "inflateInit2 returned %d\n", ret);
+      diag_errx(2, "inflateInit2 returned %d\n", ret);
     }
   }
 
@@ -50,18 +52,22 @@
 
   void reset() { inflateReset(&zstream_); }
 
-  void DataToInflate(const uint8_t *in_buffer, unsigned in_buffer_length) {
+  void DataToInflate(const uint8_t *in_buffer, uint32_t in_buffer_length) {
     zstream_.next_in = const_cast<uint8_t *>(in_buffer);
     zstream_.avail_in = in_buffer_length;
   }
 
-  int Inflate(uint8_t *out_buffer, unsigned out_buffer_length) {
+  int Inflate(uint8_t *out_buffer, uint32_t out_buffer_length) {
     zstream_.next_out = out_buffer;
     zstream_.avail_out = out_buffer_length;
     return inflate(&zstream_, Z_SYNC_FLUSH);
   }
 
-  unsigned available_out() const { return zstream_.avail_out; }
+  const uint8_t *next_in() const { return zstream_.next_in; }
+  uint64_t total_in() const { return zstream_.total_in; }
+
+  uint32_t available_out() const { return zstream_.avail_out; }
+  uint64_t total_out() const { return zstream_.total_out; }
 
   const char *error_message() const { return zstream_.msg; }
 
@@ -70,6 +76,8 @@
 };
 
 // A little wrapper around zlib's deflater.
+// NOTE that the size of the data to deflate by a single call cannot exceed
+// 4GB-1.
 struct Deflater : z_stream {
   Deflater() {
     zalloc = Z_NULL;
@@ -82,13 +90,13 @@
     int ret = deflateInit2(this, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS,
                            8, Z_DEFAULT_STRATEGY);
     if (ret != Z_OK) {
-      errx(2, "deflateInit returned %d (%s)", ret, msg);
+      diag_errx(2, "deflateInit returned %d (%s)", ret, msg);
     }
   }
 
   ~Deflater() { deflateEnd(this); }
 
-  int Deflate(const uint8_t *data, size_t data_size, int flag) {
+  int Deflate(const uint8_t *data, uint32_t data_size, int flag) {
     next_in = const_cast<uint8_t *>(data);
     avail_in = data_size;
     return deflate(this, flag);