Add support for compression in //third_party/ijar:zipper

Add the 'C' flag to //third_party/ijar:zipper to try to compress
files using the DEFLATE algorithm. To avoid regression, creation
of uncompressed ZIP files is still the default.

--
Change-Id: I8ad7182d8f6f152abd56ae472a406cbb8d39fb64
Reviewed-on: https://bazel-review.googlesource.com/#/c/1821/
MOS_MIGRATED_REVID=101925953
diff --git a/third_party/ijar/test/zip_test.sh b/third_party/ijar/test/zip_test.sh
index 83ca3f5..4b92f32 100755
--- a/third_party/ijar/test/zip_test.sh
+++ b/third_party/ijar/test/zip_test.sh
@@ -45,6 +45,12 @@
   (cd $folder && $UNZIP -q ${zipfile} || true)  # ignore CRC32 errors
   diff -r $1 $folder &> $TEST_log \
       || fail "Unzip after zipper output differ"
+  # Retry with compression
+  (cd $1 && $ZIPPER cC ${zipfile} $(find . | sed 's|^./||' | grep -v '^.$'))
+  local folder=$(mktemp -d ${TEST_TMPDIR}/output.XXXXXXXX)
+  (cd $folder && $UNZIP -q ${zipfile} || true)  # ignore CRC32 errors
+  diff -r $1 $folder &> $TEST_log \
+      || fail "Unzip after zipper output differ"
 }
 
 #### Tests
diff --git a/third_party/ijar/zip.cc b/third_party/ijar/zip.cc
index 3a6f92c..cb9d1cc 100644
--- a/third_party/ijar/zip.cc
+++ b/third_party/ijar/zip.cc
@@ -200,7 +200,6 @@
   OutputZipFile(int fd, u1 * const zipdata_out) :
       fd_out(fd),
       zipdata_out_(zipdata_out),
-      zipdata_out_mapped_(zipdata_out),
       q(zipdata_out) {
     errmsg[0] = 0;
   }
@@ -214,8 +213,8 @@
 
   virtual ~OutputZipFile() { Finish(); }
   virtual u1* NewFile(const char* filename, const u4 attr);
-  virtual int FinishFile(size_t filelength);
-  virtual int WriteEmptyFile(const char* filename);
+  virtual int FinishFile(size_t filelength, bool compress = false);
+  virtual int WriteEmptyFile(const char *filename);
   virtual size_t GetSize() {
     return Offset(q);
   }
@@ -228,7 +227,13 @@
   struct LocalFileEntry {
     // Start of the local header (in the output buffer).
     size_t local_header_offset;
+
+    // Sizes of the file entry
     size_t uncompressed_length;
+    size_t compressed_length;
+
+    // Compression method
+    u2 compression_method;
 
     // external attributes field
     u4 external_attr;
@@ -248,10 +253,9 @@
   // pointers. They are allocated by the Create() method before
   // the object is actually created using mmap.
   u1 * const zipdata_out_;        // start of output file mmap
-  u1 * zipdata_out_mapped_;       // start of still mapped region
   u1 *q;  // output cursor
 
-  u1 *compressed_size_ptr;        // Current pointer to "compressed size" entry.
+  u1 *header_ptr;  // Current pointer to "compression method" entry.
 
   // List of entries to write the central directory
   std::vector<LocalFileEntry*> entries_;
@@ -285,12 +289,10 @@
 
   // Fill in the "compressed size" and "uncompressed size" fields in a local
   // file header previously written by WriteLocalFileHeader().
-  void WriteFileSizeInLocalFileHeader(u1 *compressed_size_ptr,
-                                      size_t out_length);
+  size_t WriteFileSizeInLocalFileHeader(u1 *header_ptr, size_t out_length,
+                                        bool compress = false);
 };
 
-
-
 //
 // Implementation of InputZipFile
 //
@@ -775,7 +777,10 @@
 
   entry->file_name_length = file_name_length;
   entry->extra_field_length = 0;
-  entry->extra_field = (const u1*) "";
+  entry->compressed_length = 0;
+  entry->uncompressed_length = 0;
+  entry->compression_method = 0;
+  entry->extra_field = (const u1 *)"";
   entry->file_name = (u1*) strdup((const char *) file_name);
   entries_.push_back(entry);
 
@@ -792,11 +797,11 @@
 
     put_u2le(q, ZIP_VERSION_TO_EXTRACT);  // version to extract
     put_u2le(q, 0);  // general purpose bit flag
-    put_u2le(q, COMPRESSION_METHOD_STORED);  // compression method:
-    put_u2le(q, 0);  // last_mod_file_time
+    put_u2le(q, entry->compression_method);  // compression method:
+    put_u2le(q, 0);                          // last_mod_file_time
     put_u2le(q, 0);  // last_mod_file_date
     put_u4le(q, 0);  // crc32 (jar/javac tools don't care)
-    put_u4le(q, entry->uncompressed_length);  // compressed_size
+    put_u4le(q, entry->compressed_length);    // compressed_size
     put_u4le(q, entry->uncompressed_length);  // uncompressed_size
     put_u2le(q, entry->file_name_length);
     put_u2le(q, entry->extra_field_length);
@@ -839,11 +844,11 @@
   put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE);
   put_u2le(q, ZIP_VERSION_TO_EXTRACT);     // version to extract
   put_u2le(q, 0);                          // general purpose bit flag
-  put_u2le(q, COMPRESSION_METHOD_STORED);  // compression method:
+  u1 *header_ptr = q;
+  put_u2le(q, COMPRESSION_METHOD_STORED);  // compression method = placeholder
   put_u2le(q, 0);                          // last_mod_file_time
   put_u2le(q, 0);                          // last_mod_file_date
   put_u4le(q, 0);                          // crc32 (jar/javac tools don't care)
-  u1 *compressed_size_ptr = q;
   put_u4le(q, 0);  // compressed_size = placeholder
   put_u4le(q, 0);  // uncompressed_size = placeholder
   put_u2le(q, entry->file_name_length);
@@ -853,15 +858,64 @@
   put_n(q, entry->extra_field, entry->extra_field_length);
   entries_.push_back(entry);
 
-  return compressed_size_ptr;
+  return header_ptr;
 }
 
-void OutputZipFile::WriteFileSizeInLocalFileHeader(u1 *compressed_size_ptr,
-                                                   size_t out_length) {
-  // uncompressed size and compressed size are the same, since the output
-  // ijar is uncompressed.
-  put_u4le(compressed_size_ptr, out_length);  // compressed_size
-  put_u4le(compressed_size_ptr, out_length);  // uncompressed_size
+// Try to compress a file entry in memory using the raw DEFLATE algorithm.
+// buf (of size length) is overwritten with the compressed data only when the
+// result is strictly smaller than the input; otherwise buf is left untouched.
+// Returns the final size of the content of buf.
+size_t TryDeflate(u1 *buf, size_t length) {
+  u1 *outbuf = reinterpret_cast<u1 *>(malloc(length));
+  if (outbuf == NULL) return length;  // no scratch space => keep buf as is
+  z_stream stream;
+
+  // Initialize the z_stream struct for reading from buf and writing to outbuf.
+  // total_in and total_out are reset by deflateInit2().
+  stream.zalloc = Z_NULL;
+  stream.zfree = Z_NULL;
+  stream.opaque = Z_NULL;
+  stream.avail_in = length;
+  stream.avail_out = length;
+  stream.next_in = buf;
+  stream.next_out = outbuf;
+
+  // A negative window size disables the zlib wrapper: ZIP entries hold raw
+  // DEFLATE streams.
+  if (deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, 8,
+                   Z_DEFAULT_STRATEGY) != Z_OK) {
+    free(outbuf);
+    return length;
+  }
+
+  if (deflate(&stream, Z_FINISH) == Z_STREAM_END && stream.total_out < length) {
+    // Strictly smaller: callers flag the entry as deflated only in that case.
+    length = stream.total_out;
+    memcpy(buf, outbuf, length);
+  }
+
+  deflateEnd(&stream);
+  free(outbuf);
+  return length;
+}
+
+size_t OutputZipFile::WriteFileSizeInLocalFileHeader(u1 *header_ptr,
+                                                     size_t out_length,
+                                                     bool compress) {
+  size_t compressed_size = out_length;
+  if (compress) {
+    compressed_size = TryDeflate(q, out_length);
+  }
+  // compression method: deflated only when TryDeflate shrank the content
+  if (compressed_size < out_length) {
+    put_u2le(header_ptr, COMPRESSION_METHOD_DEFLATED);
+  } else {
+    put_u2le(header_ptr, COMPRESSION_METHOD_STORED);
+  }
+  header_ptr += 8;  // skip last_mod_file_time, last_mod_file_date and crc32
+  put_u4le(header_ptr, compressed_size);  // compressed_size
+  put_u4le(header_ptr, out_length);       // uncompressed_size
+  return compressed_size;
+}
 
 int OutputZipFile::Finish() {
@@ -879,14 +933,21 @@
 }
 
 u1* OutputZipFile::NewFile(const char* filename, const u4 attr) {
-  compressed_size_ptr = WriteLocalFileHeader(filename, attr);
+  header_ptr = WriteLocalFileHeader(filename, attr);
   return q;
 }
 
-int OutputZipFile::FinishFile(size_t filelength) {
-  WriteFileSizeInLocalFileHeader(compressed_size_ptr, filelength);
+int OutputZipFile::FinishFile(size_t filelength, bool compress) {
+  size_t compressed_size =
+      WriteFileSizeInLocalFileHeader(header_ptr, filelength, compress);
+  entries_.back()->compressed_length = compressed_size;
   entries_.back()->uncompressed_length = filelength;
-  q += filelength;
+  if (compressed_size < filelength) {
+    entries_.back()->compression_method = COMPRESSION_METHOD_DEFLATED;
+  } else {
+    entries_.back()->compression_method = COMPRESSION_METHOD_STORED;
+  }
+  q += compressed_size;
   return 0;
 }
 
diff --git a/third_party/ijar/zip.h b/third_party/ijar/zip.h
index 046072d..c548cb3 100644
--- a/third_party/ijar/zip.h
+++ b/third_party/ijar/zip.h
@@ -61,9 +61,10 @@
   virtual u1* NewFile(const char* filename, const u4 attr) = 0;
 
   // Finish writing a file and specify its length. After calling this method
-  // one should not reuse the pointer given by NewFile.
+  // one should not reuse the pointer given by NewFile. The file can be
+  // compressed using the deflate algorithm by setting `compress` to true.
   // On failure, returns -1 and GetError() will return an non-empty message.
-  virtual int FinishFile(size_t filelength) = 0;
+  virtual int FinishFile(size_t filelength, bool compress = false) = 0;
 
   // Write an empty file, it is equivalent to:
   //   NewFile(filename, 0);
diff --git a/third_party/ijar/zip_main.cc b/third_party/ijar/zip_main.cc
index aeda237..6c2a974 100644
--- a/third_party/ijar/zip_main.cc
+++ b/third_party/ijar/zip_main.cc
@@ -177,7 +177,8 @@
 }
 
 // Execute the create operation
-int create(char *zipfile, char **files, bool flatten, bool verbose) {
+int create(char *zipfile, char **files, bool flatten, bool verbose,
+           bool compress) {
   struct stat statst;
   u8 size = ZipBuilder::EstimateSize(files);
   if (size == 0) {
@@ -236,7 +237,7 @@
       }
       memcpy(buffer, data, statst.st_size);
       munmap(data, statst.st_size);
-      builder->FinishFile(statst.st_size);
+      builder->FinishFile(statst.st_size, compress);
     }
   }
   if (builder->Finish() < 0) {
@@ -252,11 +253,13 @@
 // main method
 //
 static void usage(char *progname) {
-  fprintf(stderr, "Usage: %s [vxc[f]] x.zip [file1...filen]\n", progname);
+  fprintf(stderr, "Usage: %s [vxc[fC]] x.zip [file1...filen]\n", progname);
   fprintf(stderr, "  v verbose - list all file in x.zip\n");
   fprintf(stderr, "  x extract - extract file in x.zip in current directory\n");
   fprintf(stderr, "  c create  - add files to x.zip\n");
   fprintf(stderr, "  f flatten - flatten files to use with create operation\n");
+  fprintf(stderr,
+          "  C compress - compress files when using the create operation\n");
   fprintf(stderr, "x and c cannot be used in the same command-line.\n");
   exit(1);
 }
@@ -265,6 +268,7 @@
   bool extract = false;
   bool verbose = false;
   bool create = false;
+  bool compress = false;
   bool flatten = false;
 
   if (argc < 3) {
@@ -285,6 +289,9 @@
     case 'f':
       flatten = true;
       break;
+    case 'C':
+      compress = true;
+      break;
     default:
       usage(argv[0]);
     }
@@ -294,7 +301,7 @@
       usage(argv[0]);
     }
     // Create a zip
-    return devtools_ijar::create(argv[2], argv+3, flatten, verbose);
+    return devtools_ijar::create(argv[2], argv + 3, flatten, verbose, compress);
   } else {
     if (flatten) {
       usage(argv[0]);