| // Copyright 2016 The Bazel Authors. All rights reserved. | 
 | // | 
 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
 | // you may not use this file except in compliance with the License. | 
 | // You may obtain a copy of the License at | 
 | // | 
 | //    http://www.apache.org/licenses/LICENSE-2.0 | 
 | // | 
 | // Unless required by applicable law or agreed to in writing, software | 
 | // distributed under the License is distributed on an "AS IS" BASIS, | 
 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 | // See the License for the specific language governing permissions and | 
 | // limitations under the License. | 
 | #ifndef SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_ | 
 | #define SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_ | 
 |  | 
 | #ifndef __STDC_FORMAT_MACROS | 
 | #define __STDC_FORMAT_MACROS 1 | 
 | #endif | 
 |  | 
 | #include <inttypes.h> | 
 | #include <algorithm> | 
 | #include <ostream> | 
 |  | 
 | #include "src/tools/singlejar/diag.h" | 
 | #include "src/tools/singlejar/zip_headers.h" | 
 | #include "src/tools/singlejar/zlib_interface.h" | 
 |  | 
 | /* | 
 |  * An instance of this class holds decompressed data in a list of chunks, | 
 |  * to be eventually compressed to the output buffer. | 
 |  * Use DecompressFile() or ReadFile() (depending on whether an entry is | 
 |  * compressed or not) to append the contents of a Zip entry. | 
 |  * Use Append() to append a sequence of bytes or a string. | 
 |  * Use Write() to write out the contents, it will compress the entry if | 
 |  * necessary. | 
 |  */ | 
 | class TransientBytes { | 
 |  public: | 
 |   TransientBytes() | 
 |       : allocated_(0), | 
 |         data_size_(0), | 
 |         first_block_(nullptr), | 
 |         last_block_(nullptr) {} | 
 |  | 
 |   ~TransientBytes() { | 
 |     while (first_block_) { | 
 |       auto block = first_block_; | 
 |       first_block_ = first_block_->next_block_; | 
 |       delete block; | 
 |     } | 
 |     last_block_ = nullptr; | 
 |   } | 
 |  | 
 |   // Appends raw bytes. | 
 |   void Append(const uint8_t *data, uint64_t data_size) { | 
 |     uint64_t chunk_size; | 
 |     auto data_end = data + data_size; | 
 |     for (; data < data_end; data += chunk_size) { | 
 |       chunk_size = | 
 |           std::min(static_cast<uint64_t>(data_end - data), ensure_space()); | 
 |       copy(data, chunk_size); | 
 |     } | 
 |   } | 
 |  | 
 |   // Same, but for a string. | 
 |   void Append(const char *str) { | 
 |     Append(reinterpret_cast<const uint8_t *>(str), strlen(str)); | 
 |   } | 
 |  | 
 |   // Appends the contents of the uncompressed Zip entry. | 
 |   void ReadEntryContents(const LH *lh) { | 
 |     Append(lh->data(), lh->uncompressed_file_size()); | 
 |   } | 
 |  | 
 |   // Appends the contents of the compressed Zip entry. Resets the inflater | 
 |   // used to decompress. | 
 |   void DecompressEntryContents(const CDH *cdh, const LH *lh, | 
 |                                Inflater *inflater) { | 
 |     uint64_t old_total_out = inflater->total_out(); | 
 |     uint64_t in_bytes; | 
 |     uint64_t out_bytes; | 
 |     const uint8_t *data = lh->data(); | 
 |  | 
 |     if (cdh->no_size_in_local_header()) { | 
 |       in_bytes = cdh->compressed_file_size(); | 
 |       out_bytes = cdh->uncompressed_file_size(); | 
 |     } else { | 
 |       in_bytes = lh->compressed_file_size(); | 
 |       out_bytes = lh->uncompressed_file_size(); | 
 |     } | 
 |  | 
 |     while (in_bytes > 0) { | 
 |       // A single region to inflate cannot exceed 4GB-1. | 
 |       uint32_t in_bytes_chunk = 0xFFFFFFFF; | 
 |       if (in_bytes_chunk > in_bytes) { | 
 |         in_bytes_chunk = in_bytes; | 
 |       } | 
 |       inflater->DataToInflate(data, in_bytes_chunk); | 
 |       for (;;) { | 
 |         uint32_t available_out = ensure_space(); | 
 |         int ret = inflater->Inflate(append_position(), available_out); | 
 |         uint32_t inflated = available_out - inflater->available_out(); | 
 |         if (Z_STREAM_END == ret) { | 
 |           // No more data to decompress. Update write position and we are done | 
 |           // for this input chunk. | 
 |           advance(inflated); | 
 |           break; | 
 |         } else if (Z_OK == ret) { | 
 |           // No more space in the output buffer. Advance write position, update | 
 |           // the number of remaining bytes. | 
 |           if (inflater->available_out()) { | 
 |             diag_errx(2, | 
 |                       "%s:%d: Internal error inflating %.*s: Inflate reported " | 
 |                       "Z_OK but there are still %" PRIu32 | 
 |                       " bytes available in the output buffer", | 
 |                       __FILE__, __LINE__, lh->file_name_length(), | 
 |                       lh->file_name(), inflater->available_out()); | 
 |           } | 
 |           advance(inflated); | 
 |         } else { | 
 |           diag_errx(2, | 
 |                     "%s:%d: Internal error inflating %.*s: inflate() call " | 
 |                     "returned %d (%s)", | 
 |                     __FILE__, __LINE__, lh->file_name_length(), lh->file_name(), | 
 |                     ret, inflater->error_message()); | 
 |         } | 
 |       } | 
 |       data += in_bytes_chunk; | 
 |       in_bytes -= in_bytes_chunk; | 
 |     } | 
 |  | 
 |     // Smog check | 
 |     // This check is disabled on Windows because z_stream::total_out is of type | 
 |     // of uLong (unsigned long), which is 64-bit for most 64-bit Unix platforms, | 
 |     // but it is 32-bit even for Win64. This means even though zlib is capable | 
 |     // of compressing data >4GB as long as it is processed by chunks, zlib | 
 |     // cannot report the correct total number of processed bytes >4GB through | 
 |     // z_stream::total_out on Windows. | 
 | #ifndef _WIN32 | 
 |     if (inflater->total_out() - old_total_out != out_bytes) { | 
 |       diag_errx(2, | 
 |                 "%s:%d: Internal error inflating %.*s: inflater wrote %" PRIu64 | 
 |                 " bytes , but the uncompressed entry should be %" PRIu64 | 
 |                 "bytes long", | 
 |                 __FILE__, __LINE__, lh->file_name_length(), lh->file_name(), | 
 |                 inflater->total_out() - old_total_out, out_bytes); | 
 |     } | 
 | #endif | 
 |     inflater->reset(); | 
 |   } | 
 |  | 
 |   // Writes the contents bytes to the given buffer in an optimal way, i.e., the | 
 |   // shorter of compressed or uncompressed. Sets the checksum and number of | 
 |   // bytes written and returns Z_DEFLATED if compression took place or | 
 |   // Z_NO_COMPRESSION otherwise. | 
 |   uint16_t CompressOut(uint8_t *buffer, uint32_t *checksum, | 
 |                        uint64_t *bytes_written) { | 
 |     *checksum = 0; | 
 |     uint64_t to_compress = data_size(); | 
 |     if (to_compress == 0) { | 
 |       *bytes_written = 0; | 
 |       return Z_NO_COMPRESSION; | 
 |     } | 
 |  | 
 |     Deflater deflater; | 
 |     deflater.next_out = buffer; | 
 |     uint16_t compression_method = Z_DEFLATED; | 
 |  | 
 |     // Feed data blocks to the deflater one by one, but break if the compressed | 
 |     // size exceeds the original size. | 
 |     for (auto data_block = first_block_; | 
 |          data_block && compression_method != Z_NO_COMPRESSION; | 
 |          data_block = data_block->next_block_) { | 
 |       // The compressed size should not exceed the original size less the number | 
 |       // of bytes already compressed. And, it should not exceed 4GB-1. | 
 |       deflater.avail_out = std::min(data_size() - deflater.total_out, | 
 |                                     static_cast<uint64_t>(0xFFFFFFFF)); | 
 |       // Out of the total number of bytes that remain to be compressed, we | 
 |       // can compress no more than this block. | 
 |       uint32_t chunk_size = static_cast<uint32_t>(std::min( | 
 |           static_cast<uint64_t>(sizeof(data_block->data_)), to_compress)); | 
 |       *checksum = crc32(*checksum, data_block->data_, chunk_size); | 
 |       deflater.avail_in = chunk_size; | 
 |       to_compress -= chunk_size; | 
 |       int ret = deflater.Deflate(data_block->data_, chunk_size, | 
 |                                  to_compress ? Z_NO_FLUSH : Z_FINISH); | 
 |       if (ret == Z_OK) { | 
 |         if (!deflater.avail_out) { | 
 |           // We ran out of space in the output buffer, which means | 
 |           // that deflated size exceeds original size. Leave the loop | 
 |           // and just copy the data. | 
 |           compression_method = Z_NO_COMPRESSION; | 
 |         } | 
 |       } else if (ret == Z_BUF_ERROR && !deflater.avail_in) { | 
 |         // We ran out of data block, this is not a error. | 
 |       } else if (ret == Z_STREAM_END) { | 
 |         if (data_block->next_block_ || to_compress) { | 
 |           diag_errx(2, | 
 |                     "%s:%d: Internal error: deflate() call at the end, but " | 
 |                     "there is more data to compress!", | 
 |                     __FILE__, __LINE__); | 
 |         } | 
 |       } else { | 
 |         diag_errx(2, "%s:%d: deflate error %d(%s)", __FILE__, __LINE__, ret, | 
 |                   deflater.msg); | 
 |       } | 
 |     } | 
 |     if (compression_method != Z_NO_COMPRESSION) { | 
 |       *bytes_written = deflater.total_out; | 
 |       return compression_method; | 
 |     } | 
 |  | 
 |     // Compression does not help, just copy the bytes to the output buffer. | 
 |     CopyOut(buffer, checksum); | 
 |     *bytes_written = data_size(); | 
 |     return Z_NO_COMPRESSION; | 
 |   } | 
 |  | 
 |   // Copies the bytes to the buffer and sets the checksum. | 
 |   void CopyOut(uint8_t *buffer, uint32_t *checksum) { | 
 |     uint64_t to_copy = data_size(); | 
 |     uint8_t *buffer_end = buffer + to_copy; | 
 |     *checksum = 0; | 
 |     for (auto data_block = first_block_; data_block; | 
 |          data_block = data_block->next_block_) { | 
 |       size_t chunk_size = | 
 |           std::min(static_cast<uint64_t>(sizeof(data_block->data_)), to_copy); | 
 |       *checksum = crc32(*checksum, data_block->data_, chunk_size); | 
 |       memcpy(buffer_end - to_copy, data_block->data_, chunk_size); | 
 |       to_copy -= chunk_size; | 
 |     } | 
 |   } | 
 |  | 
 |   // Number of data bytes. | 
 |   uint64_t data_size() const { return data_size_; } | 
 |  | 
 |   // This is mostly for testing: stream out contents to a Sink instance. | 
 |   // The class Sink has to have | 
 |   //     void operator()(const void *chunk, uint64_t chunk_size) const; | 
 |   // | 
 |   template <class Sink> | 
 |   void stream_out(const Sink &sink) const { | 
 |     uint64_t to_copy = data_size(); | 
 |     for (auto data_block = first_block_; data_block; | 
 |          data_block = data_block->next_block_) { | 
 |       uint64_t chunk_size = sizeof(data_block->data_); | 
 |       if (chunk_size > to_copy) { | 
 |         chunk_size = to_copy; | 
 |       } | 
 |       sink.operator()(data_block->data_, chunk_size); | 
 |       to_copy -= chunk_size; | 
 |     } | 
 |   } | 
 |  | 
 |   uint8_t last_byte() const { | 
 |     if (!data_size()) { | 
 |       diag_errx(1, "%s:%d: last_char() cannot be called if buffer is empty", | 
 |                 __FILE__, __LINE__); | 
 |     } | 
 |     if (free_size() >= sizeof(last_block_->data_)) { | 
 |       diag_errx(1, "%s:%d: internal error: the last data block is empty", | 
 |                 __FILE__, __LINE__); | 
 |     } | 
 |     return *(last_block_->End() - free_size() - 1); | 
 |   } | 
 |  | 
 |  private: | 
 |   // Ensures there is some space to write to, returns the amount available. | 
 |   uint64_t ensure_space() { | 
 |     if (!free_size()) { | 
 |       auto *data_block = new DataBlock(); | 
 |       if (last_block_) { | 
 |         last_block_->next_block_ = data_block; | 
 |       } | 
 |       last_block_ = data_block; | 
 |       if (!first_block_) { | 
 |         first_block_ = data_block; | 
 |       } | 
 |       allocated_ += sizeof(data_block->data_); | 
 |     } | 
 |     return free_size(); | 
 |   } | 
 |  | 
 |   // Records that given amount of bytes is to be appended to the buffer. | 
 |   // Returns the old write position. | 
 |   uint8_t *advance(size_t amount) { | 
 |     if (amount > free_size()) { | 
 |       diag_errx( | 
 |           2, "%s: %d: Cannot advance %zu bytes, only %" PRIu64 " is available", | 
 |           __FILE__, __LINE__, amount, free_size()); | 
 |     } | 
 |     uint8_t *pos = append_position(); | 
 |     data_size_ += amount; | 
 |     return pos; | 
 |   } | 
 |  | 
 |   void copy(const uint8_t *from, size_t count) { | 
 |     memcpy(advance(count), from, count); | 
 |   } | 
 |  | 
 |   uint8_t *append_position() { | 
 |     return last_block_ ? last_block_->End() - free_size() : nullptr; | 
 |   } | 
 |  | 
 |   // Returns the amount of free space. | 
 |   uint64_t free_size() const { return allocated_ - data_size_; } | 
 |  | 
 |   // The bytes are kept in an linked list of the DataBlock instances. | 
 |   // TODO(asmundak): perhaps use mmap to allocate these? | 
 |   struct DataBlock { | 
 |     struct DataBlock *next_block_; | 
 |     uint8_t data_[0x40000 - 8]; | 
 |     DataBlock() : next_block_(nullptr) {} | 
 |     uint8_t *End() { return data_ + sizeof(data_); } | 
 |   }; | 
 |  | 
 |   uint64_t allocated_; | 
 |   uint64_t data_size_; | 
 |   struct DataBlock *first_block_; | 
 |   struct DataBlock *last_block_; | 
 | }; | 
 |  | 
 | #endif  // SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_ |