// src/tools/singlejar/transient_bytes.h - bazel - Git at Google

 // Copyright 2016 The Bazel Authors. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifndef SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_
 #define SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_

 #include <inttypes.h>
 #include <algorithm>
 #include <ostream>

 #include "src/tools/singlejar/diag.h"
 #include "src/tools/singlejar/zip_headers.h"
 #include "src/tools/singlejar/zlib_interface.h"

 /*
  * An instance of this class holds decompressed data in a list of chunks,
  * to be eventually compressed to the output buffer.
  * Use DecompressEntryContents() or ReadEntryContents() (depending on whether
  * an entry is compressed or not) to append the contents of a Zip entry.
  * Use Append() to append a sequence of bytes or a string.
  * Use CompressOut() to write out the contents; it compresses them only if
  * that makes the output shorter. Use CopyOut() to write them out as is.
  */
 // Storage notes: the bytes live in a singly linked list of fixed-size
 // DataBlock chunks that only ever grows at the tail. The instance owns the
 // chunks through raw pointers and deletes them in its destructor, which is
 // why copying is disabled.
 class TransientBytes {
  public:
   // Creates an empty buffer. No block is allocated until bytes are appended.
   TransientBytes()
       : allocated_(0),
         data_size_(0),
         first_block_(nullptr),
         last_block_(nullptr) {}

   // A copy would share the block list with the original and both destructors
   // would then delete the same blocks.
   TransientBytes(const TransientBytes &) = delete;
   TransientBytes &operator=(const TransientBytes &) = delete;

   // Deletes every block in the list.
   ~TransientBytes() {
     while (first_block_) {
       DataBlock *doomed = first_block_;
       first_block_ = doomed->next_block_;
       delete doomed;
     }
     last_block_ = nullptr;
   }

   // Appends `data_size` raw bytes starting at `data`. The bytes are copied
   // piecewise: each piece is as large as the free space left in the last
   // block, and a new block is allocated whenever that space runs out.
   void Append(const uint8_t *data, uint64_t data_size) {
     const uint8_t *const data_end = data + data_size;
     while (data < data_end) {
       const uint64_t chunk_size =
           std::min(static_cast<uint64_t>(data_end - data), ensure_space());
       copy(data, chunk_size);
       data += chunk_size;
     }
   }

   // Same, but for a NUL-terminated string (the terminator is not appended).
   void Append(const char *str) {
     Append(reinterpret_cast<const uint8_t *>(str), strlen(str));
   }

   // Appends the contents of the uncompressed Zip entry: the
   // lh->uncompressed_file_size() bytes found at lh->data().
   // NOTE(review): unlike DecompressEntryContents(), the size is always taken
   // from the local header here.
   void ReadEntryContents(const LH *lh) {
     Append(lh->data(), lh->uncompressed_file_size());
   }

   // Appends the contents of the compressed Zip entry. Resets the inflater
   // used to decompress.
   //
   // The compressed and uncompressed sizes come from `cdh` when
   // cdh->no_size_in_local_header() is true and from `lh` otherwise; the
   // compressed bytes are always read from lh->data(). Every failure is
   // reported through diag_errx() (presumably fatal -- TODO confirm).
   void DecompressEntryContents(const CDH *cdh, const LH *lh,
                                Inflater *inflater) {
     // Remembered so that the number of bytes produced by this call alone can
     // be verified at the end.
     const uint64_t old_total_out = inflater->total_out();
     uint64_t in_bytes;
     uint64_t out_bytes;
     const uint8_t *data = lh->data();

     if (cdh->no_size_in_local_header()) {
       in_bytes = cdh->compressed_file_size();
       out_bytes = cdh->uncompressed_file_size();
     } else {
       in_bytes = lh->compressed_file_size();
       out_bytes = lh->uncompressed_file_size();
     }

     while (in_bytes > 0) {
       // A single region to inflate cannot exceed 4GB-1.
       const uint32_t in_bytes_chunk = static_cast<uint32_t>(
           std::min(in_bytes, static_cast<uint64_t>(0xFFFFFFFF)));
       inflater->DataToInflate(data, in_bytes_chunk);
       for (;;) {
         // Inflate straight into the free tail of the last block.
         const uint32_t available_out = static_cast<uint32_t>(ensure_space());
         const int ret = inflater->Inflate(append_position(), available_out);
         const uint32_t inflated = available_out - inflater->available_out();
         if (Z_STREAM_END == ret) {
           // No more data to decompress. Update write position and we are done
           // for this input chunk.
           advance(inflated);
           break;
         } else if (Z_OK == ret) {
           // No more space in the output buffer. Advance write position; the
           // next iteration allocates a fresh block.
           if (inflater->available_out()) {
             diag_errx(2,
                       "%s:%d: Internal error inflating %.*s: Inflate reported "
                       "Z_OK but there are still %" PRIu32
                       " bytes available in the output buffer",
                       __FILE__, __LINE__, lh->file_name_length(),
                       lh->file_name(), inflater->available_out());
           }
           advance(inflated);
         } else {
           diag_errx(2,
                     "%s:%d: Internal error inflating %.*s: inflate() call "
                     "returned %d (%s)",
                     __FILE__, __LINE__, lh->file_name_length(), lh->file_name(),
                     ret, inflater->error_message());
         }
       }
       data += in_bytes_chunk;
       in_bytes -= in_bytes_chunk;
     }

     // Sanity check: this call must have produced exactly the number of bytes
     // the headers promised.
     if (inflater->total_out() - old_total_out != out_bytes) {
       diag_errx(2,
                 "%s:%d: Internal error inflating %.*s: inflater wrote %" PRIu64
                 " bytes, but the uncompressed entry should be %" PRIu64
                 " bytes long",
                 __FILE__, __LINE__, lh->file_name_length(), lh->file_name(),
                 inflater->total_out() - old_total_out, out_bytes);
     }
     inflater->reset();
   }

   // Writes the contents bytes to the given buffer in an optimal way, i.e., the
   // shorter of compressed or uncompressed. Sets the checksum and number of
   // bytes written and returns Z_DEFLATED if compression took place or
   // Z_NO_COMPRESSION otherwise.
   //
   // At most data_size() bytes are written, so `buffer` has to have room for
   // that many. An empty instance writes nothing and reports checksum 0.
   uint16_t CompressOut(uint8_t *buffer, uint32_t *checksum,
                        uint64_t *bytes_written) {
     *checksum = 0;
     uint64_t to_compress = data_size();
     if (to_compress == 0) {
       *bytes_written = 0;
       return Z_NO_COMPRESSION;
     }

     Deflater deflater;
     deflater.next_out = buffer;
     uint16_t compression_method = Z_DEFLATED;

     // Feed data blocks to the deflater one by one, but break if the compressed
     // size exceeds the original size.
     for (DataBlock *data_block = first_block_;
          data_block && compression_method != Z_NO_COMPRESSION;
          data_block = data_block->next_block_) {
       // The compressed size should not exceed the original size less the number
       // of bytes already compressed. And, it should not exceed 4GB-1.
       deflater.avail_out = std::min(data_size() - deflater.total_out,
                                     static_cast<uint64_t>(0xFFFFFFFF));
       // Out of the total number of bytes that remain to be compressed, we
       // can compress no more than this block.
       const uint32_t chunk_size = static_cast<uint32_t>(std::min(
           static_cast<uint64_t>(sizeof(data_block->data_)), to_compress));
       *checksum = crc32(*checksum, data_block->data_, chunk_size);
       deflater.avail_in = chunk_size;
       to_compress -= chunk_size;
       // Only the chunk that exhausts the data is deflated with Z_FINISH.
       const int ret = deflater.Deflate(data_block->data_, chunk_size,
                                        to_compress ? Z_NO_FLUSH : Z_FINISH);
       if (ret == Z_OK) {
         if (!deflater.avail_out) {
           // We ran out of space in the output buffer, which means
           // that deflated size exceeds original size. Leave the loop
           // and just copy the data.
           compression_method = Z_NO_COMPRESSION;
         }
       } else if (ret == Z_BUF_ERROR && !deflater.avail_in) {
         // The deflater consumed the whole data block; this is not an error.
       } else if (ret == Z_STREAM_END) {
         if (data_block->next_block_ || to_compress) {
           diag_errx(2,
                     "%s:%d: Internal error: deflate() call at the end, but "
                     "there is more data to compress!",
                     __FILE__, __LINE__);
         }
       } else {
         diag_errx(2, "%s:%d: deflate error %d(%s)", __FILE__, __LINE__, ret,
                   deflater.msg);
       }
     }
     if (compression_method != Z_NO_COMPRESSION) {
       *bytes_written = deflater.total_out;
       return compression_method;
     }

     // Compression does not help, just copy the bytes to the output buffer.
     // CopyOut() recomputes the checksum from scratch.
     CopyOut(buffer, checksum);
     *bytes_written = data_size();
     return Z_NO_COMPRESSION;
   }

   // Copies the data_size() bytes to the buffer and sets the checksum to their
   // CRC-32.
   void CopyOut(uint8_t *buffer, uint32_t *checksum) {
     uint64_t to_copy = data_size();
     // The write position is tracked as an offset back from the end.
     uint8_t *const buffer_end = buffer + to_copy;
     *checksum = 0;
     for (DataBlock *data_block = first_block_; data_block;
          data_block = data_block->next_block_) {
       const size_t chunk_size =
           std::min(static_cast<uint64_t>(sizeof(data_block->data_)), to_copy);
       *checksum = crc32(*checksum, data_block->data_, chunk_size);
       memcpy(buffer_end - to_copy, data_block->data_, chunk_size);
       to_copy -= chunk_size;
     }
   }

   // Number of data bytes.
   uint64_t data_size() const { return data_size_; }

   // This is mostly for testing: stream out contents to a Sink instance.
   // The class Sink has to have
   //     void operator()(const void *chunk, uint64_t chunk_size) const;
   // The sink is called once per data block, in order.
   template <class Sink>
   void stream_out(const Sink &sink) const {
     uint64_t to_copy = data_size();
     for (auto data_block = first_block_; data_block;
          data_block = data_block->next_block_) {
       const uint64_t chunk_size = std::min(
           static_cast<uint64_t>(sizeof(data_block->data_)), to_copy);
       sink(data_block->data_, chunk_size);
       to_copy -= chunk_size;
     }
   }

   // Returns the most recently appended byte. Reports an error through
   // diag_errx() if the buffer is empty.
   uint8_t last_byte() const {
     if (!data_size()) {
       diag_errx(1, "%s:%d: last_byte() cannot be called if buffer is empty",
                 __FILE__, __LINE__);
     }
     if (free_size() >= sizeof(last_block_->data_)) {
       diag_errx(1, "%s:%d: internal error: the last data block is empty",
                 __FILE__, __LINE__);
     }
     return *(last_block_->End() - free_size() - 1);
   }

  private:
   // Ensures there is some space to write to, returns the amount available.
   // A new block is linked to the tail only when no free space is left.
   uint64_t ensure_space() {
     if (!free_size()) {
       DataBlock *data_block = new DataBlock();
       if (last_block_) {
         last_block_->next_block_ = data_block;
       }
       last_block_ = data_block;
       if (!first_block_) {
         first_block_ = data_block;
       }
       allocated_ += sizeof(data_block->data_);
     }
     return free_size();
   }

   // Records that given amount of bytes is to be appended to the buffer.
   // Returns the old write position. Asking for more than free_size() is
   // reported through diag_errx().
   uint8_t *advance(size_t amount) {
     if (amount > free_size()) {
       // `amount` is widened explicitly so that it matches PRIu64.
       diag_errx(2, "%s: %d: Cannot advance %" PRIu64 " bytes, only %" PRIu64
                    " is available",
                 __FILE__, __LINE__, static_cast<uint64_t>(amount),
                 free_size());
     }
     uint8_t *pos = append_position();
     data_size_ += amount;
     return pos;
   }

   // Copies `count` bytes from `from` to the write position and advances it.
   // The caller makes sure that `count` does not exceed free_size().
   void copy(const uint8_t *from, size_t count) {
     memcpy(advance(count), from, count);
   }

   // Returns the address the next byte goes to, or nullptr if no block has
   // been allocated yet.
   uint8_t *append_position() {
     return last_block_ ? last_block_->End() - free_size() : nullptr;
   }

   // Returns the amount of free space (always located at the end of the last
   // block).
   uint64_t free_size() const { return allocated_ - data_size_; }

   // The bytes are kept in a linked list of the DataBlock instances.
   // TODO(asmundak): perhaps use mmap to allocate these?
   struct DataBlock {
     struct DataBlock *next_block_;
     // NOTE(review): the "- 8" presumably leaves room for next_block_ so that
     // the whole struct is 256KB on 64-bit targets -- confirm.
     uint8_t data_[0x40000 - 8];
     DataBlock() : next_block_(nullptr) {}
     // One past the last byte of data_.
     uint8_t *End() { return data_ + sizeof(data_); }
   };

   uint64_t allocated_;   // Total capacity of all blocks, in bytes.
   uint64_t data_size_;   // Number of bytes appended so far.
   struct DataBlock *first_block_;
   struct DataBlock *last_block_;
 };

 #endif  // SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_
	// Copyright 2016 The Bazel Authors. All rights reserved.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.
	#ifndef SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_
	#define SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_

	#include <inttypes.h>
	#include <algorithm>
	#include <ostream>

	#include "src/tools/singlejar/diag.h"
	#include "src/tools/singlejar/zip_headers.h"
	#include "src/tools/singlejar/zlib_interface.h"

	/*
	 * An instance of this class holds decompressed data in a list of chunks,
	 * to be eventually compressed to the output buffer.
	 * Use DecompressEntryContents() or ReadEntryContents() (depending on whether
	 * an entry is compressed or not) to append the contents of a Zip entry.
	 * Use Append() to append a sequence of bytes or a string.
	 * Use CompressOut() to write out the contents; it compresses them only if
	 * that makes the output shorter. Use CopyOut() to write them out as is.
	 */
	class TransientBytes {
	public:
	TransientBytes()
	: allocated_(0),
	data_size_(0),
	first_block_(nullptr),
	last_block_(nullptr) {}

	~TransientBytes() {
	while (first_block_) {
	auto block = first_block_;
	first_block_ = first_block_->next_block_;
	delete block;
	}
	last_block_ = nullptr;
	}

	// Appends raw bytes.
	void Append(const uint8_t *data, uint64_t data_size) {
	uint64_t chunk_size;
	auto data_end = data + data_size;
	for (; data < data_end; data += chunk_size) {
	chunk_size =
	std::min(static_cast<uint64_t>(data_end - data), ensure_space());
	copy(data, chunk_size);
	}
	}

	// Same, but for a string.
	void Append(const char *str) {
	Append(reinterpret_cast<const uint8_t *>(str), strlen(str));
	}

	// Appends the contents of the uncompressed Zip entry.
	void ReadEntryContents(const LH *lh) {
	Append(lh->data(), lh->uncompressed_file_size());
	}

	// Appends the contents of the compressed Zip entry. Resets the inflater
	// used to decompress.
	void DecompressEntryContents(const CDH cdh, const LH lh,
	Inflater *inflater) {
	uint64_t old_total_out = inflater->total_out();
	uint64_t in_bytes;
	uint64_t out_bytes;
	const uint8_t *data = lh->data();

	if (cdh->no_size_in_local_header()) {
	in_bytes = cdh->compressed_file_size();
	out_bytes = cdh->uncompressed_file_size();
	} else {
	in_bytes = lh->compressed_file_size();
	out_bytes = lh->uncompressed_file_size();
	}

	while (in_bytes > 0) {
	// A single region to inflate cannot exceed 4GB-1.
	uint32_t in_bytes_chunk = 0xFFFFFFFF;
	if (in_bytes_chunk > in_bytes) {
	in_bytes_chunk = in_bytes;
	}
	inflater->DataToInflate(data, in_bytes_chunk);
	for (;;) {
	uint32_t available_out = ensure_space();
	int ret = inflater->Inflate(append_position(), available_out);
	uint32_t inflated = available_out - inflater->available_out();
	if (Z_STREAM_END == ret) {
	// No more data to decompress. Update write position and we are done
	// for this input chunk.
	advance(inflated);
	break;
	} else if (Z_OK == ret) {
	// No more space in the output buffer. Advance write position, update
	// the number of remaining bytes.
	if (inflater->available_out()) {
	diag_errx(2,
	"%s:%d: Internal error inflating %.*s: Inflate reported "
	"Z_OK but there are still %" PRIu32
	" bytes available in the output buffer",
	__FILE__, __LINE__, lh->file_name_length(),
	lh->file_name(), inflater->available_out());
	}
	advance(inflated);
	} else {
	diag_errx(2,
	"%s:%d: Internal error inflating %.*s: inflate() call "
	"returned %d (%s)",
	__FILE__, __LINE__, lh->file_name_length(), lh->file_name(),
	ret, inflater->error_message());
	}
	}
	data += in_bytes_chunk;
	in_bytes -= in_bytes_chunk;
	}

	// Smog check
	if (inflater->total_out() - old_total_out != out_bytes) {
	diag_errx(2,
	"%s:%d: Internal error inflating %.*s: inflater wrote %" PRIu64
	" bytes , but the uncompressed entry should be %" PRIu64
	"bytes long",
	__FILE__, __LINE__, lh->file_name_length(), lh->file_name(),
	inflater->total_out() - old_total_out, out_bytes);
	}
	inflater->reset();
	return;
	}

	// Writes the contents bytes to the given buffer in an optimal way, i.e., the
	// shorter of compressed or uncompressed. Sets the checksum and number of
	// bytes written and returns Z_DEFLATED if compression took place or
	// Z_NO_COMPRESSION otherwise.
	uint16_t CompressOut(uint8_t buffer, uint32_t checksum,
	uint64_t *bytes_written) {
	*checksum = 0;
	uint64_t to_compress = data_size();
	if (to_compress == 0) {
	*bytes_written = 0;
	return Z_NO_COMPRESSION;
	}

	Deflater deflater;
	deflater.next_out = buffer;
	uint16_t compression_method = Z_DEFLATED;

	// Feed data blocks to the deflater one by one, but break if the compressed
	// size exceeds the original size.
	for (auto data_block = first_block_;
	data_block && compression_method != Z_NO_COMPRESSION;
	data_block = data_block->next_block_) {
	// The compressed size should not exceed the original size less the number
	// of bytes already compressed. And, it should not exceed 4GB-1.
	deflater.avail_out = std::min(data_size() - deflater.total_out,
	static_cast<uint64_t>(0xFFFFFFFF));
	// Out of the total number of bytes that remain to be compressed, we
	// can compress no more than this block.
	uint32_t chunk_size = static_cast<uint32_t>(std::min(
	static_cast<uint64_t>(sizeof(data_block->data_)), to_compress));
	checksum = crc32(checksum, data_block->data_, chunk_size);
	deflater.avail_in = chunk_size;
	to_compress -= chunk_size;
	int ret = deflater.Deflate(data_block->data_, chunk_size,
	to_compress ? Z_NO_FLUSH : Z_FINISH);
	if (ret == Z_OK) {
	if (!deflater.avail_out) {
	// We ran out of space in the output buffer, which means
	// that deflated size exceeds original size. Leave the loop
	// and just copy the data.
	compression_method = Z_NO_COMPRESSION;
	}
	} else if (ret == Z_BUF_ERROR && !deflater.avail_in) {
	// We ran out of data block, this is not a error.
	} else if (ret == Z_STREAM_END) {
	if (data_block->next_block_ \|\| to_compress) {
	diag_errx(2,
	"%s:%d: Internal error: deflate() call at the end, but "
	"there is more data to compress!",
	__FILE__, __LINE__);
	}
	} else {
	diag_errx(2, "%s:%d: deflate error %d(%s)", __FILE__, __LINE__, ret,
	deflater.msg);
	}
	}
	if (compression_method != Z_NO_COMPRESSION) {
	*bytes_written = deflater.total_out;
	return compression_method;
	}

	// Compression does not help, just copy the bytes to the output buffer.
	CopyOut(buffer, checksum);
	*bytes_written = data_size();
	return Z_NO_COMPRESSION;
	}

	// Copies the bytes to the buffer and sets the checksum.
	void CopyOut(uint8_t buffer, uint32_t checksum) {
	uint64_t to_copy = data_size();
	uint8_t *buffer_end = buffer + to_copy;
	*checksum = 0;
	for (auto data_block = first_block_; data_block;
	data_block = data_block->next_block_) {
	size_t chunk_size =
	std::min(static_cast<uint64_t>(sizeof(data_block->data_)), to_copy);
	checksum = crc32(checksum, data_block->data_, chunk_size);
	memcpy(buffer_end - to_copy, data_block->data_, chunk_size);
	to_copy -= chunk_size;
	}
	}

	// Number of data bytes.
	uint64_t data_size() const { return data_size_; }

	// This is mostly for testing: stream out contents to a Sink instance.
	// The class Sink has to have
	// void operator()(const void *chunk, uint64_t chunk_size) const;
	//
	template <class Sink>
	void stream_out(const Sink &sink) const {
	uint64_t to_copy = data_size();
	for (auto data_block = first_block_; data_block;
	data_block = data_block->next_block_) {
	uint64_t chunk_size = sizeof(data_block->data_);
	if (chunk_size > to_copy) {
	chunk_size = to_copy;
	}
	sink.operator()(data_block->data_, chunk_size);
	to_copy -= chunk_size;
	}
	}

	uint8_t last_byte() const {
	if (!data_size()) {
	diag_errx(1, "%s:%d: last_char() cannot be called if buffer is empty",
	__FILE__, __LINE__);
	}
	if (free_size() >= sizeof(last_block_->data_)) {
	diag_errx(1, "%s:%d: internal error: the last data block is empty",
	__FILE__, __LINE__);
	}
	return *(last_block_->End() - free_size() - 1);
	}

	private:
	// Ensures there is some space to write to, returns the amount available.
	uint64_t ensure_space() {
	if (!free_size()) {
	auto *data_block = new DataBlock();
	if (last_block_) {
	last_block_->next_block_ = data_block;
	}
	last_block_ = data_block;
	if (!first_block_) {
	first_block_ = data_block;
	}
	allocated_ += sizeof(data_block->data_);
	}
	return free_size();
	}

	// Records that given amount of bytes is to be appended to the buffer.
	// Returns the old write position.
	uint8_t *advance(size_t amount) {
	if (amount > free_size()) {
	diag_errx(2, "%s: %d: Cannot advance %ld bytes, only %" PRIu64
	" is available",
	__FILE__, __LINE__, amount, free_size());
	}
	uint8_t *pos = append_position();
	data_size_ += amount;
	return pos;
	}

	void copy(const uint8_t *from, size_t count) {
	memcpy(advance(count), from, count);
	}

	uint8_t *append_position() {
	return last_block_ ? last_block_->End() - free_size() : nullptr;
	}

	// Returns the amount of free space.
	uint64_t free_size() const { return allocated_ - data_size_; }

	// The bytes are kept in an linked list of the DataBlock instances.
	// TODO(asmundak): perhaps use mmap to allocate these?
	struct DataBlock {
	struct DataBlock *next_block_;
	uint8_t data_[0x40000 - 8];
	DataBlock() : next_block_(nullptr) {}
	uint8_t *End() { return data_ + sizeof(data_); }
	};

	uint64_t allocated_;
	uint64_t data_size_;
	struct DataBlock *first_block_;
	struct DataBlock *last_block_;
	};

	#endif // SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_