blob: ecac602aec2365126f96299257302a4ba5017a05 [file] [log] [blame]
// Copyright 2016 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_
#define SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS 1
#endif
#include <inttypes.h>
#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <ostream>
#include "src/tools/singlejar/diag.h"
#include "src/tools/singlejar/zip_headers.h"
#include "src/tools/singlejar/zlib_interface.h"
/*
 * An instance of this class holds decompressed data in a list of chunks,
 * to be eventually compressed to the output buffer.
 * Use DecompressEntryContents() or ReadEntryContents() (depending on whether
 * an entry is compressed or not) to append the contents of a Zip entry.
 * Use Append() to append a sequence of bytes or a string.
 * Use CompressOut() to write out the contents; it compresses the entry only
 * if that makes it shorter. Use CopyOut() to write the contents out as is.
 */
class TransientBytes {
public:
TransientBytes()
: allocated_(0),
data_size_(0),
first_block_(nullptr),
last_block_(nullptr) {}
~TransientBytes() {
while (first_block_) {
auto block = first_block_;
first_block_ = first_block_->next_block_;
delete block;
}
last_block_ = nullptr;
}
// Appends raw bytes.
void Append(const uint8_t *data, uint64_t data_size) {
uint64_t chunk_size;
auto data_end = data + data_size;
for (; data < data_end; data += chunk_size) {
chunk_size =
std::min(static_cast<uint64_t>(data_end - data), ensure_space());
copy(data, chunk_size);
}
}
// Same, but for a string.
void Append(const char *str) {
Append(reinterpret_cast<const uint8_t *>(str), strlen(str));
}
// Appends the contents of the uncompressed Zip entry.
void ReadEntryContents(const LH *lh) {
Append(lh->data(), lh->uncompressed_file_size());
}
// Appends the contents of the compressed Zip entry. Resets the inflater
// used to decompress.
void DecompressEntryContents(const CDH *cdh, const LH *lh,
Inflater *inflater) {
uint64_t old_total_out = inflater->total_out();
uint64_t in_bytes;
uint64_t out_bytes;
const uint8_t *data = lh->data();
if (cdh->no_size_in_local_header()) {
in_bytes = cdh->compressed_file_size();
out_bytes = cdh->uncompressed_file_size();
} else {
in_bytes = lh->compressed_file_size();
out_bytes = lh->uncompressed_file_size();
}
while (in_bytes > 0) {
// A single region to inflate cannot exceed 4GB-1.
uint32_t in_bytes_chunk = 0xFFFFFFFF;
if (in_bytes_chunk > in_bytes) {
in_bytes_chunk = in_bytes;
}
inflater->DataToInflate(data, in_bytes_chunk);
for (;;) {
uint32_t available_out = ensure_space();
int ret = inflater->Inflate(append_position(), available_out);
uint32_t inflated = available_out - inflater->available_out();
if (Z_STREAM_END == ret) {
// No more data to decompress. Update write position and we are done
// for this input chunk.
advance(inflated);
break;
} else if (Z_OK == ret) {
// No more space in the output buffer. Advance write position, update
// the number of remaining bytes.
if (inflater->available_out()) {
diag_errx(2,
"%s:%d: Internal error inflating %.*s: Inflate reported "
"Z_OK but there are still %" PRIu32
" bytes available in the output buffer",
__FILE__, __LINE__, lh->file_name_length(),
lh->file_name(), inflater->available_out());
}
advance(inflated);
} else {
diag_errx(2,
"%s:%d: Internal error inflating %.*s: inflate() call "
"returned %d (%s)",
__FILE__, __LINE__, lh->file_name_length(), lh->file_name(),
ret, inflater->error_message());
}
}
data += in_bytes_chunk;
in_bytes -= in_bytes_chunk;
}
// Smog check
// This check is disabled on Windows because z_stream::total_out is of type
// of uLong (unsigned long), which is 64-bit for most 64-bit Unix platforms,
// but it is 32-bit even for Win64. This means even though zlib is capable
// of compressing data >4GB as long as it is processed by chunks, zlib
// cannot report the correct total number of processed bytes >4GB through
// z_stream::total_out on Windows.
#ifndef _WIN32
if (inflater->total_out() - old_total_out != out_bytes) {
diag_errx(2,
"%s:%d: Internal error inflating %.*s: inflater wrote %" PRIu64
" bytes , but the uncompressed entry should be %" PRIu64
"bytes long",
__FILE__, __LINE__, lh->file_name_length(), lh->file_name(),
inflater->total_out() - old_total_out, out_bytes);
}
#endif
inflater->reset();
}
// Writes the contents bytes to the given buffer in an optimal way, i.e., the
// shorter of compressed or uncompressed. Sets the checksum and number of
// bytes written and returns Z_DEFLATED if compression took place or
// Z_NO_COMPRESSION otherwise.
uint16_t CompressOut(uint8_t *buffer, uint32_t *checksum,
uint64_t *bytes_written) {
*checksum = 0;
uint64_t to_compress = data_size();
if (to_compress == 0) {
*bytes_written = 0;
return Z_NO_COMPRESSION;
}
Deflater deflater;
deflater.next_out = buffer;
uint16_t compression_method = Z_DEFLATED;
// Feed data blocks to the deflater one by one, but break if the compressed
// size exceeds the original size.
for (auto data_block = first_block_;
data_block && compression_method != Z_NO_COMPRESSION;
data_block = data_block->next_block_) {
// The compressed size should not exceed the original size less the number
// of bytes already compressed. And, it should not exceed 4GB-1.
deflater.avail_out = std::min(data_size() - deflater.total_out,
static_cast<uint64_t>(0xFFFFFFFF));
// Out of the total number of bytes that remain to be compressed, we
// can compress no more than this block.
uint32_t chunk_size = static_cast<uint32_t>(std::min(
static_cast<uint64_t>(sizeof(data_block->data_)), to_compress));
*checksum = crc32(*checksum, data_block->data_, chunk_size);
deflater.avail_in = chunk_size;
to_compress -= chunk_size;
int ret = deflater.Deflate(data_block->data_, chunk_size,
to_compress ? Z_NO_FLUSH : Z_FINISH);
if (ret == Z_OK) {
if (!deflater.avail_out) {
// We ran out of space in the output buffer, which means
// that deflated size exceeds original size. Leave the loop
// and just copy the data.
compression_method = Z_NO_COMPRESSION;
}
} else if (ret == Z_BUF_ERROR && !deflater.avail_in) {
// We ran out of data block, this is not a error.
} else if (ret == Z_STREAM_END) {
if (data_block->next_block_ || to_compress) {
diag_errx(2,
"%s:%d: Internal error: deflate() call at the end, but "
"there is more data to compress!",
__FILE__, __LINE__);
}
} else {
diag_errx(2, "%s:%d: deflate error %d(%s)", __FILE__, __LINE__, ret,
deflater.msg);
}
}
if (compression_method != Z_NO_COMPRESSION) {
*bytes_written = deflater.total_out;
return compression_method;
}
// Compression does not help, just copy the bytes to the output buffer.
CopyOut(buffer, checksum);
*bytes_written = data_size();
return Z_NO_COMPRESSION;
}
// Copies the bytes to the buffer and sets the checksum.
void CopyOut(uint8_t *buffer, uint32_t *checksum) {
uint64_t to_copy = data_size();
uint8_t *buffer_end = buffer + to_copy;
*checksum = 0;
for (auto data_block = first_block_; data_block;
data_block = data_block->next_block_) {
size_t chunk_size =
std::min(static_cast<uint64_t>(sizeof(data_block->data_)), to_copy);
*checksum = crc32(*checksum, data_block->data_, chunk_size);
memcpy(buffer_end - to_copy, data_block->data_, chunk_size);
to_copy -= chunk_size;
}
}
// Number of data bytes.
uint64_t data_size() const { return data_size_; }
// This is mostly for testing: stream out contents to a Sink instance.
// The class Sink has to have
// void operator()(const void *chunk, uint64_t chunk_size) const;
//
template <class Sink>
void stream_out(const Sink &sink) const {
uint64_t to_copy = data_size();
for (auto data_block = first_block_; data_block;
data_block = data_block->next_block_) {
uint64_t chunk_size = sizeof(data_block->data_);
if (chunk_size > to_copy) {
chunk_size = to_copy;
}
sink.operator()(data_block->data_, chunk_size);
to_copy -= chunk_size;
}
}
uint8_t last_byte() const {
if (!data_size()) {
diag_errx(1, "%s:%d: last_char() cannot be called if buffer is empty",
__FILE__, __LINE__);
}
if (free_size() >= sizeof(last_block_->data_)) {
diag_errx(1, "%s:%d: internal error: the last data block is empty",
__FILE__, __LINE__);
}
return *(last_block_->End() - free_size() - 1);
}
private:
// Ensures there is some space to write to, returns the amount available.
uint64_t ensure_space() {
if (!free_size()) {
auto *data_block = new DataBlock();
if (last_block_) {
last_block_->next_block_ = data_block;
}
last_block_ = data_block;
if (!first_block_) {
first_block_ = data_block;
}
allocated_ += sizeof(data_block->data_);
}
return free_size();
}
// Records that given amount of bytes is to be appended to the buffer.
// Returns the old write position.
uint8_t *advance(size_t amount) {
if (amount > free_size()) {
diag_errx(
2, "%s: %d: Cannot advance %zu bytes, only %" PRIu64 " is available",
__FILE__, __LINE__, amount, free_size());
}
uint8_t *pos = append_position();
data_size_ += amount;
return pos;
}
void copy(const uint8_t *from, size_t count) {
memcpy(advance(count), from, count);
}
uint8_t *append_position() {
return last_block_ ? last_block_->End() - free_size() : nullptr;
}
// Returns the amount of free space.
uint64_t free_size() const { return allocated_ - data_size_; }
// The bytes are kept in an linked list of the DataBlock instances.
// TODO(asmundak): perhaps use mmap to allocate these?
struct DataBlock {
struct DataBlock *next_block_;
uint8_t data_[0x40000 - 8];
DataBlock() : next_block_(nullptr) {}
uint8_t *End() { return data_ + sizeof(data_); }
};
uint64_t allocated_;
uint64_t data_size_;
struct DataBlock *first_block_;
struct DataBlock *last_block_;
};
#endif // SRC_TOOLS_SINGLEJAR_TRANSIENT_BYTES_H_