// blob: cf2f690207cf140e0b1875886d1cc78f207fe6d1 [file] [log] [blame]
// Copyright 2015 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.zip;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.channels.Channels;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;
/**
 * A ZIP file reader.
 *
 * <p>This class provides entry data in the form of {@link ZipFileEntry}, which provides more detail
 * about the entry than the JDK equivalent {@link ZipEntry}. In addition to providing
 * {@link InputStream}s for entries, similar to JDK {@link ZipFile#getInputStream(ZipEntry)}, it
 * also provides access to the raw byte entry data via {@link #getRawInputStream(ZipFileEntry)}.
 *
 * <p>Using the raw access capabilities allows for more efficient ZIP file processing, such as
 * merging, by not requiring each entry's data to be decompressed when read.
 *
 * <p><em>NOTE:</em> The entries are read from the central directory. If the entry is not listed
 * there, it will not be returned from {@link #entries()} or {@link #getEntry(String)}.
 */
public class ZipReader implements Closeable, AutoCloseable {

  /**
   * Largest comment length that can follow the end of central directory record. The length is
   * read as an unsigned 16-bit value, so it can never exceed this.
   */
  private static final int MAX_COMMENT_LENGTH = 0xffff;

  private final File file;
  private final RandomAccessFile in;
  private final ZipFileData zipData;

  /**
   * Opens a zip file for raw access.
   *
   * <p>The UTF-8 charset is used to decode the entry names and comments.
   *
   * @param file the zip file
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  public ZipReader(File file) throws IOException {
    this(file, UTF_8);
  }

  /**
   * Opens a zip file for raw access.
   *
   * @param file the zip file
   * @param charset the charset to use to decode the entry names and comments
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  public ZipReader(File file, Charset charset) throws IOException {
    this(file, charset, false);
  }

  /**
   * Opens a zip file for raw access.
   *
   * <p>If the central directory cannot be read, the underlying file handle is closed again before
   * the failure is propagated.
   *
   * @param file the zip file
   * @param charset the charset to use to decode the entry names and comments
   * @param strictEntries force parsing to use the number of entries recorded in the end of
   *     central directory as the correct value, not as an estimate
   * @throws NullPointerException if {@code file} or {@code charset} is null
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  public ZipReader(File file, Charset charset, boolean strictEntries) throws IOException {
    if (file == null) {
      throw new NullPointerException("file");
    }
    if (charset == null) {
      throw new NullPointerException("charset");
    }
    this.file = file;
    // Build the in-memory model before opening the file so that nothing can fail between
    // acquiring the file handle and entering the guarded region below.
    this.zipData = new ZipFileData(charset);
    this.in = new RandomAccessFile(file, "r");
    try {
      readCentralDirectory(strictEntries);
    } catch (IOException | RuntimeException e) {
      // The caller never receives this instance, so it cannot call close(); release the handle.
      try {
        in.close();
      } catch (IOException closeFailure) {
        e.addSuppressed(closeFailure);
      }
      throw e;
    }
  }

  /**
   * Returns the zip file's name.
   */
  public String getFilename() {
    return file.getName();
  }

  /**
   * Returns the ZIP file comment.
   */
  public String getComment() {
    return zipData.getComment();
  }

  /**
   * Returns a collection of the ZIP file entries.
   */
  public Collection<ZipFileEntry> entries() {
    return zipData.getEntries();
  }

  /**
   * Returns the ZIP file entry for the specified name, or null if not found.
   */
  public ZipFileEntry getEntry(String name) {
    return zipData.getEntry(name);
  }

  /**
   * Returns the number of entries in the ZIP file.
   */
  public long size() {
    return zipData.getNumEntries();
  }

  /**
   * Returns an input stream for reading the contents of the specified ZIP file entry.
   *
   * <p>Closing this ZIP file will, in turn, close all input streams that have been returned by
   * invocations of this method.
   *
   * @param entry the ZIP file entry
   * @return the input stream for reading the contents of the specified zip file entry
   * @throws ZipException if a ZIP format error has occurred, including when this file does not
   *     contain {@code entry}
   * @throws IOException if an I/O error has occurred
   */
  public InputStream getInputStream(ZipFileEntry entry) throws IOException {
    checkContainsEntry(entry);
    return new ZipEntryInputStream(this, entry, /* raw */ false);
  }

  /**
   * Returns an input stream for reading the raw contents of the specified ZIP file entry.
   *
   * <p><em>NOTE:</em> No inflating will take place; The data read from the input stream will be
   * the exact byte content of the ZIP file entry on disk.
   *
   * <p>Closing this ZIP file will, in turn, close all input streams that have been returned by
   * invocations of this method.
   *
   * @param entry the ZIP file entry
   * @return the input stream for reading the contents of the specified zip file entry
   * @throws ZipException if a ZIP format error has occurred, including when this file does not
   *     contain {@code entry}
   * @throws IOException if an I/O error has occurred
   */
  public InputStream getRawInputStream(ZipFileEntry entry) throws IOException {
    checkContainsEntry(entry);
    return new ZipEntryInputStream(this, entry, /* raw */ true);
  }

  /**
   * Closes the ZIP file.
   *
   * <p>Closing this ZIP file will close all of the input streams previously returned by invocations
   * of the {@link #getInputStream(ZipFileEntry)} and {@link #getRawInputStream(ZipFileEntry)}
   * methods.
   */
  @Override
  public void close() throws IOException {
    in.close();
  }

  /**
   * Verifies that the given entry is the one this file holds under the same name.
   *
   * @param entry the ZIP file entry to look up
   * @throws ZipException if no entry with that name exists or the stored entry differs
   */
  private void checkContainsEntry(ZipFileEntry entry) throws ZipException {
    ZipFileEntry known = zipData.getEntry(entry.getName());
    // An unknown name yields null; report it as a missing entry rather than failing with a
    // NullPointerException.
    if (known == null || !known.equals(entry)) {
      throw new ZipException(String.format(
          "Zip file '%s' does not contain the requested entry '%s'.", file.getName(),
          entry.getName()));
    }
  }

  /**
   * Finds, reads and parses ZIP file entries from the central directory.
   *
   * @param strictEntries force parsing to use the number of entries recorded in the end of
   *     central directory as the correct value, not as an estimate
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  private void readCentralDirectory(boolean strictEntries) throws IOException {
    long eocdLocation = findEndOfCentralDirectoryRecord();
    InputStream stream = getStreamAt(eocdLocation);
    EndOfCentralDirectoryRecord.read(stream, zipData);

    if (zipData.isMaybeZip64()) {
      try {
        stream = getStreamAt(eocdLocation - Zip64EndOfCentralDirectoryLocator.FIXED_DATA_SIZE);
        Zip64EndOfCentralDirectoryLocator.read(stream, zipData);
        stream = getStreamAt(zipData.getZip64EndOfCentralDirectoryOffset());
        Zip64EndOfCentralDirectory.read(stream, zipData);
      } catch (ZipException ignored) {
        // expected if not in Zip64 format
      }
    }

    if (zipData.isZip64() || strictEntries) {
      // If in Zip64 format or using strict entry numbers, use the parsed information as is to read
      // the central directory file headers.
      readCentralDirectoryFileHeaders(zipData.getExpectedEntries(),
          zipData.getCentralDirectoryOffset());
      return;
    }

    // If not in Zip64 format, compute central directory offset by end of central directory record
    // offset and central directory size to allow reading large non-compliant Zip32 directories.
    long centralDirectoryOffset = eocdLocation - zipData.getCentralDirectorySize();
    // If the lower 4 bytes match, the above calculation is correct; otherwise fallback to
    // reported offset. A negative result cannot be a file position, so it never counts as a match.
    if (centralDirectoryOffset >= 0
        && (int) centralDirectoryOffset == (int) zipData.getCentralDirectoryOffset()) {
      readCentralDirectoryFileHeaders(centralDirectoryOffset);
    } else {
      readCentralDirectoryFileHeaders(zipData.getExpectedEntries(),
          zipData.getCentralDirectoryOffset());
    }
  }

  /**
   * Looks for the target sub array in the buffer scanning backwards starting at offset. Returns the
   * index where the target is found or -1 if not found.
   *
   * @param target the sub array to find
   * @param buffer the array to scan
   * @param offset the index of where to begin scanning
   * @return the index of target within buffer or -1 if not found
   */
  private int scanBackwards(byte[] target, byte[] buffer, int offset) {
    // Never start so far right that the target would run past the end of the buffer.
    int start = Math.min(offset, buffer.length - target.length);
    for (int i = start; i >= 0; i--) {
      for (int j = 0; j < target.length; j++) {
        if (buffer[i + j] != target[j]) {
          break;
        } else if (j == target.length - 1) {
          return i;
        }
      }
    }
    return -1;
  }

  /**
   * Finds the file offset of the end of central directory record.
   *
   * <p>A signature match is accepted only when the record's comment length field equals the number
   * of bytes between the fixed-size part of the record and the end of the file. Because that
   * length is an unsigned 16-bit value, an acceptable record can only start within the last
   * fixed-size-plus-{@value #MAX_COMMENT_LENGTH} bytes, so only that tail of the file is read and
   * scanned, from the end towards the start.
   *
   * @return the file offset of the end of central directory record
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  private long findEndOfCentralDirectoryRecord() throws IOException {
    long fileLength = in.length();
    if (fileLength < EndOfCentralDirectoryRecord.FIXED_DATA_SIZE) {
      throw missingEndOfCentralDirectory();
    }

    byte[] signature = ZipUtil.intToLittleEndian(EndOfCentralDirectoryRecord.SIGNATURE);
    long maxRecordSize = (long) EndOfCentralDirectoryRecord.FIXED_DATA_SIZE + MAX_COMMENT_LENGTH;
    byte[] tail = new byte[(int) Math.min(fileLength, maxRecordSize)];
    long tailOffset = fileLength - tail.length;
    in.seek(tailOffset);
    in.readFully(tail);

    int candidate = scanBackwards(signature, tail, tail.length);
    while (candidate != -1) {
      // Number of bytes from the candidate signature to the end of the file.
      long eocdSize = tail.length - candidate;
      if (eocdSize >= EndOfCentralDirectoryRecord.FIXED_DATA_SIZE) {
        // The size check above guarantees the comment length field lies inside the tail buffer.
        int commentLength = ZipUtil.getUnsignedShort(tail,
            candidate + EndOfCentralDirectoryRecord.COMMENT_LENGTH_OFFSET);
        long readCommentLength = eocdSize - EndOfCentralDirectoryRecord.FIXED_DATA_SIZE;
        if (commentLength == readCommentLength) {
          return tailOffset + candidate;
        }
      }
      // The signature bytes may also occur inside data or a comment; keep looking further back.
      candidate = scanBackwards(signature, tail, candidate - 1);
    }
    throw missingEndOfCentralDirectory();
  }

  /**
   * Creates the exception reported when no end of central directory record can be located.
   */
  private ZipException missingEndOfCentralDirectory() {
    return new ZipException(String.format("Zip file '%s' is malformed. It does not contain an end"
        + " of central directory record.", file.getName()));
  }

  /**
   * Reads and parses ZIP file entries from the central directory.
   *
   * @param count the number of entries in the central directory
   * @param fileOffset the file offset of the start of the central directory
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  private void readCentralDirectoryFileHeaders(long count, long fileOffset) throws IOException {
    InputStream centralDirectory = getStreamAt(fileOffset);
    for (long i = 0; i < count; i++) {
      ZipFileEntry entry = CentralDirectoryFileHeader.read(centralDirectory, zipData.getCharset());
      zipData.addEntry(entry);
    }
  }

  /**
   * Reads and parses ZIP file entries from the central directory.
   *
   * <p>Entries are read until the number of bytes consumed reaches the central directory size
   * recorded in the ZIP data, rather than relying on an entry count.
   *
   * @param fileOffset the file offset of the start of the central directory
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  private void readCentralDirectoryFileHeaders(long fileOffset) throws IOException {
    CountingInputStream centralDirectory = new CountingInputStream(getStreamAt(fileOffset));
    while (centralDirectory.getCount() < zipData.getCentralDirectorySize()) {
      ZipFileEntry entry = CentralDirectoryFileHeader.read(centralDirectory, zipData.getCharset());
      zipData.addEntry(entry);
    }
  }

  /**
   * Returns a new {@link InputStream} positioned at fileOffset.
   *
   * <p>The returned stream reads through the channel of the underlying file, so repositioning
   * that channel (for example by another call to this method) also moves this stream.
   *
   * @param fileOffset the non-negative file offset to start reading from
   * @throws ZipException if {@code fileOffset} is negative, which can only result from malformed
   *     offsets or sizes in the ZIP file
   * @throws IOException if an I/O error has occurred
   */
  protected InputStream getStreamAt(long fileOffset) throws IOException {
    if (fileOffset < 0) {
      // FileChannel.position would reject this with an unchecked IllegalArgumentException;
      // offsets come from file contents, so report it as a format error instead.
      throw new ZipException(String.format(
          "Zip file '%s' is malformed. It refers to the negative file offset %d.",
          file.getName(), fileOffset));
    }
    return new BufferedInputStream(Channels.newInputStream(in.getChannel().position(fileOffset)));
  }
}