| // Copyright 2014 The Bazel Authors. All rights reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| package com.google.devtools.build.lib.actions.cache; |
| |
| import com.google.common.annotations.VisibleForTesting; |
| import com.google.common.base.Preconditions; |
| import com.google.common.cache.Cache; |
| import com.google.common.cache.CacheBuilder; |
| import com.google.common.cache.CacheStats; |
| import com.google.common.primitives.Longs; |
| import com.google.devtools.build.lib.clock.BlazeClock; |
| import com.google.devtools.build.lib.profiler.Profiler; |
| import com.google.devtools.build.lib.profiler.ProfilerTask; |
| import com.google.devtools.build.lib.util.Fingerprint; |
| import com.google.devtools.build.lib.util.VarInt; |
| import com.google.devtools.build.lib.vfs.FileStatus; |
| import com.google.devtools.build.lib.vfs.Path; |
| import com.google.devtools.build.lib.vfs.PathFragment; |
| import java.io.IOException; |
| import java.io.OutputStream; |
| import java.nio.ByteBuffer; |
| import java.util.Map; |
| import java.util.concurrent.atomic.AtomicBoolean; |
| |
| /** |
| * Utility class for getting digests of files. |
| * |
| * <p>This class implements an optional cache of file digests when the computation of the digests is |
| * costly (i.e. when {@link Path#getFastDigest()} is not available). The cache can be enabled via |
| * the {@link #configureCache(long)} function, but note that enabling this cache might have an |
| * impact on correctness because not all changes to files can be purely detected from their |
| * metadata. |
| * |
| * <p>Note that this class is responsible for digesting file metadata in an order-independent |
| * manner. Care must be taken to do this properly. The digest must be a function of the set of |
| * (path, metadata) tuples. While the order of these pairs must not matter, it would <b>not</b> be |
| * safe to make the digest be a function of the set of paths and the set of metadata. |
| * |
| * <p>Note that the (path, metadata) tuples must be unique, otherwise the XOR-based approach will |
| * fail. |
| */ |
| public final class DigestUtils { |
| |
| // Object to synchronize on when serializing large file reads. |
| private static final Object DIGEST_LOCK = new Object(); |
| private static final AtomicBoolean MULTI_THREADED_DIGEST = new AtomicBoolean(false); |
| |
| // Typical size for a digest byte array. |
| public static final int ESTIMATED_SIZE = 32; |
| |
| // Files of this size or less are assumed to be readable in one seek. |
| // (This is the default readahead window on Linux.) |
| @VisibleForTesting // the unittest is in a different package! |
| public static final int MULTI_THREADED_DIGEST_MAX_FILE_SIZE = 128 * 1024; |
| |
| // The time that a digest computation has to take at least in order to be considered a slow-read. |
| private static final long SLOW_READ_MILLIS = 5000L; |
| |
| // The average bytes-per-millisecond throughput that a digest computation has to go below in order |
| // to be considered a slow-read. |
| private static final long SLOW_READ_THROUGHPUT = (10 * 1024 * 1024) / 1000; |
| |
| /** |
| * Keys used to cache the values of the digests for files where we don't have fast digests. |
| * |
| * <p>The cache keys are derived from many properties of the file metadata in an attempt to be |
| * able to detect most file changes. |
| */ |
| private static class CacheKey { |
| /** Path to the file. */ |
| private final PathFragment path; |
| |
| /** File system identifier of the file (typically the inode number). */ |
| private final long nodeId; |
| |
| /** Last modification time of the file. */ |
| private final long modifiedTime; |
| |
| /** Size of the file. */ |
| private final long size; |
| |
| /** |
| * Constructs a new cache key. |
| * |
| * @param path path to the file |
| * @param status file status data from which to obtain the cache key properties |
| * @throws IOException if reading the file status data fails |
| */ |
| public CacheKey(Path path, FileStatus status) throws IOException { |
| this.path = path.asFragment(); |
| this.nodeId = status.getNodeId(); |
| this.modifiedTime = status.getLastModifiedTime(); |
| this.size = status.getSize(); |
| } |
| |
| @Override |
| public boolean equals(Object object) { |
| if (object == this) { |
| return true; |
| } else if (!(object instanceof CacheKey)) { |
| return false; |
| } else { |
| CacheKey key = (CacheKey) object; |
| return path.equals(key.path) |
| && nodeId == key.nodeId |
| && modifiedTime == key.modifiedTime |
| && size == key.size; |
| } |
| } |
| |
| @Override |
| public int hashCode() { |
| int result = 17; |
| result = 31 * result + path.hashCode(); |
| result = 31 * result + Longs.hashCode(nodeId); |
| result = 31 * result + Longs.hashCode(modifiedTime); |
| result = 31 * result + Longs.hashCode(size); |
| return result; |
| } |
| } |
| |
| /** |
| * Global cache of files to their digests. |
| * |
| * <p>This is null when the cache is disabled. |
| * |
| * <p>Note that we do not use a {@link com.google.common.cache.LoadingCache} because our keys |
| * represent the paths as strings, not as {@link Path} instances. As a result, the loading |
| * function cannot actually compute the digests of the files so we have to handle this externally. |
| */ |
| private static Cache<CacheKey, byte[]> globalCache = null; |
| |
| /** Private constructor to prevent instantiation of utility class. */ |
| private DigestUtils() {} |
| |
| /** |
| * Obtain file's digset using synchronized method, ensuring that system is not overloaded in case |
| * when multiple threads are requesting digest calculations and underlying file system cannot |
| * provide it via extended attribute. |
| */ |
| private static byte[] getDigestInExclusiveMode(Path path) throws IOException { |
| long startTime = BlazeClock.nanoTime(); |
| synchronized (DIGEST_LOCK) { |
| Profiler.instance().logSimpleTask(startTime, ProfilerTask.WAIT, path.getPathString()); |
| return getDigestInternal(path); |
| } |
| } |
| |
| private static byte[] getDigestInternal(Path path) throws IOException { |
| long startTime = BlazeClock.nanoTime(); |
| byte[] digest = path.getDigest(); |
| |
| // When using multi-threaded digesting, it makes no sense to use the throughput of a single |
| // digest operation to determine whether a read was abnormally slow (as the scheduler might just |
| // have preferred other reads). |
| if (!MULTI_THREADED_DIGEST.get()) { |
| long millis = (BlazeClock.nanoTime() - startTime) / 1000000; |
| if (millis > SLOW_READ_MILLIS && (path.getFileSize() / millis) < SLOW_READ_THROUGHPUT) { |
| System.err.printf( |
| "Slow read: a %d-byte read from %s took %d ms.%n", path.getFileSize(), path, millis); |
| } |
| } |
| |
| return digest; |
| } |
| |
| /** |
| * Enables the caching of file digests based on file status data. |
| * |
| * <p>If the cache was already enabled, this causes the cache to be reinitialized thus losing all |
| * contents. If the given size is zero, the cache is disabled altogether. |
| * |
| * @param maximumSize maximumSize of the cache in number of entries |
| */ |
| public static void configureCache(long maximumSize) { |
| if (maximumSize == 0) { |
| globalCache = null; |
| } else { |
| globalCache = CacheBuilder.newBuilder().maximumSize(maximumSize).recordStats().build(); |
| } |
| } |
| |
| /** |
| * Obtains cache statistics. |
| * |
| * <p>The cache must have previously been enabled by a call to {@link #configureCache(long)}. |
| * |
| * @return an immutable snapshot of the cache statistics |
| */ |
| public static CacheStats getCacheStats() { |
| Cache<CacheKey, byte[]> cache = globalCache; |
| Preconditions.checkNotNull(cache, "configureCache() must have been called with a size >= 0"); |
| return cache.stats(); |
| } |
| |
| /** |
| * Enable or disable multi-threaded digesting even for large files. |
| */ |
| public static void setMultiThreadedDigest(boolean multiThreadedDigest) { |
| DigestUtils.MULTI_THREADED_DIGEST.set(multiThreadedDigest); |
| } |
| |
| /** |
| * Get the digest of {@code path}, using a constant-time xattr call if the filesystem supports |
| * it, and calculating the digest manually otherwise. |
| * |
| * @param path Path of the file. |
| * @param fileSize size of the file. Used to determine if digest calculation should be done |
| * serially or in parallel. Files larger than a certain threshold will be read serially, in order |
| * to avoid excessive disk seeks. |
| */ |
| public static byte[] getDigestOrFail(Path path, long fileSize) |
| throws IOException { |
| byte[] digest = path.getFastDigest(); |
| if (digest != null) { |
| return digest; |
| } |
| |
| // Attempt a cache lookup if the cache is enabled. |
| Cache<CacheKey, byte[]> cache = globalCache; |
| CacheKey key = null; |
| if (cache != null) { |
| key = new CacheKey(path, path.stat()); |
| digest = cache.getIfPresent(key); |
| if (digest != null) { |
| return digest; |
| } |
| } |
| |
| // Compute digest from the file contents. |
| if (fileSize > MULTI_THREADED_DIGEST_MAX_FILE_SIZE && !MULTI_THREADED_DIGEST.get()) { |
| // We'll have to read file content in order to calculate the digest. |
| // We avoid overlapping this process for multiple large files, as |
| // seeking back and forth between them will result in an overall loss of |
| // throughput. |
| digest = getDigestInExclusiveMode(path); |
| } else { |
| digest = getDigestInternal(path); |
| } |
| |
| Preconditions.checkNotNull(digest); |
| if (cache != null) { |
| cache.put(key, digest); |
| } |
| return digest; |
| } |
| |
| /** |
| * @param source the byte buffer source. |
| * @return the digest from the given buffer. |
| * @throws IOException if the byte buffer is incorrectly formatted. |
| */ |
| public static byte[] read(ByteBuffer source) throws IOException { |
| int size = VarInt.getVarInt(source); |
| byte[] bytes = new byte[size]; |
| source.get(bytes); |
| return bytes; |
| } |
| |
| /** Write the digest to the output stream. */ |
| public static void write(byte[] digest, OutputStream sink) throws IOException { |
| VarInt.putVarInt(digest.length, sink); |
| sink.write(digest); |
| } |
| |
| /** |
| * @param env A collection of (String, String) pairs. |
| * @return an order-independent digest of the given set of pairs. |
| */ |
| public static byte[] fromEnv(Map<String, String> env) { |
| byte[] result = new byte[0]; |
| Fingerprint fp = new Fingerprint(); |
| for (Map.Entry<String, String> entry : env.entrySet()) { |
| fp.addString(entry.getKey()); |
| fp.addString(entry.getValue()); |
| result = xor(result, fp.digestAndReset()); |
| } |
| return result; |
| } |
| |
| /** Compute lhs ^= rhs bitwise operation of the arrays. May clobber either argument. */ |
| private static byte[] xor(byte[] lhs, byte[] rhs) { |
| int n = rhs.length; |
| if (lhs.length >= n) { |
| for (int i = 0; i < n; i++) { |
| lhs[i] ^= rhs[i]; |
| } |
| return lhs; |
| } |
| return xor(rhs, lhs); |
| } |
| } |