Adding an option to set the digest function that everything uses. Minor refactoring: enabling potential fast digest computation of more than one digest function type. Usage: bazel --host_jvm_args="-Dbazel.DigestFunction=SHA1" build ... Ugliness: using a system property (a static non-final variable), because the better way to do it (a flag) would result in a much, much larger refactoring. More ugliness: I have updated the minimal amount of tests. A lot of tests are still relying on the default value of MD5. Ideally, they need to be updated as well. -- MOS_MIGRATED_REVID=139490836
diff --git a/src/main/java/com/google/devtools/build/lib/BUILD b/src/main/java/com/google/devtools/build/lib/BUILD index cd602c5..a50c22e 100644 --- a/src/main/java/com/google/devtools/build/lib/BUILD +++ b/src/main/java/com/google/devtools/build/lib/BUILD
@@ -141,6 +141,7 @@ ":preconditions", ":unix", ":windows", + "//src/main/java/com/google/devtools/common/options", "//third_party:guava", "//third_party:jsr305", ],
diff --git a/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java b/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java index a35eed6..3dcfe54 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java +++ b/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java
@@ -19,17 +19,14 @@ import com.google.devtools.build.lib.util.BlazeClock; import com.google.devtools.build.lib.util.Fingerprint; import com.google.devtools.build.lib.util.LoggingUtil; -import com.google.devtools.build.lib.util.Preconditions; import com.google.devtools.build.lib.util.VarInt; import com.google.devtools.build.lib.vfs.Path; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; import java.util.Map; -import java.util.Objects; import java.util.concurrent.atomic.AtomicBoolean; import java.util.logging.Level; -import javax.annotation.Nullable; /** * Utility class for getting md5 digests of files. @@ -45,7 +42,7 @@ public class DigestUtils { // Object to synchronize on when serializing large file reads. - private static final Object MD5_LOCK = new Object(); + private static final Object DIGEST_LOCK = new Object(); private static final AtomicBoolean MULTI_THREADED_DIGEST = new AtomicBoolean(false); /** Private constructor to prevent instantiation of utility class. */ @@ -57,9 +54,10 @@ * calculations and underlying file system cannot provide it via extended * attribute. */ - private static byte[] getDigestInExclusiveMode(Path path) throws IOException { + private static byte[] getDigestInExclusiveMode(Path path) + throws IOException { long startTime = BlazeClock.nanoTime(); - synchronized (MD5_LOCK) { + synchronized (DIGEST_LOCK) { Profiler.instance().logSimpleTask(startTime, ProfilerTask.WAIT, path.getPathString()); return getDigestInternal(path); } @@ -67,29 +65,14 @@ private static byte[] getDigestInternal(Path path) throws IOException { long startTime = BlazeClock.nanoTime(); - byte[] md5bin = path.getMD5Digest(); + byte[] digest = path.getDigest(); long millis = (BlazeClock.nanoTime() - startTime) / 1000000; if (millis > 5000L) { System.err.println("Slow read: a " + path.getFileSize() + "-byte read from " + path + " took " + millis + "ms."); } - return md5bin; - } - - private static boolean binaryDigestWellFormed(byte[] digest) { - Preconditions.checkNotNull(digest); - return digest.length == 16; - } - - /** - * Returns the the fast md5 digest of the file, or null if not available. - */ - @Nullable - private static byte[] getFastDigest(Path path) throws IOException { - // TODO(bazel-team): the action cache currently only works with md5 digests but it ought to - // work with any opaque digest. - return Objects.equals(path.getFastDigestFunctionType(), "MD5") ? path.getFastDigest() : null; + return digest; } /** @@ -100,7 +83,7 @@ } /** - * Get the md5 digest of {@code path}, using a constant-time xattr call if the filesystem supports + * Get the digest of {@code path}, using a constant-time xattr call if the filesystem supports * it, and calculating the digest manually otherwise. * * @param path Path of the file. @@ -108,20 +91,21 @@ * serially or in parallel. Files larger than a certain threshold will be read serially, in order * to avoid excessive disk seeks. */ - public static byte[] getDigestOrFail(Path path, long fileSize) throws IOException { - byte[] md5bin = getFastDigest(path); + public static byte[] getDigestOrFail(Path path, long fileSize) + throws IOException { + byte[] digest = path.getFastDigest(); - if (md5bin != null && !binaryDigestWellFormed(md5bin)) { + if (digest != null && !path.isValidDigest(digest)) { // Fail-soft in cases where md5bin is non-null, but not a valid digest. String msg = String.format("Malformed digest '%s' for file %s", - BaseEncoding.base16().lowerCase().encode(md5bin), + BaseEncoding.base16().lowerCase().encode(digest), path); LoggingUtil.logToRemote(Level.SEVERE, msg, new IllegalStateException(msg)); - md5bin = null; + digest = null; } - if (md5bin != null) { - return md5bin; + if (digest != null) { + return digest; } else if (fileSize > 4096 && !MULTI_THREADED_DIGEST.get()) { // We'll have to read file content in order to calculate the digest. In that case // it would be beneficial to serialize those calculations since there is a high
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/MavenServerFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/MavenServerFunction.java index 2111aed..a25bc2c 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/MavenServerFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/MavenServerFunction.java
@@ -132,7 +132,7 @@ Path path = entry.getValue().realRootedPath().asPath(); if (path.exists()) { fingerprint.addBoolean(true); - fingerprint.addBytes(path.getMD5Digest()); + fingerprint.addBytes(path.getDigest()); } else { fingerprint.addBoolean(false); }
diff --git a/src/main/java/com/google/devtools/build/lib/exec/SingleBuildFileCache.java b/src/main/java/com/google/devtools/build/lib/exec/SingleBuildFileCache.java index 48e6f95..0d154f1 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/SingleBuildFileCache.java +++ b/src/main/java/com/google/devtools/build/lib/exec/SingleBuildFileCache.java
@@ -70,7 +70,7 @@ Path path = null; try { path = fs.getPath(fullPath(input)); - byte[] digest = path.getMD5Digest(); + byte[] digest = path.getDigest(); BaseEncoding hex = BaseEncoding.base16().lowerCase(); ByteString hexDigest = ByteString.copyFrom(hex.encode(digest).getBytes(US_ASCII)); // Inject reverse mapping. Doing this unconditionally in getDigest() showed up
diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/CrosstoolConfigurationLoader.java b/src/main/java/com/google/devtools/build/lib/rules/cpp/CrosstoolConfigurationLoader.java index 7201797..29ace2f 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/cpp/CrosstoolConfigurationLoader.java +++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/CrosstoolConfigurationLoader.java
@@ -220,7 +220,7 @@ return null; } - return new CrosstoolProto(path.getMD5Digest(), "CROSSTOOL file " + path.getPathString()) { + return new CrosstoolProto(path.getDigest(), "CROSSTOOL file " + path.getPathString()) { @Override public String getContents() throws IOException { try (InputStream inputStream = path.getInputStream()) {
diff --git a/src/main/java/com/google/devtools/build/lib/rules/test/TestStrategy.java b/src/main/java/com/google/devtools/build/lib/rules/test/TestStrategy.java index 2759796..7590ef1 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/test/TestStrategy.java +++ b/src/main/java/com/google/devtools/build/lib/rules/test/TestStrategy.java
@@ -438,8 +438,8 @@ try { // Avoid rebuilding the runfiles directory if the manifest in it matches the input manifest, // implying the symlinks exist and are already up to date. - if (Arrays.equals(runfilesDir.getRelative("MANIFEST").getMD5Digest(), - execSettings.getInputManifest().getPath().getMD5Digest())) { + if (Arrays.equals(runfilesDir.getRelative("MANIFEST").getDigest(), + execSettings.getInputManifest().getPath().getDigest())) { return; } } catch (IOException e1) {
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/BuildFileAST.java b/src/main/java/com/google/devtools/build/lib/syntax/BuildFileAST.java index bf8276e..ad58b48 100644 --- a/src/main/java/com/google/devtools/build/lib/syntax/BuildFileAST.java +++ b/src/main/java/com/google/devtools/build/lib/syntax/BuildFileAST.java
@@ -270,7 +270,7 @@ Parser.ParseResult result = Parser.parseFileForSkylark(input, eventHandler); return create( ImmutableList.<Statement>of(), result, - HashCode.fromBytes(file.getMD5Digest()).toString(), eventHandler); + HashCode.fromBytes(file.getDigest()).toString(), eventHandler); } /**
diff --git a/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java b/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java index 90aac0e..4d93804 100644 --- a/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java
@@ -16,6 +16,7 @@ import static java.nio.charset.StandardCharsets.ISO_8859_1; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import com.google.common.hash.Hashing; import com.google.common.io.ByteSource; @@ -24,6 +25,8 @@ import com.google.devtools.build.lib.vfs.Dirent.Type; import com.google.devtools.build.lib.vfs.Path.PathFactory; import com.google.devtools.build.lib.vfs.Path.PathFactory.TranslatedPath; +import com.google.devtools.common.options.EnumConverter; +import com.google.devtools.common.options.OptionsParsingException; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; @@ -39,6 +42,49 @@ @ThreadSafe public abstract class FileSystem { + /** Type of hash function to use for digesting files. */ + public enum HashFunction { + MD5(16), + SHA1(20); + + private final int digestSize; + + HashFunction(int digestSize) { + this.digestSize = digestSize; + } + + /** Converts to {@link HashFunction}. */ + public static class Converter extends EnumConverter<HashFunction> { + public Converter() { + super(HashFunction.class, "hash function"); + } + } + + public boolean isValidDigest(byte[] digest) { + return digest != null && digest.length == digestSize; + } + } + + // This is effectively final, should be changed only in unit-tests! + private static HashFunction DIGEST_FUNCTION; + static { + try { + DIGEST_FUNCTION = new HashFunction.Converter().convert( + System.getProperty("bazel.DigestFunction", "MD5")); + } catch (OptionsParsingException e) { + throw new IllegalStateException(e); + } + } + + @VisibleForTesting + public static void setDigestFunctionForTesting(HashFunction value) { + DIGEST_FUNCTION = value; + } + + public static HashFunction getDigestFunction() { + return DIGEST_FUNCTION; + } + private enum UnixPathFactory implements PathFactory { INSTANCE { @Override @@ -261,10 +307,11 @@ } /** - * Returns the type of digest that may be returned by {@link #getFastDigest}, or {@code null} - * if the filesystem doesn't support them. + * Gets a fast digest for the given path and hash function type, or {@code null} if there + * isn't one available or the filesystem doesn't support them. This digest should be + * suitable for detecting changes to the file. */ - protected String getFastDigestFunctionType(Path path) { + protected byte[] getFastDigest(Path path, HashFunction hashFunction) throws IOException { return null; } @@ -273,8 +320,43 @@ * filesystem doesn't support them. This digest should be suitable for detecting changes to the * file. */ - protected byte[] getFastDigest(Path path) throws IOException { - return null; + protected final byte[] getFastDigest(Path path) throws IOException { + return getFastDigest(path, DIGEST_FUNCTION); + } + + /** + * Returns whether the given digest is a valid digest for the default digest function. + */ + public boolean isValidDigest(byte[] digest) { + return DIGEST_FUNCTION.isValidDigest(digest); + } + + /** + * Returns the digest of the file denoted by the path, following + * symbolic links, for the given hash digest function. + * + * @return a new byte array containing the file's digest + * @throws IOException if the digest could not be computed for any reason + */ + protected final byte[] getDigest(final Path path, HashFunction hashFunction) throws IOException { + switch(hashFunction) { + case MD5: + return getMD5Digest(path); + case SHA1: + return getSHA1Digest(path); + default: + throw new IOException("Unsupported hash function: " + hashFunction); + } + } + + /** + * Returns the digest of the file denoted by the path, following symbolic links. + * + * @return a new byte array containing the file's digest + * @throws IOException if the digest could not be computed for any reason + */ + protected byte[] getDigest(final Path path) throws IOException { + return getDigest(path, DIGEST_FUNCTION); } /**
diff --git a/src/main/java/com/google/devtools/build/lib/vfs/Path.java b/src/main/java/com/google/devtools/build/lib/vfs/Path.java index 6599952..e039efe 100644 --- a/src/main/java/com/google/devtools/build/lib/vfs/Path.java +++ b/src/main/java/com/google/devtools/build/lib/vfs/Path.java
@@ -19,6 +19,7 @@ import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; import com.google.devtools.build.lib.util.Preconditions; import com.google.devtools.build.lib.util.StringCanonicalizer; +import com.google.devtools.build.lib.vfs.FileSystem.HashFunction; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -1039,19 +1040,26 @@ } /** - * Returns the type of digest that may be returned by {@link #getFastDigest}, or {@code null} - * if the filesystem doesn't support them. + * Gets a fast digest for the given path, or {@code null} if there isn't one available. The + * digest should be suitable for detecting changes to the file. */ - public String getFastDigestFunctionType() { - return fileSystem.getFastDigestFunctionType(this); + public byte[] getFastDigest() throws IOException { + return fileSystem.getFastDigest(this); } /** * Gets a fast digest for the given path, or {@code null} if there isn't one available. The * digest should be suitable for detecting changes to the file. */ - public byte[] getFastDigest() throws IOException { - return fileSystem.getFastDigest(this); + public byte[] getFastDigest(HashFunction hashFunction) throws IOException { + return fileSystem.getFastDigest(this, hashFunction); + } + + /** + * Returns whether the given digest is a valid digest for the default system digest function. + */ + public boolean isValidDigest(byte[] digest) { + return fileSystem.isValidDigest(digest); } /** @@ -1083,6 +1091,28 @@ } /** + * Returns the digest of the file denoted by the current path, + * following symbolic links. + * + * @return a new byte array containing the file's digest + * @throws IOException if the digest could not be computed for any reason + */ + public byte[] getDigest() throws IOException { + return fileSystem.getDigest(this); + } + + /** + * Returns the digest of the file denoted by the current path and digest function, + * following symbolic links. + * + * @return a new byte array containing the file's digest + * @throws IOException if the digest could not be computed for any reason + */ + public byte[] getDigest(HashFunction hashFunction) throws IOException { + return fileSystem.getDigest(this, hashFunction); + } + + /** * Opens the file denoted by this path, following symbolic links, for reading, * and returns an input stream to it. *
diff --git a/src/main/java/com/google/devtools/build/lib/vfs/UnionFileSystem.java b/src/main/java/com/google/devtools/build/lib/vfs/UnionFileSystem.java index a0a0955..b246aa6 100644 --- a/src/main/java/com/google/devtools/build/lib/vfs/UnionFileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/vfs/UnionFileSystem.java
@@ -18,6 +18,7 @@ import com.google.devtools.build.lib.concurrent.ThreadSafety; import com.google.devtools.build.lib.util.Preconditions; import com.google.devtools.build.lib.util.StringTrie; +import com.google.devtools.build.lib.vfs.FileSystem.HashFunction; import java.io.IOException; import java.io.InputStream; @@ -380,15 +381,9 @@ } @Override - protected String getFastDigestFunctionType(Path path) { + protected byte[] getFastDigest(Path path, HashFunction hashFunction) throws IOException { FileSystem delegate = getDelegate(path); - return delegate.getFastDigestFunctionType(adjustPath(path, delegate)); - } - - @Override - protected byte[] getFastDigest(Path path) throws IOException { - FileSystem delegate = getDelegate(path); - return delegate.getFastDigest(adjustPath(path, delegate)); + return delegate.getFastDigest(adjustPath(path, delegate), hashFunction); } @Override