Adding an option to set the digest function that everything uses. Minor refactoring: enabling potential fast digest computation of more than one digest function type.
Usage: bazel --host_jvm_args="-Dbazel.DigestFunction=SHA1" build ...
Ugliness: using a system property (a static non-final variable), because the better way to do it (a flag) would result in a much, much larger refactoring.
More ugliness: I have updated the minimal amount of tests. A lot of tests are still relying on the default value of MD5. Ideally, they need to be updated as well.
--
MOS_MIGRATED_REVID=139490836
diff --git a/src/main/java/com/google/devtools/build/lib/BUILD b/src/main/java/com/google/devtools/build/lib/BUILD
index cd602c5..a50c22e 100644
--- a/src/main/java/com/google/devtools/build/lib/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/BUILD
@@ -141,6 +141,7 @@
":preconditions",
":unix",
":windows",
+ "//src/main/java/com/google/devtools/common/options",
"//third_party:guava",
"//third_party:jsr305",
],
diff --git a/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java b/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java
index a35eed6..3dcfe54 100644
--- a/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java
+++ b/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java
@@ -19,17 +19,14 @@
import com.google.devtools.build.lib.util.BlazeClock;
import com.google.devtools.build.lib.util.Fingerprint;
import com.google.devtools.build.lib.util.LoggingUtil;
-import com.google.devtools.build.lib.util.Preconditions;
import com.google.devtools.build.lib.util.VarInt;
import com.google.devtools.build.lib.vfs.Path;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Map;
-import java.util.Objects;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Level;
-import javax.annotation.Nullable;
/**
* Utility class for getting md5 digests of files.
@@ -45,7 +42,7 @@
public class DigestUtils {
// Object to synchronize on when serializing large file reads.
- private static final Object MD5_LOCK = new Object();
+ private static final Object DIGEST_LOCK = new Object();
private static final AtomicBoolean MULTI_THREADED_DIGEST = new AtomicBoolean(false);
/** Private constructor to prevent instantiation of utility class. */
@@ -57,9 +54,10 @@
* calculations and underlying file system cannot provide it via extended
* attribute.
*/
- private static byte[] getDigestInExclusiveMode(Path path) throws IOException {
+ private static byte[] getDigestInExclusiveMode(Path path)
+ throws IOException {
long startTime = BlazeClock.nanoTime();
- synchronized (MD5_LOCK) {
+ synchronized (DIGEST_LOCK) {
Profiler.instance().logSimpleTask(startTime, ProfilerTask.WAIT, path.getPathString());
return getDigestInternal(path);
}
@@ -67,29 +65,14 @@
private static byte[] getDigestInternal(Path path) throws IOException {
long startTime = BlazeClock.nanoTime();
- byte[] md5bin = path.getMD5Digest();
+ byte[] digest = path.getDigest();
long millis = (BlazeClock.nanoTime() - startTime) / 1000000;
if (millis > 5000L) {
System.err.println("Slow read: a " + path.getFileSize() + "-byte read from " + path
+ " took " + millis + "ms.");
}
- return md5bin;
- }
-
- private static boolean binaryDigestWellFormed(byte[] digest) {
- Preconditions.checkNotNull(digest);
- return digest.length == 16;
- }
-
- /**
- * Returns the the fast md5 digest of the file, or null if not available.
- */
- @Nullable
- private static byte[] getFastDigest(Path path) throws IOException {
- // TODO(bazel-team): the action cache currently only works with md5 digests but it ought to
- // work with any opaque digest.
- return Objects.equals(path.getFastDigestFunctionType(), "MD5") ? path.getFastDigest() : null;
+ return digest;
}
/**
@@ -100,7 +83,7 @@
}
/**
- * Get the md5 digest of {@code path}, using a constant-time xattr call if the filesystem supports
+ * Get the digest of {@code path}, using a constant-time xattr call if the filesystem supports
* it, and calculating the digest manually otherwise.
*
* @param path Path of the file.
@@ -108,20 +91,21 @@
* serially or in parallel. Files larger than a certain threshold will be read serially, in order
* to avoid excessive disk seeks.
*/
- public static byte[] getDigestOrFail(Path path, long fileSize) throws IOException {
- byte[] md5bin = getFastDigest(path);
+ public static byte[] getDigestOrFail(Path path, long fileSize)
+ throws IOException {
+ byte[] digest = path.getFastDigest();
- if (md5bin != null && !binaryDigestWellFormed(md5bin)) {
+ if (digest != null && !path.isValidDigest(digest)) {
// Fail-soft in cases where md5bin is non-null, but not a valid digest.
String msg = String.format("Malformed digest '%s' for file %s",
- BaseEncoding.base16().lowerCase().encode(md5bin),
+ BaseEncoding.base16().lowerCase().encode(digest),
path);
LoggingUtil.logToRemote(Level.SEVERE, msg, new IllegalStateException(msg));
- md5bin = null;
+ digest = null;
}
- if (md5bin != null) {
- return md5bin;
+ if (digest != null) {
+ return digest;
} else if (fileSize > 4096 && !MULTI_THREADED_DIGEST.get()) {
// We'll have to read file content in order to calculate the digest. In that case
// it would be beneficial to serialize those calculations since there is a high
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/MavenServerFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/MavenServerFunction.java
index 2111aed..a25bc2c 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/MavenServerFunction.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/MavenServerFunction.java
@@ -132,7 +132,7 @@
Path path = entry.getValue().realRootedPath().asPath();
if (path.exists()) {
fingerprint.addBoolean(true);
- fingerprint.addBytes(path.getMD5Digest());
+ fingerprint.addBytes(path.getDigest());
} else {
fingerprint.addBoolean(false);
}
diff --git a/src/main/java/com/google/devtools/build/lib/exec/SingleBuildFileCache.java b/src/main/java/com/google/devtools/build/lib/exec/SingleBuildFileCache.java
index 48e6f95..0d154f1 100644
--- a/src/main/java/com/google/devtools/build/lib/exec/SingleBuildFileCache.java
+++ b/src/main/java/com/google/devtools/build/lib/exec/SingleBuildFileCache.java
@@ -70,7 +70,7 @@
Path path = null;
try {
path = fs.getPath(fullPath(input));
- byte[] digest = path.getMD5Digest();
+ byte[] digest = path.getDigest();
BaseEncoding hex = BaseEncoding.base16().lowerCase();
ByteString hexDigest = ByteString.copyFrom(hex.encode(digest).getBytes(US_ASCII));
// Inject reverse mapping. Doing this unconditionally in getDigest() showed up
diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/CrosstoolConfigurationLoader.java b/src/main/java/com/google/devtools/build/lib/rules/cpp/CrosstoolConfigurationLoader.java
index 7201797..29ace2f 100644
--- a/src/main/java/com/google/devtools/build/lib/rules/cpp/CrosstoolConfigurationLoader.java
+++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/CrosstoolConfigurationLoader.java
@@ -220,7 +220,7 @@
return null;
}
- return new CrosstoolProto(path.getMD5Digest(), "CROSSTOOL file " + path.getPathString()) {
+ return new CrosstoolProto(path.getDigest(), "CROSSTOOL file " + path.getPathString()) {
@Override
public String getContents() throws IOException {
try (InputStream inputStream = path.getInputStream()) {
diff --git a/src/main/java/com/google/devtools/build/lib/rules/test/TestStrategy.java b/src/main/java/com/google/devtools/build/lib/rules/test/TestStrategy.java
index 2759796..7590ef1 100644
--- a/src/main/java/com/google/devtools/build/lib/rules/test/TestStrategy.java
+++ b/src/main/java/com/google/devtools/build/lib/rules/test/TestStrategy.java
@@ -438,8 +438,8 @@
try {
// Avoid rebuilding the runfiles directory if the manifest in it matches the input manifest,
// implying the symlinks exist and are already up to date.
- if (Arrays.equals(runfilesDir.getRelative("MANIFEST").getMD5Digest(),
- execSettings.getInputManifest().getPath().getMD5Digest())) {
+ if (Arrays.equals(runfilesDir.getRelative("MANIFEST").getDigest(),
+ execSettings.getInputManifest().getPath().getDigest())) {
return;
}
} catch (IOException e1) {
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/BuildFileAST.java b/src/main/java/com/google/devtools/build/lib/syntax/BuildFileAST.java
index bf8276e..ad58b48 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/BuildFileAST.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/BuildFileAST.java
@@ -270,7 +270,7 @@
Parser.ParseResult result = Parser.parseFileForSkylark(input, eventHandler);
return create(
ImmutableList.<Statement>of(), result,
- HashCode.fromBytes(file.getMD5Digest()).toString(), eventHandler);
+ HashCode.fromBytes(file.getDigest()).toString(), eventHandler);
}
/**
diff --git a/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java b/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java
index 90aac0e..4d93804 100644
--- a/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java
+++ b/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java
@@ -16,6 +16,7 @@
import static java.nio.charset.StandardCharsets.ISO_8859_1;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.hash.Hashing;
import com.google.common.io.ByteSource;
@@ -24,6 +25,8 @@
import com.google.devtools.build.lib.vfs.Dirent.Type;
import com.google.devtools.build.lib.vfs.Path.PathFactory;
import com.google.devtools.build.lib.vfs.Path.PathFactory.TranslatedPath;
+import com.google.devtools.common.options.EnumConverter;
+import com.google.devtools.common.options.OptionsParsingException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
@@ -39,6 +42,49 @@
@ThreadSafe
public abstract class FileSystem {
+ /** Type of hash function to use for digesting files. */
+ public enum HashFunction {
+ MD5(16),
+ SHA1(20);
+
+ private final int digestSize;
+
+ HashFunction(int digestSize) {
+ this.digestSize = digestSize;
+ }
+
+ /** Converts to {@link HashFunction}. */
+ public static class Converter extends EnumConverter<HashFunction> {
+ public Converter() {
+ super(HashFunction.class, "hash function");
+ }
+ }
+
+ public boolean isValidDigest(byte[] digest) {
+ return digest != null && digest.length == digestSize;
+ }
+ }
+
+ // This is effectively final, should be changed only in unit-tests!
+ private static HashFunction DIGEST_FUNCTION;
+ static {
+ try {
+ DIGEST_FUNCTION = new HashFunction.Converter().convert(
+ System.getProperty("bazel.DigestFunction", "MD5"));
+ } catch (OptionsParsingException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ @VisibleForTesting
+ public static void setDigestFunctionForTesting(HashFunction value) {
+ DIGEST_FUNCTION = value;
+ }
+
+ public static HashFunction getDigestFunction() {
+ return DIGEST_FUNCTION;
+ }
+
private enum UnixPathFactory implements PathFactory {
INSTANCE {
@Override
@@ -261,10 +307,11 @@
}
/**
- * Returns the type of digest that may be returned by {@link #getFastDigest}, or {@code null}
- * if the filesystem doesn't support them.
+ * Gets a fast digest for the given path and hash function type, or {@code null} if there
+ * isn't one available or the filesystem doesn't support them. This digest should be
+ * suitable for detecting changes to the file.
*/
- protected String getFastDigestFunctionType(Path path) {
+ protected byte[] getFastDigest(Path path, HashFunction hashFunction) throws IOException {
return null;
}
@@ -273,8 +320,43 @@
* filesystem doesn't support them. This digest should be suitable for detecting changes to the
* file.
*/
- protected byte[] getFastDigest(Path path) throws IOException {
- return null;
+ protected final byte[] getFastDigest(Path path) throws IOException {
+ return getFastDigest(path, DIGEST_FUNCTION);
+ }
+
+ /**
+ * Returns whether the given digest is a valid digest for the default digest function.
+ */
+ public boolean isValidDigest(byte[] digest) {
+ return DIGEST_FUNCTION.isValidDigest(digest);
+ }
+
+ /**
+ * Returns the digest of the file denoted by the path, following
+ * symbolic links, for the given hash digest function.
+ *
+ * @return a new byte array containing the file's digest
+ * @throws IOException if the digest could not be computed for any reason
+ */
+ protected final byte[] getDigest(final Path path, HashFunction hashFunction) throws IOException {
+ switch(hashFunction) {
+ case MD5:
+ return getMD5Digest(path);
+ case SHA1:
+ return getSHA1Digest(path);
+ default:
+ throw new IOException("Unsupported hash function: " + hashFunction);
+ }
+ }
+
+ /**
+ * Returns the digest of the file denoted by the path, following symbolic links.
+ *
+ * @return a new byte array containing the file's digest
+ * @throws IOException if the digest could not be computed for any reason
+ */
+ protected byte[] getDigest(final Path path) throws IOException {
+ return getDigest(path, DIGEST_FUNCTION);
}
/**
diff --git a/src/main/java/com/google/devtools/build/lib/vfs/Path.java b/src/main/java/com/google/devtools/build/lib/vfs/Path.java
index 6599952..e039efe 100644
--- a/src/main/java/com/google/devtools/build/lib/vfs/Path.java
+++ b/src/main/java/com/google/devtools/build/lib/vfs/Path.java
@@ -19,6 +19,7 @@
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
import com.google.devtools.build.lib.util.Preconditions;
import com.google.devtools.build.lib.util.StringCanonicalizer;
+import com.google.devtools.build.lib.vfs.FileSystem.HashFunction;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
@@ -1039,19 +1040,26 @@
}
/**
- * Returns the type of digest that may be returned by {@link #getFastDigest}, or {@code null}
- * if the filesystem doesn't support them.
+ * Gets a fast digest for the given path, or {@code null} if there isn't one available. The
+ * digest should be suitable for detecting changes to the file.
*/
- public String getFastDigestFunctionType() {
- return fileSystem.getFastDigestFunctionType(this);
+ public byte[] getFastDigest() throws IOException {
+ return fileSystem.getFastDigest(this);
}
/**
* Gets a fast digest for the given path, or {@code null} if there isn't one available. The
* digest should be suitable for detecting changes to the file.
*/
- public byte[] getFastDigest() throws IOException {
- return fileSystem.getFastDigest(this);
+ public byte[] getFastDigest(HashFunction hashFunction) throws IOException {
+ return fileSystem.getFastDigest(this, hashFunction);
+ }
+
+ /**
+ * Returns whether the given digest is a valid digest for the default system digest function.
+ */
+ public boolean isValidDigest(byte[] digest) {
+ return fileSystem.isValidDigest(digest);
}
/**
@@ -1083,6 +1091,28 @@
}
/**
+ * Returns the digest of the file denoted by the current path,
+ * following symbolic links.
+ *
+ * @return a new byte array containing the file's digest
+ * @throws IOException if the digest could not be computed for any reason
+ */
+ public byte[] getDigest() throws IOException {
+ return fileSystem.getDigest(this);
+ }
+
+ /**
+ * Returns the digest of the file denoted by the current path and digest function,
+ * following symbolic links.
+ *
+ * @return a new byte array containing the file's digest
+ * @throws IOException if the digest could not be computed for any reason
+ */
+ public byte[] getDigest(HashFunction hashFunction) throws IOException {
+ return fileSystem.getDigest(this, hashFunction);
+ }
+
+ /**
* Opens the file denoted by this path, following symbolic links, for reading,
* and returns an input stream to it.
*
diff --git a/src/main/java/com/google/devtools/build/lib/vfs/UnionFileSystem.java b/src/main/java/com/google/devtools/build/lib/vfs/UnionFileSystem.java
index a0a0955..b246aa6 100644
--- a/src/main/java/com/google/devtools/build/lib/vfs/UnionFileSystem.java
+++ b/src/main/java/com/google/devtools/build/lib/vfs/UnionFileSystem.java
@@ -18,6 +18,7 @@
import com.google.devtools.build.lib.concurrent.ThreadSafety;
import com.google.devtools.build.lib.util.Preconditions;
import com.google.devtools.build.lib.util.StringTrie;
+import com.google.devtools.build.lib.vfs.FileSystem.HashFunction;
import java.io.IOException;
import java.io.InputStream;
@@ -380,15 +381,9 @@
}
@Override
- protected String getFastDigestFunctionType(Path path) {
+ protected byte[] getFastDigest(Path path, HashFunction hashFunction) throws IOException {
FileSystem delegate = getDelegate(path);
- return delegate.getFastDigestFunctionType(adjustPath(path, delegate));
- }
-
- @Override
- protected byte[] getFastDigest(Path path) throws IOException {
- FileSystem delegate = getDelegate(path);
- return delegate.getFastDigest(adjustPath(path, delegate));
+ return delegate.getFastDigest(adjustPath(path, delegate), hashFunction);
}
@Override