Add --experimental_multi_threaded_digest which lets DigestUtils use multiple threads when calculating the MD5 hash even for large files. Might improve performance when using an SSD.

Fixes #835 and #1210.

--
MOS_MIGRATED_REVID=124128233
diff --git a/src/main/java/com/google/devtools/build/lib/BUILD b/src/main/java/com/google/devtools/build/lib/BUILD
index 419adff..fe0b267 100644
--- a/src/main/java/com/google/devtools/build/lib/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/BUILD
@@ -27,6 +27,7 @@
         "//src/main/java/com/google/devtools/build/lib/rules/genquery:srcs",
         "//src/main/java/com/google/devtools/build/lib/rules/objc:srcs",
         "//src/main/java/com/google/devtools/build/lib/sandbox:srcs",
+        "//src/main/java/com/google/devtools/build/lib/ssd:srcs",
         "//src/main/java/com/google/devtools/build/lib/standalone:srcs",
         "//src/main/java/com/google/devtools/build/lib/worker:srcs",
         "//src/main/java/com/google/devtools/build/skyframe:srcs",
@@ -574,6 +575,7 @@
         "//src/main/java/com/google/devtools/build/lib/bazel/dash",
         "//src/main/java/com/google/devtools/build/lib/remote",
         "//src/main/java/com/google/devtools/build/lib/sandbox",
+        "//src/main/java/com/google/devtools/build/lib/ssd",
         "//src/main/java/com/google/devtools/build/lib/standalone",
         "//src/main/java/com/google/devtools/build/lib/worker",
         "//src/main/java/com/google/devtools/build/skyframe",
diff --git a/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java b/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java
index 2ecc041..377ea43 100644
--- a/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java
+++ b/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java
@@ -23,6 +23,7 @@
 
 import java.io.IOException;
 import java.util.Objects;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.logging.Level;
 
 import javax.annotation.Nullable;
@@ -33,6 +34,7 @@
 public class DigestUtils {
   // Object to synchronize on when serializing large file reads.
   private static final Object MD5_LOCK = new Object();
+  private static final AtomicBoolean MULTI_THREADED_DIGEST = new AtomicBoolean(false);
 
   /** Private constructor to prevent instantiation of utility class. */
   private DigestUtils() {}
@@ -44,7 +46,7 @@
    * @param size size of Artifact on filesystem in bytes, getSize() on its stat.
    */
   public static boolean useFileDigest(boolean isFile, long size) {
-    // Use timestamps for directories. Use digests for everything else.
+    // Use timestamps for directories and empty files. Use digests for everything else.
     return isFile && size != 0;
   }
 
@@ -83,8 +85,17 @@
    * Returns the the fast md5 digest of the file, or null if not available.
    */
   @Nullable
-  public static byte[] getFastDigest(Path path) throws IOException {
-    return path.getFastDigestFunctionType().equals("MD5") ? path.getFastDigest() : null;
+  private static byte[] getFastDigest(Path path) throws IOException {
+    // TODO(bazel-team): the action cache currently only works with md5 digests but it ought to
+    // work with any opaque digest.
+    return Objects.equals(path.getFastDigestFunctionType(), "MD5") ? path.getFastDigest() : null;
+  }
+
+  /**
+   * Sets the flag that lets files over 4096 bytes skip serialized digest computation.
+   */
+  public static void setMultiThreadedDigest(boolean multiThreadedDigest) {
+    DigestUtils.MULTI_THREADED_DIGEST.set(multiThreadedDigest);
   }
 
   /**
@@ -97,12 +108,8 @@
    * to avoid excessive disk seeks.
    */
   public static byte[] getDigestOrFail(Path path, long fileSize) throws IOException {
-    // TODO(bazel-team): the action cache currently only works with md5 digests but it ought to
-    // work with any opaque digest.
-    byte[] md5bin = null;
-    if (Objects.equals(path.getFastDigestFunctionType(), "MD5")) {
-      md5bin = getFastDigest(path);
-    }
+    byte[] md5bin = getFastDigest(path);
+
     if (md5bin != null && !binaryDigestWellFormed(md5bin)) {
       // Fail-soft in cases where md5bin is non-null, but not a valid digest.
       String msg = String.format("Malformed digest '%s' for file %s",
@@ -111,9 +118,10 @@
       LoggingUtil.logToRemote(Level.SEVERE, msg, new IllegalStateException(msg));
       md5bin = null;
     }
+
     if (md5bin != null) {
       return md5bin;
-    } else if (fileSize > 4096) {
+    } else if (fileSize > 4096 && !MULTI_THREADED_DIGEST.get()) {
       // We'll have to read file content in order to calculate the digest. In that case
       // it would be beneficial to serialize those calculations since there is a high
       // probability that MD5 will be requested for multiple output files simultaneously.
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/BazelMain.java b/src/main/java/com/google/devtools/build/lib/bazel/BazelMain.java
index 8273932..673c964 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/BazelMain.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/BazelMain.java
@@ -45,6 +45,7 @@
           com.google.devtools.build.lib.bazel.BazelRepositoryModule.class,
           com.google.devtools.build.lib.bazel.dash.DashModule.class,
           com.google.devtools.build.lib.bazel.rules.BazelRulesModule.class,
+          com.google.devtools.build.lib.ssd.SsdModule.class,
           com.google.devtools.build.lib.worker.WorkerModule.class,
           com.google.devtools.build.lib.remote.RemoteModule.class,
           com.google.devtools.build.lib.standalone.StandaloneModule.class,
diff --git a/src/main/java/com/google/devtools/build/lib/ssd/BUILD b/src/main/java/com/google/devtools/build/lib/ssd/BUILD
new file mode 100644
index 0000000..7a334df
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/ssd/BUILD
@@ -0,0 +1,31 @@
+package(
+    default_visibility = ["//src:__subpackages__"],
+)
+
+java_library(
+    name = "ssd",  # SsdModule and SsdOptions
+    srcs = glob(["*.java"]),
+    deps = [
+        "//src/main/java/com/google/devtools/build/lib:build-base",
+        "//src/main/java/com/google/devtools/build/lib:concurrent",
+        "//src/main/java/com/google/devtools/build/lib:events",
+        "//src/main/java/com/google/devtools/build/lib:io",
+        "//src/main/java/com/google/devtools/build/lib:packages-internal",
+        "//src/main/java/com/google/devtools/build/lib:runtime",
+        "//src/main/java/com/google/devtools/build/lib:util",
+        "//src/main/java/com/google/devtools/build/lib:vfs",
+        "//src/main/java/com/google/devtools/build/lib/actions",
+        "//src/main/java/com/google/devtools/build/lib/standalone",
+        "//src/main/java/com/google/devtools/common/options",
+        "//src/main/protobuf:worker_protocol_java_proto",  # NOTE(review): looks unused - confirm
+        "//third_party:apache_commons_pool2",  # NOTE(review): looks unused - confirm
+        "//third_party:guava",
+        "//third_party:jsr305",
+        "//third_party/protobuf",  # NOTE(review): looks unused - confirm
+    ],
+)
+
+filegroup(
+    name = "srcs",
+    srcs = glob(["**"]),
+)
diff --git a/src/main/java/com/google/devtools/build/lib/ssd/SsdModule.java b/src/main/java/com/google/devtools/build/lib/ssd/SsdModule.java
new file mode 100644
index 0000000..a41a3ba
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/ssd/SsdModule.java
@@ -0,0 +1,40 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.ssd;
+
+import com.google.common.collect.ImmutableList;
+import com.google.devtools.build.lib.actions.cache.DigestUtils;
+import com.google.devtools.build.lib.runtime.BlazeModule;
+import com.google.devtools.build.lib.runtime.Command;
+import com.google.devtools.common.options.OptionsBase;
+import com.google.devtools.common.options.OptionsProvider;
+
+/**
+ * BlazeModule that applies optimizations to Bazel's internals in order to improve performance when
+ * using an SSD. Registers {@link SsdOptions} and forwards its flag to {@link DigestUtils}.
+ */
+public final class SsdModule extends BlazeModule {
+  @Override
+  public Iterable<Class<? extends OptionsBase>> getCommandOptions(Command command) {
+    return ImmutableList.<Class<? extends OptionsBase>>of(SsdOptions.class);
+  }
+
+  @Override
+  public void handleOptions(OptionsProvider optionsProvider) {
+    SsdOptions options = optionsProvider.getOptions(SsdOptions.class);
+    // DigestUtils keeps this flag in a static field, so always write it: an unset flag must
+    // switch multi-threaded digesting back off rather than inherit an earlier "true".
+    DigestUtils.setMultiThreadedDigest(options != null && options.experimentalMultiThreadedDigest);
+  }
+}
diff --git a/src/main/java/com/google/devtools/build/lib/ssd/SsdOptions.java b/src/main/java/com/google/devtools/build/lib/ssd/SsdOptions.java
new file mode 100644
index 0000000..a9da284
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/ssd/SsdOptions.java
@@ -0,0 +1,32 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.ssd;
+
+import com.google.devtools.common.options.Option;
+import com.google.devtools.common.options.OptionsBase;
+
+/**
+ * Options that tune Bazel's internals to improve performance on workstations with an SSD.
+ * Currently holds only {@code --experimental_multi_threaded_digest}, which defaults to false.
+ */
+public class SsdOptions extends OptionsBase {
+  @Option(
+    name = "experimental_multi_threaded_digest",
+    defaultValue = "false",
+    help =
+        "Whether to always compute MD5 digests of files with multiple threads. Might improve "
+            + "performance when using an SSD."
+  )
+  public boolean experimentalMultiThreadedDigest;
+}