remote: re-implement merkle tree buildling The TreeNodeRepository served us well, but has become a bit dated over time and it's increasingly hard to understand and to fix bugs. Additionally, it was written with the initial intend of incrementally updating the merkle tree between actions, however internal performance analysis has shown that doing so is 1) hard to implement reliably and 2) would increase memory consumption multifold. This is a rewrite of the code that also fixes bugs like #4663. MerkleTree implements a visitor pattern over an InputTree and is responsible for serializing the InputTree to the protobuf merkle tree that's used by remote caching / execution. Benchmarks on my local machine have shown this implementation to consumes between 2-10x less wall time than the current implementation. Tests by users have shown equally encouraging speedups [1]. [1] https://groups.google.com/d/msg/bazel-discuss/wPHrqm2z8lU/mzoRF236GQAJ Closes #7583. PiperOrigin-RevId: 237421145

commit: 7f72544f6e76d5184a558427e90b768f84b0634c [log] [tgz]
author: buchgr <buchgr@google.com> Fri Mar 08 03:20:25 2019 -0800
committer: Copybara-Service <copybara-worker@google.com> Fri Mar 08 03:22:43 2019 -0800
tree: 8dc3b248731411b77cad84379e2cfaa8c97c9302
parent: 04f28cb87cde05ed2504a5fa974f282ebb727ff5 [diff] [blame]
diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnCache.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnCache.java
index 2da0cc1..f86de07 100644
--- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnCache.java
+++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnCache.java

@@ -18,6 +18,7 @@
 import build.bazel.remote.execution.v2.Action;
 import build.bazel.remote.execution.v2.ActionResult;
 import build.bazel.remote.execution.v2.Command;
+import build.bazel.remote.execution.v2.Digest;
 import build.bazel.remote.execution.v2.Platform;
 import com.google.devtools.build.lib.actions.ActionInput;
 import com.google.devtools.build.lib.actions.ExecException;
@@ -36,7 +37,7 @@
 import com.google.devtools.build.lib.exec.SpawnRunner.SpawnExecutionContext;
 import com.google.devtools.build.lib.profiler.Profiler;
 import com.google.devtools.build.lib.profiler.SilentCloseable;
-import com.google.devtools.build.lib.remote.TreeNodeRepository.TreeNode;
+import com.google.devtools.build.lib.remote.merkletree.MerkleTree;
 import com.google.devtools.build.lib.remote.util.DigestUtil;
 import com.google.devtools.build.lib.remote.util.DigestUtil.ActionKey;
 import com.google.devtools.build.lib.remote.util.TracingMetadataUtils;
@@ -100,14 +101,9 @@
     }
 
     SortedMap<PathFragment, ActionInput> inputMap = context.getInputMapping(true);
-    // Temporary hack: the TreeNodeRepository should be created and maintained upstream!
-    TreeNodeRepository repository =
-        new TreeNodeRepository(execRoot, context.getMetadataProvider(), digestUtil);
-    TreeNode inputRoot;
-    try (SilentCloseable c = Profiler.instance().profile("RemoteCache.computeMerkleDigests")) {
-      inputRoot = repository.buildFromActionInputs(inputMap);
-      repository.computeMerkleDigests(inputRoot);
-    }
+    MerkleTree merkleTree =
+        MerkleTree.build(inputMap, context.getMetadataProvider(), execRoot, digestUtil);
+    Digest merkleTreeRoot = merkleTree.getRootDigest();
 
     // Get the remote platform properties.
     Platform platform =
@@ -122,10 +118,7 @@
     try (SilentCloseable c = Profiler.instance().profile("RemoteCache.buildAction")) {
       action =
           RemoteSpawnRunner.buildAction(
-              digestUtil.compute(command),
-              repository.getMerkleDigest(inputRoot),
-              context.getTimeout(),
-              true);
+              digestUtil.compute(command), merkleTreeRoot, context.getTimeout(), true);
       // Look up action cache, and reuse the action output if it is found.
       actionKey = digestUtil.computeActionKey(action);
     }
@@ -144,8 +137,6 @@
         }
         if (result != null) {
           // We don't cache failed actions, so we know the outputs exist.
-          // For now, download all outputs locally; in the future, we can reuse the digests to
-          // just update the TreeNodeRepository and continue the build.
           try (SilentCloseable c = Profiler.instance().profile("RemoteCache.download")) {
             remoteCache.download(result, execRoot, context.getFileOutErr());
           }
commit	7f72544f6e76d5184a558427e90b768f84b0634c	[log] [tgz]
author	buchgr <buchgr@google.com>	Fri Mar 08 03:20:25 2019 -0800
committer	Copybara-Service <copybara-worker@google.com>	Fri Mar 08 03:22:43 2019 -0800
tree	8dc3b248731411b77cad84379e2cfaa8c97c9302
parent	04f28cb87cde05ed2504a5fa974f282ebb727ff5 [diff] [blame]