remote: add directory support for remote caching and execution
Add support for directory trees as artifacts. Closes #4011.
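
An output directory is recorded in the ActionResult as an OutputDirectory entry
whose tree digest refers to a single Tree protobuf message: the root Directory
plus every transitively referenced child Directory. Roughly (an illustrative
sketch, not code taken verbatim from this change):

  Tree tree = Tree.newBuilder()
      .setRoot(rootDirectory)        // files and subdirectory digests at the top level
      .addChildren(childDirectory)   // one entry per descendant directory
      .build();
  Digest treeDigest = digestUtil.compute(tree.toByteArray());
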
PiperOrigin-RevId: 179691001
diff --git a/src/main/java/com/google/devtools/build/lib/remote/AbstractRemoteActionCache.java b/src/main/java/com/google/devtools/build/lib/remote/AbstractRemoteActionCache.java
new file mode 100644
index 0000000..9401945
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/remote/AbstractRemoteActionCache.java
@@ -0,0 +1,378 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.remote;
+
+import com.google.devtools.build.lib.actions.EnvironmentalExecException;
+import com.google.devtools.build.lib.actions.ExecException;
+import com.google.devtools.build.lib.concurrent.ThreadSafety;
+import com.google.devtools.build.lib.remote.TreeNodeRepository.TreeNode;
+import com.google.devtools.build.lib.util.io.FileOutErr;
+import com.google.devtools.build.lib.vfs.Dirent;
+import com.google.devtools.build.lib.vfs.FileSystemUtils;
+import com.google.devtools.build.lib.vfs.Path;
+import com.google.devtools.remoteexecution.v1test.ActionResult;
+import com.google.devtools.remoteexecution.v1test.Command;
+import com.google.devtools.remoteexecution.v1test.Digest;
+import com.google.devtools.remoteexecution.v1test.Directory;
+import com.google.devtools.remoteexecution.v1test.DirectoryNode;
+import com.google.devtools.remoteexecution.v1test.FileNode;
+import com.google.devtools.remoteexecution.v1test.OutputDirectory;
+import com.google.devtools.remoteexecution.v1test.OutputFile;
+import com.google.devtools.remoteexecution.v1test.Tree;
+import com.google.protobuf.ByteString;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import javax.annotation.Nullable;
+
+/** A cache for storing artifacts (input and output) as well as the output of running an action. */
+@ThreadSafety.ThreadSafe
+public abstract class AbstractRemoteActionCache implements AutoCloseable {
+ protected final DigestUtil digestUtil;
+
+ public AbstractRemoteActionCache(DigestUtil digestUtil) {
+ this.digestUtil = digestUtil;
+ }
+
+ /**
+ * Ensures that the tree structure of the inputs, the input files themselves, and the command are
+ * available in the remote cache, such that the tree can be reassembled and executed on another
+ * machine given the root digest.
+ *
+   * <p>The cache may check whether files or parts of the tree structure are already present, and
+   * avoid uploading them again.
+ *
+ * <p>Note that this method is only required for remote execution, not for caching itself.
+ * However, remote execution uses a cache to store input files, and that may be a separate
+ * end-point from the executor itself, so the functionality lives here. A pure remote caching
+ * implementation that does not support remote execution may choose not to implement this
+ * function, and throw {@link UnsupportedOperationException} instead. If so, it should be clearly
+ * documented that it cannot be used for remote execution.
+ */
+ public abstract void ensureInputsPresent(
+ TreeNodeRepository repository, Path execRoot, TreeNode root, Command command)
+ throws IOException, InterruptedException;
+
+ /**
+ * Attempts to look up the given action in the remote cache and return its result, if present.
+ * Returns {@code null} if there is no such entry. Note that a successful result from this method
+ * does not guarantee the availability of the corresponding output files in the remote cache.
+ *
+ * @throws IOException if the remote cache is unavailable.
+ */
+ abstract @Nullable ActionResult getCachedActionResult(DigestUtil.ActionKey actionKey)
+ throws IOException, InterruptedException;
+
+ /**
+   * Upload the result of a locally executed action to the cache by uploading any necessary files,
+   * stdout / stderr, as well as adding an entry for the given action key to the cache if
+   * uploadAction is true.
+ *
+ * @throws IOException if the remote cache is unavailable.
+ */
+ abstract void upload(
+ DigestUtil.ActionKey actionKey,
+ Path execRoot,
+ Collection<Path> files,
+ FileOutErr outErr,
+ boolean uploadAction)
+ throws IOException, InterruptedException;
+
+ /**
+ * Download a remote blob to a local destination.
+ *
+ * @param digest The digest of the remote blob.
+ * @param dest The path to the local file.
+ * @throws IOException if download failed.
+ */
+ protected abstract void downloadBlob(Digest digest, Path dest)
+ throws IOException, InterruptedException;
+
+ /**
+ * Download a remote blob and store it in memory.
+ *
+ * @param digest The digest of the remote blob.
+ * @return The remote blob.
+ * @throws IOException if download failed.
+ */
+ protected abstract byte[] downloadBlob(Digest digest) throws IOException, InterruptedException;
+
+ /**
+   * Download the output files and directory trees of a remotely executed action to the local
+   * machine, as well as its stdout / stderr to the given files.
+ *
+ * <p>In case of failure, this method deletes any output files it might have already created.
+ *
+ * @throws IOException in case of a cache miss or if the remote cache is unavailable.
+   * @throws ExecException in case cleanup after a failed download fails.
+ */
+ // TODO(olaola): will need to amend to include the TreeNodeRepository for updating.
+ public void download(ActionResult result, Path execRoot, FileOutErr outErr)
+ throws ExecException, IOException, InterruptedException {
+ try {
+ for (OutputFile file : result.getOutputFilesList()) {
+ Path path = execRoot.getRelative(file.getPath());
+ downloadFile(path, file.getDigest(), file.getIsExecutable(), file.getContent());
+ }
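+      // An output directory is stored remotely as a single Tree message containing its root
+      // Directory and all transitively referenced child Directory messages, addressed by the
+      // tree digest.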
+ for (OutputDirectory dir : result.getOutputDirectoriesList()) {
+ Digest treeDigest = dir.getTreeDigest();
+ byte[] b = downloadBlob(treeDigest);
+ Digest receivedTreeDigest = digestUtil.compute(b);
+ if (!receivedTreeDigest.equals(treeDigest)) {
+ throw new IOException(
+ "Digest does not match " + receivedTreeDigest + " != " + treeDigest);
+ }
+ Tree tree = Tree.parseFrom(b);
+ Map<Digest, Directory> childrenMap = new HashMap<>();
+ for (Directory child : tree.getChildrenList()) {
+ childrenMap.put(digestUtil.compute(child), child);
+ }
+ Path path = execRoot.getRelative(dir.getPath());
+ downloadDirectory(path, tree.getRoot(), childrenMap);
+ }
+ // TODO(ulfjack): use same code as above also for stdout / stderr if applicable.
+ downloadOutErr(result, outErr);
+ } catch (IOException downloadException) {
+ try {
+ // Delete any (partially) downloaded output files, since any subsequent local execution
+ // of this action may expect none of the output files to exist.
+ for (OutputFile file : result.getOutputFilesList()) {
+ execRoot.getRelative(file.getPath()).delete();
+ }
+ for (OutputDirectory directory : result.getOutputDirectoriesList()) {
+ execRoot.getRelative(directory.getPath()).delete();
+ }
+ if (outErr != null) {
+ outErr.getOutputPath().delete();
+ outErr.getErrorPath().delete();
+ }
+ } catch (IOException e) {
+        // If deleting the output files failed, we abort the build with a clear error message, as
+        // any subsequent local execution failure would likely be incomprehensible.
+
+        // We don't propagate the downloadException, as on its own it would be recoverable; the
+        // actual cause of the build failure is that we couldn't delete the output files.
+ throw new EnvironmentalExecException(
+ "Failed to delete output files after incomplete "
+ + "download. Cannot continue with local execution.",
+ e,
+ true);
+ }
+ throw downloadException;
+ }
+ }
+
+ /**
+ * Download a directory recursively. The directory is represented by a {@link Directory} protobuf
+ * message, and the descendant directories are in {@code childrenMap}, accessible through their
+ * digest.
+ */
+ private void downloadDirectory(Path path, Directory dir, Map<Digest, Directory> childrenMap)
+ throws IOException, InterruptedException {
+    // Ensure that the directory is created even if it is empty.
+ FileSystemUtils.createDirectoryAndParents(path);
+
+ for (FileNode child : dir.getFilesList()) {
+ Path childPath = path.getRelative(child.getName());
+ downloadFile(childPath, child.getDigest(), child.getIsExecutable(), null);
+ }
+
+ for (DirectoryNode child : dir.getDirectoriesList()) {
+ Path childPath = path.getRelative(child.getName());
+ Digest childDigest = child.getDigest();
+ Directory childDir = childrenMap.get(childDigest);
+ if (childDir == null) {
+        throw new IOException(
+            "could not find subdirectory "
+                + child.getName()
+                + " of directory "
+                + path
+                + " for download: digest "
+                + childDigest
+                + " not found");
+ }
+ downloadDirectory(childPath, childDir, childrenMap);
+
+ // Prevent reuse.
+ childrenMap.remove(childDigest);
+ }
+ }
+
+ /**
+   * Download a file (that is not a directory). If {@code content} is not given, the contents are
+   * downloaded from the remote cache using {@code digest}.
+ */
+ protected void downloadFile(
+ Path path, Digest digest, boolean isExecutable, @Nullable ByteString content)
+ throws IOException, InterruptedException {
+ FileSystemUtils.createDirectoryAndParents(path.getParentDirectory());
+ if (digest.getSizeBytes() == 0) {
+ // Handle empty file locally.
+ FileSystemUtils.writeContent(path, new byte[0]);
+ } else {
+ if (content != null && !content.isEmpty()) {
+ try (OutputStream stream = path.getOutputStream()) {
+ content.writeTo(stream);
+ }
+ } else {
+ downloadBlob(digest, path);
+ Digest receivedDigest = digestUtil.compute(path);
+ if (!receivedDigest.equals(digest)) {
+ throw new IOException("Digest does not match " + receivedDigest + " != " + digest);
+ }
+ }
+ }
+ path.setExecutable(isExecutable);
+ }
+
+ private void downloadOutErr(ActionResult result, FileOutErr outErr)
+ throws IOException, InterruptedException {
+ if (!result.getStdoutRaw().isEmpty()) {
+ result.getStdoutRaw().writeTo(outErr.getOutputStream());
+ outErr.getOutputStream().flush();
+ } else if (result.hasStdoutDigest()) {
+ byte[] stdoutBytes = downloadBlob(result.getStdoutDigest());
+ outErr.getOutputStream().write(stdoutBytes);
+ outErr.getOutputStream().flush();
+ }
+ if (!result.getStderrRaw().isEmpty()) {
+ result.getStderrRaw().writeTo(outErr.getErrorStream());
+ outErr.getErrorStream().flush();
+ } else if (result.hasStderrDigest()) {
+ byte[] stderrBytes = downloadBlob(result.getStderrDigest());
+ outErr.getErrorStream().write(stderrBytes);
+ outErr.getErrorStream().flush();
+ }
+ }
+
+ /**
+   * The UploadManifest is used to share upload code among the RemoteActionCache implementations.
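+   *
+   * <p>Typical use (an illustrative sketch; {@code execRoot} and {@code outputFiles} are assumed
+   * to be in scope):
+   *
+   * <pre>
+   *   UploadManifest manifest = new UploadManifest(ActionResult.newBuilder(), execRoot);
+   *   manifest.addFiles(outputFiles);
+   *   // then upload getDigestToFile() / getDigestToChunkers() and the ActionResult
+   * </pre>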
+ */
+ public class UploadManifest {
+ private final ActionResult.Builder result;
+ private final Path execRoot;
+ private final Map<Digest, Path> digestToFile;
+ private final Map<Digest, Chunker> digestToChunkers;
+
+ /**
+ * Create an UploadManifest from an ActionResult builder and an exec root. The ActionResult
+     * builder is populated through calls to {@link #addFiles(Collection)}.
+ */
+ public UploadManifest(ActionResult.Builder result, Path execRoot) {
+ this.result = result;
+ this.execRoot = execRoot;
+
+ this.digestToFile = new HashMap<>();
+ this.digestToChunkers = new HashMap<>();
+ }
+
+ /**
+ * Add a collection of files (and directories) to the UploadManifest. Adding a directory has the
+ * effect of 1) uploading a {@link Tree} protobuf message from which the whole structure of the
+ * directory, including the descendants, can be reconstructed and 2) uploading all the
+ * non-directory descendant files.
+ */
+ public void addFiles(Collection<Path> files) throws IOException, InterruptedException {
+ for (Path file : files) {
+ // TODO(ulfjack): Maybe pass in a SpawnResult here, add a list of output files to that, and
+ // rely on the local spawn runner to stat the files, instead of statting here.
+ if (!file.exists()) {
+ // We ignore requested results that have not been generated by the action.
+ continue;
+ }
+ if (file.isDirectory()) {
+ addDirectory(file);
+ } else {
+ Digest digest = digestUtil.compute(file);
+ addFile(digest, file);
+ }
+ }
+ }
+
+ /** Map of digests to file paths to upload. */
+ public Map<Digest, Path> getDigestToFile() {
+ return digestToFile;
+ }
+
+ /**
+     * Map of digests to chunkers to upload. When the file is a regular, non-directory file, it is
+     * transmitted through {@link #getDigestToFile()}. When it is a directory, it is transmitted as
+ * a {@link Tree} protobuf message through {@link #getDigestToChunkers()}.
+ */
+ public Map<Digest, Chunker> getDigestToChunkers() {
+ return digestToChunkers;
+ }
+
+ private void addFile(Digest digest, Path file) throws IOException {
+ result
+ .addOutputFilesBuilder()
+ .setPath(file.relativeTo(execRoot).getPathString())
+ .setDigest(digest)
+ .setIsExecutable(file.isExecutable());
+
+ digestToFile.put(digest, file);
+ }
+
+ private void addDirectory(Path dir) throws IOException {
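+      // Serialize the directory into a single Tree message: computeDirectory recursively adds
+      // every descendant Directory as a child of the Tree, registers all contained files for
+      // upload, and returns the top-level Directory, which becomes the Tree's root.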
+ Tree.Builder tree = Tree.newBuilder();
+ Directory root = computeDirectory(dir, tree);
+ tree.setRoot(root);
+
+ byte[] blob = tree.build().toByteArray();
+ Digest digest = digestUtil.compute(blob);
+ Chunker chunker = new Chunker(blob, blob.length, digestUtil);
+
+ if (result != null) {
+ result
+ .addOutputDirectoriesBuilder()
+ .setPath(dir.relativeTo(execRoot).getPathString())
+ .setTreeDigest(digest);
+ }
+
+ digestToChunkers.put(chunker.digest(), chunker);
+ }
+
+ private Directory computeDirectory(Path path, Tree.Builder tree) throws IOException {
+ Directory.Builder b = Directory.newBuilder();
+
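+      // Sort entries by name so that the serialized Directory, and therefore its digest, is
+      // deterministic regardless of the order in which readdir returns entries.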
+ List<Dirent> sortedDirent = new ArrayList<>(path.readdir(TreeNodeRepository.SYMLINK_POLICY));
+ sortedDirent.sort(Comparator.comparing(Dirent::getName));
+
+ for (Dirent dirent : sortedDirent) {
+ String name = dirent.getName();
+ Path child = path.getRelative(name);
+ if (dirent.getType() == Dirent.Type.DIRECTORY) {
+ Directory dir = computeDirectory(child, tree);
+ b.addDirectoriesBuilder().setName(name).setDigest(digestUtil.compute(dir));
+ tree.addChildren(dir);
+ } else {
+ Digest digest = digestUtil.compute(child);
+ b.addFilesBuilder().setName(name).setDigest(digest).setIsExecutable(child.isExecutable());
+ digestToFile.put(digest, child);
+ }
+ }
+
+ return b.build();
+ }
+ }
+
+ /** Release resources associated with the cache. The cache may not be used after calling this. */
+ @Override
+ public abstract void close();
+}