Suppress last-ditch download exceptions w/cleanup
Create an encapsulating DownloadException to represent the aggregate
exception set of an ActionResult download. IOExceptions will be retained
through exception suppression, and the outer exception has a property to
indicate if it only represents a sequence of CacheNotFoundExceptions.
InterruptedExceptions interception is improved to cancel pending work
and wrap, through suppression, any DownloadException that also occurred
during the download. InterruptedException being thrown on the download
control thread, it does not require suppression of further interrupts,
and can represent an outer download exception. Thread interrupt status
is suppressed for cancellations, and conveyed on throw.
These exception wrapping efforts allow non-asynchronous frame
representation in stack traces, and much clearer identification of
sources within remote strategy execution which produce failures based on
remote errors.
Any DownloadException in the last-ditch output download under
handleError in RemoteSpawnRunner is added as suppressed to the
initiating exception. Other exceptions (likely local IO) present clear
immediate traces and do not require specialized treatment.
Closes #10029.
PiperOrigin-RevId: 306619678
diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteCache.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteCache.java
index 1a1be30..caba306 100644
--- a/src/main/java/com/google/devtools/build/lib/remote/RemoteCache.java
+++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteCache.java
@@ -14,6 +14,7 @@
package com.google.devtools.build.lib.remote;
import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
+import static com.google.devtools.build.lib.remote.util.Utils.getFromFuture;
import build.bazel.remote.execution.v2.Action;
import build.bazel.remote.execution.v2.ActionResult;
@@ -27,9 +28,7 @@
import build.bazel.remote.execution.v2.OutputSymlink;
import build.bazel.remote.execution.v2.SymlinkNode;
import build.bazel.remote.execution.v2.Tree;
-import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
-import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
@@ -57,7 +56,6 @@
import com.google.devtools.build.lib.remote.common.RemoteCacheClient.ActionKey;
import com.google.devtools.build.lib.remote.options.RemoteOptions;
import com.google.devtools.build.lib.remote.util.DigestUtil;
-import com.google.devtools.build.lib.remote.util.Utils;
import com.google.devtools.build.lib.remote.util.Utils.InMemoryOutput;
import com.google.devtools.build.lib.util.io.FileOutErr;
import com.google.devtools.build.lib.util.io.OutErr;
@@ -80,7 +78,6 @@
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
-import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.annotation.Nullable;
@@ -116,7 +113,7 @@
public ActionResult downloadActionResult(ActionKey actionKey, boolean inlineOutErr)
throws IOException, InterruptedException {
- return Utils.getFromFuture(cacheProtocol.downloadActionResult(actionKey, inlineOutErr));
+ return getFromFuture(cacheProtocol.downloadActionResult(actionKey, inlineOutErr));
}
/**
@@ -181,8 +178,7 @@
digests.addAll(digestToFile.keySet());
digests.addAll(digestToBlobs.keySet());
- ImmutableSet<Digest> digestsToUpload =
- Utils.getFromFuture(cacheProtocol.findMissingDigests(digests));
+ ImmutableSet<Digest> digestsToUpload = getFromFuture(cacheProtocol.findMissingDigests(digests));
ImmutableList.Builder<ListenableFuture<Void>> uploads = ImmutableList.builder();
for (Digest digest : digestsToUpload) {
Path file = digestToFile.get(digest);
@@ -198,7 +194,7 @@
}
}
- waitForUploads(uploads.build());
+ waitForBulkTransfer(uploads.build(), /* cancelRemainingOnInterrupt=*/ false);
if (manifest.getStderrDigest() != null) {
result.setStderrDigest(manifest.getStderrDigest());
@@ -208,22 +204,45 @@
}
}
- private static void waitForUploads(List<ListenableFuture<Void>> uploads)
- throws IOException, InterruptedException {
- try {
- for (ListenableFuture<Void> upload : uploads) {
- upload.get();
+ protected static <T> void waitForBulkTransfer(
+ Iterable<ListenableFuture<T>> transfers, boolean cancelRemainingOnInterrupt)
+ throws BulkTransferException, InterruptedException {
+ BulkTransferException bulkTransferException = null;
+ InterruptedException interruptedException = null;
+ boolean interrupted = Thread.currentThread().isInterrupted();
+ for (ListenableFuture<T> transfer : transfers) {
+ try {
+ if (interruptedException == null) {
+ // Wait for all downloads to finish.
+ getFromFuture(transfer);
+ } else {
+ transfer.cancel(true);
+ }
+ } catch (IOException e) {
+ if (bulkTransferException == null) {
+ bulkTransferException = new BulkTransferException();
+ }
+ bulkTransferException.add(e);
+ } catch (InterruptedException e) {
+ interrupted = Thread.interrupted() || interrupted;
+ interruptedException = e;
+ if (!cancelRemainingOnInterrupt) {
+ // leave the rest of the transfers alone
+ break;
+ }
}
- } catch (ExecutionException e) {
- // TODO(buchgr): Add support for cancellation and factor this method out to be shared
- // between ByteStreamUploader as well.
- Throwable cause = e.getCause();
- Throwables.throwIfInstanceOf(cause, IOException.class);
- Throwables.throwIfInstanceOf(cause, InterruptedException.class);
- if (cause != null) {
- throw new IOException(cause);
+ }
+ if (interrupted) {
+ Thread.currentThread().interrupt();
+ }
+ if (interruptedException != null) {
+ if (bulkTransferException != null) {
+ interruptedException.addSuppressed(bulkTransferException);
}
- throw new IOException(e);
+ throw interruptedException;
+ }
+ if (bulkTransferException != null) {
+ throw bulkTransferException;
}
}
@@ -299,40 +318,16 @@
// Subsequently we need to wait for *every* download to finish, even if we already know that
// one failed. That's so that when exiting this method we can be sure that all downloads have
// finished and don't race with the cleanup routine.
- // TODO(buchgr): Look into cancellation.
- IOException downloadException = null;
- InterruptedException interruptedException = null;
FileOutErr tmpOutErr = null;
+ if (origOutErr != null) {
+ tmpOutErr = origOutErr.childOutErr();
+ }
+ downloads.addAll(downloadOutErr(result, tmpOutErr));
+
try {
- if (origOutErr != null) {
- tmpOutErr = origOutErr.childOutErr();
- }
- downloads.addAll(downloadOutErr(result, tmpOutErr));
- } catch (IOException e) {
- downloadException = e;
- }
-
- for (ListenableFuture<FileMetadata> download : downloads) {
- try {
- // Wait for all downloads to finish.
- getFromFuture(download);
- } catch (IOException e) {
- if (downloadException == null) {
- downloadException = e;
- } else if (e != downloadException) {
- downloadException.addSuppressed(e);
- }
- } catch (InterruptedException e) {
- if (interruptedException == null) {
- interruptedException = e;
- } else if (e != interruptedException) {
- interruptedException.addSuppressed(e);
- }
- }
- }
-
- if (downloadException != null || interruptedException != null) {
+ waitForBulkTransfer(downloads, /* cancelRemainingOnInterrupt=*/ true);
+ } catch (Exception e) {
try {
// Delete any (partially) downloaded output files.
for (OutputFile file : result.getOutputFilesList()) {
@@ -347,27 +342,18 @@
tmpOutErr.clearOut();
tmpOutErr.clearErr();
}
- } catch (IOException e) {
- if (downloadException != null && e != downloadException) {
- e.addSuppressed(downloadException);
- }
- if (interruptedException != null) {
- e.addSuppressed(interruptedException);
- }
+ } catch (IOException ioEx) {
+ ioEx.addSuppressed(e);
// If deleting of output files failed, we abort the build with a decent error message as
// any subsequent local execution failure would likely be incomprehensible.
- throw new EnvironmentalExecException(
- "Failed to delete output files after incomplete download", e);
+ ExecException execEx =
+ new EnvironmentalExecException(
+ "Failed to delete output files after incomplete download", ioEx);
+ execEx.addSuppressed(e);
+ throw execEx;
}
- }
-
- if (interruptedException != null) {
- throw interruptedException;
- }
-
- if (downloadException != null) {
- throw downloadException;
+ throw e;
}
if (tmpOutErr != null) {
@@ -487,12 +473,15 @@
return outerF;
}
- private List<ListenableFuture<FileMetadata>> downloadOutErr(ActionResult result, OutErr outErr)
- throws IOException {
+ private List<ListenableFuture<FileMetadata>> downloadOutErr(ActionResult result, OutErr outErr) {
List<ListenableFuture<FileMetadata>> downloads = new ArrayList<>();
if (!result.getStdoutRaw().isEmpty()) {
- result.getStdoutRaw().writeTo(outErr.getOutputStream());
- outErr.getOutputStream().flush();
+ try {
+ result.getStdoutRaw().writeTo(outErr.getOutputStream());
+ outErr.getOutputStream().flush();
+ } catch (IOException e) {
+ downloads.add(Futures.immediateFailedFuture(e));
+ }
} else if (result.hasStdoutDigest()) {
downloads.add(
Futures.transform(
@@ -501,8 +490,12 @@
directExecutor()));
}
if (!result.getStderrRaw().isEmpty()) {
- result.getStderrRaw().writeTo(outErr.getErrorStream());
- outErr.getErrorStream().flush();
+ try {
+ result.getStderrRaw().writeTo(outErr.getErrorStream());
+ outErr.getErrorStream().flush();
+ } catch (IOException e) {
+ downloads.add(Futures.immediateFailedFuture(e));
+ }
} else if (result.hasStderrDigest()) {
downloads.add(
Futures.transform(
@@ -1115,9 +1108,4 @@
return symlinks.values();
}
}
-
- @VisibleForTesting
- protected <T> T getFromFuture(ListenableFuture<T> f) throws IOException, InterruptedException {
- return Utils.getFromFuture(f);
- }
}