Add async support to StandaloneTestStrategy
Progress on #6394.
PiperOrigin-RevId: 238977749
diff --git a/src/main/java/com/google/devtools/build/lib/exec/StandaloneTestStrategy.java b/src/main/java/com/google/devtools/build/lib/exec/StandaloneTestStrategy.java
index c2c080c..1d3087c 100644
--- a/src/main/java/com/google/devtools/build/lib/exec/StandaloneTestStrategy.java
+++ b/src/main/java/com/google/devtools/build/lib/exec/StandaloneTestStrategy.java
@@ -18,6 +18,7 @@
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.io.ByteStreams;
+import com.google.common.util.concurrent.ListenableFuture;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
import com.google.devtools.build.lib.actions.ActionInputHelper;
import com.google.devtools.build.lib.actions.Artifact;
@@ -28,12 +29,14 @@
import com.google.devtools.build.lib.actions.SimpleSpawn;
import com.google.devtools.build.lib.actions.Spawn;
import com.google.devtools.build.lib.actions.SpawnActionContext;
+import com.google.devtools.build.lib.actions.SpawnContinuation;
import com.google.devtools.build.lib.actions.SpawnResult;
import com.google.devtools.build.lib.analysis.RunfilesSupplierImpl;
import com.google.devtools.build.lib.analysis.actions.SpawnAction;
import com.google.devtools.build.lib.analysis.test.TestActionContext;
import com.google.devtools.build.lib.analysis.test.TestResult;
import com.google.devtools.build.lib.analysis.test.TestRunnerAction;
+import com.google.devtools.build.lib.analysis.test.TestRunnerAction.ResolvedPaths;
import com.google.devtools.build.lib.buildeventstream.BuildEventStreamProtos;
import com.google.devtools.build.lib.buildeventstream.TestFileNameConstants;
import com.google.devtools.build.lib.events.Event;
@@ -58,6 +61,7 @@
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
+import javax.annotation.Nullable;
/** Runs TestRunnerAction actions. */
// TODO(bazel-team): add tests for this strategy.
@@ -226,6 +230,47 @@
relativeTmpDir);
}
+ private TestAttemptContinuation beginTestAttempt(
+ TestRunnerAction testAction,
+ Spawn spawn,
+ ActionExecutionContext actionExecutionContext,
+ Path execRoot)
+ throws ExecException, IOException, InterruptedException {
+ ResolvedPaths resolvedPaths = testAction.resolve(execRoot);
+ Path out = actionExecutionContext.getInputPath(testAction.getTestLog());
+ Path err = resolvedPaths.getTestStderr();
+ FileOutErr testOutErr = new FileOutErr(out, err);
+ Closeable streamed = null;
+ if (executionOptions.testOutput.equals(TestOutputFormat.STREAMED)) {
+ streamed =
+ new StreamedTestOutput(
+ Reporter.outErrForReporter(actionExecutionContext.getEventHandler()), out);
+ }
+ long startTimeMillis = actionExecutionContext.getClock().currentTimeMillis();
+ return new BazelTestAttemptContinuation(
+ testAction,
+ actionExecutionContext,
+ spawn,
+ resolvedPaths,
+ testOutErr,
+ streamed,
+ startTimeMillis,
+ new SpawnContinuation() {
+ @Override
+ public ListenableFuture<?> getFuture() {
+ return null;
+ }
+
+ @Override
+ public SpawnContinuation execute() throws ExecException, InterruptedException {
+ SpawnActionContext spawnActionContext =
+ actionExecutionContext.getContext(SpawnActionContext.class);
+ return spawnActionContext.beginExecution(spawn, actionExecutionContext);
+ }
+ })
+ .execute();
+ }
+
private StandaloneTestResult executeTestAttempt(
TestRunnerAction action,
Spawn spawn,
@@ -233,7 +278,6 @@
Path execRoot)
throws ExecException, IOException, InterruptedException {
Closeable streamed = null;
- Path testLogPath = actionExecutionContext.getInputPath(action.getTestLog());
// We have two protos to represent test attempts:
// 1. com.google.devtools.build.lib.view.test.TestStatus.TestResultData represents both failed
// attempts and finished tests. Bazel stores this to disk to persist cached test result
@@ -246,21 +290,18 @@
// created right here, or one that is read back from disk.
TestResultData.Builder builder = TestResultData.newBuilder();
- long startTime = actionExecutionContext.getClock().currentTimeMillis();
SpawnActionContext spawnActionContext =
actionExecutionContext.getContext(SpawnActionContext.class);
- Path xmlOutputPath = action.resolve(actionExecutionContext.getExecRoot()).getXmlOutputPath();
List<SpawnResult> spawnResults = new ArrayList<>();
- BuildEventStreamProtos.TestResult.ExecutionInfo.Builder executionInfo =
- BuildEventStreamProtos.TestResult.ExecutionInfo.newBuilder();
Path out = actionExecutionContext.getInputPath(action.getTestLog());
Path err = action.resolve(execRoot).getTestStderr();
+ long startTime = actionExecutionContext.getClock().currentTimeMillis();
try (FileOutErr testOutErr = new FileOutErr(out, err)) {
if (executionOptions.testOutput.equals(TestOutputFormat.STREAMED)) {
streamed =
new StreamedTestOutput(
- Reporter.outErrForReporter(actionExecutionContext.getEventHandler()), testLogPath);
+ Reporter.outErrForReporter(actionExecutionContext.getEventHandler()), out);
}
try {
spawnResults.addAll(
@@ -268,7 +309,7 @@
builder
.setTestPassed(true)
.setStatus(BlazeTestStatus.PASSED)
- .setPassedLog(testLogPath.getPathString());
+ .setPassedLog(out.getPathString());
} catch (SpawnExecException e) {
// If this method returns normally, then the higher level will rerun the test (up to
// --flaky_test_attempts times).
@@ -283,7 +324,7 @@
builder
.setTestPassed(false)
.setStatus(e.hasTimedOut() ? BlazeTestStatus.TIMEOUT : BlazeTestStatus.FAILED)
- .addFailedLogs(testLogPath.getPathString());
+ .addFailedLogs(out.getPathString());
spawnResults.add(e.getSpawnResult());
}
if (!testOutErr.hasRecordedOutput()) {
@@ -291,7 +332,7 @@
FileSystemUtils.touchFile(out);
}
// Append any error output to the test.log. This is very rare.
- appendStderr(out, err);
+ appendStderr(testOutErr);
}
long endTime = actionExecutionContext.getClock().currentTimeMillis();
@@ -303,7 +344,8 @@
// The SpawnResult of a remotely cached or remotely executed action may not have walltime
// set. We fall back to the time measured here for backwards compatibility.
duration = primaryResult.getWallTime().orElse(Duration.ofMillis(duration)).toMillis();
- extractExecutionInfo(primaryResult, builder, executionInfo);
+ BuildEventStreamProtos.TestResult.ExecutionInfo.Builder executionInfo =
+ extractExecutionInfo(primaryResult, builder);
builder.setStartTimeMillisEpoch(startTime);
builder.addTestTimes(duration);
@@ -317,6 +359,7 @@
// then we run a separate action to create a test.xml from test.log. We do this as a spawn
// rather than doing it locally in-process, as the test.log file may only exist remotely (when
// remote execution is enabled), and we do not want to have to download it.
+ Path xmlOutputPath = action.resolve(actionExecutionContext.getExecRoot()).getXmlOutputPath();
if (executionOptions.splitXmlGeneration
&& action.getTestLog().getPath().exists()
&& !xmlOutputPath.exists()) {
@@ -350,11 +393,13 @@
}
/** In rare cases, we might write something to stderr. Append it to the real test.log. */
- protected static void appendStderr(Path stdOut, Path stdErr) throws IOException {
+ private static void appendStderr(FileOutErr outErr) throws IOException {
+ Path stdErr = outErr.getErrorPath();
FileStatus stat = stdErr.statNullable();
if (stat != null) {
try {
if (stat.getSize() > 0) {
+ Path stdOut = outErr.getErrorPath();
if (stdOut.exists()) {
stdOut.setWritable(true);
}
@@ -369,10 +414,10 @@
}
}
- private static void extractExecutionInfo(
- SpawnResult spawnResult,
- TestResultData.Builder result,
- BuildEventStreamProtos.TestResult.ExecutionInfo.Builder executionInfo) {
+ private static BuildEventStreamProtos.TestResult.ExecutionInfo.Builder extractExecutionInfo(
+ SpawnResult spawnResult, TestResultData.Builder result) {
+ BuildEventStreamProtos.TestResult.ExecutionInfo.Builder executionInfo =
+ BuildEventStreamProtos.TestResult.ExecutionInfo.newBuilder();
if (spawnResult.isCacheHit()) {
result.setRemotelyCached(true);
executionInfo.setCachedRemotely(true);
@@ -385,6 +430,7 @@
if (spawnResult.getExecutorHostName() != null) {
executionInfo.setHostname(spawnResult.getExecutorHostName());
}
+ return executionInfo;
}
/**
@@ -511,6 +557,17 @@
return new TestResult(action, data, /*cached*/ true, execRoot);
}
+ private static void closeSuppressed(Throwable e, @Nullable Closeable c) {
+ if (c == null) {
+ return;
+ }
+ try {
+ c.close();
+ } catch (IOException e2) {
+ e.addSuppressed(e2);
+ }
+ }
+
private final class StandaloneFailedAttemptResult implements FailedAttemptResult {
private final TestResultData testResultData;
@@ -550,7 +607,15 @@
return actionExecutionContext;
}
- public StandaloneTestResult execute() throws InterruptedException, IOException, ExecException {
+ @Override
+ public TestAttemptContinuation beginExecution()
+ throws InterruptedException, IOException, ExecException {
+ prepareFileSystem(testAction, tmpDir, coverageDir, workingDirectory);
+ return beginTestAttempt(testAction, spawn, actionExecutionContext, execRoot);
+ }
+
+ @Override
+ public TestAttemptResult execute() throws InterruptedException, IOException, ExecException {
prepareFileSystem(testAction, tmpDir, coverageDir, workingDirectory);
return executeTestAttempt(testAction, spawn, actionExecutionContext, execRoot);
}
@@ -575,4 +640,240 @@
testAction, actionExecutionContext, (StandaloneTestResult) finalResult, failedAttempts);
}
}
+
+ private final class BazelTestAttemptContinuation extends TestAttemptContinuation {
+ private final TestRunnerAction testAction;
+ private final ActionExecutionContext actionExecutionContext;
+ private final Spawn spawn;
+ private final ResolvedPaths resolvedPaths;
+ private final FileOutErr fileOutErr;
+ private final Closeable streamed;
+ private final long startTimeMillis;
+ private final SpawnContinuation spawnContinuation;
+
+ BazelTestAttemptContinuation(
+ TestRunnerAction testAction,
+ ActionExecutionContext actionExecutionContext,
+ Spawn spawn,
+ ResolvedPaths resolvedPaths,
+ FileOutErr fileOutErr,
+ Closeable streamed,
+ long startTimeMillis,
+ SpawnContinuation spawnContinuation) {
+ this.testAction = testAction;
+ this.actionExecutionContext = actionExecutionContext;
+ this.spawn = spawn;
+ this.resolvedPaths = resolvedPaths;
+ this.fileOutErr = fileOutErr;
+ this.streamed = streamed;
+ this.startTimeMillis = startTimeMillis;
+ this.spawnContinuation = spawnContinuation;
+ }
+
+ @Nullable
+ @Override
+ public ListenableFuture<?> getFuture() {
+ return spawnContinuation.getFuture();
+ }
+
+ @Override
+ public TestAttemptContinuation execute()
+ throws InterruptedException, IOException, ExecException {
+ // We have two protos to represent test attempts:
+ // 1. com.google.devtools.build.lib.view.test.TestStatus.TestResultData represents both failed
+ // attempts and finished tests. Bazel stores this to disk to persist cached test result
+ // information across server restarts.
+ // 2. com.google.devtools.build.lib.buildeventstream.BuildEventStreamProtos.TestResult
+ // represents only individual attempts (failed or not). Bazel reports this as an event to
+ // the Build Event Protocol, but never saves it to disk.
+ //
+ // The TestResult proto is always constructed from a TestResultData instance, either one that
+ // is created right here, or one that is read back from disk.
+ TestResultData.Builder builder;
+ List<SpawnResult> spawnResults;
+ try {
+ SpawnContinuation nextContinuation = spawnContinuation.execute();
+ if (!nextContinuation.isDone()) {
+ return new BazelTestAttemptContinuation(
+ testAction,
+ actionExecutionContext,
+ spawn,
+ resolvedPaths,
+ fileOutErr,
+ streamed,
+ startTimeMillis,
+ nextContinuation);
+ }
+ spawnResults = nextContinuation.get();
+ builder = TestResultData.newBuilder();
+ builder
+ .setTestPassed(true)
+ .setStatus(BlazeTestStatus.PASSED)
+ .setPassedLog(fileOutErr.getOutputPath().getPathString());
+ } catch (SpawnExecException e) {
+ if (e.isCatastrophic()) {
+ closeSuppressed(e, streamed);
+ closeSuppressed(e, fileOutErr);
+ throw e;
+ }
+ if (!e.getSpawnResult().setupSuccess()) {
+ closeSuppressed(e, streamed);
+ closeSuppressed(e, fileOutErr);
+ // Rethrow as the test could not be run and thus there's no point in retrying.
+ throw e;
+ }
+ spawnResults = ImmutableList.of(e.getSpawnResult());
+ builder = TestResultData.newBuilder();
+ builder
+ .setTestPassed(false)
+ .setStatus(e.hasTimedOut() ? BlazeTestStatus.TIMEOUT : BlazeTestStatus.FAILED)
+ .addFailedLogs(fileOutErr.getOutputPath().getPathString());
+ }
+ long endTimeMillis = actionExecutionContext.getClock().currentTimeMillis();
+
+ if (!fileOutErr.hasRecordedOutput()) {
+ // Make sure that the test.log exists.
+ FileSystemUtils.touchFile(fileOutErr.getOutputPath());
+ }
+ // Append any error output to the test.log. This is very rare.
+ appendStderr(fileOutErr);
+ fileOutErr.close();
+ if (streamed != null) {
+ streamed.close();
+ }
+
+ // SpawnActionContext guarantees the first entry to correspond to the spawn passed in (there
+ // may be additional entries due to tree artifact handling).
+ SpawnResult primaryResult = spawnResults.get(0);
+
+ // The SpawnResult of a remotely cached or remotely executed action may not have walltime
+ // set. We fall back to the time measured here for backwards compatibility.
+ long durationMillis = endTimeMillis - startTimeMillis;
+ durationMillis =
+ primaryResult.getWallTime().orElse(Duration.ofMillis(durationMillis)).toMillis();
+
+ builder.setStartTimeMillisEpoch(startTimeMillis);
+ builder.addTestTimes(durationMillis);
+ builder.addTestProcessTimes(durationMillis);
+ builder.setRunDurationMillis(durationMillis);
+ if (testAction.isCoverageMode()) {
+ builder.setHasCoverage(true);
+ }
+
+ // If the test did not create a test.xml, and --experimental_split_xml_generation is enabled,
+ // then we run a separate action to create a test.xml from test.log. We do this as a spawn
+ // rather than doing it locally in-process, as the test.log file may only exist remotely (when
+ // remote execution is enabled), and we do not want to have to download it.
+ Path xmlOutputPath = resolvedPaths.getXmlOutputPath();
+ if (executionOptions.splitXmlGeneration
+ && fileOutErr.getOutputPath().exists()
+ && !xmlOutputPath.exists()) {
+ Spawn xmlGeneratingSpawn = createXmlGeneratingSpawn(testAction, primaryResult);
+ SpawnActionContext spawnActionContext =
+ actionExecutionContext.getContext(SpawnActionContext.class);
+ // We treat all failures to generate the test.xml here as catastrophic, and won't rerun
+ // the test if this fails. We redirect the output to a temporary file.
+ FileOutErr xmlSpawnOutErr = actionExecutionContext.getFileOutErr().childOutErr();
+ SpawnContinuation xmlContinuation;
+ try {
+ xmlContinuation =
+ spawnActionContext.beginExecution(
+ xmlGeneratingSpawn, actionExecutionContext.withFileOutErr(xmlSpawnOutErr));
+ } catch (ExecException | InterruptedException e) {
+ xmlSpawnOutErr.close();
+ throw e;
+ }
+ if (!xmlContinuation.isDone()) {
+ return new BazelXmlCreationContinuation(
+ resolvedPaths, xmlSpawnOutErr, builder, spawnResults, xmlContinuation);
+ }
+ }
+
+ TestCase details = parseTestResult(xmlOutputPath);
+ if (details != null) {
+ builder.setTestCase(details);
+ }
+
+ BuildEventStreamProtos.TestResult.ExecutionInfo.Builder executionInfo =
+ extractExecutionInfo(primaryResult, builder);
+ StandaloneTestResult standaloneTestResult =
+ StandaloneTestResult.builder()
+ .setSpawnResults(spawnResults)
+ // We return the TestResultData.Builder rather than the finished TestResultData
+ // instance, as we may have to rename the output files in case the test needs to be
+ // rerun (if it failed here _and_ is marked flaky _and_ the number of flaky attempts
+ // is larger than 1).
+ .setTestResultDataBuilder(builder)
+ .setExecutionInfo(executionInfo.build())
+ .build();
+ return TestAttemptContinuation.of(standaloneTestResult);
+ }
+ }
+
+ private final class BazelXmlCreationContinuation extends TestAttemptContinuation {
+ private final ResolvedPaths resolvedPaths;
+ private final FileOutErr fileOutErr;
+ private final TestResultData.Builder builder;
+ private final List<SpawnResult> primarySpawnResults;
+ private final SpawnContinuation spawnContinuation;
+
+ BazelXmlCreationContinuation(
+ ResolvedPaths resolvedPaths,
+ FileOutErr fileOutErr,
+ TestResultData.Builder builder,
+ List<SpawnResult> primarySpawnResults,
+ SpawnContinuation spawnContinuation) {
+ this.resolvedPaths = resolvedPaths;
+ this.fileOutErr = fileOutErr;
+ this.builder = builder;
+ this.primarySpawnResults = primarySpawnResults;
+ this.spawnContinuation = spawnContinuation;
+ }
+
+ @Nullable
+ @Override
+ public ListenableFuture<?> getFuture() {
+ return spawnContinuation.getFuture();
+ }
+
+ @Override
+ public TestAttemptContinuation execute()
+ throws InterruptedException, IOException, ExecException {
+ SpawnContinuation nextContinuation;
+ try {
+ nextContinuation = spawnContinuation.execute();
+ if (!nextContinuation.isDone()) {
+ return new BazelXmlCreationContinuation(
+ resolvedPaths, fileOutErr, builder, primarySpawnResults, nextContinuation);
+ }
+ } catch (ExecException | InterruptedException e) {
+ closeSuppressed(e, fileOutErr);
+ throw e;
+ }
+
+ List<SpawnResult> spawnResults = new ArrayList<>();
+ spawnResults.addAll(primarySpawnResults);
+ spawnResults.addAll(nextContinuation.get());
+
+ Path xmlOutputPath = resolvedPaths.getXmlOutputPath();
+ TestCase details = parseTestResult(xmlOutputPath);
+ if (details != null) {
+ builder.setTestCase(details);
+ }
+
+ BuildEventStreamProtos.TestResult.ExecutionInfo.Builder executionInfo =
+ extractExecutionInfo(primarySpawnResults.get(0), builder);
+ StandaloneTestResult standaloneTestResult =
+ StandaloneTestResult.builder()
+ .setSpawnResults(spawnResults)
+ // We return the TestResultData.Builder rather than the finished TestResultData
+ // instance, as we may have to rename the output files in case the test needs to be
+ // rerun (if it failed here _and_ is marked flaky _and_ the number of flaky attempts
+ // is larger than 1).
+ .setTestResultDataBuilder(builder)
+ .setExecutionInfo(executionInfo.build())
+ .build();
+ return TestAttemptContinuation.of(standaloneTestResult);
+ }
+ }
}