Handle external mutations to completionFuture in ActionExecutionState. The class assumes that the future will never be mutated except under its synchronized lock, but that is false: https://github.com/bazelbuild/bazel/blob/146d51aa1ec9dcb721a7483479ef0b1ac21d39f1/src/main/java/com/google/devtools/build/lib/concurrent/AbstractQueueVisitor.java#L485 cancels the combined future it's passed if it's interrupted, and more generally any registered futures will be canceled when the pool is interrupted.

In the case that there are three shared actions, this can trigger the following bug: action A starts executing. Actions B and C start executing. The build is interrupted at some point between here and later, but neither B's nor C's threads notice yet. Action B notices action A is already executing and sets completionFuture. It then exits, returning control to AbstractParallelEvaluator$Evaluate#run code. That code tries to register the future with AbstractQueueVisitor. Either here or later, the future is canceled when the pool is interrupted. Action A's execution thread takes some time to notice. After this cancellation, Action C starts running. It too notices Action A is already executing. But then it unexpectedly finds that the future is done.

PiperOrigin-RevId: 295031074
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/ActionExecutionState.java b/src/main/java/com/google/devtools/build/lib/skyframe/ActionExecutionState.java
index bccd145..a0e465b 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/ActionExecutionState.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/ActionExecutionState.java
@@ -55,6 +55,9 @@
    *
    * <p>The reason for this roundabout approach is to avoid memory allocation if this is not a
    * shared action, and to release the memory once the action is done.
+   *
+   * <p>Skyframe will attempt to cancel this future if the evaluation is interrupted, which violates
+   * the concurrency assumptions this class makes. Beware!
    */
   @GuardedBy("this")
   @Nullable
@@ -65,7 +68,7 @@
     this.state = Preconditions.checkNotNull(state);
   }
 
-  public ActionExecutionValue getResultOrDependOnFuture(
+  ActionExecutionValue getResultOrDependOnFuture(
       SkyFunction.Environment env,
       ActionLookupData actionLookupData,
       Action action,
@@ -90,9 +93,26 @@
         // again after the future is completed.
         sharedActionCallback.actionStarted();
         env.dependOnFuture(completionFuture);
-        // No other thread can access completionFuture until we exit the synchronized block.
-        Preconditions.checkState(!completionFuture.isDone(), state);
-        Preconditions.checkState(env.valuesMissing(), state);
+        if (!env.valuesMissing()) {
+          Preconditions.checkState(
+              completionFuture.isCancelled(), "%s %s", this.actionLookupData, actionLookupData);
+          // The future is unexpectedly done. This must be because it was registered by another
+          // thread earlier and was canceled by Skyframe. We are about to be interrupted ourselves,
+          // but have to do something in the meantime. We can just register a dep with a new future,
+          // then complete it and return. If for some reason this argument is incorrect, we will be
+          // restarted immediately and hopefully have a more consistent result.
+          SettableFuture<Void> dummyFuture = SettableFuture.create();
+          env.dependOnFuture(dummyFuture);
+          dummyFuture.set(null);
+          return null;
+        }
+        // No other thread can modify completionFuture until we exit the synchronized block.
+        Preconditions.checkState(
+            !completionFuture.isDone(),
+            "Completion future modified? %s %s %s",
+            this.actionLookupData,
+            actionLookupData,
+            action);
         return null;
       }
       result = state.get();
diff --git a/src/test/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutorTest.java b/src/test/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutorTest.java
index 3640228..aa78b8e 100644
--- a/src/test/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutorTest.java
+++ b/src/test/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutorTest.java
@@ -22,6 +22,7 @@
 import static com.google.devtools.build.lib.testutil.MoreAsserts.assertNotContainsEventRegex;
 import static com.google.devtools.build.lib.testutil.MoreAsserts.assertThrows;
 import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.fail;
 
 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicates;
@@ -33,6 +34,7 @@
 import com.google.common.eventbus.EventBus;
 import com.google.common.hash.HashCode;
 import com.google.common.testing.GcFinalization;
+import com.google.common.util.concurrent.Uninterruptibles;
 import com.google.devtools.build.lib.actions.AbstractAction;
 import com.google.devtools.build.lib.actions.Action;
 import com.google.devtools.build.lib.actions.ActionAnalysisMetadata;
@@ -105,6 +107,9 @@
 import com.google.devtools.build.lib.skyframe.DirtinessCheckerUtils.BasicFilesystemDirtinessChecker;
 import com.google.devtools.build.lib.skyframe.SkyframeActionExecutor.ActionCompletedReceiver;
 import com.google.devtools.build.lib.skyframe.SkyframeActionExecutor.ProgressSupplier;
+import com.google.devtools.build.lib.skyframe.serialization.DeserializationContext;
+import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec;
+import com.google.devtools.build.lib.skyframe.serialization.SerializationContext;
 import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec;
 import com.google.devtools.build.lib.syntax.EvalException;
 import com.google.devtools.build.lib.syntax.Printer;
@@ -132,7 +137,10 @@
 import com.google.devtools.build.skyframe.TrackingAwaiter;
 import com.google.devtools.build.skyframe.ValueWithMetadata;
 import com.google.devtools.common.options.OptionsParser;
+import com.google.protobuf.CodedInputStream;
+import com.google.protobuf.CodedOutputStream;
 import java.io.IOException;
+import java.io.Serializable;
 import java.lang.ref.WeakReference;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -772,6 +780,172 @@
     TrackingAwaiter.INSTANCE.assertNoErrors();
   }
 
+  /**
+   * Tests a subtle situation when three shared actions race and are interrupted. Action A starts
+   * executing. Actions B and C start executing. Action B notices action A is already executing and
+   * sets completionFuture. It then exits, returning control to
+   * AbstractParallelEvaluator$Evaluate#run code. The build is interrupted. When B's code tries to
+   * register the future with AbstractQueueVisitor, the future is canceled (or if the interrupt
+   * races with B registering the future, shortly thereafter). Action C then starts running. It too
+   * notices Action A is already executing. The future's state should be consistent. A cannot finish
+   * until C runs, since otherwise C would see that A was done.
+   */
+  @Test
+  public void testThreeSharedActionsRacing() throws Exception {
+    Path root = getExecRoot();
+    PathFragment out = PathFragment.create("out");
+    PathFragment execPath = out.getRelative("file");
+    // We create three "configured targets" and three copies of the same artifact, each generated by
+    // an action from its respective configured target. The actions wouldn't actually do the same
+    // thing if they executed, but they look the same to our execution engine.
+    ActionLookupValue.ActionLookupKey lcA = new InjectedActionLookupKey("lcA");
+    Artifact outputA =
+        new Artifact.DerivedArtifact(
+            ArtifactRoot.asDerivedRoot(root, root.getRelative("out")), execPath, lcA);
+    CountDownLatch actionAStartedSoOthersCanProceed = new CountDownLatch(1);
+    CountDownLatch actionCFinishedSoACanFinish = new CountDownLatch(1);
+    Action actionA =
+        new TestAction(
+            (Serializable & Callable<Void>)
+                () -> {
+                  actionAStartedSoOthersCanProceed.countDown();
+                  try {
+                    Thread.sleep(TestUtils.WAIT_TIMEOUT_MILLISECONDS);
+                  } catch (InterruptedException e) {
+                    TrackingAwaiter.INSTANCE.awaitLatchAndTrackExceptions(
+                        actionCFinishedSoACanFinish, "third didn't finish");
+                    throw e;
+                  }
+                  throw new IllegalStateException("Should have been interrupted");
+                },
+            NestedSetBuilder.emptySet(Order.STABLE_ORDER),
+            ImmutableSet.of(outputA));
+    ConfiguredTargetValue ctA = createConfiguredTargetValue(actionA, lcA);
+
+    // Shared actions: they look the same from the point of view of Blaze data.
+    ActionLookupValue.ActionLookupKey lcB = new InjectedActionLookupKey("lcB");
+    Artifact outputB =
+        new Artifact.DerivedArtifact(
+            ArtifactRoot.asDerivedRoot(root, root.getRelative("out")), execPath, lcB);
+    Action actionB =
+        new DummyAction(
+            NestedSetBuilder.emptySet(Order.STABLE_ORDER), outputB, MiddlemanType.NORMAL);
+    ConfiguredTargetValue ctB = createConfiguredTargetValue(actionB, lcB);
+    ActionLookupValue.ActionLookupKey lcC = new InjectedActionLookupKey("lcC");
+    Artifact outputC =
+        new Artifact.DerivedArtifact(
+            ArtifactRoot.asDerivedRoot(root, root.getRelative("out")), execPath, lcC);
+    Action actionC =
+        new DummyAction(
+            NestedSetBuilder.emptySet(Order.STABLE_ORDER), outputC, MiddlemanType.NORMAL);
+    ConfiguredTargetValue ctC = createConfiguredTargetValue(actionC, lcC);
+
+    // Both shared actions wait for A to start executing. We do that by stalling their dep requests
+    // on their configured targets. We then let B proceed. Once B finishes its SkyFunction run, it
+    // interrupts the main thread. C just waits until it has been interrupted, and then another
+    // little bit, to give B time to attempt to add the future and try to cancel it. It not waiting
+    // long enough can lead to a flaky pass.
+
+    Thread mainThread = Thread.currentThread();
+
+    skyframeExecutor
+        .getEvaluatorForTesting()
+        .injectGraphTransformerForTesting(
+            NotifyingHelper.makeNotifyingTransformer(
+                (key, type, order, context) -> {
+                  if (type == EventType.GET_VALUE_WITH_METADATA
+                      && (key.equals(lcB) || key.equals(lcC))) {
+                    // One of the shared actions is requesting its configured target dep.
+                    TrackingAwaiter.INSTANCE.awaitLatchAndTrackExceptions(
+                        actionAStartedSoOthersCanProceed, "primary didn't start");
+                    if (key.equals(lcC)) {
+                      // Wait until interrupted.
+                      try {
+                        Thread.sleep(TestUtils.WAIT_TIMEOUT_MILLISECONDS);
+                        throw new IllegalStateException("Should have been interrupted");
+                      } catch (InterruptedException e) {
+                        // Because ActionExecutionFunction doesn't check for interrupts, this
+                        // interrupted state will persist until the ADD_REVERSE_DEP code below. If
+                        // it does not, this test will start to fail, which is good, since it would
+                        // be strange to check for interrupts in that stretch of hot code.
+                        Thread.currentThread().interrupt();
+                      }
+                      // Wait for B thread to cancel its future. It's hard to know exactly when that
+                      // will be, so give it time. No flakes in 2k runs with this sleep.
+                      Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
+                    }
+                  } else if (type == EventType.ADD_REVERSE_DEP
+                      && key.equals(lcB)
+                      && order == NotifyingHelper.Order.BEFORE
+                      && context != null) {
+                    // B thread has finished its run. Interrupt build!
+                    mainThread.interrupt();
+                  } else if (type == EventType.ADD_REVERSE_DEP
+                      && key.equals(lcC)
+                      && order == NotifyingHelper.Order.BEFORE
+                      && context != null) {
+                    // Test is almost over: let action A finish now that C observed future.
+                    actionCFinishedSoACanFinish.countDown();
+                  }
+                }));
+
+    // Inject the "configured targets" and artifacts into the graph.
+    skyframeExecutor
+        .getDifferencerForTesting()
+        .inject(ImmutableMap.of(lcA, ctA, lcB, ctB, lcC, ctC));
+    // Do a null build, so that the skyframe executor initializes the action executor properly.
+    skyframeExecutor.setActionOutputRoot(getOutputPath());
+    skyframeExecutor.setActionExecutionProgressReportingObjects(
+        EMPTY_PROGRESS_SUPPLIER,
+        EMPTY_COMPLETION_RECEIVER,
+        ActionExecutionStatusReporter.create(reporter));
+    skyframeExecutor.buildArtifacts(
+        reporter,
+        ResourceManager.instanceForTestingOnly(),
+        new DummyExecutor(fileSystem, rootDirectory),
+        ImmutableSet.of(),
+        ImmutableSet.of(),
+        ImmutableSet.of(),
+        ImmutableSet.of(),
+        ImmutableSet.of(),
+        options,
+        NULL_CHECKER,
+        null,
+        null,
+        null);
+
+    skyframeExecutor.prepareBuildingForTestingOnly(
+        reporter, new DummyExecutor(fileSystem, rootDirectory), options, NULL_CHECKER, null);
+    reporter.removeHandler(failFastHandler);
+    try {
+      evaluate(Artifact.keys(ImmutableList.of(outputA, outputB, outputC)));
+      fail();
+    } catch (InterruptedException e) {
+      // Expected.
+    }
+    TrackingAwaiter.INSTANCE.assertNoErrors();
+  }
+
+  /** Dummy codec for serialization. Doesn't actually serialize {@link CountDownLatch}! */
+  @SuppressWarnings("unused")
+  private static class CountDownLatchCodec implements ObjectCodec<CountDownLatch> {
+    private static final CountDownLatch RETURNED = new CountDownLatch(0);
+
+    @Override
+    public Class<? extends CountDownLatch> getEncodedClass() {
+      return CountDownLatch.class;
+    }
+
+    @Override
+    public void serialize(
+        SerializationContext context, CountDownLatch obj, CodedOutputStream codedOut) {}
+
+    @Override
+    public CountDownLatch deserialize(DeserializationContext context, CodedInputStream codedIn) {
+      return RETURNED;
+    }
+  }
+
   /** Regression test for ##5396: successfully build shared actions with tree artifacts. */
   @Test
   public void sharedActionsWithTree() throws Exception {