Restart node building if previous dep is dirty, fix check-then-act races

While initializing the SkyFunctionEnvironment for a node being built, if
a previously requested dep is found to be not done, reset and re-enqueue
the building node. This lets the node handle the not-done dep like any
other not-done dep (e.g. by enqueuing it or by waiting to be signalled
by it).

Similarly, while registering newly requested deps when building a node
yields a value or an error, if a newly requested dep is found to be not
done, return without completing the node, so that it may be signalled by
the dep (without crashing; done nodes cannot be signalled).

Also fixes a handful of remaining check-then-act races during Skyframe
evaluation that were vulnerable to done->dirty node transitions.

(Note that done->dirty node transitions during evaluation are planned,
but not yet possible.)

RELNOTES: None.
PiperOrigin-RevId: 202886360
diff --git a/src/main/java/com/google/devtools/build/skyframe/AbstractParallelEvaluator.java b/src/main/java/com/google/devtools/build/skyframe/AbstractParallelEvaluator.java
index 31477b0..1a2cab9 100644
--- a/src/main/java/com/google/devtools/build/skyframe/AbstractParallelEvaluator.java
+++ b/src/main/java/com/google/devtools/build/skyframe/AbstractParallelEvaluator.java
@@ -33,6 +33,7 @@
 import com.google.devtools.build.skyframe.NodeEntry.DirtyState;
 import com.google.devtools.build.skyframe.ParallelEvaluatorContext.EnqueueParentBehavior;
 import com.google.devtools.build.skyframe.QueryableGraph.Reason;
+import com.google.devtools.build.skyframe.SkyFunctionEnvironment.UndonePreviouslyRequestedDep;
 import com.google.devtools.build.skyframe.SkyFunctionException.ReifiedSkyFunctionException;
 import java.time.Duration;
 import java.util.Collection;
@@ -226,26 +227,31 @@
           Map<SkyKey, ? extends NodeEntry> entriesToCheck =
               graph.getBatch(skyKey, Reason.OTHER, directDepsToCheck);
           for (Map.Entry<SkyKey, ? extends NodeEntry> entry : entriesToCheck.entrySet()) {
-            if (entry.getValue().isDone() && entry.getValue().getErrorInfo() != null) {
-              // If any child has an error, we arbitrarily add a dep on the first one (needed
-              // for error bubbling) and throw an exception coming from it.
-              SkyKey errorKey = entry.getKey();
-              NodeEntry errorEntry = entry.getValue();
-              state.addTemporaryDirectDeps(GroupedListHelper.create(errorKey));
-              errorEntry.checkIfDoneForDirtyReverseDep(skyKey);
-              // Perform the necessary bookkeeping for any deps that are not being used.
-              for (Map.Entry<SkyKey, ? extends NodeEntry> depEntry : entriesToCheck.entrySet()) {
-                if (!depEntry.getKey().equals(errorKey)) {
-                  depEntry.getValue().removeReverseDep(skyKey);
-                }
-              }
-              if (!evaluatorContext.getVisitor().preventNewEvaluations()) {
-                // An error was already thrown in the evaluator. Don't do anything here.
-                return DirtyOutcome.ALREADY_PROCESSED;
-              }
-              throw SchedulerException.ofError(
-                  errorEntry.getErrorInfo(), entry.getKey(), ImmutableSet.of(skyKey));
+            NodeEntry nodeEntryToCheck = entry.getValue();
+            SkyValue valueMaybeWithMetadata = nodeEntryToCheck.getValueMaybeWithMetadata();
+            if (valueMaybeWithMetadata == null) {
+              continue;
             }
+            ErrorInfo maybeErrorInfo = ValueWithMetadata.getMaybeErrorInfo(valueMaybeWithMetadata);
+            if (maybeErrorInfo == null) {
+              continue;
+            }
+            // This child has an error. We add a dep from this node to it and throw an exception
+            // coming from it.
+            SkyKey errorKey = entry.getKey();
+            state.addTemporaryDirectDeps(GroupedListHelper.create(errorKey));
+            nodeEntryToCheck.checkIfDoneForDirtyReverseDep(skyKey);
+            // Perform the necessary bookkeeping for any deps that are not being used.
+            for (Map.Entry<SkyKey, ? extends NodeEntry> depEntry : entriesToCheck.entrySet()) {
+              if (!depEntry.getKey().equals(errorKey)) {
+                depEntry.getValue().removeReverseDep(skyKey);
+              }
+            }
+            if (!evaluatorContext.getVisitor().preventNewEvaluations()) {
+              // An error was already thrown in the evaluator. Don't do anything here.
+              return DirtyOutcome.ALREADY_PROCESSED;
+            }
+            throw SchedulerException.ofError(maybeErrorInfo, errorKey, ImmutableSet.of(skyKey));
           }
         }
         // It is safe to add these deps back to the node -- even if one of them has changed, the
@@ -359,8 +365,15 @@
         try {
           evaluatorContext.getProgressReceiver().stateStarting(skyKey,
               NodeState.INITIALIZING_ENVIRONMENT);
-          env = new SkyFunctionEnvironment(skyKey, state.getTemporaryDirectDeps(), oldDeps,
-              evaluatorContext);
+          env =
+              new SkyFunctionEnvironment(
+                  skyKey, state.getTemporaryDirectDeps(), oldDeps, evaluatorContext);
+        } catch (UndonePreviouslyRequestedDep undonePreviouslyRequestedDep) {
+          // If a previously requested dep is no longer done, restart this node from scratch.
+          maybeEraseNodeToRestartFromScratch(
+              skyKey, state, SkyFunction.SENTINEL_FOR_RESTART_FROM_SCRATCH);
+          evaluatorContext.getVisitor().enqueueEvaluation(skyKey);
+          return;
         } finally {
           evaluatorContext.getProgressReceiver().stateEnding(skyKey,
               NodeState.INITIALIZING_ENVIRONMENT, -1);
@@ -417,23 +430,31 @@
                 return;
               } else {
                 logger.warning(
-                    "Aborting evaluation due to "
-                        + builderException
-                        + " while evaluating "
-                        + skyKey);
+                    String.format(
+                        "Aborting evaluation due to %s while evaluating %s",
+                        builderException, skyKey));
               }
             }
 
+            if (maybeHandleRegisteringNewlyDiscoveredDepsForDoneEntry(
+                skyKey, state, oldDeps, env, evaluatorContext.keepGoing())) {
+              // A newly requested dep transitioned from done to dirty before this node finished.
+              // If shouldFailFast is true, this node won't be signalled by any such newly dirtied
+              // dep (because new evaluations have been prevented), and this node is responsible for
+              // throwing the SchedulerException below.
+              // Otherwise, this node will be signalled again, and so we should return.
+              if (!shouldFailFast) {
+                return;
+              }
+            }
             boolean isTransitivelyTransient =
                 reifiedBuilderException.isTransient()
                     || env.isAnyDirectDepErrorTransitivelyTransient()
                     || env.isAnyNewlyRequestedDepErrorTransitivelyTransient();
-            ErrorInfo errorInfo = evaluatorContext.getErrorInfoManager().fromException(
-                skyKey,
-                reifiedBuilderException,
-                isTransitivelyTransient);
-            registerNewlyDiscoveredDepsForDoneEntry(
-                skyKey, state, oldDeps, env, evaluatorContext.keepGoing());
+            ErrorInfo errorInfo =
+                evaluatorContext
+                    .getErrorInfoManager()
+                    .fromException(skyKey, reifiedBuilderException, isTransitivelyTransient);
             env.setError(state, errorInfo);
             Set<SkyKey> rdepsToBubbleUpTo =
                 env.commit(
@@ -475,9 +496,13 @@
 
           try {
             evaluatorContext.getProgressReceiver().stateStarting(skyKey, NodeState.COMMIT);
+            if (maybeHandleRegisteringNewlyDiscoveredDepsForDoneEntry(
+                skyKey, state, oldDeps, env, evaluatorContext.keepGoing())) {
+              // A newly requested dep transitioned from done to dirty before this node finished.
+              // This node will be signalled again, and so we should return.
+              return;
+            }
             env.setValue(value);
-            registerNewlyDiscoveredDepsForDoneEntry(
-                skyKey, state, oldDeps, env, evaluatorContext.keepGoing());
             env.commit(state, EnqueueParentBehavior.ENQUEUE);
           } finally {
             evaluatorContext.getProgressReceiver().stateEnding(skyKey, NodeState.COMMIT, -1);
@@ -485,14 +510,14 @@
           return;
         }
 
-        if (env.getDepErrorKey() != null) {
+        SkyKey childErrorKey = env.getDepErrorKey();
+        if (childErrorKey != null) {
           Preconditions.checkState(
-              !evaluatorContext.keepGoing(), "%s %s %s", skyKey, state, env.getDepErrorKey());
+              !evaluatorContext.keepGoing(), "%s %s %s", skyKey, state, childErrorKey);
           // We encountered a child error in noKeepGoing mode, so we want to fail fast. But we first
           // need to add the edge between the current node and the child error it requested so that
           // error bubbling can occur. Note that this edge will subsequently be removed during graph
           // cleaning (since the current node will never be committed to the graph).
-          SkyKey childErrorKey = env.getDepErrorKey();
           NodeEntry childErrorEntry =
               Preconditions.checkNotNull(
                   graph.get(skyKey, Reason.OTHER, childErrorKey),
@@ -511,15 +536,31 @@
             } else {
               childErrorState = childErrorEntry.addReverseDepAndCheckIfDone(skyKey);
             }
-            Preconditions.checkState(
-                childErrorState == DependencyState.DONE,
-                "skyKey: %s, state: %s childErrorKey: %s",
-                skyKey,
-                state,
-                childErrorKey,
-                childErrorEntry);
+            if (childErrorState != DependencyState.DONE) {
+              // The child in error may have transitioned from done to dirty between when this node
+              // discovered the error and now. Notify the graph inconsistency receiver so that we
+              // can crash if that's unexpected.
+              // We don't enqueue the child, even if it returns NEEDS_SCHEDULING, because we are
+              // about to shut down evaluation.
+              evaluatorContext
+                  .getGraphInconsistencyReceiver()
+                  .noteInconsistencyAndMaybeThrow(
+                      skyKey, childErrorKey, Inconsistency.CHILD_UNDONE_FOR_BUILDING_NODE);
+            }
           }
-          ErrorInfo childErrorInfo = Preconditions.checkNotNull(childErrorEntry.getErrorInfo());
+          SkyValue childErrorInfoMaybe =
+              Preconditions.checkNotNull(
+                  env.maybeGetValueFromErrorOrDeps(childErrorKey),
+                  "dep error found but then lost while building: %s %s",
+                  skyKey,
+                  childErrorKey);
+          ErrorInfo childErrorInfo =
+              Preconditions.checkNotNull(
+                  ValueWithMetadata.getMaybeErrorInfo(childErrorInfoMaybe),
+                  "dep error found but then wasn't an error while building: %s %s %s",
+                  skyKey,
+                  childErrorKey,
+                  childErrorInfoMaybe);
           evaluatorContext.getVisitor().preventNewEvaluations();
           throw SchedulerException.ofError(childErrorInfo, childErrorKey, ImmutableSet.of(skyKey));
         }
@@ -652,16 +693,28 @@
   }
 
   /**
-   * Add any additional deps that were registered during the run of a builder that finished by
-   * creating a node or throwing an error. Builders may throw errors even if all their deps were not
-   * provided -- we trust that a SkyFunction might know it should throw an error even if not all of
-   * its requested deps are done. However, that means we're assuming the SkyFunction would throw
-   * that same error if all of its requested deps were done. Unfortunately, there is no way to
-   * enforce that condition.
+   * Add any newly discovered deps that were registered during the run of a SkyFunction that
+   * finished by returning a value or throwing an error. SkyFunctions may throw errors even if all
+   * their deps were not provided -- we trust that a SkyFunction might know it should throw an error
+   * even if not all of its requested deps are done. However, that means we're assuming the
+   * SkyFunction would throw that same error if all of its requested deps were done. Unfortunately,
+   * there is no way to enforce that condition.
+   *
+   * <p>Returns {@code true} if any newly discovered dep is dirty when this node registers itself as
+   * an rdep.
+   *
+   * <p>This can happen if a newly discovered dep transitions from done to dirty between when this
+   * node's evaluation accessed the dep's value and here. Adding this node as an rdep of that dep
+   * (or checking that this node is an rdep of that dep) will cause this node to be signalled when
+   * that dep completes.
+   *
+   * <p>If this returns {@code true}, this node should not actually finish, and this evaluation
+   * attempt should make no changes to the node after this method returns, because a completing dep
+   * may schedule a new evaluation attempt at any time.
    *
    * @throws InterruptedException
    */
-  private void registerNewlyDiscoveredDepsForDoneEntry(
+  private boolean maybeHandleRegisteringNewlyDiscoveredDepsForDoneEntry(
       SkyKey skyKey,
       NodeEntry entry,
       Set<SkyKey> oldDeps,
@@ -670,7 +723,7 @@
       throws InterruptedException {
     Iterator<SkyKey> it = env.getNewlyRequestedDeps().iterator();
     if (!it.hasNext()) {
-      return;
+      return false;
     }
 
     // We don't expect any unfinished deps in a keep-going build.
@@ -685,43 +738,57 @@
     InterruptibleSupplier<Map<SkyKey, ? extends NodeEntry>> newlyAddedNewDepNodes =
         graph.getBatchAsync(skyKey, Reason.RDEP_ADDITION, newlyAddedNewDeps);
 
+    // Note that the depEntries in the following two loops can't be null. In a keep-going build, we
+    // normally expect all deps to be done. In a non-keep-going build, If env.newlyRequestedDeps
+    // contained a key for a node that wasn't done, then it would have been removed via
+    // removeUndoneNewlyRequestedDeps() just above this loop. However, with intra-evaluation
+    // dirtying, a dep may not be done.
+    boolean dirtyDepFound = false;
     for (Map.Entry<SkyKey, ? extends NodeEntry> newDep :
         graph.getBatch(skyKey, Reason.SIGNAL_DEP, previouslyRegisteredNewDeps).entrySet()) {
-      // Note that this depEntry can't be null. In a keep-going build, we expect all deps to be
-      // done. In a non-keep-going build, If env.newlyRequestedDeps contained a key with a
-      // null entry, then it would have been added to unfinishedDeps and then removed from
-      // env.newlyRequestedDeps just above this loop.
-      NodeEntry depEntry = newDep.getValue();
-      DependencyState triState = depEntry.checkIfDoneForDirtyReverseDep(skyKey);
-      Preconditions.checkState(
-          DependencyState.DONE == triState,
-          "new dep %s was not already done for %s. NodeEntry: %s. DepNodeEntry: %s",
-          newDep,
-          skyKey,
-          entry,
-          depEntry);
-      entry.signalDep();
+      DependencyState triState = newDep.getValue().checkIfDoneForDirtyReverseDep(skyKey);
+      if (maybeHandleUndoneDepForDoneEntry(entry, triState, skyKey, newDep.getKey())) {
+        dirtyDepFound = true;
+      }
     }
 
     for (SkyKey newDep : newlyAddedNewDeps) {
-      // Note that this depEntry can't be null. In a keep-going build, we expect all deps to be
-      // done. In a non-keep-going build, If env.newlyRequestedDeps contained a key with a
-      // null entry, then it would have been added to unfinishedDeps and then removed from
-      // env.newlyRequestedDeps just above this loop.
       NodeEntry depEntry =
           Preconditions.checkNotNull(newlyAddedNewDepNodes.get().get(newDep), newDep);
       DependencyState triState = depEntry.addReverseDepAndCheckIfDone(skyKey);
-      Preconditions.checkState(
-          DependencyState.DONE == triState,
-          "new dep %s was not already done for %s. NodeEntry: %s. DepNodeEntry: %s",
-          newDep,
-          skyKey,
-          entry,
-          depEntry);
-      entry.signalDep();
+      if (maybeHandleUndoneDepForDoneEntry(entry, triState, skyKey, newDep)) {
+        dirtyDepFound = true;
+      }
     }
+
     Preconditions.checkState(
-        entry.isReady(), "%s %s %s", skyKey, entry, env.getNewlyRequestedDeps());
+        dirtyDepFound || entry.isReady(), "%s %s %s", skyKey, entry, env.getNewlyRequestedDeps());
+    return dirtyDepFound;
+  }
+
+  /**
+   * Returns {@code true} if the dep was not done. Notifies the {@link GraphInconsistencyReceiver}
+   * if so. Schedules the dep for evaluation if necessary.
+   *
+   * <p>Otherwise, returns {@code false} and signals this node.
+   */
+  private boolean maybeHandleUndoneDepForDoneEntry(
+      NodeEntry entry, DependencyState triState, SkyKey skyKey, SkyKey depKey) {
+    if (triState == DependencyState.DONE) {
+      entry.signalDep();
+      return false;
+    }
+    // The dep may have transitioned from done to dirty between when this node read its value and
+    // now. Notify the graph inconsistency receiver so that we can crash if that's unexpected. We
+    // schedule the dep if it needs scheduling, because nothing else can if we don't.
+    evaluatorContext
+        .getGraphInconsistencyReceiver()
+        .noteInconsistencyAndMaybeThrow(
+            skyKey, depKey, Inconsistency.CHILD_UNDONE_FOR_BUILDING_NODE);
+    if (triState == DependencyState.NEEDS_SCHEDULING) {
+      evaluatorContext.getVisitor().enqueueEvaluation(depKey);
+    }
+    return true;
   }
 
   /**
diff --git a/src/main/java/com/google/devtools/build/skyframe/EvaluationSuccessStateSupplier.java b/src/main/java/com/google/devtools/build/skyframe/EvaluationSuccessStateSupplier.java
index f4c320c..181727b 100644
--- a/src/main/java/com/google/devtools/build/skyframe/EvaluationSuccessStateSupplier.java
+++ b/src/main/java/com/google/devtools/build/skyframe/EvaluationSuccessStateSupplier.java
@@ -14,6 +14,7 @@
 package com.google.devtools.build.skyframe;
 
 import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
 import com.google.devtools.build.skyframe.EvaluationProgressReceiver.EvaluationSuccessState;
 
 /**
@@ -42,4 +43,10 @@
           e);
     }
   }
+
+  static Supplier<EvaluationSuccessState> fromSkyValue(SkyValue value) {
+    return ValueWithMetadata.justValue(value) != null
+        ? Suppliers.ofInstance(EvaluationSuccessState.SUCCESS)
+        : Suppliers.ofInstance(EvaluationSuccessState.FAILURE);
+  }
 }
diff --git a/src/main/java/com/google/devtools/build/skyframe/GraphInconsistencyReceiver.java b/src/main/java/com/google/devtools/build/skyframe/GraphInconsistencyReceiver.java
index 6aa161a..5d99c7d 100644
--- a/src/main/java/com/google/devtools/build/skyframe/GraphInconsistencyReceiver.java
+++ b/src/main/java/com/google/devtools/build/skyframe/GraphInconsistencyReceiver.java
@@ -30,7 +30,8 @@
   /** The type of inconsistency detected. */
   enum Inconsistency {
     RESET_REQUESTED,
-    CHILD_MISSING_FOR_DIRTY_NODE
+    CHILD_MISSING_FOR_DIRTY_NODE,
+    CHILD_UNDONE_FOR_BUILDING_NODE
   }
 
   /** A {@link GraphInconsistencyReceiver} that crashes on any inconsistency. */
diff --git a/src/main/java/com/google/devtools/build/skyframe/SimpleCycleDetector.java b/src/main/java/com/google/devtools/build/skyframe/SimpleCycleDetector.java
index d1348daa..cf82baf 100644
--- a/src/main/java/com/google/devtools/build/skyframe/SimpleCycleDetector.java
+++ b/src/main/java/com/google/devtools/build/skyframe/SimpleCycleDetector.java
@@ -27,6 +27,7 @@
 import com.google.devtools.build.lib.util.GroupedList;
 import com.google.devtools.build.skyframe.ParallelEvaluatorContext.EnqueueParentBehavior;
 import com.google.devtools.build.skyframe.QueryableGraph.Reason;
+import com.google.devtools.build.skyframe.SkyFunctionEnvironment.UndonePreviouslyRequestedDep;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Deque;
@@ -153,12 +154,21 @@
             "Node %s was not successfully evaluated, but had no child errors. NodeEntry: %s",
             key,
             entry);
-        SkyFunctionEnvironment env =
-            new SkyFunctionEnvironment(
-                key,
-                directDeps,
-                Sets.difference(entry.getAllRemainingDirtyDirectDeps(), removedDeps),
-                evaluatorContext);
+        SkyFunctionEnvironment env = null;
+        try {
+          env =
+              new SkyFunctionEnvironment(
+                  key,
+                  directDeps,
+                  Sets.difference(entry.getAllRemainingDirtyDirectDeps(), removedDeps),
+                  evaluatorContext);
+        } catch (UndonePreviouslyRequestedDep undoneDep) {
+          // All children were finished according to the CHILDREN_FINISHED sentinel, and cycle
+          // detection does not do normal SkyFunction evaluation, so no restarting nor child
+          // dirtying was possible.
+          throw new IllegalStateException(
+              "Previously requested dep not done: " + undoneDep.getDepKey(), undoneDep);
+        }
         env.setError(entry, ErrorInfo.fromChildErrors(key, errorDeps));
         env.commit(entry, EnqueueParentBehavior.SIGNAL);
       } else {
diff --git a/src/main/java/com/google/devtools/build/skyframe/SkyFunctionEnvironment.java b/src/main/java/com/google/devtools/build/skyframe/SkyFunctionEnvironment.java
index 6d7918f..4b2fc51 100644
--- a/src/main/java/com/google/devtools/build/skyframe/SkyFunctionEnvironment.java
+++ b/src/main/java/com/google/devtools/build/skyframe/SkyFunctionEnvironment.java
@@ -30,6 +30,7 @@
 import com.google.devtools.build.lib.util.GroupedList;
 import com.google.devtools.build.lib.util.GroupedList.GroupedListHelper;
 import com.google.devtools.build.skyframe.EvaluationProgressReceiver.EvaluationState;
+import com.google.devtools.build.skyframe.GraphInconsistencyReceiver.Inconsistency;
 import com.google.devtools.build.skyframe.NodeEntry.DependencyState;
 import com.google.devtools.build.skyframe.ParallelEvaluatorContext.EnqueueParentBehavior;
 import com.google.devtools.build.skyframe.QueryableGraph.Reason;
@@ -149,14 +150,24 @@
       GroupedList<SkyKey> directDeps,
       Set<SkyKey> oldDeps,
       ParallelEvaluatorContext evaluatorContext)
-      throws InterruptedException {
-    this(skyKey, directDeps, null, oldDeps, evaluatorContext);
+      throws InterruptedException, UndonePreviouslyRequestedDep {
+    super(directDeps);
+    this.skyKey = skyKey;
+    this.oldDeps = oldDeps;
+    this.evaluatorContext = evaluatorContext;
+    this.bubbleErrorInfo = null;
+    this.previouslyRequestedDepsValues =
+        batchPrefetch(skyKey, directDeps, oldDeps, /*assertDone=*/ true, skyKey);
+    Preconditions.checkState(
+        !this.previouslyRequestedDepsValues.containsKey(ErrorTransienceValue.KEY),
+        "%s cannot have a dep on ErrorTransienceValue during building",
+        skyKey);
   }
 
   SkyFunctionEnvironment(
       SkyKey skyKey,
       GroupedList<SkyKey> directDeps,
-      @Nullable Map<SkyKey, ValueWithMetadata> bubbleErrorInfo,
+      Map<SkyKey, ValueWithMetadata> bubbleErrorInfo,
       Set<SkyKey> oldDeps,
       ParallelEvaluatorContext evaluatorContext)
       throws InterruptedException {
@@ -164,9 +175,14 @@
     this.skyKey = skyKey;
     this.oldDeps = oldDeps;
     this.evaluatorContext = evaluatorContext;
-    this.previouslyRequestedDepsValues =
-        batchPrefetch(skyKey, directDeps, oldDeps, /*assertDone=*/ bubbleErrorInfo == null, skyKey);
-    this.bubbleErrorInfo = bubbleErrorInfo;
+    this.bubbleErrorInfo = Preconditions.checkNotNull(bubbleErrorInfo);
+    try {
+      this.previouslyRequestedDepsValues =
+          batchPrefetch(skyKey, directDeps, oldDeps, /*assertDone=*/ false, skyKey);
+    } catch (UndonePreviouslyRequestedDep undonePreviouslyRequestedDep) {
+      throw new IllegalStateException(
+          "batchPrefetch can't throw UndonePreviouslyRequestedDep unless assertDone is true");
+    }
     Preconditions.checkState(
         !this.previouslyRequestedDepsValues.containsKey(ErrorTransienceValue.KEY),
         "%s cannot have a dep on ErrorTransienceValue during building",
@@ -179,7 +195,7 @@
       Set<SkyKey> oldDeps,
       boolean assertDone,
       SkyKey keyForDebugging)
-      throws InterruptedException {
+      throws InterruptedException, UndonePreviouslyRequestedDep {
     Set<SkyKey> depKeysAsSet = null;
     if (PREFETCH_OLD_DEPS) {
       if (!oldDeps.isEmpty()) {
@@ -211,9 +227,15 @@
         ImmutableMap.builderWithExpectedSize(batchMap.size());
     for (Entry<SkyKey, ? extends NodeEntry> entry : batchMap.entrySet()) {
       SkyValue valueMaybeWithMetadata = entry.getValue().getValueMaybeWithMetadata();
-      if (assertDone) {
-        Preconditions.checkNotNull(
-            valueMaybeWithMetadata, "%s had not done %s", keyForDebugging, entry);
+      if (assertDone && valueMaybeWithMetadata == null) {
+        // A previously requested dep may have transitioned from done to dirty between when the node
+        // was read during a previous attempt to build this node and now. Notify the graph
+        // inconsistency receiver so that we can crash if that's unexpected.
+        evaluatorContext
+            .getGraphInconsistencyReceiver()
+            .noteInconsistencyAndMaybeThrow(
+                skyKey, entry.getKey(), Inconsistency.CHILD_UNDONE_FOR_BUILDING_NODE);
+        throw new UndonePreviouslyRequestedDep(entry.getKey());
       }
       depValuesBuilder.put(
           entry.getKey(), valueMaybeWithMetadata == null ? NULL_MARKER : valueMaybeWithMetadata);
@@ -407,6 +429,9 @@
       newlyRequestedDepsValues.put(key, valueOrNullMarker);
       if (valueOrNullMarker == NULL_MARKER) {
         // TODO(mschaller): handle registered deps that transitioned from done to dirty during eval
+        // But how? Restarting the current node may not help, because this dep was *registered*, not
+        // requested. For now, no node that gets registered as a dep is eligible for
+        // intra-evaluation dirtying, so let it crash.
         Preconditions.checkState(!assertDone, "%s had not done: %s", skyKey, key);
         continue;
       }
@@ -428,7 +453,7 @@
    * (Note that none of the maps can have {@code null} as a value.)
    */
   @Nullable
-  private SkyValue maybeGetValueFromErrorOrDeps(SkyKey key) {
+  SkyValue maybeGetValueFromErrorOrDeps(SkyKey key) {
     if (bubbleErrorInfo != null) {
       ValueWithMetadata bubbleErrorInfoValue = bubbleErrorInfo.get(key);
       if (bubbleErrorInfoValue != null) {
@@ -713,7 +738,7 @@
         .evaluated(
             skyKey,
             evaluationState == EvaluationState.BUILT ? value : null,
-            new EvaluationSuccessStateSupplier(primaryEntry),
+            EvaluationSuccessStateSupplier.fromSkyValue(valueWithMetadata),
             evaluationState);
 
     evaluatorContext.signalValuesAndEnqueueIfReady(
@@ -756,4 +781,17 @@
     }
     newlyRequestedDeps.endGroup();
   }
+
+  /** Thrown during environment construction if a previously requested dep is no longer done. */
+  static class UndonePreviouslyRequestedDep extends Exception {
+    private final SkyKey depKey;
+
+    UndonePreviouslyRequestedDep(SkyKey depKey) {
+      this.depKey = depKey;
+    }
+
+    SkyKey getDepKey() {
+      return depKey;
+    }
+  }
 }