Respect --noexperimental_check_output_files in FileSystemValueChecker. FileStateValues for output files can make their way into the Skyframe graph if a source file is symlink to an output file.

Also fix a bug where ExternalFilesHelper#isExternalFileSeen would always return true after returning true once in the past. This meant if an external file ever made its way into the Skyframe graph, we would always do a full graph scan at the beginning of each build (iow, we would always waste some CPU time doing nothing interesting).

--
MOS_MIGRATED_REVID=118190190
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/DirtinessCheckerUtils.java b/src/main/java/com/google/devtools/build/lib/skyframe/DirtinessCheckerUtils.java
index 490a4e3..5cad7e7 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/DirtinessCheckerUtils.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/DirtinessCheckerUtils.java
@@ -18,7 +18,7 @@
 
 import com.google.common.base.Objects;
 import com.google.common.collect.ImmutableList;
-import com.google.devtools.build.lib.pkgcache.PathPackageLocator;
+import com.google.devtools.build.lib.skyframe.ExternalFilesHelper.FileType;
 import com.google.devtools.build.lib.util.Preconditions;
 import com.google.devtools.build.lib.util.io.TimestampGranularityMonitor;
 import com.google.devtools.build.lib.vfs.Path;
@@ -27,6 +27,7 @@
 import com.google.devtools.build.skyframe.SkyValue;
 
 import java.io.IOException;
+import java.util.EnumSet;
 import java.util.Set;
 
 import javax.annotation.Nullable;
@@ -108,26 +109,24 @@
 
   /** Checks files outside of the package roots for changes. */
   static final class ExternalDirtinessChecker extends BasicFilesystemDirtinessChecker {
-    private final PathPackageLocator packageLocator;
+    private final ExternalFilesHelper externalFilesHelper;
+    private final EnumSet<FileType> fileTypesToCheck;
 
-    ExternalDirtinessChecker(PathPackageLocator packageLocator) {
-      this.packageLocator = packageLocator;
+    ExternalDirtinessChecker(ExternalFilesHelper externalFilesHelper,
+        EnumSet<FileType> fileTypesToCheck) {
+      this.externalFilesHelper = externalFilesHelper;
+      this.fileTypesToCheck = fileTypesToCheck;
     }
 
     @Override
     public boolean applies(SkyKey key) {
-      return super.applies(key)
-          && !ExternalFilesHelper.isInternal((RootedPath) key.argument(), packageLocator);
+      if (!super.applies(key)) {
+        return false;
+      }
+      FileType fileType = externalFilesHelper.getAndNoteFileType((RootedPath) key.argument());
+      return fileTypesToCheck.contains(fileType);
     }
 
-    /**
-     * Files under output_base/external have a dependency on the WORKSPACE file, so we don't add a
-     * new SkyValue to the graph yet because it might change once the WORKSPACE file has been
-     * parsed.
-     *
-     * <p>This dirtiness checker is a bit conservative: files that are outside the package roots
-     * but aren't under output_base/external/ could just be stat-ed here (but they aren't).</p>
-     */
     @Nullable
     @Override
     public SkyValue createNewValue(SkyKey key, @Nullable TimestampGranularityMonitor tsgm) {
@@ -137,9 +136,18 @@
     @Override
     public SkyValueDirtinessChecker.DirtyResult check(
         SkyKey skyKey, SkyValue oldValue, @Nullable TimestampGranularityMonitor tsgm) {
-      return Objects.equal(super.createNewValue(skyKey, tsgm), oldValue)
-          ? SkyValueDirtinessChecker.DirtyResult.notDirty(oldValue)
-          : SkyValueDirtinessChecker.DirtyResult.dirty(oldValue);
+      SkyValue newValue = super.createNewValue(skyKey, tsgm);
+      if (Objects.equal(newValue, oldValue)) {
+        return SkyValueDirtinessChecker.DirtyResult.notDirty(oldValue);
+      }
+      FileType fileType = externalFilesHelper.getAndNoteFileType((RootedPath) skyKey.argument());
+      if (fileType == FileType.EXTERNAL_REPO) {
+        // Files under output_base/external have a dependency on the WORKSPACE file, so we don't add
+        // a new SkyValue to the graph yet because it might change once the WORKSPACE file has been
+        // parsed.
+        return SkyValueDirtinessChecker.DirtyResult.dirty(oldValue);
+      }
+      return SkyValueDirtinessChecker.DirtyResult.dirtyWithNewValue(oldValue, newValue);
     }
   }
 
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java b/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java
index 6126197..d55a029 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java
@@ -14,8 +14,11 @@
 package com.google.devtools.build.lib.skyframe;
 
 import com.google.devtools.build.lib.cmdline.Label;
+import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadCompatible;
+import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
 import com.google.devtools.build.lib.pkgcache.PathPackageLocator;
 import com.google.devtools.build.lib.rules.repository.RepositoryFunction;
+import com.google.devtools.build.lib.util.Preconditions;
 import com.google.devtools.build.lib.vfs.Path;
 import com.google.devtools.build.lib.vfs.PathFragment;
 import com.google.devtools.build.lib.vfs.RootedPath;
@@ -28,9 +31,10 @@
   private final AtomicReference<PathPackageLocator> pkgLocator;
   private final ExternalFileAction externalFileAction;
 
-  // This variable is set to true from multiple threads, but only read once, in the main thread.
+  // These variables are set to true from multiple threads, but only read in the main thread.
   // So volatility or an AtomicBoolean is not needed.
-  private boolean externalFileSeen = false;
+  private boolean anyOutputFilesSeen = false;
+  private boolean anyNonOutputExternalFilesSeen = false;
 
   /**
    * @param pkgLocator an {@link AtomicReference} to a {@link PathPackageLocator} used to
@@ -39,32 +43,106 @@
    */
   public ExternalFilesHelper(
       AtomicReference<PathPackageLocator> pkgLocator, boolean errorOnExternalFiles) {
+    this(
+        pkgLocator,
+        errorOnExternalFiles
+            ? ExternalFileAction.ERROR_OUT
+            : ExternalFileAction.DEPEND_ON_EXTERNAL_PKG_FOR_EXTERNAL_REPO_FILES);
+  }
+
+  private ExternalFilesHelper(AtomicReference<PathPackageLocator> pkgLocator,
+      ExternalFileAction externalFileAction) {
     this.pkgLocator = pkgLocator;
-    if (errorOnExternalFiles) {
-      this.externalFileAction = ExternalFileAction.ERROR_OUT;
-    } else {
-      this.externalFileAction = ExternalFileAction.DEPEND_ON_EXTERNAL_PKG;
-    }
+    this.externalFileAction = externalFileAction;
   }
 
   private enum ExternalFileAction {
-    // Re-check the files when the WORKSPACE file changes.
-    DEPEND_ON_EXTERNAL_PKG,
+    /** Re-check the files when the WORKSPACE file changes. */
+    DEPEND_ON_EXTERNAL_PKG_FOR_EXTERNAL_REPO_FILES,
 
-    // Throw an exception if there is an external file.
+    /** Throw an exception if there is an external file. */
     ERROR_OUT,
   }
 
-  boolean isExternalFileSeen() {
-    return externalFileSeen;
+  enum FileType {
+    /** A file inside the package roots or in an external repository. */
+    INTERNAL,
+
+    /** A file outside the package roots about which we may make no other assumptions. */
+    EXTERNAL_MUTABLE,
+
+    /**
+     * A file in Bazel's output tree that's a proper output of an action (*not* a source file in an
+     * external repository). Such files are theoretically mutable, but certain Blaze flags may tell
+     * Blaze to assume these files are immutable.
+     *
+     * Note that {@link ExternalFilesHelper#maybeHandleExternalFile} is only used for
+     * {@link FileStateValue} and {@link DirectoryStateValue}, and also note that output files do
+     * not normally have corresponding {@link FileValue} instances (and thus also
+     * {@link FileStateValue} instances) in the Skyframe graph ({@link ArtifactFunction} only uses
+     * {@link FileValue}s for source files). But {@link FileStateValue}s for output files can still
+     * make their way into the Skyframe graph if e.g. a source file is a symlink to an output file.
+     */
+    // TODO(nharmata): Consider an alternative design where we have an OutputFileDiffAwareness. This
+    // could work but would first require that we clean up all RootedPath usage.
+    OUTPUT,
+
+    /**
+     * A file in the part of Bazel's output tree that contains (/ symlinks to) to external
+     * repositories.
+     */
+    EXTERNAL_REPO,
   }
 
-  static boolean isInternal(RootedPath rootedPath, PathPackageLocator packageLocator) {
-    // TODO(bazel-team): This is inefficient when there are a lot of package roots or there are a
-    // lot of external directories. Consider either explicitly preventing this case or using a more
-    // efficient approach here (e.g. use a trie for determining if a file is under an external
-    // directory).
-    return packageLocator.getPathEntries().contains(rootedPath.getRoot());
+  static class ExternalFilesKnowledge {
+    final boolean anyOutputFilesSeen;
+    final boolean anyNonOutputExternalFilesSeen;
+
+    private ExternalFilesKnowledge(boolean anyOutputFilesSeen,
+        boolean anyNonOutputExternalFilesSeen) {
+      this.anyOutputFilesSeen = anyOutputFilesSeen;
+      this.anyNonOutputExternalFilesSeen = anyNonOutputExternalFilesSeen;
+    }
+  }
+
+  @ThreadCompatible
+  ExternalFilesKnowledge getExternalFilesKnowledge() {
+    return new ExternalFilesKnowledge(anyOutputFilesSeen, anyNonOutputExternalFilesSeen);
+  }
+
+  @ThreadCompatible
+  void setExternalFilesKnowledge(ExternalFilesKnowledge externalFilesKnowledge) {
+    anyOutputFilesSeen = externalFilesKnowledge.anyOutputFilesSeen;
+    anyNonOutputExternalFilesSeen = externalFilesKnowledge.anyNonOutputExternalFilesSeen;
+  }
+
+  ExternalFilesHelper cloneWithFreshExternalFilesKnowledge() {
+    return new ExternalFilesHelper(pkgLocator, externalFileAction);
+  }
+
+  FileType getAndNoteFileType(RootedPath rootedPath) {
+    PathPackageLocator packageLocator = pkgLocator.get();
+    if (packageLocator.getPathEntries().contains(rootedPath.getRoot())) {
+      return FileType.INTERNAL;
+    }
+    // The outputBase may be null if we're not actually running a build.
+    Path outputBase = packageLocator.getOutputBase();
+    if (outputBase == null) {
+      anyNonOutputExternalFilesSeen = true;
+      return FileType.EXTERNAL_MUTABLE;
+    }
+    if (rootedPath.asPath().startsWith(outputBase)) {
+      Path externalRepoDir = outputBase.getRelative(Label.EXTERNAL_PATH_PREFIX);
+      if (rootedPath.asPath().startsWith(externalRepoDir)) {
+        anyNonOutputExternalFilesSeen = true;
+        return FileType.EXTERNAL_REPO;
+      } else {
+        anyOutputFilesSeen = true;
+        return FileType.OUTPUT;
+      }
+    }
+    anyNonOutputExternalFilesSeen = true;
+    return FileType.EXTERNAL_MUTABLE;
   }
 
   /**
@@ -72,26 +150,22 @@
    * under a package root then this adds a dependency on the //external package. If the action is
    * ERROR_OUT, it will throw an error instead.
    */
+  @ThreadSafe
   public void maybeHandleExternalFile(RootedPath rootedPath, SkyFunction.Environment env)
       throws FileOutsidePackageRootsException {
-    if (isInternal(rootedPath, pkgLocator.get())) {
+    FileType fileType = getAndNoteFileType(rootedPath);
+    if (fileType == FileType.INTERNAL) {
       return;
     }
-
-    externalFileSeen = true;
-    if (externalFileAction == ExternalFileAction.ERROR_OUT) {
-      throw new FileOutsidePackageRootsException(rootedPath);
-    }
-
-    // The outputBase may be null if we're not actually running a build.
-    Path outputBase = pkgLocator.get().getOutputBase();
-    if (outputBase == null) {
+    if (fileType == FileType.OUTPUT || fileType == FileType.EXTERNAL_MUTABLE) {
+      if (externalFileAction == ExternalFileAction.ERROR_OUT) {
+        throw new FileOutsidePackageRootsException(rootedPath);
+      }
       return;
     }
-    Path relativeExternal = outputBase.getRelative(Label.EXTERNAL_PATH_PREFIX);
-    if (!rootedPath.asPath().startsWith(relativeExternal)) {
-      return;
-    }
+    Preconditions.checkState(
+        externalFileAction == ExternalFileAction.DEPEND_ON_EXTERNAL_PKG_FOR_EXTERNAL_REPO_FILES,
+        externalFileAction);
 
     // For files that are under $OUTPUT_BASE/external, add a dependency on the corresponding rule
     // so that if the WORKSPACE file changes, the File/DirectoryStateValue will be re-evaluated.
@@ -105,7 +179,8 @@
     // neither want to encourage nor optimize for since it is not common. So the set of external
     // files is small.
 
-    PathFragment repositoryPath = rootedPath.asPath().relativeTo(relativeExternal);
+    Path externalRepoDir = pkgLocator.get().getOutputBase().getRelative(Label.EXTERNAL_PATH_PREFIX);
+    PathFragment repositoryPath = rootedPath.asPath().relativeTo(externalRepoDir);
     if (repositoryPath.segmentCount() == 0) {
       // We are the top of the repository path (<outputBase>/external), not in an actual external
       // repository path.
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutor.java b/src/main/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutor.java
index 0a612f8..2684bfa 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutor.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutor.java
@@ -44,6 +44,8 @@
 import com.google.devtools.build.lib.skyframe.DirtinessCheckerUtils.ExternalDirtinessChecker;
 import com.google.devtools.build.lib.skyframe.DirtinessCheckerUtils.MissingDiffDirtinessChecker;
 import com.google.devtools.build.lib.skyframe.DirtinessCheckerUtils.UnionDirtinessChecker;
+import com.google.devtools.build.lib.skyframe.ExternalFilesHelper.ExternalFilesKnowledge;
+import com.google.devtools.build.lib.skyframe.ExternalFilesHelper.FileType;
 import com.google.devtools.build.lib.util.AbruptExitException;
 import com.google.devtools.build.lib.util.Pair;
 import com.google.devtools.build.lib.util.Preconditions;
@@ -67,6 +69,7 @@
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.EnumSet;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
@@ -218,7 +221,7 @@
     this.valueCacheEvictionLimit = packageCacheOptions.minLoadedPkgCountForCtNodeEviction;
     super.sync(eventHandler, packageCacheOptions, outputBase, workingDirectory,
         defaultsPackageContents, commandId, tsgm);
-    handleDiffs(eventHandler);
+    handleDiffs(eventHandler, packageCacheOptions.checkOutputFiles);
   }
 
   /**
@@ -273,6 +276,11 @@
    */
   @VisibleForTesting
   public void handleDiffs(EventHandler eventHandler) throws InterruptedException {
+    handleDiffs(eventHandler, /*checkOutputFiles=*/ false);
+  }
+
+  private void handleDiffs(EventHandler eventHandler, boolean checkOutputFiles)
+      throws InterruptedException {
     if (lastAnalysisDiscarded) {
       // Values were cleared last build, but they couldn't be deleted because they were needed for
       // the execution phase. We can delete them now.
@@ -295,7 +303,8 @@
       }
     }
     handleDiffsWithCompleteDiffInformation(tsgm, modifiedFilesByPathEntry);
-    handleDiffsWithMissingDiffInformation(eventHandler, tsgm, pathEntriesWithoutDiffInformation);
+    handleDiffsWithMissingDiffInformation(eventHandler, tsgm, pathEntriesWithoutDiffInformation,
+        checkOutputFiles);
   }
 
   /**
@@ -324,10 +333,13 @@
   private void handleDiffsWithMissingDiffInformation(EventHandler eventHandler,
       TimestampGranularityMonitor tsgm,
       Set<Pair<Path, DiffAwarenessManager.ProcessableModifiedFileSet>>
-          pathEntriesWithoutDiffInformation) throws InterruptedException {
+          pathEntriesWithoutDiffInformation, boolean checkOutputFiles) throws InterruptedException {
+    ExternalFilesKnowledge externalFilesKnowledge =
+        externalFilesHelper.getExternalFilesKnowledge();
     if (pathEntriesWithoutDiffInformation.isEmpty()
         && Iterables.isEmpty(customDirtinessCheckers)
-        && !externalFilesHelper.isExternalFileSeen()) {
+        && ((!externalFilesKnowledge.anyOutputFilesSeen || !checkOutputFiles)
+            && !externalFilesKnowledge.anyNonOutputExternalFilesSeen)) {
       // Avoid a full graph scan if we have good diff information for all path entries, there are
       // no custom checkers that need to look at the whole graph, and no external (not under any
       // path) files need to be checked.
@@ -350,6 +362,16 @@
       diffPackageRootsUnderWhichToCheck.add(pair.getFirst());
     }
 
+    // We freshly compute knowledge of the presence of external files in the skyframe graph. We use
+    // a fresh ExternalFilesHelper instance and only set the real instance's knowledge *after* we
+    // are done with the graph scan, lest an interrupt during the graph scan causes us to
+    // incorrectly think there are no longer any external files.
+    ExternalFilesHelper tmpExternalFilesHelper =
+        externalFilesHelper.cloneWithFreshExternalFilesKnowledge();
+    // See the comment for FileType.OUTPUT for why we need to consider output files here.
+    EnumSet fileTypesToCheck = checkOutputFiles
+        ? EnumSet.of(FileType.EXTERNAL_MUTABLE, FileType.EXTERNAL_REPO, FileType.OUTPUT)
+        : EnumSet.of(FileType.EXTERNAL_MUTABLE, FileType.EXTERNAL_REPO);
     Differencer.Diff diff =
         fsvc.getDirtyKeys(
             memoizingEvaluator.getValues(),
@@ -357,7 +379,9 @@
                 Iterables.concat(
                     customDirtinessCheckers,
                     ImmutableList.<SkyValueDirtinessChecker>of(
-                        new ExternalDirtinessChecker(pkgLocator.get()),
+                        new ExternalDirtinessChecker(
+                            tmpExternalFilesHelper,
+                            fileTypesToCheck),
                         new MissingDiffDirtinessChecker(diffPackageRootsUnderWhichToCheck)))));
     handleChangedFiles(diffPackageRootsUnderWhichToCheck, diff);
 
@@ -365,6 +389,11 @@
         pathEntriesWithoutDiffInformation) {
       pair.getSecond().markProcessed();
     }
+    // We use the knowledge gained during the graph scan that just completed. Otherwise, naively,
+    // once an external file gets into the Skyframe graph, we'll overly-conservatively always think
+    // the graph needs to be scanned.
+    externalFilesHelper.setExternalFilesKnowledge(
+        tmpExternalFilesHelper.getExternalFilesKnowledge());
   }
 
   private void handleChangedFiles(