If EXTERNAL files are present, list them explicitly and check only these files, instead of triggering a search of all nodes in the graph.

This should improve performance for builds with a small number of external files and a large number of nodes in the graph, since it avoids a full graph traversal.

PiperOrigin-RevId: 399193720
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java b/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java
index b8d9029..a4aebf2 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java
@@ -14,6 +14,7 @@
 package com.google.devtools.build.lib.skyframe;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
 import com.google.common.flogger.GoogleLogger;
 import com.google.devtools.build.lib.analysis.BlazeDirectories;
 import com.google.devtools.build.lib.analysis.ConfiguredRuleClassProvider.BundledFileSystem;
@@ -29,6 +30,7 @@
 import com.google.devtools.build.lib.vfs.RootedPath;
 import com.google.devtools.build.skyframe.SkyFunction;
 import java.io.IOException;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 
@@ -41,12 +43,17 @@
   private final BlazeDirectories directories;
   private final int maxNumExternalFilesToLog;
   private final AtomicInteger numExternalFilesLogged = new AtomicInteger(0);
+  private static final int MAX_EXTERNAL_FILES_TO_TRACK = 2500;
 
   // These variables are set to true from multiple threads, but only read in the main thread.
   // So volatility or an AtomicBoolean is not needed.
   private boolean anyOutputFilesSeen = false;
-  private boolean anyNonOutputExternalFilesSeen = false;
+  private boolean tooManyNonOutputExternalFilesSeen = false;
+  private boolean anyFilesInExternalReposSeen = false;
 
+  // This is a set of external files that are not in managed directories or
+  // external repositories.
+  private Set<RootedPath> nonOutputExternalFilesSeen = Sets.newConcurrentHashSet();
   private final ManagedDirectoriesKnowledge managedDirectoriesKnowledge;
 
   private ExternalFilesHelper(
@@ -190,24 +197,37 @@
 
   static class ExternalFilesKnowledge {
     final boolean anyOutputFilesSeen;
-    final boolean anyNonOutputExternalFilesSeen;
+    final Set<RootedPath> nonOutputExternalFilesSeen;
+    final boolean anyFilesInExternalReposSeen;
+    final boolean tooManyNonOutputExternalFilesSeen;
 
-    private ExternalFilesKnowledge(boolean anyOutputFilesSeen,
-        boolean anyNonOutputExternalFilesSeen) {
+    private ExternalFilesKnowledge(
+        boolean anyOutputFilesSeen,
+        Set<RootedPath> nonOutputExternalFilesSeen,
+        boolean anyFilesInExternalReposSeen,
+        boolean tooManyNonOutputExternalFilesSeen) {
       this.anyOutputFilesSeen = anyOutputFilesSeen;
-      this.anyNonOutputExternalFilesSeen = anyNonOutputExternalFilesSeen;
+      this.nonOutputExternalFilesSeen = nonOutputExternalFilesSeen;
+      this.anyFilesInExternalReposSeen = anyFilesInExternalReposSeen;
+      this.tooManyNonOutputExternalFilesSeen = tooManyNonOutputExternalFilesSeen;
     }
   }
 
   @ThreadCompatible
   ExternalFilesKnowledge getExternalFilesKnowledge() {
-    return new ExternalFilesKnowledge(anyOutputFilesSeen, anyNonOutputExternalFilesSeen);
+    return new ExternalFilesKnowledge(
+        anyOutputFilesSeen,
+        nonOutputExternalFilesSeen,
+        anyFilesInExternalReposSeen,
+        tooManyNonOutputExternalFilesSeen);
   }
 
   @ThreadCompatible
   void setExternalFilesKnowledge(ExternalFilesKnowledge externalFilesKnowledge) {
     anyOutputFilesSeen = externalFilesKnowledge.anyOutputFilesSeen;
-    anyNonOutputExternalFilesSeen = externalFilesKnowledge.anyNonOutputExternalFilesSeen;
+    nonOutputExternalFilesSeen = externalFilesKnowledge.nonOutputExternalFilesSeen;
+    anyFilesInExternalReposSeen = externalFilesKnowledge.anyFilesInExternalReposSeen;
+    tooManyNonOutputExternalFilesSeen = externalFilesKnowledge.tooManyNonOutputExternalFilesSeen;
   }
 
   ExternalFilesHelper cloneWithFreshExternalFilesKnowledge() {
@@ -227,12 +247,19 @@
     RepositoryName repositoryName =
         managedDirectoriesKnowledge.getOwnerRepository(rootedPath.getRootRelativePath());
     if (repositoryName != null) {
-      anyNonOutputExternalFilesSeen = true;
+      anyFilesInExternalReposSeen = true;
       return Pair.of(FileType.EXTERNAL_IN_MANAGED_DIRECTORY, repositoryName);
     }
     FileType fileType = detectFileType(rootedPath);
-    if (FileType.EXTERNAL == fileType || FileType.EXTERNAL_REPO == fileType) {
-      anyNonOutputExternalFilesSeen = true;
+    if (FileType.EXTERNAL == fileType) {
+      if (nonOutputExternalFilesSeen.size() >= MAX_EXTERNAL_FILES_TO_TRACK) {
+        tooManyNonOutputExternalFilesSeen = true;
+      } else {
+        nonOutputExternalFilesSeen.add(rootedPath);
+      }
+    }
+    if (FileType.EXTERNAL_REPO == fileType) {
+      anyFilesInExternalReposSeen = true;
     }
     if (FileType.OUTPUT == fileType) {
       anyOutputFilesSeen = true;
@@ -255,20 +282,16 @@
     // The outputBase may be null if we're not actually running a build.
     Path outputBase = packageLocator.getOutputBase();
     if (outputBase == null) {
-      anyNonOutputExternalFilesSeen = true;
       return FileType.EXTERNAL;
     }
     if (rootedPath.asPath().startsWith(outputBase)) {
       Path externalRepoDir = outputBase.getRelative(LabelConstants.EXTERNAL_REPOSITORY_LOCATION);
       if (rootedPath.asPath().startsWith(externalRepoDir)) {
-        anyNonOutputExternalFilesSeen = true;
         return FileType.EXTERNAL_REPO;
       } else {
-        anyOutputFilesSeen = true;
         return FileType.OUTPUT;
       }
     }
-    anyNonOutputExternalFilesSeen = true;
     return FileType.EXTERNAL;
   }
 
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutor.java b/src/main/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutor.java
index 60f19ad..62e1420 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutor.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutor.java
@@ -116,6 +116,7 @@
 import java.util.Map;
 import java.util.Set;
 import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.function.Consumer;
 import javax.annotation.Nullable;
 
@@ -476,38 +477,6 @@
       boolean managedDirectoriesChanged,
       int fsvcThreads)
       throws InterruptedException {
-    ExternalFilesKnowledge externalFilesKnowledge = externalFilesHelper.getExternalFilesKnowledge();
-    if (pathEntriesWithoutDiffInformation.isEmpty()
-        && Iterables.isEmpty(customDirtinessCheckers)
-        && ((!externalFilesKnowledge.anyOutputFilesSeen || !checkOutputFiles)
-            && !externalFilesKnowledge.anyNonOutputExternalFilesSeen)) {
-      // Avoid a full graph scan if we have good diff information for all path entries, there are
-      // no custom checkers that need to look at the whole graph, and no external (not under any
-      // path) files need to be checked.
-      return;
-    }
-    // Before running the FilesystemValueChecker, ensure that all values marked for invalidation
-    // have actually been invalidated (recall that invalidation happens at the beginning of the
-    // next evaluate() call), because checking those is a waste of time.
-    EvaluationContext evaluationContext =
-        EvaluationContext.newBuilder()
-            .setKeepGoing(false)
-            .setNumThreads(DEFAULT_THREAD_COUNT)
-            .setEventHandler(eventHandler)
-            .build();
-    getDriver().evaluate(ImmutableList.of(), evaluationContext);
-
-    FilesystemValueChecker fsvc =
-        new FilesystemValueChecker(tsgm, /* lastExecutionTimeRange= */ null, fsvcThreads);
-    // We need to manually check for changes to known files. This entails finding all dirty file
-    // system values under package roots for which we don't have diff information. If at least
-    // one path entry doesn't have diff information, then we're going to have to iterate over
-    // the skyframe values at least once no matter what.
-    Set<Root> diffPackageRootsUnderWhichToCheck = new HashSet<>();
-    for (Pair<Root, DiffAwarenessManager.ProcessableModifiedFileSet> pair :
-        pathEntriesWithoutDiffInformation) {
-      diffPackageRootsUnderWhichToCheck.add(pair.getFirst());
-    }
 
     // We freshly compute knowledge of the presence of external files in the skyframe graph. We use
     // a fresh ExternalFilesHelper instance and only set the real instance's knowledge *after* we
@@ -515,37 +484,101 @@
     // incorrectly think there are no longer any external files.
     ExternalFilesHelper tmpExternalFilesHelper =
         externalFilesHelper.cloneWithFreshExternalFilesKnowledge();
-    // See the comment for FileType.OUTPUT for why we need to consider output files here.
-    EnumSet<FileType> fileTypesToCheck =
-        checkOutputFiles
-            ? EnumSet.of(
-                FileType.EXTERNAL,
-                FileType.EXTERNAL_REPO,
-                FileType.EXTERNAL_IN_MANAGED_DIRECTORY,
-                FileType.OUTPUT)
-            : EnumSet.of(
-                FileType.EXTERNAL, FileType.EXTERNAL_REPO, FileType.EXTERNAL_IN_MANAGED_DIRECTORY);
-    logger.atInfo().log(
-        "About to scan skyframe graph checking for filesystem nodes of types %s",
-        Iterables.toString(fileTypesToCheck));
-    ImmutableBatchDirtyResult batchDirtyResult;
-    try (SilentCloseable c = Profiler.instance().profile("fsvc.getDirtyKeys")) {
-      batchDirtyResult =
-          fsvc.getDirtyKeys(
-              memoizingEvaluator.getValues(),
-              new UnionDirtinessChecker(
-                  Iterables.concat(
-                      customDirtinessCheckers,
-                      ImmutableList.<SkyValueDirtinessChecker>of(
-                          new ExternalDirtinessChecker(tmpExternalFilesHelper, fileTypesToCheck),
-                          new MissingDiffDirtinessChecker(diffPackageRootsUnderWhichToCheck)))));
-    }
-    handleChangedFiles(
-        diffPackageRootsUnderWhichToCheck,
-        batchDirtyResult,
-        /*numSourceFilesCheckedIfDiffWasMissing=*/ batchDirtyResult.getNumKeysChecked(),
-        managedDirectoriesChanged);
 
+    ExternalFilesKnowledge externalFilesKnowledge = externalFilesHelper.getExternalFilesKnowledge();
+    if (!pathEntriesWithoutDiffInformation.isEmpty()
+        || (externalFilesKnowledge.anyOutputFilesSeen && checkOutputFiles)
+        || !Iterables.isEmpty(customDirtinessCheckers)
+        || externalFilesKnowledge.anyFilesInExternalReposSeen
+        || externalFilesKnowledge.tooManyNonOutputExternalFilesSeen) {
+
+      // Before running the FilesystemValueChecker, ensure that all values marked for invalidation
+      // have actually been invalidated (recall that invalidation happens at the beginning of the
+      // next evaluate() call), because checking those is a waste of time.
+      EvaluationContext evaluationContext =
+          EvaluationContext.newBuilder()
+              .setKeepGoing(false)
+              .setNumThreads(DEFAULT_THREAD_COUNT)
+              .setEventHandler(eventHandler)
+              .build();
+      getDriver().evaluate(ImmutableList.of(), evaluationContext);
+
+      FilesystemValueChecker fsvc =
+          new FilesystemValueChecker(tsgm, /* lastExecutionTimeRange= */ null, fsvcThreads);
+      // We need to manually check for changes to known files. This entails finding all dirty file
+      // system values under package roots for which we don't have diff information. If at least
+      // one path entry doesn't have diff information, then we're going to have to iterate over
+      // the skyframe values at least once no matter what.
+      Set<Root> diffPackageRootsUnderWhichToCheck = new HashSet<>();
+      for (Pair<Root, DiffAwarenessManager.ProcessableModifiedFileSet> pair :
+          pathEntriesWithoutDiffInformation) {
+        diffPackageRootsUnderWhichToCheck.add(pair.getFirst());
+      }
+
+      EnumSet<FileType> fileTypesToCheck =
+          EnumSet.of(FileType.EXTERNAL_REPO, FileType.EXTERNAL_IN_MANAGED_DIRECTORY);
+      // See the comment for FileType.OUTPUT for why we need to consider output files here.
+      if (checkOutputFiles) {
+        fileTypesToCheck.add(FileType.OUTPUT);
+      }
+      if (externalFilesKnowledge.tooManyNonOutputExternalFilesSeen) {
+        fileTypesToCheck.add(FileType.EXTERNAL);
+      }
+      logger.atInfo().log(
+          "About to scan skyframe graph checking for filesystem nodes of types %s",
+          Iterables.toString(fileTypesToCheck));
+      ImmutableBatchDirtyResult batchDirtyResult;
+      try (SilentCloseable c = Profiler.instance().profile("fsvc.getDirtyKeys")) {
+        batchDirtyResult =
+            fsvc.getDirtyKeys(
+                memoizingEvaluator.getValues(),
+                new UnionDirtinessChecker(
+                    Iterables.concat(
+                        customDirtinessCheckers,
+                        ImmutableList.<SkyValueDirtinessChecker>of(
+                            new ExternalDirtinessChecker(tmpExternalFilesHelper, fileTypesToCheck),
+                            new MissingDiffDirtinessChecker(diffPackageRootsUnderWhichToCheck)))));
+      }
+      handleChangedFiles(
+          diffPackageRootsUnderWhichToCheck,
+          batchDirtyResult,
+          /*numSourceFilesCheckedIfDiffWasMissing=*/ batchDirtyResult.getNumKeysChecked(),
+          managedDirectoriesChanged);
+    }
+    if (!externalFilesKnowledge.nonOutputExternalFilesSeen.isEmpty()
+        && !externalFilesKnowledge.tooManyNonOutputExternalFilesSeen) {
+      logger.atInfo().log(
+          "About to scan %d external files",
+          externalFilesKnowledge.nonOutputExternalFilesSeen.size());
+      FilesystemValueChecker fsvc =
+          new FilesystemValueChecker(tsgm, /* lastExecutionTimeRange= */ null, fsvcThreads);
+      ImmutableBatchDirtyResult batchDirtyResult;
+      try (SilentCloseable c = Profiler.instance().profile("fsvc.getDirtyExternalKeys")) {
+        Map<SkyKey, SkyValue> externalDirtyNodes = new ConcurrentHashMap<>();
+        for (RootedPath path : externalFilesKnowledge.nonOutputExternalFilesSeen) {
+          SkyKey key = FileStateValue.key(path);
+          SkyValue value = memoizingEvaluator.getExistingValue(key);
+          if (value != null) {
+            externalDirtyNodes.put(key, value);
+          }
+          key = DirectoryListingStateValue.key(path);
+          memoizingEvaluator.getExistingValue(key);
+          if (value != null) {
+            externalDirtyNodes.put(key, value);
+          }
+        }
+        batchDirtyResult =
+            fsvc.getDirtyKeys(
+                externalDirtyNodes,
+                new ExternalDirtinessChecker(
+                    tmpExternalFilesHelper, EnumSet.of(FileType.EXTERNAL)));
+      }
+      handleChangedFiles(
+          ImmutableList.<Root>of(),
+          batchDirtyResult,
+          batchDirtyResult.getNumKeysChecked(),
+          /*managedDirectoriesChanged=*/ false);
+    }
     for (Pair<Root, DiffAwarenessManager.ProcessableModifiedFileSet> pair :
         pathEntriesWithoutDiffInformation) {
       pair.getSecond().markProcessed();