If EXTERNAL files are present, list them explicitly and check only these files, instead of triggering a search of all nodes in the graph.
This should improve performance for builds with a small number of external files and a large number of nodes in the graph, since it avoids a full graph traversal.
PiperOrigin-RevId: 399193720
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java b/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java
index b8d9029..a4aebf2 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java
@@ -14,6 +14,7 @@
package com.google.devtools.build.lib.skyframe;
import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
import com.google.common.flogger.GoogleLogger;
import com.google.devtools.build.lib.analysis.BlazeDirectories;
import com.google.devtools.build.lib.analysis.ConfiguredRuleClassProvider.BundledFileSystem;
@@ -29,6 +30,7 @@
import com.google.devtools.build.lib.vfs.RootedPath;
import com.google.devtools.build.skyframe.SkyFunction;
import java.io.IOException;
+import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
@@ -41,12 +43,17 @@
private final BlazeDirectories directories;
private final int maxNumExternalFilesToLog;
private final AtomicInteger numExternalFilesLogged = new AtomicInteger(0);
+ private static final int MAX_EXTERNAL_FILES_TO_TRACK = 2500;
// These variables are set to true from multiple threads, but only read in the main thread.
// So volatility or an AtomicBoolean is not needed.
private boolean anyOutputFilesSeen = false;
- private boolean anyNonOutputExternalFilesSeen = false;
+ private boolean tooManyNonOutputExternalFilesSeen = false;
+ private boolean anyFilesInExternalReposSeen = false;
+ // This is a set of external files that are not in managed directories or
+ // external repositories.
+ private Set<RootedPath> nonOutputExternalFilesSeen = Sets.newConcurrentHashSet();
private final ManagedDirectoriesKnowledge managedDirectoriesKnowledge;
private ExternalFilesHelper(
@@ -190,24 +197,37 @@
static class ExternalFilesKnowledge {
final boolean anyOutputFilesSeen;
- final boolean anyNonOutputExternalFilesSeen;
+ final Set<RootedPath> nonOutputExternalFilesSeen;
+ final boolean anyFilesInExternalReposSeen;
+ final boolean tooManyNonOutputExternalFilesSeen;
- private ExternalFilesKnowledge(boolean anyOutputFilesSeen,
- boolean anyNonOutputExternalFilesSeen) {
+ private ExternalFilesKnowledge(
+ boolean anyOutputFilesSeen,
+ Set<RootedPath> nonOutputExternalFilesSeen,
+ boolean anyFilesInExternalReposSeen,
+ boolean tooManyNonOutputExternalFilesSeen) {
this.anyOutputFilesSeen = anyOutputFilesSeen;
- this.anyNonOutputExternalFilesSeen = anyNonOutputExternalFilesSeen;
+ this.nonOutputExternalFilesSeen = nonOutputExternalFilesSeen;
+ this.anyFilesInExternalReposSeen = anyFilesInExternalReposSeen;
+ this.tooManyNonOutputExternalFilesSeen = tooManyNonOutputExternalFilesSeen;
}
}
@ThreadCompatible
ExternalFilesKnowledge getExternalFilesKnowledge() {
- return new ExternalFilesKnowledge(anyOutputFilesSeen, anyNonOutputExternalFilesSeen);
+ return new ExternalFilesKnowledge(
+ anyOutputFilesSeen,
+ nonOutputExternalFilesSeen,
+ anyFilesInExternalReposSeen,
+ tooManyNonOutputExternalFilesSeen);
}
@ThreadCompatible
void setExternalFilesKnowledge(ExternalFilesKnowledge externalFilesKnowledge) {
anyOutputFilesSeen = externalFilesKnowledge.anyOutputFilesSeen;
- anyNonOutputExternalFilesSeen = externalFilesKnowledge.anyNonOutputExternalFilesSeen;
+ nonOutputExternalFilesSeen = externalFilesKnowledge.nonOutputExternalFilesSeen;
+ anyFilesInExternalReposSeen = externalFilesKnowledge.anyFilesInExternalReposSeen;
+ tooManyNonOutputExternalFilesSeen = externalFilesKnowledge.tooManyNonOutputExternalFilesSeen;
}
ExternalFilesHelper cloneWithFreshExternalFilesKnowledge() {
@@ -227,12 +247,19 @@
RepositoryName repositoryName =
managedDirectoriesKnowledge.getOwnerRepository(rootedPath.getRootRelativePath());
if (repositoryName != null) {
- anyNonOutputExternalFilesSeen = true;
+ anyFilesInExternalReposSeen = true;
return Pair.of(FileType.EXTERNAL_IN_MANAGED_DIRECTORY, repositoryName);
}
FileType fileType = detectFileType(rootedPath);
- if (FileType.EXTERNAL == fileType || FileType.EXTERNAL_REPO == fileType) {
- anyNonOutputExternalFilesSeen = true;
+ if (FileType.EXTERNAL == fileType) {
+ if (nonOutputExternalFilesSeen.size() >= MAX_EXTERNAL_FILES_TO_TRACK) {
+ tooManyNonOutputExternalFilesSeen = true;
+ } else {
+ nonOutputExternalFilesSeen.add(rootedPath);
+ }
+ }
+ if (FileType.EXTERNAL_REPO == fileType) {
+ anyFilesInExternalReposSeen = true;
}
if (FileType.OUTPUT == fileType) {
anyOutputFilesSeen = true;
@@ -255,20 +282,16 @@
// The outputBase may be null if we're not actually running a build.
Path outputBase = packageLocator.getOutputBase();
if (outputBase == null) {
- anyNonOutputExternalFilesSeen = true;
return FileType.EXTERNAL;
}
if (rootedPath.asPath().startsWith(outputBase)) {
Path externalRepoDir = outputBase.getRelative(LabelConstants.EXTERNAL_REPOSITORY_LOCATION);
if (rootedPath.asPath().startsWith(externalRepoDir)) {
- anyNonOutputExternalFilesSeen = true;
return FileType.EXTERNAL_REPO;
} else {
- anyOutputFilesSeen = true;
return FileType.OUTPUT;
}
}
- anyNonOutputExternalFilesSeen = true;
return FileType.EXTERNAL;
}
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutor.java b/src/main/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutor.java
index 60f19ad..62e1420 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutor.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/SequencedSkyframeExecutor.java
@@ -116,6 +116,7 @@
import java.util.Map;
import java.util.Set;
import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Consumer;
import javax.annotation.Nullable;
@@ -476,38 +477,6 @@
boolean managedDirectoriesChanged,
int fsvcThreads)
throws InterruptedException {
- ExternalFilesKnowledge externalFilesKnowledge = externalFilesHelper.getExternalFilesKnowledge();
- if (pathEntriesWithoutDiffInformation.isEmpty()
- && Iterables.isEmpty(customDirtinessCheckers)
- && ((!externalFilesKnowledge.anyOutputFilesSeen || !checkOutputFiles)
- && !externalFilesKnowledge.anyNonOutputExternalFilesSeen)) {
- // Avoid a full graph scan if we have good diff information for all path entries, there are
- // no custom checkers that need to look at the whole graph, and no external (not under any
- // path) files need to be checked.
- return;
- }
- // Before running the FilesystemValueChecker, ensure that all values marked for invalidation
- // have actually been invalidated (recall that invalidation happens at the beginning of the
- // next evaluate() call), because checking those is a waste of time.
- EvaluationContext evaluationContext =
- EvaluationContext.newBuilder()
- .setKeepGoing(false)
- .setNumThreads(DEFAULT_THREAD_COUNT)
- .setEventHandler(eventHandler)
- .build();
- getDriver().evaluate(ImmutableList.of(), evaluationContext);
-
- FilesystemValueChecker fsvc =
- new FilesystemValueChecker(tsgm, /* lastExecutionTimeRange= */ null, fsvcThreads);
- // We need to manually check for changes to known files. This entails finding all dirty file
- // system values under package roots for which we don't have diff information. If at least
- // one path entry doesn't have diff information, then we're going to have to iterate over
- // the skyframe values at least once no matter what.
- Set<Root> diffPackageRootsUnderWhichToCheck = new HashSet<>();
- for (Pair<Root, DiffAwarenessManager.ProcessableModifiedFileSet> pair :
- pathEntriesWithoutDiffInformation) {
- diffPackageRootsUnderWhichToCheck.add(pair.getFirst());
- }
// We freshly compute knowledge of the presence of external files in the skyframe graph. We use
// a fresh ExternalFilesHelper instance and only set the real instance's knowledge *after* we
@@ -515,37 +484,101 @@
// incorrectly think there are no longer any external files.
ExternalFilesHelper tmpExternalFilesHelper =
externalFilesHelper.cloneWithFreshExternalFilesKnowledge();
- // See the comment for FileType.OUTPUT for why we need to consider output files here.
- EnumSet<FileType> fileTypesToCheck =
- checkOutputFiles
- ? EnumSet.of(
- FileType.EXTERNAL,
- FileType.EXTERNAL_REPO,
- FileType.EXTERNAL_IN_MANAGED_DIRECTORY,
- FileType.OUTPUT)
- : EnumSet.of(
- FileType.EXTERNAL, FileType.EXTERNAL_REPO, FileType.EXTERNAL_IN_MANAGED_DIRECTORY);
- logger.atInfo().log(
- "About to scan skyframe graph checking for filesystem nodes of types %s",
- Iterables.toString(fileTypesToCheck));
- ImmutableBatchDirtyResult batchDirtyResult;
- try (SilentCloseable c = Profiler.instance().profile("fsvc.getDirtyKeys")) {
- batchDirtyResult =
- fsvc.getDirtyKeys(
- memoizingEvaluator.getValues(),
- new UnionDirtinessChecker(
- Iterables.concat(
- customDirtinessCheckers,
- ImmutableList.<SkyValueDirtinessChecker>of(
- new ExternalDirtinessChecker(tmpExternalFilesHelper, fileTypesToCheck),
- new MissingDiffDirtinessChecker(diffPackageRootsUnderWhichToCheck)))));
- }
- handleChangedFiles(
- diffPackageRootsUnderWhichToCheck,
- batchDirtyResult,
- /*numSourceFilesCheckedIfDiffWasMissing=*/ batchDirtyResult.getNumKeysChecked(),
- managedDirectoriesChanged);
+ ExternalFilesKnowledge externalFilesKnowledge = externalFilesHelper.getExternalFilesKnowledge();
+ if (!pathEntriesWithoutDiffInformation.isEmpty()
+ || (externalFilesKnowledge.anyOutputFilesSeen && checkOutputFiles)
+ || !Iterables.isEmpty(customDirtinessCheckers)
+ || externalFilesKnowledge.anyFilesInExternalReposSeen
+ || externalFilesKnowledge.tooManyNonOutputExternalFilesSeen) {
+
+ // Before running the FilesystemValueChecker, ensure that all values marked for invalidation
+ // have actually been invalidated (recall that invalidation happens at the beginning of the
+ // next evaluate() call), because checking those is a waste of time.
+ EvaluationContext evaluationContext =
+ EvaluationContext.newBuilder()
+ .setKeepGoing(false)
+ .setNumThreads(DEFAULT_THREAD_COUNT)
+ .setEventHandler(eventHandler)
+ .build();
+ getDriver().evaluate(ImmutableList.of(), evaluationContext);
+
+ FilesystemValueChecker fsvc =
+ new FilesystemValueChecker(tsgm, /* lastExecutionTimeRange= */ null, fsvcThreads);
+ // We need to manually check for changes to known files. This entails finding all dirty file
+ // system values under package roots for which we don't have diff information. If at least
+ // one path entry doesn't have diff information, then we're going to have to iterate over
+ // the skyframe values at least once no matter what.
+ Set<Root> diffPackageRootsUnderWhichToCheck = new HashSet<>();
+ for (Pair<Root, DiffAwarenessManager.ProcessableModifiedFileSet> pair :
+ pathEntriesWithoutDiffInformation) {
+ diffPackageRootsUnderWhichToCheck.add(pair.getFirst());
+ }
+
+ EnumSet<FileType> fileTypesToCheck =
+ EnumSet.of(FileType.EXTERNAL_REPO, FileType.EXTERNAL_IN_MANAGED_DIRECTORY);
+ // See the comment for FileType.OUTPUT for why we need to consider output files here.
+ if (checkOutputFiles) {
+ fileTypesToCheck.add(FileType.OUTPUT);
+ }
+ if (externalFilesKnowledge.tooManyNonOutputExternalFilesSeen) {
+ fileTypesToCheck.add(FileType.EXTERNAL);
+ }
+ logger.atInfo().log(
+ "About to scan skyframe graph checking for filesystem nodes of types %s",
+ Iterables.toString(fileTypesToCheck));
+ ImmutableBatchDirtyResult batchDirtyResult;
+ try (SilentCloseable c = Profiler.instance().profile("fsvc.getDirtyKeys")) {
+ batchDirtyResult =
+ fsvc.getDirtyKeys(
+ memoizingEvaluator.getValues(),
+ new UnionDirtinessChecker(
+ Iterables.concat(
+ customDirtinessCheckers,
+ ImmutableList.<SkyValueDirtinessChecker>of(
+ new ExternalDirtinessChecker(tmpExternalFilesHelper, fileTypesToCheck),
+ new MissingDiffDirtinessChecker(diffPackageRootsUnderWhichToCheck)))));
+ }
+ handleChangedFiles(
+ diffPackageRootsUnderWhichToCheck,
+ batchDirtyResult,
+ /*numSourceFilesCheckedIfDiffWasMissing=*/ batchDirtyResult.getNumKeysChecked(),
+ managedDirectoriesChanged);
+ }
+ if (!externalFilesKnowledge.nonOutputExternalFilesSeen.isEmpty()
+ && !externalFilesKnowledge.tooManyNonOutputExternalFilesSeen) {
+ logger.atInfo().log(
+ "About to scan %d external files",
+ externalFilesKnowledge.nonOutputExternalFilesSeen.size());
+ FilesystemValueChecker fsvc =
+ new FilesystemValueChecker(tsgm, /* lastExecutionTimeRange= */ null, fsvcThreads);
+ ImmutableBatchDirtyResult batchDirtyResult;
+ try (SilentCloseable c = Profiler.instance().profile("fsvc.getDirtyExternalKeys")) {
+ Map<SkyKey, SkyValue> externalDirtyNodes = new ConcurrentHashMap<>();
+ for (RootedPath path : externalFilesKnowledge.nonOutputExternalFilesSeen) {
+ SkyKey key = FileStateValue.key(path);
+ SkyValue value = memoizingEvaluator.getExistingValue(key);
+ if (value != null) {
+ externalDirtyNodes.put(key, value);
+ }
+ key = DirectoryListingStateValue.key(path);
+ memoizingEvaluator.getExistingValue(key);
+ if (value != null) {
+ externalDirtyNodes.put(key, value);
+ }
+ }
+ batchDirtyResult =
+ fsvc.getDirtyKeys(
+ externalDirtyNodes,
+ new ExternalDirtinessChecker(
+ tmpExternalFilesHelper, EnumSet.of(FileType.EXTERNAL)));
+ }
+ handleChangedFiles(
+ ImmutableList.<Root>of(),
+ batchDirtyResult,
+ batchDirtyResult.getNumKeysChecked(),
+ /*managedDirectoriesChanged=*/ false);
+ }
for (Pair<Root, DiffAwarenessManager.ProcessableModifiedFileSet> pair :
pathEntriesWithoutDiffInformation) {
pair.getSecond().markProcessed();