Expand exclude patterns in glob()s using the glob result instead of the file
system. Using actual file operations could be beneficial in theory as the glob
might return a large result that we have to linearly scan through it, whereas we
could just remove a handful of files found by a handful of stats.
However, measurements indicate that this is not beneficial in practice and
that the additional file systems stats are more costly.
RELNOTES: None.
PiperOrigin-RevId: 234870066
diff --git a/src/main/java/com/google/devtools/build/lib/packages/GlobCache.java b/src/main/java/com/google/devtools/build/lib/packages/GlobCache.java
index 31b36ab..a968c4e 100644
--- a/src/main/java/com/google/devtools/build/lib/packages/GlobCache.java
+++ b/src/main/java/com/google/devtools/build/lib/packages/GlobCache.java
@@ -17,7 +17,6 @@
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Throwables;
-import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.util.concurrent.SettableFuture;
import com.google.devtools.build.lib.cmdline.PackageIdentifier;
@@ -241,22 +240,17 @@
// Start globbing all patterns in parallel. The getGlob() calls below will
// block on an individual pattern's results, but the other globs can
// continue in the background.
- for (String pattern : Iterables.concat(includes, excludes)) {
+ for (String pattern : includes) {
@SuppressWarnings("unused")
Future<?> possiblyIgnoredError = getGlobUnsortedAsync(pattern, excludeDirs);
}
HashSet<String> results = new HashSet<>();
+ Preconditions.checkState(!results.contains(null), "glob returned null");
for (String pattern : includes) {
results.addAll(getGlobUnsorted(pattern, excludeDirs));
}
- for (String pattern : excludes) {
- for (String excludeMatch : getGlobUnsorted(pattern, excludeDirs)) {
- results.remove(excludeMatch);
- }
- }
-
- Preconditions.checkState(!results.contains(null), "glob returned null");
+ UnixGlob.removeExcludes(results, excludes);
return new ArrayList<>(results);
}
diff --git a/src/main/java/com/google/devtools/build/lib/packages/PackageFactory.java b/src/main/java/com/google/devtools/build/lib/packages/PackageFactory.java
index 0d11eac..1e181e8 100644
--- a/src/main/java/com/google/devtools/build/lib/packages/PackageFactory.java
+++ b/src/main/java/com/google/devtools/build/lib/packages/PackageFactory.java
@@ -20,7 +20,6 @@
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Iterables;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.devtools.build.lib.analysis.skylark.BazelStarlarkContext;
import com.google.devtools.build.lib.analysis.skylark.SymbolGenerator;
@@ -300,7 +299,7 @@
@Override
public Token runAsync(List<String> includes, List<String> excludes, boolean excludeDirs)
throws BadGlobException {
- for (String pattern : Iterables.concat(includes, excludes)) {
+ for (String pattern : includes) {
@SuppressWarnings("unused")
Future<?> possiblyIgnoredError = globCache.getGlobUnsortedAsync(pattern, excludeDirs);
}
@@ -1732,11 +1731,13 @@
}
continue;
}
- if (arg.getValue() instanceof ListLiteral) {
- ListLiteral list = (ListLiteral) arg.getValue();
- for (Expression elem : list.getElements()) {
- if (elem instanceof StringLiteral) {
- globStrings.add(((StringLiteral) elem).getValue());
+ if (arg.getIdentifier() == null || arg.getIdentifier().getName().equals("include")) {
+ if (arg.getValue() instanceof ListLiteral) {
+ ListLiteral list = (ListLiteral) arg.getValue();
+ for (Expression elem : list.getElements()) {
+ if (elem instanceof StringLiteral) {
+ globStrings.add(((StringLiteral) elem).getValue());
+ }
}
}
}
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/PackageFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/PackageFunction.java
index bd473e8..f6783ec 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/PackageFunction.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/PackageFunction.java
@@ -68,6 +68,7 @@
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.lib.vfs.Root;
import com.google.devtools.build.lib.vfs.RootedPath;
+import com.google.devtools.build.lib.vfs.UnixGlob;
import com.google.devtools.build.skyframe.SkyFunction;
import com.google.devtools.build.skyframe.SkyFunctionException;
import com.google.devtools.build.skyframe.SkyFunctionException.Transience;
@@ -879,56 +880,34 @@
@Override
public Token runAsync(List<String> includes, List<String> excludes, boolean excludeDirs)
throws BadGlobException, InterruptedException {
- List<SkyKey> globKeys = new ArrayList<>(includes.size() + excludes.size());
- LinkedHashSet<SkyKey> includesKeys = Sets.newLinkedHashSetWithExpectedSize(includes.size());
- LinkedHashSet<SkyKey> excludesKeys = Sets.newLinkedHashSetWithExpectedSize(excludes.size());
- Map<SkyKey, String> globKeyToIncludeStringMap =
- Maps.newHashMapWithExpectedSize(includes.size());
- Map<SkyKey, String> globKeyToExcludeStringMap =
- Maps.newHashMapWithExpectedSize(excludes.size());
+ LinkedHashSet<SkyKey> globKeys = Sets.newLinkedHashSetWithExpectedSize(includes.size());
+ Map<SkyKey, String> globKeyToPatternMap = Maps.newHashMapWithExpectedSize(includes.size());
for (String pattern : includes) {
SkyKey globKey = getGlobKey(pattern, excludeDirs);
globKeys.add(globKey);
- includesKeys.add(globKey);
- globKeyToIncludeStringMap.put(globKey, pattern);
+ globKeyToPatternMap.put(globKey, pattern);
}
- for (String pattern : excludes) {
- SkyKey globKey = getGlobKey(pattern, excludeDirs);
- globKeys.add(globKey);
- excludesKeys.add(globKey);
- globKeyToExcludeStringMap.put(globKey, pattern);
- }
+
globDepsRequested.addAll(globKeys);
Map<SkyKey, ValueOrException2<IOException, BuildFileNotFoundException>> globValueMap =
env.getValuesOrThrow(globKeys, IOException.class, BuildFileNotFoundException.class);
- // For each missing glob, evaluate it asychronously via the delegate.
+ // For each missing glob, evaluate it asynchronously via the delegate.
Collection<SkyKey> missingKeys = getMissingKeys(globKeys, globValueMap);
- List<String> includesToDelegate = new ArrayList<>(missingKeys.size());
- List<String> excludesToDelegate = new ArrayList<>(missingKeys.size());
+ List<String> globsToDelegate = new ArrayList<>(missingKeys.size());
for (SkyKey missingKey : missingKeys) {
- String missingIncludePattern = globKeyToIncludeStringMap.get(missingKey);
- if (missingIncludePattern != null) {
- includesToDelegate.add(missingIncludePattern);
- includesKeys.remove(missingKey);
- }
- String missingExcludePattern = globKeyToExcludeStringMap.get(missingKey);
- if (missingExcludePattern != null) {
- excludesToDelegate.add(missingExcludePattern);
- excludesKeys.remove(missingKey);
+ String missingPattern = globKeyToPatternMap.get(missingKey);
+ if (missingPattern != null) {
+ globsToDelegate.add(missingPattern);
+ globKeys.remove(missingKey);
}
}
Token legacyIncludesToken =
- legacyGlobber.runAsync(includesToDelegate, ImmutableList.<String>of(), excludeDirs);
- // See the HybridToken class-comment for why we pass excludesToDelegate as the includes
- // parameter here.
- Token legacyExcludesToken =
- legacyGlobber.runAsync(excludesToDelegate, ImmutableList.<String>of(), excludeDirs);
+ legacyGlobber.runAsync(globsToDelegate, ImmutableList.of(), excludeDirs);
- return new HybridToken(globValueMap, includesKeys, excludesKeys,
- legacyIncludesToken, legacyExcludesToken);
+ return new HybridToken(globValueMap, globKeys, legacyIncludesToken, excludes);
}
private Collection<SkyKey> getMissingKeys(Collection<SkyKey> globKeys,
@@ -969,33 +948,9 @@
/**
* A {@link Globber.Token} that encapsulates the result of a single {@link Globber#runAsync}
- * call via the fetching of some globs from skyframe, and some other globs via a
- * {@link PackageFactory.LegacyGlobber}. We take care to properly handle 'includes' vs
- * 'excludes'.
- *
- * <p>That is, we evaluate {@code glob(includes, excludes)} by partitioning {@code includes} and
- * {@code excludes}.
- *
- * <pre>
- * {@code
- * includes = includes_sky U includes_leg
- * excludes = excludes_sky U excludes_leg
- * }
- * </pre>
- *
- * <p>and then noting
- *
- * <pre>
- * {@code
- * glob(includes, excludes) =
- * (glob(includes_sky, []) U glob(includes_leg, []))
- * - (glob(excludes_sky, []) U glob(excludes_leg, []))
- * }
- * </pre>
- *
- * <p>Importantly, we pass excludes=[] in all cases; otherwise we'd be incorrectly not
- * subtracting excluded glob matches from the overall list of matches. In other words, we
- * implement the subtractive nature of excludes ourselves in {@link #resolve}.
+ * call via the fetching of some globs from skyframe, and some other globs via a{@link
+ * PackageFactory.LegacyGlobber}. 'exclude' patterns are evaluated using {@link
+ * UnixGlob#removeExcludes} after merging legacy and skyframe glob results in {@link #resolve}.
*/
private static class HybridToken extends Globber.Token {
// The result of the Skyframe lookup for all the needed glob patterns.
@@ -1004,25 +959,20 @@
// The skyframe keys corresponding to the 'includes' patterns fetched from Skyframe
// (this is includes_sky above).
private final Iterable<SkyKey> includesGlobKeys;
- // The skyframe keys corresponding to the 'excludes' patterns fetched from Skyframe
- // (this is excludes_sky above).
- private final Iterable<SkyKey> excludesGlobKeys;
- // A token for computing includes_leg.
+ // A token for computing legacy globs.
private final Token legacyIncludesToken;
- // A token for computing excludes_leg.
- private final Token legacyExcludesToken;
+
+ private final List<String> excludes;
private HybridToken(
Map<SkyKey, ValueOrException2<IOException, BuildFileNotFoundException>> globValueMap,
Iterable<SkyKey> includesGlobKeys,
- Iterable<SkyKey> excludesGlobKeys,
Token delegateIncludesToken,
- Token delegateExcludesToken) {
+ List<String> excludes) {
this.globValueMap = globValueMap;
this.includesGlobKeys = includesGlobKeys;
- this.excludesGlobKeys = excludesGlobKeys;
this.legacyIncludesToken = delegateIncludesToken;
- this.legacyExcludesToken = delegateExcludesToken;
+ this.excludes = excludes;
}
private List<String> resolve(Globber delegate) throws IOException, InterruptedException {
@@ -1034,14 +984,7 @@
}
}
matches.addAll(delegate.fetch(legacyIncludesToken));
- for (SkyKey excludeGlobKey : excludesGlobKeys) {
- for (PathFragment match : getGlobMatches(excludeGlobKey, globValueMap)) {
- matches.remove(match.getPathString());
- }
- }
- for (String delegateExcludeMatch : delegate.fetch(legacyExcludesToken)) {
- matches.remove(delegateExcludeMatch);
- }
+ UnixGlob.removeExcludes(matches, excludes);
List<String> result = new ArrayList<>(matches);
// Skyframe glob results are unsorted. And we used a LegacyGlobber that doesn't sort.
// Therefore, we want to unconditionally sort here.
diff --git a/src/main/java/com/google/devtools/build/lib/vfs/UnixGlob.java b/src/main/java/com/google/devtools/build/lib/vfs/UnixGlob.java
index 8a17188..f38e5ae 100644
--- a/src/main/java/com/google/devtools/build/lib/vfs/UnixGlob.java
+++ b/src/main/java/com/google/devtools/build/lib/vfs/UnixGlob.java
@@ -34,9 +34,12 @@
import com.google.devtools.build.lib.profiler.ProfilerTask;
import com.google.devtools.build.lib.profiler.SilentCloseable;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
@@ -143,24 +146,21 @@
return null;
}
- /**
- * Calls {@link #matches(String, String, ConcurrentHashMap) matches(pattern, str, null)}
- */
+ /** Calls {@link #matches(String, String, Map) matches(pattern, str, null)} */
public static boolean matches(String pattern, String str) {
return matches(pattern, str, null);
}
/**
- * Returns whether {@code str} matches the glob pattern {@code pattern}. This
- * method may use the {@code patternCache} to speed up the matching process.
+ * Returns whether {@code str} matches the glob pattern {@code pattern}. This method may use the
+ * {@code patternCache} to speed up the matching process.
*
* @param pattern a glob pattern
* @param str the string to match
- * @param patternCache a cache from patterns to compiled Pattern objects, or
- * {@code null} to skip caching
+ * @param patternCache a cache from patterns to compiled Pattern objects, or {@code null} to skip
+ * caching
*/
- public static boolean matches(String pattern, String str,
- ConcurrentHashMap<String, Pattern> patternCache) {
+ public static boolean matches(String pattern, String str, Map<String, Pattern> patternCache) {
if (pattern.length() == 0 || str.length() == 0) {
return false;
}
@@ -840,4 +840,64 @@
}
}
}
+
+ /**
+ * Filters out exclude patterns from a Set of paths. Common cases such as wildcard-free patterns
+ * or suffix patterns are special-cased to make this function efficient.
+ */
+ public static void removeExcludes(Set<String> paths, Collection<String> excludes) {
+ ArrayList<String> complexPatterns = new ArrayList<>(excludes.size());
+ for (String exclude : excludes) {
+ if (isWildcardFree(exclude)) {
+ paths.remove(exclude);
+ continue;
+ }
+ if (exclude.startsWith("**/*")) {
+ String tail = exclude.substring(4);
+ if (isWildcardFree(tail)) {
+ paths.removeIf(path -> path.endsWith(tail));
+ continue;
+ }
+ }
+ complexPatterns.add(exclude);
+ }
+ if (complexPatterns.isEmpty()) {
+ return;
+ }
+ List<String[]> splitPatterns = checkAndSplitPatterns(complexPatterns);
+ HashMap<String, Pattern> patternCache = new HashMap<>();
+ paths.removeIf(
+ path -> {
+ String[] segments = Iterables.toArray(Splitter.on('/').split(path), String.class);
+ for (String[] splitPattern : splitPatterns) {
+ if (matchesPattern(splitPattern, segments, 0, 0, patternCache)) {
+ return true;
+ }
+ }
+ return false;
+ });
+ }
+
+ /** Returns true if {@code pattern} matches {@code path} starting from the given segments. */
+ private static boolean matchesPattern(
+ String[] pattern, String[] path, int i, int j, Map<String, Pattern> patternCache) {
+ if (i == pattern.length) {
+ return j == path.length;
+ }
+ if (pattern[i].equals("**")) {
+ return matchesPattern(pattern, path, i + 1, j, patternCache)
+ || (j < path.length && matchesPattern(pattern, path, i, j + 1, patternCache));
+ }
+ if (j == path.length) {
+ return false;
+ }
+ if (matches(pattern[i], path[j], patternCache)) {
+ return matchesPattern(pattern, path, i + 1, j + 1, patternCache);
+ }
+ return false;
+ }
+
+ private static boolean isWildcardFree(String pattern) {
+ return !pattern.contains("*") && !pattern.contains("?");
+ }
}
diff --git a/src/test/java/com/google/devtools/build/lib/packages/PackageFactoryTest.java b/src/test/java/com/google/devtools/build/lib/packages/PackageFactoryTest.java
index 94b47ca..354fb0c 100644
--- a/src/test/java/com/google/devtools/build/lib/packages/PackageFactoryTest.java
+++ b/src/test/java/com/google/devtools/build/lib/packages/PackageFactoryTest.java
@@ -1250,7 +1250,7 @@
"other_variable = glob(include = ['a'], exclude = ['b'])",
"third_variable = glob(['c'], exclude_directories = 0)"));
assertThat(globPatternExtractor.getExcludeDirectoriesPatterns())
- .containsExactly("ab", "a", "b", "**/*");
+ .containsExactly("ab", "a", "**/*");
assertThat(globPatternExtractor.getIncludeDirectoriesPatterns()).containsExactly("c");
}
}
diff --git a/src/test/java/com/google/devtools/build/lib/packages/util/PackageFactoryTestBase.java b/src/test/java/com/google/devtools/build/lib/packages/util/PackageFactoryTestBase.java
index 24d9eb0..edfac88 100644
--- a/src/test/java/com/google/devtools/build/lib/packages/util/PackageFactoryTestBase.java
+++ b/src/test/java/com/google/devtools/build/lib/packages/util/PackageFactoryTestBase.java
@@ -215,8 +215,6 @@
// Ensure all of the patterns are recorded against this package:
assertThat(globCache.getKeySet().containsAll(createGlobCacheKeys(includes, excludeDirs)))
.isTrue();
- assertThat(globCache.getKeySet().containsAll(createGlobCacheKeys(excludes, excludeDirs)))
- .isTrue();
assertThat(pkg.containsErrors()).isFalse();
}
diff --git a/src/test/java/com/google/devtools/build/lib/vfs/GlobTest.java b/src/test/java/com/google/devtools/build/lib/vfs/GlobTest.java
index 4c608c9..ad62a0f 100644
--- a/src/test/java/com/google/devtools/build/lib/vfs/GlobTest.java
+++ b/src/test/java/com/google/devtools/build/lib/vfs/GlobTest.java
@@ -442,4 +442,28 @@
assertThat(executor.awaitTermination(TestUtils.WAIT_TIMEOUT_SECONDS, TimeUnit.SECONDS))
.isTrue();
}
+
+ private Collection<String> removeExcludes(ImmutableList<String> paths, String... excludes) {
+ HashSet<String> pathSet = new HashSet<>(paths);
+ UnixGlob.removeExcludes(pathSet, ImmutableList.copyOf(excludes));
+ return pathSet;
+ }
+
+ @Test
+ public void testExcludeFiltering() {
+ ImmutableList<String> paths = ImmutableList.of("a/A.java", "a/B.java", "a/b/C.java", "c.cc");
+ assertThat(removeExcludes(paths, "**/*.java")).containsExactly("c.cc");
+ assertThat(removeExcludes(paths, "**/nomatch.*")).containsAllIn(paths);
+ assertThat(removeExcludes(paths, "a/A.java")).containsExactly("a/B.java", "a/b/C.java", "c.cc");
+ assertThat(removeExcludes(paths, "a/?.java")).containsExactly("a/b/C.java", "c.cc");
+ assertThat(removeExcludes(paths, "a/*/C.java")).containsExactly("a/A.java", "a/B.java", "c.cc");
+ assertThat(removeExcludes(paths, "**")).isEmpty();
+ assertThat(removeExcludes(paths, "**/**")).isEmpty();
+
+ // Test filenames that look like code patterns.
+ paths = ImmutableList.of("a/A.java", "a/B.java", "a/b/*.java", "a/b/C.java", "c.cc");
+ assertThat(removeExcludes(paths, "**/*.java")).containsExactly("c.cc");
+ assertThat(removeExcludes(paths, "**/A.java", "**/B.java", "**/C.java"))
+ .containsExactly("a/b/*.java", "c.cc");
+ }
}