Expand exclude patterns in glob()s using the glob result instead of the file
system. Using actual file operations could be beneficial in theory as the glob
might return a large result that we have to linearly scan through it, whereas we
could just remove a handful of files found by a handful of stats.

However, measurements indicate that this is not beneficial in practice and
that the additional file systems stats are more costly.

RELNOTES: None.
PiperOrigin-RevId: 234870066
diff --git a/src/main/java/com/google/devtools/build/lib/packages/GlobCache.java b/src/main/java/com/google/devtools/build/lib/packages/GlobCache.java
index 31b36ab..a968c4e 100644
--- a/src/main/java/com/google/devtools/build/lib/packages/GlobCache.java
+++ b/src/main/java/com/google/devtools/build/lib/packages/GlobCache.java
@@ -17,7 +17,6 @@
 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicate;
 import com.google.common.base.Throwables;
-import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.util.concurrent.SettableFuture;
 import com.google.devtools.build.lib.cmdline.PackageIdentifier;
@@ -241,22 +240,17 @@
     // Start globbing all patterns in parallel. The getGlob() calls below will
     // block on an individual pattern's results, but the other globs can
     // continue in the background.
-    for (String pattern : Iterables.concat(includes, excludes)) {
+    for (String pattern : includes) {
       @SuppressWarnings("unused") 
       Future<?> possiblyIgnoredError = getGlobUnsortedAsync(pattern, excludeDirs);
     }
 
     HashSet<String> results = new HashSet<>();
+    Preconditions.checkState(!results.contains(null), "glob returned null");
     for (String pattern : includes) {
       results.addAll(getGlobUnsorted(pattern, excludeDirs));
     }
-    for (String pattern : excludes) {
-      for (String excludeMatch : getGlobUnsorted(pattern, excludeDirs)) {
-        results.remove(excludeMatch);
-      }
-    }
-
-    Preconditions.checkState(!results.contains(null), "glob returned null");
+    UnixGlob.removeExcludes(results, excludes);
     return new ArrayList<>(results);
   }
 
diff --git a/src/main/java/com/google/devtools/build/lib/packages/PackageFactory.java b/src/main/java/com/google/devtools/build/lib/packages/PackageFactory.java
index 0d11eac..1e181e8 100644
--- a/src/main/java/com/google/devtools/build/lib/packages/PackageFactory.java
+++ b/src/main/java/com/google/devtools/build/lib/packages/PackageFactory.java
@@ -20,7 +20,6 @@
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Iterables;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import com.google.devtools.build.lib.analysis.skylark.BazelStarlarkContext;
 import com.google.devtools.build.lib.analysis.skylark.SymbolGenerator;
@@ -300,7 +299,7 @@
     @Override
     public Token runAsync(List<String> includes, List<String> excludes, boolean excludeDirs)
         throws BadGlobException {
-      for (String pattern : Iterables.concat(includes, excludes)) {
+      for (String pattern : includes) {
         @SuppressWarnings("unused")
         Future<?> possiblyIgnoredError = globCache.getGlobUnsortedAsync(pattern, excludeDirs);
       }
@@ -1732,11 +1731,13 @@
           }
           continue;
         }
-        if (arg.getValue() instanceof ListLiteral) {
-          ListLiteral list = (ListLiteral) arg.getValue();
-          for (Expression elem : list.getElements()) {
-            if (elem instanceof StringLiteral) {
-              globStrings.add(((StringLiteral) elem).getValue());
+        if (arg.getIdentifier() == null || arg.getIdentifier().getName().equals("include")) {
+          if (arg.getValue() instanceof ListLiteral) {
+            ListLiteral list = (ListLiteral) arg.getValue();
+            for (Expression elem : list.getElements()) {
+              if (elem instanceof StringLiteral) {
+                globStrings.add(((StringLiteral) elem).getValue());
+              }
             }
           }
         }
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/PackageFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/PackageFunction.java
index bd473e8..f6783ec 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/PackageFunction.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/PackageFunction.java
@@ -68,6 +68,7 @@
 import com.google.devtools.build.lib.vfs.PathFragment;
 import com.google.devtools.build.lib.vfs.Root;
 import com.google.devtools.build.lib.vfs.RootedPath;
+import com.google.devtools.build.lib.vfs.UnixGlob;
 import com.google.devtools.build.skyframe.SkyFunction;
 import com.google.devtools.build.skyframe.SkyFunctionException;
 import com.google.devtools.build.skyframe.SkyFunctionException.Transience;
@@ -879,56 +880,34 @@
     @Override
     public Token runAsync(List<String> includes, List<String> excludes, boolean excludeDirs)
         throws BadGlobException, InterruptedException {
-      List<SkyKey> globKeys = new ArrayList<>(includes.size() + excludes.size());
-      LinkedHashSet<SkyKey> includesKeys = Sets.newLinkedHashSetWithExpectedSize(includes.size());
-      LinkedHashSet<SkyKey> excludesKeys = Sets.newLinkedHashSetWithExpectedSize(excludes.size());
-      Map<SkyKey, String> globKeyToIncludeStringMap =
-          Maps.newHashMapWithExpectedSize(includes.size());
-      Map<SkyKey, String> globKeyToExcludeStringMap =
-          Maps.newHashMapWithExpectedSize(excludes.size());
+      LinkedHashSet<SkyKey> globKeys = Sets.newLinkedHashSetWithExpectedSize(includes.size());
+      Map<SkyKey, String> globKeyToPatternMap = Maps.newHashMapWithExpectedSize(includes.size());
 
       for (String pattern : includes) {
         SkyKey globKey = getGlobKey(pattern, excludeDirs);
         globKeys.add(globKey);
-        includesKeys.add(globKey);
-        globKeyToIncludeStringMap.put(globKey, pattern);
+        globKeyToPatternMap.put(globKey, pattern);
       }
-      for (String pattern : excludes) {
-        SkyKey globKey = getGlobKey(pattern, excludeDirs);
-        globKeys.add(globKey);
-        excludesKeys.add(globKey);
-        globKeyToExcludeStringMap.put(globKey, pattern);
-      }
+
       globDepsRequested.addAll(globKeys);
 
       Map<SkyKey, ValueOrException2<IOException, BuildFileNotFoundException>> globValueMap =
           env.getValuesOrThrow(globKeys, IOException.class, BuildFileNotFoundException.class);
 
-      // For each missing glob, evaluate it asychronously via the delegate.
+      // For each missing glob, evaluate it asynchronously via the delegate.
       Collection<SkyKey> missingKeys = getMissingKeys(globKeys, globValueMap);
-      List<String> includesToDelegate = new ArrayList<>(missingKeys.size());
-      List<String> excludesToDelegate = new ArrayList<>(missingKeys.size());
+      List<String> globsToDelegate = new ArrayList<>(missingKeys.size());
       for (SkyKey missingKey : missingKeys) {
-        String missingIncludePattern = globKeyToIncludeStringMap.get(missingKey);
-        if (missingIncludePattern != null) {
-          includesToDelegate.add(missingIncludePattern);
-          includesKeys.remove(missingKey);
-        }
-        String missingExcludePattern = globKeyToExcludeStringMap.get(missingKey);
-        if (missingExcludePattern != null) {
-          excludesToDelegate.add(missingExcludePattern);
-          excludesKeys.remove(missingKey);
+        String missingPattern = globKeyToPatternMap.get(missingKey);
+        if (missingPattern != null) {
+          globsToDelegate.add(missingPattern);
+          globKeys.remove(missingKey);
         }
       }
       Token legacyIncludesToken =
-          legacyGlobber.runAsync(includesToDelegate, ImmutableList.<String>of(), excludeDirs);
-      // See the HybridToken class-comment for why we pass excludesToDelegate as the includes
-      // parameter here.
-      Token legacyExcludesToken =
-          legacyGlobber.runAsync(excludesToDelegate, ImmutableList.<String>of(), excludeDirs);
+          legacyGlobber.runAsync(globsToDelegate, ImmutableList.of(), excludeDirs);
 
-      return new HybridToken(globValueMap, includesKeys, excludesKeys,
-          legacyIncludesToken, legacyExcludesToken);
+      return new HybridToken(globValueMap, globKeys, legacyIncludesToken, excludes);
     }
 
     private Collection<SkyKey> getMissingKeys(Collection<SkyKey> globKeys,
@@ -969,33 +948,9 @@
 
     /**
      * A {@link Globber.Token} that encapsulates the result of a single {@link Globber#runAsync}
-     * call via the fetching of some globs from skyframe, and some other globs via a
-     * {@link PackageFactory.LegacyGlobber}. We take care to properly handle 'includes' vs
-     * 'excludes'.
-     *
-     * <p>That is, we evaluate {@code glob(includes, excludes)} by partitioning {@code includes} and
-     * {@code excludes}.
-     *
-     * <pre>
-     * {@code
-     * includes = includes_sky U includes_leg
-     * excludes = excludes_sky U excludes_leg
-     * }
-     * </pre>
-     *
-     * <p>and then noting
-     *
-     * <pre>
-     * {@code
-     * glob(includes, excludes) =
-     *     (glob(includes_sky, []) U glob(includes_leg, []))
-     *   - (glob(excludes_sky, []) U glob(excludes_leg, []))
-     * }
-     * </pre>
-     *
-     * <p>Importantly, we pass excludes=[] in all cases; otherwise we'd be incorrectly not
-     * subtracting excluded glob matches from the overall list of matches. In other words, we
-     * implement the subtractive nature of excludes ourselves in {@link #resolve}.
+     * call via the fetching of some globs from skyframe, and some other globs via a{@link
+     * PackageFactory.LegacyGlobber}. 'exclude' patterns are evaluated using {@link
+     * UnixGlob#removeExcludes} after merging legacy and skyframe glob results in {@link #resolve}.
      */
     private static class HybridToken extends Globber.Token {
       // The result of the Skyframe lookup for all the needed glob patterns.
@@ -1004,25 +959,20 @@
       // The skyframe keys corresponding to the 'includes' patterns fetched from Skyframe
       // (this is includes_sky above).
       private final Iterable<SkyKey> includesGlobKeys;
-      // The skyframe keys corresponding to the 'excludes' patterns fetched from Skyframe
-      // (this is excludes_sky above).
-      private final Iterable<SkyKey> excludesGlobKeys;
-      // A token for computing includes_leg.
+      // A token for computing legacy globs.
       private final Token legacyIncludesToken;
-      // A token for computing excludes_leg.
-      private final Token legacyExcludesToken;
+
+      private final List<String> excludes;
 
       private HybridToken(
           Map<SkyKey, ValueOrException2<IOException, BuildFileNotFoundException>> globValueMap,
           Iterable<SkyKey> includesGlobKeys,
-          Iterable<SkyKey> excludesGlobKeys,
           Token delegateIncludesToken,
-          Token delegateExcludesToken) {
+          List<String> excludes) {
         this.globValueMap = globValueMap;
         this.includesGlobKeys = includesGlobKeys;
-        this.excludesGlobKeys = excludesGlobKeys;
         this.legacyIncludesToken = delegateIncludesToken;
-        this.legacyExcludesToken = delegateExcludesToken;
+        this.excludes = excludes;
       }
 
       private List<String> resolve(Globber delegate) throws IOException, InterruptedException {
@@ -1034,14 +984,7 @@
           }
         }
         matches.addAll(delegate.fetch(legacyIncludesToken));
-        for (SkyKey excludeGlobKey : excludesGlobKeys) {
-          for (PathFragment match : getGlobMatches(excludeGlobKey, globValueMap)) {
-            matches.remove(match.getPathString());
-          }
-        }
-        for (String delegateExcludeMatch : delegate.fetch(legacyExcludesToken)) {
-          matches.remove(delegateExcludeMatch);
-        }
+        UnixGlob.removeExcludes(matches, excludes);
         List<String> result = new ArrayList<>(matches);
         // Skyframe glob results are unsorted. And we used a LegacyGlobber that doesn't sort.
         // Therefore, we want to unconditionally sort here.
diff --git a/src/main/java/com/google/devtools/build/lib/vfs/UnixGlob.java b/src/main/java/com/google/devtools/build/lib/vfs/UnixGlob.java
index 8a17188..f38e5ae 100644
--- a/src/main/java/com/google/devtools/build/lib/vfs/UnixGlob.java
+++ b/src/main/java/com/google/devtools/build/lib/vfs/UnixGlob.java
@@ -34,9 +34,12 @@
 import com.google.devtools.build.lib.profiler.ProfilerTask;
 import com.google.devtools.build.lib.profiler.SilentCloseable;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
@@ -143,24 +146,21 @@
     return null;
   }
 
-  /**
-   * Calls {@link #matches(String, String, ConcurrentHashMap) matches(pattern, str, null)}
-   */
+  /** Calls {@link #matches(String, String, Map) matches(pattern, str, null)} */
   public static boolean matches(String pattern, String str) {
     return matches(pattern, str, null);
   }
 
   /**
-   * Returns whether {@code str} matches the glob pattern {@code pattern}. This
-   * method may use the {@code patternCache} to speed up the matching process.
+   * Returns whether {@code str} matches the glob pattern {@code pattern}. This method may use the
+   * {@code patternCache} to speed up the matching process.
    *
    * @param pattern a glob pattern
    * @param str the string to match
-   * @param patternCache a cache from patterns to compiled Pattern objects, or
-   *        {@code null} to skip caching
+   * @param patternCache a cache from patterns to compiled Pattern objects, or {@code null} to skip
+   *     caching
    */
-  public static boolean matches(String pattern, String str,
-      ConcurrentHashMap<String, Pattern> patternCache) {
+  public static boolean matches(String pattern, String str, Map<String, Pattern> patternCache) {
     if (pattern.length() == 0 || str.length() == 0) {
       return false;
     }
@@ -840,4 +840,64 @@
       }
     }
   }
+
+  /**
+   * Filters out exclude patterns from a Set of paths. Common cases such as wildcard-free patterns
+   * or suffix patterns are special-cased to make this function efficient.
+   */
+  public static void removeExcludes(Set<String> paths, Collection<String> excludes) {
+    ArrayList<String> complexPatterns = new ArrayList<>(excludes.size());
+    for (String exclude : excludes) {
+      if (isWildcardFree(exclude)) {
+        paths.remove(exclude);
+        continue;
+      }
+      if (exclude.startsWith("**/*")) {
+        String tail = exclude.substring(4);
+        if (isWildcardFree(tail)) {
+          paths.removeIf(path -> path.endsWith(tail));
+          continue;
+        }
+      }
+      complexPatterns.add(exclude);
+    }
+    if (complexPatterns.isEmpty()) {
+      return;
+    }
+    List<String[]> splitPatterns = checkAndSplitPatterns(complexPatterns);
+    HashMap<String, Pattern> patternCache = new HashMap<>();
+    paths.removeIf(
+        path -> {
+          String[] segments = Iterables.toArray(Splitter.on('/').split(path), String.class);
+          for (String[] splitPattern : splitPatterns) {
+            if (matchesPattern(splitPattern, segments, 0, 0, patternCache)) {
+              return true;
+            }
+          }
+          return false;
+        });
+  }
+
+  /** Returns true if {@code pattern} matches {@code path} starting from the given segments. */
+  private static boolean matchesPattern(
+      String[] pattern, String[] path, int i, int j, Map<String, Pattern> patternCache) {
+    if (i == pattern.length) {
+      return j == path.length;
+    }
+    if (pattern[i].equals("**")) {
+      return matchesPattern(pattern, path, i + 1, j, patternCache)
+          || (j < path.length && matchesPattern(pattern, path, i, j + 1, patternCache));
+    }
+    if (j == path.length) {
+      return false;
+    }
+    if (matches(pattern[i], path[j], patternCache)) {
+      return matchesPattern(pattern, path, i + 1, j + 1, patternCache);
+    }
+    return false;
+  }
+
+  private static boolean isWildcardFree(String pattern) {
+    return !pattern.contains("*") && !pattern.contains("?");
+  }
 }
diff --git a/src/test/java/com/google/devtools/build/lib/packages/PackageFactoryTest.java b/src/test/java/com/google/devtools/build/lib/packages/PackageFactoryTest.java
index 94b47ca..354fb0c 100644
--- a/src/test/java/com/google/devtools/build/lib/packages/PackageFactoryTest.java
+++ b/src/test/java/com/google/devtools/build/lib/packages/PackageFactoryTest.java
@@ -1250,7 +1250,7 @@
             "other_variable = glob(include = ['a'], exclude = ['b'])",
             "third_variable = glob(['c'], exclude_directories = 0)"));
     assertThat(globPatternExtractor.getExcludeDirectoriesPatterns())
-        .containsExactly("ab", "a", "b", "**/*");
+        .containsExactly("ab", "a", "**/*");
     assertThat(globPatternExtractor.getIncludeDirectoriesPatterns()).containsExactly("c");
   }
 }
diff --git a/src/test/java/com/google/devtools/build/lib/packages/util/PackageFactoryTestBase.java b/src/test/java/com/google/devtools/build/lib/packages/util/PackageFactoryTestBase.java
index 24d9eb0..edfac88 100644
--- a/src/test/java/com/google/devtools/build/lib/packages/util/PackageFactoryTestBase.java
+++ b/src/test/java/com/google/devtools/build/lib/packages/util/PackageFactoryTestBase.java
@@ -215,8 +215,6 @@
     // Ensure all of the patterns are recorded against this package:
     assertThat(globCache.getKeySet().containsAll(createGlobCacheKeys(includes, excludeDirs)))
         .isTrue();
-    assertThat(globCache.getKeySet().containsAll(createGlobCacheKeys(excludes, excludeDirs)))
-        .isTrue();
     assertThat(pkg.containsErrors()).isFalse();
   }
 
diff --git a/src/test/java/com/google/devtools/build/lib/vfs/GlobTest.java b/src/test/java/com/google/devtools/build/lib/vfs/GlobTest.java
index 4c608c9..ad62a0f 100644
--- a/src/test/java/com/google/devtools/build/lib/vfs/GlobTest.java
+++ b/src/test/java/com/google/devtools/build/lib/vfs/GlobTest.java
@@ -442,4 +442,28 @@
     assertThat(executor.awaitTermination(TestUtils.WAIT_TIMEOUT_SECONDS, TimeUnit.SECONDS))
         .isTrue();
   }
+
+  private Collection<String> removeExcludes(ImmutableList<String> paths, String... excludes) {
+    HashSet<String> pathSet = new HashSet<>(paths);
+    UnixGlob.removeExcludes(pathSet, ImmutableList.copyOf(excludes));
+    return pathSet;
+  }
+
+  @Test
+  public void testExcludeFiltering() {
+    ImmutableList<String> paths = ImmutableList.of("a/A.java", "a/B.java", "a/b/C.java", "c.cc");
+    assertThat(removeExcludes(paths, "**/*.java")).containsExactly("c.cc");
+    assertThat(removeExcludes(paths, "**/nomatch.*")).containsAllIn(paths);
+    assertThat(removeExcludes(paths, "a/A.java")).containsExactly("a/B.java", "a/b/C.java", "c.cc");
+    assertThat(removeExcludes(paths, "a/?.java")).containsExactly("a/b/C.java", "c.cc");
+    assertThat(removeExcludes(paths, "a/*/C.java")).containsExactly("a/A.java", "a/B.java", "c.cc");
+    assertThat(removeExcludes(paths, "**")).isEmpty();
+    assertThat(removeExcludes(paths, "**/**")).isEmpty();
+
+    // Test filenames that look like code patterns.
+    paths = ImmutableList.of("a/A.java", "a/B.java", "a/b/*.java", "a/b/C.java", "c.cc");
+    assertThat(removeExcludes(paths, "**/*.java")).containsExactly("c.cc");
+    assertThat(removeExcludes(paths, "**/A.java", "**/B.java", "**/C.java"))
+        .containsExactly("a/b/*.java", "c.cc");
+  }
 }