Implement asynchronous glob prefetching to specifically prefetch the globs() of
a file.

There is currently a mechanism in Bazel to asynchronously go off and prefetch
file system content, specifically by recursively reading subdirectories up to
maxDirectoriesToEagerlyVisitInGlobbing. This is started when the first glob in
a file is found. It is useful to warm up caches in networked filesystems, but
doesn't actually help evaluating the globs().  As it can create extra load on
the filesystem, it can actually make evaluation slower. Moreover, it implicitly
relies on the fact that in common packages, most files are actually read into
globs() in some way, which isn't always the case.

Instead, we can visit a build file's AST and eagerly try to explicitly extract
globs that should be visited. This change is doing that if
maxDirectoriesToEagerlyVisitInGlobbing is set to a sentinal value of -2. The
visitor cannot understand all globs, e.g. when some of the pattern strings are
constructed from complex expression, but the majority of globs is explicitly
spelled out.

RELNOTES: None.
PiperOrigin-RevId: 234751342
diff --git a/src/main/java/com/google/devtools/build/lib/packages/PackageFactory.java b/src/main/java/com/google/devtools/build/lib/packages/PackageFactory.java
index 55e3c61..0d11eac 100644
--- a/src/main/java/com/google/devtools/build/lib/packages/PackageFactory.java
+++ b/src/main/java/com/google/devtools/build/lib/packages/PackageFactory.java
@@ -41,6 +41,7 @@
 import com.google.devtools.build.lib.skylarkinterface.Param;
 import com.google.devtools.build.lib.skylarkinterface.SkylarkSignature;
 import com.google.devtools.build.lib.skylarkinterface.SkylarkValue;
+import com.google.devtools.build.lib.syntax.Argument.Passed;
 import com.google.devtools.build.lib.syntax.BaseFunction;
 import com.google.devtools.build.lib.syntax.BuildFileAST;
 import com.google.devtools.build.lib.syntax.BuiltinFunction;
@@ -49,8 +50,12 @@
 import com.google.devtools.build.lib.syntax.Environment.Extension;
 import com.google.devtools.build.lib.syntax.EvalException;
 import com.google.devtools.build.lib.syntax.EvalUtils;
+import com.google.devtools.build.lib.syntax.Expression;
 import com.google.devtools.build.lib.syntax.FuncallExpression;
 import com.google.devtools.build.lib.syntax.FunctionSignature;
+import com.google.devtools.build.lib.syntax.Identifier;
+import com.google.devtools.build.lib.syntax.IntegerLiteral;
+import com.google.devtools.build.lib.syntax.ListLiteral;
 import com.google.devtools.build.lib.syntax.Mutability;
 import com.google.devtools.build.lib.syntax.ParserInputSource;
 import com.google.devtools.build.lib.syntax.Runtime;
@@ -62,6 +67,8 @@
 import com.google.devtools.build.lib.syntax.SkylarkUtils.Phase;
 import com.google.devtools.build.lib.syntax.StarlarkSemantics;
 import com.google.devtools.build.lib.syntax.Statement;
+import com.google.devtools.build.lib.syntax.StringLiteral;
+import com.google.devtools.build.lib.syntax.SyntaxTreeVisitor;
 import com.google.devtools.build.lib.syntax.Type;
 import com.google.devtools.build.lib.syntax.Type.ConversionException;
 import com.google.devtools.build.lib.vfs.FileSystem;
@@ -73,6 +80,7 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -423,7 +431,9 @@
   /**
    * Sets the number of directories to eagerly traverse on the first glob for a given package, in
    * order to warm the filesystem. -1 means do no eager traversal. See {@code
-   * PackageCacheOptions#maxDirectoriesToEagerlyVisitInGlobbing}.
+   * PackageCacheOptions#maxDirectoriesToEagerlyVisitInGlobbing}. -2 means do the eager traversal
+   * using the regular globbing infrastructure, i.e. sharing the globbing threads and caching the
+   * actual glob results.
    */
   public void setMaxDirectoriesToEagerlyVisitInGlobbing(
       int maxDirectoriesToEagerlyVisitInGlobbing) {
@@ -1664,6 +1674,17 @@
         pkgBuilder.setContainsErrors();
       }
 
+      if (maxDirectoriesToEagerlyVisitInGlobbing == -2) {
+        GlobPatternExtractor extractor = new GlobPatternExtractor();
+        extractor.visit(buildFileAST);
+        try {
+          globber.runAsync(extractor.getIncludeDirectoriesPatterns(), ImmutableList.of(), false);
+          globber.runAsync(extractor.getExcludeDirectoriesPatterns(), ImmutableList.of(), true);
+        } catch (BadGlobException | InterruptedException e) {
+          // Ignore exceptions. Errors will be properly reported when the actual globbing is done.
+        }
+      }
+
       // TODO(bazel-team): (2009) the invariant "if errors are reported, mark the package
       // as containing errors" is strewn all over this class.  Refactor to use an
       // event sensor--and see if we can simplify the calling code in
@@ -1678,6 +1699,64 @@
     return pkgBuilder;
   }
 
+  /**
+   * A GlobPatternExtractor visits a syntax tree, tries to extract glob() patterns from it, and
+   * eagerly instructs a {@link Globber} to fetch them asynchronously. That way, the glob results
+   * are readily available when required in the actual execution of the syntax tree. The starlark
+   * code itself is later executed sequentially and having costly globs, especially slow on
+   * networked file systems, executed sequentially in them can be very time consuming.
+   */
+  @VisibleForTesting
+  static class GlobPatternExtractor extends SyntaxTreeVisitor {
+    private final Set<String> includeDirectoriesPatterns = new HashSet<>();
+    private final Set<String> excludeDirectoriesPatterns = new HashSet<>();
+
+    @Override
+    public void visit(FuncallExpression node) {
+      super.visit(node);
+      Expression function = node.getFunction();
+      if (!(function instanceof Identifier)) {
+        return;
+      }
+      if (!((Identifier) function).getName().equals("glob")) {
+        return;
+      }
+
+      boolean excludeDirectories = true; // excluded by default.
+      List<String> globStrings = new ArrayList<>();
+      for (Passed arg : node.getArguments()) {
+        if (arg.getIdentifier() != null
+            && arg.getIdentifier().getName().equals("exclude_directories")) {
+          if (arg.getValue() instanceof IntegerLiteral) {
+            excludeDirectories = ((IntegerLiteral) arg.getValue()).getValue() != 0;
+          }
+          continue;
+        }
+        if (arg.getValue() instanceof ListLiteral) {
+          ListLiteral list = (ListLiteral) arg.getValue();
+          for (Expression elem : list.getElements()) {
+            if (elem instanceof StringLiteral) {
+              globStrings.add(((StringLiteral) elem).getValue());
+            }
+          }
+        }
+      }
+      if (excludeDirectories) {
+        excludeDirectoriesPatterns.addAll(globStrings);
+      } else {
+        includeDirectoriesPatterns.addAll(globStrings);
+      }
+    }
+
+    List<String> getIncludeDirectoriesPatterns() {
+      return ImmutableList.copyOf(includeDirectoriesPatterns);
+    }
+
+    List<String> getExcludeDirectoriesPatterns() {
+      return ImmutableList.copyOf(excludeDirectoriesPatterns);
+    }
+  }
+
   // Reports an error and returns false iff package identifier was illegal.
   private static boolean validatePackageIdentifier(
       PackageIdentifier packageId, Location location, ExtendedEventHandler eventHandler) {
diff --git a/src/test/java/com/google/devtools/build/lib/packages/PackageFactoryTest.java b/src/test/java/com/google/devtools/build/lib/packages/PackageFactoryTest.java
index ea11633..94b47ca 100644
--- a/src/test/java/com/google/devtools/build/lib/packages/PackageFactoryTest.java
+++ b/src/test/java/com/google/devtools/build/lib/packages/PackageFactoryTest.java
@@ -25,8 +25,10 @@
 import com.google.devtools.build.lib.cmdline.Label;
 import com.google.devtools.build.lib.cmdline.PackageIdentifier;
 import com.google.devtools.build.lib.events.Reporter;
+import com.google.devtools.build.lib.packages.PackageFactory.GlobPatternExtractor;
 import com.google.devtools.build.lib.packages.util.PackageFactoryApparatus;
 import com.google.devtools.build.lib.packages.util.PackageFactoryTestBase;
+import com.google.devtools.build.lib.syntax.BuildFileAST;
 import com.google.devtools.build.lib.syntax.Type;
 import com.google.devtools.build.lib.testutil.MoreAsserts;
 import com.google.devtools.build.lib.testutil.TestUtils;
@@ -1230,4 +1232,25 @@
   protected String getPathPrefix() {
     return "";
   }
+
+  @Test
+  public void testGlobPatternExtractor() {
+    GlobPatternExtractor globPatternExtractor = new GlobPatternExtractor();
+    globPatternExtractor.visit(
+        BuildFileAST.parseString(
+            event -> {
+              throw new IllegalArgumentException(event.getMessage());
+            },
+            "pattern = '*'",
+            "some_variable = glob([",
+            "  '**/*',",
+            "  'a' + 'b',",
+            "  pattern,",
+            "])",
+            "other_variable = glob(include = ['a'], exclude = ['b'])",
+            "third_variable = glob(['c'], exclude_directories = 0)"));
+    assertThat(globPatternExtractor.getExcludeDirectoriesPatterns())
+        .containsExactly("ab", "a", "b", "**/*");
+    assertThat(globPatternExtractor.getIncludeDirectoriesPatterns()).containsExactly("c");
+  }
 }