bazel packages: add --record_rule_instantiation_callstack flag

This flag (default: false) causes each rule to record the Starlark
call stack at the moment of its instantiation.

The stacks are displayed by blaze query --output=build.

Example output:

        # /workspace/x/BUILD:2:1
        cc_library(
          name = "a",
          ...
        )
        # Instantiation stack:
        #   /workspace/x/inc.bzl:4:3 called from g
        #   /workspace/x/inc.bzl:2:3 called from f
        #   /workspace/x/BUILD:2:1   called from <toplevel>

By combining two optimizations, prefix sharing using a linked tree,
and element compression using packed integers, this feature
imposes an additional retained heap space cost of only 1.5%
for deps(//X) where X is Google's web server.

PiperOrigin-RevId: 300408104
diff --git a/src/main/java/com/google/devtools/build/lib/packages/CallStack.java b/src/main/java/com/google/devtools/build/lib/packages/CallStack.java
new file mode 100644
index 0000000..fd097cd
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/packages/CallStack.java
@@ -0,0 +1,176 @@
+// Copyright 2020 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.packages;
+
+import com.google.common.collect.ImmutableList;
+import com.google.devtools.build.lib.events.Location;
+import com.google.devtools.build.lib.syntax.StarlarkThread;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import javax.annotation.Nullable;
+
+/**
+ * A CallStack is an opaque immutable stack of Starlark call frames, outermost call first. Its
+ * representation is highly optimized for space. Call {@link #toArray} to access the frames.
+ *
+ * <p>A CallStack cannot be constructed directly, but must be created by calling the {@code of}
+ * method of a Builder, which should be shared across all the rules of a package.
+ */
+public final class CallStack {
+
+  // A naive implementation using a simple array of CallStackEntry was
+  // found to increase retained heap size for a large query by about 6%.
+  // We reduce this using two optimizations:
+  //
+  // 1) entry compression
+  // A CallStackEntry has size 2 (header) + 1 String + 1 Location = 4 words.
+  // A Location has size 2 (header) + 1 String + 1 (file/loc) = 4 words.
+  // By representing the two strings as small integers---indices into a shared
+  // table---we can represent the payload using 4 ints = 2 words.
+  // We reconstitute new Locations and CallStackEntries on request.
+  // Eliminating Java object overheads reduces the marginal
+  // space for an element by about a half.
+  //
+  // 2) prefix sharing
+  // We share common stack prefixes between successive entries.
+  // The stacks passed to successive of() calls display locality because
+  // within a package, many rules are created by a single macro or stack
+  // of macros. The Builder records the previous stack and creates a tree node
+  // for each prefix. When it sees a new stack, it finds the common prefix
+  // for the current and previous stacks, and only creates new nodes for
+  // the suffix of different entries.
+  // Avoiding storage of redundant prefixes reduces overall space by about
+  // two thirds.
+  //
+  // This class intentionally does not implement List
+  // because internally it is a linked list,
+  // so accidentally using List.get for sequential access
+  // would result in quadratic behavior.
+
+  public static final CallStack EMPTY = new CallStack(ImmutableList.of(), 0, null);
+
+  private final List<String> strings; // builder's string table, shared
+  private final int size; // depth of stack
+  @Nullable private final Node node; // tree node representing this stack
+
+  private CallStack(List<String> strings, int size, @Nullable Node node) {
+    this.strings = strings;
+    this.size = size;
+    this.node = node;
+  }
+
+  /** Returns the call stack as a list of frames, outermost call first. */
+  public ImmutableList<StarlarkThread.CallStackEntry> toList() {
+    return ImmutableList.copyOf(toArray());
+  }
+
+  /** Returns the call stack as a new array, outermost call first. */
+  public StarlarkThread.CallStackEntry[] toArray() {
+    StarlarkThread.CallStackEntry[] array = new StarlarkThread.CallStackEntry[size];
+    int i = size;
+    for (Node n = node; n != null; n = n.parent) {
+      String file = strings.get(n.file);
+      String name = strings.get(n.name);
+      Location loc = Location.fromFileLineColumn(file, n.line, n.col);
+      array[--i] = new StarlarkThread.CallStackEntry(name, loc);
+    }
+    return array;
+  }
+
+  // A Node is a node in a linked tree representing a prefix of the call stack.
+  // file and line are indices of strings in the builder's shared table.
+  private static class Node {
+    Node(int name, int file, int line, int col, Node parent) {
+      this.name = name;
+      this.file = file;
+      this.line = line;
+      this.col = col;
+      this.parent = parent;
+    }
+
+    final int name;
+    final int file;
+    final int line;
+    final int col;
+    @Nullable final Node parent;
+  }
+
+  /** A Builder is a stateful indexer of call stacks. */
+  static final class Builder {
+
+    // string table: strings[index[s]] == s
+    private final Map<String, Integer> index = new HashMap<>();
+    private final List<String> strings = new ArrayList<>();
+
+    // nodes[0:depth] is the previously encountered call stack.
+    // We avoid ArrayList because we need efficient truncation.
+    private Node[] nodes = new Node[10];
+    private int depth = 0;
+
+    /**
+     * Returns a compact representation of the specified call stack. <br>
+     * Space efficiency depends on the similarity of the list elements in successive calls.
+     * Reversing the stack before calling this function destroys the optimization, for instance.
+     */
+    CallStack of(List<StarlarkThread.CallStackEntry> stack) {
+      // We find and reuse the common ancestor node for the prefix common
+      // to the current stack and the stack passed to the previous call,
+      // then add child nodes for the different suffix, if any.
+
+      // Loop invariant: parent == (i > 0 ? nodes[i-1] : null)
+      Node parent = null;
+      int n = stack.size();
+      for (int i = 0; i < n; i++) {
+        StarlarkThread.CallStackEntry entry = stack.get(i);
+
+        int name = indexOf(entry.name);
+        int file = indexOf(entry.location.file());
+        int line = entry.location.line();
+        int column = entry.location.column();
+        if (i < depth
+            && name == nodes[i].name
+            && file == nodes[i].file
+            && line == nodes[i].line
+            && column == nodes[i].col) {
+          parent = nodes[i];
+          continue;
+        }
+        parent = new Node(name, file, line, column, parent);
+        if (i == nodes.length) {
+          nodes = Arrays.copyOf(nodes, nodes.length << 1); // grow by doubling
+        }
+        nodes[i] = parent;
+      }
+      this.depth = n; // truncate
+
+      // Use the same node for all empty stacks, to avoid allocations.
+      return parent != null ? new CallStack(strings, n, parent) : EMPTY;
+    }
+
+    private int indexOf(String s) {
+      int i = index.size();
+      Integer prev = index.putIfAbsent(s, i);
+      if (prev != null) {
+        i = prev;
+      } else {
+        strings.add(s);
+      }
+      return i;
+    }
+  }
+}
diff --git a/src/main/java/com/google/devtools/build/lib/packages/Package.java b/src/main/java/com/google/devtools/build/lib/packages/Package.java
index 5ae743e..9736d0f 100644
--- a/src/main/java/com/google/devtools/build/lib/packages/Package.java
+++ b/src/main/java/com/google/devtools/build/lib/packages/Package.java
@@ -45,6 +45,7 @@
 import com.google.devtools.build.lib.skyframe.serialization.SerializationContext;
 import com.google.devtools.build.lib.skyframe.serialization.SerializationException;
 import com.google.devtools.build.lib.syntax.StarlarkSemantics;
+import com.google.devtools.build.lib.syntax.StarlarkThread;
 import com.google.devtools.build.lib.util.SpellChecker;
 import com.google.devtools.build.lib.vfs.Path;
 import com.google.devtools.build.lib.vfs.PathFragment;
@@ -759,12 +760,9 @@
       RootedPath workspacePath,
       String runfilesPrefix,
       StarlarkSemantics starlarkSemantics) {
-    Builder b =
-        new Builder(
-            helper.createFreshPackage(LabelConstants.EXTERNAL_PACKAGE_IDENTIFIER, runfilesPrefix),
-            starlarkSemantics);
-    b.setFilename(workspacePath);
-    return b;
+    return new Builder(
+            helper, LabelConstants.EXTERNAL_PACKAGE_IDENTIFIER, runfilesPrefix, starlarkSemantics)
+        .setFilename(workspacePath);
   }
 
   /**
@@ -821,6 +819,7 @@
     private final Package pkg;
 
     private final StarlarkSemantics starlarkSemantics;
+    private final CallStack.Builder callStackBuilder = new CallStack.Builder();
 
     // The map from each repository to that repository's remappings map.
     // This is only used in the //external package, it is an empty map for all other packages.
@@ -909,6 +908,7 @@
       }
     };
 
+    // low-level constructor
     Builder(Package pkg, StarlarkSemantics starlarkSemantics) {
       this.starlarkSemantics = starlarkSemantics;
       this.pkg = pkg;
@@ -917,6 +917,7 @@
       }
     }
 
+    // high-level constructor
     Builder(
         Helper helper,
         PackageIdentifier id,
@@ -1245,24 +1246,22 @@
         Label label,
         RuleClass ruleClass,
         Location location,
+        List<StarlarkThread.CallStackEntry> callstack,
         AttributeContainer attributeContainer) {
       return new Rule(
-          pkg,
-          label,
-          ruleClass,
-          location,
-          attributeContainer);
+          pkg, label, ruleClass, location, callStackBuilder.of(callstack), attributeContainer);
     }
 
     /**
-     * Same as {@link #createRule(Label, RuleClass, Location, AttributeContainer)}, except
-     * allows specifying an {@link ImplicitOutputsFunction} override. Only use if you know what
-     * you're doing.
+     * Same as {@link #createRule(Label, RuleClass, Location, List<StarlarkThread.CallStackEntry>,
+     * AttributeContainer)}, except allows specifying an {@link ImplicitOutputsFunction} override.
+     * Only use if you know what you're doing.
      */
     Rule createRule(
         Label label,
         RuleClass ruleClass,
         Location location,
+        List<StarlarkThread.CallStackEntry> callstack,
         AttributeContainer attributeContainer,
         ImplicitOutputsFunction implicitOutputsFunction) {
       return new Rule(
@@ -1270,6 +1269,7 @@
           label,
           ruleClass,
           location,
+          callStackBuilder.of(callstack),
           attributeContainer,
           implicitOutputsFunction);
     }
diff --git a/src/main/java/com/google/devtools/build/lib/packages/Rule.java b/src/main/java/com/google/devtools/build/lib/packages/Rule.java
index 28f0df3..5e2c1b5 100644
--- a/src/main/java/com/google/devtools/build/lib/packages/Rule.java
+++ b/src/main/java/com/google/devtools/build/lib/packages/Rule.java
@@ -78,6 +78,8 @@
 
   private final Location location;
 
+  private final CallStack callstack;
+
   private final ImplicitOutputsFunction implicitOutputsFunction;
 
   // Initialized in the call to populateOutputFiles.
@@ -89,12 +91,14 @@
       Label label,
       RuleClass ruleClass,
       Location location,
+      CallStack callstack,
       AttributeContainer attributeContainer) {
     this(
         pkg,
         label,
         ruleClass,
         location,
+        callstack,
         attributeContainer,
         ruleClass.getDefaultImplicitOutputsFunction());
   }
@@ -104,12 +108,14 @@
       Label label,
       RuleClass ruleClass,
       Location location,
+      CallStack callstack,
       AttributeContainer attributeContainer,
       ImplicitOutputsFunction implicitOutputsFunction) {
     this.pkg = Preconditions.checkNotNull(pkg);
     this.label = label;
     this.ruleClass = Preconditions.checkNotNull(ruleClass);
     this.location = Preconditions.checkNotNull(location);
+    this.callstack = Preconditions.checkNotNull(callstack);
     this.attributes = attributeContainer;
     this.implicitOutputsFunction = implicitOutputsFunction;
     this.containsErrors = false;
@@ -270,6 +276,11 @@
     return location;
   }
 
+  /** Returns the stack of function calls active when this rule was instantiated. */
+  public CallStack getCallStack() {
+    return callstack;
+  }
+
   public String getDefinitionInformation() {
     return definitionInformation;
   }
@@ -611,7 +622,6 @@
   }
 
   @Override
-  @SuppressWarnings("unchecked")
   public Set<DistributionType> getDistributions() {
     if (isAttrDefined("distribs", BuildType.DISTRIBUTIONS)
         && isAttributeValueExplicitlySpecified("distribs")) {
diff --git a/src/main/java/com/google/devtools/build/lib/packages/RuleClass.java b/src/main/java/com/google/devtools/build/lib/packages/RuleClass.java
index 9b5504b..89b9edb 100644
--- a/src/main/java/com/google/devtools/build/lib/packages/RuleClass.java
+++ b/src/main/java/com/google/devtools/build/lib/packages/RuleClass.java
@@ -60,6 +60,7 @@
 import com.google.devtools.build.lib.syntax.Sequence;
 import com.google.devtools.build.lib.syntax.Starlark;
 import com.google.devtools.build.lib.syntax.StarlarkFunction;
+import com.google.devtools.build.lib.syntax.StarlarkThread;
 import com.google.devtools.build.lib.util.FileTypeSet;
 import com.google.devtools.build.lib.util.StringUtil;
 import com.google.devtools.build.lib.vfs.PathFragment;
@@ -1833,10 +1834,11 @@
       AttributeValues<T> attributeValues,
       EventHandler eventHandler,
       Location location,
+      List<StarlarkThread.CallStackEntry> callstack,
       AttributeContainer attributeContainer,
       boolean checkThirdPartyRulesHaveLicenses)
       throws LabelSyntaxException, InterruptedException, CannotPrecomputeDefaultsException {
-    Rule rule = pkgBuilder.createRule(ruleLabel, this, location, attributeContainer);
+    Rule rule = pkgBuilder.createRule(ruleLabel, this, location, callstack, attributeContainer);
     populateRuleAttributeValues(rule, pkgBuilder, attributeValues, eventHandler);
     checkAspectAllowedValues(rule, eventHandler);
     rule.populateOutputFiles(eventHandler, pkgBuilder);
@@ -1870,15 +1872,13 @@
       Label ruleLabel,
       AttributeValues<T> attributeValues,
       Location location,
+      List<StarlarkThread.CallStackEntry> callstack,
       AttributeContainer attributeContainer,
       ImplicitOutputsFunction implicitOutputsFunction)
       throws InterruptedException, CannotPrecomputeDefaultsException {
-    Rule rule = pkgBuilder.createRule(
-        ruleLabel,
-        this,
-        location,
-        attributeContainer,
-        implicitOutputsFunction);
+    Rule rule =
+        pkgBuilder.createRule(
+            ruleLabel, this, location, callstack, attributeContainer, implicitOutputsFunction);
     populateRuleAttributeValues(rule, pkgBuilder, attributeValues, NullEventHandler.INSTANCE);
     rule.populateOutputFilesUnchecked(NullEventHandler.INSTANCE, pkgBuilder);
     return rule;
diff --git a/src/main/java/com/google/devtools/build/lib/packages/RuleFactory.java b/src/main/java/com/google/devtools/build/lib/packages/RuleFactory.java
index b37bfe9..6043e28 100644
--- a/src/main/java/com/google/devtools/build/lib/packages/RuleFactory.java
+++ b/src/main/java/com/google/devtools/build/lib/packages/RuleFactory.java
@@ -15,6 +15,7 @@
 package com.google.devtools.build.lib.packages;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.devtools.build.lib.cmdline.Label;
 import com.google.devtools.build.lib.cmdline.LabelSyntaxException;
@@ -24,7 +25,6 @@
 import com.google.devtools.build.lib.packages.Package.NameConflictException;
 import com.google.devtools.build.lib.packages.PackageFactory.PackageContext;
 import com.google.devtools.build.lib.syntax.StarlarkThread;
-import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import javax.annotation.Nullable;
@@ -106,11 +106,22 @@
           ruleClass + " cannot be in the WORKSPACE file " + "(used by " + label + ")");
     }
 
-    // TODO(adonovan): record thread.getCallStack() in the rule,
-    // and make 'bazel query --output=build' display it (b/36593041).
+    ImmutableList<StarlarkThread.CallStackEntry> callstack =
+        thread != null ? thread.getCallStack() : ImmutableList.of();
 
     AttributesAndLocation generator =
-        generatorAttributesForMacros(pkgBuilder, attributeValues, thread, location, label);
+        generatorAttributesForMacros(pkgBuilder, attributeValues, callstack, location, label);
+
+    if (thread != null) {
+      // The raw stack is of the form [<toplevel>@BUILD:1, macro@lib.bzl:1, cc_library@<builtin>].
+      // If we're recording it (--record_rule_instantiation_callstack),
+      // pop the innermost frame for the rule, since it's obvious.
+      callstack =
+          thread.getSemantics().recordRuleInstantiationCallstack()
+              ? callstack.subList(0, callstack.size() - 1) // pop
+              : ImmutableList.of(); // save space
+    }
+
     try {
       // Examines --incompatible_disable_third_party_license_checking to see if we should check
       // third party targets for license existence.
@@ -126,6 +137,7 @@
           generator.attributes,
           eventHandler,
           generator.location,
+          callstack,
           attributeContainer,
           checkThirdPartyLicenses);
     } catch (LabelSyntaxException | CannotPrecomputeDefaultsException e) {
@@ -306,14 +318,9 @@
   private static AttributesAndLocation generatorAttributesForMacros(
       Package.Builder pkgBuilder,
       BuildLangTypedAttributeValuesMap args,
-      @Nullable StarlarkThread thread,
+      ImmutableList<StarlarkThread.CallStackEntry> stack,
       Location location,
       Label label) {
-    // Returns the original arguments if a) there is only the rule itself on the stack
-    // trace (=> no macro) or b) the attributes have already been set by Python pre-processing.
-    if (thread == null) {
-      return new AttributesAndLocation(args, location);
-    }
     boolean hasName = args.containsAttributeNamed("generator_name");
     boolean hasFunc = args.containsAttributeNamed("generator_function");
     // TODO(bazel-team): resolve cases in our code where hasName && !hasFunc, or hasFunc && !hasName
@@ -326,7 +333,6 @@
     // The stack must contain at least two entries:
     // 0: the outermost function (e.g. a BUILD file),
     // 1: the function called by it (e.g. a "macro" in a .bzl file).
-    List<StarlarkThread.CallStackEntry> stack = thread.getCallStack();
     if (stack.size() < 2 || !stack.get(1).location.file().endsWith(".bzl")) {
       return new AttributesAndLocation(args, location); // macro is not a Starlark function
     }
@@ -368,6 +374,7 @@
    *
    * @return The workspace-relative path of the given location, or null if it could not be computed.
    */
+  // TODO(b/151151653): make Starlark file Locations relative from the outset.
   @Nullable
   private static String maybeGetRelativeLocation(@Nullable Location location, Label label) {
     if (location == null) {
@@ -379,6 +386,9 @@
     // rules created from function calls in a subincluded file, even if both files share a path
     // prefix (for example, when //a/package:BUILD subincludes //a/package/with/a/subpackage:BUILD).
     // We can revert to that approach once subincludes aren't supported anymore.
+    //
+    // TODO(b/151165647): this logic has always been wrong:
+    // it spuriously matches occurrences of the package name earlier in the path.
     String absolutePath = location.toString();
     int pos = absolutePath.indexOf(label.getPackageName());
     return (pos < 0) ? null : absolutePath.substring(pos);
diff --git a/src/main/java/com/google/devtools/build/lib/packages/StarlarkSemanticsOptions.java b/src/main/java/com/google/devtools/build/lib/packages/StarlarkSemanticsOptions.java
index a6443a4..aec88df 100644
--- a/src/main/java/com/google/devtools/build/lib/packages/StarlarkSemanticsOptions.java
+++ b/src/main/java/com/google/devtools/build/lib/packages/StarlarkSemanticsOptions.java
@@ -654,6 +654,16 @@
               + "instead of linkopts to cc_toolchain_config")
   public boolean incompatibleLinkoptsToLinkLibs;
 
+  @Option(
+      name = "record_rule_instantiation_callstack",
+      defaultValue = "false",
+      documentationCategory = OptionDocumentationCategory.STARLARK_SEMANTICS,
+      effectTags = {OptionEffectTag.BUILD_FILE_SEMANTICS},
+      help =
+          "Causes each rule to record the callstack at the moment of its instantiation, at a"
+              + " modest cost in memory. The stack is visible in some forms of query output.")
+  public boolean recordRuleInstantiationCallstack;
+
   /**
    * An interner to reduce the number of StarlarkSemantics instances. A single Blaze instance should
    * never accumulate a large number of these and being able to shortcut on object identity makes a
@@ -714,6 +724,7 @@
             .incompatibleRequireLinkerInputCcApi(incompatibleRequireLinkerInputCcApi)
             .incompatibleRestrictStringEscapes(incompatibleRestrictStringEscapes)
             .incompatibleLinkoptsToLinkLibs(incompatibleLinkoptsToLinkLibs)
+            .recordRuleInstantiationCallstack(recordRuleInstantiationCallstack)
             .build();
     return INTERNER.intern(semantics);
   }
diff --git a/src/main/java/com/google/devtools/build/lib/packages/WorkspaceFactory.java b/src/main/java/com/google/devtools/build/lib/packages/WorkspaceFactory.java
index ad69a34..708eb65 100644
--- a/src/main/java/com/google/devtools/build/lib/packages/WorkspaceFactory.java
+++ b/src/main/java/com/google/devtools/build/lib/packages/WorkspaceFactory.java
@@ -240,11 +240,13 @@
       try {
         // The old rule references another Package instance and we wan't to keep the invariant that
         // every Rule references the Package it is contained within
-        Rule newRule = builder.createRule(
-            rule.getLabel(),
-            rule.getRuleClassObject(),
-            rule.getLocation(),
-            rule.getAttributeContainer());
+        Rule newRule =
+            builder.createRule(
+                rule.getLabel(),
+                rule.getRuleClassObject(),
+                rule.getLocation(),
+                rule.getCallStack().toList(),
+                rule.getAttributeContainer());
         newRule.populateOutputFiles(NullEventHandler.INSTANCE, builder);
         if (rule.containsErrors()) {
           newRule.setContainsErrors();