bazel syntax: fine-grained syntax locations

This change improves the precision with which the locations of source tokens are recorded in the syntax tree.

Prior to this change, every Node held a single LexerLocation object that recorded the start and end offsets of the node, plus a reference to the shared LineNumberTable (LNT), that maps these offsets to Locations. This had a cost of one reference and one LexerLocation object per node.

This change causes every Node to record the offsets only of its salient tokens, plus a reference to the LNT. For example, in the expression "1 + 2", the only salient token is the plus operator; the start and end offsets can be computed inductively by delegating to x.getStartLocation and y.getEndLocation. Similarly, in f(x), the salient tokens are '(' and ')'. This has a cost of 1 word plus approximately 1 int per Node.

Consequently, we can record the exact position of operators that fail, and do so using less memory than before.

Now, when an expression such as 'f().g() + 1' fails, the location in the error message will refer to the '+' operator or one of the two '(' tokens. Before, all three errors would be wrongly reported at the same place: f, since it is the start of all three subexpressions.

Overview:
- Every Node has a reference to the LNT, set immediately after construction. (Morally it is part of the constructor but it's fussy to set it that way.)
- Every node defines getStartOffset and getEndOffset, typically by delegating to its left and right subtrees.
- Node end offsets are exclusive again. CL 170723732 was a mistake: half-open intervals are mathematically simpler. A client that wants to subtract one may do that. But there are none.
- Comprehension.{For,If} are now true Nodes.
- StarlarkFile's extent is now (correctly) the entire file, not just the range from the first statement to the last.
- The parser provides offsets of salient tokens to the Node constructors.
- IntegerLiteral now retains the raw token text in addition to the value.
- Token is gone. Its four fields are now embedded in the Lexer.
- Eval uses the following token positions in run-time error messages:
      x+y     f(x)     x[i]     x.y     x[i:j]     k: v
       ^       ^        ^        ^       ^          ^
- Location is final. LexerLocation and LineAndColumn are gone.
- Misparsed source represented as an Identifier now has the text of the source instead of "$error$". This is more faithful and causes the offsets to be correct.
- The offsets of the orig Identifier in load("module", local="orig") coincide with the text 'orig', sans quotation marks.

Benchmark: saves about 65MB (1% of live RAM) retained by the Usual Benchmark, a deps query.

RELNOTES: N/A
PiperOrigin-RevId: 305803031

commit: 07b15e6d996609129c5bd42d7669519cd959e4d5 [log] [tgz]
author: adonovan <adonovan@google.com> Thu Apr 09 18:32:33 2020 -0700
committer: Copybara-Service <copybara-worker@google.com> Thu Apr 09 18:34:00 2020 -0700
tree: 96024089ac25cf4120ae9c8aa1782e71b36c3107
parent: 1cd84ecf25ba495b70b5601babc45423427c7c9c [diff] [blame]
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/StarlarkFile.java b/src/main/java/com/google/devtools/build/lib/syntax/StarlarkFile.java
index b5d222d..237cd19 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/StarlarkFile.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/StarlarkFile.java

@@ -34,46 +34,59 @@
   final List<SyntaxError> errors; // appended to by ValidationEnvironment
   @Nullable private final String contentHashCode;
 
+  @Override
+  public int getStartOffset() {
+    return 0;
+  }
+
+  @Override
+  public int getEndOffset() {
+    return lnt.size();
+  }
+
   private StarlarkFile(
       ImmutableList<Statement> statements,
       FileOptions options,
       ImmutableList<Comment> comments,
       List<SyntaxError> errors,
-      String contentHashCode,
-      Lexer.LexerLocation location) {
+      String contentHashCode) {
     this.statements = statements;
     this.options = options;
     this.comments = comments;
     this.errors = errors;
     this.contentHashCode = contentHashCode;
-    this.setLocation(location);
   }
 
   // Creates a StarlarkFile from the given effective list of statements,
   // which may include the prelude.
   private static StarlarkFile create(
+      LineNumberTable lnt,
       ImmutableList<Statement> statements,
       FileOptions options,
       Parser.ParseResult result,
       String contentHashCode) {
-    return new StarlarkFile(
-        statements,
-        options,
-        ImmutableList.copyOf(result.comments),
-        result.errors,
-        contentHashCode,
-        result.location);
+    StarlarkFile file =
+        new StarlarkFile(
+            statements,
+            options,
+            ImmutableList.copyOf(result.comments),
+            result.errors,
+            contentHashCode);
+    file.lnt = lnt;
+    return file;
   }
 
   /** Extract a subtree containing only statements from i (included) to j (excluded). */
   public StarlarkFile subTree(int i, int j) {
-    return new StarlarkFile(
-        this.statements.subList(i, j),
-        this.options,
-        /*comments=*/ ImmutableList.of(),
-        errors,
-        /*contentHashCode=*/ null,
-        (Lexer.LexerLocation) this.statements.get(i).getStartLocation());
+    StarlarkFile file =
+        new StarlarkFile(
+            this.statements.subList(i, j),
+            this.options,
+            /*comments=*/ ImmutableList.of(),
+            errors,
+            /*contentHashCode=*/ null);
+    file.lnt = this.lnt;
+    return file;
   }
 
   /**
@@ -121,7 +134,7 @@
     stmts.addAll(prelude);
     stmts.addAll(result.statements);
 
-    return create(stmts.build(), options, result, /*contentHashCode=*/ null);
+    return create(result.lnt, stmts.build(), options, result, /*contentHashCode=*/ null);
   }
 
   // TODO(adonovan): make the digest publicly settable, and delete this.
@@ -129,6 +142,7 @@
       throws IOException {
     Parser.ParseResult result = Parser.parseFile(input, options);
     return create(
+        result.lnt,
         ImmutableList.copyOf(result.statements),
         options,
         result,
@@ -152,7 +166,11 @@
   public static StarlarkFile parse(ParserInput input, FileOptions options) {
     Parser.ParseResult result = Parser.parseFile(input, options);
     return create(
-        ImmutableList.copyOf(result.statements), options, result, /*contentHashCode=*/ null);
+        result.lnt,
+        ImmutableList.copyOf(result.statements),
+        options,
+        result,
+        /*contentHashCode=*/ null);
   }
 
   /** Parse a Starlark file with default options. */
commit	07b15e6d996609129c5bd42d7669519cd959e4d5	[log] [tgz]
author	adonovan <adonovan@google.com>	Thu Apr 09 18:32:33 2020 -0700
committer	Copybara-Service <copybara-worker@google.com>	Thu Apr 09 18:34:00 2020 -0700
tree	96024089ac25cf4120ae9c8aa1782e71b36c3107
parent	1cd84ecf25ba495b70b5601babc45423427c7c9c [diff] [blame]