Reject files when the first line is indented.

A bug in the lexer ignored indentation on the first line of a file. This now causes an error.

Also, remove the COMMENT token from the lexer. Comments are now collected by the lexer and accessed separately (via Lexer.getComments()). This will allow further optimizations in the lexer. It also aligns the code a bit more with the Go implementation.

RELNOTES[INC]: Indentation on the first line of a file was previously ignored. This is now fixed.

PiperOrigin-RevId: 197889775

commit: 17f8d4e5a36f5c4bd020ce9163f5b1db62679e2c [log] [tgz]
author: laurentlb <laurentlb@google.com> Thu May 24 07:32:52 2018 -0700
committer: Copybara-Service <copybara-piper@google.com> Thu May 24 07:33:48 2018 -0700
tree: 9b065d3c27259a5da38563fcda505c3f7002275c
parent: 2a6051b0c74ce59e30522fbd509ccbb460289df7 [diff]
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
index 5ecae12..a50a0a0 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java

@@ -24,7 +24,9 @@
 import com.google.devtools.build.lib.util.Pair;
 import com.google.devtools.build.lib.vfs.PathFragment;
 import java.util.ArrayDeque;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Stack;
@@ -87,11 +89,18 @@
   /** Last Token that was scanned. */
   private Token lastToken;
 
+  private final List<Comment> comments;
+
   // The number of unclosed open-parens ("(", '{', '[') at the current point in
   // the stream. Whitespace is handled differently when this is nonzero.
   private int openParenStackDepth = 0;
 
   private boolean containsErrors;
+  /**
+   * True at the start of the file and after each NEWLINE token.
+   * In other words, we are outside an expression and we have to check the indentation.
+   */
+  private boolean checkIndentation;
 
   /**
    * Constructs a lexer which tokenizes the contents of the specified InputBuffer. Any errors during
@@ -104,6 +113,8 @@
     this.pos = 0;
     this.eventHandler = eventHandler;
     this.locationInfo = new LocationInfo(input.getPath(), lineNumberTable);
+    this.checkIndentation = true;
+    this.comments = new ArrayList<>();
 
     indentStack.push(0);
   }
@@ -112,6 +123,10 @@
     this(input, eventHandler, LineNumberTable.create(input.getContent(), input.getPath()));
   }
 
+  List<Comment> getComments() {
+    return comments;
+  }
+
   /**
    * Returns the filename from which the lexer's input came. Returns an empty value if the input
    * came from a string.
@@ -216,18 +231,16 @@
   }
 
   /**
-   * Parses an end-of-line sequence, handling statement indentation correctly.
+   * Parses an end-of-line sequence.
    *
    * <p>UNIX newlines are assumed (LF). Carriage returns are always ignored.
-   *
-   * <p>ON ENTRY: 'pos' is the index of the char after '\n'.
-   * ON EXIT: 'pos' is the index of the next non-space char after '\n'.
    */
   private void newline() {
     if (openParenStackDepth > 0) {
       newlineInsideExpression(); // in an expression: ignore space
     } else {
-      newlineOutsideExpression(); // generate NEWLINE/INDENT/OUTDENT tokens
+      checkIndentation = true;
+      addToken(new Token(TokenKind.NEWLINE, pos - 1, pos));
     }
   }
 
@@ -244,10 +257,6 @@
   }
 
   private void newlineOutsideExpression() {
-    if (pos > 1) { // skip over newline at start of file
-      addToken(new Token(TokenKind.NEWLINE, pos - 1, pos));
-    }
-
     // we're in a stmt: suck up space at beginning of next line
     int indentLen = 0;
     while (pos < buffer.length) {
@@ -269,7 +278,7 @@
         while (pos < buffer.length && c != '\n') {
           c = buffer[pos++];
         }
-        addToken(new Token(TokenKind.COMMENT, oldPos, pos - 1, bufferSlice(oldPos, pos - 1)));
+        makeComment(oldPos, pos - 1, bufferSlice(oldPos, pos - 1));
         indentLen = 0;
       } else { // printing character
         break;
@@ -707,6 +716,14 @@
    * least one token will be added to the tokens queue.
    */
   private void tokenize() {
+    if (checkIndentation) {
+      checkIndentation = false;
+      newlineOutsideExpression(); // generate INDENT/OUTDENT tokens
+      if (!tokens.isEmpty()) {
+        return;
+      }
+    }
+
     while (pos < buffer.length) {
       if (tokenizeTwoChars()) {
         pos += 2;
@@ -837,7 +854,7 @@
             pos++;
           }
         }
-        addToken(new Token(TokenKind.COMMENT, oldPos, pos, bufferSlice(oldPos, pos)));
+        makeComment(oldPos, pos, bufferSlice(oldPos, pos));
         break;
       }
       case '\'':
@@ -908,4 +925,7 @@
     return new String(this.buffer, start, end - start);
   }
 
+  private void makeComment(int start, int end, String content) {
+    comments.add(ASTNode.setLocation(createLocation(start, end), new Comment(content)));
+  }
 }

diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Parser.java b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
index d593dc0..ec6d323 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java

@@ -115,7 +115,6 @@
 
   private final Lexer lexer;
   private final EventHandler eventHandler;
-  private final List<Comment> comments;
 
   private static final Map<TokenKind, Operator> binaryOperators =
       new ImmutableMap.Builder<TokenKind, Operator>()
@@ -167,7 +166,6 @@
   private Parser(Lexer lexer, EventHandler eventHandler) {
     this.lexer = lexer;
     this.eventHandler = eventHandler;
-    this.comments = new ArrayList<>();
     nextToken();
   }
 
@@ -195,7 +193,7 @@
     List<Statement> statements = parser.parseFileInput();
     boolean errors = parser.errorsCount > 0 || lexer.containsErrors();
     return new ParseResult(
-        statements, parser.comments, locationFromStatements(lexer, statements), errors);
+        statements, lexer.getComments(), locationFromStatements(lexer, statements), errors);
   }
 
   /**
@@ -415,11 +413,6 @@
   private void nextToken() {
     if (token == null || token.kind != TokenKind.EOF) {
       token = lexer.nextToken();
-      // transparently handle comment tokens
-      while (token.kind == TokenKind.COMMENT) {
-        makeComment();
-        token = lexer.nextToken();
-      }
     }
     checkForbiddenKeywords();
     if (DEBUGGING) {
@@ -1344,9 +1337,4 @@
     }
     return setLocation(new ReturnStatement(expression), start, end);
   }
-
-  // create a comment node
-  private void makeComment() {
-    comments.add(setLocation(new Comment((String) token.value), token.left, token.right));
-  }
 }

diff --git a/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java b/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java
index e5098f1..e2a4dcd 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java

@@ -26,7 +26,6 @@
   CLASS("class"),
   COLON(":"),
   COMMA(","),
-  COMMENT("comment"),
   CONTINUE("continue"),
   DEF("def"),
   DEL("del"),
commit	17f8d4e5a36f5c4bd020ce9163f5b1db62679e2c	[log] [tgz]
author	laurentlb <laurentlb@google.com>	Thu May 24 07:32:52 2018 -0700
committer	Copybara-Service <copybara-piper@google.com>	Thu May 24 07:33:48 2018 -0700
tree	9b065d3c27259a5da38563fcda505c3f7002275c
parent	2a6051b0c74ce59e30522fbd509ccbb460289df7 [diff]