Reject files when the first line is indented.

A bug in the lexer ignored indentation on the first line of a file. This now
causes an error.

Also, remove the COMMENT token from the lexer. Comments are now accessed separately. This will allow further optimizations in the lexer. It also aligns the code a bit more with the Go implementation.

RELNOTES[INC]:
  Indentation on the first line of a file was previously ignored. This is now fixed.

PiperOrigin-RevId: 197889775
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
index 5ecae12..a50a0a0 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
@@ -24,7 +24,9 @@
 import com.google.devtools.build.lib.util.Pair;
 import com.google.devtools.build.lib.vfs.PathFragment;
 import java.util.ArrayDeque;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Stack;
@@ -87,11 +89,18 @@
   /** Last Token that was scanned. */
   private Token lastToken;
 
+  private final List<Comment> comments;
+
   // The number of unclosed open-parens ("(", '{', '[') at the current point in
   // the stream. Whitespace is handled differently when this is nonzero.
   private int openParenStackDepth = 0;
 
   private boolean containsErrors;
+  /**
+   * True after a NEWLINE token.
+   * In other words, we are outside an expression and we have to check the indentation.
+   */
+  private boolean checkIndentation;
 
   /**
    * Constructs a lexer which tokenizes the contents of the specified InputBuffer. Any errors during
@@ -104,6 +113,8 @@
     this.pos = 0;
     this.eventHandler = eventHandler;
     this.locationInfo = new LocationInfo(input.getPath(), lineNumberTable);
+    this.checkIndentation = true;
+    this.comments = new ArrayList<>();
 
     indentStack.push(0);
   }
@@ -112,6 +123,10 @@
     this(input, eventHandler, LineNumberTable.create(input.getContent(), input.getPath()));
   }
 
+  List<Comment> getComments() {
+    return comments;
+  }
+
   /**
    * Returns the filename from which the lexer's input came. Returns an empty value if the input
    * came from a string.
@@ -216,18 +231,16 @@
   }
 
   /**
-   * Parses an end-of-line sequence, handling statement indentation correctly.
+   * Parses an end-of-line sequence.
    *
    * <p>UNIX newlines are assumed (LF). Carriage returns are always ignored.
-   *
-   * <p>ON ENTRY: 'pos' is the index of the char after '\n'.
-   * ON EXIT: 'pos' is the index of the next non-space char after '\n'.
    */
   private void newline() {
     if (openParenStackDepth > 0) {
       newlineInsideExpression(); // in an expression: ignore space
     } else {
-      newlineOutsideExpression(); // generate NEWLINE/INDENT/OUTDENT tokens
+      checkIndentation = true;
+      addToken(new Token(TokenKind.NEWLINE, pos - 1, pos));
     }
   }
 
@@ -244,10 +257,6 @@
   }
 
   private void newlineOutsideExpression() {
-    if (pos > 1) { // skip over newline at start of file
-      addToken(new Token(TokenKind.NEWLINE, pos - 1, pos));
-    }
-
     // we're in a stmt: suck up space at beginning of next line
     int indentLen = 0;
     while (pos < buffer.length) {
@@ -269,7 +278,7 @@
         while (pos < buffer.length && c != '\n') {
           c = buffer[pos++];
         }
-        addToken(new Token(TokenKind.COMMENT, oldPos, pos - 1, bufferSlice(oldPos, pos - 1)));
+        makeComment(oldPos, pos - 1, bufferSlice(oldPos, pos - 1));
         indentLen = 0;
       } else { // printing character
         break;
@@ -707,6 +716,14 @@
    * least one token will be added to the tokens queue.
    */
   private void tokenize() {
+    if (checkIndentation) {
+      checkIndentation = false;
+      newlineOutsideExpression(); // generate INDENT/OUTDENT tokens
+      if (!tokens.isEmpty()) {
+        return;
+      }
+    }
+
     while (pos < buffer.length) {
       if (tokenizeTwoChars()) {
         pos += 2;
@@ -837,7 +854,7 @@
             pos++;
           }
         }
-        addToken(new Token(TokenKind.COMMENT, oldPos, pos, bufferSlice(oldPos, pos)));
+        makeComment(oldPos, pos, bufferSlice(oldPos, pos));
         break;
       }
       case '\'':
@@ -908,4 +925,7 @@
     return new String(this.buffer, start, end - start);
   }
 
+  private void makeComment(int start, int end, String content) {
+    comments.add(ASTNode.setLocation(createLocation(start, end), new Comment(content)));
+  }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Parser.java b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
index d593dc0..ec6d323 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
@@ -115,7 +115,6 @@
 
   private final Lexer lexer;
   private final EventHandler eventHandler;
-  private final List<Comment> comments;
 
   private static final Map<TokenKind, Operator> binaryOperators =
       new ImmutableMap.Builder<TokenKind, Operator>()
@@ -167,7 +166,6 @@
   private Parser(Lexer lexer, EventHandler eventHandler) {
     this.lexer = lexer;
     this.eventHandler = eventHandler;
-    this.comments = new ArrayList<>();
     nextToken();
   }
 
@@ -195,7 +193,7 @@
     List<Statement> statements = parser.parseFileInput();
     boolean errors = parser.errorsCount > 0 || lexer.containsErrors();
     return new ParseResult(
-        statements, parser.comments, locationFromStatements(lexer, statements), errors);
+        statements, lexer.getComments(), locationFromStatements(lexer, statements), errors);
   }
 
   /**
@@ -415,11 +413,6 @@
   private void nextToken() {
     if (token == null || token.kind != TokenKind.EOF) {
       token = lexer.nextToken();
-      // transparently handle comment tokens
-      while (token.kind == TokenKind.COMMENT) {
-        makeComment();
-        token = lexer.nextToken();
-      }
     }
     checkForbiddenKeywords();
     if (DEBUGGING) {
@@ -1344,9 +1337,4 @@
     }
     return setLocation(new ReturnStatement(expression), start, end);
   }
-
-  // create a comment node
-  private void makeComment() {
-    comments.add(setLocation(new Comment((String) token.value), token.left, token.right));
-  }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java b/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java
index e5098f1..e2a4dcd 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java
@@ -26,7 +26,6 @@
   CLASS("class"),
   COLON(":"),
   COMMA(","),
-  COMMENT("comment"),
   CONTINUE("continue"),
   DEF("def"),
   DEL("del"),
diff --git a/src/test/java/com/google/devtools/build/lib/pkgcache/PackageCacheTest.java b/src/test/java/com/google/devtools/build/lib/pkgcache/PackageCacheTest.java
index ba07b68..df3c5c8 100644
--- a/src/test/java/com/google/devtools/build/lib/pkgcache/PackageCacheTest.java
+++ b/src/test/java/com/google/devtools/build/lib/pkgcache/PackageCacheTest.java
@@ -488,8 +488,7 @@
   @Test
   public void testAddedBuildFileCausesLabelToBecomeInvalid() throws Exception {
     reporter.removeHandler(failFastHandler);
-    scratch.file(
-        "pkg/BUILD", "           cc_library(name = 'foo', ", "           srcs = ['x/y.cc'])");
+    scratch.file("pkg/BUILD", "cc_library(name = 'foo', srcs = ['x/y.cc'])");
 
     assertLabelValidity(true, "//pkg:x/y.cc");
 
diff --git a/src/test/java/com/google/devtools/build/lib/syntax/EvaluationTest.java b/src/test/java/com/google/devtools/build/lib/syntax/EvaluationTest.java
index a69563a..1ee0dff 100644
--- a/src/test/java/com/google/devtools/build/lib/syntax/EvaluationTest.java
+++ b/src/test/java/com/google/devtools/build/lib/syntax/EvaluationTest.java
@@ -467,9 +467,9 @@
         .testStatement("[    ] * 10", MutableList.empty())
         .testStatement("[1, 2] * 0", MutableList.empty())
         .testStatement("[1, 2] * -4", MutableList.empty())
-        .testStatement(" 2 * [1, 2]", MutableList.of(env, 1, 2, 1, 2))
+        .testStatement("2 * [1, 2]", MutableList.of(env, 1, 2, 1, 2))
         .testStatement("10 * []", MutableList.empty())
-        .testStatement(" 0 * [1, 2]", MutableList.empty())
+        .testStatement("0 * [1, 2]", MutableList.empty())
         .testStatement("-4 * [1, 2]", MutableList.empty());
   }
 
@@ -484,9 +484,9 @@
         .testStatement("(    ) * 10", Tuple.empty())
         .testStatement("(1, 2) * 0", Tuple.empty())
         .testStatement("(1, 2) * -4", Tuple.empty())
-        .testStatement(" 2 * (1, 2)", Tuple.of(1, 2, 1, 2))
+        .testStatement("2 * (1, 2)", Tuple.of(1, 2, 1, 2))
         .testStatement("10 * ()", Tuple.empty())
-        .testStatement(" 0 * (1, 2)", Tuple.empty())
+        .testStatement("0 * (1, 2)", Tuple.empty())
         .testStatement("-4 * (1, 2)", Tuple.empty());
   }
 
diff --git a/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java b/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java
index 81212e6..14c7152 100644
--- a/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java
+++ b/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java
@@ -146,33 +146,35 @@
   public void testBasics1() throws Exception {
     assertThat(names(tokens("wiz) "))).isEqualTo("IDENTIFIER RPAREN NEWLINE EOF");
     assertThat(names(tokens("wiz )"))).isEqualTo("IDENTIFIER RPAREN NEWLINE EOF");
-    assertThat(names(tokens(" wiz)"))).isEqualTo("IDENTIFIER RPAREN NEWLINE EOF");
-    assertThat(names(tokens(" wiz ) "))).isEqualTo("IDENTIFIER RPAREN NEWLINE EOF");
+    assertThat(names(tokens(" wiz)")))
+        .isEqualTo("INDENT IDENTIFIER RPAREN NEWLINE OUTDENT NEWLINE EOF");
+    assertThat(names(tokens(" wiz ) ")))
+        .isEqualTo("INDENT IDENTIFIER RPAREN NEWLINE OUTDENT NEWLINE EOF");
     assertThat(names(tokens("wiz\t)"))).isEqualTo("IDENTIFIER RPAREN NEWLINE EOF");
   }
 
   @Test
   public void testBasics2() throws Exception {
     assertThat(names(tokens(")"))).isEqualTo("RPAREN NEWLINE EOF");
-    assertThat(names(tokens(" )"))).isEqualTo("RPAREN NEWLINE EOF");
-    assertThat(names(tokens(" ) "))).isEqualTo("RPAREN NEWLINE EOF");
+    assertThat(names(tokens(" )"))).isEqualTo("INDENT RPAREN NEWLINE OUTDENT NEWLINE EOF");
+    assertThat(names(tokens(" ) "))).isEqualTo("INDENT RPAREN NEWLINE OUTDENT NEWLINE EOF");
     assertThat(names(tokens(") "))).isEqualTo("RPAREN NEWLINE EOF");
   }
 
   @Test
   public void testBasics3() throws Exception {
-    assertThat(names(tokens("123#456\n789"))).isEqualTo("INT COMMENT NEWLINE INT NEWLINE EOF");
-    assertThat(names(tokens("123 #456\n789"))).isEqualTo("INT COMMENT NEWLINE INT NEWLINE EOF");
-    assertThat(names(tokens("123#456 \n789"))).isEqualTo("INT COMMENT NEWLINE INT NEWLINE EOF");
+    assertThat(names(tokens("123#456\n789"))).isEqualTo("INT NEWLINE INT NEWLINE EOF");
+    assertThat(names(tokens("123 #456\n789"))).isEqualTo("INT NEWLINE INT NEWLINE EOF");
+    assertThat(names(tokens("123#456 \n789"))).isEqualTo("INT NEWLINE INT NEWLINE EOF");
     assertThat(names(tokens("123#456\n 789")))
-        .isEqualTo("INT COMMENT NEWLINE INDENT INT NEWLINE OUTDENT NEWLINE EOF");
-    assertThat(names(tokens("123#456\n789 "))).isEqualTo("INT COMMENT NEWLINE INT NEWLINE EOF");
+        .isEqualTo("INT NEWLINE INDENT INT NEWLINE OUTDENT NEWLINE EOF");
+    assertThat(names(tokens("123#456\n789 "))).isEqualTo("INT NEWLINE INT NEWLINE EOF");
   }
 
   @Test
   public void testBasics4() throws Exception {
     assertThat(names(tokens(""))).isEqualTo("NEWLINE EOF");
-    assertThat(names(tokens("# foo"))).isEqualTo("COMMENT NEWLINE EOF");
+    assertThat(names(tokens("# foo"))).isEqualTo("NEWLINE EOF");
     assertThat(names(tokens("1 2 3 4"))).isEqualTo("INT INT INT INT NEWLINE EOF");
     assertThat(names(tokens("1.234"))).isEqualTo("INT DOT INT NEWLINE EOF");
     assertThat(names(tokens("foo(bar, wiz)")))
@@ -190,9 +192,8 @@
   @Test
   public void testCrLf() throws Exception {
     assertThat(names(tokens("\r\n\r\n"))).isEqualTo("NEWLINE EOF");
-    assertThat(names(tokens("\r\n\r1\r\r\n"))).isEqualTo("NEWLINE INT NEWLINE EOF");
-    assertThat(names(tokens("# foo\r\n# bar\r\n")))
-        .isEqualTo("COMMENT NEWLINE COMMENT NEWLINE EOF");
+    assertThat(names(tokens("\r\n\r1\r\r\n"))).isEqualTo("INT NEWLINE EOF");
+    assertThat(names(tokens("# foo\r\n# bar\r\n"))).isEqualTo("NEWLINE EOF");
   }
 
   @Test
@@ -419,35 +420,24 @@
   }
 
   @Test
-  public void testBlankLineIndentation() throws Exception {
-    // Blank lines and comment lines should not generate any newlines indents
-    // (but note that every input ends with NEWLINE EOF).
-    assertThat(names(tokens("\n      #\n"))).isEqualTo("COMMENT NEWLINE EOF");
-    assertThat(names(tokens("      #"))).isEqualTo("COMMENT NEWLINE EOF");
-    assertThat(names(tokens("      #\n"))).isEqualTo("COMMENT NEWLINE EOF");
-    assertThat(names(tokens("      #comment\n"))).isEqualTo("COMMENT NEWLINE EOF");
-    assertThat(names(tokens("def f(x):\n" + "  # comment\n" + "\n" + "  \n" + "  return x\n")))
-        .isEqualTo(
-            "DEF IDENTIFIER LPAREN IDENTIFIER RPAREN COLON NEWLINE "
-                + "COMMENT INDENT RETURN IDENTIFIER NEWLINE "
-                + "OUTDENT NEWLINE EOF");
+  public void testIndentationOnFirstLine() throws Exception {
+    assertThat(values(tokens("    1"))).isEqualTo("INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF");
+    assertThat(values(tokens("\n\n    1"))).isEqualTo("INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF");
   }
 
   @Test
-  public void testMultipleCommentLines() throws Exception {
-    assertThat(
-            names(
-                tokens(
-                    "# Copyright\n"
-                        + "#\n"
-                        + "# A comment line\n"
-                        + "# An adjoining line\n"
-                        + "def f(x):\n"
-                        + "  return x\n")))
+  public void testBlankLineIndentation() throws Exception {
+    // Blank lines and comment lines should not generate any newlines indents
+    // (but note that every input ends with NEWLINE EOF).
+    assertThat(names(tokens("\n      #\n"))).isEqualTo("NEWLINE EOF");
+    assertThat(names(tokens("      #"))).isEqualTo("NEWLINE EOF");
+    assertThat(names(tokens("      #\n"))).isEqualTo("NEWLINE EOF");
+    assertThat(names(tokens("      #comment\n"))).isEqualTo("NEWLINE EOF");
+    assertThat(names(tokens("def f(x):\n" + "  # comment\n" + "\n" + "  \n" + "  return x\n")))
         .isEqualTo(
-            "COMMENT NEWLINE COMMENT COMMENT COMMENT "
-                + "DEF IDENTIFIER LPAREN IDENTIFIER RPAREN COLON NEWLINE "
-                + "INDENT RETURN IDENTIFIER NEWLINE OUTDENT NEWLINE EOF");
+            "DEF IDENTIFIER LPAREN IDENTIFIER RPAREN COLON NEWLINE "
+                + "INDENT RETURN IDENTIFIER NEWLINE "
+                + "OUTDENT NEWLINE EOF");
   }
 
   @Test
diff --git a/src/test/shell/bazel/bazel_test_test.sh b/src/test/shell/bazel/bazel_test_test.sh
index d5a9b3b..52a89a1 100755
--- a/src/test/shell/bazel/bazel_test_test.sh
+++ b/src/test/shell/bazel/bazel_test_test.sh
@@ -235,7 +235,7 @@
   chmod +x dir/test.sh
 
   cat <<EOF > dir/BUILD
-  sh_test(
+sh_test(
     name = "test",
     timeout = "short",
     srcs = [ "test.sh" ],
@@ -297,7 +297,7 @@
   chmod +x dir/test.sh
 
   cat <<'EOF' > dir/BUILD
-  sh_test(
+sh_test(
     name = "test",
     srcs = [ "test.sh" ],
   )
@@ -328,7 +328,7 @@
   chmod +x dir/test.sh
 
   cat <<'EOF' > dir/BUILD
-  sh_test(
+sh_test(
     name = "test",
     srcs = [ "test.sh" ],
   )
@@ -360,7 +360,7 @@
   chmod +x dir/test.sh
 
   cat <<'EOF' > dir/BUILD
-  sh_test(
+sh_test(
     name = "test",
     timeout = "short",
     srcs = [ "test.sh" ],
@@ -589,7 +589,7 @@
   chmod +x dir/test.sh
 
   cat <<'EOF' > dir/BUILD
-  sh_test(
+sh_test(
     name = "test",
     srcs = [ "test.sh" ],
   )
@@ -640,7 +640,7 @@
   chmod +x dir/test.sh
 
   cat <<'EOF' > dir/BUILD
-  sh_test(
+sh_test(
     name = "test",
     srcs = [ "test.sh" ],
   )
@@ -674,7 +674,7 @@
   chmod +x dir/test.sh
 
   cat <<'EOF' > dir/BUILD
-  sh_test(
+sh_test(
     name = "test",
     srcs = [ "test.sh" ],
   )
diff --git a/src/test/shell/bazel/toolchain_test.sh b/src/test/shell/bazel/toolchain_test.sh
index 37b2610..bb0f34d 100755
--- a/src/test/shell/bazel/toolchain_test.sh
+++ b/src/test/shell/bazel/toolchain_test.sh
@@ -675,7 +675,7 @@
   # Write an invalid rule to be the platform.
   mkdir -p platform
   cat >> platform/BUILD <<EOF
-  filegroup(name = 'not_a_platform')
+filegroup(name = 'not_a_platform')
 EOF
 
   bazel build --platforms=//platform:not_a_platform //demo:use &> $TEST_log && fail "Build failure expected"