src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java - bazel - Git at Google

 // Copyright 2006 The Bazel Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 package com.google.devtools.build.lib.syntax;

 import static com.google.common.truth.Truth.assertThat;

 import com.google.common.base.Joiner;
 import java.util.ArrayList;
 import java.util.List;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

 /**
  * Tests of tokenization behavior of the {@link Lexer}.
  */
 @RunWith(JUnit4.class)
 public class LexerTest {

   // TODO(adonovan): make these these tests less unnecessarily stateful.

   private final List<SyntaxError> errors = new ArrayList<>();
   private String lastError;

   /**
    * Create a lexer which takes input from the specified string. Resets the
    * error handler beforehand.
    */
   private Lexer createLexer(String input) {
     ParserInput inputSource = ParserInput.create(input, "/some/path.txt");
     errors.clear();
     lastError = null;
     return new Lexer(inputSource, FileOptions.DEFAULT, errors);
   }

   private static class Token {
     TokenKind kind;
     int start;
     int end;
     Object value;

     @Override
     public String toString() {
       return kind == TokenKind.STRING
           ? "\"" + value + "\""
           : value == null ? kind.toString() : value.toString();
     }
   }

   private ArrayList<Token> allTokens(Lexer lexer) {
     ArrayList<Token> result = new ArrayList<>();
     do {
       lexer.nextToken();
       Token tok = new Token();
       tok.kind = lexer.kind;
       tok.start = lexer.start;
       tok.end = lexer.end;
       tok.value = lexer.value;
       result.add(tok);
     } while (lexer.kind != TokenKind.EOF);

     for (SyntaxError error : errors) {
       lastError = error.location().file() + ":" + error.location().line() + ": " + error.message();
     }

     return result;
   }

   private Token[] tokens(String input) {
     ArrayList<Token> result = allTokens(createLexer(input));
     return result.toArray(new Token[0]);
   }

   /**
    * Lexes the specified input string, and returns a string containing just the line numbers of each
    * token.
    */
   private String linenums(String input) {
     Lexer lexer = createLexer(input);
     StringBuilder buf = new StringBuilder();
     for (Token tok : allTokens(lexer)) {
       if (buf.length() > 0) {
         buf.append(' ');
       }
       int line = lexer.locs.getLocation(tok.start).line();
       buf.append(line);
     }
     return buf.toString();
   }

   /**
    * Returns a string containing the names of the tokens and their associated
    * values. (String-literals are printed without escaping.)
    */
   private static String values(Token[] tokens) {
     StringBuilder buffer = new StringBuilder();
     for (Token token : tokens) {
       if (buffer.length() > 0) {
         buffer.append(' ');
       }
       buffer.append(token.kind.name());
       if (token.value != null) {
         buffer.append('(').append(token.value).append(')');
       }
     }
     return buffer.toString();
   }

   /**
    * Returns a string containing just the names of the tokens.
    */
   private static String names(Token[] tokens) {
     StringBuilder buf = new StringBuilder();
     for (Token tok : tokens) {
       if (buf.length() > 0) {
         buf.append(' ');
       }
       buf.append(tok.kind.name());
     }
     return buf.toString();
   }

   /**
    * Returns a string containing just the half-open position intervals of each
    * token. e.g. "[3,4) [4,9)".
    */
   private static String positions(Token[] tokens) {
     StringBuilder buf = new StringBuilder();
     for (Token tok : tokens) {
       if (buf.length() > 0) {
         buf.append(' ');
       }
       buf.append('[').append(tok.start).append(',').append(tok.end).append(')');
     }
     return buf.toString();
   }

   @Test
   public void testBasics1() throws Exception {
     assertThat(names(tokens("wiz) "))).isEqualTo("IDENTIFIER RPAREN NEWLINE EOF");
     assertThat(names(tokens("wiz )"))).isEqualTo("IDENTIFIER RPAREN NEWLINE EOF");
     assertThat(names(tokens(" wiz)")))
         .isEqualTo("INDENT IDENTIFIER RPAREN NEWLINE OUTDENT NEWLINE EOF");
     assertThat(names(tokens(" wiz ) ")))
         .isEqualTo("INDENT IDENTIFIER RPAREN NEWLINE OUTDENT NEWLINE EOF");
     assertThat(names(tokens("wiz\t)"))).isEqualTo("IDENTIFIER RPAREN NEWLINE EOF");
   }

   @Test
   public void testBasics2() throws Exception {
     assertThat(names(tokens(")"))).isEqualTo("RPAREN NEWLINE EOF");
     assertThat(names(tokens(" )"))).isEqualTo("INDENT RPAREN NEWLINE OUTDENT NEWLINE EOF");
     assertThat(names(tokens(" ) "))).isEqualTo("INDENT RPAREN NEWLINE OUTDENT NEWLINE EOF");
     assertThat(names(tokens(") "))).isEqualTo("RPAREN NEWLINE EOF");
   }

   @Test
   public void testBasics3() throws Exception {
     assertThat(names(tokens("123#456\n789"))).isEqualTo("INT NEWLINE INT NEWLINE EOF");
     assertThat(names(tokens("123 #456\n789"))).isEqualTo("INT NEWLINE INT NEWLINE EOF");
     assertThat(names(tokens("123#456 \n789"))).isEqualTo("INT NEWLINE INT NEWLINE EOF");
     assertThat(names(tokens("123#456\n 789")))
         .isEqualTo("INT NEWLINE INDENT INT NEWLINE OUTDENT NEWLINE EOF");
     assertThat(names(tokens("123#456\n789 "))).isEqualTo("INT NEWLINE INT NEWLINE EOF");
   }

   @Test
   public void testBasics4() throws Exception {
     assertThat(names(tokens(""))).isEqualTo("NEWLINE EOF");
     assertThat(names(tokens("# foo"))).isEqualTo("NEWLINE EOF");
     assertThat(names(tokens("1 2 3 4"))).isEqualTo("INT INT INT INT NEWLINE EOF");
     assertThat(names(tokens("1.234"))).isEqualTo("INT DOT INT NEWLINE EOF");
     assertThat(names(tokens("foo(bar, wiz)")))
         .isEqualTo("IDENTIFIER LPAREN IDENTIFIER COMMA IDENTIFIER RPAREN " + "NEWLINE EOF");
   }

   @Test
   public void testNoWhiteSpaceBetweenTokens() throws Exception {
     assertThat(names(tokens("6or()"))).isEqualTo("INT OR LPAREN RPAREN NEWLINE EOF");
     assertThat(names(tokens("0in(''and[])")))
         .isEqualTo("INT IN LPAREN STRING AND LBRACKET RBRACKET RPAREN NEWLINE EOF");

     assertThat(values(tokens("0or()"))).isEqualTo("INT(0) IDENTIFIER(r) LPAREN RPAREN NEWLINE EOF");
     assertThat(lastError).isEqualTo("/some/path.txt:1: invalid base-8 integer constant: 0o");
   }

   @Test
   public void testNonAsciiIdentifiers() throws Exception {
     tokens("ümlaut");
     assertThat(lastError.toString()).contains("invalid character: 'ü'");
     tokens("umläut");
     assertThat(lastError.toString()).contains("invalid character: 'ä'");
   }

   @Test
   public void testCrLf() throws Exception {
     assertThat(names(tokens("\r\n\r\n"))).isEqualTo("NEWLINE EOF");
     assertThat(names(tokens("\r\n\r1\r\r\n"))).isEqualTo("INT NEWLINE EOF");
     assertThat(names(tokens("# foo\r\n# bar\r\n"))).isEqualTo("NEWLINE EOF");
   }

   @Test
   public void testIntegers() throws Exception {
     // Detection of MINUS immediately following integer constant proves we
     // don't consume too many chars.

     // decimal
     assertThat(values(tokens("12345-"))).isEqualTo("INT(12345) MINUS NEWLINE EOF");

     // octal
     assertThat(values(tokens("0o12345-"))).isEqualTo("INT(5349) MINUS NEWLINE EOF");
     assertThat(values(tokens("0O77"))).isEqualTo("INT(63) NEWLINE EOF");

     // octal (bad)
     assertThat(values(tokens("012349-"))).isEqualTo("INT(0) MINUS NEWLINE EOF");
     assertThat(lastError.toString())
         .isEqualTo("/some/path.txt:1: invalid base-8 integer constant: 012349");

     assertThat(values(tokens("0o"))).isEqualTo("INT(0) NEWLINE EOF");
     assertThat(lastError.toString())
         .isEqualTo("/some/path.txt:1: invalid base-8 integer constant: 0o");

     assertThat(values(tokens("012345"))).isEqualTo("INT(5349) NEWLINE EOF");
     assertThat(lastError.toString())
         .isEqualTo("/some/path.txt:1: invalid octal value `012345`, should be: `0o12345`");

     // hexadecimal (uppercase)
     assertThat(values(tokens("0X12345F-"))).isEqualTo("INT(1193055) MINUS NEWLINE EOF");

     // hexadecimal (lowercase)
     assertThat(values(tokens("0x12345f-"))).isEqualTo("INT(1193055) MINUS NEWLINE EOF");

     // hexadecimal (lowercase) [note: "g" cause termination of token]
     assertThat(values(tokens("0x12345g-"))).isEqualTo("INT(74565) IDENTIFIER(g) MINUS NEWLINE EOF");
   }

   @Test
   public void testIntegersAndDot() throws Exception {
     assertThat(values(tokens("1.2345"))).isEqualTo("INT(1) DOT INT(2345) NEWLINE EOF");

     assertThat(values(tokens("1.2.345"))).isEqualTo("INT(1) DOT INT(2) DOT INT(345) NEWLINE EOF");

     assertThat(values(tokens("1.0E10"))).isEqualTo("INT(1) DOT INT(0) NEWLINE EOF");
     assertThat(lastError.toString())
         .isEqualTo("/some/path.txt:1: invalid base-8 integer constant: 0E10");

     assertThat(values(tokens("1.03E-10"))).isEqualTo("INT(1) DOT INT(0) MINUS INT(10) NEWLINE EOF");
     assertThat(lastError.toString())
         .isEqualTo("/some/path.txt:1: invalid base-8 integer constant: 03E");

     assertThat(values(tokens(". 123"))).isEqualTo("DOT INT(123) NEWLINE EOF");
     assertThat(values(tokens(".123"))).isEqualTo("DOT INT(123) NEWLINE EOF");
     assertThat(values(tokens(".abc"))).isEqualTo("DOT IDENTIFIER(abc) NEWLINE EOF");

     assertThat(values(tokens("foo.123"))).isEqualTo("IDENTIFIER(foo) DOT INT(123) NEWLINE EOF");
     assertThat(values(tokens("foo.bcd")))
         .isEqualTo("IDENTIFIER(foo) DOT IDENTIFIER(bcd) NEWLINE EOF"); // 'b' are hex chars
     assertThat(values(tokens("foo.xyz")))
         .isEqualTo("IDENTIFIER(foo) DOT IDENTIFIER(xyz) NEWLINE EOF");
   }

   @Test
   public void testStringDelimiters() throws Exception {
     assertThat(values(tokens("\"foo\""))).isEqualTo("STRING(foo) NEWLINE EOF");
     assertThat(values(tokens("'foo'"))).isEqualTo("STRING(foo) NEWLINE EOF");
   }

   @Test
   public void testQuotesInStrings() throws Exception {
     assertThat(values(tokens("'foo\\'bar'"))).isEqualTo("STRING(foo'bar) NEWLINE EOF");
     assertThat(values(tokens("\"foo'bar\""))).isEqualTo("STRING(foo'bar) NEWLINE EOF");
     assertThat(values(tokens("'foo\"bar'"))).isEqualTo("STRING(foo\"bar) NEWLINE EOF");
     assertThat(values(tokens("\"foo\\\"bar\""))).isEqualTo("STRING(foo\"bar) NEWLINE EOF");
   }

   @Test
   public void testStringEscapes() throws Exception {
     assertThat(values(tokens("'a\\tb\\nc\\rd'")))
         .isEqualTo("STRING(a\tb\nc\rd) NEWLINE EOF"); // \t \r \n
     assertThat(values(tokens("'x\\hx'")))
         .isEqualTo("STRING(x\\hx) NEWLINE EOF"); // \h is unknown => "\h"
     assertThat(values(tokens("'\\$$'"))).isEqualTo("STRING(\\$$) NEWLINE EOF");
     assertThat(values(tokens("'a\\\nb'")))
         .isEqualTo("STRING(ab) NEWLINE EOF"); // escape end of line
     assertThat(values(tokens("\"ab\\ucd\""))).isEqualTo("STRING(abcd) NEWLINE EOF");
     assertThat(lastError).isEqualTo("/some/path.txt:1: invalid escape sequence: \\u");
   }

   @Test
   public void testEscapedCrlfInString() throws Exception {
     assertThat(values(tokens("'a\\\r\nb'"))).isEqualTo("STRING(ab) NEWLINE EOF");
     assertThat(values(tokens("\"a\\\r\nb\""))).isEqualTo("STRING(ab) NEWLINE EOF");
     assertThat(values(tokens("\"\"\"a\\\r\nb\"\"\""))).isEqualTo("STRING(ab) NEWLINE EOF");
     assertThat(values(tokens("'''a\\\r\nb'''"))).isEqualTo("STRING(ab) NEWLINE EOF");
     assertThat(values(tokens("r'a\\\r\nb'"))).isEqualTo("STRING(a\\\nb) NEWLINE EOF");
     assertThat(values(tokens("r\"a\\\r\nb\""))).isEqualTo("STRING(a\\\nb) NEWLINE EOF");
     assertThat(values(tokens("r\"a\\\r\n\\\nb\""))).isEqualTo("STRING(a\\\n\\\nb) NEWLINE EOF");
   }

   @Test
   public void testRawString() throws Exception {
     assertThat(values(tokens("r'abcd'"))).isEqualTo("STRING(abcd) NEWLINE EOF");
     assertThat(values(tokens("r\"abcd\""))).isEqualTo("STRING(abcd) NEWLINE EOF");
     assertThat(values(tokens("r'a\\tb\\nc\\rd'")))
         .isEqualTo("STRING(a\\tb\\nc\\rd) NEWLINE EOF"); // r'a\tb\nc\rd'
     assertThat(values(tokens("r\"a\\\"\""))).isEqualTo("STRING(a\\\") NEWLINE EOF"); // r"a\""
     assertThat(values(tokens("r'a\\\\b'"))).isEqualTo("STRING(a\\\\b) NEWLINE EOF"); // r'a\\b'
     assertThat(values(tokens("r'ab'r"))).isEqualTo("STRING(ab) IDENTIFIER(r) NEWLINE EOF");

     // Unterminated raw string
     values(tokens("r'\\'")); // r'\'
     assertThat(lastError.toString())
         .isEqualTo("/some/path.txt:1: unterminated string literal at eof");
   }

   @Test
   public void testTripleRawString() throws Exception {
     // r'''a\ncd'''
     assertThat(values(tokens("r'''ab\\ncd'''"))).isEqualTo("STRING(ab\\ncd) NEWLINE EOF");
     // r"""ab
     // cd"""
     assertThat(values(tokens("\"\"\"ab\ncd\"\"\""))).isEqualTo("STRING(ab\ncd) NEWLINE EOF");

     // Unterminated raw string
     values(tokens("r'''\\'''")); // r'''\'''
     assertThat(lastError.toString())
         .isEqualTo("/some/path.txt:1: unterminated string literal at eof");
   }

   @Test
   public void testOctalEscapes() throws Exception {
     // Regression test for a bug.
     assertThat(values(tokens("'\\0 \\1 \\11 \\77 \\111 \\1111 \\377'")))
         .isEqualTo("STRING(\0 \1 \t \u003f I I1 \u00ff) NEWLINE EOF");
     // Test boundaries (non-octal char, EOF).
     assertThat(values(tokens("'\\1b \\1'"))).isEqualTo("STRING(\1b \1) NEWLINE EOF");
   }

   @Test
   public void testOctalEscapeOutOfRange() throws Exception {
     assertThat(values(tokens("'\\777'"))).isEqualTo("STRING(\u00ff) NEWLINE EOF");
     assertThat(lastError.toString())
         .isEqualTo("/some/path.txt:1: octal escape sequence out of range (maximum is \\377)");
   }

   @Test
   public void testTripleQuotedStrings() throws Exception {
     assertThat(values(tokens("\"\"\"a\"b'c \n d\"\"e\"\"\"")))
         .isEqualTo("STRING(a\"b'c \n d\"\"e) NEWLINE EOF");
     assertThat(values(tokens("'''a\"b'c \n d\"\"e'''")))
         .isEqualTo("STRING(a\"b'c \n d\"\"e) NEWLINE EOF");
   }

   @Test
   public void testBadChar() throws Exception {
     assertThat(values(tokens("a$b"))).isEqualTo("IDENTIFIER(a) IDENTIFIER(b) NEWLINE EOF");
     assertThat(lastError.toString()).isEqualTo("/some/path.txt:1: invalid character: '$'");
   }

   @Test
   public void testIndentation() throws Exception {
     assertThat(values(tokens("1\n2\n3")))
         .isEqualTo("INT(1) NEWLINE INT(2) NEWLINE INT(3) NEWLINE EOF");
     assertThat(values(tokens("1\n  2\n  3\n4 ")))
         .isEqualTo(
             "INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT " + "INT(4) NEWLINE EOF");
     assertThat(values(tokens("1\n  2\n  3")))
         .isEqualTo("INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT " + "NEWLINE EOF");
     assertThat(values(tokens("1\n  2\n    3")))
         .isEqualTo(
             "INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE "
                 + "OUTDENT OUTDENT NEWLINE EOF");
     assertThat(values(tokens("1\n  2\n    3\n  4\n5")))
         .isEqualTo(
             "INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE "
                 + "OUTDENT INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF");

     assertThat(values(tokens("1\n  2\n    3\n   4\n5")))
         .isEqualTo(
             "INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE "
                 + "OUTDENT INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF");
     assertThat(lastError.toString()).isEqualTo("/some/path.txt:4: indentation error");
   }

   @Test
   public void testIndentationWithTab() throws Exception {
     tokens("def x():\n\tpass");
     assertThat(lastError).contains("Tab characters are not allowed");
   }

   @Test
   public void testIndentationWithCrLf() throws Exception {
     assertThat(values(tokens("1\r\n  2\r\n")))
         .isEqualTo("INT(1) NEWLINE INDENT INT(2) NEWLINE OUTDENT NEWLINE EOF");
     assertThat(values(tokens("1\r\n  2\r\n\r\n")))
         .isEqualTo("INT(1) NEWLINE INDENT INT(2) NEWLINE OUTDENT NEWLINE EOF");
     assertThat(values(tokens("1\r\n  2\r\n    3\r\n  4\r\n5")))
         .isEqualTo(
             "INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE OUTDENT INT(4) "
                 + "NEWLINE OUTDENT INT(5) NEWLINE EOF");
     assertThat(values(tokens("1\r\n  2\r\n\r\n  3\r\n4")))
         .isEqualTo(
             "INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT INT(4) NEWLINE EOF");
   }

   @Test
   public void testIndentationInsideParens() throws Exception {
     // Indentation is ignored inside parens:
     assertThat(values(tokens("1 (\n  2\n    3\n  4\n5")))
         .isEqualTo("INT(1) LPAREN INT(2) INT(3) INT(4) INT(5) NEWLINE EOF");
     assertThat(values(tokens("1 {\n  2\n    3\n  4\n5")))
         .isEqualTo("INT(1) LBRACE INT(2) INT(3) INT(4) INT(5) NEWLINE EOF");
     assertThat(values(tokens("1 [\n  2\n    3\n  4\n5")))
         .isEqualTo("INT(1) LBRACKET INT(2) INT(3) INT(4) INT(5) NEWLINE EOF");
     assertThat(values(tokens("1 [\n  2]\n    3\n    4\n5")))
         .isEqualTo(
             "INT(1) LBRACKET INT(2) RBRACKET NEWLINE INDENT INT(3) "
                 + "NEWLINE INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF");
   }

   @Test
   public void testIndentationAtEOF() throws Exception {
     // Matching OUTDENTS are created at EOF:
     assertThat(values(tokens("\n  1"))).isEqualTo("INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF");
   }

   @Test
   public void testIndentationOnFirstLine() throws Exception {
     assertThat(values(tokens("    1"))).isEqualTo("INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF");
     assertThat(values(tokens("\n\n    1"))).isEqualTo("INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF");
   }

   @Test
   public void testBlankLineIndentation() throws Exception {
     // Blank lines and comment lines should not generate any newlines indents
     // (but note that every input ends with NEWLINE EOF).
     assertThat(names(tokens("\n      #\n"))).isEqualTo("NEWLINE EOF");
     assertThat(names(tokens("      #"))).isEqualTo("NEWLINE EOF");
     assertThat(names(tokens("      #\n"))).isEqualTo("NEWLINE EOF");
     assertThat(names(tokens("      #comment\n"))).isEqualTo("NEWLINE EOF");
     assertThat(names(tokens("def f(x):\n" + "  # comment\n" + "\n" + "  \n" + "  return x\n")))
         .isEqualTo(
             "DEF IDENTIFIER LPAREN IDENTIFIER RPAREN COLON NEWLINE "
                 + "INDENT RETURN IDENTIFIER NEWLINE "
                 + "OUTDENT NEWLINE EOF");
   }

   @Test
   public void testBackslash() throws Exception {
     assertThat(names(tokens("a\\\nb"))).isEqualTo("IDENTIFIER IDENTIFIER NEWLINE EOF");
     assertThat(names(tokens("a\\\r\nb"))).isEqualTo("IDENTIFIER IDENTIFIER NEWLINE EOF");
     assertThat(names(tokens("a\\ b"))).isEqualTo("IDENTIFIER ILLEGAL IDENTIFIER NEWLINE EOF");
     assertThat(names(tokens("a(\\\n2)"))).isEqualTo("IDENTIFIER LPAREN INT RPAREN NEWLINE EOF");
   }

   @Test
   public void testTokenPositions() throws Exception {
     assertThat(positions(tokens("foo(bar, {1: 'quux'}, \"\"\"b\"\"\", r\"\")")))
         .isEqualTo(
             // foo (     bar   ,     {      1       :
             "[0,3) [3,4) [4,7) [7,8) [9,10) [10,11) [11,12)"
                 //  'quux'  }       ,       """b""" ,       r""     )       NEWLINE EOF
                 + " [13,19) [19,20) [20,21) [22,29) [29,30) [31,34) [34,35) [35,35) [35,35)");
   }

   @Test
   public void testLineNumbers() throws Exception {
     assertThat(linenums("foo = 1\nbar = 2\n\nwiz = 3")).isEqualTo("1 1 1 1 2 2 2 2 4 4 4 4 4");

     assertThat(values(tokens("foo = 1\nbar = 2\n\nwiz = $\nbar = 2")))
         .isEqualTo(
             "IDENTIFIER(foo) EQUALS INT(1) NEWLINE "
                 + "IDENTIFIER(bar) EQUALS INT(2) NEWLINE "
                 + "IDENTIFIER(wiz) EQUALS NEWLINE "
                 + "IDENTIFIER(bar) EQUALS INT(2) NEWLINE EOF");
     assertThat(lastError.toString()).isEqualTo("/some/path.txt:4: invalid character: '$'");

     // '\\n' in string should not increment linenum:
     String s = "1\n'foo\\nbar'\3";
     assertThat(values(tokens(s))).isEqualTo("INT(1) NEWLINE STRING(foo\nbar) NEWLINE EOF");
     assertThat(linenums(s)).isEqualTo("1 1 2 2 2");
   }

   @Test
   public void testContainsErrors() throws Exception {
     Lexer lexerSuccess = createLexer("foo");
     allTokens(lexerSuccess); // ensure the file has been completely scanned
     assertThat(errors).isEmpty();

     Lexer lexerFail = createLexer("f$o");
     allTokens(lexerFail);
     assertThat(errors).isNotEmpty();

     String s = "'unterminated";
     lexerFail = createLexer(s);
     allTokens(lexerFail);
     assertThat(errors).isNotEmpty();
     assertThat(values(tokens(s))).isEqualTo("STRING(unterminated) NEWLINE EOF");
   }

   @Test
   public void testUnterminatedRawStringWithEscapingError() throws Exception {
     assertThat(names(tokens("r'\\"))).isEqualTo("STRING NEWLINE EOF");
     assertThat(lastError).isEqualTo("/some/path.txt:1: unterminated string literal at eof");
   }

   @Test
   public void testFirstCharIsTab() {
     assertThat(names(tokens("\t"))).isEqualTo("NEWLINE EOF");
     assertThat(lastError)
         .isEqualTo(
             "/some/path.txt:1: Tab characters are not allowed for indentation. Use spaces"
                 + " instead.");
   }

   /**
    * Returns the first error whose string form contains the specified substring, or throws an
    * informative AssertionError if there is none.
    *
    * <p>Exposed for use by other frontend tests.
    */
   static SyntaxError assertContainsError(List<SyntaxError> errors, String substr) {
     for (SyntaxError error : errors) {
       if (error.toString().contains(substr)) {
         return error;
       }
     }
     if (errors.isEmpty()) {
       throw new AssertionError("no errors, want '" + substr + "'");
     } else {
       throw new AssertionError(
           "error '" + substr + "' not found, but got these:\n" + Joiner.on("\n").join(errors));
     }
   }
 }
	// Copyright 2006 The Bazel Authors. All Rights Reserved.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.
	package com.google.devtools.build.lib.syntax;

	import static com.google.common.truth.Truth.assertThat;

	import com.google.common.base.Joiner;
	import java.util.ArrayList;
	import java.util.List;
	import org.junit.Test;
	import org.junit.runner.RunWith;
	import org.junit.runners.JUnit4;

	/**
	* Tests of tokenization behavior of the {@link Lexer}.
	*/
	@RunWith(JUnit4.class)
	public class LexerTest {

	// TODO(adonovan): make these these tests less unnecessarily stateful.

	private final List<SyntaxError> errors = new ArrayList<>();
	private String lastError;

	/**
	* Create a lexer which takes input from the specified string. Resets the
	* error handler beforehand.
	*/
	private Lexer createLexer(String input) {
	ParserInput inputSource = ParserInput.create(input, "/some/path.txt");
	errors.clear();
	lastError = null;
	return new Lexer(inputSource, FileOptions.DEFAULT, errors);
	}

	private static class Token {
	TokenKind kind;
	int start;
	int end;
	Object value;

	@Override
	public String toString() {
	return kind == TokenKind.STRING
	? "\"" + value + "\""
	: value == null ? kind.toString() : value.toString();
	}
	}

	private ArrayList<Token> allTokens(Lexer lexer) {
	ArrayList<Token> result = new ArrayList<>();
	do {
	lexer.nextToken();
	Token tok = new Token();
	tok.kind = lexer.kind;
	tok.start = lexer.start;
	tok.end = lexer.end;
	tok.value = lexer.value;
	result.add(tok);
	} while (lexer.kind != TokenKind.EOF);

	for (SyntaxError error : errors) {
	lastError = error.location().file() + ":" + error.location().line() + ": " + error.message();
	}

	return result;
	}

	private Token[] tokens(String input) {
	ArrayList<Token> result = allTokens(createLexer(input));
	return result.toArray(new Token[0]);
	}

	/**
	* Lexes the specified input string, and returns a string containing just the line numbers of each
	* token.
	*/
	private String linenums(String input) {
	Lexer lexer = createLexer(input);
	StringBuilder buf = new StringBuilder();
	for (Token tok : allTokens(lexer)) {
	if (buf.length() > 0) {
	buf.append(' ');
	}
	int line = lexer.locs.getLocation(tok.start).line();
	buf.append(line);
	}
	return buf.toString();
	}

	/**
	* Returns a string containing the names of the tokens and their associated
	* values. (String-literals are printed without escaping.)
	*/
	private static String values(Token[] tokens) {
	StringBuilder buffer = new StringBuilder();
	for (Token token : tokens) {
	if (buffer.length() > 0) {
	buffer.append(' ');
	}
	buffer.append(token.kind.name());
	if (token.value != null) {
	buffer.append('(').append(token.value).append(')');
	}
	}
	return buffer.toString();
	}

	/**
	* Returns a string containing just the names of the tokens.
	*/
	private static String names(Token[] tokens) {
	StringBuilder buf = new StringBuilder();
	for (Token tok : tokens) {
	if (buf.length() > 0) {
	buf.append(' ');
	}
	buf.append(tok.kind.name());
	}
	return buf.toString();
	}

	/**
	* Returns a string containing just the half-open position intervals of each
	* token. e.g. "[3,4) [4,9)".
	*/
	private static String positions(Token[] tokens) {
	StringBuilder buf = new StringBuilder();
	for (Token tok : tokens) {
	if (buf.length() > 0) {
	buf.append(' ');
	}
	buf.append('[').append(tok.start).append(',').append(tok.end).append(')');
	}
	return buf.toString();
	}

	@Test
	public void testBasics1() throws Exception {
	assertThat(names(tokens("wiz) "))).isEqualTo("IDENTIFIER RPAREN NEWLINE EOF");
	assertThat(names(tokens("wiz )"))).isEqualTo("IDENTIFIER RPAREN NEWLINE EOF");
	assertThat(names(tokens(" wiz)")))
	.isEqualTo("INDENT IDENTIFIER RPAREN NEWLINE OUTDENT NEWLINE EOF");
	assertThat(names(tokens(" wiz ) ")))
	.isEqualTo("INDENT IDENTIFIER RPAREN NEWLINE OUTDENT NEWLINE EOF");
	assertThat(names(tokens("wiz\t)"))).isEqualTo("IDENTIFIER RPAREN NEWLINE EOF");
	}

	@Test
	public void testBasics2() throws Exception {
	assertThat(names(tokens(")"))).isEqualTo("RPAREN NEWLINE EOF");
	assertThat(names(tokens(" )"))).isEqualTo("INDENT RPAREN NEWLINE OUTDENT NEWLINE EOF");
	assertThat(names(tokens(" ) "))).isEqualTo("INDENT RPAREN NEWLINE OUTDENT NEWLINE EOF");
	assertThat(names(tokens(") "))).isEqualTo("RPAREN NEWLINE EOF");
	}

	@Test
	public void testBasics3() throws Exception {
	assertThat(names(tokens("123#456\n789"))).isEqualTo("INT NEWLINE INT NEWLINE EOF");
	assertThat(names(tokens("123 #456\n789"))).isEqualTo("INT NEWLINE INT NEWLINE EOF");
	assertThat(names(tokens("123#456 \n789"))).isEqualTo("INT NEWLINE INT NEWLINE EOF");
	assertThat(names(tokens("123#456\n 789")))
	.isEqualTo("INT NEWLINE INDENT INT NEWLINE OUTDENT NEWLINE EOF");
	assertThat(names(tokens("123#456\n789 "))).isEqualTo("INT NEWLINE INT NEWLINE EOF");
	}

	@Test
	public void testBasics4() throws Exception {
	assertThat(names(tokens(""))).isEqualTo("NEWLINE EOF");
	assertThat(names(tokens("# foo"))).isEqualTo("NEWLINE EOF");
	assertThat(names(tokens("1 2 3 4"))).isEqualTo("INT INT INT INT NEWLINE EOF");
	assertThat(names(tokens("1.234"))).isEqualTo("INT DOT INT NEWLINE EOF");
	assertThat(names(tokens("foo(bar, wiz)")))
	.isEqualTo("IDENTIFIER LPAREN IDENTIFIER COMMA IDENTIFIER RPAREN " + "NEWLINE EOF");
	}

	@Test
	public void testNoWhiteSpaceBetweenTokens() throws Exception {
	assertThat(names(tokens("6or()"))).isEqualTo("INT OR LPAREN RPAREN NEWLINE EOF");
	assertThat(names(tokens("0in(''and[])")))
	.isEqualTo("INT IN LPAREN STRING AND LBRACKET RBRACKET RPAREN NEWLINE EOF");

	assertThat(values(tokens("0or()"))).isEqualTo("INT(0) IDENTIFIER(r) LPAREN RPAREN NEWLINE EOF");
	assertThat(lastError).isEqualTo("/some/path.txt:1: invalid base-8 integer constant: 0o");
	}

	@Test
	public void testNonAsciiIdentifiers() throws Exception {
	tokens("ümlaut");
	assertThat(lastError.toString()).contains("invalid character: 'ü'");
	tokens("umläut");
	assertThat(lastError.toString()).contains("invalid character: 'ä'");
	}

	@Test
	public void testCrLf() throws Exception {
	assertThat(names(tokens("\r\n\r\n"))).isEqualTo("NEWLINE EOF");
	assertThat(names(tokens("\r\n\r1\r\r\n"))).isEqualTo("INT NEWLINE EOF");
	assertThat(names(tokens("# foo\r\n# bar\r\n"))).isEqualTo("NEWLINE EOF");
	}

	@Test
	public void testIntegers() throws Exception {
	// Detection of MINUS immediately following integer constant proves we
	// don't consume too many chars.

	// decimal
	assertThat(values(tokens("12345-"))).isEqualTo("INT(12345) MINUS NEWLINE EOF");

	// octal
	assertThat(values(tokens("0o12345-"))).isEqualTo("INT(5349) MINUS NEWLINE EOF");
	assertThat(values(tokens("0O77"))).isEqualTo("INT(63) NEWLINE EOF");

	// octal (bad)
	assertThat(values(tokens("012349-"))).isEqualTo("INT(0) MINUS NEWLINE EOF");
	assertThat(lastError.toString())
	.isEqualTo("/some/path.txt:1: invalid base-8 integer constant: 012349");

	assertThat(values(tokens("0o"))).isEqualTo("INT(0) NEWLINE EOF");
	assertThat(lastError.toString())
	.isEqualTo("/some/path.txt:1: invalid base-8 integer constant: 0o");

	assertThat(values(tokens("012345"))).isEqualTo("INT(5349) NEWLINE EOF");
	assertThat(lastError.toString())
	.isEqualTo("/some/path.txt:1: invalid octal value `012345`, should be: `0o12345`");

	// hexadecimal (uppercase)
	assertThat(values(tokens("0X12345F-"))).isEqualTo("INT(1193055) MINUS NEWLINE EOF");

	// hexadecimal (lowercase)
	assertThat(values(tokens("0x12345f-"))).isEqualTo("INT(1193055) MINUS NEWLINE EOF");

	// hexadecimal (lowercase) [note: "g" cause termination of token]
	assertThat(values(tokens("0x12345g-"))).isEqualTo("INT(74565) IDENTIFIER(g) MINUS NEWLINE EOF");
	}

	@Test
	public void testIntegersAndDot() throws Exception {
	assertThat(values(tokens("1.2345"))).isEqualTo("INT(1) DOT INT(2345) NEWLINE EOF");

	assertThat(values(tokens("1.2.345"))).isEqualTo("INT(1) DOT INT(2) DOT INT(345) NEWLINE EOF");

	assertThat(values(tokens("1.0E10"))).isEqualTo("INT(1) DOT INT(0) NEWLINE EOF");
	assertThat(lastError.toString())
	.isEqualTo("/some/path.txt:1: invalid base-8 integer constant: 0E10");

	assertThat(values(tokens("1.03E-10"))).isEqualTo("INT(1) DOT INT(0) MINUS INT(10) NEWLINE EOF");
	assertThat(lastError.toString())
	.isEqualTo("/some/path.txt:1: invalid base-8 integer constant: 03E");

	assertThat(values(tokens(". 123"))).isEqualTo("DOT INT(123) NEWLINE EOF");
	assertThat(values(tokens(".123"))).isEqualTo("DOT INT(123) NEWLINE EOF");
	assertThat(values(tokens(".abc"))).isEqualTo("DOT IDENTIFIER(abc) NEWLINE EOF");

	assertThat(values(tokens("foo.123"))).isEqualTo("IDENTIFIER(foo) DOT INT(123) NEWLINE EOF");
	assertThat(values(tokens("foo.bcd")))
	.isEqualTo("IDENTIFIER(foo) DOT IDENTIFIER(bcd) NEWLINE EOF"); // 'b' are hex chars
	assertThat(values(tokens("foo.xyz")))
	.isEqualTo("IDENTIFIER(foo) DOT IDENTIFIER(xyz) NEWLINE EOF");
	}

	@Test
	public void testStringDelimiters() throws Exception {
	assertThat(values(tokens("\"foo\""))).isEqualTo("STRING(foo) NEWLINE EOF");
	assertThat(values(tokens("'foo'"))).isEqualTo("STRING(foo) NEWLINE EOF");
	}

	@Test
	public void testQuotesInStrings() throws Exception {
	assertThat(values(tokens("'foo\\'bar'"))).isEqualTo("STRING(foo'bar) NEWLINE EOF");
	assertThat(values(tokens("\"foo'bar\""))).isEqualTo("STRING(foo'bar) NEWLINE EOF");
	assertThat(values(tokens("'foo\"bar'"))).isEqualTo("STRING(foo\"bar) NEWLINE EOF");
	assertThat(values(tokens("\"foo\\\"bar\""))).isEqualTo("STRING(foo\"bar) NEWLINE EOF");
	}

	@Test
	public void testStringEscapes() throws Exception {
	assertThat(values(tokens("'a\\tb\\nc\\rd'")))
	.isEqualTo("STRING(a\tb\nc\rd) NEWLINE EOF"); // \t \r \n
	assertThat(values(tokens("'x\\hx'")))
	.isEqualTo("STRING(x\\hx) NEWLINE EOF"); // \h is unknown => "\h"
	assertThat(values(tokens("'\\$$'"))).isEqualTo("STRING(\\$$) NEWLINE EOF");
	assertThat(values(tokens("'a\\\nb'")))
	.isEqualTo("STRING(ab) NEWLINE EOF"); // escape end of line
	assertThat(values(tokens("\"ab\\ucd\""))).isEqualTo("STRING(abcd) NEWLINE EOF");
	assertThat(lastError).isEqualTo("/some/path.txt:1: invalid escape sequence: \\u");
	}

	@Test
	public void testEscapedCrlfInString() throws Exception {
	assertThat(values(tokens("'a\\\r\nb'"))).isEqualTo("STRING(ab) NEWLINE EOF");
	assertThat(values(tokens("\"a\\\r\nb\""))).isEqualTo("STRING(ab) NEWLINE EOF");
	assertThat(values(tokens("\"\"\"a\\\r\nb\"\"\""))).isEqualTo("STRING(ab) NEWLINE EOF");
	assertThat(values(tokens("'''a\\\r\nb'''"))).isEqualTo("STRING(ab) NEWLINE EOF");
	assertThat(values(tokens("r'a\\\r\nb'"))).isEqualTo("STRING(a\\\nb) NEWLINE EOF");
	assertThat(values(tokens("r\"a\\\r\nb\""))).isEqualTo("STRING(a\\\nb) NEWLINE EOF");
	assertThat(values(tokens("r\"a\\\r\n\\\nb\""))).isEqualTo("STRING(a\\\n\\\nb) NEWLINE EOF");
	}

	@Test
	public void testRawString() throws Exception {
	assertThat(values(tokens("r'abcd'"))).isEqualTo("STRING(abcd) NEWLINE EOF");
	assertThat(values(tokens("r\"abcd\""))).isEqualTo("STRING(abcd) NEWLINE EOF");
	assertThat(values(tokens("r'a\\tb\\nc\\rd'")))
	.isEqualTo("STRING(a\\tb\\nc\\rd) NEWLINE EOF"); // r'a\tb\nc\rd'
	assertThat(values(tokens("r\"a\\\"\""))).isEqualTo("STRING(a\\\") NEWLINE EOF"); // r"a\""
	assertThat(values(tokens("r'a\\\\b'"))).isEqualTo("STRING(a\\\\b) NEWLINE EOF"); // r'a\\b'
	assertThat(values(tokens("r'ab'r"))).isEqualTo("STRING(ab) IDENTIFIER(r) NEWLINE EOF");

	// Unterminated raw string
	values(tokens("r'\\'")); // r'\'
	assertThat(lastError.toString())
	.isEqualTo("/some/path.txt:1: unterminated string literal at eof");
	}

	@Test
	public void testTripleRawString() throws Exception {
	// r'''a\ncd'''
	assertThat(values(tokens("r'''ab\\ncd'''"))).isEqualTo("STRING(ab\\ncd) NEWLINE EOF");
	// r"""ab
	// cd"""
	assertThat(values(tokens("\"\"\"ab\ncd\"\"\""))).isEqualTo("STRING(ab\ncd) NEWLINE EOF");

	// Unterminated raw string
	values(tokens("r'''\\'''")); // r'''\'''
	assertThat(lastError.toString())
	.isEqualTo("/some/path.txt:1: unterminated string literal at eof");
	}

	@Test
	public void testOctalEscapes() throws Exception {
	// Regression test for a bug.
	assertThat(values(tokens("'\\0 \\1 \\11 \\77 \\111 \\1111 \\377'")))
	.isEqualTo("STRING(\0 \1 \t \u003f I I1 \u00ff) NEWLINE EOF");
	// Test boundaries (non-octal char, EOF).
	assertThat(values(tokens("'\\1b \\1'"))).isEqualTo("STRING(\1b \1) NEWLINE EOF");
	}

	@Test
	public void testOctalEscapeOutOfRange() throws Exception {
	assertThat(values(tokens("'\\777'"))).isEqualTo("STRING(\u00ff) NEWLINE EOF");
	assertThat(lastError.toString())
	.isEqualTo("/some/path.txt:1: octal escape sequence out of range (maximum is \\377)");
	}

	@Test
	public void testTripleQuotedStrings() throws Exception {
	assertThat(values(tokens("\"\"\"a\"b'c \n d\"\"e\"\"\"")))
	.isEqualTo("STRING(a\"b'c \n d\"\"e) NEWLINE EOF");
	assertThat(values(tokens("'''a\"b'c \n d\"\"e'''")))
	.isEqualTo("STRING(a\"b'c \n d\"\"e) NEWLINE EOF");
	}

	@Test
	public void testBadChar() throws Exception {
	assertThat(values(tokens("a$b"))).isEqualTo("IDENTIFIER(a) IDENTIFIER(b) NEWLINE EOF");
	assertThat(lastError.toString()).isEqualTo("/some/path.txt:1: invalid character: '$'");
	}

	@Test
	public void testIndentation() throws Exception {
	assertThat(values(tokens("1\n2\n3")))
	.isEqualTo("INT(1) NEWLINE INT(2) NEWLINE INT(3) NEWLINE EOF");
	assertThat(values(tokens("1\n 2\n 3\n4 ")))
	.isEqualTo(
	"INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT " + "INT(4) NEWLINE EOF");
	assertThat(values(tokens("1\n 2\n 3")))
	.isEqualTo("INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT " + "NEWLINE EOF");
	assertThat(values(tokens("1\n 2\n 3")))
	.isEqualTo(
	"INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE "
	+ "OUTDENT OUTDENT NEWLINE EOF");
	assertThat(values(tokens("1\n 2\n 3\n 4\n5")))
	.isEqualTo(
	"INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE "
	+ "OUTDENT INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF");

	assertThat(values(tokens("1\n 2\n 3\n 4\n5")))
	.isEqualTo(
	"INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE "
	+ "OUTDENT INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF");
	assertThat(lastError.toString()).isEqualTo("/some/path.txt:4: indentation error");
	}

	@Test
	public void testIndentationWithTab() throws Exception {
	tokens("def x():\n\tpass");
	assertThat(lastError).contains("Tab characters are not allowed");
	}

	@Test
	public void testIndentationWithCrLf() throws Exception {
	assertThat(values(tokens("1\r\n 2\r\n")))
	.isEqualTo("INT(1) NEWLINE INDENT INT(2) NEWLINE OUTDENT NEWLINE EOF");
	assertThat(values(tokens("1\r\n 2\r\n\r\n")))
	.isEqualTo("INT(1) NEWLINE INDENT INT(2) NEWLINE OUTDENT NEWLINE EOF");
	assertThat(values(tokens("1\r\n 2\r\n 3\r\n 4\r\n5")))
	.isEqualTo(
	"INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE OUTDENT INT(4) "
	+ "NEWLINE OUTDENT INT(5) NEWLINE EOF");
	assertThat(values(tokens("1\r\n 2\r\n\r\n 3\r\n4")))
	.isEqualTo(
	"INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT INT(4) NEWLINE EOF");
	}

	@Test
	public void testIndentationInsideParens() throws Exception {
	// Indentation is ignored inside parens:
	assertThat(values(tokens("1 (\n 2\n 3\n 4\n5")))
	.isEqualTo("INT(1) LPAREN INT(2) INT(3) INT(4) INT(5) NEWLINE EOF");
	assertThat(values(tokens("1 {\n 2\n 3\n 4\n5")))
	.isEqualTo("INT(1) LBRACE INT(2) INT(3) INT(4) INT(5) NEWLINE EOF");
	assertThat(values(tokens("1 [\n 2\n 3\n 4\n5")))
	.isEqualTo("INT(1) LBRACKET INT(2) INT(3) INT(4) INT(5) NEWLINE EOF");
	assertThat(values(tokens("1 [\n 2]\n 3\n 4\n5")))
	.isEqualTo(
	"INT(1) LBRACKET INT(2) RBRACKET NEWLINE INDENT INT(3) "
	+ "NEWLINE INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF");
	}

	@Test
	public void testIndentationAtEOF() throws Exception {
	// Matching OUTDENTS are created at EOF:
	assertThat(values(tokens("\n 1"))).isEqualTo("INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF");
	}

	@Test
	public void testIndentationOnFirstLine() throws Exception {
	assertThat(values(tokens(" 1"))).isEqualTo("INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF");
	assertThat(values(tokens("\n\n 1"))).isEqualTo("INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF");
	}

	@Test
	public void testBlankLineIndentation() throws Exception {
	// Blank lines and comment lines should not generate any newlines indents
	// (but note that every input ends with NEWLINE EOF).
	assertThat(names(tokens("\n #\n"))).isEqualTo("NEWLINE EOF");
	assertThat(names(tokens(" #"))).isEqualTo("NEWLINE EOF");
	assertThat(names(tokens(" #\n"))).isEqualTo("NEWLINE EOF");
	assertThat(names(tokens(" #comment\n"))).isEqualTo("NEWLINE EOF");
	assertThat(names(tokens("def f(x):\n" + " # comment\n" + "\n" + " \n" + " return x\n")))
	.isEqualTo(
	"DEF IDENTIFIER LPAREN IDENTIFIER RPAREN COLON NEWLINE "
	+ "INDENT RETURN IDENTIFIER NEWLINE "
	+ "OUTDENT NEWLINE EOF");
	}

	@Test
	public void testBackslash() throws Exception {
	assertThat(names(tokens("a\\\nb"))).isEqualTo("IDENTIFIER IDENTIFIER NEWLINE EOF");
	assertThat(names(tokens("a\\\r\nb"))).isEqualTo("IDENTIFIER IDENTIFIER NEWLINE EOF");
	assertThat(names(tokens("a\\ b"))).isEqualTo("IDENTIFIER ILLEGAL IDENTIFIER NEWLINE EOF");
	assertThat(names(tokens("a(\\\n2)"))).isEqualTo("IDENTIFIER LPAREN INT RPAREN NEWLINE EOF");
	}

	@Test
	public void testTokenPositions() throws Exception {
	assertThat(positions(tokens("foo(bar, {1: 'quux'}, \"\"\"b\"\"\", r\"\")")))
	.isEqualTo(
	// foo ( bar , { 1 :
	"[0,3) [3,4) [4,7) [7,8) [9,10) [10,11) [11,12)"
	// 'quux' } , """b""" , r"" ) NEWLINE EOF
	+ " [13,19) [19,20) [20,21) [22,29) [29,30) [31,34) [34,35) [35,35) [35,35)");
	}

	@Test
	public void testLineNumbers() throws Exception {
	assertThat(linenums("foo = 1\nbar = 2\n\nwiz = 3")).isEqualTo("1 1 1 1 2 2 2 2 4 4 4 4 4");

	assertThat(values(tokens("foo = 1\nbar = 2\n\nwiz = $\nbar = 2")))
	.isEqualTo(
	"IDENTIFIER(foo) EQUALS INT(1) NEWLINE "
	+ "IDENTIFIER(bar) EQUALS INT(2) NEWLINE "
	+ "IDENTIFIER(wiz) EQUALS NEWLINE "
	+ "IDENTIFIER(bar) EQUALS INT(2) NEWLINE EOF");
	assertThat(lastError.toString()).isEqualTo("/some/path.txt:4: invalid character: '$'");

	// '\\n' in string should not increment linenum:
	String s = "1\n'foo\\nbar'\3";
	assertThat(values(tokens(s))).isEqualTo("INT(1) NEWLINE STRING(foo\nbar) NEWLINE EOF");
	assertThat(linenums(s)).isEqualTo("1 1 2 2 2");
	}

	@Test
	public void testContainsErrors() throws Exception {
	Lexer lexerSuccess = createLexer("foo");
	allTokens(lexerSuccess); // ensure the file has been completely scanned
	assertThat(errors).isEmpty();

	Lexer lexerFail = createLexer("f$o");
	allTokens(lexerFail);
	assertThat(errors).isNotEmpty();

	String s = "'unterminated";
	lexerFail = createLexer(s);
	allTokens(lexerFail);
	assertThat(errors).isNotEmpty();
	assertThat(values(tokens(s))).isEqualTo("STRING(unterminated) NEWLINE EOF");
	}

	@Test
	public void testUnterminatedRawStringWithEscapingError() throws Exception {
	assertThat(names(tokens("r'\\"))).isEqualTo("STRING NEWLINE EOF");
	assertThat(lastError).isEqualTo("/some/path.txt:1: unterminated string literal at eof");
	}

	@Test
	public void testFirstCharIsTab() {
	assertThat(names(tokens("\t"))).isEqualTo("NEWLINE EOF");
	assertThat(lastError)
	.isEqualTo(
	"/some/path.txt:1: Tab characters are not allowed for indentation. Use spaces"
	+ " instead.");
	}

	/**
	* Returns the first error whose string form contains the specified substring, or throws an
	* informative AssertionError if there is none.
	*
	* <p>Exposed for use by other frontend tests.
	*/
	static SyntaxError assertContainsError(List<SyntaxError> errors, String substr) {
	for (SyntaxError error : errors) {
	if (error.toString().contains(substr)) {
	return error;
	}
	}
	if (errors.isEmpty()) {
	throw new AssertionError("no errors, want '" + substr + "'");
	} else {
	throw new AssertionError(
	"error '" + substr + "' not found, but got these:\n" + Joiner.on("\n").join(errors));
	}
	}
	}