| // Copyright 2006 The Bazel Authors. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| package com.google.devtools.build.lib.syntax; |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertFalse; |
| import static org.junit.Assert.assertTrue; |
| |
| import com.google.devtools.build.lib.events.Event; |
| import com.google.devtools.build.lib.events.EventHandler; |
| import com.google.devtools.build.lib.events.EventKind; |
| import com.google.devtools.build.lib.events.Location; |
| import com.google.devtools.build.lib.events.Reporter; |
| import com.google.devtools.build.lib.vfs.PathFragment; |
| |
| import org.junit.Test; |
| import org.junit.runner.RunWith; |
| import org.junit.runners.JUnit4; |
| |
| /** |
| * Tests of tokenization behavior of the {@link Lexer}. |
| */ |
| @RunWith(JUnit4.class) |
| public class LexerTest { |
| private String lastError; |
| private Location lastErrorLocation; |
| |
| /** |
| * Create a lexer which takes input from the specified string. Resets the |
| * error handler beforehand. |
| */ |
| private Lexer createLexer(String input) { |
| PathFragment somePath = new PathFragment("/some/path.txt"); |
| ParserInputSource inputSource = ParserInputSource.create(input, somePath); |
| Reporter reporter = new Reporter(); |
| reporter.addHandler(new EventHandler() { |
| @Override |
| public void handle(Event event) { |
| if (EventKind.ERRORS.contains(event.getKind())) { |
| lastErrorLocation = event.getLocation(); |
| lastError = lastErrorLocation.getPath() + ":" |
| + event.getLocation().getStartLineAndColumn().getLine() + ": " + event.getMessage(); |
| } |
| } |
| }); |
| |
| return new Lexer(inputSource, reporter); |
| } |
| |
| public Token[] tokens(String input) { |
| return createLexer(input).getTokens().toArray(new Token[0]); |
| } |
| |
| /** |
| * Lexes the specified input string, and returns a string containing just the |
| * linenumbers of each token. |
| */ |
| private String linenums(String input) { |
| Lexer lexer = createLexer(input); |
| StringBuilder buf = new StringBuilder(); |
| for (Token tok : lexer.getTokens()) { |
| if (buf.length() > 0) { |
| buf.append(' '); |
| } |
| int line = |
| lexer.createLocation(tok.left, tok.left).getStartLineAndColumn().getLine(); |
| buf.append(line); |
| } |
| return buf.toString(); |
| } |
| |
| /** |
| * Returns a string containing the names of the tokens and their associated |
| * values. (String-literals are printed without escaping.) |
| */ |
| private static String values(Token[] tokens) { |
| StringBuilder buffer = new StringBuilder(); |
| for (Token token : tokens) { |
| if (buffer.length() > 0) { |
| buffer.append(' '); |
| } |
| buffer.append(token.kind.name()); |
| if (token.value != null) { |
| buffer.append('(').append(token.value).append(')'); |
| } |
| } |
| return buffer.toString(); |
| } |
| |
| /** |
| * Returns a string containing just the names of the tokens. |
| */ |
| private static String names(Token[] tokens) { |
| StringBuilder buf = new StringBuilder(); |
| for (Token tok : tokens) { |
| if (buf.length() > 0) { |
| buf.append(' '); |
| } |
| buf.append(tok.kind.name()); |
| } |
| return buf.toString(); |
| } |
| |
| /** |
| * Returns a string containing just the half-open position intervals of each |
| * token. e.g. "[3,4) [4,9)". |
| */ |
| private static String positions(Token[] tokens) { |
| StringBuilder buf = new StringBuilder(); |
| for (Token tok : tokens) { |
| if (buf.length() > 0) { |
| buf.append(' '); |
| } |
| buf.append('[') |
| .append(tok.left) |
| .append(',') |
| .append(tok.right) |
| .append(')'); |
| } |
| return buf.toString(); |
| } |
| |
| @Test |
| public void testBasics1() throws Exception { |
| assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens("wiz) "))); |
| assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens("wiz )"))); |
| assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens(" wiz)"))); |
| assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens(" wiz ) "))); |
| assertEquals("IDENTIFIER RPAREN NEWLINE EOF", names(tokens("wiz\t)"))); |
| } |
| |
| @Test |
| public void testBasics2() throws Exception { |
| assertEquals("RPAREN NEWLINE EOF", names(tokens(")"))); |
| assertEquals("RPAREN NEWLINE EOF", names(tokens(" )"))); |
| assertEquals("RPAREN NEWLINE EOF", names(tokens(" ) "))); |
| assertEquals("RPAREN NEWLINE EOF", names(tokens(") "))); |
| } |
| |
| @Test |
| public void testBasics3() throws Exception { |
| assertEquals("INT COMMENT NEWLINE INT NEWLINE EOF", names(tokens("123#456\n789"))); |
| assertEquals("INT COMMENT NEWLINE INT NEWLINE EOF", names(tokens("123 #456\n789"))); |
| assertEquals("INT COMMENT NEWLINE INT NEWLINE EOF", names(tokens("123#456 \n789"))); |
| assertEquals("INT COMMENT NEWLINE INDENT INT NEWLINE OUTDENT NEWLINE EOF", |
| names(tokens("123#456\n 789"))); |
| assertEquals("INT COMMENT NEWLINE INT NEWLINE EOF", names(tokens("123#456\n789 "))); |
| } |
| |
| @Test |
| public void testBasics4() throws Exception { |
| assertEquals("NEWLINE EOF", names(tokens(""))); |
| assertEquals("COMMENT NEWLINE EOF", names(tokens("# foo"))); |
| assertEquals("INT INT INT INT NEWLINE EOF", names(tokens("1 2 3 4"))); |
| assertEquals("INT DOT INT NEWLINE EOF", names(tokens("1.234"))); |
| assertEquals("IDENTIFIER LPAREN IDENTIFIER COMMA IDENTIFIER RPAREN " |
| + "NEWLINE EOF", names(tokens("foo(bar, wiz)"))); |
| } |
| |
| @Test |
| public void testCrLf() throws Exception { |
| assertEquals("NEWLINE EOF", names(tokens("\r\n\r\n"))); |
| assertEquals("NEWLINE INT NEWLINE EOF", names(tokens("\r\n\r1\r\r\n"))); |
| assertEquals("COMMENT NEWLINE COMMENT NEWLINE EOF", names(tokens("# foo\r\n# bar\r\n"))); |
| } |
| |
| @Test |
| public void testIntegers() throws Exception { |
| // Detection of MINUS immediately following integer constant proves we |
| // don't consume too many chars. |
| |
| // decimal |
| assertEquals("INT(12345) MINUS NEWLINE EOF", values(tokens("12345-"))); |
| |
| // octal |
| assertEquals("INT(5349) MINUS NEWLINE EOF", values(tokens("012345-"))); |
| |
| // octal (bad) |
| assertEquals("INT(0) MINUS NEWLINE EOF", values(tokens("012349-"))); |
| assertEquals("/some/path.txt:1: invalid base-8 integer constant: 012349", |
| lastError.toString()); |
| |
| // hexadecimal (uppercase) |
| assertEquals("INT(1193055) MINUS NEWLINE EOF", values(tokens("0X12345F-"))); |
| |
| // hexadecimal (lowercase) |
| assertEquals("INT(1193055) MINUS NEWLINE EOF", values(tokens("0x12345f-"))); |
| |
| // hexadecimal (lowercase) [note: "g" cause termination of token] |
| assertEquals("INT(74565) IDENTIFIER(g) MINUS NEWLINE EOF", |
| values(tokens("0x12345g-"))); |
| } |
| |
| @Test |
| public void testIntegersAndDot() throws Exception { |
| assertEquals("INT(1) DOT INT(2345) NEWLINE EOF", values(tokens("1.2345"))); |
| |
| assertEquals("INT(1) DOT INT(2) DOT INT(345) NEWLINE EOF", |
| values(tokens("1.2.345"))); |
| |
| assertEquals("INT(1) DOT INT(0) NEWLINE EOF", values(tokens("1.23E10"))); |
| assertEquals("/some/path.txt:1: invalid base-10 integer constant: 23E10", |
| lastError.toString()); |
| |
| assertEquals("INT(1) DOT INT(0) MINUS INT(10) NEWLINE EOF", |
| values(tokens("1.23E-10"))); |
| assertEquals("/some/path.txt:1: invalid base-10 integer constant: 23E", |
| lastError.toString()); |
| |
| assertEquals("DOT INT(123) NEWLINE EOF", values(tokens(". 123"))); |
| assertEquals("DOT INT(123) NEWLINE EOF", values(tokens(".123"))); |
| assertEquals("DOT IDENTIFIER(abc) NEWLINE EOF", values(tokens(".abc"))); |
| |
| assertEquals("IDENTIFIER(foo) DOT INT(123) NEWLINE EOF", |
| values(tokens("foo.123"))); |
| assertEquals("IDENTIFIER(foo) DOT IDENTIFIER(bcd) NEWLINE EOF", |
| values(tokens("foo.bcd"))); // 'b' are hex chars |
| assertEquals("IDENTIFIER(foo) DOT IDENTIFIER(xyz) NEWLINE EOF", |
| values(tokens("foo.xyz"))); |
| } |
| |
| @Test |
| public void testStringDelimiters() throws Exception { |
| assertEquals("STRING(foo) NEWLINE EOF", values(tokens("\"foo\""))); |
| assertEquals("STRING(foo) NEWLINE EOF", values(tokens("'foo'"))); |
| } |
| |
| @Test |
| public void testQuotesInStrings() throws Exception { |
| assertEquals("STRING(foo'bar) NEWLINE EOF", values(tokens("'foo\\'bar'"))); |
| assertEquals("STRING(foo'bar) NEWLINE EOF", values(tokens("\"foo'bar\""))); |
| assertEquals("STRING(foo\"bar) NEWLINE EOF", values(tokens("'foo\"bar'"))); |
| assertEquals("STRING(foo\"bar) NEWLINE EOF", |
| values(tokens("\"foo\\\"bar\""))); |
| } |
| |
| @Test |
| public void testStringEscapes() throws Exception { |
| assertEquals("STRING(a\tb\nc\rd) NEWLINE EOF", |
| values(tokens("'a\\tb\\nc\\rd'"))); // \t \r \n |
| assertEquals("STRING(x\\hx) NEWLINE EOF", |
| values(tokens("'x\\hx'"))); // \h is unknown => "\h" |
| assertEquals("STRING(\\$$) NEWLINE EOF", values(tokens("'\\$$'"))); |
| assertEquals("STRING(ab) NEWLINE EOF", |
| values(tokens("'a\\\nb'"))); // escape end of line |
| assertEquals("STRING(abcd) NEWLINE EOF", |
| values(tokens("\"ab\\ucd\""))); |
| assertEquals("/some/path.txt:1: escape sequence not implemented: \\u", |
| lastError.toString()); |
| } |
| |
| @Test |
| public void testEscapedCrlfInString() throws Exception { |
| assertEquals("STRING(ab) NEWLINE EOF", |
| values(tokens("'a\\\r\nb'"))); |
| assertEquals("STRING(ab) NEWLINE EOF", |
| values(tokens("\"a\\\r\nb\""))); |
| assertEquals("STRING(ab) NEWLINE EOF", |
| values(tokens("\"\"\"a\\\r\nb\"\"\""))); |
| assertEquals("STRING(ab) NEWLINE EOF", |
| values(tokens("'''a\\\r\nb'''"))); |
| assertEquals("STRING(a\\\nb) NEWLINE EOF", |
| values(tokens("r'a\\\r\nb'"))); |
| assertEquals("STRING(a\\\nb) NEWLINE EOF", |
| values(tokens("r\"a\\\r\nb\""))); |
| assertEquals("STRING(a\\\n\\\nb) NEWLINE EOF", |
| values(tokens("r\"a\\\r\n\\\nb\""))); |
| } |
| |
| @Test |
| public void testRawString() throws Exception { |
| assertEquals("STRING(abcd) NEWLINE EOF", |
| values(tokens("r'abcd'"))); |
| assertEquals("STRING(abcd) NEWLINE EOF", |
| values(tokens("r\"abcd\""))); |
| assertEquals("STRING(a\\tb\\nc\\rd) NEWLINE EOF", |
| values(tokens("r'a\\tb\\nc\\rd'"))); // r'a\tb\nc\rd' |
| assertEquals("STRING(a\\\") NEWLINE EOF", |
| values(tokens("r\"a\\\"\""))); // r"a\"" |
| assertEquals("STRING(a\\\\b) NEWLINE EOF", |
| values(tokens("r'a\\\\b'"))); // r'a\\b' |
| assertEquals("STRING(ab) IDENTIFIER(r) NEWLINE EOF", |
| values(tokens("r'ab'r"))); |
| |
| // Unterminated raw string |
| values(tokens("r'\\'")); // r'\' |
| assertEquals("/some/path.txt:1: unterminated string literal at eof", |
| lastError.toString()); |
| } |
| |
| @Test |
| public void testTripleRawString() throws Exception { |
| // r'''a\ncd''' |
| assertEquals("STRING(ab\\ncd) NEWLINE EOF", |
| values(tokens("r'''ab\\ncd'''"))); |
| // r"""ab |
| // cd""" |
| assertEquals( |
| "STRING(ab\ncd) NEWLINE EOF", |
| values(tokens("\"\"\"ab\ncd\"\"\""))); |
| |
| // Unterminated raw string |
| values(tokens("r'''\\'''")); // r'''\''' |
| assertEquals("/some/path.txt:1: unterminated string literal at eof", |
| lastError.toString()); |
| } |
| |
| @Test |
| public void testOctalEscapes() throws Exception { |
| // Regression test for a bug. |
| assertEquals("STRING(\0 \1 \t \u003f I I1 \u00ff \u00ff \u00fe) NEWLINE EOF", |
| values(tokens("'\\0 \\1 \\11 \\77 \\111 \\1111 \\377 \\777 \\776'"))); |
| // Test boundaries (non-octal char, EOF). |
| assertEquals("STRING(\1b \1) NEWLINE EOF", values(tokens("'\\1b \\1'"))); |
| } |
| |
| @Test |
| public void testTripleQuotedStrings() throws Exception { |
| assertEquals("STRING(a\"b'c \n d\"\"e) NEWLINE EOF", |
| values(tokens("\"\"\"a\"b'c \n d\"\"e\"\"\""))); |
| assertEquals("STRING(a\"b'c \n d\"\"e) NEWLINE EOF", |
| values(tokens("'''a\"b'c \n d\"\"e'''"))); |
| } |
| |
| @Test |
| public void testBadChar() throws Exception { |
| assertEquals("IDENTIFIER(a) IDENTIFIER(b) NEWLINE EOF", |
| values(tokens("a$b"))); |
| assertEquals("/some/path.txt:1: invalid character: '$'", |
| lastError.toString()); |
| } |
| |
| @Test |
| public void testIndentation() throws Exception { |
| assertEquals("INT(1) NEWLINE INT(2) NEWLINE INT(3) NEWLINE EOF", |
| values(tokens("1\n2\n3"))); |
| assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT " |
| + "INT(4) NEWLINE EOF", values(tokens("1\n 2\n 3\n4 "))); |
| assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT " |
| + "NEWLINE EOF", values(tokens("1\n 2\n 3"))); |
| assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE " |
| + "OUTDENT OUTDENT NEWLINE EOF", |
| values(tokens("1\n 2\n 3"))); |
| assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE " |
| + "OUTDENT INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF", |
| values(tokens("1\n 2\n 3\n 4\n5"))); |
| |
| assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE " |
| + "OUTDENT INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF", |
| values(tokens("1\n 2\n 3\n 4\n5"))); |
| assertEquals("/some/path.txt:4: indentation error", lastError.toString()); |
| } |
| |
| @Test |
| public void testIndentationWithCrLf() throws Exception { |
| assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE OUTDENT NEWLINE EOF", |
| values(tokens("1\r\n 2\r\n"))); |
| assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE OUTDENT NEWLINE EOF", |
| values(tokens("1\r\n 2\r\n\r\n"))); |
| assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE OUTDENT INT(4) " |
| + "NEWLINE OUTDENT INT(5) NEWLINE EOF", |
| values(tokens("1\r\n 2\r\n 3\r\n 4\r\n5"))); |
| assertEquals( |
| "INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT INT(4) NEWLINE EOF", |
| values(tokens("1\r\n 2\r\n\r\n 3\r\n4"))); |
| } |
| |
| @Test |
| public void testIndentationInsideParens() throws Exception { |
| // Indentation is ignored inside parens: |
| assertEquals("INT(1) LPAREN INT(2) INT(3) INT(4) INT(5) NEWLINE EOF", |
| values(tokens("1 (\n 2\n 3\n 4\n5"))); |
| assertEquals("INT(1) LBRACE INT(2) INT(3) INT(4) INT(5) NEWLINE EOF", |
| values(tokens("1 {\n 2\n 3\n 4\n5"))); |
| assertEquals("INT(1) LBRACKET INT(2) INT(3) INT(4) INT(5) NEWLINE EOF", |
| values(tokens("1 [\n 2\n 3\n 4\n5"))); |
| assertEquals("INT(1) LBRACKET INT(2) RBRACKET NEWLINE INDENT INT(3) " |
| + "NEWLINE INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF", |
| values(tokens("1 [\n 2]\n 3\n 4\n5"))); |
| } |
| |
| @Test |
| public void testIndentationAtEOF() throws Exception { |
| // Matching OUTDENTS are created at EOF: |
| assertEquals("INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF", |
| values(tokens("\n 1"))); |
| } |
| |
| @Test |
| public void testBlankLineIndentation() throws Exception { |
| // Blank lines and comment lines should not generate any newlines indents |
| // (but note that every input ends with NEWLINE EOF). |
| assertEquals("COMMENT NEWLINE EOF", names(tokens("\n #\n"))); |
| assertEquals("COMMENT NEWLINE EOF", names(tokens(" #"))); |
| assertEquals("COMMENT NEWLINE EOF", names(tokens(" #\n"))); |
| assertEquals("COMMENT NEWLINE EOF", names(tokens(" #comment\n"))); |
| assertEquals("DEF IDENTIFIER LPAREN IDENTIFIER RPAREN COLON NEWLINE " |
| + "COMMENT INDENT RETURN IDENTIFIER NEWLINE " |
| + "OUTDENT NEWLINE EOF", |
| names(tokens("def f(x):\n" |
| + " # comment\n" |
| + "\n" |
| + " \n" |
| + " return x\n"))); |
| } |
| |
| @Test |
| public void testMultipleCommentLines() throws Exception { |
| assertEquals("COMMENT NEWLINE COMMENT COMMENT COMMENT " |
| + "DEF IDENTIFIER LPAREN IDENTIFIER RPAREN COLON NEWLINE " |
| + "INDENT RETURN IDENTIFIER NEWLINE OUTDENT NEWLINE EOF", |
| names(tokens("# Copyright\n" |
| + "#\n" |
| + "# A comment line\n" |
| + "# An adjoining line\n" |
| + "def f(x):\n" |
| + " return x\n"))); |
| } |
| |
| @Test |
| public void testBackslash() throws Exception { |
| assertEquals("IDENTIFIER IDENTIFIER NEWLINE EOF", |
| names(tokens("a\\\nb"))); |
| assertEquals("IDENTIFIER IDENTIFIER NEWLINE EOF", names(tokens("a\\\r\nb"))); |
| assertEquals("IDENTIFIER ILLEGAL IDENTIFIER NEWLINE EOF", |
| names(tokens("a\\ b"))); |
| assertEquals("IDENTIFIER LPAREN INT RPAREN NEWLINE EOF", |
| names(tokens("a(\\\n2)"))); |
| } |
| |
| @Test |
| public void testTokenPositions() throws Exception { |
| // foo ( bar , { 1 : |
| assertEquals("[0,3) [3,4) [4,7) [7,8) [9,10) [10,11) [11,12)" |
| // 'quux' } ) NEWLINE EOF |
| + " [13,19) [19,20) [20,21) [20,21) [21,21)", |
| positions(tokens("foo(bar, {1: 'quux'})"))); |
| } |
| |
| @Test |
| public void testLineNumbers() throws Exception { |
| assertEquals("1 1 1 1 2 2 2 2 4 4 4 4 4", |
| linenums("foo = 1\nbar = 2\n\nwiz = 3")); |
| |
| assertEquals("IDENTIFIER(foo) EQUALS INT(1) NEWLINE " |
| + "IDENTIFIER(bar) EQUALS INT(2) NEWLINE " |
| + "IDENTIFIER(wiz) EQUALS NEWLINE " |
| + "IDENTIFIER(bar) EQUALS INT(2) NEWLINE EOF", |
| values(tokens("foo = 1\nbar = 2\n\nwiz = $\nbar = 2"))); |
| assertEquals("/some/path.txt:4: invalid character: '$'", |
| lastError.toString()); |
| |
| // '\\n' in string should not increment linenum: |
| String s = "1\n'foo\\nbar'\3"; |
| assertEquals("INT(1) NEWLINE STRING(foo\nbar) NEWLINE EOF", |
| values(tokens(s))); |
| assertEquals("1 1 2 2 2", linenums(s)); |
| } |
| |
| @Test |
| public void testContainsErrors() throws Exception { |
| Lexer lexerSuccess = createLexer("foo"); |
| assertFalse(lexerSuccess.containsErrors()); |
| |
| Lexer lexerFail = createLexer("f$o"); |
| assertTrue(lexerFail.containsErrors()); |
| |
| String s = "'unterminated"; |
| lexerFail = createLexer(s); |
| assertTrue(lexerFail.containsErrors()); |
| assertEquals(0, lastErrorLocation.getStartOffset()); |
| assertEquals(s.length(), lastErrorLocation.getEndOffset()); |
| assertEquals("STRING(unterminated) NEWLINE EOF", values(tokens(s))); |
| } |
| |
| @Test |
| public void testUnterminatedRawStringWithEscapingError() throws Exception { |
| assertEquals("STRING NEWLINE EOF", names(tokens("r'\\"))); |
| assertEquals("/some/path.txt:1: unterminated string literal at eof", lastError); |
| } |
| } |