// Source blob: 311922e429c61cc2b147a456d6f90f54244a09bf
// Copyright 2006 The Bazel Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.syntax;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.events.EventHandler;
import com.google.devtools.build.lib.events.EventKind;
import com.google.devtools.build.lib.events.Location;
import com.google.devtools.build.lib.events.Reporter;
import com.google.devtools.build.lib.vfs.PathFragment;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/**
* Tests of tokenization behavior of the {@link Lexer}.
*/
@RunWith(JUnit4.class)
public class LexerTest {
private String lastError;
private Location lastErrorLocation;
/**
 * Creates a lexer that reads from the given string, first clearing any error
 * recorded for a previous input so that {@code lastError} and
 * {@code lastErrorLocation} only ever describe the input passed here.
 *
 * <p>Only events whose kind is contained in {@code EventKind.ERRORS} are
 * recorded; if several are reported, the most recent one wins.
 *
 * @param input the source text to tokenize
 * @return a lexer over {@code input} that reports to a new {@link Reporter}
 */
private Lexer createLexer(String input) {
  // Forget errors from earlier inputs: otherwise a stale message could satisfy
  // a later assertion even though the current input reported nothing.
  lastError = null;
  lastErrorLocation = null;

  PathFragment somePath = new PathFragment("/some/path.txt");
  ParserInputSource inputSource = ParserInputSource.create(input, somePath);
  Reporter reporter = new Reporter();
  reporter.addHandler(new EventHandler() {
    @Override
    public void handle(Event event) {
      if (EventKind.ERRORS.contains(event.getKind())) {
        lastErrorLocation = event.getLocation();
        // Rendered as "<path>:<line>: <message>", the format the tests compare against.
        lastError = lastErrorLocation.getPath() + ":"
            + lastErrorLocation.getStartLineAndColumn().getLine() + ": " + event.getMessage();
      }
    }
  });
  return new Lexer(inputSource, reporter);
}
/**
 * Lexes {@code input} with a newly created lexer and returns all of its tokens
 * as an array.
 */
public Token[] tokens(String input) {
  Lexer lexer = createLexer(input);
  Token[] arrayType = new Token[0];
  return lexer.getTokens().toArray(arrayType);
}
/**
 * Lexes {@code input} and returns the line number on which each token starts,
 * in token order, separated by single spaces.
 */
private String linenums(String input) {
  Lexer lexer = createLexer(input);
  StringBuilder lines = new StringBuilder();
  String separator = "";
  for (Token token : lexer.getTokens()) {
    lines.append(separator);
    separator = " ";
    // Only the start offset matters here, so it is used for both ends.
    int lineNumber =
        lexer.createLocation(token.left, token.left).getStartLineAndColumn().getLine();
    lines.append(lineNumber);
  }
  return lines.toString();
}
/**
 * Renders each token as {@code KIND} or, when it carries a value, as
 * {@code KIND(value)}, joining the results with single spaces. Values are
 * appended as-is, so string literals appear unescaped.
 */
private static String values(Token[] tokens) {
  StringBuilder out = new StringBuilder();
  for (int i = 0; i < tokens.length; i++) {
    Token current = tokens[i];
    if (i != 0) {
      out.append(' ');
    }
    out.append(current.kind.name());
    if (current.value == null) {
      continue;
    }
    out.append('(');
    out.append(current.value);
    out.append(')');
  }
  return out.toString();
}
/**
 * Renders only the kind name of each token, joined with single spaces.
 */
private static String names(Token[] tokens) {
  StringBuilder out = new StringBuilder();
  String separator = "";
  for (Token token : tokens) {
    out.append(separator).append(token.kind.name());
    separator = " ";
  }
  return out.toString();
}
/**
 * Renders the half-open {@code [left,right)} interval of each token, joined
 * with single spaces, e.g. "[3,4) [4,9)".
 */
private static String positions(Token[] tokens) {
  StringBuilder out = new StringBuilder();
  for (int i = 0; i < tokens.length; i++) {
    if (i > 0) {
      out.append(' ');
    }
    Token token = tokens[i];
    out.append("[" + token.left + "," + token.right + ")");
  }
  return out.toString();
}
/**
 * An identifier followed by a closing paren yields the same token names
 * wherever spaces or a tab are placed around the two tokens.
 */
@Test
public void testBasics1() throws Exception {
  String expected = "IDENTIFIER RPAREN NEWLINE EOF";
  String[] inputs = {"wiz) ", "wiz )", " wiz)", " wiz ) ", "wiz\t)"};
  for (String input : inputs) {
    assertEquals(expected, names(tokens(input)));
  }
}
/**
 * A lone closing paren yields the same token names with or without
 * surrounding spaces.
 */
@Test
public void testBasics2() throws Exception {
  String expected = "RPAREN NEWLINE EOF";
  String[] inputs = {")", " )", " ) ", ") "};
  for (String input : inputs) {
    assertEquals(expected, names(tokens(input)));
  }
}
/**
 * Integers around a trailing comment: spaces next to the comment or at the
 * end of input change nothing, while a leading space on the second line
 * produces an INDENT/OUTDENT pair.
 */
@Test
public void testBasics3() throws Exception {
  String flat = "INT COMMENT NEWLINE INT NEWLINE EOF";
  String indented = "INT COMMENT NEWLINE INDENT INT NEWLINE OUTDENT NEWLINE EOF";
  // Each row is {expected token names, input}.
  String[][] cases = {
    {flat, "123#456\n789"},
    {flat, "123 #456\n789"},
    {flat, "123#456 \n789"},
    {indented, "123#456\n 789"},
    {flat, "123#456\n789 "},
  };
  for (String[] row : cases) {
    assertEquals(row[0], names(tokens(row[1])));
  }
}
/**
 * Token names for a handful of small inputs: empty input, a lone comment,
 * space-separated integers, a dotted number and a simple call.
 */
@Test
public void testBasics4() throws Exception {
  // Each row is {expected token names, input}.
  String[][] cases = {
    {"NEWLINE EOF", ""},
    {"COMMENT NEWLINE EOF", "# foo"},
    {"INT INT INT INT NEWLINE EOF", "1 2 3 4"},
    {"INT DOT INT NEWLINE EOF", "1.234"},
    {"IDENTIFIER LPAREN IDENTIFIER COMMA IDENTIFIER RPAREN NEWLINE EOF", "foo(bar, wiz)"},
  };
  for (String[] row : cases) {
    assertEquals(row[0], names(tokens(row[1])));
  }
}
/**
 * Token names for inputs that contain carriage returns, alone or as part of
 * CR LF line endings.
 */
@Test
public void testCrLf() throws Exception {
  // Each row is {expected token names, input}.
  String[][] cases = {
    {"NEWLINE EOF", "\r\n\r\n"},
    {"NEWLINE INT NEWLINE EOF", "\r\n\r1\r\r\n"},
    {"COMMENT NEWLINE COMMENT NEWLINE EOF", "# foo\r\n# bar\r\n"},
  };
  for (String[] row : cases) {
    assertEquals(row[0], names(tokens(row[1])));
  }
}
/**
 * Integer literals in decimal, octal and hexadecimal notation, including a
 * malformed octal literal that is reported as an error.
 */
@Test
public void testIntegers() throws Exception {
  // The MINUS detected immediately after each integer constant proves the
  // lexer does not consume too many characters.

  // decimal
  assertEquals("INT(12345) MINUS NEWLINE EOF", values(tokens("12345-")));
  // octal
  assertEquals("INT(5349) MINUS NEWLINE EOF", values(tokens("012345-")));
  // octal (bad): lexed as 0 and reported
  assertEquals("INT(0) MINUS NEWLINE EOF", values(tokens("012349-")));
  // Compare the String directly so that a missing error fails the assertion
  // instead of throwing a NullPointerException.
  assertEquals("/some/path.txt:1: invalid base-8 integer constant: 012349", lastError);
  // hexadecimal (uppercase)
  assertEquals("INT(1193055) MINUS NEWLINE EOF", values(tokens("0X12345F-")));
  // hexadecimal (lowercase)
  assertEquals("INT(1193055) MINUS NEWLINE EOF", values(tokens("0x12345f-")));
  // hexadecimal (lowercase) [note: "g" causes termination of the token]
  assertEquals("INT(74565) IDENTIFIER(g) MINUS NEWLINE EOF",
      values(tokens("0x12345g-")));
}
/**
 * Integers and identifiers separated by dots: the dot is always its own
 * token, and exponent-style suffixes are reported as invalid integers.
 */
@Test
public void testIntegersAndDot() throws Exception {
  assertEquals("INT(1) DOT INT(2345) NEWLINE EOF", values(tokens("1.2345")));
  assertEquals("INT(1) DOT INT(2) DOT INT(345) NEWLINE EOF",
      values(tokens("1.2.345")));

  // Exponent notation is not an integer: the offending part is lexed as 0 and
  // reported. The String is compared directly so that a missing error fails
  // the assertion instead of throwing a NullPointerException.
  assertEquals("INT(1) DOT INT(0) NEWLINE EOF", values(tokens("1.23E10")));
  assertEquals("/some/path.txt:1: invalid base-10 integer constant: 23E10", lastError);
  assertEquals("INT(1) DOT INT(0) MINUS INT(10) NEWLINE EOF",
      values(tokens("1.23E-10")));
  assertEquals("/some/path.txt:1: invalid base-10 integer constant: 23E", lastError);

  assertEquals("DOT INT(123) NEWLINE EOF", values(tokens(". 123")));
  assertEquals("DOT INT(123) NEWLINE EOF", values(tokens(".123")));
  assertEquals("DOT IDENTIFIER(abc) NEWLINE EOF", values(tokens(".abc")));
  assertEquals("IDENTIFIER(foo) DOT INT(123) NEWLINE EOF",
      values(tokens("foo.123")));
  // 'b', 'c' and 'd' are hex digits but must still lex as an identifier here.
  assertEquals("IDENTIFIER(foo) DOT IDENTIFIER(bcd) NEWLINE EOF",
      values(tokens("foo.bcd")));
  assertEquals("IDENTIFIER(foo) DOT IDENTIFIER(xyz) NEWLINE EOF",
      values(tokens("foo.xyz")));
}
/**
 * Double-quoted and single-quoted literals produce the same STRING token.
 */
@Test
public void testStringDelimiters() throws Exception {
  String expected = "STRING(foo) NEWLINE EOF";
  String[] inputs = {"\"foo\"", "'foo'"};
  for (String input : inputs) {
    assertEquals(expected, values(tokens(input)));
  }
}
/**
 * Quote characters inside string literals, either escaped or of the other
 * quote kind, end up in the token value.
 */
@Test
public void testQuotesInStrings() throws Exception {
  String withApostrophe = "STRING(foo'bar) NEWLINE EOF";
  String withDoubleQuote = "STRING(foo\"bar) NEWLINE EOF";
  // Each row is {expected tokens and values, input}.
  String[][] cases = {
    {withApostrophe, "'foo\\'bar'"},
    {withApostrophe, "\"foo'bar\""},
    {withDoubleQuote, "'foo\"bar'"},
    {withDoubleQuote, "\"foo\\\"bar\""},
  };
  for (String[] row : cases) {
    assertEquals(row[0], values(tokens(row[1])));
  }
}
/**
 * Backslash escapes in non-raw string literals: known escapes, an unknown
 * escape, an escaped line end and an unimplemented escape that is reported.
 */
@Test
public void testStringEscapes() throws Exception {
  // Tab, newline and carriage-return escapes, in that order.
  assertEquals("STRING(a\tb\nc\rd) NEWLINE EOF",
      values(tokens("'a\\tb\\nc\\rd'")));
  // An unknown escape (backslash-h) is kept verbatim, backslash included.
  assertEquals("STRING(x\\hx) NEWLINE EOF",
      values(tokens("'x\\hx'")));
  assertEquals("STRING(\\$$) NEWLINE EOF", values(tokens("'\\$$'")));
  // A backslash at the end of a line escapes the line end.
  assertEquals("STRING(ab) NEWLINE EOF",
      values(tokens("'a\\\nb'")));
  // The unicode escape is not implemented: it is dropped and reported.
  assertEquals("STRING(abcd) NEWLINE EOF",
      values(tokens("\"ab\\ucd\"")));
  // Compare the String directly so that a missing error fails the assertion
  // instead of throwing a NullPointerException.
  assertEquals("/some/path.txt:1: escape sequence not implemented: \\u", lastError);
}
/**
 * A backslash followed by CR LF inside a string literal: dropped entirely in
 * non-raw strings, kept as backslash plus LF in raw strings.
 */
@Test
public void testEscapedCrlfInString() throws Exception {
  String[] nonRawInputs = {
    "'a\\\r\nb'",
    "\"a\\\r\nb\"",
    "\"\"\"a\\\r\nb\"\"\"",
    "'''a\\\r\nb'''",
  };
  for (String input : nonRawInputs) {
    assertEquals("STRING(ab) NEWLINE EOF", values(tokens(input)));
  }

  String[] rawInputs = {"r'a\\\r\nb'", "r\"a\\\r\nb\""};
  for (String input : rawInputs) {
    assertEquals("STRING(a\\\nb) NEWLINE EOF", values(tokens(input)));
  }

  // Raw string mixing an escaped CR LF with an escaped bare LF.
  String mixed = values(tokens("r\"a\\\r\n\\\nb\""));
  assertEquals("STRING(a\\\n\\\nb) NEWLINE EOF", mixed);
}
/**
 * Raw string literals ({@code r'...'} and {@code r"..."}): backslashes are
 * kept verbatim, and a raw string whose closing quote is escaped is reported
 * as unterminated.
 */
@Test
public void testRawString() throws Exception {
  assertEquals("STRING(abcd) NEWLINE EOF",
      values(tokens("r'abcd'")));
  assertEquals("STRING(abcd) NEWLINE EOF",
      values(tokens("r\"abcd\"")));
  assertEquals("STRING(a\\tb\\nc\\rd) NEWLINE EOF",
      values(tokens("r'a\\tb\\nc\\rd'"))); // r'a\tb\nc\rd'
  assertEquals("STRING(a\\\") NEWLINE EOF",
      values(tokens("r\"a\\\"\""))); // r"a\""
  assertEquals("STRING(a\\\\b) NEWLINE EOF",
      values(tokens("r'a\\\\b'"))); // r'a\\b'
  // An 'r' after the closing quote is a separate identifier.
  assertEquals("STRING(ab) IDENTIFIER(r) NEWLINE EOF",
      values(tokens("r'ab'r")));

  // Unterminated raw string: r'\'
  // Only the reported error matters here, so the tokens are not rendered.
  tokens("r'\\'");
  // Compare the String directly so that a missing error fails the assertion
  // instead of throwing a NullPointerException.
  assertEquals("/some/path.txt:1: unterminated string literal at eof", lastError);
}
/**
 * Triple-quoted strings, raw and non-raw, plus a raw triple-quoted string
 * that is reported as unterminated.
 */
@Test
public void testTripleRawString() throws Exception {
  // r'''ab\ncd''' -- the backslash and the 'n' stay two separate characters.
  assertEquals("STRING(ab\\ncd) NEWLINE EOF",
      values(tokens("r'''ab\\ncd'''")));

  // """ab
  // cd"""  -- not raw; the literal contains an actual line break.
  assertEquals(
      "STRING(ab\ncd) NEWLINE EOF",
      values(tokens("\"\"\"ab\ncd\"\"\"")));

  // Unterminated raw string: r'''\'''
  // Only the reported error matters here, so the tokens are not rendered.
  tokens("r'''\\'''");
  // Compare the String directly so that a missing error fails the assertion
  // instead of throwing a NullPointerException.
  assertEquals("/some/path.txt:1: unterminated string literal at eof", lastError);
}
/**
 * Octal escapes of one to three digits in string literals, including values
 * above 0377 and escapes that end at a non-octal character or at the closing
 * quote.
 */
@Test
public void testOctalEscapes() throws Exception {
  // Regression test for a bug.
  String lexed = values(tokens("'\\0 \\1 \\11 \\77 \\111 \\1111 \\377 \\777 \\776'"));
  assertEquals("STRING(\0 \1 \t \u003f I I1 \u00ff \u00ff \u00fe) NEWLINE EOF", lexed);

  // Test boundaries (non-octal char, end of literal).
  String boundaries = values(tokens("'\\1b \\1'"));
  assertEquals("STRING(\1b \1) NEWLINE EOF", boundaries);
}
/**
 * Triple-quoted literals may contain single quotes, double quotes and line
 * breaks; both delimiter styles give the same value.
 */
@Test
public void testTripleQuotedStrings() throws Exception {
  String expected = "STRING(a\"b'c \n d\"\"e) NEWLINE EOF";
  String[] inputs = {
    "\"\"\"a\"b'c \n d\"\"e\"\"\"",
    "'''a\"b'c \n d\"\"e'''",
  };
  for (String input : inputs) {
    assertEquals(expected, values(tokens(input)));
  }
}
/**
 * An invalid character is reported as an error and produces no token of its
 * own; the identifiers on either side are still lexed.
 */
@Test
public void testBadChar() throws Exception {
  assertEquals("IDENTIFIER(a) IDENTIFIER(b) NEWLINE EOF",
      values(tokens("a$b")));
  // Compare the String directly so that a missing error fails the assertion
  // instead of throwing a NullPointerException.
  assertEquals("/some/path.txt:1: invalid character: '$'", lastError);
}
/**
 * INDENT and OUTDENT tokens for nested blocks. Level one uses two spaces and
 * level two uses four; the final case indents a line by three spaces, which
 * matches no enclosing level and must be reported as an indentation error.
 *
 * <p>The nesting depths have to differ between levels: with equal
 * indentation on every line, the cases expecting a second INDENT would
 * contradict the cases expecting only one.
 */
@Test
public void testIndentation() throws Exception {
  // No indentation at all.
  assertEquals("INT(1) NEWLINE INT(2) NEWLINE INT(3) NEWLINE EOF",
      values(tokens("1\n2\n3")));

  // One level, closed by an unindented line.
  assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT "
      + "INT(4) NEWLINE EOF", values(tokens("1\n  2\n  3\n4 ")));

  // One level, closed by end of input.
  assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT "
      + "NEWLINE EOF", values(tokens("1\n  2\n  3")));

  // Two levels, both closed by end of input.
  assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE "
      + "OUTDENT OUTDENT NEWLINE EOF",
      values(tokens("1\n  2\n    3")));

  // Two levels, closed one at a time.
  assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE "
      + "OUTDENT INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF",
      values(tokens("1\n  2\n    3\n  4\n5")));

  // Line 4 is indented by three spaces, between the two open levels.
  assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE "
      + "OUTDENT INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF",
      values(tokens("1\n  2\n    3\n   4\n5")));
  // Compare the String directly so that a missing error fails the assertion
  // instead of throwing a NullPointerException.
  assertEquals("/some/path.txt:4: indentation error", lastError);
}
/**
 * INDENT and OUTDENT tokens when lines end in CR LF. Level one uses two
 * spaces and level two uses four, so that the case expecting two INDENT
 * tokens really contains two nesting depths.
 */
@Test
public void testIndentationWithCrLf() throws Exception {
  // One level, closed by end of input.
  assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE OUTDENT NEWLINE EOF",
      values(tokens("1\r\n  2\r\n")));

  // A trailing blank line changes nothing.
  assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE OUTDENT NEWLINE EOF",
      values(tokens("1\r\n  2\r\n\r\n")));

  // Two levels, closed one at a time.
  assertEquals("INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE OUTDENT INT(4) "
      + "NEWLINE OUTDENT INT(5) NEWLINE EOF",
      values(tokens("1\r\n  2\r\n    3\r\n  4\r\n5")));

  // A blank line inside an indented block does not close the block.
  assertEquals(
      "INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT INT(4) NEWLINE EOF",
      values(tokens("1\r\n  2\r\n\r\n  3\r\n4")));
}
/**
 * Line breaks and indentation produce no NEWLINE/INDENT/OUTDENT tokens while
 * a paren, brace or bracket is open; they count again once it is closed.
 */
@Test
public void testIndentationInsideParens() throws Exception {
  // Each row is {expected tokens and values, input}.
  String[][] cases = {
    // Indentation is ignored inside an unclosed opener:
    {"INT(1) LPAREN INT(2) INT(3) INT(4) INT(5) NEWLINE EOF", "1 (\n 2\n 3\n 4\n5"},
    {"INT(1) LBRACE INT(2) INT(3) INT(4) INT(5) NEWLINE EOF", "1 {\n 2\n 3\n 4\n5"},
    {"INT(1) LBRACKET INT(2) INT(3) INT(4) INT(5) NEWLINE EOF", "1 [\n 2\n 3\n 4\n5"},
    // After the closing bracket, indentation is significant again:
    {"INT(1) LBRACKET INT(2) RBRACKET NEWLINE INDENT INT(3) "
        + "NEWLINE INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF",
      "1 [\n 2]\n 3\n 4\n5"},
  };
  for (String[] row : cases) {
    assertEquals(row[0], values(tokens(row[1])));
  }
}
/**
 * An indentation level still open at end of input is closed by a matching
 * OUTDENT before EOF.
 */
@Test
public void testIndentationAtEOF() throws Exception {
  String lexed = values(tokens("\n 1"));
  assertEquals("INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF", lexed);
}
/**
 * Blank lines and comment-only lines generate neither NEWLINE nor
 * INDENT/OUTDENT tokens of their own (every input still ends with
 * NEWLINE EOF).
 */
@Test
public void testBlankLineIndentation() throws Exception {
  String[] commentOnlyInputs = {"\n #\n", " #", " #\n", " #comment\n"};
  for (String input : commentOnlyInputs) {
    assertEquals("COMMENT NEWLINE EOF", names(tokens(input)));
  }

  // A function whose body is preceded by an indented comment, an empty line
  // and a whitespace-only line.
  String function =
      "def f(x):\n"
          + " # comment\n"
          + "\n"
          + " \n"
          + " return x\n";
  String expected =
      "DEF IDENTIFIER LPAREN IDENTIFIER RPAREN COLON NEWLINE "
          + "COMMENT INDENT RETURN IDENTIFIER NEWLINE "
          + "OUTDENT NEWLINE EOF";
  assertEquals(expected, names(tokens(function)));
}
/**
 * Several consecutive comment lines ahead of a function definition: each
 * line yields a COMMENT token, with a NEWLINE only after the first.
 */
@Test
public void testMultipleCommentLines() throws Exception {
  String source =
      "# Copyright\n"
          + "#\n"
          + "# A comment line\n"
          + "# An adjoining line\n"
          + "def f(x):\n"
          + " return x\n";
  String expected =
      "COMMENT NEWLINE COMMENT COMMENT COMMENT "
          + "DEF IDENTIFIER LPAREN IDENTIFIER RPAREN COLON NEWLINE "
          + "INDENT RETURN IDENTIFIER NEWLINE OUTDENT NEWLINE EOF";
  assertEquals(expected, names(tokens(source)));
}
/**
 * A backslash directly before a line end (LF or CR LF) joins the lines; a
 * backslash followed by a space is an ILLEGAL token.
 */
@Test
public void testBackslash() throws Exception {
  // Each row is {expected token names, input}.
  String[][] cases = {
    {"IDENTIFIER IDENTIFIER NEWLINE EOF", "a\\\nb"},
    {"IDENTIFIER IDENTIFIER NEWLINE EOF", "a\\\r\nb"},
    {"IDENTIFIER ILLEGAL IDENTIFIER NEWLINE EOF", "a\\ b"},
    {"IDENTIFIER LPAREN INT RPAREN NEWLINE EOF", "a(\\\n2)"},
  };
  for (String[] row : cases) {
    assertEquals(row[0], names(tokens(row[1])));
  }
}
/**
 * Each token's [left,right) offsets for a small call expression, including
 * the NEWLINE and EOF tokens added at the end.
 */
@Test
public void testTokenPositions() throws Exception {
  String input = "foo(bar, {1: 'quux'})";
  // Intervals for: foo ( bar , { 1 :
  String firstHalf = "[0,3) [3,4) [4,7) [7,8) [9,10) [10,11) [11,12)";
  // Intervals for: 'quux' } ) NEWLINE EOF
  String secondHalf = " [13,19) [19,20) [20,21) [20,21) [21,21)";
  assertEquals(firstHalf + secondHalf, positions(tokens(input)));
}
/**
 * Line numbers attached to tokens and to reported errors, including an input
 * with a blank line and a string literal containing an escaped newline.
 */
@Test
public void testLineNumbers() throws Exception {
  // The blank third line is skipped: the last statement is on line 4.
  assertEquals("1 1 1 1 2 2 2 2 4 4 4 4 4",
      linenums("foo = 1\nbar = 2\n\nwiz = 3"));

  // The invalid '$' on line 4 produces no token but is reported with its line.
  assertEquals("IDENTIFIER(foo) EQUALS INT(1) NEWLINE "
      + "IDENTIFIER(bar) EQUALS INT(2) NEWLINE "
      + "IDENTIFIER(wiz) EQUALS NEWLINE "
      + "IDENTIFIER(bar) EQUALS INT(2) NEWLINE EOF",
      values(tokens("foo = 1\nbar = 2\n\nwiz = $\nbar = 2")));
  // Compare the String directly so that a missing error fails the assertion
  // instead of throwing a NullPointerException.
  assertEquals("/some/path.txt:4: invalid character: '$'", lastError);

  // '\\n' in string should not increment linenum:
  String s = "1\n'foo\\nbar'\3";
  assertEquals("INT(1) NEWLINE STRING(foo\nbar) NEWLINE EOF",
      values(tokens(s)));
  assertEquals("1 1 2 2 2", linenums(s));
}
/**
 * {@code containsErrors()} is false for clean input and true for input with
 * an invalid character or an unterminated string; for the latter, the
 * reported location spans the whole input.
 */
@Test
public void testContainsErrors() throws Exception {
  Lexer cleanLexer = createLexer("foo");
  assertFalse(cleanLexer.containsErrors());

  Lexer badCharLexer = createLexer("f$o");
  assertTrue(badCharLexer.containsErrors());

  String unterminated = "'unterminated";
  Lexer unterminatedLexer = createLexer(unterminated);
  assertTrue(unterminatedLexer.containsErrors());
  assertEquals(0, lastErrorLocation.getStartOffset());
  assertEquals(unterminated.length(), lastErrorLocation.getEndOffset());

  // A STRING token is still produced for the unterminated literal.
  String lexed = values(tokens(unterminated));
  assertEquals("STRING(unterminated) NEWLINE EOF", lexed);
}
/**
 * A raw string that ends right after a backslash still yields a STRING token
 * and is reported as unterminated.
 */
@Test
public void testUnterminatedRawStringWithEscapingError() throws Exception {
  String tokenNames = names(tokens("r'\\"));
  assertEquals("STRING NEWLINE EOF", tokenNames);
  assertEquals("/some/path.txt:1: unterminated string literal at eof", lastError);
}
}