| // Copyright 2006 The Bazel Authors. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| package net.starlark.java.syntax; |
| |
| import static com.google.common.truth.Truth.assertThat; |
| import static org.junit.Assert.assertThrows; |
| |
| import com.google.common.base.Joiner; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.List; |
| import org.junit.Test; |
| import org.junit.runner.RunWith; |
| import org.junit.runners.JUnit4; |
| |
| /** |
| * Tests of tokenization behavior of the {@link Lexer}. |
| */ |
| @RunWith(JUnit4.class) |
| public class LexerTest { |
| |
| // TODO(adonovan): make these tests less unnecessarily stateful. |
| |
| private final List<SyntaxError> errors = new ArrayList<>(); |
| |
| // Reassign in test case to inject non-default options to the Lexer. |
| // Doesn't leak between test cases since each case is its own instance. |
| private FileOptions options = FileOptions.DEFAULT; |
| |
| /** |
| * Create a lexer which takes input from the specified string. Resets the error handler |
| * beforehand. Uses the current state of {@link #options}. |
| */ |
| private Lexer createLexer(String input) { |
| ParserInput inputSource = ParserInput.fromString(input, ""); |
| errors.clear(); |
| return new Lexer(inputSource, errors, options); |
| } |
| |
| private static class Token { |
| TokenKind kind; |
| int start; |
| int end; |
| Object value; |
| |
| @Override |
| public String toString() { |
| return kind == TokenKind.STRING |
| ? "\"" + value + "\"" |
| : value == null ? kind.toString() : value.toString(); |
| } |
| } |
| |
| private ArrayList<Token> allTokens(Lexer lexer) { |
| ArrayList<Token> result = new ArrayList<>(); |
| do { |
| lexer.nextToken(); |
| Token tok = new Token(); |
| tok.kind = lexer.kind; |
| tok.start = lexer.start; |
| tok.end = lexer.end; |
| tok.value = lexer.value; |
| result.add(tok); |
| } while (lexer.kind != TokenKind.EOF); |
| return result; |
| } |
| |
| private Token[] tokens(String input) { |
| ArrayList<Token> result = allTokens(createLexer(input)); |
| return result.toArray(new Token[0]); |
| } |
| |
| /** |
| * Lexes the specified input string, and returns a string containing just the line numbers of each |
| * token. |
| */ |
| private String linenums(String input) { |
| Lexer lexer = createLexer(input); |
| StringBuilder buf = new StringBuilder(); |
| for (Token tok : allTokens(lexer)) { |
| if (buf.length() > 0) { |
| buf.append(' '); |
| } |
| int line = lexer.locs.getLocation(tok.start).line(); |
| buf.append(line); |
| } |
| return buf.toString(); |
| } |
| |
| /** |
| * Returns a string containing the names of the tokens and their associated |
| * values. (String-literals are printed without escaping.) |
| */ |
| private static String values(Token[] tokens) { |
| StringBuilder buffer = new StringBuilder(); |
| for (Token token : tokens) { |
| if (buffer.length() > 0) { |
| buffer.append(' '); |
| } |
| buffer.append(token.kind.name()); |
| if (token.value != null) { |
| buffer.append('(').append(token.value).append(')'); |
| } |
| } |
| return buffer.toString(); |
| } |
| |
| // Scans src, and asserts that the tokens match wantTokens |
| // and that there are no errors. |
| private void check(String src, String wantTokens) { |
| assertThat(values(tokens(src))).isEqualTo(wantTokens); |
| assertThat(errors).isEmpty(); |
| } |
| |
| // Scans src, and asserts that the tokens match wantTokens |
| // and the errors match wantErrors. |
| // Errors are formatted with a caret ^ under the errant column. |
| private void checkErrors(String src, String wantTokens, String... wantErrors) { |
| assertThat(values(tokens(src))).isEqualTo(wantTokens); |
| |
| List<String> gotErrors = new ArrayList<>(); |
| for (SyntaxError err : errors) { |
| String msg = spaces(err.location().column() - 1) + "^ " + err.message(); |
| if (err.location().line() != 1) { |
| msg = String.format("%s (line %d)", msg, err.location().line()); |
| } |
| gotErrors.add(msg); |
| } |
| assertThat(gotErrors).isEqualTo(Arrays.asList(wantErrors)); |
| } |
| |
| private static String spaces(int n) { |
| return new String(new char[n]).replace('\0', ' '); |
| } |
| |
| /** |
| * Returns a string containing just the half-open position intervals of each |
| * token. e.g. "[3,4) [4,9)". |
| */ |
| private static String positions(Token[] tokens) { |
| StringBuilder buf = new StringBuilder(); |
| for (Token tok : tokens) { |
| if (buf.length() > 0) { |
| buf.append(' '); |
| } |
| buf.append('[').append(tok.start).append(',').append(tok.end).append(')'); |
| } |
| return buf.toString(); |
| } |
| |
| @Test |
| public void testBasics1() throws Exception { |
| checkErrors( |
| "wiz) ", // |
| "IDENTIFIER(wiz) RPAREN NEWLINE EOF", |
| " ^ indentation error"); |
| checkErrors( |
| "wiz )", // |
| "IDENTIFIER(wiz) RPAREN NEWLINE EOF", |
| " ^ indentation error"); |
| checkErrors( |
| " wiz)", // |
| "INDENT IDENTIFIER(wiz) RPAREN NEWLINE OUTDENT NEWLINE EOF", |
| " ^ indentation error"); |
| checkErrors( |
| " wiz ) ", // |
| "INDENT IDENTIFIER(wiz) RPAREN NEWLINE OUTDENT NEWLINE EOF", |
| " ^ indentation error"); |
| checkErrors( |
| "wiz\t)", // |
| "IDENTIFIER(wiz) RPAREN NEWLINE EOF", |
| " ^ indentation error"); |
| } |
| |
| @Test |
| public void testBasics2() throws Exception { |
| checkErrors( |
| ")", // |
| "RPAREN NEWLINE EOF", |
| "^ indentation error"); |
| checkErrors( |
| " )", // |
| "INDENT RPAREN NEWLINE OUTDENT NEWLINE EOF", |
| " ^ indentation error"); |
| checkErrors( |
| " ) ", // |
| "INDENT RPAREN NEWLINE OUTDENT NEWLINE EOF", |
| " ^ indentation error"); |
| checkErrors( |
| ") ", // |
| "RPAREN NEWLINE EOF", |
| "^ indentation error"); |
| } |
| |
| @Test |
| public void testBasics3() throws Exception { |
| check("123#456\n789", "INT(123) NEWLINE INT(789) NEWLINE EOF"); |
| check("123 #456\n789", "INT(123) NEWLINE INT(789) NEWLINE EOF"); |
| check("123#456 \n789", "INT(123) NEWLINE INT(789) NEWLINE EOF"); |
| check("123#456\n 789", "INT(123) NEWLINE INDENT INT(789) NEWLINE OUTDENT NEWLINE EOF"); |
| check("123#456\n789 ", "INT(123) NEWLINE INT(789) NEWLINE EOF"); |
| } |
| |
| private static String zeroes(int n) { |
| return new String(new char[n]).replace('\0', '0'); |
| } |
| |
| @Test |
| public void testBasics4() throws Exception { |
| check("", "NEWLINE EOF"); |
| check("# foo", "NEWLINE EOF"); |
| check("1 2 3 4", "INT(1) INT(2) INT(3) INT(4) NEWLINE EOF"); |
| check("1.234", "FLOAT(1.234) NEWLINE EOF"); |
| check( |
| "foo(bar, wiz)", |
| "IDENTIFIER(foo) LPAREN IDENTIFIER(bar) COMMA IDENTIFIER(wiz) RPAREN NEWLINE EOF"); |
| check("1.0e308 1" + zeroes(308) + ".0", "FLOAT(1.0E308) FLOAT(1.0E308) NEWLINE EOF"); |
| checkErrors( |
| "1.0e309 1" + zeroes(309) + ".0", |
| "FLOAT(Infinity) FLOAT(Infinity) NEWLINE EOF", |
| "^ floating-point literal too large", |
| " ^ floating-point literal too large"); |
| } |
| |
| @Test |
| public void testNoWhiteSpaceBetweenTokens() throws Exception { |
| check("6or()", "INT(6) OR LPAREN RPAREN NEWLINE EOF"); |
| check("0in(''and[])", "INT(0) IN LPAREN STRING() AND LBRACKET RBRACKET RPAREN NEWLINE EOF"); |
| |
| checkErrors( |
| "0or()", |
| "INT(0) IDENTIFIER(r) LPAREN RPAREN NEWLINE EOF", |
| "^ invalid base-8 integer literal: 0o"); |
| } |
| |
| @Test |
| public void testNonAsciiIdentifiers() throws Exception { |
| checkErrors( |
| "ümlaut", // |
| "IDENTIFIER(mlaut) NEWLINE EOF", |
| "^ invalid character: 'ü'"); |
| checkErrors( |
| "umläut", // |
| "IDENTIFIER(uml) IDENTIFIER(ut) NEWLINE EOF", |
| " ^ invalid character: 'ä'"); |
| } |
| |
| @Test |
| public void testCrLf() throws Exception { |
| check("\r\n\r\n", "NEWLINE EOF"); |
| check("\r\n\r1\r\r\n", "INT(1) NEWLINE EOF"); |
| check("# foo\r\n# bar\r\n", "NEWLINE EOF"); |
| } |
| |
| @Test |
| public void testIntegers() throws Exception { |
| // Detection of MINUS immediately following integer constant proves we |
| // don't consume too many chars. |
| |
| // decimal |
| check("12345-", "INT(12345) MINUS NEWLINE EOF"); |
| |
| // TODO(adonovan): add tests for 0b binary literals |
| |
| // octal |
| check("0o12345-", "INT(5349) MINUS NEWLINE EOF"); |
| check("0O77", "INT(63) NEWLINE EOF"); |
| check("0o1o2349-", "INT(1) IDENTIFIER(o2349) MINUS NEWLINE EOF"); |
| checkErrors( |
| "0o12349-", // |
| "INT(0) MINUS NEWLINE EOF", |
| "^ invalid base-8 integer literal: 0o12349"); |
| checkErrors( |
| "0o", // |
| "INT(0) NEWLINE EOF", |
| "^ invalid base-8 integer literal: 0o"); |
| checkErrors( |
| "012345", // |
| "INT(0) NEWLINE EOF", |
| "^ invalid octal literal: 012345 (use '0o12345')"); |
| |
| // hexadecimal (uppercase) |
| check("0X12345F-", "INT(1193055) MINUS NEWLINE EOF"); |
| |
| // hexadecimal (lowercase) |
| check("0x12345f-", "INT(1193055) MINUS NEWLINE EOF"); |
| |
| // hexadecimal (lowercase) [note: "g" cause termination of token] |
| check("0x12345g-", "INT(74565) IDENTIFIER(g) MINUS NEWLINE EOF"); |
| |
| // long |
| check("1234567890 0x123456789ABCDEF", "INT(1234567890) INT(81985529216486895) NEWLINE EOF"); |
| // big |
| check( |
| "123456789123456789123456789 0xABCDEFABCDEFABCDEFABCDEFABCDEF", |
| "INT(123456789123456789123456789) INT(892059645479943313385225296292859375) NEWLINE EOF"); |
| } |
| |
| @Test |
| public void testNumbersAndDot() throws Exception { |
| check("0", "INT(0) NEWLINE EOF"); |
| check("0.", "FLOAT(0.0) NEWLINE EOF"); |
| check(".0", "FLOAT(0.0) NEWLINE EOF"); |
| checkErrors( |
| "1e", // |
| "FLOAT(0.0) NEWLINE EOF", |
| "^ invalid float literal"); |
| checkErrors( |
| "1e+x", // |
| "FLOAT(0.0) IDENTIFIER(x) NEWLINE EOF", |
| "^ invalid float literal"); |
| check("1e1", "FLOAT(10.0) NEWLINE EOF"); |
| check(".e1", "DOT IDENTIFIER(e1) NEWLINE EOF"); |
| check("1.e1", "FLOAT(10.0) NEWLINE EOF"); |
| check("1.e+1", "FLOAT(10.0) NEWLINE EOF"); |
| check("1.e-1", "FLOAT(0.1) NEWLINE EOF"); |
| |
| check("1.2345", "FLOAT(1.2345) NEWLINE EOF"); |
| check("1.2.345", "FLOAT(1.2) FLOAT(0.345) NEWLINE EOF"); |
| |
| check("1.0E10", "FLOAT(1.0E10) NEWLINE EOF"); |
| check("1.03E-10", "FLOAT(1.03E-10) NEWLINE EOF"); |
| |
| check(". 123", "DOT INT(123) NEWLINE EOF"); |
| check(".123", "FLOAT(0.123) NEWLINE EOF"); |
| check(".abc", "DOT IDENTIFIER(abc) NEWLINE EOF"); |
| |
| check("foo.123", "IDENTIFIER(foo) FLOAT(0.123) NEWLINE EOF"); |
| check("foo.bcd", "IDENTIFIER(foo) DOT IDENTIFIER(bcd) NEWLINE EOF"); // 'b' are hex chars |
| check("foo.xyz", "IDENTIFIER(foo) DOT IDENTIFIER(xyz) NEWLINE EOF"); |
| } |
| |
| @Test |
| public void testStringDelimiters() throws Exception { |
| check("\"foo\"", "STRING(foo) NEWLINE EOF"); |
| check("'foo'", "STRING(foo) NEWLINE EOF"); |
| } |
| |
| @Test |
| public void testQuotesInStrings() throws Exception { |
| check("'foo\\'bar'", "STRING(foo'bar) NEWLINE EOF"); |
| check("\"foo'bar\"", "STRING(foo'bar) NEWLINE EOF"); |
| check("'foo\"bar'", "STRING(foo\"bar) NEWLINE EOF"); |
| check("\"foo\\\"bar\"", "STRING(foo\"bar) NEWLINE EOF"); |
| } |
| |
| @Test |
| public void testStringEscapes() throws Exception { |
| check( |
| "'a\\tb\\nc\\rd\\fe\\vf\\ag\\bh'", |
| "STRING(a\tb\nc\rd\fe\u000bf\u0007g\bh) NEWLINE EOF"); // \t \r \n \f \v \a \b |
| checkErrors( |
| "'x\\hx'", // |
| "STRING(x\\hx) NEWLINE EOF", |
| " ^ invalid escape sequence: \\h. Use '\\\\' to insert '\\'."); |
| checkErrors( |
| "'\\$$'", // |
| "STRING(\\$$) NEWLINE EOF", |
| " ^ invalid escape sequence: \\$. Use '\\\\' to insert '\\'."); |
| check("'a\\\nb'", "STRING(ab) NEWLINE EOF"); // escape end of line |
| checkErrors( |
| "\"ab\\ucd\"", // |
| "STRING(ab\\ucd) NEWLINE EOF", |
| " ^ invalid escape sequence: \\u. Use '\\\\' to insert '\\'."); |
| } |
| |
| @Test |
| public void testEscapedCrlfInString() throws Exception { |
| check("'a\\\r\nb'", "STRING(ab) NEWLINE EOF"); |
| check("\"a\\\r\nb\"", "STRING(ab) NEWLINE EOF"); |
| check("\"\"\"a\\\r\nb\"\"\"", "STRING(ab) NEWLINE EOF"); |
| check("'''a\\\r\nb'''", "STRING(ab) NEWLINE EOF"); |
| check("r'a\\\r\nb'", "STRING(a\\\nb) NEWLINE EOF"); |
| check("r\"a\\\r\nb\"", "STRING(a\\\nb) NEWLINE EOF"); |
| check("r\"a\\\r\n\\\nb\"", "STRING(a\\\n\\\nb) NEWLINE EOF"); |
| } |
| |
| @Test |
| public void testRawString() throws Exception { |
| check("r'abcd'", "STRING(abcd) NEWLINE EOF"); |
| check("r\"abcd\"", "STRING(abcd) NEWLINE EOF"); |
| check("r'a\\tb\\nc\\rd'", "STRING(a\\tb\\nc\\rd) NEWLINE EOF"); // r'a\tb\nc\rd' |
| check("r\"a\\\"\"", "STRING(a\\\") NEWLINE EOF"); // r"a\"" |
| check("r'a\\\\b'", "STRING(a\\\\b) NEWLINE EOF"); // r'a\\b' |
| check("r'ab'r", "STRING(ab) IDENTIFIER(r) NEWLINE EOF"); |
| |
| // Unclosed raw string |
| checkErrors( |
| "+ r'\\'", // r'\' |
| "PLUS STRING(\\') NEWLINE EOF", |
| " ^ unclosed string literal"); |
| } |
| |
| @Test |
| public void testTripleRawString() throws Exception { |
| // r'''a\ncd''' |
| check("r'''ab\\ncd'''", "STRING(ab\\ncd) NEWLINE EOF"); |
| // r"""ab |
| // cd""" |
| check("\"\"\"ab\ncd\"\"\"", "STRING(ab\ncd) NEWLINE EOF"); |
| |
| // Unclosed raw string |
| checkErrors( |
| "r'''\\'''", // r'''\''' |
| "STRING(\\''') NEWLINE EOF", |
| "^ unclosed string literal"); |
| } |
| |
| @Test |
| public void testOctalEscapes() throws Exception { |
| // Regression test for a bug. |
| check( |
| "'\\0 \\1 \\11 \\77 \\111 \\1111 \\377'", |
| "STRING(\0 \1 \t \u003f I I1 \u00ff) NEWLINE EOF"); |
| // Test boundaries (non-octal char, EOF). |
| check("'\\1b \\1'", "STRING(\1b \1) NEWLINE EOF"); |
| // Test first digit out-of-range. |
| checkErrors( |
| "'\\800'", |
| "STRING(\\800) NEWLINE EOF", |
| " ^ invalid escape sequence: \\8. Use '\\\\' to insert '\\'."); |
| } |
| |
| @Test |
| public void testOctalEscapeOutOfRange() throws Exception { |
| // Capped at U+FF. |
| checkErrors( |
| "'\\777'", |
| "STRING(\u00ff) NEWLINE EOF", |
| " ^ octal escape sequence out of range (maximum is \\377)"); |
| // Emitted value is masked by (not capped to) 0xFF. |
| checkErrors( |
| "'\\401'", |
| "STRING(\u0001) NEWLINE EOF", |
| " ^ octal escape sequence out of range (maximum is \\377)"); |
| // Multiple errors. |
| checkErrors( |
| "'\\401\\402'", |
| "STRING(\u0001\u0002) NEWLINE EOF", |
| " ^ octal escape sequence out of range (maximum is \\377)", |
| " ^ octal escape sequence out of range (maximum is \\377)"); |
| } |
| |
| @Test |
| public void testTripleQuotedStrings() throws Exception { |
| check("\"\"\"a\"b'c \n d\"\"e\"\"\"", "STRING(a\"b'c \n d\"\"e) NEWLINE EOF"); |
| check("'''a\"b'c \n d\"\"e'''", "STRING(a\"b'c \n d\"\"e) NEWLINE EOF"); |
| } |
| |
| @Test |
| public void testStringContainingNonAsciiRawCharacter() throws Exception { |
| // Lexer is fine with U+80 to U+FF by default. |
| check("'\u0080\u00ff'", "STRING(\u0080\u00ff) NEWLINE EOF"); |
| // If the ParserInput provides content greater than 8 bits wide, the Lexer tolerates it. |
| check("'\u0100\uffff'", "STRING(\u0100\uffff) NEWLINE EOF"); |
| |
| options = FileOptions.builder().stringLiteralsAreAsciiOnly(true).build(); |
| // Ok, U+7F is ASCII. |
| check("'\u007f'", "STRING(\u007f) NEWLINE EOF"); |
| // With U+80 and higher, we error but still emit the token with the original value (no masking |
| // down to ASCII). |
| checkErrors( |
| "'abc\u0080xyz'", |
| "STRING(abc\u0080xyz) NEWLINE EOF", |
| " ^ string literal contains non-ASCII character"); |
| checkErrors( |
| "'abc\u0100xyz'", |
| "STRING(abc\u0100xyz) NEWLINE EOF", |
| " ^ string literal contains non-ASCII character"); |
| // Test a case with an escape sequence to trigger the longer code path. |
| checkErrors( |
| "'abc\u0080xyz\\n'", |
| "STRING(abc\u0080xyz\n) NEWLINE EOF", |
| " ^ string literal contains non-ASCII character"); |
| // Multiple errors. |
| checkErrors( |
| "'\u0080\u0081'", |
| "STRING(\u0080\u0081) NEWLINE EOF", |
| " ^ string literal contains non-ASCII character", |
| " ^ string literal contains non-ASCII character"); |
| } |
| |
| @Test |
| public void testStringContainingNonAsciiOctalEscapes() throws Exception { |
| // Lexer is fine with U+80 to U+FF by default. |
| check("'\\200\\377'", "STRING(\200\377) NEWLINE EOF"); |
| |
| options = FileOptions.builder().stringLiteralsAreAsciiOnly(true).build(); |
| // Ok, U+7F is ASCII. |
| check("'\\177'", "STRING(\177) NEWLINE EOF"); |
| // With U+80 to U+FF, we error but still emit the token with the original value (no masking |
| // down to ASCII). |
| checkErrors( |
| "'\\200'", |
| "STRING(\200) NEWLINE EOF", |
| " ^ octal escape sequence denotes non-ASCII character"); |
| // Out-of-range error takes priority over non-ASCII error. As in the case without the ASCII-only |
| // option, the value is masked down to U+FF. |
| checkErrors( |
| "'\\400'", |
| "STRING(\000) NEWLINE EOF", |
| " ^ octal escape sequence out of range (maximum is \\377)"); |
| // Multiple errors. |
| checkErrors( |
| "'\\200\\201'", |
| "STRING(\200\201) NEWLINE EOF", |
| " ^ octal escape sequence denotes non-ASCII character", |
| " ^ octal escape sequence denotes non-ASCII character"); |
| } |
| |
| @Test |
| public void testBadChar() throws Exception { |
| checkErrors( |
| "a$b", // |
| "IDENTIFIER(a) IDENTIFIER(b) NEWLINE EOF", |
| " ^ invalid character: '$'"); |
| } |
| |
| @Test |
| public void testIndentation() throws Exception { |
| check("1\n2\n3", "INT(1) NEWLINE INT(2) NEWLINE INT(3) NEWLINE EOF"); |
| check( |
| "1\n 2\n 3\n4 ", |
| "INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT " + "INT(4) NEWLINE EOF"); |
| check( |
| "1\n 2\n 3", |
| "INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT " + "NEWLINE EOF"); |
| check( |
| "1\n 2\n 3", |
| "INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE " |
| + "OUTDENT OUTDENT NEWLINE EOF"); |
| check( |
| "1\n 2\n 3\n 4\n5", |
| "INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE " |
| + "OUTDENT INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF"); |
| |
| checkErrors( |
| "1\n 2\n 3\n 4\n5", |
| "INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE " |
| + "OUTDENT INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF", |
| " ^ indentation error (line 4)"); |
| } |
| |
| @Test |
| public void testIndentationWithTab() throws Exception { |
| checkErrors( |
| "def x():\n" + "\tpass", // |
| "DEF IDENTIFIER(x) LPAREN RPAREN COLON NEWLINE " |
| + "INDENT PASS NEWLINE OUTDENT NEWLINE EOF", |
| " ^ Tab characters are not allowed for indentation. Use spaces instead. (line 2)"); |
| } |
| |
| @Test |
| public void testIndentationWithCrLf() throws Exception { |
| check("1\r\n 2\r\n", "INT(1) NEWLINE INDENT INT(2) NEWLINE OUTDENT NEWLINE EOF"); |
| check("1\r\n 2\r\n\r\n", "INT(1) NEWLINE INDENT INT(2) NEWLINE OUTDENT NEWLINE EOF"); |
| check( |
| "1\r\n 2\r\n 3\r\n 4\r\n5", |
| "INT(1) NEWLINE INDENT INT(2) NEWLINE INDENT INT(3) NEWLINE OUTDENT INT(4) " |
| + "NEWLINE OUTDENT INT(5) NEWLINE EOF"); |
| check( |
| "1\r\n 2\r\n\r\n 3\r\n4", |
| "INT(1) NEWLINE INDENT INT(2) NEWLINE INT(3) NEWLINE OUTDENT INT(4) NEWLINE EOF"); |
| } |
| |
| @Test |
| public void testIndentationInsideParens() throws Exception { |
| // Indentation is ignored inside parens: |
| check("1 (\n 2\n 3\n 4\n5", "INT(1) LPAREN INT(2) INT(3) INT(4) INT(5) NEWLINE EOF"); |
| check("1 {\n 2\n 3\n 4\n5", "INT(1) LBRACE INT(2) INT(3) INT(4) INT(5) NEWLINE EOF"); |
| check("1 [\n 2\n 3\n 4\n5", "INT(1) LBRACKET INT(2) INT(3) INT(4) INT(5) NEWLINE EOF"); |
| check( |
| "1 [\n 2]\n 3\n 4\n5", |
| "INT(1) LBRACKET INT(2) RBRACKET NEWLINE INDENT INT(3) " |
| + "NEWLINE INT(4) NEWLINE OUTDENT INT(5) NEWLINE EOF"); |
| } |
| |
| @Test |
| public void testIndentationAtEOF() throws Exception { |
| // Matching OUTDENTS are created at EOF: |
| check("\n 1", "INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF"); |
| } |
| |
| @Test |
| public void testIndentationOnFirstLine() throws Exception { |
| check(" 1", "INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF"); |
| check("\n\n 1", "INDENT INT(1) NEWLINE OUTDENT NEWLINE EOF"); |
| } |
| |
| @Test |
| public void testBlankLineIndentation() throws Exception { |
| // Blank lines and comment lines should not generate any newlines indents |
| // (but note that every input ends with NEWLINE EOF). |
| check("\n #\n", "NEWLINE EOF"); |
| check(" #", "NEWLINE EOF"); |
| check(" #\n", "NEWLINE EOF"); |
| check(" #comment\n", "NEWLINE EOF"); |
| check( |
| "def f(x):\n" |
| + // |
| " # comment\n" |
| + // |
| "\n" |
| + // |
| " \n" |
| + // |
| " return x\n", |
| "DEF IDENTIFIER(f) LPAREN IDENTIFIER(x) RPAREN COLON NEWLINE " |
| + "INDENT RETURN IDENTIFIER(x) NEWLINE " |
| + "OUTDENT NEWLINE EOF"); |
| } |
| |
| @Test |
| public void testBackslash() throws Exception { |
| check("a\\\nb", "IDENTIFIER(a) IDENTIFIER(b) NEWLINE EOF"); |
| check("a\\\r\nb", "IDENTIFIER(a) IDENTIFIER(b) NEWLINE EOF"); |
| check("a\\ b", "IDENTIFIER(a) ILLEGAL(\\) IDENTIFIER(b) NEWLINE EOF"); |
| check("a(\\\n2)", "IDENTIFIER(a) LPAREN INT(2) RPAREN NEWLINE EOF"); |
| } |
| |
| @Test |
| public void testTokenPositions() throws Exception { |
| assertThat(positions(tokens("foo(bar, {1: 'quux'}, \"\"\"b\"\"\", r\"\")"))) |
| .isEqualTo( |
| // foo ( bar , { 1 : |
| "[0,3) [3,4) [4,7) [7,8) [9,10) [10,11) [11,12)" |
| // 'quux' } , """b""" , r"" ) NEWLINE EOF |
| + " [13,19) [19,20) [20,21) [22,29) [29,30) [31,34) [34,35) [35,35) [35,35)"); |
| } |
| |
| @Test |
| public void testLineNumbers() throws Exception { |
| assertThat(linenums("foo = 1\nbar = 2\n\nwiz = 3")).isEqualTo("1 1 1 1 2 2 2 2 4 4 4 4 4"); |
| |
| checkErrors( |
| "foo = 1\n" + "bar = 2\n" + "\n" + "wiz = $\n" + "bar = 2", |
| "IDENTIFIER(foo) EQUALS INT(1) NEWLINE " |
| + "IDENTIFIER(bar) EQUALS INT(2) NEWLINE " |
| + "IDENTIFIER(wiz) EQUALS NEWLINE " |
| + "IDENTIFIER(bar) EQUALS INT(2) NEWLINE EOF", |
| " ^ invalid character: '$' (line 4)"); |
| |
| // '\\n' in string should not increment linenum: |
| String s = // |
| "1\n'foo\\nbar'\3"; |
| checkErrors( |
| s, // |
| "INT(1) NEWLINE STRING(foo\nbar) NEWLINE EOF", |
| " ^ invalid character: '\3' (line 2)"); |
| assertThat(linenums(s)).isEqualTo("1 1 2 2 2"); |
| } |
| |
| @Test |
| public void testContainsErrors() throws Exception { |
| check("foo", "IDENTIFIER(foo) NEWLINE EOF"); |
| checkErrors( |
| "f$o", // |
| "IDENTIFIER(f) IDENTIFIER(o) NEWLINE EOF", |
| " ^ invalid character: '$'"); |
| checkErrors( |
| "+ 'unterminated", "PLUS STRING(unterminated) NEWLINE EOF", " ^ unclosed string literal"); |
| } |
| |
| @Test |
| public void testUnclosedRawStringWithEscapingError() throws Exception { |
| checkErrors( |
| "r'\\", |
| "STRING(\\) NEWLINE EOF", // |
| "^ unclosed string literal"); |
| } |
| |
| @Test |
| public void testFirstCharIsTab() { |
| checkErrors( |
| "\t", // |
| "NEWLINE EOF", |
| " ^ Tab characters are not allowed for indentation. Use spaces instead."); |
| } |
| |
| /** |
| * Returns the first error whose string form contains the specified substring, or throws an |
| * informative AssertionError if there is none. |
| * |
| * <p>Exposed for use by other frontend tests. |
| */ |
| // TODO(adonovan): move to ParserTest |
| static SyntaxError assertContainsError(List<SyntaxError> errors, String substr) { |
| for (SyntaxError error : errors) { |
| if (error.toString().contains(substr)) { |
| return error; |
| } |
| } |
| if (errors.isEmpty()) { |
| throw new AssertionError("no errors, want '" + substr + "'"); |
| } else { |
| throw new AssertionError( |
| "error '" + substr + "' not found, but got these:\n" + Joiner.on("\n").join(errors)); |
| } |
| } |
| |
| @Test |
| public void testStringLiteralUnquote() { |
| // Coverage here needn't be exhaustive, |
| // as the underlying logic is that of the Lexer. |
| assertUnquoteEquals("'hello'", "hello"); |
| assertUnquoteEquals("\"hello\"", "hello"); |
| assertUnquoteEquals("r'a\\b\"c'", "a\\b\"c"); |
| |
| assertUnquoteError("", "invalid syntax"); // empty |
| assertUnquoteError(" 'hello'", "invalid syntax"); // leading space |
| assertUnquoteError("'hello' ", "invalid syntax"); // trailing space |
| assertUnquoteError("x", "invalid syntax"); // identifier |
| assertUnquoteError("r", "invalid syntax"); // identifier (same prefix as r'...') |
| assertUnquoteError("r2", "invalid syntax"); // identifier |
| assertUnquoteError("1", "invalid syntax"); // number |
| assertUnquoteError("'", "unclosed string literal"); |
| assertUnquoteError("\"", "unclosed string literal"); |
| assertUnquoteError("'abc", "unclosed string literal"); |
| assertUnquoteError("'\\g'", "invalid escape sequence: \\g. Use '\\\\' to insert '\\'."); |
| } |
| |
| private static void assertUnquoteEquals(String literal, String value) { |
| assertThat(StringLiteral.unquote(literal)).isEqualTo(value); |
| } |
| |
| private static void assertUnquoteError(String badLiteral, String errorSubstring) { |
| IllegalArgumentException ex = |
| assertThrows(IllegalArgumentException.class, () -> StringLiteral.unquote(badLiteral)); |
| assertThat(ex).hasMessageThat().contains(errorSubstring); |
| } |
| } |