base/tests/unittests/com/google/idea/blaze/base/lang/buildfile/lexer/AbstractLexerTest.java - intellij - Git at Google

 /*
  * Copyright 2016 The Bazel Authors. All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package com.google.idea.blaze.base.lang.buildfile.lexer;

 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;

 import org.junit.Ignore;
 import org.junit.Test;

 /** Tests of tokenization behavior of {@link BuildLexerBase}. */
 @Ignore
 public abstract class AbstractLexerTest {

   private final BuildLexerBase.LexerMode mode;
   protected String lastError;

   protected AbstractLexerTest(BuildLexerBase.LexerMode mode) {
     this.mode = mode;
   }

   /**
    * Create a lexer which takes input from the specified string. Resets the error handler
    * beforehand.
    */
   protected BuildLexerBase createLexer(String input) {
     lastError = null;
     return new BuildLexerBase(input, 0, mode) {
       @Override
       protected void error(String message, int start, int end) {
         super.error(message, start, end);
         lastError = message;
       }
     };
   }

   protected Token[] tokens(String input) {
     Token[] tokens = createLexer(input).getTokens().toArray(new Token[0]);
     assertNoCharactersMissing(input.length(), tokens);
     return tokens;
   }

   /**
    * Both the syntax highlighter and the parser require every character be accounted for by a
    * lexical element.
    */
   private static void assertNoCharactersMissing(int totalLength, Token[] tokens) {
     if (tokens.length != 0 && tokens[tokens.length - 1].right != totalLength) {
       throw new AssertionError(
           String.format(
               "Last tokenized character '%s' doesn't match document length '%s'",
               tokens[tokens.length - 1].right, totalLength));
     }
     int start = 0;
     for (int i = 0; i < tokens.length; i++) {
       Token token = tokens[i];
       if (token.left != start) {
         throw new AssertionError("Gap/inconsistency at: " + start);
       }
       start = token.right;
     }
   }

   /**
    * Returns a string containing the names of the tokens and their associated values.
    * (String-literals are printed without escaping.)
    */
   protected String values(Token[] tokens) {
     StringBuilder buffer = new StringBuilder();
     for (Token token : tokens) {
       if (isIgnored(token.kind)) {
         continue;
       }
       if (buffer.length() > 0) {
         buffer.append(' ');
       }
       buffer.append(token.kind.name());
       if (token.kind != TokenKind.WHITESPACE && token.value != null) {
         buffer.append('(').append(token.value).append(')');
       }
     }
     return buffer.toString();
   }

   /** Returns a string containing just the names of the tokens. */
   protected String names(Token[] tokens) {
     StringBuilder buf = new StringBuilder();
     for (Token token : tokens) {
       if (isIgnored(token.kind)) {
         continue;
       }
       if (buf.length() > 0) {
         buf.append(' ');
       }
       buf.append(token.kind.name());
     }
     return buf.toString();
   }

   private boolean isIgnored(TokenKind kind) {
     if (mode == BuildLexerBase.LexerMode.Parsing) {
       return kind == TokenKind.WHITESPACE || kind == TokenKind.COMMENT;
     }
     return false;
   }

   /**
    * Returns a string containing just the half-open position intervals of each token. e.g. "[3,4)
    * [4,9)".
    */
   protected String positions(Token[] tokens) {
     StringBuilder buf = new StringBuilder();
     for (Token token : tokens) {
       if (isIgnored(token.kind)) {
         continue;
       }
       if (buf.length() > 0) {
         buf.append(' ');
       }
       buf.append('[').append(token.left).append(',').append(token.right).append(')');
     }
     return buf.toString();
   }

   @Test
   public void testIntegers() throws Exception {
     // Detection of MINUS immediately following integer constant proves we
     // don't consume too many chars.

     // decimal
     assertEquals("INT(12345) MINUS", values(tokens("12345-")));

     // octal
     assertEquals("INT(5349) MINUS", values(tokens("012345-")));

     // octal (bad)
     assertEquals("INT(0) MINUS", values(tokens("012349-")));
     assertEquals("invalid base-8 integer constant: 012349", lastError);

     // hexadecimal (uppercase)
     assertEquals("INT(1193055) MINUS", values(tokens("0X12345F-")));

     // hexadecimal (lowercase)
     assertEquals("INT(1193055) MINUS", values(tokens("0x12345f-")));

     // hexadecimal (lowercase) [note: "g" cause termination of token]
     assertEquals("INT(74565) IDENTIFIER(g) MINUS", values(tokens("0x12345g-")));
   }

   @Test
   public void testStringDelimiters() throws Exception {
     assertEquals("STRING(foo)", values(tokens("\"foo\"")));
     assertEquals("STRING(foo)", values(tokens("'foo'")));
   }

   @Test
   public void testQuotesInStrings() throws Exception {
     assertEquals("STRING(foo'bar)", values(tokens("'foo\\'bar'")));
     assertEquals("STRING(foo'bar)", values(tokens("\"foo'bar\"")));
     assertEquals("STRING(foo\"bar)", values(tokens("'foo\"bar'")));
     assertEquals("STRING(foo\"bar)", values(tokens("\"foo\\\"bar\"")));
   }

   @Test
   public void testStringEscapes() throws Exception {
     assertEquals("STRING(a\tb\nc\rd)", values(tokens("'a\\tb\\nc\\rd'"))); // \t \r \n
     assertEquals("STRING(x\\hx)", values(tokens("'x\\hx'"))); // \h is unknown => "\h"
     assertEquals("STRING(\\$$)", values(tokens("'\\$$'")));
     assertEquals("STRING(ab)", values(tokens("'a\\\nb'"))); // escape end of line
     assertEquals("STRING(abcd)", values(tokens("\"ab\\ucd\"")));
     assertEquals("escape sequence not implemented: \\u", lastError);
   }

   @Test
   public void testRawString() throws Exception {
     assertEquals("STRING(abcd)", values(tokens("r'abcd'")));
     assertEquals("STRING(abcd)", values(tokens("r\"abcd\"")));
     assertEquals("STRING(a\\tb\\nc\\rd)", values(tokens("r'a\\tb\\nc\\rd'"))); // r'a\tb\nc\rd'
     assertEquals("STRING(a\\\")", values(tokens("r\"a\\\"\""))); // r"a\""
     assertEquals("STRING(a\\\\b)", values(tokens("r'a\\\\b'"))); // r'a\\b'
     assertEquals("STRING(ab) IDENTIFIER(r)", values(tokens("r'ab'r")));

     // Unterminated raw string
     values(tokens("r'\\'")); // r'\'
     assertEquals("unterminated string literal at eof", lastError);
   }

   @Test
   public void testTripleRawString() throws Exception {
     // r'''a\ncd'''
     assertEquals("STRING(ab\\ncd)", values(tokens("r'''ab\\ncd'''")));
     // r"""ab
     // cd"""
     assertEquals("STRING(ab\ncd)", values(tokens("\"\"\"ab\ncd\"\"\"")));

     // Unterminated raw string
     values(tokens("r'''\\'''")); // r'''\'''
     assertEquals("unterminated string literal at eof", lastError);
   }

   @Test
   public void testOctalEscapes() throws Exception {
     // Regression test for a bug.
     assertEquals(
         "STRING(\0 \1 \t \u003f I I1 \u00ff \u00ff \u00fe)",
         values(tokens("'\\0 \\1 \\11 \\77 \\111 \\1111 \\377 \\777 \\776'")));
     // Test boundaries (non-octal char, EOF).
     assertEquals("STRING(\1b \1)", values(tokens("'\\1b \\1'")));
   }

   @Test
   public void testTripleQuotedStrings() throws Exception {
     assertEquals("STRING(a\"b'c \n d\"\"e)", values(tokens("\"\"\"a\"b'c \n d\"\"e\"\"\"")));
     assertEquals("STRING(a\"b'c \n d\"\"e)", values(tokens("'''a\"b'c \n d\"\"e'''")));
   }

   @Test
   public void testBadChar() throws Exception {
     assertEquals("IDENTIFIER(a) ILLEGAL($) IDENTIFIER(b)", values(tokens("a$b")));
     assertEquals("invalid character: '$'", lastError);
   }

   @Test
   public void testContainsErrors() throws Exception {
     BuildLexerBase lexerSuccess = createLexer("foo");
     assertFalse(lexerSuccess.containsErrors());

     BuildLexerBase lexerFail = createLexer("f$o");
     assertTrue(lexerFail.containsErrors());

     String s = "'unterminated";
     lexerFail = createLexer(s);
     assertTrue(lexerFail.containsErrors());
     assertEquals("STRING(unterminated)", values(tokens(s)));
   }

   @Test
   public void testUnterminatedEscapedQuotedString() throws Exception {
     // regression test --
     assertEquals(
         "STRING(escaped \n string) NEWLINE IDENTIFIER(next_line)",
         values(tokens("\"escaped \\n string\nnext_line")));

     assertEquals("STRING(escaped \n string)", values(tokens("'escaped \\n string")));
   }
 }
	/*
	* Copyright 2016 The Bazel Authors. All rights reserved.
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package com.google.idea.blaze.base.lang.buildfile.lexer;

	import static org.junit.Assert.assertEquals;
	import static org.junit.Assert.assertFalse;
	import static org.junit.Assert.assertTrue;

	import org.junit.Ignore;
	import org.junit.Test;

	/** Tests of tokenization behavior of {@link BuildLexerBase}. */
	@Ignore
	public abstract class AbstractLexerTest {

	private final BuildLexerBase.LexerMode mode;
	protected String lastError;

	protected AbstractLexerTest(BuildLexerBase.LexerMode mode) {
	this.mode = mode;
	}

	/**
	* Create a lexer which takes input from the specified string. Resets the error handler
	* beforehand.
	*/
	protected BuildLexerBase createLexer(String input) {
	lastError = null;
	return new BuildLexerBase(input, 0, mode) {
	@Override
	protected void error(String message, int start, int end) {
	super.error(message, start, end);
	lastError = message;
	}
	};
	}

	protected Token[] tokens(String input) {
	Token[] tokens = createLexer(input).getTokens().toArray(new Token[0]);
	assertNoCharactersMissing(input.length(), tokens);
	return tokens;
	}

	/**
	* Both the syntax highlighter and the parser require every character be accounted for by a
	* lexical element.
	*/
	private static void assertNoCharactersMissing(int totalLength, Token[] tokens) {
	if (tokens.length != 0 && tokens[tokens.length - 1].right != totalLength) {
	throw new AssertionError(
	String.format(
	"Last tokenized character '%s' doesn't match document length '%s'",
	tokens[tokens.length - 1].right, totalLength));
	}
	int start = 0;
	for (int i = 0; i < tokens.length; i++) {
	Token token = tokens[i];
	if (token.left != start) {
	throw new AssertionError("Gap/inconsistency at: " + start);
	}
	start = token.right;
	}
	}

	/**
	* Returns a string containing the names of the tokens and their associated values.
	* (String-literals are printed without escaping.)
	*/
	protected String values(Token[] tokens) {
	StringBuilder buffer = new StringBuilder();
	for (Token token : tokens) {
	if (isIgnored(token.kind)) {
	continue;
	}
	if (buffer.length() > 0) {
	buffer.append(' ');
	}
	buffer.append(token.kind.name());
	if (token.kind != TokenKind.WHITESPACE && token.value != null) {
	buffer.append('(').append(token.value).append(')');
	}
	}
	return buffer.toString();
	}

	/** Returns a string containing just the names of the tokens. */
	protected String names(Token[] tokens) {
	StringBuilder buf = new StringBuilder();
	for (Token token : tokens) {
	if (isIgnored(token.kind)) {
	continue;
	}
	if (buf.length() > 0) {
	buf.append(' ');
	}
	buf.append(token.kind.name());
	}
	return buf.toString();
	}

	private boolean isIgnored(TokenKind kind) {
	if (mode == BuildLexerBase.LexerMode.Parsing) {
	return kind == TokenKind.WHITESPACE \|\| kind == TokenKind.COMMENT;
	}
	return false;
	}

	/**
	* Returns a string containing just the half-open position intervals of each token. e.g. "[3,4)
	* [4,9)".
	*/
	protected String positions(Token[] tokens) {
	StringBuilder buf = new StringBuilder();
	for (Token token : tokens) {
	if (isIgnored(token.kind)) {
	continue;
	}
	if (buf.length() > 0) {
	buf.append(' ');
	}
	buf.append('[').append(token.left).append(',').append(token.right).append(')');
	}
	return buf.toString();
	}

	@Test
	public void testIntegers() throws Exception {
	// Detection of MINUS immediately following integer constant proves we
	// don't consume too many chars.

	// decimal
	assertEquals("INT(12345) MINUS", values(tokens("12345-")));

	// octal
	assertEquals("INT(5349) MINUS", values(tokens("012345-")));

	// octal (bad)
	assertEquals("INT(0) MINUS", values(tokens("012349-")));
	assertEquals("invalid base-8 integer constant: 012349", lastError);

	// hexadecimal (uppercase)
	assertEquals("INT(1193055) MINUS", values(tokens("0X12345F-")));

	// hexadecimal (lowercase)
	assertEquals("INT(1193055) MINUS", values(tokens("0x12345f-")));

	// hexadecimal (lowercase) [note: "g" cause termination of token]
	assertEquals("INT(74565) IDENTIFIER(g) MINUS", values(tokens("0x12345g-")));
	}

	@Test
	public void testStringDelimiters() throws Exception {
	assertEquals("STRING(foo)", values(tokens("\"foo\"")));
	assertEquals("STRING(foo)", values(tokens("'foo'")));
	}

	@Test
	public void testQuotesInStrings() throws Exception {
	assertEquals("STRING(foo'bar)", values(tokens("'foo\\'bar'")));
	assertEquals("STRING(foo'bar)", values(tokens("\"foo'bar\"")));
	assertEquals("STRING(foo\"bar)", values(tokens("'foo\"bar'")));
	assertEquals("STRING(foo\"bar)", values(tokens("\"foo\\\"bar\"")));
	}

	@Test
	public void testStringEscapes() throws Exception {
	assertEquals("STRING(a\tb\nc\rd)", values(tokens("'a\\tb\\nc\\rd'"))); // \t \r \n
	assertEquals("STRING(x\\hx)", values(tokens("'x\\hx'"))); // \h is unknown => "\h"
	assertEquals("STRING(\\$$)", values(tokens("'\\$$'")));
	assertEquals("STRING(ab)", values(tokens("'a\\\nb'"))); // escape end of line
	assertEquals("STRING(abcd)", values(tokens("\"ab\\ucd\"")));
	assertEquals("escape sequence not implemented: \\u", lastError);
	}

	@Test
	public void testRawString() throws Exception {
	assertEquals("STRING(abcd)", values(tokens("r'abcd'")));
	assertEquals("STRING(abcd)", values(tokens("r\"abcd\"")));
	assertEquals("STRING(a\\tb\\nc\\rd)", values(tokens("r'a\\tb\\nc\\rd'"))); // r'a\tb\nc\rd'
	assertEquals("STRING(a\\\")", values(tokens("r\"a\\\"\""))); // r"a\""
	assertEquals("STRING(a\\\\b)", values(tokens("r'a\\\\b'"))); // r'a\\b'
	assertEquals("STRING(ab) IDENTIFIER(r)", values(tokens("r'ab'r")));

	// Unterminated raw string
	values(tokens("r'\\'")); // r'\'
	assertEquals("unterminated string literal at eof", lastError);
	}

	@Test
	public void testTripleRawString() throws Exception {
	// r'''a\ncd'''
	assertEquals("STRING(ab\\ncd)", values(tokens("r'''ab\\ncd'''")));
	// r"""ab
	// cd"""
	assertEquals("STRING(ab\ncd)", values(tokens("\"\"\"ab\ncd\"\"\"")));

	// Unterminated raw string
	values(tokens("r'''\\'''")); // r'''\'''
	assertEquals("unterminated string literal at eof", lastError);
	}

	@Test
	public void testOctalEscapes() throws Exception {
	// Regression test for a bug.
	assertEquals(
	"STRING(\0 \1 \t \u003f I I1 \u00ff \u00ff \u00fe)",
	values(tokens("'\\0 \\1 \\11 \\77 \\111 \\1111 \\377 \\777 \\776'")));
	// Test boundaries (non-octal char, EOF).
	assertEquals("STRING(\1b \1)", values(tokens("'\\1b \\1'")));
	}

	@Test
	public void testTripleQuotedStrings() throws Exception {
	assertEquals("STRING(a\"b'c \n d\"\"e)", values(tokens("\"\"\"a\"b'c \n d\"\"e\"\"\"")));
	assertEquals("STRING(a\"b'c \n d\"\"e)", values(tokens("'''a\"b'c \n d\"\"e'''")));
	}

	@Test
	public void testBadChar() throws Exception {
	assertEquals("IDENTIFIER(a) ILLEGAL($) IDENTIFIER(b)", values(tokens("a$b")));
	assertEquals("invalid character: '$'", lastError);
	}

	@Test
	public void testContainsErrors() throws Exception {
	BuildLexerBase lexerSuccess = createLexer("foo");
	assertFalse(lexerSuccess.containsErrors());

	BuildLexerBase lexerFail = createLexer("f$o");
	assertTrue(lexerFail.containsErrors());

	String s = "'unterminated";
	lexerFail = createLexer(s);
	assertTrue(lexerFail.containsErrors());
	assertEquals("STRING(unterminated)", values(tokens(s)));
	}

	@Test
	public void testUnterminatedEscapedQuotedString() throws Exception {
	// regression test --
	assertEquals(
	"STRING(escaped \n string) NEWLINE IDENTIFIER(next_line)",
	values(tokens("\"escaped \\n string\nnext_line")));

	assertEquals("STRING(escaped \n string)", values(tokens("'escaped \\n string")));
	}
	}