blob: 600eda47ec38418552d0ef0afe3f2c74b7bb3f5d [file] [log] [blame]
// Copyright 2019 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package com.google.devtools.build.lib.bazel.rules.ninja.lexer;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.devtools.build.lib.bazel.rules.ninja.file.ByteBufferFragment;
import com.google.devtools.build.lib.util.Pair;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;
/** Ninja files lexer. The types of tokens: {@link NinjaToken}. */
public class NinjaLexer {
// They all are having different first letter, let's use it.
private static final ImmutableMap<Byte, NinjaToken> KEYWORD_MAP =
// There is no #of() method for 6 key-value pairs.
Stream.of(
NinjaToken.BUILD,
NinjaToken.RULE,
NinjaToken.DEFAULT,
NinjaToken.SUBNINJA,
NinjaToken.INCLUDE,
NinjaToken.POOL)
.collect(ImmutableMap.toImmutableMap(token -> token.getBytes()[0], nt -> nt));
private final ByteBufferFragment fragment;
private NinjaLexerStep step;
private final List<Pair<Integer, Integer>> ranges;
private final List<NinjaToken> tokens;
/** Flag to give a hint how letters should be interpreted (as text, identifier, path). */
private TextKind expectedTextKind = TextKind.IDENTIFIER;
/** @param fragment fragment to do the lexing on */
public NinjaLexer(ByteBufferFragment fragment) {
this.fragment = fragment;
step = new NinjaLexerStep(fragment, 0);
ranges = Lists.newArrayList();
tokens = Lists.newArrayList();
}
/**
* Returns true if following nextToken() call may produce meaningful token. However, it may happen
* that nextToken() will only produce {@link NinjaToken#EOF}, {@link NinjaToken#ZERO} or {@link
* NinjaToken#ERROR}.
*
* <p>It is an optimization here to check for 'seen' flags: nextToken() may return some meaningful
* token, and at the same time already discover the end of file or zero byte.
*/
public boolean hasNextToken() {
return step.canAdvance();
}
/**
* Returns {@link NinjaToken} type of the token for the next non-space and non-comment token
* at/after current <code>position</code> position.
*/
public NinjaToken nextToken() {
Preconditions.checkState(step.canAdvance());
while (step.canAdvance()) {
// First byte is checked right in constructor.
if (step.isSeenZero()) {
return push(NinjaToken.ZERO);
}
byte b = step.startByte();
switch (b) {
case ' ':
step.skipSpaces();
if (step.getPosition() == 0
|| NinjaToken.NEWLINE.equals(Iterables.getLast(tokens, null))) {
return push(NinjaToken.INDENT);
}
break;
case '\t':
step.forceError("Tabs are not allowed, use spaces.");
return push(NinjaToken.ERROR);
case '\r':
expectedTextKind = TextKind.IDENTIFIER;
step.processLineFeedNewLine();
return push(NinjaToken.NEWLINE);
case '\n':
expectedTextKind = TextKind.IDENTIFIER;
return push(NinjaToken.NEWLINE);
case '#':
step.skipComment();
break;
case '=':
if (TextKind.TEXT.equals(expectedTextKind)) {
step.readText();
return push(NinjaToken.TEXT);
}
return push(NinjaToken.EQUALS);
case ':':
return push(NinjaToken.COLON);
case '|':
if (TextKind.TEXT.equals(expectedTextKind)) {
step.readText();
return push(NinjaToken.TEXT);
}
if (step.tryReadDoublePipe()) {
return push(NinjaToken.PIPE2);
}
return push(NinjaToken.PIPE);
case '$':
if (step.trySkipEscapedNewline()) {
break;
}
if (step.tryReadVariableInBrackets() || step.tryReadSimpleVariable()) {
return push(NinjaToken.VARIABLE);
}
if (step.tryReadEscapedLiteral()) {
return push(NinjaToken.ESCAPED_TEXT);
}
step.forceError("Bad $-escape (literal $ must be written as $$)");
return push(NinjaToken.ERROR);
default:
switch (expectedTextKind) {
case TEXT:
step.readText();
return push(NinjaToken.TEXT);
case PATH:
step.readPath();
return push(NinjaToken.TEXT);
case IDENTIFIER:
step.tryReadIdentifier();
if (step.getError() == null) {
byte[] bytes = step.getBytes();
NinjaToken keywordToken = KEYWORD_MAP.get(bytes[0]);
if (keywordToken != null && Arrays.equals(keywordToken.getBytes(), bytes)) {
return push(keywordToken);
}
}
return push(NinjaToken.IDENTIFIER);
}
throw new IllegalStateException();
}
if (step.canAdvance()) {
step.ensureEnd();
// For all skipping cases: move to the next step.
step = step.nextStep();
}
}
return push(NinjaToken.EOF);
}
/** Return the bytes of the token, returned by previous nextToken() call. */
public byte[] getTokenBytes() {
if (ranges.isEmpty()) {
throw new IllegalStateException();
}
return fragment.getBytes(getLastStart(), getLastEnd());
}
private NinjaToken push(NinjaToken token) {
step.ensureEnd();
ranges.add(Pair.of(step.getStart(), step.getEnd()));
tokens.add(token);
if (step.getError() != null) {
// Do not move in case of error.
return NinjaToken.ERROR;
}
if (step.canAdvance()) {
step = step.nextStep();
}
return token;
}
public boolean haveReadAnyTokens() {
return !ranges.isEmpty();
}
public int getLastStart() {
if (ranges.isEmpty()) {
throw new IllegalStateException();
}
return Preconditions.checkNotNull(Iterables.getLast(ranges).getFirst());
}
public int getLastEnd() {
if (ranges.isEmpty()) {
throw new IllegalStateException();
}
return Preconditions.checkNotNull(Iterables.getLast(ranges).getSecond());
}
/** Give a hint how letters should be interpreted (as text, identifier, path). */
public void setExpectedTextKind(TextKind expectedTextKind) {
this.expectedTextKind = expectedTextKind;
}
/** Undo the previously read token. */
public void undo() {
Preconditions.checkState(ranges.size() == tokens.size());
ranges.remove(ranges.size() - 1);
tokens.remove(tokens.size() - 1);
step = new NinjaLexerStep(fragment, ranges.isEmpty() ? 0 : getLastEnd());
expectedTextKind = TextKind.IDENTIFIER;
}
public String getError() {
return step.getError();
}
public ByteBufferFragment getFragment() {
return fragment;
}
/**
* Enum with variants of text fragments parsing: as identifier (most restricted set of symbols),
* path (all spaces should be $-escaped, and | symbol has a special meaning), or text.
*/
public enum TextKind {
IDENTIFIER,
PATH,
TEXT
}
}