blob: 1dbec9209af1d1ff0a5b1f36faaaf3262bbc91b8 [file] [log] [blame]
// Copyright 2019 The Bazel Authors. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
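// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and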
// limitations under the License.
import java.nio.charset.StandardCharsets;
import java.util.function.Predicate;
/**
 * Helper class for {@link NinjaLexer}. Contains methods for reading Ninja tokens.
 *
 * <p>Start position for reading is fixed. Mutable state includes the end position, offset in case
 * of escaped symbol, error text if a lexing error occurred, and the flag indicating if zero byte
 * was read. (Zero byte determines the end of the file.)
 *
 * <p>Intended to be used like:
 *
 * <pre>{@code
 * NinjaLexerStep step = new NinjaLexerStep(fragment, 0);
 * while (step.canAdvance()) {
 *   byte b = step.startByte();
 *   // if/switch, then:
 *   step.skipXXX();
 *   // or
 *   step.tryXXX();
 *   // read the end position and error text
 *   if (step.canAdvance()) {
 *     step = step.nextStep();
 *   }
 * }
 * }</pre>
 */
public class NinjaLexerStep {
// Bytes that are accepted as part of an identifier (see readIdentifier and tryReadSimpleVariable).
// NOTE(review): the initializer expression of IDENTIFIER_SYMBOLS is not visible in this view of
// the file; restore it from the original source before compiling.
private static final ImmutableSortedSet<Byte> IDENTIFIER_SYMBOLS =
// Bytes at which readText() stops: line breaks, space, tab, '$', ':' and the zero byte.
private static final ImmutableSortedSet<Byte> TEXT_STOPPERS = createByteSet("\n\r \t$:\u0000");
// We allow # symbol in the path, so the comment on the line with path can only start with space.
// Bytes at which readPath() stops: the same bytes as TEXT_STOPPERS plus '|'.
private static final ImmutableSortedSet<Byte> PATH_STOPPERS = createByteSet("\n\r \t$:|\u0000");
private static ImmutableSortedSet<Byte> createByteSet(String variants) {
ImmutableSortedSet.Builder<Byte> builder = ImmutableSortedSet.naturalOrder();
byte[] bytes = variants.getBytes(StandardCharsets.ISO_8859_1);
for (byte b : bytes) {
// Fragment whose bytes this step reads.
private final ByteBufferFragment fragment;
// Fixed start position of the step inside the fragment.
private final int position;
// Set when a zero byte is found at the start position or while scanning in eatSequence().
private boolean seenZero;
// Error text of the lexing error, or null while no error has been recorded.
private String error;
// End position of the step; -1 until one of the skip/read/try methods or ensureEnd() sets it.
private int end;
/**
 * Creates a step that starts at the given position of the fragment. The end of the step is
 * initially unknown (-1), and the zero-byte flag is set right away if the start byte is zero.
 *
 * @param fragment fragment whose bytes this step reads
 * @param position start of the step inside a fragment; must point to a symbol inside fragment.
 */
public NinjaLexerStep(ByteBufferFragment fragment, int position) {
  Preconditions.checkState(position < fragment.length());
  this.fragment = fragment;
  this.position = position;
  this.end = -1;
  // The precondition above already guarantees that position is below the fragment length.
  this.seenZero = fragment.byteAt(position) == 0;
}
public byte startByte() {
return fragment.byteAt(position);
public NinjaLexerStep nextStep() {
Preconditions.checkState(error == null);
return new NinjaLexerStep(fragment, end);
* Returns true, if there are still symbols to process, i.e. either the next step can be
* constructed, or if current step was just created, so its bounds are not known yet.
public boolean canAdvance() {
return !seenZero && error == null && end < fragment.length();
public ByteBufferFragment getFragment() {
return fragment;
/** Return step bytes, taking into account possible escaped symbol offset. */
public byte[] getBytes() {
return fragment.getBytes(position, end);
public int getPosition() {
return position;
public boolean isSeenZero() {
return seenZero;
public String getError() {
return error;
public int getStart() {
return position;
public int getEnd() {
return end;
private boolean checkForward(int steps, char... chars) {
if ((position + steps) < fragment.length()) {
for (char ch : chars) {
if ((byte) ch == fragment.byteAt(position + steps)) {
return true;
return false;
public void forceError(String error) {
this.error = error;
end = position + 1;
public void skipSpaces() {
end = eatSequence(position, aByte -> ' ' != aByte && '\t' != aByte);
public void skipComment() {
Preconditions.checkState('#' == fragment.byteAt(position));
end = eatSequence(position + 1, aByte -> '\n' == aByte || '\r' == aByte);
public boolean trySkipEscapedNewline() {
Preconditions.checkState('$' == fragment.byteAt(position));
if (checkForward(1, '\n')) {
end = position + 2;
return true;
} else if (checkForward(1, '\r')) {
if (checkForward(2, '\n')) {
end = position + 3;
} else {
error = "Wrong newline separators: \\r should be followed by \\n.";
end = safeEnd(position + 3);
return true;
return false;
public void processLineFeedNewLine() {
Preconditions.checkState('\r' == fragment.byteAt(position));
if (checkForward(1, '\n')) {
end = position + 2;
} else {
error = "Wrong newline separators: \\r should be followed by \\n.";
end = safeEnd(position + 2);
/**
 * Tries to read a variable reference in brackets: '$' followed by '{', optional spaces, an
 * identifier (dots allowed), optional spaces and '}'. The step must start at '$'.
 *
 * <p>Records an error when the identifier is missing or when '}' does not follow it.
 *
 * <p>NOTE(review): closing braces (and possibly other lines) are missing from this view, so the
 * exact nesting of the two return statements cannot be confirmed from here. As shown, the
 * success branch returns with {@code end} still pointing at '}' rather than past it. Verify both
 * points against the original source.
 */
public boolean tryReadVariableInBrackets() {
Preconditions.checkState('$' == fragment.byteAt(position));
if (checkForward(1, '{')) {
// Skip spaces between '{' and the identifier.
end = eatSequence(position + 2, aByte -> ' ' != aByte);
// Inside brackets the identifier may contain dots (withDot = true).
int endOfVariableName = readIdentifier(end, true);
if (endOfVariableName == end) {
// Not a single identifier byte was consumed.
error = "Variable identifier expected.";
// Up to the 'wrong' symbol.
// NOTE(review): unlike the other error branches this value is not passed through safeEnd();
// if the fragment ends right after "${", end presumably becomes fragment.length() + 1.
// Consider safeEnd(endOfVariableName + 1).
end = endOfVariableName + 1;
} else {
// Skip spaces between the identifier and the expected '}'.
end = eatSequence(endOfVariableName, aByte -> ' ' != aByte);
if (end >= fragment.length() || '}' != fragment.byteAt(end)) {
error = "Variable end symbol '}' expected.";
// Include the unexpected symbol, without running past the end of the fragment.
end = safeEnd(end + 1);
} else {
return true;
return false;
public boolean tryReadSimpleVariable() {
Preconditions.checkState('$' == fragment.byteAt(position));
if (position + 1 < fragment.length()
&& IDENTIFIER_SYMBOLS.contains(fragment.byteAt(position + 1))) {
end = readIdentifier(position + 1, false);
return true;
return false;
public boolean tryReadEscapedLiteral() {
if (checkForward(1, '$', ':', ' ')) {
// Escaped literal.
end = position + 2;
return true;
return false;
public void tryReadIdentifier() {
end = readIdentifier(position, true);
if (position >= end) {
error =
"Symbol '%s' is not allowed in the identifier,"
+ " the text fragment with the symbol:\n%s\n",
fragment.subFragment(position, position + 1), fragment.getFragmentAround(position));
end = position + 1;
public boolean tryReadDoublePipe() {
Preconditions.checkState('|' == fragment.byteAt(position));
if (checkForward(1, '|')) {
end = position + 2;
return true;
return false;
public void readText() {
end = eatSequence(position, TEXT_STOPPERS::contains);
public void readPath() {
end = eatSequence(position, PATH_STOPPERS::contains);
private int readIdentifier(int startFrom, boolean withDot) {
if (withDot) {
return eatSequence(startFrom, b -> !IDENTIFIER_SYMBOLS.contains(b) && '.' != b);
} else {
return eatSequence(startFrom, b -> !IDENTIFIER_SYMBOLS.contains(b));
private int safeEnd(int number) {
return Math.min(fragment.length(), number);
private int eatSequence(int startFrom, Predicate<Byte> stop) {
int i = startFrom;
for (; i < fragment.length(); i++) {
byte b = fragment.byteAt(i);
if (0 == b) {
seenZero = true;
return i;
if (stop.test(b)) {
return i;
* For the quick checks outside of skipXXX and tryXXX methods of this class, assume that the step
* takes just one symbol.
public void ensureEnd() {
if (end < 0) {
end = position + 1;