src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/lexer/NinjaLexerStep.java - bazel - Git at Google

 // Copyright 2019 The Bazel Authors. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //

 package com.google.devtools.build.lib.bazel.rules.ninja.lexer;

 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableSortedSet;
 import com.google.devtools.build.lib.bazel.rules.ninja.file.ByteBufferFragment;
 import java.nio.charset.StandardCharsets;
 import java.util.function.Predicate;

 /**
  * Helper class for {@link NinjaLexer}. Contains methods for reading Ninja tokens.
  *
  * <p>Start position for reading is fixed. Mutable state includes the end position, offset in case
  * of escaped symbol, error text if a lexing error occurred, and the flag indicating if zero byte
  * was read. (Zero byte determines the end of the file.)
  *
  * <p>Intended to be used like: <code>
  * NinjaLexerStep step = new NinjaLexerStep(fragment, 0);
  * while (step.hasNext()) {
  *   byte b = step.startByte();
  *   // if/switch, then:
  *   step.skipXXX();
  *   // or
  *   step.tryXXX();
  *   // read the end position and error text
  *   if (step.hasNext()) {
  *     step = nextStep();
  *   }
  * }
  * </code>
  */
 public class NinjaLexerStep {
   private static final ImmutableSortedSet<Byte> IDENTIFIER_SYMBOLS =
       createByteSet("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-");
   private static final ImmutableSortedSet<Byte> TEXT_STOPPERS = createByteSet("\n\r \t$:\u0000");
   // We allow # symbol in the path, so the comment on the line with path can only start with space.
   private static final ImmutableSortedSet<Byte> PATH_STOPPERS = createByteSet("\n\r \t$:|\u0000");

   private static ImmutableSortedSet<Byte> createByteSet(String variants) {
     ImmutableSortedSet.Builder<Byte> builder = ImmutableSortedSet.naturalOrder();
     byte[] bytes = variants.getBytes(StandardCharsets.ISO_8859_1);
     for (byte b : bytes) {
       builder.add(b);
     }
     return builder.build();
   }

   private final ByteBufferFragment fragment;
   private final int position;

   private boolean seenZero;
   private String error;
   private int end;

   /**
    * @param position start of the step inside a fragment; must point to a symbol inside fragment.
    */
   public NinjaLexerStep(ByteBufferFragment fragment, int position) {
     Preconditions.checkState(position < fragment.length());
     this.fragment = fragment;
     this.position = position;
     end = -1;
     seenZero = position < fragment.length() && (0 == fragment.byteAt(position));
   }

   public byte startByte() {
     return fragment.byteAt(position);
   }

   public NinjaLexerStep nextStep() {
     Preconditions.checkState(error == null);
     Preconditions.checkState(!seenZero);

     return new NinjaLexerStep(fragment, end);
   }

   /**
    * Returns true, if there are still symbols to process, i.e. either the next step can be
    * constructed, or if current step was just created, so its bounds are not known yet.
    */
   public boolean canAdvance() {
     return !seenZero && error == null && end < fragment.length();
   }

   public ByteBufferFragment getFragment() {
     return fragment;
   }

   /** Return step bytes, taking into account possible escaped symbol offset. */
   public byte[] getBytes() {
     return fragment.getBytes(position, end);
   }

   public int getPosition() {
     return position;
   }

   public boolean isSeenZero() {
     return seenZero;
   }

   public String getError() {
     return error;
   }

   public int getStart() {
     return position;
   }

   public int getEnd() {
     return end;
   }

   private boolean checkForward(int steps, char... chars) {
     if ((position + steps) < fragment.length()) {
       for (char ch : chars) {
         if ((byte) ch == fragment.byteAt(position + steps)) {
           return true;
         }
       }
     }
     return false;
   }

   public void forceError(String error) {
     this.error = error;
     end = position + 1;
   }

   public void skipSpaces() {
     end = eatSequence(position, aByte -> ' ' != aByte && '\t' != aByte);
   }

   public void skipComment() {
     Preconditions.checkState('#' == fragment.byteAt(position));
     end = eatSequence(position + 1, aByte -> '\n' == aByte || '\r' == aByte);
   }

   public boolean trySkipEscapedNewline() {
     Preconditions.checkState('$' == fragment.byteAt(position));
     if (checkForward(1, '\n')) {
       end = position + 2;
       return true;
     } else if (checkForward(1, '\r')) {
       if (checkForward(2, '\n')) {
         end = position + 3;
       } else {
         error = "Wrong newline separators: \\r should be followed by \\n.";
         end = safeEnd(position + 3);
       }
       return true;
     }
     return false;
   }

   public void processLineFeedNewLine() {
     Preconditions.checkState('\r' == fragment.byteAt(position));
     if (checkForward(1, '\n')) {
       end = position + 2;
     } else {
       error = "Wrong newline separators: \\r should be followed by \\n.";
       end = safeEnd(position + 2);
     }
   }

   public boolean tryReadVariableInBrackets() {
     Preconditions.checkState('$' == fragment.byteAt(position));
     if (checkForward(1, '{')) {
       end = eatSequence(position + 2, aByte -> ' ' != aByte);
       int endOfVariableName = readIdentifier(end, true);
       if (endOfVariableName == end) {
         error = "Variable identifier expected.";
         // Up to the 'wrong' symbol.
         end = endOfVariableName + 1;
       } else {
         end = eatSequence(endOfVariableName, aByte -> ' ' != aByte);
         if (end >= fragment.length() || '}' != fragment.byteAt(end)) {
           error = "Variable end symbol '}' expected.";
           end = safeEnd(end + 1);
         } else {
           ++end;
         }
       }
       return true;
     }
     return false;
   }

   public boolean tryReadSimpleVariable() {
     Preconditions.checkState('$' == fragment.byteAt(position));
     if (position + 1 < fragment.length()
         && IDENTIFIER_SYMBOLS.contains(fragment.byteAt(position + 1))) {
       end = readIdentifier(position + 1, false);
       return true;
     }
     return false;
   }

   public boolean tryReadEscapedLiteral() {
     if (checkForward(1, '$', ':', ' ')) {
       // Escaped literal.
       end = position + 2;
       return true;
     }
     return false;
   }

   public void tryReadIdentifier() {
     end = readIdentifier(position, true);
     if (position >= end) {
       error =
           String.format(
               "Symbol '%s' is not allowed in the identifier,"
                   + " the text fragment with the symbol:\n%s\n",
               fragment.subFragment(position, position + 1), fragment.getFragmentAround(position));
       end = position + 1;
     }
   }

   public boolean tryReadDoublePipe() {
     Preconditions.checkState('|' == fragment.byteAt(position));
     if (checkForward(1, '|')) {
       end = position + 2;
       return true;
     }
     return false;
   }

   public void readText() {
     end = eatSequence(position, TEXT_STOPPERS::contains);
   }

   public void readPath() {
     end = eatSequence(position, PATH_STOPPERS::contains);
   }

   private int readIdentifier(int startFrom, boolean withDot) {
     if (withDot) {
       return eatSequence(startFrom, b -> !IDENTIFIER_SYMBOLS.contains(b) && '.' != b);
     } else {
       return eatSequence(startFrom, b -> !IDENTIFIER_SYMBOLS.contains(b));
     }
   }

   private int safeEnd(int number) {
     return Math.min(fragment.length(), number);
   }

   private int eatSequence(int startFrom, Predicate<Byte> stop) {
     int i = startFrom;
     for (; i < fragment.length(); i++) {
       byte b = fragment.byteAt(i);
       if (0 == b) {
         seenZero = true;
         return i;
       }
       if (stop.test(b)) {
         break;
       }
     }
     return i;
   }

   /**
    * For the quick checks outside of skipXXX and tryXXX methods of this class, assume that the step
    * takes just one symbol.
    */
   public void ensureEnd() {
     if (end < 0) {
       end = position + 1;
     }
   }
 }
	// Copyright 2019 The Bazel Authors. All rights reserved.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.
	//

	package com.google.devtools.build.lib.bazel.rules.ninja.lexer;

	import com.google.common.base.Preconditions;
	import com.google.common.collect.ImmutableSortedSet;
	import com.google.devtools.build.lib.bazel.rules.ninja.file.ByteBufferFragment;
	import java.nio.charset.StandardCharsets;
	import java.util.function.Predicate;

	/**
	* Helper class for {@link NinjaLexer}. Contains methods for reading Ninja tokens.
	*
	* <p>Start position for reading is fixed. Mutable state includes the end position, offset in case
	* of escaped symbol, error text if a lexing error occurred, and the flag indicating if zero byte
	* was read. (Zero byte determines the end of the file.)
	*
	* <p>Intended to be used like: <code>
	* NinjaLexerStep step = new NinjaLexerStep(fragment, 0);
	* while (step.hasNext()) {
	* byte b = step.startByte();
	* // if/switch, then:
	* step.skipXXX();
	* // or
	* step.tryXXX();
	* // read the end position and error text
	* if (step.hasNext()) {
	* step = nextStep();
	* }
	* }
	* </code>
	*/
	public class NinjaLexerStep {
	private static final ImmutableSortedSet<Byte> IDENTIFIER_SYMBOLS =
	createByteSet("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-");
	private static final ImmutableSortedSet<Byte> TEXT_STOPPERS = createByteSet("\n\r \t$:\u0000");
	// We allow # symbol in the path, so the comment on the line with path can only start with space.
	private static final ImmutableSortedSet<Byte> PATH_STOPPERS = createByteSet("\n\r \t$:\|\u0000");

	private static ImmutableSortedSet<Byte> createByteSet(String variants) {
	ImmutableSortedSet.Builder<Byte> builder = ImmutableSortedSet.naturalOrder();
	byte[] bytes = variants.getBytes(StandardCharsets.ISO_8859_1);
	for (byte b : bytes) {
	builder.add(b);
	}
	return builder.build();
	}

	private final ByteBufferFragment fragment;
	private final int position;

	private boolean seenZero;
	private String error;
	private int end;

	/**
	* @param position start of the step inside a fragment; must point to a symbol inside fragment.
	*/
	public NinjaLexerStep(ByteBufferFragment fragment, int position) {
	Preconditions.checkState(position < fragment.length());
	this.fragment = fragment;
	this.position = position;
	end = -1;
	seenZero = position < fragment.length() && (0 == fragment.byteAt(position));
	}

	public byte startByte() {
	return fragment.byteAt(position);
	}

	public NinjaLexerStep nextStep() {
	Preconditions.checkState(error == null);
	Preconditions.checkState(!seenZero);

	return new NinjaLexerStep(fragment, end);
	}

	/**
	* Returns true, if there are still symbols to process, i.e. either the next step can be
	* constructed, or if current step was just created, so its bounds are not known yet.
	*/
	public boolean canAdvance() {
	return !seenZero && error == null && end < fragment.length();
	}

	public ByteBufferFragment getFragment() {
	return fragment;
	}

	/** Return step bytes, taking into account possible escaped symbol offset. */
	public byte[] getBytes() {
	return fragment.getBytes(position, end);
	}

	public int getPosition() {
	return position;
	}

	public boolean isSeenZero() {
	return seenZero;
	}

	public String getError() {
	return error;
	}

	public int getStart() {
	return position;
	}

	public int getEnd() {
	return end;
	}

	private boolean checkForward(int steps, char... chars) {
	if ((position + steps) < fragment.length()) {
	for (char ch : chars) {
	if ((byte) ch == fragment.byteAt(position + steps)) {
	return true;
	}
	}
	}
	return false;
	}

	public void forceError(String error) {
	this.error = error;
	end = position + 1;
	}

	public void skipSpaces() {
	end = eatSequence(position, aByte -> ' ' != aByte && '\t' != aByte);
	}

	public void skipComment() {
	Preconditions.checkState('#' == fragment.byteAt(position));
	end = eatSequence(position + 1, aByte -> '\n' == aByte \|\| '\r' == aByte);
	}

	public boolean trySkipEscapedNewline() {
	Preconditions.checkState('$' == fragment.byteAt(position));
	if (checkForward(1, '\n')) {
	end = position + 2;
	return true;
	} else if (checkForward(1, '\r')) {
	if (checkForward(2, '\n')) {
	end = position + 3;
	} else {
	error = "Wrong newline separators: \\r should be followed by \\n.";
	end = safeEnd(position + 3);
	}
	return true;
	}
	return false;
	}

	public void processLineFeedNewLine() {
	Preconditions.checkState('\r' == fragment.byteAt(position));
	if (checkForward(1, '\n')) {
	end = position + 2;
	} else {
	error = "Wrong newline separators: \\r should be followed by \\n.";
	end = safeEnd(position + 2);
	}
	}

	public boolean tryReadVariableInBrackets() {
	Preconditions.checkState('$' == fragment.byteAt(position));
	if (checkForward(1, '{')) {
	end = eatSequence(position + 2, aByte -> ' ' != aByte);
	int endOfVariableName = readIdentifier(end, true);
	if (endOfVariableName == end) {
	error = "Variable identifier expected.";
	// Up to the 'wrong' symbol.
	end = endOfVariableName + 1;
	} else {
	end = eatSequence(endOfVariableName, aByte -> ' ' != aByte);
	if (end >= fragment.length() \|\| '}' != fragment.byteAt(end)) {
	error = "Variable end symbol '}' expected.";
	end = safeEnd(end + 1);
	} else {
	++end;
	}
	}
	return true;
	}
	return false;
	}

	public boolean tryReadSimpleVariable() {
	Preconditions.checkState('$' == fragment.byteAt(position));
	if (position + 1 < fragment.length()
	&& IDENTIFIER_SYMBOLS.contains(fragment.byteAt(position + 1))) {
	end = readIdentifier(position + 1, false);
	return true;
	}
	return false;
	}

	public boolean tryReadEscapedLiteral() {
	if (checkForward(1, '$', ':', ' ')) {
	// Escaped literal.
	end = position + 2;
	return true;
	}
	return false;
	}

	public void tryReadIdentifier() {
	end = readIdentifier(position, true);
	if (position >= end) {
	error =
	String.format(
	"Symbol '%s' is not allowed in the identifier,"
	+ " the text fragment with the symbol:\n%s\n",
	fragment.subFragment(position, position + 1), fragment.getFragmentAround(position));
	end = position + 1;
	}
	}

	public boolean tryReadDoublePipe() {
	Preconditions.checkState('\|' == fragment.byteAt(position));
	if (checkForward(1, '\|')) {
	end = position + 2;
	return true;
	}
	return false;
	}

	public void readText() {
	end = eatSequence(position, TEXT_STOPPERS::contains);
	}

	public void readPath() {
	end = eatSequence(position, PATH_STOPPERS::contains);
	}

	private int readIdentifier(int startFrom, boolean withDot) {
	if (withDot) {
	return eatSequence(startFrom, b -> !IDENTIFIER_SYMBOLS.contains(b) && '.' != b);
	} else {
	return eatSequence(startFrom, b -> !IDENTIFIER_SYMBOLS.contains(b));
	}
	}

	private int safeEnd(int number) {
	return Math.min(fragment.length(), number);
	}

	private int eatSequence(int startFrom, Predicate<Byte> stop) {
	int i = startFrom;
	for (; i < fragment.length(); i++) {
	byte b = fragment.byteAt(i);
	if (0 == b) {
	seenZero = true;
	return i;
	}
	if (stop.test(b)) {
	break;
	}
	}
	return i;
	}

	/**
	* For the quick checks outside of skipXXX and tryXXX methods of this class, assume that the step
	* takes just one symbol.
	*/
	public void ensureEnd() {
	if (end < 0) {
	end = position + 1;
	}
	}
	}