src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/lexer/NinjaLexer.java - bazel - Git at Google

 // Copyright 2019 The Bazel Authors. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //

 package com.google.devtools.build.lib.bazel.rules.ninja.lexer;

 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.devtools.build.lib.bazel.rules.ninja.file.ByteBufferFragment;
 import com.google.devtools.build.lib.util.Pair;
 import java.util.Arrays;
 import java.util.List;
 import java.util.stream.Stream;

 /**
  * Ninja files lexer. The types of tokens: {@link NinjaToken}.
  *
  * <p>Every token handed out by {@link #nextToken()} is recorded together with the start and end
  * of the step that produced it, so that {@link #getTokenBytes()} can return the bytes of the
  * latest token and {@link #undo()} can step back over it.
  */
 public class NinjaLexer {
   /**
    * Keywords indexed by their first byte. All keywords start with a different letter, so the first
    * byte of an identifier selects the only keyword it has to be compared with.
    */
   private static final ImmutableMap<Byte, NinjaToken> KEYWORD_MAP =
       // There is no #of() method for 6 key-value pairs.
       Stream.of(
               NinjaToken.BUILD,
               NinjaToken.RULE,
               NinjaToken.DEFAULT,
               NinjaToken.SUBNINJA,
               NinjaToken.INCLUDE,
               NinjaToken.POOL)
           .collect(ImmutableMap.toImmutableMap(token -> token.getBytes()[0], nt -> nt));

   /** The fragment being lexed. */
   private final ByteBufferFragment fragment;
   /** The lexing step currently in progress; replaced whenever the lexer moves forward. */
   private NinjaLexerStep step;
   /** Start and end of the step for every recorded token; parallel to {@link #tokens}. */
   private final List<Pair<Integer, Integer>> ranges;
   /** Type of every recorded token; parallel to {@link #ranges}. */
   private final List<NinjaToken> tokens;
   /** Flag to give a hint how letters should be interpreted (as text, identifier, path). */
   private TextKind expectedTextKind = TextKind.IDENTIFIER;

   /** @param fragment fragment to do the lexing on */
   public NinjaLexer(ByteBufferFragment fragment) {
     this.fragment = fragment;
     step = new NinjaLexerStep(fragment, 0);
     ranges = Lists.newArrayList();
     tokens = Lists.newArrayList();
   }

   /**
    * Returns true if the following {@link #nextToken()} call may produce a meaningful token, i.e.
    * if the current step can advance. It may still happen that {@link #nextToken()} only produces
    * {@link NinjaToken#EOF}, {@link NinjaToken#ZERO} or {@link NinjaToken#ERROR}.
    *
    * <p>{@link #nextToken()} must only be called when this method returns true.
    */
   public boolean hasNextToken() {
     return step.canAdvance();
   }

   /**
    * Reads the next token at/after the current position, passing over spaces that are not
    * indentation, comments and escaped newlines.
    *
    * <p>How letters, {@code =} and {@code |} are interpreted depends on the hint given with {@link
    * #setExpectedTextKind(TextKind)}; the hint is reset to {@link TextKind#IDENTIFIER} after every
    * newline.
    *
    * @return the type of the token read; {@link NinjaToken#ERROR} if the step reported an error,
    *     {@link NinjaToken#EOF} if the input ended before any token was found
    * @throws IllegalStateException if the lexer cannot advance, see {@link #hasNextToken()}
    */
   public NinjaToken nextToken() {
     Preconditions.checkState(step.canAdvance());
     while (step.canAdvance()) {
       // First byte is checked right in constructor.
       if (step.isSeenZero()) {
         return push(NinjaToken.ZERO);
       }
       byte b = step.startByte();
       switch (b) {
         case ' ':
           step.skipSpaces();
           // Spaces form an INDENT token only at position 0 or right after a NEWLINE token;
           // anywhere else they are passed over.
           if (step.getPosition() == 0
               || NinjaToken.NEWLINE.equals(Iterables.getLast(tokens, null))) {
             return push(NinjaToken.INDENT);
           }
           break;
         case '\t':
           step.forceError("Tabs are not allowed, use spaces.");
           return push(NinjaToken.ERROR);
         case '\r':
           expectedTextKind = TextKind.IDENTIFIER;
           step.processLineFeedNewLine();
           return push(NinjaToken.NEWLINE);
         case '\n':
           expectedTextKind = TextKind.IDENTIFIER;
           return push(NinjaToken.NEWLINE);
         case '#':
           step.skipComment();
           break;
         case '=':
           // Inside text '=' has no special meaning and simply starts a TEXT token.
           if (TextKind.TEXT.equals(expectedTextKind)) {
             step.readText();
             return push(NinjaToken.TEXT);
           }
           return push(NinjaToken.EQUALS);
         case ':':
           return push(NinjaToken.COLON);
         case '|':
           // Inside text '|' has no special meaning and simply starts a TEXT token.
           if (TextKind.TEXT.equals(expectedTextKind)) {
             step.readText();
             return push(NinjaToken.TEXT);
           }
           if (step.tryReadDoublePipe()) {
             return push(NinjaToken.PIPE2);
           }
           return push(NinjaToken.PIPE);
         case '$':
           if (step.trySkipEscapedNewline()) {
             break;
           }
           if (step.tryReadVariableInBrackets() || step.tryReadSimpleVariable()) {
             return push(NinjaToken.VARIABLE);
           }
           if (step.tryReadEscapedLiteral()) {
             return push(NinjaToken.ESCAPED_TEXT);
           }
           step.forceError("Bad $-escape (literal $ must be written as $$)");
           return push(NinjaToken.ERROR);
         default:
           switch (expectedTextKind) {
             case TEXT:
               step.readText();
               return push(NinjaToken.TEXT);
             case PATH:
               // Paths are read with their own rules but are reported as TEXT tokens, too.
               step.readPath();
               return push(NinjaToken.TEXT);
             case IDENTIFIER:
               step.tryReadIdentifier();
               if (step.getError() == null) {
                 byte[] bytes = step.getBytes();
                 // The first byte picks the only candidate keyword; then compare all bytes.
                 NinjaToken keywordToken = KEYWORD_MAP.get(bytes[0]);
                 if (keywordToken != null && Arrays.equals(keywordToken.getBytes(), bytes)) {
                   return push(keywordToken);
                 }
               }
               // If the step has an error, push() turns the result into ERROR.
               return push(NinjaToken.IDENTIFIER);
           }
           throw new IllegalStateException();
       }
       if (step.canAdvance()) {
         step.ensureEnd();
         // For all skipping cases: move to the next step.
         step = step.nextStep();
       }
     }
     return push(NinjaToken.EOF);
   }

   /**
    * Return the bytes of the token, returned by previous nextToken() call.
    *
    * @throws IllegalStateException if there is no recorded token
    */
   public byte[] getTokenBytes() {
     if (ranges.isEmpty()) {
       throw new IllegalStateException();
     }
     return fragment.getBytes(getLastStart(), getLastEnd());
   }

   /**
    * Records the token together with the start and end of the current step and, unless the step
    * has an error, moves on to the next step if the current one can advance.
    *
    * @return the passed token, or {@link NinjaToken#ERROR} if the current step has an error
    */
   private NinjaToken push(NinjaToken token) {
     step.ensureEnd();
     ranges.add(Pair.of(step.getStart(), step.getEnd()));
     tokens.add(token);
     if (step.getError() != null) {
       // Do not move in case of error.
       return NinjaToken.ERROR;
     }
     if (step.canAdvance()) {
       step = step.nextStep();
     }
     return token;
   }

   /** Returns true if at least one token is currently recorded. */
   public boolean haveReadAnyTokens() {
     return !ranges.isEmpty();
   }

   /**
    * Returns the start of the most recently recorded token.
    *
    * @throws IllegalStateException if there is no recorded token
    */
   public int getLastStart() {
     if (ranges.isEmpty()) {
       throw new IllegalStateException();
     }
     return Preconditions.checkNotNull(Iterables.getLast(ranges).getFirst());
   }

   /**
    * Returns the end of the most recently recorded token.
    *
    * @throws IllegalStateException if there is no recorded token
    */
   public int getLastEnd() {
     if (ranges.isEmpty()) {
       throw new IllegalStateException();
     }
     return Preconditions.checkNotNull(Iterables.getLast(ranges).getSecond());
   }

   /** Give a hint how letters should be interpreted (as text, identifier, path). */
   public void setExpectedTextKind(TextKind expectedTextKind) {
     this.expectedTextKind = expectedTextKind;
   }

   /**
    * Undo the previously read token.
    *
    * <p>The most recently recorded token is dropped and lexing restarts with a fresh step created
    * at the end of the token recorded before it, or at 0 if no token remains. The expected text
    * kind is reset to {@link TextKind#IDENTIFIER}.
    *
    * @throws IllegalStateException if there is no recorded token to undo
    */
   public void undo() {
     Preconditions.checkState(ranges.size() == tokens.size());
     // Fail in the same way as the other history accessors instead of with an index error.
     Preconditions.checkState(!ranges.isEmpty(), "There is no token to undo");
     ranges.remove(ranges.size() - 1);
     tokens.remove(tokens.size() - 1);
     step = new NinjaLexerStep(fragment, ranges.isEmpty() ? 0 : getLastEnd());
     expectedTextKind = TextKind.IDENTIFIER;
   }

   /** Returns the error of the current step; {@code null} means that there is no error. */
   public String getError() {
     return step.getError();
   }

   /** Returns the fragment this lexer was created for. */
   public ByteBufferFragment getFragment() {
     return fragment;
   }

   /**
    * Enum with variants of text fragments parsing: as identifier (most restricted set of symbols),
    * path (all spaces should be $-escaped, and | symbol has a special meaning), or text.
    */
   public enum TextKind {
     IDENTIFIER,
     PATH,
     TEXT
   }
 }
	// Copyright 2019 The Bazel Authors. All rights reserved.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.
	//

	package com.google.devtools.build.lib.bazel.rules.ninja.lexer;

	import com.google.common.base.Preconditions;
	import com.google.common.collect.ImmutableMap;
	import com.google.common.collect.Iterables;
	import com.google.common.collect.Lists;
	import com.google.devtools.build.lib.bazel.rules.ninja.file.ByteBufferFragment;
	import com.google.devtools.build.lib.util.Pair;
	import java.util.Arrays;
	import java.util.List;
	import java.util.stream.Stream;

	/** Ninja files lexer. The types of tokens: {@link NinjaToken}. */
	public class NinjaLexer {
	// They all are having different first letter, let's use it.
	private static final ImmutableMap<Byte, NinjaToken> KEYWORD_MAP =
	// There is no #of() method for 6 key-value pairs.
	Stream.of(
	NinjaToken.BUILD,
	NinjaToken.RULE,
	NinjaToken.DEFAULT,
	NinjaToken.SUBNINJA,
	NinjaToken.INCLUDE,
	NinjaToken.POOL)
	.collect(ImmutableMap.toImmutableMap(token -> token.getBytes()[0], nt -> nt));

	private final ByteBufferFragment fragment;
	private NinjaLexerStep step;
	private final List<Pair<Integer, Integer>> ranges;
	private final List<NinjaToken> tokens;
	/** Flag to give a hint how letters should be interpreted (as text, identifier, path). */
	private TextKind expectedTextKind = TextKind.IDENTIFIER;

	/** @param fragment fragment to do the lexing on */
	public NinjaLexer(ByteBufferFragment fragment) {
	this.fragment = fragment;
	step = new NinjaLexerStep(fragment, 0);
	ranges = Lists.newArrayList();
	tokens = Lists.newArrayList();
	}

	/**
	* Returns true if following nextToken() call may produce meaningful token. However, it may happen
	* that nextToken() will only produce {@link NinjaToken#EOF}, {@link NinjaToken#ZERO} or {@link
	* NinjaToken#ERROR}.
	*
	* <p>It is an optimization here to check for 'seen' flags: nextToken() may return some meaningful
	* token, and at the same time already discover the end of file or zero byte.
	*/
	public boolean hasNextToken() {
	return step.canAdvance();
	}

	/**
	* Returns {@link NinjaToken} type of the token for the next non-space and non-comment token
	* at/after current <code>position</code> position.
	*/
	public NinjaToken nextToken() {
	Preconditions.checkState(step.canAdvance());
	while (step.canAdvance()) {
	// First byte is checked right in constructor.
	if (step.isSeenZero()) {
	return push(NinjaToken.ZERO);
	}
	byte b = step.startByte();
	switch (b) {
	case ' ':
	step.skipSpaces();
	if (step.getPosition() == 0
	\|\| NinjaToken.NEWLINE.equals(Iterables.getLast(tokens, null))) {
	return push(NinjaToken.INDENT);
	}
	break;
	case '\t':
	step.forceError("Tabs are not allowed, use spaces.");
	return push(NinjaToken.ERROR);
	case '\r':
	expectedTextKind = TextKind.IDENTIFIER;
	step.processLineFeedNewLine();
	return push(NinjaToken.NEWLINE);
	case '\n':
	expectedTextKind = TextKind.IDENTIFIER;
	return push(NinjaToken.NEWLINE);
	case '#':
	step.skipComment();
	break;
	case '=':
	if (TextKind.TEXT.equals(expectedTextKind)) {
	step.readText();
	return push(NinjaToken.TEXT);
	}
	return push(NinjaToken.EQUALS);
	case ':':
	return push(NinjaToken.COLON);
	case '\|':
	if (TextKind.TEXT.equals(expectedTextKind)) {
	step.readText();
	return push(NinjaToken.TEXT);
	}
	if (step.tryReadDoublePipe()) {
	return push(NinjaToken.PIPE2);
	}
	return push(NinjaToken.PIPE);
	case '$':
	if (step.trySkipEscapedNewline()) {
	break;
	}
	if (step.tryReadVariableInBrackets() \|\| step.tryReadSimpleVariable()) {
	return push(NinjaToken.VARIABLE);
	}
	if (step.tryReadEscapedLiteral()) {
	return push(NinjaToken.ESCAPED_TEXT);
	}
	step.forceError("Bad $-escape (literal $ must be written as $$)");
	return push(NinjaToken.ERROR);
	default:
	switch (expectedTextKind) {
	case TEXT:
	step.readText();
	return push(NinjaToken.TEXT);
	case PATH:
	step.readPath();
	return push(NinjaToken.TEXT);
	case IDENTIFIER:
	step.tryReadIdentifier();
	if (step.getError() == null) {
	byte[] bytes = step.getBytes();
	NinjaToken keywordToken = KEYWORD_MAP.get(bytes[0]);
	if (keywordToken != null && Arrays.equals(keywordToken.getBytes(), bytes)) {
	return push(keywordToken);
	}
	}
	return push(NinjaToken.IDENTIFIER);
	}
	throw new IllegalStateException();
	}
	if (step.canAdvance()) {
	step.ensureEnd();
	// For all skipping cases: move to the next step.
	step = step.nextStep();
	}
	}
	return push(NinjaToken.EOF);
	}

	/** Return the bytes of the token, returned by previous nextToken() call. */
	public byte[] getTokenBytes() {
	if (ranges.isEmpty()) {
	throw new IllegalStateException();
	}
	return fragment.getBytes(getLastStart(), getLastEnd());
	}

	private NinjaToken push(NinjaToken token) {
	step.ensureEnd();
	ranges.add(Pair.of(step.getStart(), step.getEnd()));
	tokens.add(token);
	if (step.getError() != null) {
	// Do not move in case of error.
	return NinjaToken.ERROR;
	}
	if (step.canAdvance()) {
	step = step.nextStep();
	}
	return token;
	}

	public boolean haveReadAnyTokens() {
	return !ranges.isEmpty();
	}

	public int getLastStart() {
	if (ranges.isEmpty()) {
	throw new IllegalStateException();
	}
	return Preconditions.checkNotNull(Iterables.getLast(ranges).getFirst());
	}

	public int getLastEnd() {
	if (ranges.isEmpty()) {
	throw new IllegalStateException();
	}
	return Preconditions.checkNotNull(Iterables.getLast(ranges).getSecond());
	}

	/** Give a hint how letters should be interpreted (as text, identifier, path). */
	public void setExpectedTextKind(TextKind expectedTextKind) {
	this.expectedTextKind = expectedTextKind;
	}

	/** Undo the previously read token. */
	public void undo() {
	Preconditions.checkState(ranges.size() == tokens.size());
	ranges.remove(ranges.size() - 1);
	tokens.remove(tokens.size() - 1);
	step = new NinjaLexerStep(fragment, ranges.isEmpty() ? 0 : getLastEnd());
	expectedTextKind = TextKind.IDENTIFIER;
	}

	public String getError() {
	return step.getError();
	}

	public ByteBufferFragment getFragment() {
	return fragment;
	}

	/**
	* Enum with variants of text fragments parsing: as identifier (most restricted set of symbols),
	* path (all spaces should be $-escaped, and \| symbol has a special meaning), or text.
	*/
	public enum TextKind {
	IDENTIFIER,
	PATH,
	TEXT
	}
	}