Optimize path/text evaluation in ninja lexer by using equals instead of set lookup. RELNOTES: None. PiperOrigin-RevId: 309225814

commit: 41160b438cdc33903f2f13637f48f97de07603d7 [log] [tgz]
author: cparsons <cparsons@google.com> Thu Apr 30 08:02:46 2020 -0700
committer: Copybara-Service <copybara-worker@google.com> Thu Apr 30 08:03:47 2020 -0700
tree: c4973ad417c7455e63d076fa23fc6fc36083fac8
parent: ef3c5acf1b59fc3945ab2e1de0de2c745a053db6 [diff]
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/lexer/NinjaLexerStep.java b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/lexer/NinjaLexerStep.java
index 73a288d..025a517 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/lexer/NinjaLexerStep.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/lexer/NinjaLexerStep.java

@@ -46,9 +46,14 @@
 public class NinjaLexerStep {
   private static final ImmutableSortedSet<Byte> IDENTIFIER_SYMBOLS =
       createByteSet("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-");
-  private static final ImmutableSortedSet<Byte> TEXT_STOPPERS = createByteSet("\n\r \t$:\u0000");
+  private static final byte[] TEXT_STOPPERS = createByteArray("\n\r \t$:\u0000");
   // We allow # symbol in the path, so the comment on the line with path can only start with space.
-  private static final ImmutableSortedSet<Byte> PATH_STOPPERS = createByteSet("\n\r \t$:|\u0000");
+  private static final byte[] PATH_STOPPERS = createByteArray("\n\r \t$:|\u0000");
+
+  private static byte[] createByteArray(String variants) {
+    byte[] bytes = variants.getBytes(StandardCharsets.ISO_8859_1);
+    return bytes;
+  }
 
   private static ImmutableSortedSet<Byte> createByteSet(String variants) {
     ImmutableSortedSet.Builder<Byte> builder = ImmutableSortedSet.naturalOrder();
@@ -242,11 +247,57 @@
   }
 
   public void readText() {
-    end = eatSequence(position, TEXT_STOPPERS::contains);
+    int i = position;
+    for (; i < fragment.length(); i++) {
+      byte b = fragment.byteAt(i);
+      if (0 == b) {
+        seenZero = true;
+        end = i;
+        return;
+      }
+      if (isTextStopper(b)) {
+        break;
+      }
+    }
+    end = i;
   }
 
   public void readPath() {
-    end = eatSequence(position, PATH_STOPPERS::contains);
+    int i = position;
+    for (; i < fragment.length(); i++) {
+      byte b = fragment.byteAt(i);
+      if (0 == b) {
+        seenZero = true;
+        end = i;
+        return;
+      }
+      if (isPathStopper(b)) {
+        break;
+      }
+    }
+    end = i;
+  }
+
+  // Optimized, since this is run for each byte in the ninja file. (This has better performance
+  // than lookup in a java Set, since TEXT_STOPPERS is small.)
+  private static boolean isTextStopper(byte b) {
+    for (int i = 0; i < TEXT_STOPPERS.length; i++) {
+      if (b == TEXT_STOPPERS[i]) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Optimized, since this is run for each byte in the ninja file. (This has better performance
+  // than lookup in a java Set, since PATH_STOPPERS is small.)
+  private static boolean isPathStopper(byte b) {
+    for (int i = 0; i < PATH_STOPPERS.length; i++) {
+      if (b == PATH_STOPPERS[i]) {
+        return true;
+      }
+    }
+    return false;
   }
 
   private int readIdentifier(int startFrom, boolean withDot) {
commit	41160b438cdc33903f2f13637f48f97de07603d7	[log] [tgz]
author	cparsons <cparsons@google.com>	Thu Apr 30 08:02:46 2020 -0700
committer	Copybara-Service <copybara-worker@google.com>	Thu Apr 30 08:03:47 2020 -0700
tree	c4973ad417c7455e63d076fa23fc6fc36083fac8
parent	ef3c5acf1b59fc3945ab2e1de0de2c745a053db6 [diff]