Support '\r\n' separators when splitting Ninja files into separately ? ?parsed fragments. Closes #10210. PiperOrigin-RevId: 280170780

commit: 7535d4c8c9bd98743446ee293796e4b938770705 [log] [tgz]
author: ichern <ichern@google.com> Wed Nov 13 04:21:25 2019 -0800
committer: Copybara-Service <copybara-worker@google.com> Wed Nov 13 04:22:25 2019 -0800
tree: 176fc1a7e248957171bd761b1a5d5024c7165214
parent: ceadf0a063cb97c32aced143d2447781d1dafc38 [diff]
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/BufferSplitter.java b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/BufferSplitter.java
index 9372bf8..2e1db30 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/BufferSplitter.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/BufferSplitter.java

@@ -30,23 +30,23 @@
 public class BufferSplitter implements Callable<List<ByteFragmentAtOffset>> {
   private final ByteBufferFragment bufferFragment;
   private final DeclarationConsumer consumer;
-  private final SeparatorPredicate separatorPredicate;
+  private final SeparatorFinder separatorFinder;
   private final int offset;
 
   /**
    * @param bufferFragment {@link ByteBufferFragment}, fragment of which should be splitted
    * @param consumer declaration consumer
-   * @param separatorPredicate predicate for separating declarations
+   * @param separatorFinder finds declaration separators
    * @param offset start offset of <code>buffer</code> from the beginning of the file
    */
   public BufferSplitter(
       ByteBufferFragment bufferFragment,
       DeclarationConsumer consumer,
-      SeparatorPredicate separatorPredicate,
+      SeparatorFinder separatorFinder,
       int offset) {
     this.bufferFragment = bufferFragment;
     this.consumer = consumer;
-    this.separatorPredicate = separatorPredicate;
+    this.separatorFinder = separatorFinder;
     this.offset = offset;
   }
 
@@ -61,22 +61,19 @@
   public List<ByteFragmentAtOffset> call() throws Exception {
     List<ByteFragmentAtOffset> fragments = Lists.newArrayList();
     int start = 0;
-    for (int i = 0; i < bufferFragment.length() - 2; i++) {
-      byte previous = bufferFragment.byteAt(i);
-      byte current = bufferFragment.byteAt(i + 1);
-      byte next = bufferFragment.byteAt(i + 2);
-
-      if (!separatorPredicate.test(previous, current, next)) {
-        continue;
+    while (true) {
+      int end = separatorFinder.findNextSeparator(bufferFragment, start, -1);
+      if (end < 0) {
+        break;
       }
-      ByteBufferFragment fragment = bufferFragment.subFragment(start, i + 2);
+      ByteBufferFragment fragment = bufferFragment.subFragment(start, end + 1);
       ByteFragmentAtOffset fragmentAtOffset = new ByteFragmentAtOffset(offset, fragment);
       if (start > 0) {
         consumer.declaration(fragmentAtOffset);
       } else {
         fragments.add(fragmentAtOffset);
       }
-      start = i + 2;
+      start = end + 1;
     }
     // There is always at least one byte at the bounds of the fragment.
     ByteBufferFragment lastFragment = bufferFragment.subFragment(start, bufferFragment.length());

diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/DeclarationAssembler.java b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/DeclarationAssembler.java
index 383335a..97667d1 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/DeclarationAssembler.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/DeclarationAssembler.java

@@ -18,6 +18,8 @@
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Range;
+import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
 
@@ -28,18 +30,18 @@
  */
 public class DeclarationAssembler {
   private final DeclarationConsumer declarationConsumer;
-  private final SeparatorPredicate separatorPredicate;
+  private final SeparatorFinder separatorFinder;
 
   /**
    * @param declarationConsumer delegate declaration consumer for actual processing / parsing
-   * @param separatorPredicate predicate used to determine if two fragments should be separate
+   * @param separatorFinder callback used to determine if two fragments should be separate
    *     declarations (in the Ninja case, if the new line starts with a space, it should be treated
    *     as a part of the previous declaration, i.e. the separator is longer then one symbol).
    */
   public DeclarationAssembler(
-      DeclarationConsumer declarationConsumer, SeparatorPredicate separatorPredicate) {
+      DeclarationConsumer declarationConsumer, SeparatorFinder separatorFinder) {
     this.declarationConsumer = declarationConsumer;
-    this.separatorPredicate = separatorPredicate;
+    this.separatorFinder = separatorFinder;
   }
 
   /**
@@ -69,56 +71,60 @@
   }
 
   private void sendMerged(List<ByteFragmentAtOffset> list) throws GenericParsingException {
-    int offset = -1;
-    List<ByteBufferFragment> leftPart = Lists.newArrayList();
+    Preconditions.checkArgument(!list.isEmpty());
+    ByteFragmentAtOffset first = list.get(0);
+    if (list.size() == 1) {
+      declarationConsumer.declaration(first);
+      return;
+    }
 
-    for (ByteFragmentAtOffset edge : list) {
-      ByteBufferFragment sequence = edge.getFragment();
-      // If the new sequence is separate from already collected parts,
-      // merge them and feed to consumer.
-      if (!leftPart.isEmpty()) {
-        ByteBufferFragment lastPart = Iterables.getLast(leftPart);
-        // The order of symbols: previousInOld, lastInOld, currentInNew, nextInNew.
-        byte previousInOld = lastPart.length() == 1 ? 0 : lastPart.byteAt(lastPart.length() - 2);
-        byte lastInOld = lastPart.byteAt(lastPart.length() - 1);
-        byte currentInNew = sequence.byteAt(0);
-        byte nextInNew = sequence.length() == 1 ? 0 : sequence.byteAt(1);
-
-        // <symbol> | \n<non-space>
-        if (separatorPredicate.test(lastInOld, currentInNew, nextInNew)) {
-          // Add separator to the end of the accumulated sequence
-          leftPart.add(sequence.subFragment(0, 1));
-          ByteFragmentAtOffset byteFragmentAtOffset =
-              new ByteFragmentAtOffset(edge.getOffset(), ByteBufferFragment.merge(leftPart));
-          declarationConsumer.declaration(byteFragmentAtOffset);
-          leftPart.clear();
-          // Cutting out the separator in the beginning
-          if (sequence.length() > 1) {
-            leftPart.add(sequence.subFragment(1, sequence.length()));
-            offset = edge.getOffset();
-          }
-          continue;
-        }
-
-        // <symbol>\n | <non-space>
-        if (separatorPredicate.test(previousInOld, lastInOld, currentInNew)) {
-          ByteFragmentAtOffset byteFragmentAtOffset =
-              new ByteFragmentAtOffset(edge.getOffset(), ByteBufferFragment.merge(leftPart));
-          declarationConsumer.declaration(byteFragmentAtOffset);
-          leftPart.clear();
-        }
+    // 1. We merge all the passed fragments into one fragment.
+    // 2. We check 6 bytes at the connection of two fragments, 3 bytes in each part:
+    // separator can consist of 4 bytes (<escape>/r/n<indent>),
+    // so in case only a part of the separator is in one of the fragments,
+    // we get 3 bytes in one part and one byte in the other.
+    // 3. We record the ranges of at most 6 bytes at the connections of the fragments into
+    // interestingRanges.
+    // 4. Later we will check only interestingRanges for separators, and create corresponding
+    // fragments; the underlying common ByteBuffer will be reused, so we are not performing
+    // extensive copying.
+    int firstOffset = first.getOffset();
+    List<ByteBufferFragment> fragments = new ArrayList<>();
+    List<Range<Integer>> interestingRanges = Lists.newArrayList();
+    int fragmentShift = 0;
+    for (ByteFragmentAtOffset byteFragmentAtOffset : list) {
+      ByteBufferFragment fragment = byteFragmentAtOffset.getFragment();
+      fragments.add(fragment);
+      if (fragmentShift > 0) {
+        // We are only looking for the separators between fragments.
+        int start = Math.max(0, fragmentShift - 3);
+        int end = fragmentShift + Math.min(4, fragment.length());
+        // Assert that the ranges are not intersecting, otherwise the code that iterates ranges
+        // will work incorrectly.
+        Preconditions.checkState(
+            interestingRanges.isEmpty()
+                || Iterables.getLast(interestingRanges).upperEndpoint() < start);
+        interestingRanges.add(Range.openClosed(start, end));
       }
+      fragmentShift += fragment.length();
+    }
 
-      leftPart.add(sequence);
-      if (offset == -1) {
-        offset = edge.getOffset();
+    ByteBufferFragment merged = ByteBufferFragment.merge(fragments);
+
+    int previousEnd = 0;
+    for (Range<Integer> range : interestingRanges) {
+      int idx =
+          separatorFinder.findNextSeparator(merged, range.lowerEndpoint(), range.upperEndpoint());
+      if (idx >= 0) {
+        // There should always be a previous fragment, as we are checking non-intersecting ranges,
+        // starting from the connection point between first and second fragments.
+        Preconditions.checkState(idx > previousEnd);
+        declarationConsumer.declaration(
+            new ByteFragmentAtOffset(firstOffset, merged.subFragment(previousEnd, idx + 1)));
+        previousEnd = idx + 1;
       }
     }
-    if (!leftPart.isEmpty()) {
-      Preconditions.checkState(offset >= 0);
-      ByteFragmentAtOffset byteFragmentAtOffset =
-          new ByteFragmentAtOffset(offset, ByteBufferFragment.merge(leftPart));
-      declarationConsumer.declaration(byteFragmentAtOffset);
-    }
+    declarationConsumer.declaration(
+        new ByteFragmentAtOffset(firstOffset, merged.subFragment(previousEnd, merged.length())));
   }
 }

diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/SeparatorPredicate.java b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/IncorrectSeparatorException.java
similarity index 62%
rename from src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/SeparatorPredicate.java
rename to src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/IncorrectSeparatorException.java
index fbb2d3a..4d0c6c1 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/SeparatorPredicate.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/IncorrectSeparatorException.java

@@ -15,15 +15,9 @@
 
 package com.google.devtools.build.lib.bazel.rules.ninja.file;
 
-/** Interface for determining where the byte sequence should be split into parts. */
-public interface SeparatorPredicate {
-
-  /**
-   * Returns true if the sequence should be split after <code>current</code> byte.
-   *
-   * @param previous previous byte (before current)
-   * @param current current byte
-   * @param next next byte (after current)
-   */
-  boolean test(byte previous, byte current, byte next);
+/** Thrown by {@link BufferSplitter} when incorrect file separators are used ('\r'). */
+public class IncorrectSeparatorException extends GenericParsingException {
+  public IncorrectSeparatorException(String message) {
+    super(message);
+  }
 }

diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/NinjaSeparatorFinder.java b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/NinjaSeparatorFinder.java
new file mode 100644
index 0000000..eb6121a
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/NinjaSeparatorFinder.java

@@ -0,0 +1,78 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+package com.google.devtools.build.lib.bazel.rules.ninja.file;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Implementation of {@link SeparatorFinder} for Ninja files.
+ *
+ * <p>The Ninja declaration consists of several text lines; if the line is a part of the previous
+ * declaration, it starts with some amount of spaces or tabs. If the line is the beginning of the
+ * new declaration, it starts with non-space symbol. Dollar symbol '$' escapes the newline, i.e.
+ * "$\nsomething" does not contain a separator.
+ *
+ * <p>We support '\r\n' separators in Ninja files and throw {@link IncorrectSeparatorException} in
+ * case an incorrect separator '\r' is used.
+ */
+public class NinjaSeparatorFinder implements SeparatorFinder {
+  public static final NinjaSeparatorFinder INSTANCE = new NinjaSeparatorFinder();
+
+  private static final byte DOLLAR_BYTE = '$';
+  private static final byte LINEFEED_BYTE = '\r';
+  private static final byte NEWLINE_BYTE = '\n';
+  private static final byte SPACE_BYTE = ' ';
+  private static final byte TAB_BYTE = '\t';
+
+  private NinjaSeparatorFinder() {}
+
+  @Override
+  public int findNextSeparator(ByteBufferFragment fragment, int startingFrom, int untilExcluded)
+      throws IncorrectSeparatorException {
+    Preconditions.checkState(startingFrom < fragment.length());
+    Preconditions.checkState(untilExcluded < 0 || untilExcluded <= fragment.length());
+
+    boolean escaped = DOLLAR_BYTE == fragment.byteAt(startingFrom);
+    int endExcl = untilExcluded > 0 ? untilExcluded : fragment.length();
+    for (int i = startingFrom + 1; i < endExcl - 1; i++) {
+      byte current = fragment.byteAt(i);
+      byte next = fragment.byteAt(i + 1);
+      byte afterNextOrSpace = i < (endExcl - 2) ? fragment.byteAt(i + 2) : SPACE_BYTE;
+      if (LINEFEED_BYTE == current && NEWLINE_BYTE != next) {
+        throw new IncorrectSeparatorException(
+            "Wrong newline separators: \\r should be followed by \\n.");
+      }
+      if (!escaped
+          && SPACE_BYTE != afterNextOrSpace
+          && TAB_BYTE != afterNextOrSpace
+          && LINEFEED_BYTE == current) {
+        // To do not introduce the length of the separator, let us point to the last symbol of it.
+        return i + 1;
+      }
+      if (!escaped && SPACE_BYTE != next && TAB_BYTE != next && NEWLINE_BYTE == current) {
+        return i;
+      }
+      if (escaped && LINEFEED_BYTE == current) {
+        // Jump over the whole escaped linefeed + newline.
+        ++i;
+        escaped = false;
+      } else {
+        escaped = DOLLAR_BYTE == current;
+      }
+    }
+    return -1;
+  }
+}

diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/NinjaSeparatorPredicate.java b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/NinjaSeparatorPredicate.java
deleted file mode 100644
index 5884ac9..0000000
--- a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/NinjaSeparatorPredicate.java
+++ /dev/null

@@ -1,43 +0,0 @@
-// Copyright 2019 The Bazel Authors. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-package com.google.devtools.build.lib.bazel.rules.ninja.file;
-
-/**
- * Implementation of {@link SeparatorPredicate} for Ninja files.
- *
- * <p>The Ninja declaration consists of several text lines; if the line is a part of the previous
- * declaration, it starts with some amount of spaces or tabs. If the line is the beginning of the
- * new declaration, it starts with non-space symbol. Dollar symbol '$' escapes the newline, i.e.
- * "$\nsomething" does not contain a separator.
- */
-public class NinjaSeparatorPredicate implements SeparatorPredicate {
-  public static final NinjaSeparatorPredicate INSTANCE = new NinjaSeparatorPredicate();
-
-  private static final byte DOLLAR_BYTE = '$';
-  private static final byte NEWLINE_BYTE = '\n';
-  private static final byte SPACE_BYTE = ' ';
-  private static final byte TAB_BYTE = '\t';
-
-  private NinjaSeparatorPredicate() {}
-
-  @Override
-  public boolean test(byte previous, byte current, byte next) {
-    return NEWLINE_BYTE == current
-        && DOLLAR_BYTE != previous
-        && SPACE_BYTE != next
-        && TAB_BYTE != next;
-  }
-}

diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/ParallelFileProcessing.java b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/ParallelFileProcessing.java
index ce3ab26..51ebafe 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/ParallelFileProcessing.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/ParallelFileProcessing.java

@@ -25,21 +25,21 @@
 
 /**
  * Parallel file processing implementation. See comment to {@link #processFile(ReadableByteChannel,
- * BlockParameters, Supplier, ListeningExecutorService, SeparatorPredicate)}.
+ * BlockParameters, Supplier, ListeningExecutorService, SeparatorFinder)}.
  */
 public class ParallelFileProcessing {
   private final ReadableByteChannel channel;
   private final BlockParameters parameters;
   private final Supplier<DeclarationConsumer> tokenConsumerFactory;
   private final ListeningExecutorService executorService;
-  private final SeparatorPredicate predicate;
+  private final SeparatorFinder predicate;
 
   private ParallelFileProcessing(
       ReadableByteChannel channel,
       BlockParameters parameters,
       Supplier<DeclarationConsumer> tokenConsumerFactory,
       ListeningExecutorService executorService,
-      SeparatorPredicate predicate) {
+      SeparatorFinder predicate) {
     this.channel = channel;
     this.parameters = parameters;
     this.tokenConsumerFactory = tokenConsumerFactory;
@@ -90,7 +90,7 @@
       BlockParameters parameters,
       Supplier<DeclarationConsumer> tokenConsumerFactory,
       ListeningExecutorService executorService,
-      SeparatorPredicate predicate)
+      SeparatorFinder predicate)
       throws GenericParsingException, IOException, InterruptedException {
     new ParallelFileProcessing(
             channel, parameters, tokenConsumerFactory, executorService, predicate)
@@ -141,19 +141,25 @@
     int blockSize = parameters.getTokenizeBlockSize();
     while (from < bb.limit()) {
       int to = Math.min(bb.limit(), from + blockSize);
+      if (bb.limit() - to < BlockParameters.MIN_TOKENIZE_BLOCK_SIZE) {
+        // Do not create the last block too small, rather join it with the previous block.
+        to = bb.limit();
+      }
       DeclarationConsumer consumer = tokenConsumerFactory.get();
       ByteBufferFragment fragment = new ByteBufferFragment(bb, from, to);
       BufferSplitter tokenizer = new BufferSplitter(fragment, consumer, predicate, offset);
       future.add(executorService.submit(tokenizer));
-      from += blockSize;
+      from = to;
     }
   }
 
   /** Sizes of blocks for reading from file and parsing for {@link ParallelFileProcessing}. */
   public static class BlockParameters {
+
     private static final int READ_BLOCK_SIZE = 25 * 1024 * 1024;
     private static final int MIN_READ_BLOCK_SIZE = 10 * 1024 * 1024;
     private static final int TOKENIZE_BLOCK_SIZE = 1024 * 1024;
+    private static final int MIN_TOKENIZE_BLOCK_SIZE = 100;
 
     /** Size of the reading buffer. */
     private int readBlockSize;
@@ -173,7 +179,8 @@
     public BlockParameters(long fileSize) {
       readBlockSize = (int) Math.min(READ_BLOCK_SIZE, fileSize);
       minReadBlockSize = Math.min(MIN_READ_BLOCK_SIZE, (int) Math.ceil((double) fileSize / 2));
-      tokenizeBlockSize = Math.min(TOKENIZE_BLOCK_SIZE, minReadBlockSize / 4);
+      tokenizeBlockSize =
+          Math.max(MIN_TOKENIZE_BLOCK_SIZE, Math.min(TOKENIZE_BLOCK_SIZE, minReadBlockSize / 4));
     }
 
     public int getReadBlockSize() {
@@ -188,7 +195,8 @@
       if (readBlockSize > 0) {
         this.readBlockSize = readBlockSize;
         minReadBlockSize = Math.min(minReadBlockSize, (int) Math.ceil((double) readBlockSize / 2));
-        tokenizeBlockSize = Math.min(tokenizeBlockSize, minReadBlockSize / 4);
+        tokenizeBlockSize =
+            Math.max(MIN_TOKENIZE_BLOCK_SIZE, Math.min(tokenizeBlockSize, minReadBlockSize / 4));
       }
       return this;
     }
@@ -197,14 +205,6 @@
       return tokenizeBlockSize;
     }
 
-    /** Sets tokenizeBlockSize, if it is less than readBlockSize. */
-    public BlockParameters setTokenizeBlockSize(int tokenizeBlockSize) {
-      if (tokenizeBlockSize > 0 && tokenizeBlockSize <= readBlockSize) {
-        this.tokenizeBlockSize = tokenizeBlockSize;
-      }
-      return this;
-    }
-
     public int getMinReadBlockSize() {
       return minReadBlockSize;
     }

diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/SeparatorFinder.java b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/SeparatorFinder.java
new file mode 100644
index 0000000..c879646
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/SeparatorFinder.java

@@ -0,0 +1,32 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+package com.google.devtools.build.lib.bazel.rules.ninja.file;
+
+/** Interface for determining where the byte sequence should be split into parts. */
+public interface SeparatorFinder {
+
+  /**
+   * Returns the index of the end of the next separator (separator can be of two symbols, \r\n), or
+   * -1 if the fragment does not contain any separators.
+   *
+   * @param fragment fragment to search in
+   * @param startingFrom index to start search from
+   * @param untilExcluded index to stop search before (excluded from search).
+   * @throws IncorrectSeparatorException if the incorrect separator value (\r) is used
+   */
+  int findNextSeparator(ByteBufferFragment fragment, int startingFrom, int untilExcluded)
+      throws IncorrectSeparatorException;
+}

diff --git a/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/BufferSplitterTest.java b/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/BufferSplitterTest.java
index 5a21ea3..fbe00f8 100644
--- a/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/BufferSplitterTest.java
+++ b/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/BufferSplitterTest.java

@@ -24,7 +24,7 @@
 import com.google.devtools.build.lib.bazel.rules.ninja.file.ByteBufferFragment;
 import com.google.devtools.build.lib.bazel.rules.ninja.file.ByteFragmentAtOffset;
 import com.google.devtools.build.lib.bazel.rules.ninja.file.DeclarationConsumer;
-import com.google.devtools.build.lib.bazel.rules.ninja.file.NinjaSeparatorPredicate;
+import com.google.devtools.build.lib.bazel.rules.ninja.file.NinjaSeparatorFinder;
 import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
@@ -51,7 +51,7 @@
     byte[] chars = String.join("\n", list).getBytes(StandardCharsets.ISO_8859_1);
     ByteBufferFragment fragment = new ByteBufferFragment(ByteBuffer.wrap(chars), 0, chars.length);
     BufferSplitter tokenizer =
-        new BufferSplitter(fragment, consumer, NinjaSeparatorPredicate.INSTANCE, offsetValue);
+        new BufferSplitter(fragment, consumer, NinjaSeparatorFinder.INSTANCE, offsetValue);
     List<ByteFragmentAtOffset> edges = tokenizer.call();
     assertThat(result).containsExactly("two\n");
     assertThat(
@@ -74,7 +74,7 @@
 
     ByteBufferFragment fragment = new ByteBufferFragment(ByteBuffer.wrap(chars), 0, chars.length);
     BufferSplitter tokenizer =
-        new BufferSplitter(fragment, consumer, NinjaSeparatorPredicate.INSTANCE, 0);
+        new BufferSplitter(fragment, consumer, NinjaSeparatorFinder.INSTANCE, 0);
     List<ByteFragmentAtOffset> edges = tokenizer.call();
     assertThat(result).containsExactly("two\n\ttwo-detail\n");
     assertThat(

diff --git a/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/DeclarationAssemblerTest.java b/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/DeclarationAssemblerTest.java
index 5192ea8..ff52c74 100644
--- a/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/DeclarationAssemblerTest.java
+++ b/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/DeclarationAssemblerTest.java

@@ -22,7 +22,7 @@
 import com.google.devtools.build.lib.bazel.rules.ninja.file.ByteFragmentAtOffset;
 import com.google.devtools.build.lib.bazel.rules.ninja.file.DeclarationAssembler;
 import com.google.devtools.build.lib.bazel.rules.ninja.file.GenericParsingException;
-import com.google.devtools.build.lib.bazel.rules.ninja.file.NinjaSeparatorPredicate;
+import com.google.devtools.build.lib.bazel.rules.ninja.file.NinjaSeparatorFinder;
 import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
@@ -65,7 +65,7 @@
               list.add(byteFragmentAtOffset.getFragment().toString());
               assertThat(byteFragmentAtOffset.getOffset()).isAnyOf(0, chars1.length);
             },
-            NinjaSeparatorPredicate.INSTANCE);
+            NinjaSeparatorFinder.INSTANCE);
 
     assembler.wrapUp(
         Lists.newArrayList(
@@ -87,7 +87,7 @@
               list.add(byteFragmentAtOffset.getFragment().toString());
               assertThat(byteFragmentAtOffset.getOffset()).isEqualTo(0);
             },
-            NinjaSeparatorPredicate.INSTANCE);
+            NinjaSeparatorFinder.INSTANCE);
 
     final byte[] chars = s.getBytes(StandardCharsets.ISO_8859_1);
     assembler.wrapUp(

diff --git a/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/NinjaSeparatorFinderTest.java b/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/NinjaSeparatorFinderTest.java
new file mode 100644
index 0000000..9e9f722
--- /dev/null
+++ b/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/NinjaSeparatorFinderTest.java

@@ -0,0 +1,77 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+package com.google.devtools.build.lib.bazel.rules.ninja;
+
+import static com.google.common.truth.Truth.assertThat;
+import static com.google.devtools.build.lib.testutil.MoreAsserts.assertThrows;
+
+import com.google.devtools.build.lib.bazel.rules.ninja.file.ByteBufferFragment;
+import com.google.devtools.build.lib.bazel.rules.ninja.file.IncorrectSeparatorException;
+import com.google.devtools.build.lib.bazel.rules.ninja.file.NinjaSeparatorFinder;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for {@link NinjaSeparatorFinder}. */
+@RunWith(JUnit4.class)
+public class NinjaSeparatorFinderTest {
+  @Test
+  public void testIsSeparator() throws IncorrectSeparatorException {
+    doTestIsSeparator(" \na", 1);
+    doTestIsSeparator("b\na", 1);
+    doTestIsSeparator(" \na", 1);
+    doTestIsSeparator("b\n$", 1);
+    doTestIsSeparator(" \n\n", 1);
+    doTestIsSeparator("a\n\n", 1);
+    // We are pointing to the last symbol of separator.
+    doTestIsSeparator("a\r\n\n", 2);
+    doTestIsSeparator(" \r\n\n", 2);
+    doTestIsSeparator("a\r\na", 2);
+    doTestIsSeparator("\r\na", 1);
+
+    doTestIsSeparator(" \n ", -1);
+    doTestIsSeparator(" \r\n ", -1);
+    doTestIsSeparator("$\n ", -1);
+    doTestIsSeparator("$\r\n ", -1);
+    doTestIsSeparator("$\r\n\n ", -1);
+    doTestIsSeparator("$\r\n\r\n ", -1);
+    doTestIsSeparator("$\n\n", -1);
+    doTestIsSeparator("$\na", -1);
+    doTestIsSeparator("$\r\na", -1);
+    doTestIsSeparator("a\n ", -1);
+    doTestIsSeparator("a\n\t", -1);
+    // Not enough information.
+    doTestIsSeparator("\r\n", -1);
+    doTestIsSeparator("\n", -1);
+    // Test for incorrect separators.
+    byte[] bytes = "a\rb".getBytes(StandardCharsets.ISO_8859_1);
+    ByteBuffer buffer = ByteBuffer.wrap(bytes);
+    ByteBufferFragment fragment = new ByteBufferFragment(buffer, 0, buffer.limit());
+    assertThrows(
+        IncorrectSeparatorException.class,
+        () -> NinjaSeparatorFinder.INSTANCE.findNextSeparator(fragment, 0, -1));
+  }
+
+  private static void doTestIsSeparator(String s, int expected) throws IncorrectSeparatorException {
+    byte[] bytes = s.getBytes(StandardCharsets.ISO_8859_1);
+    ByteBuffer buffer = ByteBuffer.wrap(bytes);
+    ByteBufferFragment fragment = new ByteBufferFragment(buffer, 0, buffer.limit());
+    int result = NinjaSeparatorFinder.INSTANCE.findNextSeparator(fragment, 0, -1);
+    assertThat(result).isEqualTo(expected);
+  }
+}

diff --git a/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/NinjaSeparatorPredicateTest.java b/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/NinjaSeparatorPredicateTest.java
deleted file mode 100644
index d9fa2b3..0000000
--- a/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/NinjaSeparatorPredicateTest.java
+++ /dev/null

@@ -1,51 +0,0 @@
-// Copyright 2019 The Bazel Authors. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-package com.google.devtools.build.lib.bazel.rules.ninja;
-
-import static com.google.common.truth.Truth.assertThat;
-
-import com.google.devtools.build.lib.bazel.rules.ninja.file.NinjaSeparatorPredicate;
-import java.nio.charset.StandardCharsets;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/** Tests for {@link NinjaSeparatorPredicate}. */
-@RunWith(JUnit4.class)
-public class NinjaSeparatorPredicateTest {
-  @Test
-  public void testIsSeparator() {
-    doTestIsSeparator(" \n ", false);
-    doTestIsSeparator(" \na", true);
-    doTestIsSeparator("$\n ", false);
-    doTestIsSeparator("$\n\n", false);
-    doTestIsSeparator("$\na", false);
-    doTestIsSeparator("b\na", true);
-    doTestIsSeparator(" \na", true);
-    doTestIsSeparator("b\n$", true);
-    doTestIsSeparator(" \n\n", true);
-    doTestIsSeparator("a\n\n", true);
-    doTestIsSeparator("a\n ", false);
-    doTestIsSeparator("a\n\t", false);
-  }
-
-  private static void doTestIsSeparator(String s, Boolean expected) {
-    assertThat(s).hasLength(3);
-    byte[] bytes = s.getBytes(StandardCharsets.ISO_8859_1);
-    boolean result = NinjaSeparatorPredicate.INSTANCE.test(bytes[0], bytes[1], bytes[2]);
-    assertThat(result).isEqualTo(expected);
-  }
-}

diff --git a/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/ParallelFileProcessingTest.java b/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/ParallelFileProcessingTest.java
index f016edb..cfe5a49 100644
--- a/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/ParallelFileProcessingTest.java
+++ b/src/test/java/com/google/devtools/build/lib/bazel/rules/ninja/ParallelFileProcessingTest.java

@@ -27,7 +27,7 @@
 import com.google.devtools.build.lib.bazel.rules.ninja.file.ByteBufferFragment;
 import com.google.devtools.build.lib.bazel.rules.ninja.file.DeclarationConsumer;
 import com.google.devtools.build.lib.bazel.rules.ninja.file.GenericParsingException;
-import com.google.devtools.build.lib.bazel.rules.ninja.file.NinjaSeparatorPredicate;
+import com.google.devtools.build.lib.bazel.rules.ninja.file.NinjaSeparatorFinder;
 import com.google.devtools.build.lib.bazel.rules.ninja.file.ParallelFileProcessing;
 import com.google.devtools.build.lib.bazel.rules.ninja.file.ParallelFileProcessing.BlockParameters;
 import com.google.devtools.build.lib.concurrent.ExecutorUtil;
@@ -144,7 +144,7 @@
           parameters != null ? parameters : new BlockParameters(file.length()),
           factory,
           service,
-          NinjaSeparatorPredicate.INSTANCE);
+          NinjaSeparatorFinder.INSTANCE);
     } finally {
       ExecutorUtil.interruptibleShutdown(service);
     }
commit	7535d4c8c9bd98743446ee293796e4b938770705	[log] [tgz]
author	ichern <ichern@google.com>	Wed Nov 13 04:21:25 2019 -0800
committer	Copybara-Service <copybara-worker@google.com>	Wed Nov 13 04:22:25 2019 -0800
tree	176fc1a7e248957171bd761b1a5d5024c7165214
parent	ceadf0a063cb97c32aced143d2447781d1dafc38 [diff]