Add memory-efficient map for storing nested set -> digest.

Instead of using ConcurrentHashMap, we use a dead-simple open addressed hash hable with a giant byte array with 16-byte slots. We then read or write fingerprints straight into and out of the array, obviating the need to generate intermediate garbage.

Locking mechanism is a read-write lock. This should be faster than full synchronisation for read-heavy loads.

RELNOTES: None
PiperOrigin-RevId: 184019301
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
index 92e058b..bb8fae4 100644
--- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
@@ -41,7 +41,10 @@
 
 java_library(
     name = "fingerprint_cache",
-    srcs = ["NestedSetFingerprintCache.java"],
+    srcs = [
+        "DigestMap.java",
+        "NestedSetFingerprintCache.java",
+    ],
     deps = [
         ":nestedset",
         "//src/main/java/com/google/devtools/build/lib:commandline_item",
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/DigestMap.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/DigestMap.java
new file mode 100644
index 0000000..71e5f85
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/DigestMap.java
@@ -0,0 +1,181 @@
+// Copyright 2018 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.collect.nestedset;
+
+import com.google.common.base.Preconditions;
+import com.google.devtools.build.lib.util.Fingerprint;
+import java.util.concurrent.locks.StampedLock;
+
+/**
+ * Map of key -> [digest bytes].
+ *
+ * <p>This class uses a single array of keys and a big single block of bytes. To read/store digests
+ * we index straight into the byte array. This is more memory-efficient and uses less GC than a
+ * corresponding Map<Object, byte[]>.
+ *
+ * <p>Keys use reference equality.
+ */
+final class DigestMap {
+  private final int digestSize;
+  private final StampedLock readWriteLock = new StampedLock();
+  private Object[] keys;
+  private byte[] bytes;
+  private int tableSize;
+  private int nextResize;
+  private int size;
+
+  DigestMap(int digestSize, int initialSize) {
+    Preconditions.checkArgument(
+        initialSize > 0 && (initialSize & (initialSize - 1)) == 0,
+        "initialSize must be a power of 2 greater than 0");
+    this.digestSize = digestSize;
+    this.keys = new Object[initialSize];
+    this.bytes = new byte[initialSize * digestSize];
+    this.tableSize = initialSize;
+    this.nextResize = getNextResize(initialSize);
+  }
+
+  /** Finds the digest for the corresponding key and adds it to the passed fingerprint. */
+  boolean readDigest(Object key, Fingerprint fingerprint) {
+    long stamp = readWriteLock.readLock();
+    try {
+      int index = findKeyReadLocked(key);
+      if (index >= 0) {
+        fingerprint.addBytes(bytes, index * digestSize, digestSize);
+        return true;
+      } else {
+        return false;
+      }
+    } finally {
+      readWriteLock.unlockRead(stamp);
+    }
+  }
+
+  // Finds the key in the array. Must be called under read lock.
+  private int findKeyReadLocked(Object key) {
+    int hash = hash(key);
+    int index = hash & (tableSize - 1);
+    while (true) {
+      Object currentKey = keys[index];
+      if (currentKey == key) {
+        return index;
+      } else if (currentKey == null) {
+        return -1;
+      }
+      index = probe(index, tableSize);
+    }
+  }
+
+  /**
+   * Inserts a digest for the corresponding key, then immediately reads it into another fingerprint.
+   *
+   * <p>This is equivalent to <code>
+   * digestMap.insertDigest(key, digest.digestAndReset(); digestMap.readDigest(key, readTo); </code>
+   * but it will be faster.
+   *
+   * @param key The key to insert.
+   * @param digest The fingerprint to insert. This will reset the fingerprint instance.
+   * @param readTo A fingerprint to read the just-added fingerprint into.
+   */
+  void insertAndReadDigest(Object key, Fingerprint digest, Fingerprint readTo) {
+    long stamp = readWriteLock.writeLock();
+    try {
+      int index = insertKeyWriteLocked(key);
+      digest.digestAndReset(bytes, index * digestSize, digestSize);
+      readTo.addBytes(bytes, index * digestSize, digestSize);
+    } finally {
+      readWriteLock.unlockWrite(stamp);
+    }
+  }
+
+  // Inserts a key into the array and returns the index. Must be called under write lock.
+  private int insertKeyWriteLocked(Object key) {
+    if (size >= nextResize) {
+      resizeTableWriteLocked();
+    }
+    int hash = hash(key);
+    int index = hash & (tableSize - 1);
+    while (true) {
+      Object currentKey = keys[index];
+      if (currentKey == null) {
+        keys[index] = key;
+        ++size;
+        return index;
+      } else if (currentKey == key) {
+        // Key is already present
+        return index;
+      }
+      index = probe(index, tableSize);
+    }
+  }
+
+  private void resizeTableWriteLocked() {
+    int digestSize = this.digestSize;
+    int tableSize = this.tableSize;
+    Object[] keys = this.keys;
+    byte[] bytes = this.bytes;
+    int newTableSize = this.tableSize * 2;
+    Object[] newKeys = new Object[newTableSize];
+    byte[] newBytes = new byte[newTableSize * digestSize];
+    for (int i = 0; i < tableSize; ++i) {
+      Object key = keys[i];
+      if (key != null) {
+        int newIndex = firstFreeIndex(newKeys, newTableSize, key);
+        newKeys[newIndex] = key;
+        System.arraycopy(bytes, i * digestSize, newBytes, newIndex * digestSize, digestSize);
+      }
+    }
+    this.tableSize = newTableSize;
+    this.keys = newKeys;
+    this.bytes = newBytes;
+    this.nextResize = getNextResize(newTableSize);
+  }
+
+  private static int firstFreeIndex(Object[] keys, int tableSize, Object key) {
+    int hash = hash(key);
+    int index = hash & (tableSize - 1);
+    while (true) {
+      Object currentKey = keys[index];
+      if (currentKey == null) {
+        return index;
+      }
+      index = probe(index, tableSize);
+    }
+  }
+
+  private static int hash(Object key) {
+    return smear(System.identityHashCode(key));
+  }
+
+  private static int probe(int index, int tableSize) {
+    return (index + 1) & (tableSize - 1);
+  }
+
+  private static int getNextResize(int newTableSize) {
+    // 75% load
+    return (newTableSize * 3) / 4;
+  }
+
+  /*
+   * This method was rewritten in Java from an intermediate step of the Murmur hash function in
+   * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp, which contained the
+   * following header:
+   *
+   * MurmurHash3 was written by Austin Appleby, and is placed in the public domain. The author
+   * hereby disclaims copyright to this source code.
+   */
+  private static int smear(int hashCode) {
+    return 0x1b873593 * Integer.rotateLeft(hashCode * 0xcc9e2d51, 15);
+  }
+}
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java
index b4d3411..b48d4a2 100644
--- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java
@@ -22,10 +22,11 @@
 
 /** Computes fingerprints for nested sets, reusing sub-computations from children. */
 public class NestedSetFingerprintCache {
-  private static final byte[] EMPTY_SET_BYTES = new byte[] {};
+  private static final int DIGEST_SIZE = 16;
+  private static final int EMPTY_SET_DIGEST = 104_395_303;
 
   /** Memoize the subresults. We have to have one cache per type of command item map function. */
-  private Map<CommandLineItem.MapFn<?>, Map<Object, byte[]>> mapFnToFingerprints = createMap();
+  private Map<CommandLineItem.MapFn<?>, DigestMap> mapFnToFingerprints = createMap();
 
   public <T> void addNestedSetToFingerprint(Fingerprint fingerprint, NestedSet<T> nestedSet) {
     addNestedSetToFingerprint(CommandLineItem.MapFn.DEFAULT, fingerprint, nestedSet);
@@ -33,12 +34,16 @@
 
   public <T> void addNestedSetToFingerprint(
       CommandLineItem.MapFn<? super T> mapFn, Fingerprint fingerprint, NestedSet<T> nestedSet) {
-    Map<Object, byte[]> fingerprints =
-        mapFnToFingerprints.computeIfAbsent(mapFn, k -> new ConcurrentHashMap<>());
+    // Only top-level nested sets can be empty, so we can bail here
+    if (nestedSet.isEmpty()) {
+      fingerprint.addInt(EMPTY_SET_DIGEST);
+      return;
+    }
+    DigestMap digestMap =
+        mapFnToFingerprints.computeIfAbsent(mapFn, k -> new DigestMap(DIGEST_SIZE, 1024));
     fingerprint.addInt(nestedSet.getOrder().ordinal());
     Object children = nestedSet.rawChildren();
-    byte[] bytes = getBytes(mapFn, fingerprints, children);
-    fingerprint.addBytes(bytes);
+    addToFingerprint(mapFn, fingerprint, digestMap, children);
   }
 
   public void clear() {
@@ -46,36 +51,22 @@
   }
 
   @SuppressWarnings("unchecked")
-  private <T> byte[] getBytes(
-      CommandLineItem.MapFn<? super T> mapFn, Map<Object, byte[]> fingerprints, Object children) {
-    byte[] bytes = fingerprints.get(children);
-    if (bytes == null) {
-      if (children instanceof Object[]) {
-        Fingerprint fingerprint = new Fingerprint();
+  private <T> void addToFingerprint(
+      CommandLineItem.MapFn<? super T> mapFn,
+      Fingerprint fingerprint,
+      DigestMap digestMap,
+      Object children) {
+    if (children instanceof Object[]) {
+      if (!digestMap.readDigest(children, fingerprint)) {
+        Fingerprint childrenFingerprint = new Fingerprint();
         for (Object child : (Object[]) children) {
-          if (child instanceof Object[]) {
-            fingerprint.addBytes(getBytes(mapFn, fingerprints, child));
-          } else {
-            addToFingerprint(mapFn, fingerprint, (T) child);
-          }
+          addToFingerprint(mapFn, childrenFingerprint, digestMap, child);
         }
-        bytes = fingerprint.digestAndReset();
-
-        // There is no point memoizing anything except the multi-item case,
-        // since the single-item case gets inlined into its parents anyway,
-        // and the empty set is a singleton
-        fingerprints.put(children, bytes);
-      } else if (children != NestedSet.EMPTY_CHILDREN) {
-        // Single item
-        Fingerprint fingerprint = new Fingerprint();
-        addToFingerprint(mapFn, fingerprint, (T) children);
-        bytes = fingerprint.digestAndReset();
-      } else {
-        // Empty nested set
-        bytes = EMPTY_SET_BYTES;
+        digestMap.insertAndReadDigest(children, childrenFingerprint, fingerprint);
       }
+    } else {
+      addToFingerprint(mapFn, fingerprint, (T) children);
     }
-    return bytes;
   }
 
   @VisibleForTesting
@@ -84,7 +75,7 @@
     fingerprint.addString(mapFn.expandToCommandLine(object));
   }
 
-  private static Map<CommandLineItem.MapFn<?>, Map<Object, byte[]>> createMap() {
+  private static Map<CommandLineItem.MapFn<?>, DigestMap> createMap() {
     return new ConcurrentHashMap<>();
   }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java b/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java
index c2119ee..ddb71b4 100644
--- a/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java
+++ b/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java
@@ -20,6 +20,7 @@
 import com.google.protobuf.CodedOutputStream;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
+import java.security.DigestException;
 import java.security.DigestOutputStream;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
@@ -73,6 +74,26 @@
     return md5.digest();
   }
 
+  /**
+   * Completes the hash computation by doing final operations and resets the underlying state,
+   * allowing this instance to be used again.
+   *
+   * <p>Instead of returning a digest, this method writes the digest straight into the supplied byte
+   * array, at the given offset.
+   *
+   * @see java.security.MessageDigest#digest()
+   */
+  public void digestAndReset(byte[] buf, int offset, int len) {
+    try {
+      codedOut.flush();
+      md5.digest(buf, offset, len);
+    } catch (IOException e) {
+      throw new IllegalStateException("failed to flush", e);
+    } catch (DigestException e) {
+      throw new IllegalStateException("failed to digest", e);
+    }
+  }
+
   /** Same as {@link #digestAndReset()}, except returns the digest in hex string form. */
   public String hexDigestAndReset() {
     return hexDigest(digestAndReset());
diff --git a/src/test/java/com/google/devtools/build/lib/collect/nestedset/DigestMapTest.java b/src/test/java/com/google/devtools/build/lib/collect/nestedset/DigestMapTest.java
new file mode 100644
index 0000000..35fb7ed
--- /dev/null
+++ b/src/test/java/com/google/devtools/build/lib/collect/nestedset/DigestMapTest.java
@@ -0,0 +1,110 @@
+// Copyright 2018 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.collect.nestedset;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.devtools.build.lib.util.Fingerprint;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicReference;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for {@link DigestMap}. */
+@RunWith(JUnit4.class)
+public class DigestMapTest {
+
+  @Test
+  public void simpleTest() {
+    int count = 128; // Must be smaller than byte for this test or we'll overflow
+    Object[] keys = new Object[count];
+    for (int i = 0; i < count; ++i) {
+      keys[i] = new Object();
+    }
+
+    DigestMap digestMap = new DigestMap(16, 4);
+    for (int i = 0; i < count; ++i) {
+      Fingerprint digest = new Fingerprint().addInt(i);
+      Fingerprint fingerprint = new Fingerprint();
+      digestMap.insertAndReadDigest(keys[i], digest, fingerprint);
+      Fingerprint reference =
+          new Fingerprint().addBytes(new Fingerprint().addInt(i).digestAndReset());
+      assertThat(fingerprint.hexDigestAndReset()).isEqualTo(reference.hexDigestAndReset());
+    }
+    for (int i = 0; i < count; ++i) {
+      Fingerprint fingerprint = new Fingerprint();
+      assertThat(digestMap.readDigest(keys[i], fingerprint)).isTrue();
+      Fingerprint reference =
+          new Fingerprint().addBytes(new Fingerprint().addInt(i).digestAndReset());
+      assertThat(fingerprint.hexDigestAndReset()).isEqualTo(reference.hexDigestAndReset());
+    }
+  }
+
+  @Test
+  public void concurrencyTest() throws Exception {
+    int count = 128; // Must be smaller than byte for this test or we'll overflow
+    Object[] keys = new Object[count];
+    for (int i = 0; i < count; ++i) {
+      keys[i] = new Object();
+    }
+    DigestMap digestMap = new DigestMap(16, 4);
+
+    AtomicBoolean done = new AtomicBoolean();
+    AtomicReference<Exception> exception = new AtomicReference<>();
+    List<Thread> threads = new ArrayList<>();
+    int threadCount = 16;
+    for (int i = 0; i < threadCount; ++i) {
+      Thread thread =
+          new Thread(
+              () -> {
+                Random random = new Random();
+                while (!done.get()) {
+                  int index = random.nextInt(count);
+                  Object key = keys[index];
+                  Fingerprint fingerprint = new Fingerprint();
+                  if (!digestMap.readDigest(key, fingerprint)) {
+                    Fingerprint digest = new Fingerprint().addInt(index);
+                    digestMap.insertAndReadDigest(key, digest, fingerprint);
+                  }
+                  Fingerprint reference =
+                      new Fingerprint().addBytes(new Fingerprint().addInt(index).digestAndReset());
+                  String hexDigest = fingerprint.hexDigestAndReset();
+                  String referenceDigest = reference.hexDigestAndReset();
+                  if (!hexDigest.equals(referenceDigest)) {
+                    exception.set(
+                        new IllegalStateException(
+                            String.format(
+                                "Digests are not equal: %s != %s, index %d",
+                                hexDigest, referenceDigest, index)));
+                    done.set(true);
+                  }
+                }
+              });
+      thread.start();
+      threads.add(thread);
+    }
+    Thread.sleep(1000);
+    done.set(true);
+    for (int i = 0; i < threadCount; ++i) {
+      threads.get(i).join(1000);
+    }
+    if (exception.get() != null) {
+      throw exception.get();
+    }
+  }
+}