When a fetch for an Object[] returns, cache the unwrapped contents.
By keeping the cached value as the wrapping ListenableFuture<Object[]>, we have the potential of making multiple fetches for the same fingerprint. This is illustrated by the test case - a fetch for a transitive member will only retain the ListenableFuture until it is resolved, then the cache entry becomes eligible for GC even though the underlying Object[] is still in memory.
I don't know how often this happens, but the change seems harmless and we can use the number of NestedSet reads to see if it makes any difference.
RELNOTES: None.
PiperOrigin-RevId: 298361983
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetStore.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetStore.java
index f607ce5..fcd6230 100644
--- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetStore.java
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetStore.java
@@ -115,12 +115,16 @@
public static class NestedSetCache {
/**
- * Fingerprint to {@link NestedSet#children} cache.
+ * Fingerprint to array cache.
*
* <p>The values in this cache are always {@code Object[]} or {@code
- * ListenableFuture<Object[]>}, the same as the children field in NestedSet. We avoid a common
- * wrapper object both for memory efficiency and because our cache eviction policy is based on
- * value GC, and wrapper objects would defeat that.
+ * ListenableFuture<Object[]>}. We avoid a common wrapper object both for memory efficiency and
+ * because our cache eviction policy is based on value GC, and wrapper objects would defeat
+ * that.
+ *
+ * <p>While a fetch for the contents is outstanding, the key in the cache will be a {@link
+ * ListenableFuture}. When it is resolved, it is replaced with the unwrapped {@code Object[]}.
+ * This is done because if the array is a transitive member, its future may be GC'd.
*/
private final Cache<ByteString, Object> fingerprintToContents =
CacheBuilder.newBuilder()
@@ -183,12 +187,16 @@
private void putAsync(ByteString fingerprint, ListenableFuture<Object[]> futureContents) {
futureContents.addListener(
() -> {
- // There may already be an entry here, but it's better to put a fingerprint result with
- // an immediate future, since then later readers won't need to block unnecessarily. It
- // would be nice to sanity check the old value, but Cache#put doesn't provide it to us.
try {
+ Object[] contents = Futures.getDone(futureContents);
+ // Replace the cache entry with the unwrapped contents, since the Future may be GC'd.
+ fingerprintToContents.put(fingerprint, contents);
+ // There may already be an entry here, but it's better to put a fingerprint result
+ // with an immediate future, since then later readers won't need to block
+ // unnecessarily. It would be nice to sanity check the old value, but Cache#put
+ // doesn't provide it to us.
contentsToFingerprint.put(
- Futures.getDone(futureContents),
+ contents,
FingerprintComputationResult.create(fingerprint, Futures.immediateFuture(null)));
} catch (ExecutionException e) {
diff --git a/src/test/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodecTest.java b/src/test/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodecTest.java
index 6866e39..08f4a70 100644
--- a/src/test/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodecTest.java
+++ b/src/test/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodecTest.java
@@ -40,6 +40,7 @@
import java.lang.ref.WeakReference;
import java.nio.charset.Charset;
import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import org.junit.Test;
import org.junit.runner.RunWith;
@@ -65,6 +66,46 @@
}
@Test
+ public void onlyOneReadPerArray() throws Exception {
+ NestedSet<String> base = NestedSetBuilder.create(Order.STABLE_ORDER, "a", "b");
+ NestedSet<String> top = NestedSetBuilder.fromNestedSet(base).add("c").build();
+
+ AtomicInteger reads = new AtomicInteger();
+ NestedSetStorageEndpoint endpoint =
+ new NestedSetStorageEndpoint() {
+ InMemoryNestedSetStorageEndpoint delegate = new InMemoryNestedSetStorageEndpoint();
+
+ @Override
+ public ListenableFuture<Void> put(ByteString fingerprint, byte[] serializedBytes) {
+ return delegate.put(fingerprint, serializedBytes);
+ }
+
+ @Override
+ public ListenableFuture<byte[]> get(ByteString fingerprint) {
+ reads.incrementAndGet();
+ return delegate.get(fingerprint);
+ }
+ };
+
+ ObjectCodecs serializer = createCodecs(new NestedSetStore(endpoint));
+ ByteString serializedBase = serializer.serializeMemoizedAndBlocking(base).getObject();
+ ByteString serializedTop = serializer.serializeMemoizedAndBlocking(top).getObject();
+
+ // When deserializing top, we should perform 2 reads, one for each array in [[a, b], c].
+ ObjectCodecs deserializer = createCodecs(new NestedSetStore(endpoint));
+ NestedSet<?> deserializedTop = (NestedSet<?>) deserializer.deserializeMemoized(serializedTop);
+ assertThat(deserializedTop.toList()).containsExactly("a", "b", "c");
+ assertThat(reads.get()).isEqualTo(2);
+
+ // When deserializing base, we should not need to perform any additional reads since we have
+ // already read [a, b] and it is still in memory.
+ GcFinalization.awaitFullGc();
+ NestedSet<?> deserializedBase = (NestedSet<?>) deserializer.deserializeMemoized(serializedBase);
+ assertThat(deserializedBase.toList()).containsExactly("a", "b");
+ assertThat(reads.get()).isEqualTo(2);
+ }
+
+ @Test
public void failedRetrievalHiddenUntilNestedSetIsConsumed() throws Exception {
NestedSetStorageEndpoint storageEndpoint =
new NestedSetStorageEndpoint() {