blob: 11b866421005d1ec682e3627c129ca74652b0793 [file] [log] [blame]
// Copyright 2024 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.skyframe.serialization;
import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.google.auto.value.AutoValue;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.devtools.build.lib.skyframe.serialization.FingerprintValueStore.MissingFingerprintValueException;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import javax.annotation.Nullable;
/**
* A bidirectional, in-memory, weak cache storing fingerprint ↔ value associations for the {@link
* FingerprintValueStore}.
*
* <p>The cache supports the possibility of semantically different values having the same serialized
* representation. For this reason, a distinguisher object can be included in the key for the
* fingerprint ⇒ value mapping. This object should encapsulate all additional context necessary to
* deserialize a value. The value ⇒ fingerprint mapping, on the other hand, is expected to be
* deterministic. See {@link FingerprintWithDistinguisher}.
*/
public final class FingerprintValueCache {
/**
* Fingerprint to value cache.
*
* <p>Used to deduplicate fetches, or in some cases, where the object to be fetched was already
* serialized, retrieves the already existing object.
*
* <p>The keys can either be a {@link ByteString} or a {@link FingerprintWithDistinguisher}.
*
* <p>The values in this cache are always {@code Object} or {@code ListenableFuture<Object>}. We
* avoid a common wrapper object both for memory efficiency and because our cache eviction policy
* is based on value GC, and wrapper objects would defeat that.
*
* <p>While a fetch for the contents is outstanding, the value in the cache will be a {@link
* ListenableFuture}. When it is resolved, it is replaced with the unwrapped {@code Object}.
*/
private final Cache<Object, Object> deserializationCache =
Caffeine.newBuilder()
.initialCapacity(SerializationConstants.DESERIALIZATION_POOL_SIZE)
.weakValues()
.build();
/**
* {@link Object} contents to store result mapping, eventually a fingerprint, but a future while
* in-flight or in case of errors.
*
* <p>This cache deduplicates serializing the same contents to the {@link FingerprintValueStore}.
* Its entries are as follows.
*
* <ul>
* <li>key: the content value object, using reference equality
* <li>value: either a {@code ListenableFuture<PutOperation>} when the operation is in flight or
* a {@link ByteString} fingerprint when it is complete
* </ul>
*
* <p>{@code ListenableFuture<PutOperation>} contains two distinct asynchronous operations.
*
* <ul>
* <li><em>Outer {@code ListenableFuture}</em>: represents the asynchronous completion of
* serialization, fingerprinting and the initialization of the {@link
* FingerprintValueStore#put} operation.
* <li><em>{@link PutOperation#writeStatus}</em>: represents the completion of the {@link
* FingerprintValueStore#put} operation.
* </ul>
*/
private final Cache<Object, Object> serializationCache =
Caffeine.newBuilder()
.initialCapacity(SerializationConstants.DESERIALIZATION_POOL_SIZE)
.weakKeys()
.build();
/**
* Disables population of {@link #deserializationCache} during serialization.
*
* <p>When serialization completes, the usual behavior is to populate the deserialization cache so
* that any subsequent deserialization of the value will hit the cache. This blocks the round-trip
* serialization testing of deserialization code paths as a side effect. Setting this true
* disables population of the deserialization cache on serialization.
*/
private final boolean exerciseDeserializationForTesting;
public FingerprintValueCache() {
this(/* exerciseDeserializationForTesting= */ false);
}
@VisibleForTesting
public FingerprintValueCache(boolean exerciseDeserializationForTesting) {
this.exerciseDeserializationForTesting = exerciseDeserializationForTesting;
}
/**
* Gets the result of a previous {@code putOperation} or registers a new one for {@code obj}, the
* {@link #serializationCache} key.
*
* <p>If the {@code obj} has already been serialized or if its serialization is in-flight, returns
* a non-null object that may be either:
*
* <ul>
* <li>a {@code ListenableFuture<PutOperation>} if it is still in flight; or
* <li>a {@link ByteString} fingerprint if writing to remote storage is successful.
* </ul>
*
* <p>If a {@code ListenableFuture<PutOperation>} is returned, its expected {@link
* ExecutionException} causes are {@link SerializationException} and {@link IOException}. The
* caller must ensure that these are the only possible causes.
*
* <p>If a previous operation is returned, {@code putOperation} is ignored. Otherwise, if a null
* value is returned, the caller owns the {@code putOperation} and must ensure it completes and
* handle its errors.
*
* @param distinguisher an optional key distinguisher, see {@link FingerprintWithDistinguisher}
*/
@Nullable
Object getOrClaimPutOperation(
Object obj, @Nullable Object distinguisher, ListenableFuture<PutOperation> putOperation) {
// Any contention here is caused by two threads racing to serialize the same object. Since
// the serialization is pure CPU work, it's tempting to simplify this code by using
// `computeIfAbsent` instead. That unfortunately leads to recursive `ConcurrentMap` updates,
// which isn't supported.
Object previous = serializationCache.asMap().putIfAbsent(obj, putOperation);
if (previous != null) {
return previous;
}
unwrapFingerprintWhenDone(obj, distinguisher, putOperation);
return null;
}
/**
* Gets the result of a previous {@code getOperation} or registers a new one for {@code
* fingerprint}, the {@link #deserializationCache} key.
*
* <p>This is used to avoid deduplicate fetches or fetching an object that had already been
* serialized from this cache. If the key is for an already stored or retrieved object, or one
* where retrievial is in-flight, returns a non-null value that can be one of the following.
*
* <ul>
* <li>a {@code ListenableFuture<Object>} if retrieval is in-flight; or
* <li>an {@link Object} if it is already known for the key.
* </ul>
*
* <p>If a {@code ListenableFuture<Object>} is returned, its possible {@link ExecutionException}
* causes are {@link SerializationException}, {@link IOException} and {@link
* MissingFingerprintValueException}. The caller must ensure these are the only possible causes.
*
* <p>If a non-null value is returned, {@code getOperation} is ignored. Otherwise, when this
* returns null, the caller must ensure that {@code getOperation} is eventually completed with the
* or an error. The caller is responsible for handling errors.
*
* @param distinguisher an optional distinguisher, see {@link FingerprintWithDistinguisher}
*/
@Nullable
Object getOrClaimGetOperation(
ByteString fingerprint,
@Nullable Object distinguisher,
ListenableFuture<Object> getOperation) {
Object key = createKey(fingerprint, distinguisher);
Object previous = deserializationCache.asMap().putIfAbsent(key, getOperation);
if (previous != null) {
return previous;
}
unwrapValueWhenDone(fingerprint, key, getOperation);
return null;
}
/** Populates the reverse mapping and unwraps futures when they are no longer needed. */
private void unwrapFingerprintWhenDone(
Object obj, @Nullable Object distinguisher, ListenableFuture<PutOperation> putOperation) {
Futures.addCallback(
putOperation,
new FutureCallback<PutOperation>() {
@Override
public void onSuccess(PutOperation operation) {
// Serialization and fingerprinting has succeeded and storing the bytes in the
// FingerprintValueStore has started.
// Stores the reverse mapping in `deserializationCache`.
if (!exerciseDeserializationForTesting) {
deserializationCache.put(createKey(operation.fingerprint(), distinguisher), obj);
}
// It's possible to discard the outermost future at this point and only keep the
// PutOperation instead, but for simplicity, discards both once both succeed.
Futures.addCallback(
operation.writeStatus(),
new FutureCallback<Void>() {
@Override
public void onSuccess(Void unused) {
// The object has been successfully written to remote storage. Discards all the
// wrappers.
serializationCache.put(obj, operation.fingerprint());
}
@Override
public void onFailure(Throwable t) {
// Failure will be reported by the owner of `putOperation`.
}
},
directExecutor());
}
@Override
public void onFailure(Throwable t) {
// Failure will be reported by the owner of `putOperation`.
}
},
directExecutor());
}
/** Unwraps the future and populates the reverse mapping when done. */
private void unwrapValueWhenDone(
ByteString fingerprint, Object key, ListenableFuture<Object> getOperation) {
Futures.addCallback(
getOperation,
new FutureCallback<Object>() {
@Override
public void onSuccess(Object value) {
deserializationCache.put(key, value);
serializationCache.put(value, fingerprint);
}
@Override
public void onFailure(Throwable t) {
// Failure will be reported by the owner of the `getOperation`.
}
},
directExecutor());
}
private static Object createKey(ByteString fingerprint, @Nullable Object distinguisher) {
if (distinguisher == null) {
return fingerprint;
}
return FingerprintWithDistinguisher.of(fingerprint, distinguisher);
}
/**
* An extended {@link #deserializationCache} key, needed when the fingerprint alone is not enough.
*
* <p>The mapping stores a bidirectional fingerprint to value associations. However, there can be
* multiple values for the same fingerprint. For example, consider the parent and child objects
* (A, B). Suppose that both A and B share a common value S. When serializing B, S may be omitted
* because it is already known to A and can be reinjected during deserialization.
*
* <p>The problem is that the fingerprint of B does not include anything about the shared value S.
* So it could collide on fingerprint with some other (C, D) with a different shared value T.
*
* <p>Including a <em>distinguisher</em> in the key to account for the contextual value can be
* used to avoid conflicts in cases like this.
*/
@AutoValue
abstract static class FingerprintWithDistinguisher {
/** The primary key for a {@link #deserializationCache} entry. */
abstract ByteString fingerprint();
/** A secondary key, sometimes needed to resolve ambiguity. */
abstract Object distinguisher();
static FingerprintWithDistinguisher of(ByteString fingerprint, Object distinguisher) {
return new AutoValue_FingerprintValueCache_FingerprintWithDistinguisher(
fingerprint, distinguisher);
}
}
}