|  | // Copyright 2014 The Bazel Authors. All rights reserved. | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | // you may not use this file except in compliance with the License. | 
|  | // You may obtain a copy of the License at | 
|  | // | 
|  | //    http://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, software | 
|  | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | // See the License for the specific language governing permissions and | 
|  | // limitations under the License. | 
|  |  | 
|  | package com.google.devtools.build.lib.util; | 
|  |  | 
|  | import static java.nio.charset.StandardCharsets.UTF_8; | 
|  |  | 
|  | import com.google.common.io.ByteStreams; | 
|  | import com.google.devtools.build.lib.vfs.DigestHashFunction; | 
|  | import com.google.devtools.build.lib.vfs.Path; | 
|  | import com.google.devtools.build.lib.vfs.PathFragment; | 
|  | import com.google.errorprone.annotations.CanIgnoreReturnValue; | 
|  | import com.google.protobuf.ByteString; | 
|  | import com.google.protobuf.CodedOutputStream; | 
|  | import java.io.IOException; | 
|  | import java.security.DigestException; | 
|  | import java.security.DigestOutputStream; | 
|  | import java.security.MessageDigest; | 
|  | import java.util.Collection; | 
|  | import java.util.Map; | 
|  | import java.util.UUID; | 
|  | import javax.annotation.Nullable; | 
|  |  | 
|  | /** | 
|  | * Simplified wrapper for using {@link MessageDigest} to generate fingerprints. | 
|  | * | 
|  | * <p>A fingerprint is a cryptographic hash of a message that encodes the representation of an | 
|  | * object. Two objects of the same type have the same fingerprint if and only if they are equal. | 
|  | * This property allows fingerprints to be used as unique identifiers for objects of a particular | 
|  | * type, and for fingerprint equivalence to be used as a proxy for object equivalence, and these | 
|  | * properties hold even outside the process. Note that this is a stronger requirement than {@link | 
|  | * Object#hashCode}, which allows unequal objects to share the same hash code. | 
|  | * | 
|  | * <p>Values are added to the fingerprint by converting them to bytes and digesting the bytes. | 
|  | * Therefore, there are two potential sources of bugs: 1) a proper hash collision where two distinct | 
|  | * streams of bytes produce the same digest, and 2) a programming oversight whereby two unequal | 
|  | * values produce the same bytes, or conversely, two equal values produce distinct bytes. | 
|  | * | 
|  | * <p>The case of a hash collision is statistically very unlikely, so we just need to ensure a | 
|  | * one-to-one relationship between equality classes of values and their byte representation. A good | 
|  | * way to do this is to literally serialize the values such that there is enough information to | 
|  | * unambiguously deserialize them. For example, when serializing a list of strings ({@link | 
|  | * #addStrings}, it is enough to write each string's content along with its length, plus the overall | 
|  | * number of strings in the list. This ensures that no other list of strings can generate the same | 
|  | * bytes. Note that it is not necessary to avoid collisions between different fingerprinting methods | 
|  | * (e.g., between {@link #addStrings} and {@link #addString}) because the caller will only use one | 
|  | * or the other in a given context, or else the user is required to write a disambiguating tag if | 
|  | * both are possible. | 
|  | * | 
|  | * @see java.security.MessageDigest | 
|  | */ | 
|  | public final class Fingerprint { | 
|  |  | 
|  | // Make novel use of a CodedOutputStream, which is good at efficiently serializing data. By | 
|  | // flushing at the end of each digest we can continue to use the stream. | 
|  | private final CodedOutputStream codedOut; | 
|  | private final MessageDigest messageDigest; | 
|  |  | 
|  | /** Creates and initializes a new instance. */ | 
|  | public Fingerprint(DigestHashFunction digestFunction) { | 
|  | messageDigest = digestFunction.cloneOrCreateMessageDigest(); | 
|  | // This is a lot of indirection, but CodedOutputStream does a reasonable job of converting | 
|  | // strings to bytes without creating a whole bunch of garbage, which pays off. | 
|  | codedOut = | 
|  | CodedOutputStream.newInstance( | 
|  | new DigestOutputStream(ByteStreams.nullOutputStream(), messageDigest), | 
|  | /*bufferSize=*/ 1024); | 
|  | } | 
|  |  | 
|  | public Fingerprint() { | 
|  | // TODO(b/112460990): Use the value from DigestHashFunction.getDefault(), but check for | 
|  | // contention. | 
|  | this(DigestHashFunction.SHA256); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Completes the hash computation by doing final operations and resets the underlying state, | 
|  | * allowing this instance to be used again. | 
|  | * | 
|  | * @return the digest as a 16-byte array | 
|  | * @see java.security.MessageDigest#digest() | 
|  | */ | 
|  | public byte[] digestAndReset() { | 
|  | try { | 
|  | codedOut.flush(); | 
|  | } catch (IOException e) { | 
|  | throw new IllegalStateException("failed to flush", e); | 
|  | } | 
|  | return messageDigest.digest(); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Completes the hash computation by doing final operations and resets the underlying state, | 
|  | * allowing this instance to be used again. | 
|  | * | 
|  | * <p>Instead of returning a digest, this method writes the digest straight into the supplied byte | 
|  | * array, at the given offset. | 
|  | * | 
|  | * @see java.security.MessageDigest#digest() | 
|  | */ | 
|  | public void digestAndReset(byte[] buf, int offset, int len) { | 
|  | try { | 
|  | codedOut.flush(); | 
|  | messageDigest.digest(buf, offset, len); | 
|  | } catch (IOException e) { | 
|  | throw new IllegalStateException("failed to flush", e); | 
|  | } catch (DigestException e) { | 
|  | throw new IllegalStateException("failed to digest", e); | 
|  | } | 
|  | } | 
|  |  | 
|  | /** Same as {@link #digestAndReset()}, except returns the digest in hex string form. */ | 
|  | public String hexDigestAndReset() { | 
|  | return hexDigest(digestAndReset()); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Appends the specified bytes to the fingerprint message. Same as {@link #addBytes(byte[])}, but | 
|  | * faster when only a {@link ByteString} is available. | 
|  | * | 
|  | * <p>The fingerprint directly injects the bytes with no framing or tags added. Thus, not | 
|  | * guaranteed to be unambiguous; especially if input length is data-dependent. | 
|  | */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addBytes(ByteString bytes) { | 
|  | try { | 
|  | codedOut.writeRawBytes(bytes); | 
|  | } catch (IOException e) { | 
|  | throw new IllegalStateException("failed to write bytes", e); | 
|  | } | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** Appends the specified bytes to the fingerprint message. */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addBytes(byte[] input) { | 
|  | addBytes(input, 0, input.length); | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Appends the specified bytes to the fingerprint message, starting at offset. | 
|  | * | 
|  | * <p>The bytes are directly injected into the fingerprint with no framing or tags added. Thus, | 
|  | * not guaranteed to be unambiguous; especially if len is data-dependent. | 
|  | */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addBytes(byte[] input, int offset, int len) { | 
|  | try { | 
|  | codedOut.write(input, offset, len); | 
|  | } catch (IOException e) { | 
|  | throw new IllegalStateException("failed to write bytes", e); | 
|  | } | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** Updates the digest with a boolean value. */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addBoolean(boolean input) { | 
|  | try { | 
|  | codedOut.writeBoolNoTag(input); | 
|  | } catch (IOException e) { | 
|  | throw new IllegalStateException("failed to write bool", e); | 
|  | } | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** Same as {@link #addBoolean(boolean)}, except considers nullability. */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addNullableBoolean(Boolean input) { | 
|  | if (input == null) { | 
|  | addBoolean(false); | 
|  | } else { | 
|  | addBoolean(true); | 
|  | addBoolean(input); | 
|  | } | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** Appends an int to the fingerprint message. */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addInt(int x) { | 
|  | try { | 
|  | codedOut.writeInt32NoTag(x); | 
|  | } catch (IOException e) { | 
|  | throw new IllegalStateException("failed to write int", e); | 
|  | } | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** Appends a long to the fingerprint message. */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addLong(long x) { | 
|  | try { | 
|  | codedOut.writeInt64NoTag(x); | 
|  | } catch (IOException e) { | 
|  | throw new IllegalStateException("failed to write long", e); | 
|  | } | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** Same as {@link #addInt(int)}, except considers nullability. */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addNullableInt(@Nullable Integer input) { | 
|  | if (input == null) { | 
|  | addBoolean(false); | 
|  | } else { | 
|  | addBoolean(true); | 
|  | addInt(input); | 
|  | } | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** Appends a {@link UUID} to the fingerprint message. */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addUUID(UUID uuid) { | 
|  | addLong(uuid.getLeastSignificantBits()); | 
|  | addLong(uuid.getMostSignificantBits()); | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** Appends a String to the fingerprint message. */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addString(String input) { | 
|  | try { | 
|  | codedOut.writeStringNoTag(input); | 
|  | } catch (IOException e) { | 
|  | throw new IllegalStateException("failed to write string", e); | 
|  | } | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** Same as {@link #addString(String)}, except considers nullability. */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addNullableString(@Nullable String input) { | 
|  | if (input == null) { | 
|  | addBoolean(false); | 
|  | } else { | 
|  | addBoolean(true); | 
|  | addString(input); | 
|  | } | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** Appends a {@link Path} to the fingerprint message. */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addPath(Path input) { | 
|  | addString(input.getPathString()); | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** Appends a {@link PathFragment} to the fingerprint message. */ | 
|  | public Fingerprint addPath(PathFragment input) { | 
|  | return addString(input.getPathString()); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Appends a collection of strings to the fingerprint message as a unit. The collection must have | 
|  | * a deterministic iteration order. | 
|  | * | 
|  | * <p>The fingerprint effectively records the sequence of calls, not just the elements. That is, | 
|  | * addStrings(x+y).addStrings(z) is different from addStrings(x).addStrings(y+z). | 
|  | */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addStrings(Collection<String> inputs) { | 
|  | addInt(inputs.size()); | 
|  | for (String input : inputs) { | 
|  | addString(input); | 
|  | } | 
|  |  | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Appends an arbitrary sequence of Strings as a unit. | 
|  | * | 
|  | * <p>This is slightly less efficient than {@link #addStrings}. | 
|  | */ | 
|  | // TODO(b/150312032): Deprecate this method. | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addIterableStrings(Iterable<String> inputs) { | 
|  | for (String input : inputs) { | 
|  | addBoolean(true); | 
|  | addString(input); | 
|  | } | 
|  | addBoolean(false); | 
|  |  | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** Updates the digest with the supplied map. */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addStringMap(Map<String, String> inputs) { | 
|  | addInt(inputs.size()); | 
|  | for (Map.Entry<String, String> entry : inputs.entrySet()) { | 
|  | addString(entry.getKey()); | 
|  | addString(entry.getValue()); | 
|  | } | 
|  |  | 
|  | return this; | 
|  | } | 
|  |  | 
|  | /** Like {@link #addStrings} but for {@link PathFragment}. */ | 
|  | @CanIgnoreReturnValue | 
|  | public Fingerprint addPaths(Collection<PathFragment> inputs) { | 
|  | addInt(inputs.size()); | 
|  | for (PathFragment input : inputs) { | 
|  | addPath(input); | 
|  | } | 
|  | return this; | 
|  | } | 
|  |  | 
|  | private static String hexDigest(byte[] digest) { | 
|  | StringBuilder b = new StringBuilder(32); | 
|  | for (int i = 0; i < digest.length; i++) { | 
|  | int n = digest[i]; | 
|  | b.append("0123456789abcdef".charAt((n >> 4) & 0xF)); | 
|  | b.append("0123456789abcdef".charAt(n & 0xF)); | 
|  | } | 
|  | return b.toString(); | 
|  | } | 
|  |  | 
|  | // -------- Convenience methods ---------------------------- | 
|  |  | 
|  | /** | 
|  | * Computes the hex digest from a String using UTF8 encoding and returning the hexDigest(). | 
|  | * | 
|  | * @param input the String from which to compute the digest | 
|  | */ | 
|  | public static String getHexDigest(String input) { | 
|  | // TODO(b/112460990): This convenience method should | 
|  | // use the value from DigestHashFunction.getDefault(). However, this gets called during class | 
|  | // loading in a few places, before setDefault() has been called, so these call-sites should be | 
|  | // removed before this can be done safely. | 
|  | return hexDigest( | 
|  | DigestHashFunction.SHA256.cloneOrCreateMessageDigest().digest(input.getBytes(UTF_8))); | 
|  | } | 
|  | } |