| // Copyright 2014 Google Inc. All rights reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| /* |
| * Copyright (C) 2012 The Guava Authors |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.google.devtools.build.lib.collect; |
| |
| import com.google.common.base.Preconditions; |
| import com.google.common.primitives.Ints; |
| |
| import java.io.IOException; |
| import java.io.InvalidObjectException; |
| import java.io.ObjectInputStream; |
| import java.io.ObjectOutputStream; |
| import java.io.Serializable; |
| import java.lang.reflect.Array; |
| import java.util.AbstractSet; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.ConcurrentModificationException; |
| import java.util.Iterator; |
| import java.util.NoSuchElementException; |
| import java.util.Objects; |
| |
| import javax.annotation.Nullable; |
| |
| /** |
| * CompactHashSet is an implementation of a Set. All optional operations (adding and |
| * removing) are supported. The elements can be any objects. |
| * |
| * <p>{@code contains(x)}, {@code add(x)} and {@code remove(x)}, are all (expected and amortized) |
| * constant time operations. Expected in the hashtable sense (depends on the hash function |
| * doing a good job of distributing the elements to the buckets to a distribution not far from |
| * uniform), and amortized since some operations can trigger a hash table resize. |
| * |
| * <p>Unlike {@code java.util.HashSet}, iteration is only proportional to the actual |
| * {@code size()}, which is optimal, and <i>not</i> the size of the internal hashtable, |
| * which could be much larger than {@code size()}. Furthermore, this structure only depends |
| * on a fixed number of arrays; {@code add(x)} operations <i>do not</i> create objects |
| * for the garbage collector to deal with, and for every element added, the garbage collector |
| * will have to traverse {@code 1.5} references on average, in the marking phase, not {@code 5.0} |
| * as in {@code java.util.HashSet}. |
| * |
| * <p>If there are no removals, then {@link #iterator iteration} order is the same as insertion |
| * order. Any removal invalidates any ordering guarantees. |
| */ |
| // TODO(bazel-team): This was branched of an internal version of guava. If the class is released, we |
| // should remove this again. |
| public class CompactHashSet<E> extends AbstractSet<E> implements Serializable { |
| // TODO(bazel-team): cache all field accesses in local vars |
| |
| // A partial copy of com.google.common.collect.Hashing. |
| private static final int C1 = 0xcc9e2d51; |
| private static final int C2 = 0x1b873593; |
| |
| /* |
| * This method was rewritten in Java from an intermediate step of the Murmur hash function in |
| * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp, which contained the |
| * following header: |
| * |
| * MurmurHash3 was written by Austin Appleby, and is placed in the public domain. The author |
| * hereby disclaims copyright to this source code. |
| */ |
| private static int smear(int hashCode) { |
| return C2 * Integer.rotateLeft(hashCode * C1, 15); |
| } |
| |
| private static int smearedHash(@Nullable Object o) { |
| return smear((o == null) ? 0 : o.hashCode()); |
| } |
| |
| private static final int MAX_TABLE_SIZE = Ints.MAX_POWER_OF_TWO; |
| |
| private static int closedTableSize(int expectedEntries, double loadFactor) { |
| // Get the recommended table size. |
| // Round down to the nearest power of 2. |
| expectedEntries = Math.max(expectedEntries, 2); |
| int tableSize = Integer.highestOneBit(expectedEntries); |
| // Check to make sure that we will not exceed the maximum load factor. |
| if (expectedEntries > (int) (loadFactor * tableSize)) { |
| tableSize <<= 1; |
| return (tableSize > 0) ? tableSize : MAX_TABLE_SIZE; |
| } |
| return tableSize; |
| } |
| |
| /** |
| * Creates an empty {@code CompactHashSet} instance. |
| */ |
| public static <E> CompactHashSet<E> create() { |
| return new CompactHashSet<E>(); |
| } |
| |
| /** |
| * Creates a <i>mutable</i> {@code CompactHashSet} instance containing the elements |
| * of the given collection in unspecified order. |
| * |
| * @param collection the elements that the set should contain |
| * @return a new {@code CompactHashSet} containing those elements (minus duplicates) |
| */ |
| public static <E> CompactHashSet<E> create(Collection<? extends E> collection) { |
| CompactHashSet<E> set = createWithExpectedSize(collection.size()); |
| set.addAll(collection); |
| return set; |
| } |
| |
| /** |
| * Creates a <i>mutable</i> {@code CompactHashSet} instance containing the given |
| * elements in unspecified order. |
| * |
| * @param elements the elements that the set should contain |
| * @return a new {@code CompactHashSet} containing those elements (minus duplicates) |
| */ |
| @SafeVarargs |
| public static <E> CompactHashSet<E> create(E... elements) { |
| CompactHashSet<E> set = createWithExpectedSize(elements.length); |
| Collections.addAll(set, elements); |
| return set; |
| } |
| |
| /** |
| * Creates a {@code CompactHashSet} instance, with a high enough "initial capacity" |
| * that it <i>should</i> hold {@code expectedSize} elements without growth. |
| * |
| * @param expectedSize the number of elements you expect to add to the returned set |
| * @return a new, empty {@code CompactHashSet} with enough capacity to hold {@code |
| * expectedSize} elements without resizing |
| * @throws IllegalArgumentException if {@code expectedSize} is negative |
| */ |
| public static <E> CompactHashSet<E> createWithExpectedSize(int expectedSize) { |
| return new CompactHashSet<E>(expectedSize); |
| } |
| |
| private static final int MAXIMUM_CAPACITY = 1 << 30; |
| |
| // TODO(bazel-team): decide, and inline, load factor. 0.75? |
| private static final float DEFAULT_LOAD_FACTOR = 1.0f; |
| |
| /** |
| * Bitmask that selects the low 32 bits. |
| */ |
| private static final long NEXT_MASK = (1L << 32) - 1; |
| |
| /** |
| * Bitmask that selects the high 32 bits. |
| */ |
| private static final long HASH_MASK = ~NEXT_MASK; |
| |
| // TODO(bazel-team): decide default size |
| private static final int DEFAULT_SIZE = 3; |
| |
| static final int UNSET = -1; |
| |
| /** |
| * The hashtable. Its values are indexes to both the elements and entries arrays. |
| * |
| * Currently, the UNSET value means "null pointer", and any non negative value x is |
| * the actual index. |
| * |
| * Its size must be a power of two. |
| */ |
| private transient int[] table; |
| |
| /** |
| * Contains the logical entries, in the range of [0, size()). The high 32 bits of each |
| * long is the smeared hash of the element, whereas the low 32 bits is the "next" pointer |
| * (pointing to the next entry in the bucket chain). The pointers in [size(), entries.length) |
| * are all "null" (UNSET). |
| */ |
| private transient long[] entries; |
| |
| /** |
| * The elements contained in the set, in the range of [0, size()). |
| */ |
| transient Object[] elements; |
| |
| /** |
| * The load factor. |
| */ |
| transient float loadFactor; |
| |
| /** |
| * Keeps track of modifications of this set, to make it possible to throw |
| * ConcurrentModificationException in the iterator. Note that we choose not to |
| * make this volatile, so we do less of a "best effort" to track such errors, |
| * for better performance. |
| */ |
| transient int modCount; |
| |
| /** |
| * When we have this many elements, resize the hashtable. |
| */ |
| private transient int threshold; |
| |
| /** |
| * The number of elements contained in the set. |
| */ |
| private transient int size; |
| |
| /** |
| * Constructs a new empty instance of {@code CompactHashSet}. |
| */ |
| CompactHashSet() { |
| init(DEFAULT_SIZE, DEFAULT_LOAD_FACTOR); |
| } |
| |
| /** |
| * Constructs a new instance of {@code CompactHashSet} with the specified capacity. |
| * |
| * @param expectedSize the initial capacity of this {@code CompactHashSet}. |
| */ |
| CompactHashSet(int expectedSize) { |
| init(expectedSize, DEFAULT_LOAD_FACTOR); |
| } |
| |
| /** |
| * Pseudoconstructor for serialization support. |
| */ |
| void init(int expectedSize, float loadFactor) { |
| Preconditions.checkArgument(expectedSize >= 0, "Initial capacity must be non-negative"); |
| Preconditions.checkArgument(loadFactor > 0, "Illegal load factor"); |
| int buckets = closedTableSize(expectedSize, loadFactor); |
| this.table = newTable(buckets); |
| this.loadFactor = loadFactor; |
| this.elements = new Object[expectedSize]; |
| this.entries = newEntries(expectedSize); |
| this.threshold = Math.max(1, (int) (buckets * loadFactor)); |
| } |
| |
| private static int[] newTable(int size) { |
| int[] array = new int[size]; |
| Arrays.fill(array, UNSET); |
| return array; |
| } |
| |
| private static long[] newEntries(int size) { |
| long[] array = new long[size]; |
| Arrays.fill(array, UNSET); |
| return array; |
| } |
| |
| private static int getHash(long entry) { |
| return (int) (entry >>> 32); |
| } |
| |
| /** |
| * Returns the index, or UNSET if the pointer is "null" |
| */ |
| private static int getNext(long entry) { |
| return (int) entry; |
| } |
| |
| /** |
| * Returns a new entry value by changing the "next" index of an existing entry |
| */ |
| private static long swapNext(long entry, int newNext) { |
| return (HASH_MASK & entry) | (NEXT_MASK & newNext); |
| } |
| |
| private int hashTableMask() { |
| return table.length - 1; |
| } |
| |
| @Override |
| public boolean add(@Nullable E object) { |
| long[] entries = this.entries; |
| Object[] elements = this.elements; |
| int hash = smearedHash(object); |
| int tableIndex = hash & hashTableMask(); |
| int newEntryIndex = this.size; // current size, and pointer to the entry to be appended |
| int next = table[tableIndex]; |
| if (next == UNSET) { // uninitialized bucket |
| table[tableIndex] = newEntryIndex; |
| } else { |
| int last; |
| long entry; |
| do { |
| last = next; |
| entry = entries[next]; |
| if (getHash(entry) == hash && Objects.equals(object, elements[next])) { |
| return false; |
| } |
| next = getNext(entry); |
| } while (next != UNSET); |
| entries[last] = swapNext(entry, newEntryIndex); |
| } |
| if (newEntryIndex == Integer.MAX_VALUE) { |
| throw new IllegalStateException("Cannot contain more than Integer.MAX_VALUE elements!"); |
| } |
| int newSize = newEntryIndex + 1; |
| resizeMeMaybe(newSize); |
| insertEntry(newEntryIndex, object, hash); |
| this.size = newSize; |
| if (newEntryIndex >= threshold) { |
| resizeTable(2 * table.length); |
| } |
| modCount++; |
| return true; |
| } |
| |
| /** |
| * Creates a fresh entry with the specified object at the specified position in the entry |
| * arrays. |
| */ |
| void insertEntry(int entryIndex, E object, int hash) { |
| this.entries[entryIndex] = ((long) hash << 32) | (NEXT_MASK & UNSET); |
| this.elements[entryIndex] = object; |
| } |
| |
| /** |
| * Returns currentSize + 1, after resizing the entries storage if necessary. |
| */ |
| private void resizeMeMaybe(int newSize) { |
| int entriesSize = entries.length; |
| if (newSize > entriesSize) { |
| int newCapacity = entriesSize + Math.max(1, entriesSize >>> 1); |
| if (newCapacity < 0) { |
| newCapacity = Integer.MAX_VALUE; |
| } |
| if (newCapacity != entriesSize) { |
| resizeEntries(newCapacity); |
| } |
| } |
| } |
| |
| /** |
| * Resizes the internal entries array to the specified capacity, which may be greater or less |
| * than the current capacity. |
| */ |
| void resizeEntries(int newCapacity) { |
| this.elements = Arrays.copyOf(elements, newCapacity); |
| long[] entries = this.entries; |
| int oldSize = entries.length; |
| entries = Arrays.copyOf(entries, newCapacity); |
| if (newCapacity > oldSize) { |
| Arrays.fill(entries, oldSize, newCapacity, UNSET); |
| } |
| this.entries = entries; |
| } |
| |
| private void resizeTable(int newCapacity) { // newCapacity always a power of two |
| int[] oldTable = table; |
| int oldCapacity = oldTable.length; |
| if (oldCapacity >= MAXIMUM_CAPACITY) { |
| threshold = Integer.MAX_VALUE; |
| return; |
| } |
| int newThreshold = 1 + (int) (newCapacity * loadFactor); |
| int[] newTable = newTable(newCapacity); |
| long[] entries = this.entries; |
| |
| int mask = newTable.length - 1; |
| for (int i = 0; i < size; i++) { |
| long oldEntry = entries[i]; |
| int hash = getHash(oldEntry); |
| int tableIndex = hash & mask; |
| int next = newTable[tableIndex]; |
| newTable[tableIndex] = i; |
| entries[i] = ((long) hash << 32) | (NEXT_MASK & next); |
| } |
| |
| this.threshold = newThreshold; |
| this.table = newTable; |
| } |
| |
| @Override |
| public boolean contains(@Nullable Object object) { |
| int hash = smearedHash(object); |
| int next = table[hash & hashTableMask()]; |
| while (next != UNSET) { |
| long entry = entries[next]; |
| if (getHash(entry) == hash && Objects.equals(object, elements[next])) { |
| return true; |
| } |
| next = getNext(entry); |
| } |
| return false; |
| } |
| |
| @Override |
| public boolean remove(@Nullable Object object) { |
| return remove(object, smearedHash(object)); |
| } |
| |
| private boolean remove(Object object, int hash) { |
| int tableIndex = hash & hashTableMask(); |
| int next = table[tableIndex]; |
| if (next == UNSET) { |
| return false; |
| } |
| int last = UNSET; |
| do { |
| if (getHash(entries[next]) == hash && Objects.equals(object, elements[next])) { |
| if (last == UNSET) { |
| // we need to update the root link from table[] |
| table[tableIndex] = getNext(entries[next]); |
| } else { |
| // we need to update the link from the chain |
| entries[last] = swapNext(entries[last], getNext(entries[next])); |
| } |
| |
| moveEntry(next); |
| size--; |
| modCount++; |
| return true; |
| } |
| last = next; |
| next = getNext(entries[next]); |
| } while (next != UNSET); |
| return false; |
| } |
| |
| /** |
| * Moves the last entry in the entry array into {@code dstIndex}, and nulls out its old position. |
| */ |
| void moveEntry(int dstIndex) { |
| int srcIndex = size() - 1; |
| if (dstIndex < srcIndex) { |
| // move last entry to deleted spot |
| elements[dstIndex] = elements[srcIndex]; |
| elements[srcIndex] = null; |
| |
| // move the last entry to the removed spot, just like we moved the element |
| long lastEntry = entries[srcIndex]; |
| entries[dstIndex] = lastEntry; |
| entries[srcIndex] = UNSET; |
| |
| // also need to update whoever's "next" pointer was pointing to the last entry place |
| // reusing "tableIndex" and "next"; these variables were no longer needed |
| int tableIndex = getHash(lastEntry) & hashTableMask(); |
| int lastNext = table[tableIndex]; |
| if (lastNext == srcIndex) { |
| // we need to update the root pointer |
| table[tableIndex] = dstIndex; |
| } else { |
| // we need to update a pointer in an entry |
| int previous; |
| long entry; |
| do { |
| previous = lastNext; |
| lastNext = getNext(entry = entries[lastNext]); |
| } while (lastNext != srcIndex); |
| // here, entries[previous] points to the old entry location; update it |
| entries[previous] = swapNext(entry, dstIndex); |
| } |
| } else { |
| elements[dstIndex] = null; |
| entries[dstIndex] = UNSET; |
| } |
| } |
| |
| @Override |
| public Iterator<E> iterator() { |
| return new Iterator<E>() { |
| int expectedModCount = modCount; |
| boolean nextCalled = false; |
| int index = 0; |
| |
| @Override |
| public boolean hasNext() { |
| return index < size; |
| } |
| |
| @Override |
| @SuppressWarnings("unchecked") |
| public E next() { |
| checkForConcurrentModification(); |
| if (!hasNext()) { |
| throw new NoSuchElementException(); |
| } |
| nextCalled = true; |
| return (E) elements[index++]; |
| } |
| |
| @Override |
| public void remove() { |
| checkForConcurrentModification(); |
| Preconditions.checkState(nextCalled, "no calls to next() since the last call to remove()"); |
| expectedModCount++; |
| index--; |
| CompactHashSet.this.remove(elements[index], getHash(entries[index])); |
| nextCalled = false; |
| } |
| |
| private void checkForConcurrentModification() { |
| if (modCount != expectedModCount) { |
| throw new ConcurrentModificationException(); |
| } |
| } |
| }; |
| } |
| |
| @Override |
| public int size() { |
| return size; |
| } |
| |
| @Override |
| public boolean isEmpty() { |
| return size == 0; |
| } |
| |
| @Override |
| public Object[] toArray() { |
| return Arrays.copyOf(elements, size); |
| } |
| |
| @SuppressWarnings("unchecked") |
| @Override |
| public <T> T[] toArray(T[] a) { |
| if (a.length < size) { |
| a = (T[]) Array.newInstance(a.getClass().getComponentType(), size); |
| } |
| System.arraycopy(elements, 0, a, 0, size); |
| return a; |
| } |
| |
| /** |
| * Ensures that this {@code CompactHashSet} has the smallest representation in memory, |
| * given its current size. |
| */ |
| public void trimToSize() { |
| int size = this.size; |
| if (size < entries.length) { |
| resizeEntries(size); |
| } |
| // size / loadFactor gives the table size of the appropriate load factor, |
| // but that may not be a power of two. We floor it to a power of two by |
| // keeping its highest bit. But the smaller table may have a load factor |
| // larger than what we want; then we want to go to the next power of 2 if we can |
| int minimumTableSize = Math.max(1, Integer.highestOneBit((int) (size / loadFactor))); |
| if (minimumTableSize < MAXIMUM_CAPACITY) { |
| double load = (double) size / minimumTableSize; |
| if (load > loadFactor) { |
| minimumTableSize <<= 1; // increase to next power if possible |
| } |
| } |
| |
| if (minimumTableSize < table.length) { |
| resizeTable(minimumTableSize); |
| } |
| } |
| |
| @Override |
| public void clear() { |
| modCount++; |
| Arrays.fill(elements, 0, size, null); |
| Arrays.fill(table, UNSET); |
| Arrays.fill(entries, UNSET); |
| this.size = 0; |
| } |
| |
| private void writeObject(ObjectOutputStream stream) throws IOException { |
| stream.defaultWriteObject(); |
| stream.writeInt(table.length); |
| stream.writeFloat(loadFactor); |
| stream.writeInt(size); |
| for (E e : this) { |
| stream.writeObject(e); |
| } |
| } |
| |
| @SuppressWarnings("unchecked") |
| private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException { |
| stream.defaultReadObject(); |
| int length = stream.readInt(); |
| float loadFactor = stream.readFloat(); |
| int elementCount = stream.readInt(); |
| try { |
| init(length, loadFactor); |
| } catch (IllegalArgumentException e) { |
| throw new InvalidObjectException(e.getMessage()); |
| } |
| for (int i = elementCount; --i >= 0;) { |
| E element = (E) stream.readObject(); |
| add(element); |
| } |
| } |
| } |