blob: 8e1a8fc641acbc283453ca31ce77ce6716a5647b [file] [log] [blame]
// Copyright 2017 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.skyframe.serialization.strings;
import com.google.common.base.Preconditions;
import com.google.devtools.build.lib.skyframe.serialization.DeserializationContext;
import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec;
import com.google.devtools.build.lib.skyframe.serialization.SerializationContext;
import com.google.protobuf.CodedInputStream;
import com.google.protobuf.CodedOutputStream;
import java.io.IOException;
import java.lang.reflect.Field;
import java.nio.charset.StandardCharsets;
import java.security.AccessController;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
import sun.misc.Unsafe;
/**
 * Similar to {@link StringCodec}, except with deserialization optimized for ASCII data. It can
 * still handle UTF-8, though less efficiently than {@link StringCodec}. Should be used when the
 * majority of the data passing through will be ASCII.
 *
 * <p>On the ASCII fast path the bytes are widened directly into a {@code char[]}, which is then
 * installed as the {@code value} field of a {@link String} allocated through {@link
 * sun.misc.Unsafe} rather than through one of {@link String}'s constructors. This is only
 * possible when {@link String} declares a {@code value} field of type {@code char[]}.
 *
 * <p>Users <b>MUST</b> check if this class is usable by checking {@link #isAvailable()}.
 */
class FastStringCodec implements ObjectCodec<String> {

  /** Sentinel value for missing {@link #STRING_VALUE_OFFSET}. */
  private static final long UNSUPPORTED_STRING_VALUE_OFFSET = -1;

  /** The {@link Unsafe} instance used to allocate strings and write their backing array. */
  private static final Unsafe theUnsafe;

  /**
   * Offset of {@link String}'s {@code value} field as reported by {@link
   * Unsafe#objectFieldOffset}, or {@link #UNSUPPORTED_STRING_VALUE_OFFSET} if that field is not a
   * {@code char[]}.
   */
  private static final long STRING_VALUE_OFFSET;

  private static final String EMPTY_STRING = "";

  static {
    // Order matters: getStringValueOffset() uses theUnsafe.
    theUnsafe = getUnsafe();
    STRING_VALUE_OFFSET = getStringValueOffset();
  }

  /** Returns whether or not this implementation is supported. */
  static boolean isAvailable() {
    return STRING_VALUE_OFFSET != UNSUPPORTED_STRING_VALUE_OFFSET;
  }

  /**
   * Creates the codec.
   *
   * @throws IllegalStateException if {@link #isAvailable()} is false
   */
  FastStringCodec() {
    Preconditions.checkState(isAvailable(), "FastStringCodec isn't available!");
  }

  @Override
  public Class<String> getEncodedClass() {
    return String.class;
  }

  /** Writes {@code string} using {@link CodedOutputStream#writeStringNoTag}. */
  @Override
  public void serialize(SerializationContext context, String string, CodedOutputStream codedOut)
      throws IOException {
    codedOut.writeStringNoTag(string);
  }

  /**
   * Reads a length prefix followed by that many bytes and turns them into a {@link String}.
   *
   * <p>While every byte has its high bit clear the bytes are treated as ASCII and widened straight
   * into a {@code char[]}. As soon as a byte with the high bit set is seen, decoding falls back to
   * collecting all {@code length} bytes and decoding them as UTF-8.
   *
   * @throws IOException if the length prefix is negative or the underlying stream fails
   * @throws IllegalStateException if the string instance could not be created
   */
  @Override
  public String deserialize(DeserializationContext context, CodedInputStream codedIn)
      throws IOException {
    int length = codedIn.readInt32();
    if (length < 0) {
      // A corrupt stream can produce a negative prefix. Report it as an I/O error rather than
      // letting the array allocation below fail with an unchecked NegativeArraySizeException.
      throw new IOException("Invalid negative string length: " + length);
    }
    if (length == 0) {
      return EMPTY_STRING;
    }

    char[] maybeDecoded = new char[length];
    for (int i = 0; i < length; i++) {
      // Read one byte at a time to avoid creating a new ByteString/copy of the underlying array.
      byte b = codedIn.readRawByte();
      // Check highest order bit, if it's set we've crossed into extended ascii/utf8.
      if ((b & 0x80) != 0) {
        return decodeUtf8(codedIn, maybeDecoded, i, b, length);
      }
      maybeDecoded[i] = (char) b;
    }

    try {
      // Every byte was ASCII, so maybeDecoded already holds the final characters. Allocate a
      // String without running a constructor and point its value field at our array.
      String result = (String) theUnsafe.allocateInstance(String.class);
      theUnsafe.putObject(result, STRING_VALUE_OFFSET, maybeDecoded);
      return result;
    } catch (Exception e) {
      // This should only catch InstantiationException, but that makes IntelliJ unhappy for
      // some reason; it insists that that exception cannot be thrown from here, even though it
      // is set to JDK 8
      throw new IllegalStateException("Could not create string", e);
    }
  }

  /**
   * Slow path taken once a non-ASCII byte has been encountered: rebuilds the full byte sequence
   * from the already-consumed ASCII prefix, the offending byte, and the bytes still unread in
   * {@code codedIn}, then lets {@link String}'s constructor do the UTF-8 decoding work.
   *
   * @param codedIn stream positioned just after {@code firstNonAscii}
   * @param asciiPrefix characters decoded so far; only the first {@code prefixLength} are read
   * @param prefixLength number of ASCII bytes consumed before {@code firstNonAscii}
   * @param firstNonAscii the byte that had its high bit set
   * @param length total number of encoded bytes, as given by the length prefix
   */
  private static String decodeUtf8(
      CodedInputStream codedIn,
      char[] asciiPrefix,
      int prefixLength,
      byte firstNonAscii,
      int length)
      throws IOException {
    byte[] decodeFrom = new byte[length];
    for (int j = 0; j < prefixLength; j++) {
      // Each prefix char came from a byte with the high bit clear, so narrowing is lossless.
      decodeFrom[j] = (byte) asciiPrefix[j];
    }
    decodeFrom[prefixLength] = firstNonAscii;
    for (int j = prefixLength + 1; j < length; j++) {
      decodeFrom[j] = codedIn.readRawByte();
    }
    return new String(decodeFrom, StandardCharsets.UTF_8);
  }

  /**
   * Get a reference to {@link sun.misc.Unsafe} or throw an {@link AssertionError} if failing to do
   * so. Failure is highly unlikely, but possible if the underlying VM stores unsafe in an
   * unexpected location.
   */
  private static Unsafe getUnsafe() {
    try {
      // sun.misc.Unsafe is intentionally difficult to get a hold of - it gives us the power to
      // do things like access raw memory and segfault the JVM.
      return AccessController.doPrivileged(
          new PrivilegedExceptionAction<Unsafe>() {
            @Override
            public Unsafe run() throws Exception {
              Class<Unsafe> unsafeClass = Unsafe.class;
              // Unsafe usually exists in the field 'theUnsafe', however check all fields
              // in case it's somewhere else in this VM's version of Unsafe.
              for (Field f : unsafeClass.getDeclaredFields()) {
                f.setAccessible(true);
                Object fieldValue = f.get(null);
                if (unsafeClass.isInstance(fieldValue)) {
                  return unsafeClass.cast(fieldValue);
                }
              }
              throw new AssertionError("Failed to find sun.misc.Unsafe instance");
            }
          });
    } catch (PrivilegedActionException pae) {
      throw new AssertionError("Unable to get sun.misc.Unsafe", pae);
    }
  }

  /**
   * Looks up the offset of {@link String}'s {@code value} field.
   *
   * @return the field offset, or {@link #UNSUPPORTED_STRING_VALUE_OFFSET} if the field exists but
   *     is not a {@code char[]}
   * @throws AssertionError if the field cannot be looked up at all
   */
  private static long getStringValueOffset() {
    try {
      // We expect a String's value field to be a char[] - if that's not the case then we're
      // probably on a more modern JDK and this optimization isn't available.
      Field valueField = String.class.getDeclaredField("value");
      Class<?> valueFieldType = valueField.getType();
      if (valueFieldType.equals(char[].class)) {
        return theUnsafe.objectFieldOffset(valueField);
      } else {
        // value was of a different type, bail.
        return UNSUPPORTED_STRING_VALUE_OFFSET;
      }
    } catch (NoSuchFieldException | SecurityException e) {
      throw new AssertionError("Failed to find String's 'value' field/offset", e);
    }
  }
}