In Parser, dedupe the String instances referred to by StringLiteral instances. Implement the same optimization in the serialization code.
Some .bzl files have tons of duplicate string constants.
We can't dedupe the StringLiteral instances themselves, because they all have different Locations.
RELNOTES: None
PiperOrigin-RevId: 232495067
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/DeserializationContext.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/DeserializationContext.java
index b309479..ebea05d 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/DeserializationContext.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/DeserializationContext.java
@@ -18,10 +18,12 @@
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.devtools.build.lib.skyframe.serialization.Memoizer.Deserializer;
+import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec.MemoizationStrategy;
import com.google.devtools.build.lib.skyframe.serialization.ObjectCodecRegistry.CodecDescriptor;
import com.google.protobuf.CodedInputStream;
import java.io.IOException;
import javax.annotation.CheckReturnValue;
+import javax.annotation.Nullable;
/**
* Stateful class for providing additional context to a single deserialization "session". This class
@@ -55,8 +57,22 @@
}
// TODO(shahan): consider making codedIn a member of this class.
- @SuppressWarnings({"TypeParameterUnusedInFormals", "unchecked"})
+ @SuppressWarnings({"TypeParameterUnusedInFormals"})
public <T> T deserialize(CodedInputStream codedIn) throws IOException, SerializationException {
+ return deserializeInternal(codedIn, /*customMemoizationStrategy=*/ null);
+ }
+
+ @SuppressWarnings({"TypeParameterUnusedInFormals"})
+ public <T> T deserializeWithAdHocMemoizationStrategy(
+ CodedInputStream codedIn, MemoizationStrategy memoizationStrategy)
+ throws IOException, SerializationException {
+ return deserializeInternal(codedIn, memoizationStrategy);
+ }
+
+ @SuppressWarnings({"TypeParameterUnusedInFormals"})
+ private <T> T deserializeInternal(
+ CodedInputStream codedIn, @Nullable MemoizationStrategy customMemoizationStrategy)
+ throws IOException, SerializationException {
int tag = codedIn.readSInt32();
if (tag == 0) {
return null;
@@ -73,10 +89,15 @@
if (deserializer == null) {
return (T) codecDescriptor.deserialize(this, codedIn);
} else {
- return deserializer.deserialize(this, (ObjectCodec<T>) codecDescriptor.getCodec(), codedIn);
+ @SuppressWarnings("unchecked")
+ ObjectCodec<T> castCodec = (ObjectCodec<T>) codecDescriptor.getCodec();
+ MemoizationStrategy memoizationStrategy =
+ customMemoizationStrategy != null ? customMemoizationStrategy : castCodec.getStrategy();
+ return deserializer.deserialize(this, castCodec, memoizationStrategy, codedIn);
}
}
+
/**
* Register an initial value for the currently deserializing value, for use by child objects that
* may have references to it.
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java
index 11f1436..897242e 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java
@@ -146,9 +146,9 @@
SerializationContext context,
T obj,
ObjectCodec<? super T> codec,
- CodedOutputStream codedOut)
+ CodedOutputStream codedOut,
+ MemoizationStrategy strategy)
throws SerializationException, IOException {
- MemoizationStrategy strategy = codec.getStrategy();
if (strategy == MemoizationStrategy.DO_NOT_MEMOIZE) {
codec.serialize(context, obj, codedOut);
} else {
@@ -235,14 +235,16 @@
* @throws IOException on {@link IOException} during deserialization
*/
<T> T deserialize(
- DeserializationContext context, ObjectCodec<? extends T> codec, CodedInputStream codedIn)
+ DeserializationContext context,
+ ObjectCodec<? extends T> codec,
+ MemoizationStrategy strategy,
+ CodedInputStream codedIn)
throws SerializationException, IOException {
Preconditions.checkState(
tagForMemoizedBefore == null,
"non-null memoized-before tag %s (%s)",
tagForMemoizedBefore,
codec);
- MemoizationStrategy strategy = codec.getStrategy();
if (strategy == MemoizationStrategy.DO_NOT_MEMOIZE) {
return codec.deserialize(context, codedIn);
} else {
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/SerializationContext.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/SerializationContext.java
index 0ea80cd..f51a2fb 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/SerializationContext.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/SerializationContext.java
@@ -24,6 +24,7 @@
import com.google.common.util.concurrent.MoreExecutors;
import com.google.devtools.build.lib.bugreport.BugReport;
import com.google.devtools.build.lib.skyframe.serialization.Memoizer.Serializer;
+import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec.MemoizationStrategy;
import com.google.devtools.build.lib.skyframe.serialization.SerializationException.NoCodecException;
import com.google.protobuf.CodedOutputStream;
import java.io.IOException;
@@ -77,6 +78,20 @@
// TODO(shahan): consider making codedOut a member of this class.
public void serialize(Object object, CodedOutputStream codedOut)
throws IOException, SerializationException {
+ serializeInternal(object, /*customMemoizationStrategy=*/ null, codedOut);
+ }
+
+ public void serializeWithAdHocMemoizationStrategy(
+ Object object, MemoizationStrategy memoizationStrategy, CodedOutputStream codedOut)
+ throws IOException, SerializationException {
+ serializeInternal(object, memoizationStrategy, codedOut);
+ }
+
+ private void serializeInternal(
+ Object object,
+ @Nullable MemoizationStrategy customMemoizationStrategy,
+ CodedOutputStream codedOut)
+ throws IOException, SerializationException {
ObjectCodecRegistry.CodecDescriptor descriptor =
recordAndGetDescriptorIfNotConstantMemoizedOrNull(object, codedOut);
if (descriptor != null) {
@@ -85,7 +100,9 @@
} else {
@SuppressWarnings("unchecked")
ObjectCodec<Object> castCodec = (ObjectCodec<Object>) descriptor.getCodec();
- serializer.serialize(this, object, castCodec, codedOut);
+ MemoizationStrategy memoizationStrategy =
+ customMemoizationStrategy != null ? customMemoizationStrategy : castCodec.getStrategy();
+ serializer.serialize(this, object, castCodec, codedOut, memoizationStrategy);
}
}
}
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Parser.java b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
index dea8f33..000c6de 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
@@ -19,7 +19,9 @@
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Interner;
import com.google.common.collect.Iterables;
+import com.google.devtools.build.lib.concurrent.BlazeInterners;
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.events.EventHandler;
import com.google.devtools.build.lib.events.Location;
@@ -164,6 +166,8 @@
private int errorsCount;
private boolean recoveryMode; // stop reporting errors until next statement
+ private final Interner<String> stringInterner = BlazeInterners.newStrongInterner();
+
private Parser(Lexer lexer, EventHandler eventHandler) {
this.lexer = lexer;
this.eventHandler = eventHandler;
@@ -599,7 +603,8 @@
Preconditions.checkState(token.kind == TokenKind.STRING);
int end = token.right;
StringLiteral literal =
- setLocation(new StringLiteral((String) token.value), token.left, end);
+ setLocation(
+ new StringLiteral(stringInterner.intern((String) token.value)), token.left, end);
nextToken();
if (token.kind == TokenKind.STRING) {
@@ -958,7 +963,7 @@
if (expr instanceof StringLiteral && secondary instanceof StringLiteral) {
StringLiteral left = (StringLiteral) expr;
StringLiteral right = (StringLiteral) secondary;
- return new StringLiteral(left.getValue() + right.getValue());
+ return new StringLiteral(stringInterner.intern(left.getValue() + right.getValue()));
}
}
return new BinaryOperatorExpression(operator, expr, secondary);
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/StringLiteral.java b/src/main/java/com/google/devtools/build/lib/syntax/StringLiteral.java
index b88bbb1..f09d87d 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/StringLiteral.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/StringLiteral.java
@@ -13,6 +13,13 @@
// limitations under the License.
package com.google.devtools.build.lib.syntax;
+import com.google.devtools.build.lib.events.Location;
+import com.google.devtools.build.lib.skyframe.serialization.DeserializationContext;
+import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec;
+import com.google.devtools.build.lib.skyframe.serialization.SerializationContext;
+import com.google.devtools.build.lib.skyframe.serialization.SerializationException;
+import com.google.protobuf.CodedInputStream;
+import com.google.protobuf.CodedOutputStream;
import java.io.IOException;
/** Syntax node for a string literal. */
@@ -46,4 +53,34 @@
public Kind kind() {
return Kind.STRING_LITERAL;
}
+
+ static final class StringLiteralCodec implements ObjectCodec<StringLiteral> {
+ @Override
+ public Class<? extends StringLiteral> getEncodedClass() {
+ return StringLiteral.class;
+ }
+
+ @Override
+ public void serialize(
+ SerializationContext context, StringLiteral stringLiteral, CodedOutputStream codedOut)
+ throws SerializationException, IOException {
+ // The String instances referred to by StringLiterals are deduped by Parser, so therefore
+ // memoization is guaranteed to be profitable.
+ context.serializeWithAdHocMemoizationStrategy(
+ stringLiteral.getValue(), MemoizationStrategy.MEMOIZE_AFTER, codedOut);
+ context.serialize(stringLiteral.getLocation(), codedOut);
+ }
+
+ @Override
+ public StringLiteral deserialize(DeserializationContext context, CodedInputStream codedIn)
+ throws SerializationException, IOException {
+ String value =
+ context.deserializeWithAdHocMemoizationStrategy(
+ codedIn, MemoizationStrategy.MEMOIZE_AFTER);
+ Location location = context.deserialize(codedIn);
+ StringLiteral stringLiteral = new StringLiteral(value);
+ stringLiteral.setLocation(location);
+ return stringLiteral;
+ }
+ }
}
diff --git a/src/test/java/com/google/devtools/build/lib/syntax/ParserTest.java b/src/test/java/com/google/devtools/build/lib/syntax/ParserTest.java
index 78de4f9..4e59cdd 100644
--- a/src/test/java/com/google/devtools/build/lib/syntax/ParserTest.java
+++ b/src/test/java/com/google/devtools/build/lib/syntax/ParserTest.java
@@ -18,6 +18,7 @@
import static org.junit.Assert.fail;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Sets;
import com.google.devtools.build.lib.cmdline.Label;
import com.google.devtools.build.lib.events.Location;
import com.google.devtools.build.lib.events.Location.LineAndColumn;
@@ -27,6 +28,7 @@
import com.google.devtools.build.lib.syntax.util.EvaluationTestCase;
import java.util.LinkedList;
import java.util.List;
+import java.util.Set;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@@ -47,11 +49,14 @@
return parseFileWithComments(input).getStatements();
}
+ private BuildFileAST parseFileForSkylarkAsAST(String... input) {
+ BuildFileAST ast = BuildFileAST.parseString(getEventHandler(), input);
+ return ast.validate(env, getEventHandler());
+ }
+
/** Parses Skylark code */
private List<Statement> parseFileForSkylark(String... input) {
- BuildFileAST ast = BuildFileAST.parseString(getEventHandler(), input);
- ast = ast.validate(env, getEventHandler());
- return ast.getStatements();
+ return parseFileForSkylarkAsAST(input).getStatements();
}
private static String getText(String text, ASTNode node) {
@@ -1504,4 +1509,20 @@
")\n");
assertContainsError(":4:5: non-keyword arg after keyword arg");
}
+
+ @Test
+ public void testStringsAreDeduped() throws Exception {
+ BuildFileAST buildFileAST =
+ parseFileForSkylarkAsAST("L1 = ['cat', 'dog', 'fish']", "L2 = ['dog', 'fish', 'cat']");
+ Set<String> uniqueStringInstances = Sets.newIdentityHashSet();
+ SyntaxTreeVisitor collectAllStringsInStringLiteralsVisitor =
+ new SyntaxTreeVisitor() {
+ @Override
+ public void visit(StringLiteral stringLiteral) {
+ uniqueStringInstances.add(stringLiteral.getValue());
+ }
+ };
+ collectAllStringsInStringLiteralsVisitor.visit(buildFileAST);
+ assertThat(uniqueStringInstances).containsExactly("cat", "dog", "fish");
+ }
}