Add --deduplicate_depsets to aquery.
This flag would instruct aquery to remove duplicate subsets in dep_set_of_files
before writing the proto/textproto/json output. The duplication caused the
aquery output to be corrupt in some cases, an issue which we haven't managed to
get to the bottom of yet. Duplicate subsets carry little meaning anyway, since
in the end we mostly care about the effective collection of inputs of actions.
It's already not possible to create NestedSets with duplicate subsets using
NestedSetBuilder.
The deduplication is done with a flag in case we want to study the depsets with
duplicate subsets, which is really the unfortunate exception rather than the
norm. The flag is set to True by default.
PiperOrigin-RevId: 375107739
diff --git a/src/main/java/com/google/devtools/build/lib/buildtool/AqueryBuildTool.java b/src/main/java/com/google/devtools/build/lib/buildtool/AqueryBuildTool.java
index badfa6a..515d68f 100644
--- a/src/main/java/com/google/devtools/build/lib/buildtool/AqueryBuildTool.java
+++ b/src/main/java/com/google/devtools/build/lib/buildtool/AqueryBuildTool.java
@@ -85,6 +85,7 @@
aqueryOptions.includeArtifacts,
actionFilters,
aqueryOptions.includeParamFiles,
+ aqueryOptions.deduplicateDepsets,
aqueryOutputHandler);
((SequencedSkyframeExecutor) env.getSkyframeExecutor()).dumpSkyframeState(actionGraphDump);
} catch (InvalidAqueryOutputFormatException e) {
diff --git a/src/main/java/com/google/devtools/build/lib/buildtool/BuildTool.java b/src/main/java/com/google/devtools/build/lib/buildtool/BuildTool.java
index 34ef531..f63e234 100644
--- a/src/main/java/com/google/devtools/build/lib/buildtool/BuildTool.java
+++ b/src/main/java/com/google/devtools/build/lib/buildtool/BuildTool.java
@@ -347,6 +347,7 @@
/* includeArtifacts= */ true,
/* actionFilters= */ null,
/* includeParamFiles= */ false,
+ /* deduplicateDepsets= */ true,
aqueryOutputHandler);
((SequencedSkyframeExecutor) env.getSkyframeExecutor()).dumpSkyframeState(actionGraphDump);
}
diff --git a/src/main/java/com/google/devtools/build/lib/query2/aquery/ActionGraphProtoOutputFormatterCallback.java b/src/main/java/com/google/devtools/build/lib/query2/aquery/ActionGraphProtoOutputFormatterCallback.java
index e29e5b2..83ab647 100644
--- a/src/main/java/com/google/devtools/build/lib/query2/aquery/ActionGraphProtoOutputFormatterCallback.java
+++ b/src/main/java/com/google/devtools/build/lib/query2/aquery/ActionGraphProtoOutputFormatterCallback.java
@@ -62,6 +62,7 @@
options.includeArtifacts,
this.actionFilters,
options.includeParamFiles,
+ options.deduplicateDepsets,
aqueryOutputHandler);
}
diff --git a/src/main/java/com/google/devtools/build/lib/query2/aquery/AqueryOptions.java b/src/main/java/com/google/devtools/build/lib/query2/aquery/AqueryOptions.java
index 1a63805..83aac99 100644
--- a/src/main/java/com/google/devtools/build/lib/query2/aquery/AqueryOptions.java
+++ b/src/main/java/com/google/devtools/build/lib/query2/aquery/AqueryOptions.java
@@ -82,4 +82,15 @@
},
help = "No-op.")
public boolean protoV2;
+
+ @Option(
+ name = "deduplicate_depsets",
+ defaultValue = "true",
+ documentationCategory = OptionDocumentationCategory.QUERY,
+ effectTags = {OptionEffectTag.TERMINAL_OUTPUT},
+ help =
+ "De-duplicate non-leaf children of a dep_set_of_files in the final proto/textproto/json"
+ + " output. This does not deduplicate depsets that don't share an immediate parent."
+ + " This does not affect the final effective list of input artifacts of the actions.")
+ public boolean deduplicateDepsets;
}
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/ActionGraphDump.java b/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/ActionGraphDump.java
index 7dd45a1..6efefd5 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/ActionGraphDump.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/ActionGraphDump.java
@@ -70,6 +70,7 @@
boolean includeArtifacts,
AqueryActionFilter actionFilters,
boolean includeParamFiles,
+ boolean deduplicateDepsets,
AqueryOutputHandler aqueryOutputHandler) {
this(
/* actionGraphTargets= */ ImmutableList.of("..."),
@@ -77,6 +78,7 @@
includeArtifacts,
actionFilters,
includeParamFiles,
+ deduplicateDepsets,
aqueryOutputHandler);
}
@@ -86,6 +88,7 @@
boolean includeArtifacts,
AqueryActionFilter actionFilters,
boolean includeParamFiles,
+ boolean deduplicateDepsets,
AqueryOutputHandler aqueryOutputHandler) {
this.actionGraphTargets = ImmutableSet.copyOf(actionGraphTargets);
this.includeActionCmdLine = includeActionCmdLine;
@@ -97,7 +100,7 @@
KnownRuleClassStrings knownRuleClassStrings = new KnownRuleClassStrings(aqueryOutputHandler);
knownArtifacts = new KnownArtifacts(aqueryOutputHandler);
knownConfigurations = new KnownConfigurations(aqueryOutputHandler);
- knownNestedSets = new KnownNestedSets(aqueryOutputHandler, knownArtifacts);
+ knownNestedSets = new KnownNestedSets(aqueryOutputHandler, knownArtifacts, deduplicateDepsets);
knownAspectDescriptors = new KnownAspectDescriptors(aqueryOutputHandler);
knownTargets = new KnownTargets(aqueryOutputHandler, knownRuleClassStrings);
}
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/KnownNestedSets.java b/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/KnownNestedSets.java
index 4114659..a3a759d 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/KnownNestedSets.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/KnownNestedSets.java
@@ -16,15 +16,23 @@
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.analysis.AnalysisProtosV2.DepSetOfFiles;
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
+import com.google.devtools.build.lib.collect.nestedset.NestedSet.Node;
import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
/** Cache for NestedSets in the action graph. */
public class KnownNestedSets extends BaseCache<Object, DepSetOfFiles> {
private final KnownArtifacts knownArtifacts;
+ private final boolean deduplicateDepsets;
- KnownNestedSets(AqueryOutputHandler aqueryOutputHandler, KnownArtifacts knownArtifacts) {
+ KnownNestedSets(
+ AqueryOutputHandler aqueryOutputHandler,
+ KnownArtifacts knownArtifacts,
+ boolean deduplicateDepsets) {
super(aqueryOutputHandler);
this.knownArtifacts = knownArtifacts;
+ this.deduplicateDepsets = deduplicateDepsets;
}
@Override
@@ -37,8 +45,14 @@
throws IOException, InterruptedException {
NestedSet<?> nestedSet = (NestedSet) nestedSetObject;
DepSetOfFiles.Builder depSetBuilder = DepSetOfFiles.newBuilder().setId(id);
+
+ // Some malformed NestedSets have duplicate non-leaf child subsets. This does not add any
+ // meaningful info and sometimes even corrupt the proto3 output. More context: b/186193294.
+ Set<Node> visited = new HashSet<>();
for (NestedSet<?> succ : nestedSet.getNonLeaves()) {
- depSetBuilder.addTransitiveDepSetIds(this.dataToIdAndStreamOutputProto(succ));
+ if (!deduplicateDepsets || visited.add(succ.toNode())) {
+ depSetBuilder.addTransitiveDepSetIds(this.dataToIdAndStreamOutputProto(succ));
+ }
}
for (Object elem : nestedSet.getLeaves()) {
depSetBuilder.addDirectArtifactIds(