Replace Artifact's exec_path with PathFragments. There are many common substrings in exec_paths (e.g. bazel-out, ...). By preserving the PathFragment structure in the output, we can avoid this duplication. This is a breaking change, and consumers of analysis_v2.proto should change their implementation to use PathFragment instead of exec_path. RELNOTES: None PiperOrigin-RevId: 281936154
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/KnownArtifacts.java b/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/KnownArtifacts.java index 8b4989f..6d78d55 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/KnownArtifacts.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/KnownArtifacts.java
@@ -20,19 +20,25 @@ /** Cache for Artifacts in the action graph. */ public class KnownArtifacts extends BaseCache<Artifact, AnalysisProtosV2.Artifact> { + private final KnownPathFragments knownPathFragments; + KnownArtifacts(ActionGraphContainer.Builder actionGraphBuilder) { super(actionGraphBuilder); + knownPathFragments = new KnownPathFragments(actionGraphBuilder); } @Override AnalysisProtosV2.Artifact createProto(Artifact artifact, Long id) { - return AnalysisProtosV2.Artifact.newBuilder() - .setId(id) - .setExecPath(artifact.getExecPathString()) - .setIsTreeArtifact(artifact.isTreeArtifact()) - .build(); + AnalysisProtosV2.Artifact.Builder artifactProtoBuilder = + AnalysisProtosV2.Artifact.newBuilder() + .setId(id) + .setIsTreeArtifact(artifact.isTreeArtifact()); + + Long pathFragmentId = knownPathFragments.dataToId(artifact.getExecPath()); + return artifactProtoBuilder.setPathFragmentId(pathFragmentId).build(); } + @Override void addToActionGraphBuilder(AnalysisProtosV2.Artifact artifactProto) { actionGraphBuilder.addArtifacts(artifactProto);
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/KnownPathFragments.java b/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/KnownPathFragments.java new file mode 100644 index 0000000..0f918a3 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/skyframe/actiongraph/v2/KnownPathFragments.java
@@ -0,0 +1,58 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.skyframe.actiongraph.v2;
+
+import com.google.devtools.build.lib.analysis.AnalysisProtosV2;
+import com.google.devtools.build.lib.analysis.AnalysisProtosV2.ActionGraphContainer;
+import com.google.devtools.build.lib.vfs.PathFragment;
+
+/**
+ * Cache for {@link PathFragment} in the action graph.
+ *
+ * <p>Each fragment's proto carries the fragment's base name as its label and, when the fragment
+ * has a parent directory, the id of that parent's cache entry.
+ */
+public class KnownPathFragments extends BaseCache<PathFragment, AnalysisProtosV2.PathFragment> {
+  KnownPathFragments(ActionGraphContainer.Builder actionGraphBuilder) {
+    super(actionGraphBuilder);
+  }
+
+  /**
+   * Creates the proto for {@code pathFragment}: the given id, the base name as label, and the id
+   * of the parent directory when there is one.
+   */
+  @Override
+  AnalysisProtosV2.PathFragment createProto(PathFragment pathFragment, Long id) {
+    AnalysisProtosV2.PathFragment.Builder protoBuilder =
+        AnalysisProtosV2.PathFragment.newBuilder();
+    protoBuilder.setId(id);
+    protoBuilder.setLabel(pathFragment.getBaseName());
+
+    // Only link to a parent that exists and has a non-empty base name. Otherwise parentId stays
+    // unset and dataToId is not called, so that no null entry ends up in the cache.
+    PathFragment parent = pathFragment.getParentDirectory();
+    boolean hasParent = parent != null && !parent.getBaseName().isEmpty();
+    if (hasParent) {
+      // Recurses into this cache to create the ancestor path fragments.
+      protoBuilder.setParentId(dataToId(parent));
+    }
+
+    return protoBuilder.build();
+  }
+
+  @Override
+  void addToActionGraphBuilder(AnalysisProtosV2.PathFragment pathFragmentProto) {
+    actionGraphBuilder.addPathFragments(pathFragmentProto);
+  }
+}
diff --git a/src/main/protobuf/analysis_v2.proto b/src/main/protobuf/analysis_v2.proto index eaba2a6..868d33d 100644 --- a/src/main/protobuf/analysis_v2.proto +++ b/src/main/protobuf/analysis_v2.proto
@@ -30,6 +30,7 @@ repeated Configuration configuration = 5; repeated AspectDescriptor aspect_descriptors = 6; repeated RuleClass rule_classes = 7; + repeated PathFragment path_fragments = 8; } // Represents a single artifact, whether it's a source file or a derived output @@ -39,8 +40,9 @@ // particular dump of the analysis. uint64 id = 1; - // The relative path of the file within the execution root. - string exec_path = 2; + // The id of the PathFragment that represents the relative path of the file + // within the execution root. + uint64 path_fragment_id = 2; - // True iff the artifact is a tree artifact, i.e. the above exec_path refers - // a directory. + // True iff the artifact is a tree artifact, i.e. the above path fragment + // refers to a directory. @@ -198,3 +200,15 @@ // Each argument corresponds to a line in the param file. repeated string arguments = 2; } + +// One segment of a path; follow parent_id links to rebuild the full path. +message PathFragment { + // Identifier for this path fragment. + uint64 id = 1; + + // The base name of this segment of the path. + string label = 2; + + // The id of the parent path fragment; left unset when there is no parent. + uint64 parent_id = 3; +}
diff --git a/src/test/shell/integration/aquery_test.sh b/src/test/shell/integration/aquery_test.sh index 5952164..ba649b8 100755 --- a/src/test/shell/integration/aquery_test.sh +++ b/src/test/shell/integration/aquery_test.sh
@@ -1090,7 +1090,6 @@ cmd = "echo unused > $(OUTS)", ) EOF - echo "hello aquery" > "$pkg/in.txt" bazel aquery --incompatible_proto_output_v2 --output=proto "//$pkg:bar" \ || fail "Expected success" @@ -1100,8 +1099,9 @@ - # Verify than ids come in integers instead of strings. + # Verify that ids come in integers instead of strings. assert_contains "id: 1" output assert_not_contains "id: \"1\"" output - assert_contains "exec_path: \"$pkg/dummy.txt\"" output - assert_contains "nemonic: \"Genrule\"" output + assert_contains "path_fragments {" output + assert_contains "label: \"dummy.txt\"" output + assert_contains "mnemonic: \"Genrule\"" output assert_contains "mnemonic: \".*-fastbuild\"" output assert_contains "echo unused" output }