[7.1.0] Reland "Also path map transitive header jar paths with direct classpath optimization" (#21458)

Rollforward of fd196bf03f4301cc3fbfc307570637f4de71c82c, which was
rolled back in f5d76b1777ce861a7e551342fe25363517d53ff5.

The analysis-time memory regression (newly retained `NestedSet`
instances) has been resolved separately in
31fae9e8e6687cbaf0dfe55a466696210c80be96. The execution-time memory
regression (unconditionally flattened `NestedSet`s) is now only incurred
with path mapping enabled.

Original description:

`JavaHeaderCompileAction` can apply an optimization where it compiles
the source files only against direct dependencies, making use of the
fact that Turbine includes sufficient information about transitive
dependencies into the direct header jars in a special directory.

In order to ensure that downstream consumers of header compilation
action can use its `.jdeps` file regardless of which of these actions
actually uses path mapping, all such paths to transitive jars have to be
unmapped.

With this commit, actions can declare additional artifacts whose paths
they want to apply path mapping to. By always passing all transitive
jars into the path mapper, even when only the direct jars are inputs to
the action, the transitive header jar paths can be unmapped and stripped
path collisions between them correctly result in a noop `PathMapper`
being used for the current action.

Closes #21401.

Commit
https://github.com/bazelbuild/bazel/commit/74fe66974e0c4c6b920e067070a519140829c22a

PiperOrigin-RevId: 609010786
Change-Id: I0d9ea5b11430ee40be74fe582af84fedaa52ade6

Co-authored-by: Fabian Meumertzheim <fabian@meumertzhe.im>
diff --git a/src/main/java/com/google/devtools/build/lib/actions/AbstractAction.java b/src/main/java/com/google/devtools/build/lib/actions/AbstractAction.java
index 6ea3432..2553639 100644
--- a/src/main/java/com/google/devtools/build/lib/actions/AbstractAction.java
+++ b/src/main/java/com/google/devtools/build/lib/actions/AbstractAction.java
@@ -666,4 +666,12 @@
   public PlatformInfo getExecutionPlatform() {
     return owner.getExecutionPlatform();
   }
+
+  /**
+   * Returns artifacts that should be subject to path mapping (see {@link Spawn#getPathMapper()},
+   * but aren't inputs of the action.
+   */
+  public NestedSet<Artifact> getAdditionalArtifactsForPathMapping() {
+    return NestedSetBuilder.emptySet(Order.STABLE_ORDER);
+  }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/PathMappers.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/PathMappers.java
index 3ebba47..f0d5288 100644
--- a/src/main/java/com/google/devtools/build/lib/analysis/actions/PathMappers.java
+++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/PathMappers.java
@@ -15,6 +15,7 @@
 package com.google.devtools.build.lib.analysis.actions;
 
 import com.google.common.collect.ImmutableSet;
+import com.google.devtools.build.lib.actions.AbstractAction;
 import com.google.devtools.build.lib.actions.Action;
 import com.google.devtools.build.lib.actions.ActionKeyContext;
 import com.google.devtools.build.lib.actions.Artifact.ArtifactExpander;
@@ -63,8 +64,8 @@
    * Actions that support path mapping should call this method from {@link
    * Action#getKey(ActionKeyContext, ArtifactExpander)}.
    *
-   * <p>Compared to {@link #create(Action, OutputPathsMode)}, this method does not flatten nested
-   * sets and thus can't result in memory regressions.
+   * <p>Compared to {@link #create(AbstractAction, OutputPathsMode)}, this method does not flatten
+   * nested sets and thus can't result in memory regressions.
    *
    * @param mnemonic the mnemonic of the action
    * @param executionInfo the execution info of the action
@@ -103,22 +104,12 @@
    * OutputPathsMode, Fingerprint)} from {@link Action#getKey(ActionKeyContext, ArtifactExpander)}
    * to ensure correct incremental builds.
    *
-   * @param action the {@link Action} for which a {@link Spawn} is to be created
+   * @param action the {@link AbstractAction} for which a {@link Spawn} is to be created
    * @param outputPathsMode the value of {@link CoreOptions#outputPathsMode}
    * @return a {@link PathMapper} that maps paths of the action's inputs and outputs. May be {@link
    *     PathMapper#NOOP} if path mapping is not applicable to the action.
    */
-  public static PathMapper create(Action action, OutputPathsMode outputPathsMode) {
-    if (getEffectiveOutputPathsMode(
-            outputPathsMode, action.getMnemonic(), action.getExecutionInfo())
-        != OutputPathsMode.STRIP) {
-      return PathMapper.NOOP;
-    }
-    return StrippingPathMapper.tryCreate(action).orElse(PathMapper.NOOP);
-  }
-
-  public static PathMapper createPathMapperForTesting(
-      Action action, OutputPathsMode outputPathsMode) {
+  public static PathMapper create(AbstractAction action, OutputPathsMode outputPathsMode) {
     if (getEffectiveOutputPathsMode(
             outputPathsMode, action.getMnemonic(), action.getExecutionInfo())
         != OutputPathsMode.STRIP) {
@@ -128,8 +119,8 @@
   }
 
   /**
-   * Helper method to simplify calling {@link #create(Action, OutputPathsMode)} for actions that
-   * store the configuration directly.
+   * Helper method to simplify calling {@link #create(SpawnAction, OutputPathsMode)} for actions
+   * that store the configuration directly.
    *
    * @param configuration the configuration
    * @return the value of
diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/StrippingPathMapper.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/StrippingPathMapper.java
index 76788fc..563bd8c 100644
--- a/src/main/java/com/google/devtools/build/lib/analysis/actions/StrippingPathMapper.java
+++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/StrippingPathMapper.java
@@ -15,6 +15,8 @@
 package com.google.devtools.build.lib.analysis.actions;
 
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import com.google.devtools.build.lib.actions.AbstractAction;
 import com.google.devtools.build.lib.actions.Action;
 import com.google.devtools.build.lib.actions.ActionInput;
 import com.google.devtools.build.lib.actions.Artifact.DerivedArtifact;
@@ -25,9 +27,8 @@
 import com.google.devtools.build.lib.actions.PathMapper;
 import com.google.devtools.build.lib.actions.Spawn;
 import com.google.devtools.build.lib.analysis.config.CoreOptions.OutputPathsMode;
-import com.google.devtools.build.lib.collect.nestedset.NestedSet;
 import com.google.devtools.build.lib.vfs.PathFragment;
-import java.util.HashSet;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Objects;
 import java.util.Optional;
@@ -87,10 +88,15 @@
    * @param action the action to potentially strip paths from
    * @return a {@link StrippingPathMapper} if the action supports it, else {@link Optional#empty()}.
    */
-  static Optional<PathMapper> tryCreate(Action action) {
+  static Optional<PathMapper> tryCreate(AbstractAction action) {
     // This is expected to always be "bazel-out", but we don't want to hardcode it here.
     PathFragment outputRoot = action.getPrimaryOutput().getExecPath().subFragment(0, 1);
-    if (isPathStrippable(action.getInputs(), outputRoot)) {
+    // Additional artifacts to map are not part of the action's inputs, but may still lead to
+    // path collisions after stripping. It is thus important to include them in this check.
+    if (isPathStrippable(
+        Iterables.concat(
+            action.getInputs().toList(), action.getAdditionalArtifactsForPathMapping().toList()),
+        outputRoot)) {
       return Optional.of(
           create(action.getMnemonic(), action instanceof StarlarkAction, outputRoot));
     }
@@ -243,7 +249,7 @@
    * <p>This method checks b).
    */
   private static boolean isPathStrippable(
-      NestedSet<? extends ActionInput> actionInputs, PathFragment outputRoot) {
+      Iterable<? extends ActionInput> actionInputs, PathFragment outputRoot) {
     // For qualifying action types, check that no inputs or outputs would clash if paths were
     // removed, e.g. "bazel-out/k8-fastbuild/foo" and "bazel-out/host/foo".
     //
@@ -254,15 +260,15 @@
     // with configurations). While this would help more action instances qualify, it also blocks
     // caching the same action in host and target configurations. This could be mitigated by
     // stripping the host prefix *only* when the entire action is in the host configuration.
-    HashSet<PathFragment> rootRelativePaths = new HashSet<>();
-    for (ActionInput input : actionInputs.toList()) {
+    HashMap<PathFragment, ActionInput> rootRelativePaths = new HashMap<>();
+    for (ActionInput input : actionInputs) {
       if (!isOutputPath(input, outputRoot)) {
         continue;
       }
       // For "bazel-out/k8-fastbuild/foo/bar", get "foo/bar".
-      if (!rootRelativePaths.add(input.getExecPath().subFragment(2))) {
-        // TODO(bazel-team): don't fail on duplicate inputs, i.e. when the same exact exec path
-        // (including config prefix) is included twice.
+      if (!rootRelativePaths
+          .computeIfAbsent(input.getExecPath().subFragment(2), k -> input)
+          .equals(input)) {
         return false;
       }
     }
diff --git a/src/main/java/com/google/devtools/build/lib/rules/java/JavaCompileAction.java b/src/main/java/com/google/devtools/build/lib/rules/java/JavaCompileAction.java
index 7e09666..da08a18 100644
--- a/src/main/java/com/google/devtools/build/lib/rules/java/JavaCompileAction.java
+++ b/src/main/java/com/google/devtools/build/lib/rules/java/JavaCompileAction.java
@@ -25,6 +25,8 @@
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Joiner;
 import com.google.common.base.Strings;
+import com.google.common.collect.BiMap;
+import com.google.common.collect.HashBiMap;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
@@ -82,7 +84,6 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
@@ -293,7 +294,6 @@
     }
   }
 
-
   private JavaSpawn getReducedSpawn(
       ActionExecutionContext actionExecutionContext,
       ReducedClasspath reducedClasspath,
@@ -725,6 +725,8 @@
    * @param spawnResult the executor action that created the possibly stripped .jdeps output
    * @param outputDepsProto path to the .jdeps output
    * @param actionInputs all inputs to the current action
+   * @param additionalArtifactsForPathMapping any additional artifacts that may be referenced in the
+   *     .jdeps file by path
    * @param actionExecutionContext the action execution context
    * @return the full deps proto (also written to disk to satisfy the action's declared output)
    */
@@ -732,6 +734,7 @@
       SpawnResult spawnResult,
       Artifact outputDepsProto,
       NestedSet<Artifact> actionInputs,
+      NestedSet<Artifact> additionalArtifactsForPathMapping,
       ActionExecutionContext actionExecutionContext,
       PathMapper pathMapper)
       throws IOException {
@@ -743,36 +746,51 @@
       return executorJdeps;
     }
 
+    // No paths to rewrite.
+    if (executorJdeps.getDependencyCount() == 0) {
+      return executorJdeps;
+    }
+
     // For each of the action's generated inputs, revert its mapped path back to its original path.
-    Map<String, PathFragment> mappedToOriginalPath = new HashMap<>();
-    for (Artifact actionInput : actionInputs.toList()) {
+    BiMap<String, PathFragment> mappedToOriginalPath = HashBiMap.create();
+    for (Artifact actionInput :
+        Iterables.concat(actionInputs.toList(), additionalArtifactsForPathMapping.toList())) {
       if (actionInput.isSourceArtifact()) {
         continue;
       }
       String mappedPath = pathMapper.getMappedExecPathString(actionInput);
-      if (mappedToOriginalPath.put(mappedPath, actionInput.getExecPath()) != null) {
-        // If an entry already exists, that means different inputs reduce to the same stripped path.
-        // That also means PathStripper would exempt this action from path stripping, so the
-        // executor-produced .jdeps already includes full paths. No need to update it.
-        return executorJdeps;
+      PathFragment previousPath = mappedToOriginalPath.put(mappedPath, actionInput.getExecPath());
+      if (previousPath != null && !previousPath.equals(actionInput.getExecPath())) {
+        throw new IllegalStateException(
+            String.format(
+                "Duplicate mapped path %s derived from %s and %s",
+                mappedPath, actionInput.getExecPath(), mappedToOriginalPath.get(mappedPath)));
       }
     }
 
-    // No paths to rewrite.
-    if (executorJdeps.getDependencyCount() == 0) {
-      return executorJdeps;
-    }
-
     // Rewrite the .jdeps proto with full paths.
     PathFragment outputRoot = outputDepsProto.getExecPath().subFragment(0, 1);
     Deps.Dependencies.Builder fullDepsBuilder = Deps.Dependencies.newBuilder(executorJdeps);
     for (Deps.Dependency.Builder dep : fullDepsBuilder.getDependencyBuilderList()) {
       PathFragment pathOnExecutor = PathFragment.create(dep.getPath());
       PathFragment originalPath = mappedToOriginalPath.get(pathOnExecutor.getPathString());
-      if (originalPath == null && pathOnExecutor.subFragment(0, 1).equals(outputRoot)) {
-        // The mapped path -> full path map failed, which means the paths weren't mapped. Fast-
-        // return the original jdeps to save unnecessary CPU time.
-        return executorJdeps;
+      // Source files, which do not lie under the output root, are not mapped. It is also possible
+      // that a jdeps file contains a reference to a transitive classpath element that isn't an
+      // input to the current action (see
+      // https://github.com/google/turbine/commit/f9f2decee04a3c651671f7488a7c9d7952df88c8), just an
+      // additional artifact marked for path mapping, and itself wasn't built with path mapping
+      // enabled (e .g. due to path collisions). In that case, the path will already be unmapped and
+      // we can leave it as is. For entirely unexpected paths, we still report an error.
+      if (originalPath == null
+          && pathOnExecutor.subFragment(0, 1).equals(outputRoot)
+          && !mappedToOriginalPath.containsValue(pathOnExecutor)) {
+        throw new IllegalStateException(
+            String.format(
+                "Missing original path for mapped path %s in %s%njdeps: %s%npath map: %s",
+                pathOnExecutor,
+                outputDepsProto.getExecPath(),
+                executorJdeps,
+                mappedToOriginalPath));
       }
       dep.setPath(
           originalPath == null ? pathOnExecutor.getPathString() : originalPath.getPathString());
@@ -820,7 +838,12 @@
     SpawnResult result = Iterables.getOnlyElement(results);
     try {
       return createFullOutputDeps(
-          result, outputDepsProto, getInputs(), actionExecutionContext, pathMapper);
+          result,
+          outputDepsProto,
+          getInputs(),
+          getAdditionalArtifactsForPathMapping(),
+          actionExecutionContext,
+          pathMapper);
     } catch (IOException e) {
       throw ActionExecutionException.fromExecException(
           new EnvironmentalExecException(
diff --git a/src/main/java/com/google/devtools/build/lib/rules/java/JavaHeaderCompileAction.java b/src/main/java/com/google/devtools/build/lib/rules/java/JavaHeaderCompileAction.java
index 50194c9..be54d1d 100644
--- a/src/main/java/com/google/devtools/build/lib/rules/java/JavaHeaderCompileAction.java
+++ b/src/main/java/com/google/devtools/build/lib/rules/java/JavaHeaderCompileAction.java
@@ -78,6 +78,7 @@
 public final class JavaHeaderCompileAction extends SpawnAction {
 
   private final boolean insertDependencies;
+  private final NestedSet<Artifact> additionalArtifactsForPathMapping;
 
   private JavaHeaderCompileAction(
       ActionOwner owner,
@@ -92,7 +93,8 @@
       RunfilesSupplier runfilesSupplier,
       String mnemonic,
       OutputPathsMode outputPathsMode,
-      boolean insertDependencies) {
+      boolean insertDependencies,
+      NestedSet<Artifact> additionalArtifactsForPathMapping) {
     super(
         owner,
         tools,
@@ -107,6 +109,12 @@
         mnemonic,
         outputPathsMode);
     this.insertDependencies = insertDependencies;
+    this.additionalArtifactsForPathMapping = additionalArtifactsForPathMapping;
+  }
+
+  @Override
+  public NestedSet<Artifact> getAdditionalArtifactsForPathMapping() {
+    return additionalArtifactsForPathMapping;
   }
 
   @Override
@@ -117,7 +125,12 @@
     try {
       Deps.Dependencies fullOutputDeps =
           JavaCompileAction.createFullOutputDeps(
-              spawnResult, outputDepsProto, getInputs(), context, pathMapper);
+              spawnResult,
+              outputDepsProto,
+              getInputs(),
+              getAdditionalArtifactsForPathMapping(),
+              context,
+              pathMapper);
       JavaCompileActionContext javaContext = context.getContext(JavaCompileActionContext.class);
       if (insertDependencies && javaContext != null) {
         javaContext.insertDependencies(outputDepsProto, fullOutputDeps);
@@ -463,10 +476,20 @@
       }
       if (useDirectClasspath) {
         NestedSet<Artifact> classpath;
+        NestedSet<Artifact> additionalArtifactsForPathMapping;
         if (!directJars.isEmpty() || classpathEntries.isEmpty()) {
           classpath = directJars;
+          // When using the direct classpath optimization, Turbine generates .jdeps entries based on
+          // the transitive dependency information packages into META-INF/TRANSITIVE. When path
+          // mapping is used, these entries may have been subject to it when they were generated.
+          // Since the contents of that directory are not unmapped, we need to instead unmap the
+          // paths emitted in the .jdeps file, which requires knowing the full list of artifact
+          // paths even if they aren't inputs to the current action.
+          // https://github.com/google/turbine/commit/f9f2decee04a3c651671f7488a7c9d7952df88c8
+          additionalArtifactsForPathMapping = classpathEntries;
         } else {
           classpath = classpathEntries;
+          additionalArtifactsForPathMapping = NestedSetBuilder.emptySet(Order.STABLE_ORDER);
         }
         mandatoryInputsBuilder.addTransitive(classpath);
 
@@ -499,7 +522,8 @@
                 // If classPathMode == BAZEL, also make sure to inject the dependencies to be
                 // available to downstream actions. Else just do enough work to locally create the
                 // full .jdeps from the .stripped .jdeps produced on the executor.
-                /* insertDependencies= */ classpathMode == JavaClasspathMode.BAZEL));
+                /* insertDependencies= */ classpathMode == JavaClasspathMode.BAZEL,
+                additionalArtifactsForPathMapping));
         return;
       }
 
diff --git a/src/test/shell/integration/config_stripped_outputs_test.sh b/src/test/shell/integration/config_stripped_outputs_test.sh
index 8498a0c..da111c4 100755
--- a/src/test/shell/integration/config_stripped_outputs_test.sh
+++ b/src/test/shell/integration/config_stripped_outputs_test.sh
@@ -328,4 +328,55 @@
   assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/libbase_lib-hjar.jdeps"
 }
 
+function test_direct_classpath() {
+  local -r pkg="${FUNCNAME[0]}"
+  mkdir -p "$pkg/java/hello/" || fail "Expected success"
+  # When compiling C, the direct classpath optimization in Turbine embeds information about the
+  # dependency D into the header jar, which then results in the path to Ds header jar being included
+  # in the jdeps file for B. The full compilation action for A requires the header jar for D  and
+  # thus the path to it in the jdeps file of B has to be unmapped properly for the reduced classpath
+  # created for A to contain it.
+  cat > "$pkg/java/hello/A.java" <<'EOF'
+package hello;
+public class A extends B {}
+EOF
+  cat > "$pkg/java/hello/B.java" <<'EOF'
+package hello;
+public class B extends C {}
+EOF
+  cat > "$pkg/java/hello/C.java" <<'EOF'
+package hello;
+public class C extends D {}
+EOF
+  cat > "$pkg/java/hello/D.java" <<'EOF'
+package hello;
+public class D {}
+EOF
+  cat > "$pkg/java/hello/BUILD" <<'EOF'
+java_library(name='a', srcs=['A.java'], deps = [':b'])
+java_library(name='b', srcs=['B.java'], deps = [':c'])
+java_library(name='c', srcs=['C.java'], deps = [':d'])
+java_library(name='d', srcs=['D.java'])
+EOF
+
+  bazel build --experimental_java_classpath=bazel  \
+    --experimental_output_paths=strip \
+    //"$pkg"/java/hello:a -s 2>"$TEST_log" \
+    || fail "Expected success"
+
+  # java_library .jar compilation:
+  assert_paths_stripped "$TEST_log" "$pkg/java/hello/liba.jar-0.params"
+  # java_library header jar compilation:
+  assert_paths_stripped "$TEST_log" "bin/$pkg/java/hello/libb-hjar.jar"
+  # jdeps files should contain the original paths since they are read by downstream actions that may
+  # not use path mapping.
+  assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/hello/liba.jdeps"
+  assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/hello/libb.jdeps"
+  assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/hello/libb-hjar.jdeps"
+  assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/hello/libc.jdeps"
+  assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/hello/libc-hjar.jdeps"
+  assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/hello/libd.jdeps"
+  assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/hello/libd-hjar.jdeps"
+}
+
 run_suite "Tests stripping config prefixes from output paths for better action caching"