// Copyright 2021 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.lib.actions;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.actions.Artifact.DerivedArtifact;
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.vfs.PathFragment;
import java.util.HashSet;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;

/**
 * Main logic for experimental config-stripped execution paths:
 * https://github.com/bazelbuild/bazel/issues/6526.
 *
 * <p>The actions executors run look like: {@code tool_pkg/mytool src/source.file
 * bazel-out/x86-opt/pkg/gen.file -o bazel-out/x86-opt/pkg/myout}.
 *
 * <p>The "x86-opt" part is a path's "configuration prefix": information describing the build
 * configuration of the action creating the artifact. This example shows artifacts created with
 * {@code --cpu=x86 --compilation_mode=opt}.
 *
 * <p>Executors cache actions based on their a) command line, b) input and output paths, c) input
 * digests. Configuration prefixes harm caching because even if an action behaves exactly the same
 * for different CPU architectures, {@code <cpu>-opt} guarantees the paths will differ.
 *
 * <p>Config-stripping is an experimental feature that strips the configuration prefix from
 * qualifying actions before running them, thus improving caching. "Qualifying" actions are actions
 * known not to depend on the names of their input and output paths. Non-qualifying actions include
 * manifest generators and compilers that store debug symbol source paths.
 *
 * <p>As an experimental feature, most logic is centralized here to provide easy hooks into executor
 * and action code and avoid complicating large swaths of the code base.
 *
 * <p>Enable this feature by setting {@code --experimental_output_paths=strip}. This activates two
 * effects:
 *
 * <ol>
 *   <li>"Qualifying" actions strip config paths from their command lines. An action qualifies if
 *       its implementation logic checks {@code --experimental_output_paths=strip}, creates a {@link
 *       Spawn} with {@link Spawn#stripOutputPaths()} == true, and removes config prefixes from its
 *       command line with the help of {@link PathStripper.CommandAdjuster}. Action logic should
 *       also check {@link PathStripper#isPathStrippable}: see that method's javadoc for why.
 *   <li>A supporting executor strips paths from qualifying actions' inputs and outputs before
 *       staging for execution, with the help of {@link PathStripper.ActionStager}.
 * </ol>
 *
 * <p>So an action is responsible for declaring that it strips paths and adjusting its command line
 * accordingly. The executor is responsible for remapping action inputs and outputs to match.
 *
 * <p>A lot of this work is handled generically in {@link CustomCommandLine} and related classes.
 * Simple actions may be able to opt into this behavior with little more than setting {@link
 * com.google.devtools.build.lib.analysis.actions.SpawnAction.Builder#stripOutputPaths(boolean)}.
 * Starlark actions don't yet have API support: specific mnemonics are enabled by {@link
 * com.google.devtools.build.lib.analysis.actions.StarlarkAction.Builder#stripOutputPaths(String,
 * NestedSet, Artifact, BuildConfigurationValue)}.
 */
public final class PathStripper {
  /**
   * Maps the exec paths of an action's inputs and outputs to the locations where they should be
   * staged.
   *
   * <p>Executors use this when laying out an action for execution.
   */
  public interface ActionStager {
    /**
     * Returns, as a string, the exec path where the executor should stage the given input or
     * output.
     *
     * <p>This is the artifact's exec path passed through {@link #map(PathFragment)}: for a
     * config-stripping stager, "bazel-out/k8-fastbuild/foo/bar" loses its "k8-fastbuild" segment;
     * for {@link #NOOP} the path is returned unchanged.
     */
    default String getMappedExecPathString(ActionInput artifact) {
      PathFragment stagedPath = map(artifact.getExecPath());
      return stagedPath.getPathString();
    }

    /** {@link PathFragment} flavor of {@link #getMappedExecPathString(ActionInput)}. */
    PathFragment map(PathFragment execPath);

    /**
     * Builds the stager that executor implementation logic should use for {@code spawn}.
     *
     * @param spawn the action to stage. When {@link Spawn#stripOutputPaths()} is true, paths like
     *     "bazel-out/k8-fastbuild/bin/foo" become "bazel-out/bin/foo". Otherwise paths are left
     *     alone.
     * @param outputRoot the root path where outputs are written (e.g. "bazel-out"). Must be a
     *     single segment without backslashes.
     */
    static ActionStager create(Spawn spawn, PathFragment outputRoot) {
      // The root is validated regardless of whether this spawn actually strips paths.
      Preconditions.checkState(outputRoot.isSingleSegment());
      Preconditions.checkState(!outputRoot.getPathString().contains("\\"));
      if (spawn.stripOutputPaths()) {
        return actionStripper(outputRoot);
      }
      return NOOP;
    }

    /** The identity {@link ActionStager}: every path maps to itself. */
    ActionStager NOOP = execPath -> execPath;

    /**
     * Returns an {@link ActionStager} that drops the config prefix from paths under {@code
     * outputRoot} and passes every other path through untouched.
     */
    private static ActionStager actionStripper(PathFragment outputRoot) {
      return execPath -> {
        if (isOutputPath(execPath, outputRoot)) {
          return PathStripper.strip(execPath);
        }
        return execPath;
      };
    }
  }

  /**
   * Support for mapping config parts of exec paths in an action's command line.
   *
   * <p>Action implementation logic should use this to correctly set an action's command line.
   */
  public interface CommandAdjuster {
    /**
     * Returns the exec path to refer to an input or output by.
     *
     * <p>If the action should be config-stripped ({@link PathStripper}), removes "k8-fastbuild"
     * from paths like "bazel-out/k8-fastbuild/foo/bar".
     *
     * <p>Else returns the artifact's original exec path.
     */
    default String getMappedExecPathString(ActionInput artifact) {
      return map(artifact.getExecPath()).getPathString();
    }

    /** Same as {@link #getMappedExecPathString(ActionInput)} but for a {@link PathFragment}. */
    PathFragment map(PathFragment execPath);

    /**
     * We don't yet have a Starlark API for mapping paths in command lines. Simple Starlark calls
     * like {@code args.add(arg_name, file_path)} are automatically handled. But calls that involve
     * custom Starlark code require deeper API support that remains a TODO.
     *
     * <p>This method hard-codes support for specific command line entries for specific Starlark
     * actions that we know we want to apply stripping to.
     *
     * <p>The default implementation returns {@code args} unchanged. The stripping implementation
     * rewrites qualifying entries of {@code args} in place and returns the same list, so callers
     * of a stripping adjuster must pass a mutable list.
     *
     * @param args the action's command line arguments
     * @return {@code args}, possibly with some entries rewritten
     */
    default List<String> mapCustomStarlarkArgs(List<String> args) {
      return args;
    }

    /**
     * Creates a new command adjuster for action implementation logic to use.
     *
     * @param stripOutputPaths should this action strip config prefixes?
     * @param starlarkMnemonic this action's mnemonic if it's a Starlark action, else null
     * @param outputRoot the root path where outputs are written (e.g. "bazel-out"). Actions that
     *     don't strip outputs can set this to null.
     */
    static CommandAdjuster create(
        boolean stripOutputPaths,
        @Nullable String starlarkMnemonic,
        @Nullable PathFragment outputRoot) {
      if (stripOutputPaths) {
        Preconditions.checkNotNull(outputRoot);
        Preconditions.checkState(outputRoot.isSingleSegment());
        Preconditions.checkState(!outputRoot.getPathString().contains("\\"));
      }
      return stripOutputPaths ? commandStripper(starlarkMnemonic, outputRoot) : NOOP;
    }

    /** Instantiates a {@link CommandAdjuster} that doesn't change paths. */
    CommandAdjuster NOOP = execPath -> execPath;

    /**
     * Returns whether the given Starlark mnemonic is one whose command line needs custom argument
     * stripping. A null mnemonic (a non-Starlark action) never qualifies.
     */
    private static boolean needsCustomStarlarkArgStripping(@Nullable String starlarkMnemonic) {
      // Add your favorite Starlark mnemonic that needs custom arg processing here.
      return starlarkMnemonic != null
          && (starlarkMnemonic.contains("Android")
              || starlarkMnemonic.equals("MergeManifests")
              || starlarkMnemonic.equals("StarlarkRClassGenerator"));
    }

    /** Instantiates a {@link CommandAdjuster} that strips config prefixes from output paths. */
    private static CommandAdjuster commandStripper(
        @Nullable String starlarkMnemonic, PathFragment outputRoot) {
      // Non-null only for mnemonics that qualify for custom arg stripping. This is decided once
      // here, so a null mnemonic can never be dereferenced later and non-qualifying actions don't
      // pay for compiling the stripping regex.
      final StringStripper argStripper =
          needsCustomStarlarkArgStripping(starlarkMnemonic)
              ? new StringStripper(outputRoot.getPathString())
              : null;
      return new CommandAdjuster() {
        // Add your favorite arg to custom-process here. When Bazel finds one of these in the
        // argument list (an argument name), it strips output path prefixes from the following
        // argument (the argument value).
        private final ImmutableList<String> starlarkArgsToStrip =
            ImmutableList.of(
                "--primaryData",
                "--directData",
                "--data",
                "--resources",
                "--mergeeManifests",
                "--library");

        @Override
        public String getMappedExecPathString(ActionInput artifact) {
          // NOTE(review): this keys on the artifact's type while map() below keys on the path's
          // root segment, so the two can disagree for non-derived inputs under the output root.
          // Confirm that's intended.
          if (artifact instanceof DerivedArtifact) {
            return PathStripper.strip(artifact);
          } else {
            return artifact.getExecPathString();
          }
        }

        @Override
        public PathFragment map(PathFragment execPath) {
          return PathStripper.isOutputPath(execPath, outputRoot)
              ? PathStripper.strip(execPath)
              : execPath;
        }

        @Override
        public List<String> mapCustomStarlarkArgs(List<String> args) {
          if (argStripper == null) {
            // Not a Starlark action, or not a mnemonic we custom-process.
            return args;
          }
          // Start at 1: an entry is only rewritten when the entry before it is a known arg name.
          for (int i = 1; i < args.size(); i++) {
            if (starlarkArgsToStrip.contains(args.get(i - 1))) {
              args.set(i, argStripper.strip(args.get(i)));
            }
          }
          return args;
        }
      };
    }
  }

  /**
   * Utility class to strip output path configuration prefixes from arbitrary strings.
   *
   * <p>Rules that support path stripping can use this to help their implementation logic.
   */
  public static class StringStripper {
    private final Pattern pattern;
    // Replacement text for each match: the output root plus a trailing slash, escaped so that
    // "$" and "\" in the root are emitted literally instead of being read as group references.
    private final String replacement;

    /**
     * @param outputRoot root segment of output paths (e.g. "bazel-out")
     */
    public StringStripper(String outputRoot) {
      this.pattern = stripPathsPattern(outputRoot);
      this.replacement = Matcher.quoteReplacement(outputRoot + "/");
    }

    /**
     * Returns {@code str} with every match of the output-path pattern replaced by the output root
     * followed by a slash, e.g. "bazel-out/x86-fastbuild/foo" becomes "bazel-out/foo".
     */
    public String strip(String str) {
      return pattern.matcher(str).replaceAll(replacement);
    }
  }

  /**
   * Returns the regex to strip output paths from a string.
   *
   * <p>Supports strings with multiple output paths in arbitrary places. For example
   * "/path/to/compiler bazel-out/x86-fastbuild/foo src/my.src -Dbazel-out/arm-opt/bar".
   *
   * <p>Doesn't strip paths that would be non-existent without config prefixes. For example, these
   * are unchanged: "bazel-out/x86-fastbuild", "bazel-out;foo", "/path/to/compiler bazel-out".
   *
   * <p>The output root is matched literally: regex metacharacters in it (such as ".") carry no
   * special meaning.
   *
   * @param outputRoot root segment of output paths (e.g. "bazel-out")
   */
  private static Pattern stripPathsPattern(String outputRoot) {
    // Match "bazel-out" followed by a slash followed by any combination of word characters, "_",
    // and "-", followed by another slash. This would miss substrings like "bazel-out/k8-fastbuild".
    // But those don't represent actual outputs (all outputs would have to have names beneath that
    // path). So we're not trying to replace those.
    //
    // The root is quoted so that a root containing regex metacharacters can't match unrelated
    // text or fail to compile.
    return Pattern.compile(Pattern.quote(outputRoot) + "/[\\w_-]+/");
  }

  /**
   * Reports whether the given artifact's exec path is a strippable output path.
   *
   * @param artifact artifact whose path to check
   * @param outputRoot the output tree's execPath-relative root (e.g. "bazel-out")
   */
  private static boolean isOutputPath(ActionInput artifact, PathFragment outputRoot) {
    // The decision is made on the path, not on the artifact's type: checking for DerivedArtifact
    // isn't enough because output paths also show up in, for example, ParamFileActionInput and
    // ActionInputHelper.BasicActionInput.
    PathFragment execPath = artifact.getExecPath();
    return isOutputPath(execPath, outputRoot);
  }

  /**
   * Private utility method: reports whether {@code pathFragment} is a strippable path, i.e.
   * whether it starts with {@code outputRoot} according to {@link PathFragment#startsWith}.
   */
  private static boolean isOutputPath(PathFragment pathFragment, PathFragment outputRoot) {
    boolean underOutputRoot = pathFragment.startsWith(outputRoot);
    return underOutputRoot;
  }

  /**
   * Reports whether it is safe to strip config prefixes from an action with these inputs.
   *
   * <p>Safety is a separate question from whether the action <b>should</b> be stripped. Stripping
   * happens when a) the action logic opts in through {@link Spawn#stripOutputPaths()} and b) doing
   * so is safe (for instance, no two inputs from different configurations would collapse onto the
   * same path once their prefixes are gone).
   *
   * <p>This method answers b) only. Action logic must take the answer into account when setting
   * a).
   *
   * @param actionInputs the action's inputs
   * @param outputRoot the output tree's execPath-relative root (e.g. "bazel-out")
   * @return false if two inputs under {@code outputRoot} share the same path below their config
   *     prefix, true otherwise
   */
  public static boolean isPathStrippable(
      NestedSet<? extends ActionInput> actionInputs, PathFragment outputRoot) {
    // Qualifying actions must not have inputs that would collide after stripping, such as
    // "bazel-out/k8-fastbuild/foo" and "bazel-out/host/foo".
    //
    // If experience shows it would help, a smarter scheme could give colliding paths distinct
    // synthetic prefixes instead: "bazel-out/1/foo" and "bazel-out/2/foo".
    //
    // Alternatively, host paths could be left intact, since the "host" prefix is the same across
    // configurations. That lets more actions qualify but prevents sharing a cache entry between
    // host and target builds of the same action. Stripping the host prefix *only* when the whole
    // action is in the host configuration would mitigate that.
    HashSet<PathFragment> seenConfigFreePaths = new HashSet<>();
    for (ActionInput candidate : actionInputs.toList()) {
      // Inputs outside the output tree are never stripped, so they can't clash. For the rest,
      // compare what remains after the root and config segments: "bazel-out/k8-fastbuild/foo/bar"
      // contributes "foo/bar".
      if (isOutputPath(candidate, outputRoot)
          && !seenConfigFreePaths.add(candidate.getExecPath().subFragment(2))) {
        // TODO(bazel-team): don't fail on duplicate inputs, i.e. when the same exact exec path
        // (including config prefix) is included twice.
        return false;
      }
    }
    return true;
  }

  /**
   * Private utility method: drops the configuration prefix from an output artifact's exec path.
   *
   * <p>The result is the path's first segment joined with everything from its third segment
   * onward, so "bazel-out/k8-fastbuild/foo/bar" becomes "bazel-out/foo/bar". Callers are expected
   * to pass only paths under the output root.
   */
  static PathFragment strip(PathFragment execPath) {
    PathFragment rootSegment = execPath.subFragment(0, 1);
    PathFragment belowConfigPrefix = execPath.subFragment(2);
    return rootSegment.getRelative(belowConfigPrefix);
  }

  /**
   * Private utility method: string form of {@link #strip(PathFragment)} applied to an output
   * artifact's exec path, i.e. that path with its configuration prefix removed.
   */
  static String strip(ActionInput artifact) {
    PathFragment strippedPath = strip(artifact.getExecPath());
    return strippedPath.getPathString();
  }

  /** Not instantiable: this class only hosts static helpers and nested types. */
  private PathStripper() {}
}
