blob: e4593699997c45c5a94fcfa3f45cae72ebce9d84 [file] [log] [blame]
// Copyright 2018 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.dynamic;
import com.google.auto.value.AutoValue;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.io.Files;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
import com.google.devtools.build.lib.actions.DynamicStrategyRegistry;
import com.google.devtools.build.lib.actions.EnvironmentalExecException;
import com.google.devtools.build.lib.actions.ExecException;
import com.google.devtools.build.lib.actions.ExecutionRequirements;
import com.google.devtools.build.lib.actions.SandboxedSpawnStrategy;
import com.google.devtools.build.lib.actions.SandboxedSpawnStrategy.StopConcurrentSpawns;
import com.google.devtools.build.lib.actions.Spawn;
import com.google.devtools.build.lib.actions.SpawnResult;
import com.google.devtools.build.lib.actions.SpawnStrategy;
import com.google.devtools.build.lib.actions.Spawns;
import com.google.devtools.build.lib.actions.UserExecException;
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.exec.ExecutionPolicy;
import com.google.devtools.build.lib.util.io.FileOutErr;
import com.google.devtools.build.lib.vfs.Path;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Phaser;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;
import java.util.logging.Logger;
import javax.annotation.Nullable;
/**
* A spawn strategy that speeds up incremental builds while not slowing down full builds.
*
* <p>This strategy tries to run spawn actions on the local and remote machine at the same time, and
* picks the spawn action that completes first. This gives the benefits of remote execution on full
* builds, and local execution on incremental builds.
*
* <p>One might ask why we don't run spawns on the workstation all the time and just "spill over"
* actions to remote execution when there are no local resources available. This would work, except
* that the cost of transferring action inputs and outputs from the local machine to and from remote
* executors over the network is way too high - there is no point in executing an action locally and
* saving 0.5s of time, when it then takes us 5 seconds to upload the results to remote executors for
* another action that's scheduled to run there.
*/
public class LegacyDynamicSpawnStrategy implements SpawnStrategy {
// Named after this class so that log records are attributed to the legacy strategy instead of to
// DynamicSpawnStrategy.
private static final Logger logger =
    Logger.getLogger(LegacyDynamicSpawnStrategy.class.getName());
/** Identifies which branch of a dynamic execution a result came from. */
enum StrategyIdentifier {
  /** No branch has produced a result. */
  NONE("unknown"),
  /** The local branch. */
  LOCAL("locally"),
  /** The remote branch. */
  REMOTE("remotely");

  /** Human-readable form used when reporting where an action ran. */
  private final String displayName;

  StrategyIdentifier(String displayName) {
    this.displayName = displayName;
  }

  /** Returns the human-readable form of this identifier, e.g. {@code "locally"}. */
  String prettyName() {
    return this.displayName;
  }
}
/**
* Immutable outcome of one branch (local or remote) of a dynamic execution.
*
* <p>{@link #create} passes its arguments positionally to the AutoValue-generated constructor, in
* the same order as the abstract accessors are declared below; keep the two in sync.
*/
@AutoValue
abstract static class DynamicExecutionResult {
/**
* Builds a result.
*
* @param strategyIdentifier the branch that produced this result
* @param fileOutErr the stdout/stderr pair the branch wrote to, or null if there is none
* @param execException the failure reported by the branch, or null if it did not fail
* @param spawnResults the results returned by the branch; defensively copied into an immutable
*     list
*/
static DynamicExecutionResult create(
StrategyIdentifier strategyIdentifier,
@Nullable FileOutErr fileOutErr,
@Nullable ExecException execException,
List<SpawnResult> spawnResults) {
return new AutoValue_LegacyDynamicSpawnStrategy_DynamicExecutionResult(
strategyIdentifier, fileOutErr, execException, ImmutableList.copyOf(spawnResults));
}
/** Returns the branch that produced this result. */
abstract StrategyIdentifier strategyIdentifier();
/** Returns the stdout/stderr pair the branch wrote to, or null if none was recorded. */
@Nullable
abstract FileOutErr fileOutErr();
/** Returns the failure reported by the branch, or null if it did not fail. */
@Nullable
abstract ExecException execException();
/**
* Returns a list of SpawnResults associated with executing a Spawn.
*
* <p>The list will typically contain one element, but could contain zero elements if spawn
* execution did not complete, or multiple elements if multiple sub-spawns were executed.
*/
abstract ImmutableList<SpawnResult> spawnResults();
}
// Mnemonics for which supportsWorkers() reports false without consulting Spawns.supportsWorkers.
private static final ImmutableSet<String> WORKER_BLACKLISTED_MNEMONICS =
ImmutableSet.of("JavaDeployJar");
// Executor to which exec() submits the competing local and remote branches.
private final ExecutorService executorService;
// Source of requireAvailabilityInfo, localExecutionDelay and debugSpawnScheduler, read in exec().
private final DynamicExecutionOptions options;
// Maps a spawn to the policy exec() consults to choose local-only, remote-only or both.
private final Function<Spawn, ExecutionPolicy> getExecutionPolicy;
// Starts out false; exec() sets it to true once a remote branch returns without throwing. While
// true, the local branch sleeps for options.localExecutionDelay before it starts.
private final AtomicBoolean delayLocalExecution = new AtomicBoolean(false);
// TODO(steinman): This field is never assigned and canExec() would throw if trying to access it.
@Nullable private SandboxedSpawnStrategy workerStrategy;
/**
 * Constructs a {@code LegacyDynamicSpawnStrategy}.
 *
 * @param executorService an {@link ExecutorService} that will be used to run Spawn actions.
 * @param options the dynamic execution options consulted while executing spawns.
 * @param getExecutionPolicy maps a {@link Spawn} to the {@link ExecutionPolicy} that decides
 *     whether it runs locally only, remotely only, or on both sides at once.
 */
public LegacyDynamicSpawnStrategy(
    ExecutorService executorService,
    DynamicExecutionOptions options,
    Function<Spawn, ExecutionPolicy> getExecutionPolicy) {
  this.getExecutionPolicy = getExecutionPolicy;
  this.options = options;
  this.executorService = executorService;
}
/**
* Executes {@code spawn} locally, remotely, or on both sides at once, depending on the policy
* returned by {@code getExecutionPolicy}.
*
* <p>Spawns whose policy is local-only or remote-only are handed straight to {@code runLocally} or
* {@code runRemotely}. Otherwise both branches are submitted through {@link
* ExecutorService#invokeAny}, the result of one branch is used, and the stdout/stderr files that
* branch wrote are moved over the files of {@code actionExecutionContext}.
*
* @param spawn the spawn to execute
* @param actionExecutionContext supplies the strategy registry, the output/error files and the
*     event handler
* @return the spawn results of the branch whose result was used
* @throws ExecException if the availability-info check fails, if the branch whose result was used
*     reported a failure, or if its logs cannot be moved
* @throws InterruptedException if the calling thread is interrupted while the branches run
*/
@Override
public ImmutableList<SpawnResult> exec(
final Spawn spawn, final ActionExecutionContext actionExecutionContext)
throws ExecException, InterruptedException {
// Optional sanity check: a non-Genrule spawn that requires Darwin must also carry the
// REQUIREMENTS_SET marker, otherwise fail early with an explanatory message.
if (options.requireAvailabilityInfo) {
if (spawn.getExecutionInfo().containsKey(ExecutionRequirements.REQUIRES_DARWIN)
&& !spawn.getMnemonic().equals("Genrule")) {
if (!spawn.getExecutionInfo().containsKey(ExecutionRequirements.REQUIREMENTS_SET)) {
throw new EnvironmentalExecException(
String.format(
"The following spawn was missing Xcode-related execution requirements. Please"
+ " let the Bazel team know if you encounter this issue. You can work around"
+ " this error by passing --experimental_require_availability_info=false --"
+ " at your own risk! This may cause some actions to be executed on the"
+ " wrong platform, which can result in build failures.\n"
+ "Failing spawn: mnemonic = %s\n"
+ "tool files = %s\n"
+ "execution platform = %s\n"
+ "execution info = %s\n",
spawn.getMnemonic(),
spawn.getToolFiles(),
spawn.getExecutionPlatform(),
spawn.getExecutionInfo()));
}
}
}
ExecutionPolicy executionPolicy = getExecutionPolicy.apply(spawn);
// If a Spawn cannot run remotely, we must always execute it locally. Resources will already
// have been acquired by Skyframe for us.
if (executionPolicy.canRunLocallyOnly()) {
return runLocally(spawn, actionExecutionContext, null);
}
// If a Spawn cannot run locally, we must always execute it remotely. For remote execution,
// local resources should not be acquired.
if (executionPolicy.canRunRemotelyOnly()) {
return runRemotely(spawn, actionExecutionContext, null);
}
// At this point we have a Spawn that can run locally and can run remotely. Run it in parallel
// using both the remote and the local strategy.
ExecException exceptionDuringExecution = null;
// Placeholder that is only still in place if invokeAny() does not return a result.
DynamicExecutionResult dynamicExecutionResult =
DynamicExecutionResult.create(
StrategyIdentifier.NONE, null, null, /*spawnResults=*/ ImmutableList.of());
// As an invariant in Bazel, all actions must terminate before the build ends. We use a
// synchronizer here, in the main thread, to wait for the termination of both local and remote
// spawns. Termination implies successful completion, failure, or, if one spawn wins,
// cancellation by the executor.
//
// In the case where one task completes successfully before the other starts, Bazel must
// proceed and return, skipping the other spawn. To achieve this, we use Phaser for its ability
// to register a variable number of tasks.
//
// TODO(b/118451841): Note that this may incur a performance issue where a remote spawn is
// faster than a worker spawn, because the worker spawn cannot be cancelled once it starts. This
// nullifies the gains from the faster spawn.
//
// The single initial party is this thread; each DynamicExecutionCallable registers itself when
// its call() starts and deregisters when it ends.
Phaser bothTasksFinished = new Phaser(/*parties=*/ 1);
try {
// Shared by both branches and handed to lockOutputFiles(): the first strategy to claim it may
// write outputs, the other is stopped with a DynamicInterruptedException.
final AtomicReference<SpawnStrategy> outputsHaveBeenWritten = new AtomicReference<>(null);
// DynamicExecutionCallable.call() turns every exception except InterruptedException into a
// returned DynamicExecutionResult, so a branch that fails still counts as completed here.
dynamicExecutionResult =
executorService.invokeAny(
ImmutableList.of(
new DynamicExecutionCallable(
bothTasksFinished,
StrategyIdentifier.LOCAL,
actionExecutionContext.getFileOutErr()) {
@Override
List<SpawnResult> callImpl() throws InterruptedException, ExecException {
// This is a rather simple approach to make it possible to score a cache hit
// on remote execution before even trying to start the action locally. This
// saves resources that would otherwise be wasted by continuously starting and
// immediately killing local processes. One possibility for improvement would
// be to establish a reporting mechanism from strategies back to here, where
// we delay starting locally until the remote strategy tells us that the
// action isn't a cache hit.
if (delayLocalExecution.get()) {
Thread.sleep(options.localExecutionDelay);
}
// fileOutErr is this callable's own suffixed copy, so the two branches do not write to
// the same stdout/stderr files.
return runLocally(
spawn,
actionExecutionContext.withFileOutErr(fileOutErr),
outputsHaveBeenWritten);
}
},
new DynamicExecutionCallable(
bothTasksFinished,
StrategyIdentifier.REMOTE,
actionExecutionContext.getFileOutErr()) {
@Override
public List<SpawnResult> callImpl() throws InterruptedException, ExecException {
List<SpawnResult> spawnResults =
runRemotely(
spawn,
actionExecutionContext.withFileOutErr(fileOutErr),
outputsHaveBeenWritten);
// Only reached when runRemotely() returned without throwing. The flag is an instance
// field, so it also affects later exec() calls on this strategy.
delayLocalExecution.set(true);
return spawnResults;
}
}));
} catch (ExecutionException e) {
Throwables.propagateIfPossible(e.getCause(), InterruptedException.class);
// DynamicExecutionCallable.call only declares InterruptedException (every other exception is
// folded into the returned DynamicExecutionResult), so this should never happen.
exceptionDuringExecution = new UserExecException(e.getCause());
} finally {
// Wait until every branch that registered with the phaser has deregistered.
bothTasksFinished.arriveAndAwaitAdvance();
if (dynamicExecutionResult.execException() != null) {
exceptionDuringExecution = dynamicExecutionResult.execException();
}
if (Thread.currentThread().isInterrupted()) {
// Warn but don't throw, in case we're crashing.
logger.warning("Interrupted waiting for dynamic execution tasks to finish");
}
}
// Check for interruption outside of finally block, so we don't mask any other exceptions.
// Clear the interrupt bit if it's set.
if (exceptionDuringExecution == null && Thread.interrupted()) {
throw new InterruptedException("Interrupted waiting for dynamic execution tasks to finish");
}
StrategyIdentifier winningStrategy = dynamicExecutionResult.strategyIdentifier();
FileOutErr fileOutErr = dynamicExecutionResult.fileOutErr();
// Still the placeholder created above: no branch delivered a result.
if (StrategyIdentifier.NONE.equals(winningStrategy) || fileOutErr == null) {
throw new IllegalStateException("Neither local or remote execution has started.");
}
try {
moveFileOutErr(actionExecutionContext, fileOutErr);
} catch (IOException e) {
String strategyName = winningStrategy.name().toLowerCase();
// A failure to move the logs is only fatal when there is no execution failure to report;
// otherwise it is downgraded to a warning so the execution failure is the one thrown below.
if (exceptionDuringExecution == null) {
throw new UserExecException(
String.format("Could not move action logs from %s execution", strategyName), e);
} else {
actionExecutionContext
.getEventHandler()
.handle(
Event.warn(
String.format(
"Could not move action logs from %s execution: %s",
strategyName, e.toString())));
}
}
if (exceptionDuringExecution != null) {
throw exceptionDuringExecution;
}
// NOTE(review): any non-null execException() was copied into exceptionDuringExecution in the
// finally block and rethrown just above, so the "failed" arm below looks unreachable - confirm.
if (options.debugSpawnScheduler) {
actionExecutionContext
.getEventHandler()
.handle(
Event.info(
String.format(
"%s action %s %s",
spawn.getMnemonic(),
dynamicExecutionResult.execException() == null ? "finished" : "failed",
winningStrategy.prettyName())));
}
// TODO(b/62588075) If a second list of spawnResults was generated (before execution was
// cancelled), then we might want to save it as well (e.g. for metrics purposes).
return dynamicExecutionResult.spawnResults();
}
/**
 * Returns whether any strategy usable for dynamic execution can run {@code spawn}.
 *
 * <p>The strategies registered for the local dynamic mode are asked first, then those registered
 * for the remote dynamic mode, and finally the worker strategy if one is set.
 *
 * @param spawn the spawn to check
 * @param actionContextRegistry used to look up the {@link DynamicStrategyRegistry} and passed on
 *     to every strategy that is asked
 * @return {@code true} as soon as one strategy accepts the spawn, {@code false} if none does
 */
@Override
public boolean canExec(Spawn spawn, ActionContextRegistry actionContextRegistry) {
  DynamicStrategyRegistry dynamicStrategyRegistry =
      actionContextRegistry.getContext(DynamicStrategyRegistry.class);
  for (SandboxedSpawnStrategy strategy :
      dynamicStrategyRegistry.getDynamicSpawnActionContexts(
          spawn, DynamicStrategyRegistry.DynamicMode.LOCAL)) {
    if (strategy.canExec(spawn, actionContextRegistry)) {
      return true;
    }
  }
  for (SandboxedSpawnStrategy strategy :
      dynamicStrategyRegistry.getDynamicSpawnActionContexts(
          spawn, DynamicStrategyRegistry.DynamicMode.REMOTE)) {
    if (strategy.canExec(spawn, actionContextRegistry)) {
      return true;
    }
  }
  // workerStrategy is @Nullable and nothing in this class assigns it, so guard the dereference:
  // without a worker strategy the answer is simply "no" rather than a NullPointerException.
  return workerStrategy != null && workerStrategy.canExec(spawn, actionContextRegistry);
}
/**
 * Moves the stdout and stderr files of {@code outErr}, where they exist, onto the corresponding
 * files of {@code actionExecutionContext}.
 *
 * @param actionExecutionContext provides the destination output and error paths
 * @param outErr the source output and error paths
 * @throws IOException if moving either file fails
 */
private void moveFileOutErr(ActionExecutionContext actionExecutionContext, FileOutErr outErr)
    throws IOException {
  // Standard output first; skipped when the source file does not exist.
  Path stdoutSource = outErr.getOutputPath();
  if (stdoutSource.exists()) {
    Path stdoutTarget = actionExecutionContext.getFileOutErr().getOutputPath();
    Files.move(stdoutSource.getPathFile(), stdoutTarget.getPathFile());
  }

  // Then standard error, under the same condition.
  Path stderrSource = outErr.getErrorPath();
  if (stderrSource.exists()) {
    Path stderrTarget = actionExecutionContext.getFileOutErr().getErrorPath();
    Files.move(stderrSource.getPathFile(), stderrTarget.getPathFile());
  }
}
/**
 * Returns a new {@link FileOutErr} whose output and error paths are siblings of those of {@code
 * fileOutErr}, with {@code suffix} appended to each base name.
 *
 * @param fileOutErr the pair of paths to derive from; both must have a parent directory
 * @param suffix the text appended to each base name
 * @return a {@link FileOutErr} pointing at the suffixed sibling paths
 * @throws NullPointerException if either path has no parent directory
 */
private static FileOutErr getSuffixedFileOutErr(FileOutErr fileOutErr, String suffix) {
  Path outParent = Preconditions.checkNotNull(fileOutErr.getOutputPath().getParentDirectory());
  String suffixedOutName = fileOutErr.getOutputPath().getBaseName() + suffix;
  Path errParent = Preconditions.checkNotNull(fileOutErr.getErrorPath().getParentDirectory());
  String suffixedErrName = fileOutErr.getErrorPath().getBaseName() + suffix;

  Path suffixedOut = outParent.getChild(suffixedOutName);
  Path suffixedErr = errParent.getChild(suffixedErrName);
  return new FileOutErr(suffixedOut, suffixedErr);
}
/**
 * Returns whether {@code spawn} may be run by a worker: its mnemonic must not be listed in {@code
 * WORKER_BLACKLISTED_MNEMONICS} and {@link Spawns#supportsWorkers} must accept it.
 */
private static boolean supportsWorkers(Spawn spawn) {
  if (WORKER_BLACKLISTED_MNEMONICS.contains(spawn.getMnemonic())) {
    return false;
  }
  return Spawns.supportsWorkers(spawn);
}
/**
 * Builds the callback through which a strategy claims the right to write outputs.
 *
 * <p>When invoked, the callback succeeds if {@code outputWriteBarrier} already holds {@code
 * token} or can be atomically switched from {@code null} to {@code token}; otherwise it throws
 * {@code DynamicInterruptedException}.
 *
 * @param token the strategy claiming the barrier
 * @param outputWriteBarrier the shared barrier, or {@code null} when nothing runs concurrently
 * @return the callback, or {@code null} when {@code outputWriteBarrier} is {@code null}
 */
private static StopConcurrentSpawns lockOutputFiles(
    SandboxedSpawnStrategy token, @Nullable AtomicReference<SpawnStrategy> outputWriteBarrier) {
  if (outputWriteBarrier == null) {
    return null;
  }
  return () -> {
    // Already the owner: nothing to do, and no compare-and-set is attempted.
    if (outputWriteBarrier.get() == token) {
      return;
    }
    // Otherwise the barrier must still be unclaimed for this strategy to take it.
    if (!outputWriteBarrier.compareAndSet(null, token)) {
      throw new DynamicInterruptedException(
          "Execution stopped because other strategy finished first");
    }
  };
}
/**
 * Runs {@code spawn} with the first suitable strategy registered for the local dynamic mode.
 *
 * <p>A strategy whose {@code toString()} contains {@code "worker"} is skipped unless {@code
 * supportsWorkers(spawn)} holds.
 *
 * @param spawn the spawn to execute
 * @param actionExecutionContext supplies the {@link DynamicStrategyRegistry} and is passed on to
 *     the chosen strategy
 * @param outputWriteBarrier the barrier shared with a concurrent branch, or {@code null} when
 *     there is none
 * @return the results returned by the chosen strategy
 * @throws RuntimeException if no registered local strategy is suitable
 */
private static ImmutableList<SpawnResult> runLocally(
    Spawn spawn,
    ActionExecutionContext actionExecutionContext,
    @Nullable AtomicReference<SpawnStrategy> outputWriteBarrier)
    throws ExecException, InterruptedException {
  DynamicStrategyRegistry registry =
      actionExecutionContext.getContext(DynamicStrategyRegistry.class);
  for (SandboxedSpawnStrategy candidate :
      registry.getDynamicSpawnActionContexts(
          spawn, DynamicStrategyRegistry.DynamicMode.LOCAL)) {
    boolean looksLikeWorker = candidate.toString().contains("worker");
    if (looksLikeWorker && !supportsWorkers(spawn)) {
      continue;
    }
    StopConcurrentSpawns stopConcurrentSpawns = lockOutputFiles(candidate, outputWriteBarrier);
    return candidate.exec(spawn, actionExecutionContext, stopConcurrentSpawns);
  }
  throw new RuntimeException(
      "executorCreated not yet called or no default dynamic_local_strategy set");
}
/**
 * Runs {@code spawn} with the first strategy registered for the remote dynamic mode.
 *
 * @param spawn the spawn to execute
 * @param actionExecutionContext supplies the {@link DynamicStrategyRegistry} and is passed on to
 *     the chosen strategy
 * @param outputWriteBarrier the barrier shared with a concurrent branch, or {@code null} when
 *     there is none
 * @return the results returned by the chosen strategy
 * @throws RuntimeException if no remote strategy is registered
 */
private static ImmutableList<SpawnResult> runRemotely(
    Spawn spawn,
    ActionExecutionContext actionExecutionContext,
    @Nullable AtomicReference<SpawnStrategy> outputWriteBarrier)
    throws ExecException, InterruptedException {
  DynamicStrategyRegistry registry =
      actionExecutionContext.getContext(DynamicStrategyRegistry.class);
  // Only the first registered strategy is ever used: the loop body returns unconditionally.
  for (SandboxedSpawnStrategy first :
      registry.getDynamicSpawnActionContexts(
          spawn, DynamicStrategyRegistry.DynamicMode.REMOTE)) {
    StopConcurrentSpawns stopConcurrentSpawns = lockOutputFiles(first, outputWriteBarrier);
    return first.exec(spawn, actionExecutionContext, stopConcurrentSpawns);
  }
  throw new RuntimeException(
      "executorCreated not yet called or no default dynamic_remote_strategy set");
}
/**
 * One branch of a dynamic execution, packaged as a {@link Callable}.
 *
 * <p>{@link #call} registers with the shared {@link Phaser} on entry and deregisters on exit.
 * Every exception other than {@link InterruptedException} is reported inside the returned
 * {@link DynamicExecutionResult} instead of being thrown.
 */
private abstract static class DynamicExecutionCallable
    implements Callable<DynamicExecutionResult> {
  private final Phaser taskFinished;
  private final StrategyIdentifier strategyIdentifier;

  /** This branch's own output/error pair: the caller's paths suffixed with the branch name. */
  protected final FileOutErr fileOutErr;

  /**
   * @param taskFinished the phaser this branch registers with while it runs
   * @param strategyIdentifier the branch this callable represents
   * @param fileOutErr the paths from which this branch's suffixed paths are derived
   */
  DynamicExecutionCallable(
      Phaser taskFinished, StrategyIdentifier strategyIdentifier, FileOutErr fileOutErr) {
    this.taskFinished = taskFinished;
    this.strategyIdentifier = strategyIdentifier;
    this.fileOutErr = getSuffixedFileOutErr(fileOutErr, "." + strategyIdentifier.name());
  }

  /** Performs the actual execution for this branch. */
  abstract List<SpawnResult> callImpl() throws InterruptedException, ExecException;

  @Override
  public final DynamicExecutionResult call() throws InterruptedException {
    taskFinished.register();
    try {
      return DynamicExecutionResult.create(strategyIdentifier, fileOutErr, null, callImpl());
    } catch (InterruptedException e) {
      // Interruption is the only condition that escapes as an exception.
      throw e;
    } catch (ExecException e) {
      return DynamicExecutionResult.create(
          strategyIdentifier, fileOutErr, e, /*spawnResults=*/ ImmutableList.of());
    } catch (Exception e) {
      // Anything else is wrapped so that it can travel in the result.
      return DynamicExecutionResult.create(
          strategyIdentifier,
          fileOutErr,
          new UserExecException(e),
          /*spawnResults=*/ ImmutableList.of());
    } finally {
      try {
        fileOutErr.close();
      } catch (IOException ignored) {
        // Nothing we can do here.
      }
      taskFinished.arriveAndDeregister();
    }
  }
}
}