blob: ba026da5c6e886a22a3888ec2f4c92dad488947d [file] [log] [blame]
// Copyright 2017 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.exec;
import com.google.devtools.build.lib.actions.ActionContext;
import com.google.devtools.build.lib.actions.ActionInput;
import com.google.devtools.build.lib.actions.Artifact.ArtifactExpander;
import com.google.devtools.build.lib.actions.ArtifactPathResolver;
import com.google.devtools.build.lib.actions.ExecException;
import com.google.devtools.build.lib.actions.FutureSpawn;
import com.google.devtools.build.lib.actions.LostInputsExecException;
import com.google.devtools.build.lib.actions.MetadataProvider;
import com.google.devtools.build.lib.actions.Spawn;
import com.google.devtools.build.lib.actions.SpawnResult;
import com.google.devtools.build.lib.actions.cache.MetadataInjector;
import com.google.devtools.build.lib.util.io.FileOutErr;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import java.io.IOException;
import java.time.Duration;
import java.util.SortedMap;
import javax.annotation.Nullable;
/**
* A runner for spawns. Implementations can execute spawns on the local machine as a subprocess with
* or without sandboxing, on a remote machine, or only consult a remote cache.
*
* <h2>Environment Variables</h2>
*
* <ul>
* <li>Implementations MUST set the specified environment variables.
* <li>Implementations MAY add TMPDIR as an additional env variable, if it is not set already.
* <li>If an implementation sets TMPDIR, it MUST be set to an absolute path.
* <li>Implementations MUST NOT add any other environment variables.
* </ul>
*
* <h2>Command line</h2>
*
* <ul>
* <li>Implementations MUST use the specified command line unmodified by default.
* <li>Implementations MAY modify the specified command line if explicitly requested by the user.
* </ul>
*
* <h2>Process</h2>
*
* <ul>
* <li>Implementations MUST be thread-safe.
* <li>Implementations MUST ensure that all child processes (including transitive) exit in all
* cases, including successful completion, interruption, and timeout
* <li>Implementations MUST return the exit code as observed from the subprocess if the subprocess
* exits naturally; they MUST not throw an exception for non-zero exit codes
* <li>Implementations MUST be interruptible; they MUST throw {@link InterruptedException} from
* {@link #exec} when interrupted
* <li>Implementations MUST apply the specified timeout to the execution of the subprocess
* <ul>
* <li>If no timeout is specified, the implementation MAY apply an implementation-specific
* timeout
* <li>If the specified timeout is larger than an implementation-dependent maximum, then the
* implementation MUST throw {@link IllegalArgumentException}; it MUST not silently
* change the timeout to a smaller value
* <li>If the timeout is exceeded, the implementation MUST throw TimeoutException, with the
* timeout that was applied to the subprocess (TODO)
* </ul>
* </ul>
*
* <h2>Optimistic Concurrency</h2>
*
* Bazel may choose to execute a spawn using multiple {@link SpawnRunner} implementations
* simultaneously in order to minimize total latency. This is especially useful for builds with few
* actions where remotely executing the actions incurs high round trip times.
*
* <ul>
* <li>All implementations MUST call {@link SpawnExecutionContext#lockOutputFiles} before writing
* to any of the output files, but may write to stdout and stderr without calling it. Instead,
* all callers must provide temporary locations for stdout & stderr if they ever call multiple
* {@link SpawnRunner} implementations concurrently. Spawn runners that use the local machine
* MUST either call it before starting the subprocess, or ensure that subprocesses write to
* temporary locations (for example by running in a mount namespace) and then copy or move the
* outputs into place.
* <li>Implementations SHOULD delay calling {@link SpawnExecutionContext#lockOutputFiles} until
* just before writing.
* </ul>
*/
public interface SpawnRunner {
/**
* Used to report progress on the current spawn. This is mainly used to report the current state
* of the subprocess to the user, but may also be used to trigger parallel execution. For example,
* a dynamic scheduler may use the signal that there was a cache miss to start parallel execution
* of the same Spawn - also see the {@link SpawnRunner} documentation section on "optimistic
* concurrency".
*
* <p>{@link SpawnRunner} implementations should post a progress status before any potentially
* long-running operation.
*/
enum ProgressStatus {
/** Spawn is waiting for local or remote resources to become available. */
SCHEDULING,
/** The {@link SpawnRunner} is looking for a cache hit. */
CHECKING_CACHE,
/**
* Resources are acquired, and there was probably no cache hit. This MUST be posted before
* attempting to execute the subprocess.
*
* <p>Caching {@link SpawnRunner} implementations should only post this after a failed cache
* lookup, but may post this if cache lookup and execution happen within the same step, e.g. as
* part of a single RPC call with no mechanism to report cache misses.
*/
EXECUTING,
/** Downloading outputs from a remote machine. */
DOWNLOADING
}
/**
* A context that binds a {@link Spawn} to a {@link SpawnRunner}.
*
* <p>This interface may change without notice.
*
* <p>Implementations must be at least thread-compatible, i.e., they must be safe as long as each
* instance is only used within a single thread. Different instances of the same class may be used
* by different threads, so they MUST not call any shared non-thread-safe objects.
*/
interface SpawnExecutionContext {
/**
* Returns a unique id for this spawn, to be used for logging. Note that a single spawn may be
* passed to multiple {@link SpawnRunner} implementations, so any log entries should also
* contain the identity of the spawn runner implementation.
*/
int getId();
/**
* Prefetches the Spawns input files to the local machine. There are cases where Bazel runs on a
* network file system, and prefetching the files in parallel is a significant performance win.
* This should only be called by local strategies when local execution is imminent.
*
* <p>Should be called with the equivalent of: <code>
* policy.prefetchInputs(
* Iterables.filter(policy.getInputMapping().values(), Predicates.notNull()));
* </code>
*
* <p>Note in particular that {@link #getInputMapping} may return {@code null} values, but this
* method does not accept {@code null} values.
*
* <p>The reason why this method requires passing in the inputs is that getInputMapping may be
* slow to compute, so if the implementation already called it, we don't want to compute it
* again. I suppose we could require implementations to memoize getInputMapping (but not compute
* it eagerly), and that may change in the future.
*/
void prefetchInputs() throws IOException, InterruptedException;
/**
* The input file metadata cache for this specific spawn, which can be used to efficiently
* obtain file digests and sizes.
*/
MetadataProvider getMetadataProvider();
/** An artifact expander. */
// TODO(ulfjack): This is only used for the sandbox runners to compute a set of empty
// directories. We shouldn't have this and the getInputMapping method; maybe there's a way to
// unify the two? Alternatively, maybe the input mapping should (optionally?) contain
// directories? Or maybe we need a separate method to return the set of directories?
ArtifactExpander getArtifactExpander();
/** The {@link ArtifactPathResolver} to use when directly writing output files. */
default ArtifactPathResolver getPathResolver() {
return ArtifactPathResolver.IDENTITY;
}
/**
* All implementations must call this method before writing to the provided stdout / stderr or
* to any of the output file locations. This method is used to coordinate - implementations must
* throw an {@link InterruptedException} for all but one caller.
*/
void lockOutputFiles() throws InterruptedException, IOException;
/**
* Returns whether this spawn may be executing concurrently under multiple spawn runners. If so,
* {@link #lockOutputFiles} may raise {@link InterruptedException}.
*/
boolean speculating();
/** Returns the timeout that should be applied for the given {@link Spawn} instance. */
Duration getTimeout();
/** The files to which to write stdout and stderr. */
FileOutErr getFileOutErr();
SortedMap<PathFragment, ActionInput> getInputMapping(boolean expandTreeArtifactsInRunfiles)
throws IOException;
/** Reports a progress update to the Spawn strategy. */
void report(ProgressStatus state, String name);
/**
* Returns a {@link MetadataInjector} that allows a caller to inject metadata about spawn
* outputs that are stored remotely.
*/
MetadataInjector getMetadataInjector();
/**
* Returns the context registered for the given identifying type or {@code null} if none was
* registered.
*/
@Nullable
<T extends ActionContext> T getContext(Class<T> identifyingType);
/** Throws if lost inputs have been detected. */
void checkForLostInputs() throws LostInputsExecException;
}
/**
* Run the given spawn asynchronously. The default implementation is synchronous for migration.
*
* @param spawn the spawn to run
* @param context the spawn execution context
* @return the result from running the spawn
* @throws InterruptedException if the calling thread was interrupted, or if the runner could not
* lock the output files (see {@link SpawnExecutionContext#lockOutputFiles()})
* @throws IOException if something went wrong reading or writing to the local file system
* @throws ExecException if the request is malformed
*/
default FutureSpawn execAsync(Spawn spawn, SpawnExecutionContext context)
throws InterruptedException, IOException, ExecException {
// TODO(ulfjack): Remove this default implementation. [exec-async]
return FutureSpawn.immediate(exec(spawn, context));
}
/**
* Run the given spawn.
*
* @param spawn the spawn to run
* @param context the spawn execution context
* @return the result from running the spawn
* @throws InterruptedException if the calling thread was interrupted, or if the runner could not
* lock the output files (see {@link SpawnExecutionContext#lockOutputFiles()})
* @throws IOException if something went wrong reading or writing to the local file system
* @throws ExecException if the request is malformed
*/
SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
throws InterruptedException, IOException, ExecException;
/** Returns whether this SpawnRunner supports executing the given Spawn. */
boolean canExec(Spawn spawn);
/** Returns the name of the SpawnRunner. */
String getName();
/**
* Removes any files or directories that this spawn runner may have put in the sandbox base.
*
* <p>It is important that this function only removes entries that may have been generated by this
* build, not any possible entries that a future build may generate.
*
* @param sandboxBase path to the base of the sandbox tree where the spawn runner may have created
* entries
* @param treeDeleter scheduler for tree deletions
* @throws IOException if there are problems deleting the entries
*/
default void cleanupSandboxBase(Path sandboxBase, TreeDeleter treeDeleter) throws IOException {}
}