blob: 10be6394fd1403e6540328d161c8351601e150e2 [file] [log] [blame]
// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.runtime;
import com.google.common.base.Preconditions;
import com.google.common.collect.Comparators;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import com.google.common.eventbus.AllowConcurrentEvents;
import com.google.common.eventbus.Subscribe;
import com.google.devtools.build.lib.actions.Action;
import com.google.devtools.build.lib.actions.ActionAnalysisMetadata;
import com.google.devtools.build.lib.actions.ActionCompletionEvent;
import com.google.devtools.build.lib.actions.ActionKeyContext;
import com.google.devtools.build.lib.actions.ActionMiddlemanEvent;
import com.google.devtools.build.lib.actions.ActionRewoundEvent;
import com.google.devtools.build.lib.actions.ActionStartedEvent;
import com.google.devtools.build.lib.actions.Actions;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.CachedActionEvent;
import com.google.devtools.build.lib.actions.SpawnExecutedEvent;
import com.google.devtools.build.lib.actions.SpawnMetrics;
import com.google.devtools.build.lib.actions.SpawnResult;
import com.google.devtools.build.lib.clock.Clock;
import java.time.Duration;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BinaryOperator;
import java.util.stream.Stream;
import javax.annotation.concurrent.ThreadSafe;
/**
* Computes the critical path in the action graph based on events published to the event bus.
*
* <p>After instantiation, this object needs to be registered on the event bus to work.
*/
@ThreadSafe
public class CriticalPathComputer {
/** Number of top actions to record. */
static final int SLOWEST_COMPONENTS_SIZE = 30;
private static final int LARGEST_MEMORY_COMPONENTS_SIZE = 20;
private static final int LARGEST_INPUT_SIZE_COMPONENTS_SIZE = 20;
/** Selects and returns the longer of two components (the first may be {@code null}). */
private static final BinaryOperator<CriticalPathComponent> SELECT_LONGER_COMPONENT =
(a, b) ->
a == null || a.getAggregatedElapsedTime().compareTo(b.getAggregatedElapsedTime()) < 0
? b
: a;
private final AtomicInteger idGenerator = new AtomicInteger();
// outputArtifactToComponent is accessed from multiple event handlers.
protected final ConcurrentMap<Artifact, CriticalPathComponent> outputArtifactToComponent =
Maps.newConcurrentMap();
private final ActionKeyContext actionKeyContext;
/** Maximum critical path found. */
private final AtomicReference<CriticalPathComponent> maxCriticalPath;
private final Clock clock;
protected CriticalPathComputer(ActionKeyContext actionKeyContext, Clock clock) {
this.actionKeyContext = actionKeyContext;
this.clock = clock;
maxCriticalPath = new AtomicReference<>();
}
/**
* Creates a critical path component for an action.
*
* @param action the action for the critical path component
* @param relativeStartNanos time when the action started to run in nanos. Only meant to be used
* for computing time differences.
*/
private CriticalPathComponent createComponent(Action action, long relativeStartNanos) {
return new CriticalPathComponent(idGenerator.getAndIncrement(), action, relativeStartNanos);
}
/**
* Return the critical path stats for the current command execution.
*
* <p>This method allows us to calculate lazily the aggregate statistics of the critical path,
* avoiding the memory and cpu penalty for doing it for all the actions executed.
*/
public AggregatedCriticalPath aggregate() {
CriticalPathComponent criticalPath = getMaxCriticalPath();
Duration totalTime = Duration.ZERO;
Duration parseTime = Duration.ZERO;
Duration networkTime = Duration.ZERO;
Duration fetchTime = Duration.ZERO;
Duration remoteQueueTime = Duration.ZERO;
Duration uploadTime = Duration.ZERO;
Duration setupTime = Duration.ZERO;
Duration executionWallTime = Duration.ZERO;
Duration retryTime = Duration.ZERO;
long inputFiles = 0L;
long inputBytes = 0L;
long memoryEstimate = 0L;
ImmutableList.Builder<CriticalPathComponent> components = ImmutableList.builder();
if (criticalPath == null) {
return AggregatedCriticalPath.EMPTY;
}
CriticalPathComponent child = criticalPath;
while (child != null) {
SpawnMetrics childSpawnMetrics = child.getSpawnMetrics();
if (childSpawnMetrics != null) {
totalTime = totalTime.plus(childSpawnMetrics.totalTime());
parseTime = parseTime.plus(childSpawnMetrics.parseTime());
networkTime = networkTime.plus(childSpawnMetrics.networkTime());
fetchTime = fetchTime.plus(childSpawnMetrics.fetchTime());
remoteQueueTime = remoteQueueTime.plus(childSpawnMetrics.remoteQueueTime());
uploadTime = uploadTime.plus(childSpawnMetrics.uploadTime());
setupTime = setupTime.plus(childSpawnMetrics.setupTime());
executionWallTime = executionWallTime.plus(childSpawnMetrics.executionWallTime());
retryTime = retryTime.plus(childSpawnMetrics.retryTime());
inputBytes += childSpawnMetrics.inputBytes();
inputFiles += childSpawnMetrics.inputFiles();
memoryEstimate += childSpawnMetrics.memoryEstimate();
}
components.add(child);
child = child.getChild();
}
return new AggregatedCriticalPath(
criticalPath.getAggregatedElapsedTime(),
new SpawnMetrics(
totalTime,
parseTime,
networkTime,
fetchTime,
remoteQueueTime,
setupTime,
uploadTime,
executionWallTime,
retryTime,
inputBytes,
inputFiles,
memoryEstimate),
components.build());
}
/** Adds spawn metrics to the action stats. */
@Subscribe
@AllowConcurrentEvents
public void spawnExecuted(SpawnExecutedEvent event) {
ActionAnalysisMetadata action = event.getActionMetadata();
Artifact primaryOutput = action.getPrimaryOutput();
if (primaryOutput == null) {
// Despite the documentation to the contrary, the SpawnIncludeScanner creates an
// ActionExecutionMetadata instance that returns a null primary output. That said, this
// class is incorrect wrt. multiple Spawns in a single action. See b/111583707.
return;
}
CriticalPathComponent stats =
Preconditions.checkNotNull(outputArtifactToComponent.get(primaryOutput));
SpawnResult spawnResult = event.getSpawnResult();
stats.addSpawnMetrics(spawnResult.getMetrics());
}
/** Returns the list of components using the most memory. */
public List<CriticalPathComponent> getLargestMemoryComponents() {
return uniqueActions()
.collect(
Comparators.greatest(
LARGEST_MEMORY_COMPONENTS_SIZE,
Comparator.comparingLong((c) -> c.getSpawnMetrics().memoryEstimate())));
}
/** Returns the list of components with the largest input sizes. */
public List<CriticalPathComponent> getLargestInputSizeComponents() {
return uniqueActions()
.collect(
Comparators.greatest(
LARGEST_INPUT_SIZE_COMPONENTS_SIZE,
Comparator.comparingLong((c) -> c.getSpawnMetrics().inputBytes())));
}
/** Returns the list of slowest components. */
public List<CriticalPathComponent> getSlowestComponents() {
return uniqueActions()
.collect(
Comparators.greatest(
SLOWEST_COMPONENTS_SIZE,
Comparator.comparingLong(CriticalPathComponent::getElapsedTimeNanos)));
}
private Stream<CriticalPathComponent> uniqueActions() {
return outputArtifactToComponent.entrySet().stream()
.filter((e) -> e.getValue().isPrimaryOutput(e.getKey()))
.map((e) -> e.getValue());
}
/**
* Record an action that has started to run.
*
* @param event information about the started action
*/
@Subscribe
@AllowConcurrentEvents
public void actionStarted(ActionStartedEvent event) {
Action action = event.getAction();
tryAddComponent(createComponent(action, event.getNanoTimeStart()));
}
/**
* Record a middleman action execution. Even if middleman are almost instant, we record them
* because they depend on other actions and we need them for constructing the critical path.
*
* <p>For some rules with incorrect configuration transitions we might get notified several times
* for the same middleman. This should only happen if the actions are shared.
*/
@Subscribe
@AllowConcurrentEvents
public void middlemanAction(ActionMiddlemanEvent event) {
Action action = event.getAction();
CriticalPathComponent component =
tryAddComponent(createComponent(action, event.getNanoTimeStart()));
finalizeActionStat(event.getNanoTimeStart(), action, component);
}
/**
* Try to add the component to the map of critical path components. If there is an existing
* component for its primary output it uses that to update the rest of the outputs.
*
* @return The component to be used for updating the time stats.
*/
@SuppressWarnings("ReferenceEquality")
private CriticalPathComponent tryAddComponent(CriticalPathComponent newComponent) {
Action newAction = newComponent.getAction();
Artifact primaryOutput = newAction.getPrimaryOutput();
CriticalPathComponent storedComponent =
outputArtifactToComponent.putIfAbsent(primaryOutput, newComponent);
if (storedComponent != null) {
Action oldAction = storedComponent.getAction();
// TODO(b/120663721) Replace this fragile reference equality check with something principled.
if (oldAction != newAction && !Actions.canBeShared(actionKeyContext, newAction, oldAction)) {
throw new IllegalStateException(
"Duplicate output artifact found for unsharable actions."
+ "This can happen if a previous event registered the action.\n"
+ "Old action: "
+ oldAction
+ "\n\nNew action: "
+ newAction
+ "\n\nArtifact: "
+ primaryOutput
+ "\n");
}
} else {
storedComponent = newComponent;
}
// Try to insert the existing component for the rest of the outputs even if we failed to be
// the ones inserting the component so that at the end of this method we guarantee that all the
// outputs have a component.
for (Artifact output : newAction.getOutputs()) {
if (output == primaryOutput) {
continue;
}
CriticalPathComponent old = outputArtifactToComponent.putIfAbsent(output, storedComponent);
// If two actions run concurrently maybe we find a component by primary output but we are
// the first updating the rest of the outputs.
Preconditions.checkState(
old == null || old == storedComponent, "Inconsistent state for %s", newAction);
}
return storedComponent;
}
/**
* Record an action that was not executed because it was in the (disk) cache. This is needed so
* that we can calculate correctly the dependencies tree if we have some cached actions in the
* middle of the critical path.
*/
@Subscribe
@AllowConcurrentEvents
public void actionCached(CachedActionEvent event) {
Action action = event.getAction();
CriticalPathComponent component
= tryAddComponent(createComponent(action, event.getNanoTimeStart()));
finalizeActionStat(event.getNanoTimeStart(), action, component);
}
/**
* Records the elapsed time stats for the action. For each input artifact, it finds the real
* dependent artifacts and records the critical path stats.
*/
@Subscribe
@AllowConcurrentEvents
public void actionComplete(ActionCompletionEvent event) {
Action action = event.getAction();
CriticalPathComponent component =
Preconditions.checkNotNull(
outputArtifactToComponent.get(action.getPrimaryOutput()), action);
finalizeActionStat(event.getRelativeActionStartTime(), action, component);
}
/**
* Record that the failed rewound action is no longer running. The action may or may not start
* again later.
*/
@Subscribe
@AllowConcurrentEvents
public void actionRewound(ActionRewoundEvent event) {
Action action = event.getFailedRewoundAction();
CriticalPathComponent component =
Preconditions.checkNotNull(outputArtifactToComponent.get(action.getPrimaryOutput()));
component.finishActionExecution(event.getRelativeActionStartTime(), clock.nanoTime());
}
/** Maximum critical path component found during the build. */
protected CriticalPathComponent getMaxCriticalPath() {
return maxCriticalPath.get();
}
private void finalizeActionStat(
long startTimeNanos, Action action, CriticalPathComponent component) {
for (Artifact input : action.getInputs()) {
addArtifactDependency(component, input);
}
component.finishActionExecution(startTimeNanos, clock.nanoTime());
maxCriticalPath.accumulateAndGet(component, SELECT_LONGER_COMPONENT);
}
/**
* If "input" is a generated artifact, link its critical path to the one we're building.
*/
private void addArtifactDependency(CriticalPathComponent actionStats, Artifact input) {
CriticalPathComponent depComponent = outputArtifactToComponent.get(input);
if (depComponent != null) {
if (depComponent.isRunning()) {
checkCriticalPathInconsistency(input, depComponent.getAction(), actionStats);
return;
}
actionStats.addDepInfo(depComponent);
}
}
protected void checkCriticalPathInconsistency(
Artifact input, Action action, CriticalPathComponent actionStats) {
// Rare case that an action depending on a previously-cached shared action sees a different
// shared action that is in the midst of being an action cache hit.
for (Artifact actionOutput : action.getOutputs()) {
if (input.equals(actionOutput)
&& Objects.equals(input.getArtifactOwner(), actionOutput.getArtifactOwner())) {
// As far as we can tell, this (currently running) action is the same action that
// produced input, not another shared action. This should be impossible.
throw new IllegalStateException(
String.format(
"Cannot add critical path stats when the action is not finished. %s. %s. %s",
input, actionStats.prettyPrintAction(), action));
}
}
}
}