blob: 8d60f29d96dfcc4708cd463de94dd61de8cd581d [file] [log] [blame]
// Copyright 2023 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.skyframe;
import static java.util.concurrent.TimeUnit.MINUTES;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.google.devtools.build.lib.concurrent.AbstractQueueVisitor;
import com.google.devtools.build.lib.concurrent.ErrorClassifier;
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.events.EventHandler;
import com.google.devtools.build.lib.profiler.Profiler;
import com.google.devtools.build.lib.profiler.SilentCloseable;
import java.util.Collection;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
/**
* SkyframeFocuser is a minimizing optimizer (i.e. garbage collector) for the Skyframe graph, based
* on a set of known inputs known as a working set, while ensuring correct incremental builds.
*
* <p>This is also a subclass of {@link AbstractQueueVisitor} to take advantage of highly
* parallelizable operations over the Skyframe graph.
*/
public final class SkyframeFocuser extends AbstractQueueVisitor {
// The in-memory Skyframe graph
private final InMemoryGraph graph;
// Event handler to report stats during focusing
private final EventHandler eventHandler;
private SkyframeFocuser(InMemoryGraph graph, EventHandler eventHandler) {
super(
/* parallelism= */ Runtime.getRuntime().availableProcessors(),
/* keepAliveTime= */ 2,
MINUTES,
ExceptionHandlingMode.FAIL_FAST,
/* poolName= */ "skyframe-focuser",
ErrorClassifier.DEFAULT);
this.graph = graph;
this.eventHandler = eventHandler;
}
/**
* Minimize the Skyframe graph by traverse it to prune nodes and edges that are not necessary for
* the build correctness of a working set of files. The graph focusing algorithm pseudocode is as
* follows.
*
* <ol>
* <li>Mark all the leafs and their transitive rdeps. For each marked node, also mark all their
* direct dependencies. An injectable function can also mark additional nodes reachable from
* the node itself.
* <li>For each marked node, remove all direct deps edges. Also remove all rdep edges unless
* they point to a rdep that should be kept. This creates the "flattened verification set".
* </ol>
*
* @param graph the in-memory graph to operate on
* @param eventHandler handler to report events during focusing
* @param roots the SkyKeys of the roots to be kept, i.e. the top level keys.
* @param leafs the SkyKeys of the leafs to be kept. This is the "working set".
* @param additionalDepsToKeep by default, all direct deps of rdeps are kept. this function is
* applied on all direct deps, in case there are optimizations elsewhere in the skyframe
* implementation that reads transitive nodes without specifying a dependency on them.
* @return the set of kept SkyKeys in the in-memory graph, categorized by deps and rdeps.
*/
public static FocusResult focus(
InMemoryGraph graph,
EventHandler eventHandler,
Set<SkyKey> roots,
Set<SkyKey> leafs,
Function<SkyKey, Set<SkyKey>> additionalDepsToKeep)
throws InterruptedException {
SkyframeFocuser focuser = new SkyframeFocuser(graph, eventHandler);
return focuser.run(roots, leafs, additionalDepsToKeep);
}
/**
* The set of SkyKeys kept after focusing. The actual change is done in place with the in-memory
* graph.
*/
public static class FocusResult {
private final ImmutableSet<SkyKey> rdeps;
private final ImmutableSet<SkyKey> deps;
private FocusResult(ImmutableSet<SkyKey> rdeps, ImmutableSet<SkyKey> deps) {
this.rdeps = rdeps;
this.deps = deps;
}
/**
* Returns the set of SkyKeys that are in the dependencies of all roots, and rdeps from the
* leafs. May contain transitive dependencies, in cases where certain functions use them without
* establishing a Skyframe dependency.
*/
public ImmutableSet<SkyKey> getDeps() {
return deps;
}
/** Returns the set of SkyKeys that are in the reverse dependencies of the leafs. */
public ImmutableSet<SkyKey> getRdeps() {
return rdeps;
}
}
/**
* NodeVisitor is parallelizable graph visitor that's applied transitively from leafs to the
* roots, while marking rdeps and all direct deps of those rdeps to be kept by SkyframeFocuser.
*/
private class NodeVisitor implements Runnable {
// The SkyKey that this NodeVisitor is responsible for.
private final SkyKey key;
// Threadsafe set of keys that depend on this key. May be modified by multiple NodeVisitors
// concurrently.
private final Set<SkyKey> keptRdeps;
// Threadsafe set of keys that this key depends on. May be modified by multiple NodeVisitors
// concurrently.
private final Set<SkyKey> keptDeps;
// See javadoc for the additionalDepsToKeep parameter of {@code SkyframeFocuser#focus}.
private final Function<SkyKey, Set<SkyKey>> additionalDepsToKeep;
protected NodeVisitor(
SkyKey key,
Set<SkyKey> keptRdeps,
Set<SkyKey> keptDeps,
Function<SkyKey, Set<SkyKey>> additionalDepsToKeep) {
this.key = key;
this.keptRdeps = keptRdeps;
this.keptDeps = keptDeps;
this.additionalDepsToKeep = additionalDepsToKeep;
}
@Override
public void run() {
InMemoryNodeEntry nodeEntry = graph.getIfPresent(key);
if (nodeEntry == null) {
// Throwing may be too strong if the working set is loosely defined, e.g. an entire
// directory instead of a single file, and there are some files in the directory that
// are not in the TC of the roots.
//
// TODO: b/312819241 - handle this gracefully without throwing.
throw new IllegalStateException("nodeEntry not found for: " + key.getCanonicalName());
}
if (!nodeEntry.isDone()) {
// TODO: b/312819241 - handle this gracefully without throwing.
throw new IllegalStateException("nodeEntry not done: " + key.getCanonicalName());
}
for (SkyKey rdep : nodeEntry.getReverseDepsForDoneEntry()) {
if (!keptRdeps.add(rdep)) {
// Memoization. Already processed.
continue;
}
// Queue a traversal up the graph. This will not create duplicate NodeVisitors on the
// same rdep due to the atomic keptRdeps.add check above.
execute(new NodeVisitor(rdep, keptRdeps, keptDeps, additionalDepsToKeep));
}
for (SkyKey dep : nodeEntry.getDirectDeps()) {
if (!keptDeps.add(dep)) {
// Memoization. Already processed.
continue;
}
// This is necessary to keep the action inputs encapsulated by a NestedSet. Otherwise,
// those inputs will be missing.
//
// TODO: b/312819241 - move SkyframeFocuser from build.skyframe to build.lib.skyframe so
// that we can do the check directly without using an injected Function.
execute(() -> keptDeps.addAll(additionalDepsToKeep.apply(dep)));
}
}
}
/** Entry point of the Skyframe garbage collection algorithm. */
private FocusResult run(
Set<SkyKey> roots, Set<SkyKey> leafs, Function<SkyKey, Set<SkyKey>> additionalDepsToKeep)
throws InterruptedException {
Set<SkyKey> keptDeps = Sets.newConcurrentHashSet();
Set<SkyKey> keptRdeps = Sets.newConcurrentHashSet();
// All leafs are automatically considered as rdeps.
keptRdeps.addAll(leafs);
// All roots are automatically considered as deps.
//
// Some roots are re-evaluated on every build. These roots may not be in the reverse TC
// of leafs (working set), but may influence how the working set is evaluated (e.g. platform
// mapping). If we remove them from the graph, those keys may be re-evaluated anyway (along with
// their TC) on subsequent invocations, leading to wasted compute and RAM.
// The exercise of ensuring which roots should be kept is left to the caller of
// this function, but we ensure that all specified ones are kept here.
keptDeps.addAll(roots);
try (SilentCloseable c = Profiler.instance().profile("focus.mark")) {
// Start traversal from leafs.
for (SkyKey leaf : leafs) {
execute(new NodeVisitor(leaf, keptRdeps, keptDeps, additionalDepsToKeep));
}
awaitQuiescenceWithoutShutdown(true);
}
// Keep the rdeps transitive closure from leafs distinct from the deps.
keptDeps.removeAll(keptRdeps);
eventHandler.handle(
Event.info("Nodes in reverse transitive closure from leafs: " + keptRdeps.size()));
eventHandler.handle(
Event.info("Nodes in direct deps of reverse transitive closure: " + keptDeps.size()));
try (SilentCloseable c = Profiler.instance().profile("focus.sweep_nodes")) {
Set<SkyKey> toKeep = Sets.union(keptDeps, keptRdeps);
graph.parallelForEach(
inMemoryNodeEntry -> {
SkyKey key = inMemoryNodeEntry.getKey();
if (!toKeep.contains(key)) {
graph.remove(key);
}
});
graph.shrinkNodeMap();
}
AtomicLong rdepEdgesBefore = new AtomicLong();
AtomicLong rdepEdgesAfter = new AtomicLong();
try (SilentCloseable c = Profiler.instance().profile("focus.sweep_edges")) {
for (SkyKey key : keptDeps) {
execute(
() -> {
// TODO: b/312819241 - Consider transforming IncrementalInMemoryNodeEntry only used
// for their immutable states to an ImmutableDoneNodeEntry or
// NonIncrementalInMemoryNodeEntry
// for further memory savings.
IncrementalInMemoryNodeEntry nodeEntry =
(IncrementalInMemoryNodeEntry) graph.getIfPresent(key);
// No need to keep the direct deps edges of existing deps. For example:
//
// B
// / |\
// A C \
// \ |
// D
//
// B is the root, and A is the only leaf. We can throw out the CD edge, even
// though both C and D are still used by B. This is because no changes are expected to
// C and D, so it's unnecessary to maintain the edges.
nodeEntry.directDeps = GroupedDeps.EMPTY_COMPRESSED;
// No need to keep the rdep edges of the deps if they do not point to an rdep
// reachable (hence, dirty-able) by the working set.
//
// This accounts for nearly 5% of 9+GB retained heap on a large server build.
Collection<SkyKey> existingRdeps = nodeEntry.getReverseDepsForDoneEntry();
rdepEdgesBefore.getAndAdd(existingRdeps.size());
int rdepEdgesKept = 0;
for (SkyKey rdep : existingRdeps) {
if (keptRdeps.contains(rdep)) {
rdepEdgesKept++;
} else {
nodeEntry.removeReverseDep(rdep);
}
}
rdepEdgesAfter.getAndAdd(rdepEdgesKept);
ReverseDepsUtility.consolidateData(nodeEntry);
});
}
awaitQuiescence(true); // and shut down the ExecutorService.
}
eventHandler.handle(
Event.info(
String.format("Rdep edges: %s -> %s", rdepEdgesBefore.get(), rdepEdgesAfter.get())));
return new FocusResult(ImmutableSet.copyOf(keptRdeps), ImmutableSet.copyOf(keptDeps));
}
}