blob: b18243d6e1b79aa7eaa99d1c7bdcd75847e30dab [file] [log] [blame]
// Copyright 2024 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.skyframe;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.flogger.GoogleLogger;
import com.google.devtools.build.lib.actions.ThreadStateReceiver;
import com.google.devtools.build.lib.cmdline.PackageIdentifier;
import com.google.devtools.build.lib.io.FileSymlinkException;
import com.google.devtools.build.lib.io.InconsistentFilesystemException;
import com.google.devtools.build.lib.packages.BuildFileNotFoundException;
import com.google.devtools.build.lib.packages.CachingPackageLocator;
import com.google.devtools.build.lib.packages.Globber;
import com.google.devtools.build.lib.packages.GlobberUtils;
import com.google.devtools.build.lib.packages.NonSkyframeGlobber;
import com.google.devtools.build.lib.packages.Package;
import com.google.devtools.build.lib.packages.PackageFactory;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.lib.vfs.Root;
import com.google.devtools.build.lib.vfs.UnixGlob;
import com.google.devtools.build.skyframe.SkyKey;
import com.google.devtools.build.skyframe.SkyValue;
import com.google.devtools.build.skyframe.SkyframeLookupResult;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;
import javax.annotation.Nullable;
/**
* Computes the {@link PackageValue} which depends on multiple GLOB nodes.
*
* <p>Every glob pattern defined in the package's {@code BUILD} file is represented as a single GLOB
* node in the dependency graph.
*
* <p>{@link PackageFunctionWithMultipleGlobDeps} subclass is created when the globbing strategy is
* {@link
* com.google.devtools.build.lib.skyframe.PackageFunction.GlobbingStrategy#MULTIPLE_GLOB_HYBRID}.
* Incremental evaluation adopts the {@code MULTIPLE_GLOB_HYBRID} globbing strategy in order to
* reuse the unchanged {@link GlobValue}s stored in Skyframe when incrementally reloading the
* package.
*/
final class PackageFunctionWithMultipleGlobDeps extends PackageFunction {
// Logger for this class; SkyframeHybridGlobber#runAsync uses it to report exceptions found while
// looking up glob keys in Skyframe.
private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();
/**
* Creates the function. Every argument is passed through unchanged to the {@link PackageFunction}
* constructor; this subclass adds no constructor state of its own.
*/
PackageFunctionWithMultipleGlobDeps(
PackageFactory packageFactory,
CachingPackageLocator pkgLocator,
AtomicBoolean showLoadingProgress,
AtomicInteger numPackagesSuccessfullyLoaded,
@Nullable BzlLoadFunction bzlLoadFunctionForInlining,
@Nullable PackageProgressReceiver packageProgress,
ActionOnIOExceptionReadingBuildFile actionOnIOExceptionReadingBuildFile,
boolean shouldUseRepoDotBazel,
Function<SkyKey, ThreadStateReceiver> threadStateReceiverFactoryForMetrics,
AtomicReference<Semaphore> cpuBoundSemaphore) {
super(
packageFactory,
pkgLocator,
showLoadingProgress,
numPackagesSuccessfullyLoaded,
bzlLoadFunctionForInlining,
packageProgress,
actionOnIOExceptionReadingBuildFile,
shouldUseRepoDotBazel,
threadStateReceiverFactoryForMetrics,
cpuBoundSemaphore);
}
/**
 * Looks up every GLOB key recorded for the loaded package and propagates the filesystem problems
 * that Skyframe reports for them.
 *
 * <p>Under the hybrid globbing strategy these deps should already have been requested by the
 * {@link Globber} while the package was loading; they are examined again here because a {@link
 * NonSkyframeGlobber} cannot handle symlink issues gracefully.
 *
 * @param packageIdentifier the package being loaded, used when reporting inconsistencies
 * @param packageRoot unused by this implementation
 * @param loadedPackage must be the {@code LoadedPackageWithGlobDeps} created by this class
 * @param env the Skyframe environment used for the lookup
 * @param packageWasInError forwarded to {@code maybeThrowFilesystemInconsistency}
 * @throws InternalInconsistentFilesystemException as soon as any glob dep reports an {@link
 *     InconsistentFilesystemException}
 * @throws FileSymlinkException if at least one glob dep reported a symlink issue and the loop
 *     finished without throwing; the last one encountered is rethrown
 */
@Override
protected void handleGlobDepsAndPropagateFilesystemExceptions(
    PackageIdentifier packageIdentifier,
    Root packageRoot,
    LoadedPackage loadedPackage,
    Environment env,
    boolean packageWasInError)
    throws InternalInconsistentFilesystemException, FileSymlinkException, InterruptedException {
  LoadedPackageWithGlobDeps packageWithGlobDeps = (LoadedPackageWithGlobDeps) loadedPackage;
  Set<SkyKey> globKeys = packageWithGlobDeps.globDepKeys;
  checkState(Iterables.all(globKeys, SkyFunctions.isSkyFunction(SkyFunctions.GLOB)), globKeys);

  SkyframeLookupResult lookup = env.getValuesAndExceptions(globKeys);
  FileSymlinkException symlinkFailure = null;
  for (SkyKey globKey : globKeys) {
    try {
      // Only the exception matters here; the value itself is not needed.
      lookup.getOrThrow(globKey, IOException.class, BuildFileNotFoundException.class);
    } catch (InconsistentFilesystemException inconsistency) {
      throw new InternalInconsistentFilesystemException(packageIdentifier, inconsistency);
    } catch (FileSymlinkException symlinkIssue) {
      // Non-Skyframe globbing has no explicit symlink-issue detection, although some filesystems
      // notice certain problems on their own (for instance the hardcoded limit on symlink hops
      // that surfaces as ELOOP on Unix). Skyframe globbing detects symlink issues explicitly,
      // which lets us:
      // (1) Report a more informative error message.
      // (2) Confidently treat the symlink issue as non-transient.
      symlinkFailure = symlinkIssue;
    } catch (IOException | BuildFileNotFoundException other) {
      maybeThrowFilesystemInconsistency(packageIdentifier, other, packageWasInError);
    }
  }

  if (symlinkFailure == null) {
    return;
  }
  // At least one symlink issue was seen and no inconsistent-filesystem issue interrupted the
  // loop, so arbitrarily rethrow one of the symlink issues.
  throw symlinkFailure;
}
/**
* A {@link LoadedPackage} that additionally carries the GLOB keys requested while the package was
* loaded, so that {@code handleGlobDepsAndPropagateFilesystemExceptions} can look them up again.
*/
private static final class LoadedPackageWithGlobDeps extends LoadedPackage {
// Glob keys recorded during loading; supplied by newLoadedPackage and stored as given (no copy).
private final Set<SkyKey> globDepKeys;
private LoadedPackageWithGlobDeps(
Package.Builder builder, long loadTimeNanos, Set<SkyKey> globDepKeys) {
super(builder, loadTimeNanos);
this.globDepKeys = globDepKeys;
}
}
/**
* A {@link Globber} implemented on top of Skyframe that falls back to a {@link
* NonSkyframeGlobber} on a Skyframe cache-miss. This way we don't require a Skyframe restart
* after a call to {@link Globber#runAsync} and before/during a call to {@link
* Globber#fetchUnsorted}.
*
* <p>There are three advantages to this hybrid approach over the more obvious approach of solely
* using a {@link NonSkyframeGlobber}:
*
* <ul>
* <li>We trivially have the proper Skyframe {@link GlobValue} deps, whereas we would need to
* request them after-the-fact if we solely used a {@link NonSkyframeGlobber}.
* <li>We don't need to re-evaluate globs whose expression hasn't changed (e.g. in the common
* case of a BUILD file edit that doesn't change a glob expression), whereas invoking the
* package loading machinery in {@link PackageFactory} with a {@link NonSkyframeGlobber}
* would naively re-evaluate globs when re-evaluating the BUILD file.
* <li>We don't need to re-evaluate invalidated globs *twice* (the single re-evaluation via our
* GlobValue deps is sufficient and optimal). See above for why the second evaluation would
* happen.
* </ul>
*
* <p>One general disadvantage of the hybrid approach is that we do the logical globbing work
* twice on clean builds. A part of this is that we do double the number of 'stat' filesystem
* operations: non-Skyframe globbing does `stat` operations following symlinks, and Skyframe's
* {@link FileStateFunction} does those operations not following symlinks (since {@link
* FileFunction} handles symlink chains manually). We used to have a similar concern for `readdir`
* operations, but we mitigated it by restructuring the non-Skyframe globbing code so that it
* doesn't follow symlinks for these operations, allowing the results to be cached and used by
* {@link DirectoryListingStateFunction}.
*
* <p>This theoretical inefficiency isn't a big deal in practice, and historical attempts to
* completely remove it by solely using Skyframe's {@link GlobFunction} have been unsuccessful due
* to the consequences of Skyframe restarts on package loading performance. If we knew the full
* set of `glob` calls that would be performed during BUILD file evaluation, then we could
* precompute those {@link GlobValue} nodes and not have any Skyframe restarts during BUILD file
* evaluation. But that's a big "if"; consider glob calls with non-static arguments:
*
* <pre>
* P = f(42)
* g(glob(P))
* </pre>
*
* Also consider dependent glob calls:
*
* <pre>
* L = glob(["foo.*"])
* g(glob([f(x) for x in L]))
* </pre>
*
* One historical attempt at addressing this issue was to do a first pass of BUILD file evaluation
* where we tried to encounter as many concrete glob calls as possible but without doing full
* Starlark evaluation, and then do the real pass of BUILD file evaluation. This approach was a
* net performance regression, due to the first pass both having non-trivial Starlark evaluation
* cost (consider a very expensive function 'f' in the first example above) and also not
* encountering all glob calls (meaning the real pass can still have the core problem with
* Skyframe restarts).
*/
private static final class SkyframeHybridGlobber implements Globber {
// Package and root that every glob key built by getGlobKey is scoped to.
private final PackageIdentifier packageId;
private final Root packageRoot;
// Skyframe environment used by runAsync to look up glob keys.
private final Environment env;
// Delegate that evaluates the patterns whose glob key has no value in Skyframe.
private final NonSkyframeGlobber nonSkyframeGlobber;
// Every glob key built by runAsync so far, including keys whose pattern ends up delegated.
// Backed by a concurrent set.
private final Set<SkyKey> globDepsRequested = Sets.newConcurrentHashSet();
/** Stores the given collaborators; performs no other work. */
private SkyframeHybridGlobber(
PackageIdentifier packageId,
Root packageRoot,
Environment env,
NonSkyframeGlobber nonSkyframeGlobber) {
this.packageId = packageId;
this.packageRoot = packageRoot;
this.env = env;
this.nonSkyframeGlobber = nonSkyframeGlobber;
}
/**
* Returns an immutable snapshot of the glob keys accumulated by {@link #runAsync} up to this
* point. Later {@code runAsync} calls do not affect a snapshot that was already returned.
*/
public Set<SkyKey> getGlobDepsRequested() {
return ImmutableSet.copyOf(globDepsRequested);
}
/**
 * Builds the Skyframe glob key for {@code pattern} within this globber's package and package
 * root, using an empty subdirectory fragment.
 *
 * @throws BadGlobException if the pattern is rejected as invalid; the message is copied from the
 *     underlying {@code InvalidGlobPatternException}
 */
private SkyKey getGlobKey(String pattern, Globber.Operation globberOperation)
    throws BadGlobException {
  final SkyKey key;
  try {
    key =
        GlobValue.key(
            packageId, packageRoot, pattern, globberOperation, PathFragment.EMPTY_FRAGMENT);
  } catch (InvalidGlobPatternException invalidPattern) {
    throw new BadGlobException(invalidPattern.getMessage());
  }
  return key;
}
/**
 * Starts evaluation of one glob call.
 *
 * <p>Each 'includes' pattern is turned into a glob key, recorded as a requested dep and looked
 * up in Skyframe. Patterns whose key has no value yet are handed to the {@link
 * NonSkyframeGlobber}; the remaining keys are resolved from the Skyframe lookup result when the
 * returned token is fetched. 'excludes' are not evaluated here; they are carried in the token.
 *
 * @throws BadGlobException if any 'includes' pattern is invalid
 */
@Override
public Token runAsync(
    List<String> includes,
    List<String> excludes,
    Globber.Operation globberOperation,
    boolean allowEmpty)
    throws BadGlobException, InterruptedException {
  int expectedSize = includes.size();
  LinkedHashSet<SkyKey> skyframeKeys = Sets.newLinkedHashSetWithExpectedSize(expectedSize);
  Map<SkyKey, String> patternByKey = Maps.newHashMapWithExpectedSize(expectedSize);
  for (String include : includes) {
    SkyKey key = getGlobKey(include, globberOperation);
    skyframeKeys.add(key);
    patternByKey.put(key, include);
  }
  globDepsRequested.addAll(skyframeKeys);

  SkyframeLookupResult lookup = env.getValuesAndExceptions(skyframeKeys);

  // Collect the keys Skyframe has no value for. A key whose lookup threw is only logged here and
  // stays in skyframeKeys.
  List<SkyKey> absentKeys = new ArrayList<>(skyframeKeys.size());
  for (SkyKey key : skyframeKeys) {
    SkyValue cached;
    try {
      cached = lookup.getOrThrow(key, IOException.class, BuildFileNotFoundException.class);
    } catch (IOException | BuildFileNotFoundException e) {
      logger.atWarning().withCause(e).log("Exception processing %s", key);
      continue;
    }
    if (cached == null) {
      absentKeys.add(key);
    }
  }

  // Move each absent key's pattern over to the delegate. This is done in a second pass because
  // skyframeKeys cannot be modified while it is being iterated above.
  List<String> delegatedPatterns = new ArrayList<>(absentKeys.size());
  for (SkyKey absentKey : absentKeys) {
    String pattern = patternByKey.get(absentKey);
    if (pattern == null) {
      continue;
    }
    delegatedPatterns.add(pattern);
    skyframeKeys.remove(absentKey);
  }

  // Evaluate the delegated patterns asynchronously; excludes are applied later, on the merged
  // result, so the delegate gets an empty exclude list.
  NonSkyframeGlobber.Token delegateToken = null;
  if (!delegatedPatterns.isEmpty()) {
    delegateToken =
        nonSkyframeGlobber.runAsync(
            delegatedPatterns, ImmutableList.of(), globberOperation, allowEmpty);
  }
  return new HybridToken(
      lookup, skyframeKeys, delegateToken, excludes, globberOperation, allowEmpty);
}
/**
 * Resolves a token previously returned by {@link #runAsync} into its matches, in no particular
 * order. The token must be a {@code HybridToken}; anything else fails the cast.
 */
@Override
public List<String> fetchUnsorted(Token token)
    throws BadGlobException, IOException, InterruptedException {
  return ((HybridToken) token).resolve(nonSkyframeGlobber);
}
/** Forwards the interrupt notification to the delegate {@link NonSkyframeGlobber}. */
@Override
public void onInterrupt() {
nonSkyframeGlobber.onInterrupt();
}
/** Forwards the completion notification to the delegate {@link NonSkyframeGlobber}. */
@Override
public void onCompletion() {
nonSkyframeGlobber.onCompletion();
}
/**
 * The {@link Globber.Token} for a single {@link Globber#runAsync} call whose 'includes' patterns
 * are answered partly from Skyframe and partly by a {@link NonSkyframeGlobber}. The two result
 * sets are merged in {@link #resolve}, and only then are the 'exclude' patterns applied via
 * {@link UnixGlob#removeExcludes}.
 */
private static class HybridToken extends Globber.Token {
  // Skyframe lookup result for the glob keys of this call.
  private final SkyframeLookupResult globValueMap;
  // Keys of the 'includes' patterns whose matches are read from globValueMap.
  private final Iterable<SkyKey> includesGlobKeys;
  // Token for the 'includes' patterns evaluated outside Skyframe; null when there are none.
  @Nullable private final NonSkyframeGlobber.Token nonSkyframeGlobberIncludesToken;
  // Exclude patterns, applied to the merged matches.
  private final List<String> excludes;
  // Operation of the originating glob call; used for key error messages.
  private final Globber.Operation globberOperation;
  // Whether an empty result is acceptable, per pattern and overall.
  private final boolean allowEmpty;

  private HybridToken(
      SkyframeLookupResult globValueMap,
      Iterable<SkyKey> includesGlobKeys,
      @Nullable NonSkyframeGlobber.Token nonSkyframeGlobberIncludesToken,
      List<String> excludes,
      Globber.Operation globberOperation,
      boolean allowEmpty) {
    this.globValueMap = globValueMap;
    this.includesGlobKeys = includesGlobKeys;
    this.nonSkyframeGlobberIncludesToken = nonSkyframeGlobberIncludesToken;
    this.excludes = excludes;
    this.globberOperation = globberOperation;
    this.allowEmpty = allowEmpty;
  }

  /**
   * Merges the Skyframe matches with the matches fetched from {@code nonSkyframeGlobber}, removes
   * the excluded paths and returns what is left, in unspecified order.
   *
   * @throws BadGlobException if an exclude pattern is malformed, or if {@code allowEmpty} is
   *     false and either a Skyframe-backed pattern matched nothing or the final result is empty
   * @throws IOException if a Skyframe glob lookup or the non-Skyframe fetch failed
   */
  private List<String> resolve(NonSkyframeGlobber nonSkyframeGlobber)
      throws BadGlobException, IOException, InterruptedException {
    HashSet<String> merged = new HashSet<>();
    for (SkyKey key : includesGlobKeys) {
      // TODO(bazel-team): NestedSet expansion here is suboptimal.
      ImmutableSet<PathFragment> skyframeMatches = getGlobMatches(key, globValueMap);
      for (PathFragment fragment : skyframeMatches) {
        merged.add(fragment.getPathString());
      }
      if (skyframeMatches.isEmpty() && !allowEmpty) {
        GlobberUtils.throwBadGlobExceptionEmptyResult(
            ((GlobDescriptor) key.argument()).getPattern(), globberOperation);
      }
    }

    if (nonSkyframeGlobberIncludesToken != null) {
      merged.addAll(nonSkyframeGlobber.fetchUnsorted(nonSkyframeGlobberIncludesToken));
    }

    try {
      UnixGlob.removeExcludes(merged, excludes);
    } catch (UnixGlob.BadPattern badExclude) {
      throw new BadGlobException(badExclude.getMessage());
    }

    if (merged.isEmpty() && !allowEmpty) {
      GlobberUtils.throwBadGlobExceptionAllExcluded(globberOperation);
    }
    return new ArrayList<>(merged);
  }

  /**
   * Reads the matches of {@code globKey} from {@code globValueMap}. The value must be present; a
   * missing value fails the {@code checkNotNull} precondition.
   *
   * @throws IOException if the lookup recorded an {@link IOException}, or a {@link
   *     BuildFileNotFoundException} (rethrown wrapped in a {@code SkyframeGlobbingIOException})
   */
  private static ImmutableSet<PathFragment> getGlobMatches(
      SkyKey globKey, SkyframeLookupResult globValueMap) throws IOException {
    try {
      GlobValue globValue =
          (GlobValue)
              globValueMap.getOrThrow(
                  globKey, BuildFileNotFoundException.class, IOException.class);
      GlobValue present = checkNotNull(globValue, "%s should not be missing", globKey);
      return present.getMatches();
    } catch (BuildFileNotFoundException e) {
      throw new SkyframeGlobbingIOException(e);
    }
  }
}
}
/**
* An {@link IOException} wrapping a {@link BuildFileNotFoundException} that was recorded for a
* glob key in Skyframe. The message is copied from the cause and the cause is preserved.
*/
static class SkyframeGlobbingIOException extends IOException {
SkyframeGlobbingIOException(BuildFileNotFoundException cause) {
super(cause.getMessage(), cause);
}
}
/**
* Returns a new {@code SkyframeHybridGlobber} for the given package, package root and Skyframe
* environment, with {@code nonSkyframeGlobber} as its delegate.
*/
@Override
protected Globber makeGlobber(
NonSkyframeGlobber nonSkyframeGlobber,
PackageIdentifier packageId,
Root packageRoot,
Environment env) {
return new SkyframeHybridGlobber(packageId, packageRoot, env, nonSkyframeGlobber);
}
/**
 * Wraps the package builder in a {@code LoadedPackageWithGlobDeps}.
 *
 * <p>The glob keys are a snapshot of what {@code globber} has requested so far, or an empty set
 * when {@code globber} is null. A non-null globber must be a {@code SkyframeHybridGlobber}.
 */
@Override
protected LoadedPackage newLoadedPackage(
    Package.Builder packageBuilder, @Nullable Globber globber, long loadTimeNanos) {
  if (globber == null) {
    return new LoadedPackageWithGlobDeps(packageBuilder, loadTimeNanos, ImmutableSet.of());
  }
  SkyframeHybridGlobber hybridGlobber = (SkyframeHybridGlobber) globber;
  return new LoadedPackageWithGlobDeps(
      packageBuilder, loadTimeNanos, hybridGlobber.getGlobDepsRequested());
}
}