blob: 6b11e9ca680f057aacf2eca7a0b3aaf46df91db9 [file] [log] [blame]
// Copyright 2024 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.remote;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.devtools.build.lib.vfs.FileSymlinkLoopException;
import com.google.devtools.build.lib.vfs.FileSystem;
import com.google.devtools.build.lib.vfs.PathFragment;
import java.io.IOException;
import java.util.Iterator;
import java.util.concurrent.ConcurrentHashMap;
import javax.annotation.Nullable;
/**
* Canonicalizes paths like {@link FileSystem#resolveSymbolicLinks}, while storing the intermediate
* results in a trie so they can be reused by future canonicalizations.
*
* <p>This is an implementation detail of {@link RemoteActionFileSystem}, factored out for testing.
* Because {@link RemoteActionFileSystem} implements a union filesystem and must account for the
* possibility of symlinks straddling the underlying filesystems, the performance of large
* filesystem scans can be greatly improved with a custom {@link FileSystem#resolveSymbolicLinks}
* implementation that leverages the trie to avoid repeated work.
*
* <p>On case-insensitive filesystems, accessing the same path through different case variations
* will produce distinct trie entries. This could be fixed, but it's a performance rather than a
* correctness concern, and shouldn't matter most of the time.
*
* <p>Thread-safe: concurrent calls to {@link #resolveSymbolicLinks} are supported. As with {@link
* FileSystem#resolveSymbolicLinks}, the result is undefined if the filesystem is mutated
* concurrently.
*/
final class PathCanonicalizer {
interface Resolver {
/**
* Returns the result of {@link FileSystem#readSymbolicLink} if the path is a symlink, otherwise
* null. All but the last path segment must be canonical.
*
* @throws IOException if the file type or symlink target path could not be determined
*/
@Nullable
PathFragment resolveOneLink(PathFragment path) throws IOException;
}
/** The trie node for a path segment. */
private static final class Node {
/**
* Maps the next segment to the corresponding node. Null iff the path ending with the current
* segment is a symlink.
*/
@Nullable private final ConcurrentHashMap<String, Node> tab;
/** The symlink target. Null iff the path ending with the current segment is not a symlink. */
@Nullable private final PathFragment targetPath;
private Node(@Nullable PathFragment targetPath) {
this.tab = targetPath == null ? new ConcurrentHashMap<>() : null;
this.targetPath = targetPath;
}
}
private final Resolver resolver;
private final Node root = new Node(null);
PathCanonicalizer(Resolver resolver) {
this.resolver = resolver;
}
/** Returns the root node for an absolute path. */
private Node getRootNode(PathFragment path) {
checkArgument(path.isAbsolute());
// Unix has a single root. Windows has one root per drive.
return path.getDriveStrLength() > 1
? root.tab.computeIfAbsent(path.getDriveStr(), unused -> new Node(null))
: root;
}
/**
* Canonicalizes a path, reusing cached information if possible.
*
* @param path the path to canonicalize.
* @param maxLinks the maximum number of symlinks that can be followed in the process of
* canonicalizing the path.
* @throws FileSymlinkLoopException if too many symlinks had to be followed.
* @throws IOException if an I/O error occurs
* @return the canonical path.
*/
private PathFragment resolveSymbolicLinks(PathFragment path, int maxLinks) throws IOException {
// This code is carefully written to be as fast as possible when the path is already canonical
// and has been previously cached. Avoid making changes without benchmarking. A tree artifact
// with hundreds of thousands of files makes for a good benchmark.
Node node = getRootNode(path);
Iterable<String> segments = path.segments();
int segmentIndex = 0;
// Loop invariants:
// - `segmentIndex` is the index of the current `segment` relative to the start of `path`. The
// first segment has index 0.
// - `path` is the absolute path to canonicalize. If `segmentIndex` > 0, `path` is already
// canonical up to and including `segmentIndex` - 1.
// - `node` is the trie node corresponding to the `path` prefix ending with `segmentIndex` - 1,
// or to the root path when `segmentIndex` is 0.
for (String segment : segments) {
Node nextNode = node.tab.get(segment);
if (nextNode == null) {
PathFragment naivePath = path.subFragment(0, segmentIndex + 1);
PathFragment targetPath = resolver.resolveOneLink(naivePath);
nextNode = node.tab.computeIfAbsent(segment, unused -> new Node(targetPath));
}
if (nextNode.targetPath != null) {
if (maxLinks == 0) {
throw new FileSymlinkLoopException(path);
}
maxLinks--;
// Compute the path obtained by resolving the symlink.
// Note that path normalization already handles uplevel references.
PathFragment newPath;
if (nextNode.targetPath.isAbsolute()) {
newPath = nextNode.targetPath.getRelative(path.subFragment(segmentIndex + 1));
} else {
newPath =
path.subFragment(0, segmentIndex)
.getRelative(nextNode.targetPath)
.getRelative(path.subFragment(segmentIndex + 1));
}
// For absolute symlinks, we must start over.
// For relative symlinks, it would have been possible to restart after the already
// canonicalized prefix, but they're too rare to be worth optimizing for.
return resolveSymbolicLinks(newPath, maxLinks);
}
node = nextNode;
segmentIndex++;
}
return path;
}
/**
* Canonicalizes a path, reusing cached information if possible.
*
* <p>See {@link FileSystem#resolveSymbolicLinks} for the full specification.
*
* @param path the path to canonicalize.
* @throws FileSymlinkLoopException if too many symlinks had to be followed.
* @throws IOException if an I/O error occurs
* @return the canonical path.
*/
PathFragment resolveSymbolicLinks(PathFragment path) throws IOException {
return resolveSymbolicLinks(path, FileSystem.MAX_SYMLINKS);
}
/** Removes cached information for a path prefix. */
void clearPrefix(PathFragment pathPrefix) {
Node node = getRootNode(pathPrefix);
Iterator<String> segments = pathPrefix.segments().iterator();
boolean hasNext = segments.hasNext();
while (node != null && hasNext) {
String segment = segments.next();
hasNext = segments.hasNext();
if (!hasNext) {
// Found the path prefix.
node.tab.remove(segment);
return;
}
if (node.targetPath != null) {
// Path prefix not in trie.
return;
}
node = node.tab.get(segment);
}
}
}