blob: b7a44c475cbb0cd476e5cd0299087a18e8dca154 [file] [log] [blame]
/*
* Copyright 2016 The Bazel Authors. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.idea.blaze.base.lang.buildfile.globbing;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.base.Throwables;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.ForwardingListenableFuture;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.SettableFuture;
import com.google.idea.blaze.base.io.FileAttributeProvider;
import com.google.idea.blaze.base.lang.buildfile.validation.GlobPatternValidator;
import com.intellij.openapi.application.ApplicationManager;
import com.intellij.openapi.progress.ProgressManager;
import com.intellij.openapi.vfs.VirtualFile;
import com.intellij.openapi.vfs.ex.temp.TempFileSystem;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
/**
* Implementation of a subset of UNIX-style file globbing, expanding "*" and "?" as wildcards, but
* not [a-z] ranges.
*
* <p><code>**</code> gets special treatment in include patterns. If it is used as a complete path
* segment it matches the filenames in subdirectories recursively.
*
* <p>Largely copied from {@link com.google.devtools.build.lib.vfs.UnixGlob}
*/
public final class UnixGlob {
private UnixGlob() {}
private static List<File> globInternal(
File base,
Collection<String> patterns,
Collection<String> excludePatterns,
boolean excludeDirectories,
Predicate<File> dirPred,
ThreadPoolExecutor threadPool)
throws IOException, InterruptedException {
GlobVisitor visitor = (threadPool == null) ? new GlobVisitor() : new GlobVisitor(threadPool);
return visitor.glob(base, patterns, excludePatterns, excludeDirectories, dirPred);
}
private static Future<List<File>> globAsyncInternal(
File base,
Collection<String> patterns,
Collection<String> excludePatterns,
boolean excludeDirectories,
Predicate<File> dirPred,
ThreadPoolExecutor threadPool) {
Preconditions.checkNotNull(threadPool, "%s %s", base, patterns);
try {
return new GlobVisitor(threadPool)
.globAsync(base, patterns, excludePatterns, excludeDirectories, dirPred);
} catch (IOException e) {
// We are evaluating asynchronously, so no exceptions should be thrown until the future is
// retrieved.
throw new IllegalStateException(e);
}
}
/**
* Checks that each pattern is valid, splits it into segments and checks that each segment
* contains only valid wildcards.
*
* @return list of segment arrays
*/
private static List<String[]> checkAndSplitPatterns(Collection<String> patterns) {
List<String[]> list = Lists.newArrayListWithCapacity(patterns.size());
for (String pattern : patterns) {
String error = GlobPatternValidator.validate(pattern);
if (error != null) {
throw new IllegalArgumentException(error);
}
Iterable<String> segments = Splitter.on('/').split(pattern);
list.add(Iterables.toArray(segments, String.class));
}
return list;
}
private static boolean excludedOnMatch(
File path, List<String[]> excludePatterns, int idx, Cache<String, Pattern> cache) {
for (String[] excludePattern : excludePatterns) {
String text = path.getName();
if (idx == excludePattern.length && matches(excludePattern[idx - 1], text, cache)) {
return true;
}
}
return false;
}
/**
* Returns the exclude patterns in {@code excludePatterns} which could apply to the children of
* {@code base}
*
* @param idx index into {@code excludePatterns} for the part of the pattern which might match
* {@code base}
*/
private static List<String[]> getRelevantExcludes(
final File base,
List<String[]> excludePatterns,
final int idx,
final Cache<String, Pattern> cache) {
if (excludePatterns.isEmpty()) {
return excludePatterns;
}
List<String[]> list = new ArrayList<>();
for (String[] patterns : excludePatterns) {
if (excludePatternMatches(patterns, idx, base, cache)) {
list.add(patterns);
}
}
return list;
}
/**
* @param patterns a list of patterns
* @param idx index into {@code patterns}
*/
private static boolean excludePatternMatches(
String[] patterns, int idx, File base, Cache<String, Pattern> cache) {
if (idx == 0) {
return true;
}
String text = base.getName();
return patterns.length > idx && matches(patterns[idx - 1], text, cache);
}
/** Calls {@link #matches(String, String, Cache) matches(pattern, str, null)} */
public static boolean matches(String pattern, String str) {
return matches(pattern, str, null);
}
/**
* Returns whether {@code str} matches the glob pattern {@code pattern}. This method may use the
* {@code patternCache} to speed up the matching process.
*
* @param pattern a glob pattern
* @param str the string to match
* @param patternCache a cache from patterns to compiled Pattern objects, or {@code null} to skip
* caching
*/
public static boolean matches(String pattern, String str, Cache<String, Pattern> patternCache) {
if (pattern.length() == 0 || str.length() == 0) {
return false;
}
// Common case: **
if (pattern.equals("**")) {
return true;
}
// Common case: *
if (pattern.equals("*")) {
return true;
}
// If a filename starts with '.', this char must be matched explicitly.
if (str.charAt(0) == '.' && pattern.charAt(0) != '.') {
return false;
}
// Common case: *.xyz
if (pattern.charAt(0) == '*' && pattern.lastIndexOf('*') == 0) {
return str.endsWith(pattern.substring(1));
}
// Common case: xyz*
int lastIndex = pattern.length() - 1;
// The first clause of this if statement is unnecessary, but is an
// optimization--charAt runs faster than indexOf.
if (pattern.charAt(lastIndex) == '*' && pattern.indexOf('*') == lastIndex) {
return str.startsWith(pattern.substring(0, lastIndex));
}
Pattern regex = patternCache == null ? null : patternCache.getIfPresent(pattern);
if (regex == null) {
regex = makePatternFromWildcard(pattern);
if (patternCache != null) {
patternCache.put(pattern, regex);
}
}
return regex.matcher(str).matches();
}
/**
* Returns a regular expression implementing a matcher for "pattern", in which "*" and "?" are
* wildcards.
*
* <p>e.g. "foo*bar?.java" -> "foo.*bar.\\.java"
*/
private static Pattern makePatternFromWildcard(String pattern) {
StringBuilder regexp = new StringBuilder();
for (int i = 0, len = pattern.length(); i < len; i++) {
char c = pattern.charAt(i);
switch (c) {
case '*':
int toIncrement = 0;
if (len > i + 1 && pattern.charAt(i + 1) == '*') {
// The pattern '**' is interpreted to match 0 or more directory separators, not 1 or
// more. We skip the next * and then find a trailing/leading '/' and get rid of it.
toIncrement = 1;
if (len > i + 2 && pattern.charAt(i + 2) == '/') {
// We have '**/' -- skip the '/'.
toIncrement = 2;
} else if (len == i + 2 && i > 0 && pattern.charAt(i - 1) == '/') {
// We have '/**' -- remove the '/'.
regexp.delete(regexp.length() - 1, regexp.length());
}
}
regexp.append(".*");
i += toIncrement;
break;
case '?':
regexp.append('.');
break;
//escape the regexp special characters that are allowed in wildcards
case '^':
case '$':
case '|':
case '+':
case '{':
case '}':
case '[':
case ']':
case '\\':
case '.':
regexp.append('\\');
regexp.append(c);
break;
default:
regexp.append(c);
break;
}
}
return Pattern.compile(regexp.toString());
}
public static Builder forPath(File path) {
return new Builder(path);
}
/** Builder class for UnixGlob. */
public static class Builder {
private File base;
private List<String> patterns;
private List<String> excludes;
private boolean excludeDirectories;
private Predicate<File> pathFilter;
private ThreadPoolExecutor threadPool;
/** Creates a glob builder with the given base path. */
public Builder(File base) {
this.base = base;
this.patterns = Lists.newArrayList();
this.excludes = Lists.newArrayList();
this.excludeDirectories = false;
this.pathFilter = file -> true;
}
/**
* Adds a pattern to include to the glob builder.
*
* <p>For a description of the syntax of the patterns, see {@link UnixGlob}.
*/
public Builder addPattern(String pattern) {
this.patterns.add(pattern);
return this;
}
/**
* Adds a pattern to include to the glob builder.
*
* <p>For a description of the syntax of the patterns, see {@link UnixGlob}.
*/
public Builder addPatterns(String... patterns) {
Collections.addAll(this.patterns, patterns);
return this;
}
/**
* Adds a pattern to include to the glob builder.
*
* <p>For a description of the syntax of the patterns, see {@link UnixGlob}.
*/
public Builder addPatterns(Collection<String> patterns) {
this.patterns.addAll(patterns);
return this;
}
/**
* Adds patterns to exclude from the results to the glob builder.
*
* <p>For a description of the syntax of the patterns, see {@link UnixGlob}.
*/
public Builder addExcludes(String... excludes) {
Collections.addAll(this.excludes, excludes);
return this;
}
/**
* Adds patterns to exclude from the results to the glob builder.
*
* <p>For a description of the syntax of the patterns, see {@link UnixGlob}.
*/
public Builder addExcludes(Collection<String> excludes) {
this.excludes.addAll(excludes);
return this;
}
/** If set to true, directories are not returned in the glob result. */
public Builder setExcludeDirectories(boolean excludeDirectories) {
this.excludeDirectories = excludeDirectories;
return this;
}
/**
* Sets the threadpool to use for parallel glob evaluation. If unset, evaluation is done
* in-thread.
*/
public Builder setThreadPool(ThreadPoolExecutor pool) {
this.threadPool = pool;
return this;
}
/**
* If set, the given predicate is called for every directory encountered. If it returns false,
* the corresponding item is not returned in the output and directories are not traversed
* either.
*/
public Builder setDirectoryFilter(Predicate<File> pathFilter) {
this.pathFilter = pathFilter;
return this;
}
/**
* Executes the glob.
*
* @throws InterruptedException if the thread is interrupted.
*/
public List<File> glob() throws IOException, InterruptedException {
return globInternal(base, patterns, excludes, excludeDirectories, pathFilter, threadPool);
}
/**
* Executes the glob asynchronously. {@link #setThreadPool} must have been called already with a
* non-null argument.
*
* @param checkForInterrupt if the returned future may throw InterruptedException.
*/
public Future<List<File>> globAsync() {
return globAsyncInternal(
base, patterns, excludes, excludeDirectories, pathFilter, threadPool);
}
}
/** Adapts the result of the glob visitation as a Future. */
private static class GlobFuture extends ForwardingListenableFuture<List<File>> {
private final GlobVisitor visitor;
private final SettableFuture<List<File>> delegate = SettableFuture.create();
public GlobFuture(GlobVisitor visitor) {
this.visitor = visitor;
}
@Override
public List<File> get() throws InterruptedException, ExecutionException {
return super.get();
}
@Override
protected ListenableFuture<List<File>> delegate() {
return delegate;
}
public void setException(IOException exception) {
delegate.setException(exception);
}
public void set(List<File> paths) {
delegate.set(paths);
}
@Override
public boolean cancel(boolean mayInterruptIfRunning) {
// Best-effort interrupt of the in-flight visitation.
visitor.cancel();
return true;
}
public void markCanceled() {
super.cancel(true);
}
}
/**
* GlobVisitor executes a glob using parallelism, which is useful when the glob() requires many
* readdir() calls on high latency filesystems.
*/
private static final class GlobVisitor {
// These collections are used across workers and must therefore be thread-safe.
private final Collection<File> results = Sets.newConcurrentHashSet();
private final Cache<String, Pattern> cache =
CacheBuilder.newBuilder()
.build(
new CacheLoader<String, Pattern>() {
@Override
public Pattern load(String wildcard) {
return makePatternFromWildcard(wildcard);
}
});
private final GlobFuture result;
private final ThreadPoolExecutor executor;
private final AtomicLong pendingOps = new AtomicLong(0);
private final AtomicReference<IOException> failure = new AtomicReference<>();
private final FileAttributeProvider fileAttributeProvider = FileAttributeProvider.getInstance();
private volatile boolean canceled = false;
public GlobVisitor(ThreadPoolExecutor executor) {
this.executor = executor;
this.result = new GlobFuture(this);
}
public GlobVisitor() {
this(null);
}
/**
* Performs wildcard globbing: returns the sorted list of filenames that match any of {@code
* patterns} relative to {@code base}, but which do not match {@code excludePatterns}.
* Directories are traversed if and only if they match {@code dirPred}. The predicate is also
* called for the root of the traversal.
*
* <p>Patterns may include "*" and "?", but not "[a-z]".
*
* <p><code>**</code> gets special treatment in include patterns. If it is used as a complete
* path segment it matches the filenames in subdirectories recursively.
*
* @throws IllegalArgumentException if any glob or exclude pattern {@linkplain
* #checkPatternForError(String) contains errors} or if any exclude pattern segment contains
* <code>**</code> or if any include pattern segment contains <code>**</code> but not equal
* to it.
*/
public List<File> glob(
File base,
Collection<String> patterns,
Collection<String> excludePatterns,
boolean excludeDirectories,
Predicate<File> dirPred)
throws IOException, InterruptedException {
try {
return globAsync(base, patterns, excludePatterns, excludeDirectories, dirPred).get();
} catch (ExecutionException e) {
Throwable cause = e.getCause();
Throwables.propagateIfPossible(cause, IOException.class);
throw new RuntimeException(e);
}
}
public Future<List<File>> globAsync(
File base,
Collection<String> patterns,
Collection<String> excludePatterns,
boolean excludeDirectories,
Predicate<File> dirPred)
throws IOException {
if (!fileAttributeProvider.exists(base) || patterns.isEmpty()) {
return Futures.immediateFuture(Collections.emptyList());
}
boolean baseIsDirectory = fileAttributeProvider.isDirectory(base);
List<String[]> splitPatterns = checkAndSplitPatterns(patterns);
List<String[]> splitExcludes = checkAndSplitPatterns(excludePatterns);
// We do a dumb loop, even though it will likely duplicate work
// (e.g., readdir calls). In order to optimize, we would need
// to keep track of which patterns shared sub-patterns and which did not
// (for example consider the glob [*/*.java, sub/*.java, */*.txt]).
pendingOps.incrementAndGet();
try {
for (String[] splitPattern : splitPatterns) {
queueGlob(
base,
baseIsDirectory,
splitPattern,
0,
excludeDirectories,
splitExcludes,
0,
results,
cache,
dirPred);
}
} finally {
decrementAndCheckDone();
}
return result;
}
private void queueGlob(
File base,
boolean baseIsDirectory,
String[] patternParts,
int idx,
boolean excludeDirectories,
List<String[]> excludePatterns,
int excludeIdx,
Collection<File> results,
Cache<String, Pattern> cache,
Predicate<File> dirPred)
throws IOException {
enqueue(
() -> {
try {
reallyGlob(
base,
baseIsDirectory,
patternParts,
idx,
excludeDirectories,
excludePatterns,
excludeIdx,
results,
cache,
dirPred);
} catch (IOException e) {
failure.set(e);
}
});
}
protected void enqueue(final Runnable r) {
pendingOps.incrementAndGet();
Runnable wrapped =
() -> {
try {
if (!canceled && failure.get() == null) {
r.run();
}
} finally {
decrementAndCheckDone();
}
};
if (executor == null) {
wrapped.run();
} else {
executor.execute(wrapped);
}
}
protected void cancel() {
this.canceled = true;
}
private void decrementAndCheckDone() {
if (pendingOps.decrementAndGet() == 0) {
// We get to 0 iff we are done all the relevant work. This is because we always increment
// the pending ops count as we're enqueuing, and don't decrement until the task is complete
// (which includes accounting for any additional tasks that one enqueues).
if (canceled) {
result.markCanceled();
} else if (failure.get() != null) {
result.setException(failure.get());
} else {
result.set(Ordering.<File>natural().immutableSortedCopy(results));
}
}
}
/**
* Expressed in Haskell:
*
* <pre>
* reallyGlob base [] = { base }
* reallyGlob base [x:xs] = union { reallyGlob(f, xs) | f results "base/x" }
* </pre>
*/
private void reallyGlob(
File base,
boolean baseIsDirectory,
String[] patternParts,
int idx,
boolean excludeDirectories,
List<String[]> excludePatterns,
int excludeIdx,
Collection<File> results,
Cache<String, Pattern> cache,
Predicate<File> dirPred)
throws IOException {
ProgressManager.checkCanceled();
if (baseIsDirectory && !dirPred.test(base)) {
return;
}
if (idx == patternParts.length) { // Base case.
if (!(excludeDirectories && baseIsDirectory)
&& !excludedOnMatch(base, excludePatterns, excludeIdx, cache)) {
results.add(base);
}
return;
}
if (!baseIsDirectory) {
// Nothing to find here.
return;
}
List<String[]> relevantExcludes =
getRelevantExcludes(base, excludePatterns, excludeIdx, cache);
final String pattern = patternParts[idx];
// ** is special: it can match nothing at all.
// For example, x/** matches x, **/y matches y, and x/**/y matches x/y.
if ("**".equals(pattern)) {
queueGlob(
base,
baseIsDirectory,
patternParts,
idx + 1,
excludeDirectories,
excludePatterns,
excludeIdx,
results,
cache,
dirPred);
}
if (!pattern.contains("*") && !pattern.contains("?")) {
// We do not need to do a readdir in this case, just a stat.
File child = new File(base, pattern);
boolean childIsDir = fileAttributeProvider.isDirectory(child);
if (!childIsDir && !fileAttributeProvider.isFile(child)) {
// The file is a dangling symlink, fifo, does not exist, etc.
return;
}
queueGlob(
child,
childIsDir,
patternParts,
idx + 1,
excludeDirectories,
relevantExcludes,
excludeIdx + 1,
results,
cache,
dirPred);
return;
}
File[] children = getChildren(base);
if (children == null) {
return;
}
for (File child : children) {
boolean childIsDir = fileAttributeProvider.isDirectory(child);
if ("**".equals(pattern)) {
// Recurse without shifting the pattern.
if (childIsDir) {
queueGlob(
child,
childIsDir,
patternParts,
idx,
excludeDirectories,
relevantExcludes,
excludeIdx + 1,
results,
cache,
dirPred);
}
}
if (matches(pattern, child.getName(), cache)) {
// Recurse and consume one segment of the pattern.
if (childIsDir) {
queueGlob(
child,
childIsDir,
patternParts,
idx + 1,
excludeDirectories,
relevantExcludes,
excludeIdx + 1,
results,
cache,
dirPred);
} else {
// Instead of using an async call, just repeat the base case above.
if (idx + 1 == patternParts.length
&& !excludedOnMatch(child, relevantExcludes, excludeIdx + 1, cache)) {
results.add(child);
}
}
}
}
}
@Nullable
private File[] getChildren(File file) {
if (!ApplicationManager.getApplication().isUnitTestMode()) {
return file.listFiles();
}
TempFileSystem fs = TempFileSystem.getInstance();
VirtualFile vf = fs.findFileByIoFile(file);
VirtualFile[] children = vf.getChildren();
if (children == null) {
return null;
}
return Arrays.stream(children).map(VirtualFile::getPath).map(File::new).toArray(File[]::new);
}
}
}