| // Copyright 2018 The Bazel Authors. All rights reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| package com.google.devtools.build.lib.includescanning; |
| |
| import static java.nio.charset.StandardCharsets.ISO_8859_1; |
| |
| import com.google.common.annotations.VisibleForTesting; |
| import com.google.common.base.Preconditions; |
| import com.google.common.cache.CacheBuilder; |
| import com.google.common.cache.CacheLoader; |
| import com.google.common.cache.LoadingCache; |
| import com.google.common.collect.ImmutableList; |
| import com.google.common.collect.ImmutableMap; |
| import com.google.common.collect.Sets; |
| import com.google.common.io.CharStreams; |
| import com.google.devtools.build.lib.actions.ActionExecutionContext; |
| import com.google.devtools.build.lib.actions.ActionExecutionMetadata; |
| import com.google.devtools.build.lib.actions.Artifact; |
| import com.google.devtools.build.lib.actions.ArtifactFactory; |
| import com.google.devtools.build.lib.actions.ArtifactRoot; |
| import com.google.devtools.build.lib.actions.ExecException; |
| import com.google.devtools.build.lib.cmdline.PackageIdentifier; |
| import com.google.devtools.build.lib.events.Event; |
| import com.google.devtools.build.lib.includescanning.IncludeParser.Inclusion.Kind; |
| import com.google.devtools.build.lib.packages.NoSuchPackageException; |
| import com.google.devtools.build.lib.profiler.Profiler; |
| import com.google.devtools.build.lib.profiler.ProfilerTask; |
| import com.google.devtools.build.lib.profiler.SilentCloseable; |
| import com.google.devtools.build.lib.skyframe.ContainingPackageLookupValue; |
| import com.google.devtools.build.lib.skyframe.GlobDescriptor; |
| import com.google.devtools.build.lib.skyframe.GlobValue; |
| import com.google.devtools.build.lib.skyframe.GlobValue.InvalidGlobPatternException; |
| import com.google.devtools.build.lib.skyframe.PerBuildSyscallCache; |
| import com.google.devtools.build.lib.vfs.FileSystemUtils; |
| import com.google.devtools.build.lib.vfs.Path; |
| import com.google.devtools.build.lib.vfs.PathFragment; |
| import com.google.devtools.build.lib.vfs.Root; |
| import com.google.devtools.build.lib.vfs.UnixGlob; |
| import com.google.devtools.build.lib.vfs.UnixGlob.FilesystemCalls; |
| import com.google.devtools.build.skyframe.SkyFunction.Environment; |
| import com.google.devtools.build.skyframe.SkyKey; |
| import com.google.devtools.build.skyframe.SkyValue; |
| import com.google.devtools.build.skyframe.ValueOrException; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.concurrent.atomic.AtomicReference; |
| import java.util.logging.Level; |
| import java.util.logging.Logger; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| import java.util.regex.PatternSyntaxException; |
| import javax.annotation.Nullable; |
| |
| /** |
| * Scans a source file and extracts the literal inclusions it specifies. Does not store results -- |
| * repeated requests to the same file will result in repeated scans. Clients should implement a |
| * caching layer in order to avoid unnecessary disk access when requesting an already scanned file. |
| */ |
| @VisibleForTesting |
| class IncludeParser { |
| |
/**
 * File types supported by the grep-includes binary. The string attached to each constant (see
 * {@link #getFileType}) must be kept in sync with //tools/cpp:grep-includes.
 */
public enum GrepIncludesFileType {
  CPP("c++"),
  SWIG("swig");

  /** Textual identifier of this file type. */
  private final String fileType;

  GrepIncludesFileType(String typeName) {
    fileType = typeName;
  }

  /** Returns the textual identifier of this file type. */
  public String getFileType() {
    return this.fileType;
  }
}
| |
// Logger shared by the parser and its nested helper classes.
private static final Logger logger = Logger.getLogger(IncludeParser.class.getName());
// Whether FINE logging is enabled, evaluated once when this class is initialized. Used to skip
// building debug messages when they would not be logged.
private static final boolean LOG_FINE = logger.isLoggable(Level.FINE);
| |
| /** |
| * Immutable object representation of the four columns making up a single Rule |
| * in a Hints set. See {@link Hints} for more details. |
| */ |
| private static class Rule { |
| private enum Type { PATH, FILE, INCLUDE_QUOTE, INCLUDE_ANGLE } |
| final Type type; |
| final Pattern pattern; |
| final String findRoot; |
| final String findFilter; |
| |
| private Rule(String type, String pattern, String findRoot, String findFilter) { |
| this.type = Type.valueOf(type.trim().toUpperCase()); |
| this.pattern = Pattern.compile("^" + pattern + "$"); |
| this.findRoot = findRoot.replace('\\', '$'); |
| this.findFilter = findFilter; |
| } |
| |
| Rule(String type, String pattern, String findRoot) { |
| this(type, pattern, findRoot, null); |
| Preconditions.checkArgument((this.type == Type.INCLUDE_QUOTE) |
| || (this.type == Type.INCLUDE_ANGLE), this); |
| } |
| |
| @Override public String toString() { |
| return "" + type + " " + pattern + " " + findRoot + " " + findFilter; |
| } |
| } |
| |
| /** {@link SkyValue} encapsulating the source-state-dependent part of {@link Hints}. */ |
| public static class HintsRules implements SkyValue { |
| private final ImmutableList<Rule> rules; |
| |
| private HintsRules(ImmutableList<Rule> rules) { |
| this.rules = rules; |
| } |
| } |
| |
| /** |
| * This class is a representation of the INCLUDE_HINTS file. The hints file contains regexp-based |
| * rules to help this simple include scanner cope with computed includes, which would otherwise |
| * require a full preprocessor with symbol support. Instead of actually processing symbols to |
| * evaluate the computed includes, we instead apply rules to gather inclusions for matching paths. |
| * |
| * <p>The hints file is read, line by line, into a list of rules each of which encapsulates a line |
| * of four columns. Each non-blank, non-comment line has the format: |
| * |
| * <pre> |
| * "file"|"path" match-pattern find-root find-filter |
| * </pre> |
| * |
| * <p>The first column specifies whether the line is a rule based on matching source |
| * <em>files</em> (passed directly to the compiler as inputs, or transitively #included by other |
| * inputs) or include <em>paths</em> (passed to the compiler as -I, -iquote, or -isystem flags). |
| * |
| * <p>The second column is a regexp for files or paths. Whenever a compiler argument of the |
| * specified type matches that regexp, the rule is taken. (All matching rules for every path and |
| * file on a compiler command line are followed, and the results are combined.) |
| * |
| * <p>The third column is a point in the local filesystem from which to extract a recursive |
| * listing. (This follows symlinks) Backrefs may be used to refer to the regexp or its capturing |
| * groups. (This is mostly necessary because --package_path can cause input paths to carry |
| * arbitrary prefixes.) |
| * |
| * <p>The fourth column is a regexp applied to each file found by the recursive listing. All |
| * matching files are treated as dependencies. |
| */ |
| public static class Hints { |
// Separator between the columns of a hint line: any run of whitespace.
private static final Pattern WS_PAT = Pattern.compile("\\s+");
// Only paths starting with this prefix are considered for "file" and "path" hints.
@VisibleForTesting
static final String ALLOWED_PREFIX = "third_party/";
// Match regular expressions that can only match paths under ALLOWED_PREFIX: the expression has
// to start with the prefix, optionally preceded by opening parentheses.
private static final Pattern ALLOWED_PATTERN = Pattern.compile("^\\(*" + ALLOWED_PREFIX + ".*");

// Concurrency level used for both the file-level hints cache and the syscall cache.
private static final int HINTS_CACHE_CONCURRENCY = 100;

// All rules of this hint set, of every type.
private final ImmutableList<Rule> rules;
// Used to turn hinted files into source artifacts.
private final ArtifactFactory artifactFactory;

// Filesystem-call cache handed to UnixGlob by the legacy hint expansion. It is replaced with a
// fresh instance by clearCachedLegacyHints().
private final AtomicReference<FilesystemCalls> syscallCache = new AtomicReference<>();

// Memoizes the legacy expansion of "file" type hints per artifact.
private final LoadingCache<Artifact, Collection<Artifact>> fileLevelHintsCache =
    CacheBuilder.newBuilder().concurrencyLevel(HINTS_CACHE_CONCURRENCY).build(
        new CacheLoader<Artifact, Collection<Artifact>>() {
          @Override
          public Collection<Artifact> load(Artifact path) {
            return getHintedInclusionsLegacy(Rule.Type.FILE,
                path.getExecPath(), path.getRoot());
          }
        });
| |
/**
 * Constructs a hint set from the rules of an already parsed INCLUDE_HINTS file.
 *
 * @param hintsRules the {@link HintsRules} parsed from INCLUDE_HINTS
 * @param artifactFactory the factory used to create source artifacts for hinted files
 */
public Hints(HintsRules hintsRules, ArtifactFactory artifactFactory) {
  this.rules = hintsRules.rules;
  this.artifactFactory = artifactFactory;
  // Resets the file-level cache and installs a fresh syscall cache.
  this.clearCachedLegacyHints();
}
| |
| static HintsRules getRules(Path hintsFile) throws IOException { |
| ImmutableList.Builder<Rule> rules = ImmutableList.builder(); |
| try (InputStream is = hintsFile.getInputStream()) { |
| for (String line : CharStreams.readLines(new InputStreamReader(is, "UTF-8"))) { |
| line = line.trim(); |
| if (line.length() == 0 || line.startsWith("#")) { |
| continue; |
| } |
| String[] tokens = WS_PAT.split(line); |
| try { |
| if (tokens.length == 3) { |
| rules.add(new Rule(tokens[0], tokens[1], tokens[2])); |
| } else if (tokens.length == 4) { |
| if (!ALLOWED_PATTERN.matcher(tokens[1]).matches()) { |
| throw new IOException("Illegal hint regex on: " + line + "\n" |
| + tokens[1] + " does not match only paths in " + ALLOWED_PREFIX); |
| } |
| rules.add(new Rule(tokens[0], tokens[1], tokens[2], tokens[3])); |
| } else { |
| throw new IOException("Malformed hint line: " + line); |
| } |
| } catch (PatternSyntaxException e) { |
| throw new IOException("Malformed hint regex on: " + line + "\n " + e.getMessage()); |
| } catch (IllegalArgumentException e) { |
| throw new IOException("Invalid type on: " + line + "\n " + e.getMessage()); |
| } |
| } |
| } |
| return new HintsRules(rules.build()); |
| } |
| |
| /** |
| * Clears legacy inclusions cache to maintain inter-build correctness, since filesystem changes |
| * are not tracked by cache. |
| */ |
| void clearCachedLegacyHints() { |
| fileLevelHintsCache.invalidateAll(); |
| syscallCache.set( |
| PerBuildSyscallCache.newBuilder().setConcurrencyLevel(HINTS_CACHE_CONCURRENCY).build()); |
| } |
| |
| /** Returns the "file" type hinted inclusions for a given path, caching results by path. */ |
| Collection<Artifact> getFileLevelHintedInclusionsLegacy(Artifact path) { |
| if (!path.getExecPathString().startsWith(ALLOWED_PREFIX)) { |
| return ImmutableList.of(); |
| } |
| return fileLevelHintsCache.getUnchecked(path); |
| } |
| |
| /** |
| * Returns the "path" type hinted inclusions for the given paths. Callers are responsible for |
| * caching. |
| */ |
| Collection<Artifact> getPathLevelHintedInclusions( |
| ImmutableList<PathFragment> paths, Environment env) throws InterruptedException { |
| return getHintedInclusionsWithSkyframe(Rule.Type.PATH, paths, env); |
| } |
| |
/**
 * Performs the work of matching the given paths against the hints and returns the matching
 * files. This is semantically different from {@link #getHintedInclusionsLegacy} in that it will
 * not cross package boundaries.
 *
 * <p>The work is done in three phases: (1) collect, for every rule of the requested type that
 * matches one of the paths, the directory to search and the filter to apply; (2) look up the
 * package containing each such directory; (3) request a glob inside that package for each
 * directory and turn the matches into source artifacts.
 *
 * @param type the type of rules to apply
 * @param paths the paths to match; those not under {@link #ALLOWED_PREFIX} are ignored
 * @param env the Skyframe environment used for the package and glob lookups
 * @return the hinted artifacts (empty if there are none), or {@code null} if {@code env} reports
 *     missing values after either of the two lookups
 */
private Collection<Artifact> getHintedInclusionsWithSkyframe(
    Rule.Type type, ImmutableList<PathFragment> paths, Environment env)
    throws InterruptedException {
  ImmutableList<String> pathStrings =
      paths.stream()
          .map(PathFragment::getPathString)
          .filter((p) -> p.startsWith(ALLOWED_PREFIX))
          .collect(ImmutableList.toImmutableList());
  if (pathStrings.isEmpty()) {
    return ImmutableList.of();
  }
  // Delay creation until we know we need one. Use a TreeSet to make sure that the results are
  // sorted with a stable order and unique.
  Set<Artifact> hints = null;
  // Parallel lists: findFilters.get(i) is the filter belonging to rulePaths.get(i).
  List<ContainingPackageLookupValue.Key> rulePaths = new ArrayList<>(rules.size());
  List<String> findFilters = new ArrayList<>(rules.size());
  for (Rule rule : rules) {
    if (type != rule.type) {
      continue;
    }
    // Only the first path matching the rule is used for the substitution below.
    String firstMatchPathString = null;
    Matcher m = null;
    for (String pathString : pathStrings) {
      m = rule.pattern.matcher(pathString);
      if (m.matches()) {
        firstMatchPathString = pathString;
        break;
      }
    }
    if (firstMatchPathString == null) {
      continue;
    }
    // From here on at least one rule has matched, so the set exists whenever anything is added
    // to rulePaths (and therefore whenever there are glob results to add further down).
    if (hints == null) {
      hints = Sets.newTreeSet(Artifact.EXEC_PATH_COMPARATOR);
    }
    // Expand the rule's find-root, substituting the capturing groups of the match.
    PathFragment relativePath = PathFragment.create(m.replaceFirst(rule.findRoot));
    if (LOG_FINE) {
      logger.fine(
          "hint for " + rule.type + " " + firstMatchPathString + " root: " + relativePath);
    }
    if (!relativePath.getPathString().startsWith(ALLOWED_PREFIX)) {
      logger.warning(
          "Path "
              + relativePath.getPathString()
              + " to search after substitution does not start with "
              + ALLOWED_PREFIX);
      continue;
    }
    rulePaths.add(
        ContainingPackageLookupValue.key(PackageIdentifier.createInMainRepo(relativePath)));
    findFilters.add(rule.findFilter);
  }
  Map<SkyKey, ValueOrException<NoSuchPackageException>> containingPackageLookupValues =
      env.getValuesOrThrow(rulePaths, NoSuchPackageException.class);
  if (env.valuesMissing()) {
    return null;
  }
  List<GlobDescriptor> globKeys = new ArrayList<>(rulePaths.size());
  for (int i = 0; i < rulePaths.size(); i++) {
    ContainingPackageLookupValue containingPackageLookupValue;
    ContainingPackageLookupValue.Key relativePathKey = rulePaths.get(i);
    PathFragment relativePath = relativePathKey.argument().getPackageFragment();
    try {
      containingPackageLookupValue =
          (ContainingPackageLookupValue)
              containingPackageLookupValues.get(relativePathKey).get();
    } catch (NoSuchPackageException e) {
      // Best effort: a failed lookup only drops this hint.
      logger.warning(
          "Unexpected exception when looking up containing package for "
              + relativePath
              + " (prodaccess expired?): "
              + e.getMessage());
      continue;
    }
    if (!containingPackageLookupValue.hasContainingPackage()) {
      logger.warning(relativePath + " not contained in any package: skipping");
      continue;
    }
    PathFragment packageFragment =
        containingPackageLookupValue.getContainingPackageName().getPackageFragment();
    String pattern = findFilters.get(i);
    try {
      // The glob is rooted at the search directory expressed relative to its package.
      globKeys.add(
          GlobValue.key(
              containingPackageLookupValue.getContainingPackageName(),
              containingPackageLookupValue.getContainingPackageRoot(),
              pattern,
              /* excludeDirs= */ true,
              relativePath.relativeTo(packageFragment)));
    } catch (InvalidGlobPatternException e) {
      env.getListener()
          .handle(Event.warn("Error parsing pattern " + pattern + " for " + relativePath));
      continue;
    }
  }
  Map<SkyKey, ValueOrException<IOException>> globResults =
      env.getValuesOrThrow(globKeys, IOException.class);
  if (env.valuesMissing()) {
    return null;
  }
  for (Map.Entry<SkyKey, ValueOrException<IOException>> globEntry : globResults.entrySet()) {
    GlobValue globValue;
    GlobDescriptor globKey = (GlobDescriptor) globEntry.getKey();
    PathFragment packageFragment = globKey.getPackageId().getPackageFragment();
    try {
      globValue = (GlobValue) globEntry.getValue().get();
    } catch (IOException e) {
      // Best effort: a failed glob only drops the hints of this package.
      logger.warning("Error getting hints for " + packageFragment + ": " + e);
      continue;
    }
    for (PathFragment file : globValue.getMatches()) {
      hints.add(
          artifactFactory.getSourceArtifact(
              packageFragment.getRelative(file), globKey.getPackageRoot()));
    }
  }
  return hints == null || hints.isEmpty() ? ImmutableList.<Artifact>of() : hints;
}
| |
| /** |
| * Performs the work of matching a given path against the hints and returns the expanded paths. |
| * The above {@link #getHintedInclusionsWithSkyframe} should be used in preference, but if the |
| * performance impact of Skyframe restarts is untenable, this can be used as a fallback. |
| */ |
| private Collection<Artifact> getHintedInclusionsLegacy( |
| Rule.Type type, PathFragment path, ArtifactRoot sourceRoot) { |
| String pathString = path.getPathString(); |
| // Delay creation until we know we need one. Use a TreeSet to make sure that the results are |
| // sorted with a stable order and unique. |
| Set<Path> hints = null; |
| for (final Rule rule : rules) { |
| if (type != rule.type) { |
| continue; |
| } |
| Matcher m = rule.pattern.matcher(pathString); |
| if (!m.matches()) { |
| continue; |
| } |
| if (hints == null) { hints = Sets.newTreeSet(); } |
| String relativePath = m.replaceFirst(rule.findRoot); |
| if (!relativePath.startsWith(ALLOWED_PREFIX)) { |
| logger.warning( |
| "Path " |
| + relativePath |
| + " to search after substitution does not start with " |
| + ALLOWED_PREFIX); |
| continue; |
| } |
| Path root = sourceRoot.getRoot().getRelative(relativePath); |
| |
| if (LOG_FINE) { |
| logger.fine("hint for " + rule.type + " " + pathString + " root: " + root); |
| } |
| try { |
| // The assumption is made here that all files specified by this hint are under the same |
| // package path as the original file -- this filesystem tree traversal is completely |
| // ignorant of package paths. This could be violated if there were a hint that resolved to |
| // foo/**/*.h, there was a package foo/bar, and the packages foo and foo/bar were in |
| // different package paths. In that case, this traversal would fail to pick up |
| // foo/bar/**/*.h. No examples of this currently exist in the INCLUDE_HINTS |
| // file. |
| if (LOG_FINE) { |
| logger.fine("Globbing: " + root + " " + rule.findFilter); |
| } |
| hints.addAll(new UnixGlob.Builder(root) |
| .setFilesystemCalls(syscallCache) |
| .addPattern(rule.findFilter) |
| .glob()); |
| } catch (IOException e) { |
| logger.warning("Error in hint expansion: " + e); |
| } |
| } |
| if (hints != null && !hints.isEmpty()) { |
| // Transform paths into source artifacts (all hints must be to source artifacts). |
| List<Artifact> result = new ArrayList<>(hints.size()); |
| for (Path hint : hints) { |
| Root sourcePath = sourceRoot.getRoot(); |
| result.add( |
| Preconditions.checkNotNull( |
| artifactFactory.getSourceArtifact(sourcePath.relativize(hint), sourcePath), |
| "%s %s %s %s", |
| hint, |
| sourcePath, |
| path)); |
| } |
| return result; |
| } else { |
| return ImmutableList.of(); |
| } |
| } |
| |
| private Collection<Inclusion> getHintedInclusions(Artifact path) { |
| String pathString = path.getExecPathString(); |
| // Delay creation until we know we need one. Use a LinkedHashSet to make sure that the results |
| // are sorted with a stable order and unique. |
| Set<Inclusion> hints = null; |
| for (final Rule rule : rules) { |
| if ((rule.type != Rule.Type.INCLUDE_ANGLE) && (rule.type != Rule.Type.INCLUDE_QUOTE)) { |
| continue; |
| } |
| Matcher m = rule.pattern.matcher(pathString); |
| if (!m.matches()) { |
| continue; |
| } |
| if (hints == null) { hints = Sets.newLinkedHashSet(); } |
| Inclusion inclusion = new Inclusion(rule.findRoot, rule.type == Rule.Type.INCLUDE_QUOTE |
| ? Kind.QUOTE : Kind.ANGLE); |
| hints.add(inclusion); |
| if (LOG_FINE) { |
| logger.fine("hint for " + rule.type + " " + pathString + " root: " + inclusion); |
| } |
| } |
| if (hints != null && !hints.isEmpty()) { |
| return ImmutableList.copyOf(hints); |
| } else { |
| return ImmutableList.of(); |
| } |
| } |
| } |
| |
/** Returns the hints helper this parser was constructed with. */
Hints getHints() {
  return hints;
}
| |
| /** |
| * An immutable inclusion tuple. This models an {@code #include} or {@code |
| * #include_next} line in a file without the context how this file got |
| * included. |
| */ |
| public static class Inclusion { |
| /** The format of the #include in the source file -- quoted, angle bracket, etc. */ |
| enum Kind { |
| /** Quote includes: {@code #include "name"}. */ |
| QUOTE, |
| |
| /** Angle bracket includes: {@code #include <name>}. */ |
| ANGLE, |
| |
| /** Quote next includes: {@code #include_next "name"}. */ |
| NEXT_QUOTE, |
| |
| /** Angle next includes: {@code #include_next <name>}. */ |
| NEXT_ANGLE; |
| |
| /** |
| * Returns true if this is an {@code #include_next} inclusion, |
| */ |
| boolean isNext() { |
| return this == NEXT_ANGLE || this == NEXT_QUOTE; |
| } |
| } |
| |
| /** The kind of inclusion. */ |
| final Kind kind; |
| /** The relative path of the inclusion. */ |
| final PathFragment pathFragment; |
| |
| Inclusion(String includeTarget, Kind kind) { |
| this.kind = kind; |
| this.pathFragment = PathFragment.create(includeTarget); |
| } |
| |
| Inclusion(PathFragment pathFragment, Kind kind) { |
| this.kind = kind; |
| this.pathFragment = Preconditions.checkNotNull(pathFragment); |
| } |
| |
| String getPathString() { |
| return pathFragment.getPathString(); |
| } |
| |
| @Override |
| public String toString() { |
| return kind + ":" + pathFragment.getPathString(); |
| } |
| |
| @Override |
| public boolean equals(Object o) { |
| if (o == this) { |
| return true; |
| } |
| if (!(o instanceof Inclusion)) { |
| return false; |
| } |
| Inclusion that = (Inclusion) o; |
| return kind == that.kind && pathFragment.equals(that.pathFragment); |
| } |
| |
| @Override |
| public int hashCode() { |
| return pathFragment.hashCode() * 37 + kind.hashCode(); |
| } |
| } |
| |
/**
 * The externally-scoped immutable hints helper that is shared by all scanners. May be null, in
 * which case no hinted inclusions are added to the scan results.
 */
private final Hints hints;

/**
 * Constructs a new IncludeParser.
 *
 * @param hints regexps for converting computed includes into simple strings
 */
public IncludeParser(Hints hints) {
  this.hints = hints;
}
| |
| /** |
| * Skips whitespace, \+NL pairs, and block-style / * * / comments. Assumes line comments are |
| * handled outside. Does not handle digraphs, trigraphs or decahexagraphs. |
| * |
| * @param chars characters to scan |
| * @param pos the starting position |
| * @return the resulting position after skipping whitespace and comments. |
| */ |
| protected static int skipWhitespace(byte[] chars, int pos, int end) { |
| while (pos < end) { |
| if (Character.isWhitespace(chars[pos] & 0xff)) { |
| pos++; |
| } else if (chars[pos] == '\\' && pos + 1 < end && chars[pos + 1] == '\n') { |
| pos++; |
| } else if (chars[pos] == '/' && pos + 1 < end && chars[pos + 1] == '*') { |
| pos += 2; |
| while (pos < end - 1) { |
| if (chars[pos++] == '*') { |
| if (chars[pos] == '/') { |
| pos++; |
| break; // proper comment end |
| } |
| } |
| } |
| } else { // not whitespace |
| return pos; |
| } |
| } |
| return pos; // pos == len, meaning we fell off the end. |
| } |
| |
// The keyword searched for by skipThroughHasInclude().
private static final String HAS_INCLUDE = "__has_include";
private static final int HAS_INCLUDE_LENGTH = HAS_INCLUDE.length();
// Minimum number of characters that must remain for a match: the keyword itself plus five more
// characters.
private static final int NECESSARY_HAS_INCLUDE_LENGTH = HAS_INCLUDE_LENGTH + 5;
| |
| /** |
| * Returns the index of {@code chars} after the first occurrence of "__has_include" or -1 if no |
| * such occurrence exists. Also requires that there be at least 5 characters after the |
| * "__has_include", corresponding to a pair of parentheses and angle brackets/quotes and a |
| * filename. |
| * |
| * <p>This code runs on every line that starts with " *# *", so it should be as fast as possible. |
| */ |
| private static int skipThroughHasInclude(byte[] chars, int pos, int end) { |
| int lastPos = end - NECESSARY_HAS_INCLUDE_LENGTH; |
| while (pos <= lastPos) { |
| int curPos = 0; |
| while (curPos < HAS_INCLUDE_LENGTH |
| && (chars[pos + curPos] & 0xff) == HAS_INCLUDE.charAt(curPos)) { |
| curPos++; |
| } |
| if (curPos == HAS_INCLUDE_LENGTH) { |
| return pos + curPos; |
| } |
| // We're looking for "__has_include" as a preprocessing token, which means that it cannot |
| // start in the middle of any characters we've already processed, nor at the mismatching |
| // character. |
| pos += curPos + 1; |
| } |
| return -1; |
| } |
| |
| /** |
| * Checks for and skips a given token. |
| * |
| * @param chars characters to scan |
| * @param pos the starting position |
| * @param expected the expected token |
| * @return the resulting position if found, otherwise -1 |
| */ |
| protected static int expect(byte[] chars, int pos, int end, String expected) { |
| int si = 0; |
| int expectedLen = expected.length(); |
| while (pos < end) { |
| if (si == expectedLen) { |
| return pos; |
| } |
| if ((chars[pos++] & 0xff) != expected.charAt(si++)) { |
| return -1; |
| } |
| } |
| return -1; |
| } |
| |
| /** |
| * Finds the index of a given character token from a starting pos. |
| * |
| * @param chars characters to scan |
| * @param pos the starting position |
| * @param echar the character to find |
| * @return the resulting position of echar if found, otherwise -1 |
| */ |
| private static int indexOf(byte[] chars, int pos, int end, char echar) { |
| while (pos < end) { |
| if (chars[pos] == echar) { |
| return pos; |
| } |
| pos++; |
| } |
| return -1; |
| } |
| |
// Matches a line continuation: a backslash immediately followed by a newline.
private static final Pattern BS_NL_PAT = Pattern.compile("\\\\" + "\n");
| |
| // Keep this in sync with the grep-includes binary's scanning output format. |
| private static final ImmutableMap<Character, Kind> KIND_MAP = ImmutableMap.of( |
| '"', Kind.QUOTE, |
| '<', Kind.ANGLE, |
| 'q', Kind.NEXT_QUOTE, |
| 'a', Kind.NEXT_ANGLE); |
| |
| /** |
| * Processes the output generated by an auxiliary include-scanning binary. Closes the stream upon |
| * completion. |
| * |
| * <p>If a source file has the following include statements: |
| * <pre> |
| * #include <string> |
| * #include "directory/header.h" |
| * </pre> |
| * |
| * <p>Then the output file has the following contents: |
| * <pre> |
| * "directory/header.h |
| * <string |
| * </pre> |
| * <p>Each line of the output is translated into an Inclusion object. |
| */ |
| public static List<Inclusion> processIncludes(Object streamName, InputStream is) |
| throws IOException { |
| List<Inclusion> inclusions = new ArrayList<>(); |
| try (InputStreamReader reader = new InputStreamReader(is, ISO_8859_1)) { |
| for (String line : CharStreams.readLines(reader)) { |
| char qchar = line.charAt(0); |
| String name = line.substring(1); |
| Kind kind = KIND_MAP.get(qchar); |
| if (kind == null) { |
| throw new IOException("Illegal inclusion kind '" + qchar + "'"); |
| } |
| inclusions.add(new Inclusion(name, kind)); |
| } |
| } catch (IOException e) { |
| throw new IOException("Error reading include file " + streamName + ": " + e.getMessage()); |
| } |
| return inclusions; |
| } |
| |
| @VisibleForTesting |
| Inclusion extractInclusion(String line) { |
| return extractInclusion(line.getBytes(ISO_8859_1), 0, line.length()); |
| } |
| |
| /** |
| * Extracts a new, unresolved an Inclusion from a line of source. |
| * |
| * @param chars the char array containing the line chars to parse |
| * @param lineBegin the position of the first character in the line |
| * @param lineEnd the position of the character after the last |
| * @return the inclusion object if possible, null if none |
| */ |
| private Inclusion extractInclusion(byte[] chars, int lineBegin, int lineEnd) { |
| // expect WS#WS(include|include_next|__has_include\(_next\)?)WS\(?("name"|<name>|<name>)\)? |
| IncludesKeywordData data = expectIncludeKeyword(chars, lineBegin, lineEnd); |
| int pos = data.pos; |
| if (pos == -1 || pos == lineEnd) { |
| return null; |
| } |
| boolean isNext = false; |
| if (data.canHaveNext) { |
| int npos = expect(chars, pos, lineEnd, "_next"); |
| if (npos >= 0) { |
| isNext = true; |
| pos = npos; |
| } |
| } |
| if ((pos = skipWhitespace(chars, pos, lineEnd)) == lineEnd) { |
| return null; |
| } |
| if (data.hasParens) { |
| if (chars[pos] != '(') { |
| return null; |
| } |
| pos++; |
| if ((pos = skipWhitespace(chars, pos, lineEnd)) == lineEnd) { |
| return null; |
| } |
| } |
| if (chars[pos] == '"' || chars[pos] == '<') { |
| char qchar = (char) (chars[pos++] & 0xff); |
| int spos = pos; |
| pos = indexOf(chars, pos + 1, lineEnd, qchar == '<' ? '>' : '"'); |
| if (pos < 0) { |
| return null; |
| } |
| if (chars[spos] == '/') { |
| return null; // disallow absolute paths |
| } |
| String name = new String(chars, spos, pos - spos); |
| if (name.contains("\n")) { // strip any \+NL pairs within name |
| name = BS_NL_PAT.matcher(name).replaceAll(""); |
| } |
| if (isNext) { |
| return new Inclusion(name, qchar == '"' ? Kind.NEXT_QUOTE : Kind.NEXT_ANGLE); |
| } else { |
| return new Inclusion(name, qchar == '"' ? Kind.QUOTE : Kind.ANGLE); |
| } |
| } else { |
| return createOtherInclusion(new String(chars, pos, lineEnd - pos)); |
| } |
| } |
| |
| /** |
| * Extracts all inclusions from characters of a file. |
| * |
| * @param chars the file contents to parse & extract inclusions from |
| * @return a new set of inclusions, normalized to the cache |
| */ |
| @VisibleForTesting |
| List<Inclusion> extractInclusions(byte[] chars) { |
| List<Inclusion> inclusions = new ArrayList<>(); |
| int lineBegin = 0; // the first char of each line |
| int end = chars.length; // the file end |
| while (lineBegin < end) { |
| int lineEnd = lineBegin; // the char after the last non-\n in each line |
| // skip to the next \n or after end of buffer, ignoring continuations |
| while (lineEnd < end) { |
| if (chars[lineEnd] == '\n') { |
| break; |
| } else if (chars[lineEnd] == '\\') { |
| lineEnd++; |
| if (chars[lineEnd] == '\n') { |
| lineEnd++; |
| } |
| } else { |
| lineEnd++; |
| } |
| } |
| |
| // TODO(bazel-team) handle multiline block comments /* */ for the cases: |
| // /* blah blah blah |
| // lalala */ #include "foo.h" |
| // and: |
| // /* blah |
| // #include "foo.h" |
| // */ |
| |
| // extract the inclusion, and save only the kind we care about. |
| Inclusion inclusion = extractInclusion(chars, lineBegin, lineEnd); |
| if (inclusion != null) { |
| if (isValidInclusionKind(inclusion.kind)) { |
| inclusions.add(inclusion); |
| } |
| } |
| lineBegin = lineEnd + 1; // next line starts after the previous line |
| } |
| return inclusions; |
| } |
| |
| /** |
| * Extracts all inclusions from a given source file. |
| * |
| * @param file the file to parse & extract inclusions from |
| * @param actionExecutionContext Services in the scope of the action, like the stream to which |
| * scanning messages are printed |
| * @return a new set of inclusions, normalized to the cache |
| */ |
| Collection<Inclusion> extractInclusions( |
| Artifact file, |
| ActionExecutionMetadata actionExecutionMetadata, |
| ActionExecutionContext actionExecutionContext, |
| Artifact grepIncludes, |
| @Nullable SpawnIncludeScanner remoteIncludeScanner, |
| boolean isOutputFile) |
| throws IOException, ExecException, InterruptedException { |
| Collection<Inclusion> inclusions; |
| |
| if (remoteIncludeScanner != null |
| && remoteIncludeScanner.shouldParseRemotely(file, actionExecutionContext)) { |
| inclusions = |
| remoteIncludeScanner.extractInclusions( |
| file, |
| actionExecutionMetadata, |
| actionExecutionContext, |
| grepIncludes, |
| getFileType(), |
| isOutputFile); |
| } else { |
| try (SilentCloseable c = |
| Profiler.instance().profile(ProfilerTask.SCANNER, file.getExecPathString())) { |
| inclusions = |
| extractInclusions( |
| FileSystemUtils.readContent(actionExecutionContext.getInputPath(file))); |
| } catch (IOException e) { |
| if (remoteIncludeScanner != null) { |
| logger.log( |
| Level.WARNING, |
| "Falling back on remote parsing of " + actionExecutionContext.getInputPath(file), |
| e); |
| inclusions = |
| remoteIncludeScanner.extractInclusions( |
| file, |
| actionExecutionMetadata, |
| actionExecutionContext, |
| grepIncludes, |
| getFileType(), |
| isOutputFile); |
| } else { |
| throw e; |
| } |
| } |
| } |
| if (hints != null) { |
| inclusions.addAll(hints.getHintedInclusions(file)); |
| } |
| return ImmutableList.copyOf(inclusions); |
| } |
| |
/**
 * Returns the type of the scanned file, which is handed to the remote include scanner. This
 * implementation always returns {@link GrepIncludesFileType#CPP}.
 *
 * <p>Supported values are "c++" for standard c/c++ headers and sources, and "swig" for .swig
 * files. Changes to this method must be synchronized with changes to
 * //tools/cpp:grep-includes.
 */
protected GrepIncludesFileType getFileType() {
  return GrepIncludesFileType.CPP;
}
| |
| /** |
| * Position of found include together with information about how to process the remaining include |
| * line further. |
| */ |
| protected static class IncludesKeywordData { |
| protected static final IncludesKeywordData NONE = new IncludesKeywordData(-1, false, false); |
| private final int pos; |
| private final boolean canHaveNext; |
| private final boolean hasParens; |
| |
| private IncludesKeywordData(int pos, boolean canHaveNext, boolean hasParens) { |
| this.pos = pos; |
| this.canHaveNext = canHaveNext; |
| this.hasParens = hasParens; |
| } |
| |
| protected static IncludesKeywordData normal(int pos) { |
| return new IncludesKeywordData(pos, true, false); |
| } |
| |
| protected static IncludesKeywordData importOrSwig(int pos) { |
| return new IncludesKeywordData(pos, false, false); |
| } |
| |
| protected static IncludesKeywordData hasInclude(int pos) { |
| return new IncludesKeywordData(pos, true, true); |
| } |
| } |
| |
| /** |
| * Parses include keyword in the provided char array and returns position immediately after |
| * include keyword or -1 if keyword was not found, along with information to aid future parsing. |
| * Can be overridden by subclasses. |
| */ |
| protected IncludesKeywordData expectIncludeKeyword(byte[] chars, int position, int end) { |
| int pos = expect(chars, skipWhitespace(chars, position, end), end, "#"); |
| if (pos > 0) { |
| int npos = skipWhitespace(chars, pos, end); |
| if ((pos = expect(chars, npos, end, "include")) > 0) { |
| return IncludesKeywordData.normal(pos); |
| } else if ((pos = expect(chars, npos, end, "import")) > 0) { |
| return IncludesKeywordData.importOrSwig(pos); |
| } else if ((pos = skipThroughHasInclude(chars, npos, end)) > 0) { |
| return IncludesKeywordData.hasInclude(pos); |
| } |
| } |
| return IncludesKeywordData.NONE; |
| } |
| |
/**
 * Returns true if we are interested in the given inclusion kind; inclusions of other kinds are
 * dropped from the scan results. Can be overridden by subclasses. This implementation accepts
 * every kind.
 *
 * @param kind the kind of the inclusion that was found
 */
protected boolean isValidInclusionKind(Kind kind) {
  return true;
}
| |
/**
 * Returns the inclusion object for non-standard inclusion cases, i.e. when the include target
 * starts with neither a double quote nor an angle bracket, or null if the inclusion should be
 * ignored. This implementation ignores all such inclusions.
 *
 * @param inclusionContent the rest of the line, starting at the include target
 */
@Nullable
protected Inclusion createOtherInclusion(String inclusionContent) {
  return null;
}
| } |