blob: 30e18cadbf6a8d6b1d76b9c79d485fec596d0e9f [file] [log] [blame]
// Copyright 2018 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.includescanning;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Sets;
import com.google.common.io.CharStreams;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
import com.google.devtools.build.lib.actions.ActionExecutionMetadata;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.ArtifactFactory;
import com.google.devtools.build.lib.actions.ArtifactRoot;
import com.google.devtools.build.lib.actions.ExecException;
import com.google.devtools.build.lib.cmdline.PackageIdentifier;
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.includescanning.IncludeParser.Inclusion.Kind;
import com.google.devtools.build.lib.packages.NoSuchPackageException;
import com.google.devtools.build.lib.profiler.Profiler;
import com.google.devtools.build.lib.profiler.ProfilerTask;
import com.google.devtools.build.lib.profiler.SilentCloseable;
import com.google.devtools.build.lib.skyframe.ContainingPackageLookupValue;
import com.google.devtools.build.lib.skyframe.GlobDescriptor;
import com.google.devtools.build.lib.skyframe.GlobValue;
import com.google.devtools.build.lib.skyframe.GlobValue.InvalidGlobPatternException;
import com.google.devtools.build.lib.skyframe.PerBuildSyscallCache;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.lib.vfs.Root;
import com.google.devtools.build.lib.vfs.UnixGlob;
import com.google.devtools.build.lib.vfs.UnixGlob.FilesystemCalls;
import com.google.devtools.build.skyframe.SkyFunction.Environment;
import com.google.devtools.build.skyframe.SkyKey;
import com.google.devtools.build.skyframe.SkyValue;
import com.google.devtools.build.skyframe.ValueOrException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import javax.annotation.Nullable;
/**
* Scans a source file and extracts the literal inclusions it specifies. Does not store results --
* repeated requests to the same file will result in repeated scans. Clients should implement a
* caching layer in order to avoid unnecessary disk access when requesting an already scanned file.
*/
@VisibleForTesting
class IncludeParser {
/**
* File types supported by the grep-includes binary. {@link #fileType} must be kept is sync with
* //tools/cpp:grep-includes.
*/
public enum GrepIncludesFileType {
CPP("c++"),
SWIG("swig");
private final String fileType;
GrepIncludesFileType(String fileType) {
this.fileType = fileType;
}
public String getFileType() {
return fileType;
}
}
/** Logger used by the include parser and by the nested {@link Hints} class. */
private static final Logger logger = Logger.getLogger(IncludeParser.class.getName());
// Evaluated once, when this class is initialized; later changes to the logger's level are not
// reflected in this flag.
private static final boolean LOG_FINE = logger.isLoggable(Level.FINE);
/**
 * Immutable object representation of the four columns making up a single Rule
 * in a Hints set. See {@link Hints} for more details.
 */
private static class Rule {
  private enum Type { PATH, FILE, INCLUDE_QUOTE, INCLUDE_ANGLE }

  /** The kind of rule, parsed from the first column. */
  final Type type;
  /** The second column, compiled so that it must match an entire path. */
  final Pattern pattern;
  /** The third column, with backslashes rewritten to {@code $}. */
  final String findRoot;
  /** The fourth column, or null for three-column (include) rules. */
  final String findFilter;

  /**
   * Creates a rule from the raw column strings.
   *
   * @throws IllegalArgumentException if {@code type} does not name a {@link Type}
   * @throws java.util.regex.PatternSyntaxException if {@code pattern} is not a valid regexp
   */
  private Rule(String type, String pattern, String findRoot, String findFilter) {
    // Upper-case with Locale.ROOT: under e.g. a Turkish default locale "file" would become
    // "FİLE" and valueOf() would reject an otherwise valid hints file.
    this.type = Type.valueOf(type.trim().toUpperCase(Locale.ROOT));
    // Anchor the pattern so that it has to match the complete path string.
    this.pattern = Pattern.compile("^" + pattern + "$");
    // Every backslash becomes '$', so a backreference written as \1 in the hints file turns
    // into $1, which is the form Matcher.replaceFirst() substitutes.
    this.findRoot = findRoot.replace('\\', '$');
    this.findFilter = findFilter;
  }

  /**
   * Creates a three-column rule, which has no find-filter. Only the include types are allowed.
   *
   * @throws IllegalArgumentException if the type is neither INCLUDE_QUOTE nor INCLUDE_ANGLE
   */
  Rule(String type, String pattern, String findRoot) {
    this(type, pattern, findRoot, null);
    Preconditions.checkArgument((this.type == Type.INCLUDE_QUOTE)
        || (this.type == Type.INCLUDE_ANGLE), this);
  }

  @Override public String toString() {
    return "" + type + " " + pattern + " " + findRoot + " " + findFilter;
  }
}
/** {@link SkyValue} encapsulating the source-state-dependent part of {@link Hints}. */
public static class HintsRules implements SkyValue {
  /** The parsed hint rules. */
  private final ImmutableList<Rule> rules;

  private HintsRules(ImmutableList<Rule> parsedRules) {
    rules = parsedRules;
  }
}
/**
* This class is a representation of the INCLUDE_HINTS file. The hints file contains regexp-based
* rules to help this simple include scanner cope with computed includes, which would otherwise
* require a full preprocessor with symbol support. Instead of actually processing symbols to
* evaluate the computed includes, we instead apply rules to gather inclusions for matching paths.
*
* <p>The hints file is read, line by line, into a list of rules each of which encapsulates a line
* of four columns. Each non-blank, non-comment line has the format:
*
* <pre>
* &quot;file&quot;|&quot;path&quot; match-pattern find-root find-filter
* </pre>
*
* <p>The first column specifies whether the line is a rule based on matching source
* <em>files</em> (passed directly to the compiler as inputs, or transitively #included by other
* inputs) or include <em>paths</em> (passed to the compiler as -I, -iquote, or -isystem flags).
*
* <p>The second column is a regexp for files or paths. Whenever a compiler argument of the
* specified type matches that regexp, the rule is taken. (All matching rules for every path and
* file on a compiler command line are followed, and the results are combined.)
*
* <p>The third column is a point in the local filesystem from which to extract a recursive
* listing. (This follows symlinks) Backrefs may be used to refer to the regexp or its capturing
* groups. (This is mostly necessary because --package_path can cause input paths to carry
* arbitrary prefixes.)
*
* <p>The fourth column is a regexp applied to each file found by the recursive listing. All
* matching files are treated as dependencies.
*/
public static class Hints {
// Splits a hints line into its whitespace-separated columns.
private static final Pattern WS_PAT = Pattern.compile("\\s+");
// Only paths starting with this prefix are considered when matching and expanding hints.
@VisibleForTesting
static final String ALLOWED_PREFIX = "third_party/";
// Match regular expressions that can only match paths under ALLOWED_PREFIX: any number of
// opening parentheses followed by the literal prefix.
private static final Pattern ALLOWED_PATTERN = Pattern.compile("^\\(*" + ALLOWED_PREFIX + ".*");
// Concurrency level used for both the file-level hints cache and the per-build syscall cache.
private static final int HINTS_CACHE_CONCURRENCY = 100;
private final ImmutableList<Rule> rules;
private final ArtifactFactory artifactFactory;
// Starts out empty; a fresh syscall cache is installed by clearCachedLegacyHints().
private final AtomicReference<FilesystemCalls> syscallCache = new AtomicReference<>();
// Caches the FILE-type legacy hint expansion per artifact; an entry is computed the first time
// the artifact is requested.
private final LoadingCache<Artifact, Collection<Artifact>> fileLevelHintsCache =
CacheBuilder.newBuilder().concurrencyLevel(HINTS_CACHE_CONCURRENCY).build(
new CacheLoader<Artifact, Collection<Artifact>>() {
@Override
public Collection<Artifact> load(Artifact path) {
return getHintedInclusionsLegacy(Rule.Type.FILE,
path.getExecPath(), path.getRoot());
}
});
/**
 * Creates a hint set from rules that were already parsed out of an INCLUDE_HINTS file.
 *
 * @param hintsRules the {@link HintsRules} parsed from INCLUDE_HINTS
 * @param artifactFactory factory used to create source artifacts for hinted files
 */
public Hints(HintsRules hintsRules, ArtifactFactory artifactFactory) {
  this.rules = hintsRules.rules;
  this.artifactFactory = artifactFactory;
  // Also installs the first syscall cache, which is not set at field initialization.
  clearCachedLegacyHints();
}
/**
 * Parses an INCLUDE_HINTS file into a {@link HintsRules} value.
 *
 * <p>Blank lines and lines starting with {@code #} are skipped. Every other line must consist of
 * three or four whitespace-separated columns. For four-column lines the match pattern must be
 * restricted to paths under {@link #ALLOWED_PREFIX}.
 *
 * @param hintsFile the hints file to read
 * @return the rules, in the order in which they appear in the file
 * @throws IOException if the file cannot be read or contains a malformed line; for regexp and
 *     type errors the original exception is attached as the cause
 */
static HintsRules getRules(Path hintsFile) throws IOException {
  ImmutableList.Builder<Rule> rules = ImmutableList.builder();
  try (InputStream is = hintsFile.getInputStream()) {
    for (String rawLine : CharStreams.readLines(new InputStreamReader(is, UTF_8))) {
      String line = rawLine.trim();
      if (line.isEmpty() || line.startsWith("#")) {
        continue;
      }
      String[] tokens = WS_PAT.split(line);
      try {
        if (tokens.length == 3) {
          rules.add(new Rule(tokens[0], tokens[1], tokens[2]));
        } else if (tokens.length == 4) {
          if (!ALLOWED_PATTERN.matcher(tokens[1]).matches()) {
            throw new IOException("Illegal hint regex on: " + line + "\n"
                + tokens[1] + " does not match only paths in " + ALLOWED_PREFIX);
          }
          rules.add(new Rule(tokens[0], tokens[1], tokens[2], tokens[3]));
        } else {
          throw new IOException("Malformed hint line: " + line);
        }
      } catch (PatternSyntaxException e) {
        // Must be caught before IllegalArgumentException, of which it is a subclass.
        throw new IOException("Malformed hint regex on: " + line + "\n " + e.getMessage(), e);
      } catch (IllegalArgumentException e) {
        throw new IOException("Invalid type on: " + line + "\n " + e.getMessage(), e);
      }
    }
  }
  return new HintsRules(rules.build());
}
/**
 * Drops all cached legacy hint expansions and installs a fresh syscall cache. Needed for
 * inter-build correctness, because the caches do not track filesystem changes.
 */
void clearCachedLegacyHints() {
  fileLevelHintsCache.invalidateAll();
  FilesystemCalls freshSyscalls =
      PerBuildSyscallCache.newBuilder().setConcurrencyLevel(HINTS_CACHE_CONCURRENCY).build();
  syscallCache.set(freshSyscalls);
}
/**
 * Returns the "file" type hinted inclusions for a given path, caching results by path. Paths
 * whose exec path does not start with {@link #ALLOWED_PREFIX} never have hints.
 */
Collection<Artifact> getFileLevelHintedInclusionsLegacy(Artifact path) {
  boolean underAllowedPrefix = path.getExecPathString().startsWith(ALLOWED_PREFIX);
  return underAllowedPrefix
      ? fileLevelHintsCache.getUnchecked(path)
      : ImmutableList.<Artifact>of();
}
/**
 * Returns the "path" type hinted inclusions for the given paths. Callers are responsible for
 * caching.
 *
 * @return the hinted artifacts, or null if the Skyframe lookup reported missing values
 */
Collection<Artifact> getPathLevelHintedInclusions(
    ImmutableList<PathFragment> paths, Environment env) throws InterruptedException {
  Collection<Artifact> pathHints = getHintedInclusionsWithSkyframe(Rule.Type.PATH, paths, env);
  return pathHints;
}
/**
* Performs the work of matching the given paths against the hints and returns the matching
* files. This is semantically different from {@link #getHintedInclusionsLegacy} in that it will
* not cross package boundaries.
*
* @param type the rule type to consider; rules of any other type are skipped
* @param paths candidate paths; only those starting with {@link #ALLOWED_PREFIX} are matched
* @param env the Skyframe environment used for the package and glob lookups
* @return the hinted artifacts (possibly empty), or null if {@code env} reports missing values
*     after either lookup
*/
private Collection<Artifact> getHintedInclusionsWithSkyframe(
Rule.Type type, ImmutableList<PathFragment> paths, Environment env)
throws InterruptedException {
// Paths outside ALLOWED_PREFIX can never produce hints, so drop them up front.
ImmutableList<String> pathStrings =
paths.stream()
.map(PathFragment::getPathString)
.filter((p) -> p.startsWith(ALLOWED_PREFIX))
.collect(ImmutableList.toImmutableList());
if (pathStrings.isEmpty()) {
return ImmutableList.of();
}
// Delay creation until we know we need one. Use a TreeSet to make sure that the results are
// sorted with a stable order and unique.
Set<Artifact> hints = null;
// rulePaths and findFilters are parallel lists: entry i of each belongs to the same rule.
List<ContainingPackageLookupValue.Key> rulePaths = new ArrayList<>(rules.size());
List<String> findFilters = new ArrayList<>(rules.size());
for (Rule rule : rules) {
if (type != rule.type) {
continue;
}
// Only the first path that matches the rule is used; later matching paths are ignored.
String firstMatchPathString = null;
Matcher m = null;
for (String pathString : pathStrings) {
m = rule.pattern.matcher(pathString);
if (m.matches()) {
firstMatchPathString = pathString;
break;
}
}
if (firstMatchPathString == null) {
continue;
}
if (hints == null) {
hints = Sets.newTreeSet(Artifact.EXEC_PATH_COMPARATOR);
}
// Substitute the match (and its groups) into the rule's find-root to get the search root.
PathFragment relativePath = PathFragment.create(m.replaceFirst(rule.findRoot));
if (LOG_FINE) {
logger.fine(
"hint for " + rule.type + " " + firstMatchPathString + " root: " + relativePath);
}
if (!relativePath.getPathString().startsWith(ALLOWED_PREFIX)) {
logger.warning(
"Path "
+ relativePath.getPathString()
+ " to search after substitution does not start with "
+ ALLOWED_PREFIX);
continue;
}
rulePaths.add(
ContainingPackageLookupValue.key(PackageIdentifier.createInMainRepo(relativePath)));
findFilters.add(rule.findFilter);
}
// First Skyframe round: find the package that contains each search root.
Map<SkyKey, ValueOrException<NoSuchPackageException>> containingPackageLookupValues =
env.getValuesOrThrow(rulePaths, NoSuchPackageException.class);
if (env.valuesMissing()) {
// NOTE(review): presumably the caller is expected to retry once the values are available.
return null;
}
List<GlobDescriptor> globKeys = new ArrayList<>(rulePaths.size());
for (int i = 0; i < rulePaths.size(); i++) {
ContainingPackageLookupValue containingPackageLookupValue;
ContainingPackageLookupValue.Key relativePathKey = rulePaths.get(i);
PathFragment relativePath = relativePathKey.argument().getPackageFragment();
try {
containingPackageLookupValue =
(ContainingPackageLookupValue)
containingPackageLookupValues.get(relativePathKey).get();
} catch (NoSuchPackageException e) {
// Best effort: a failed lookup only drops the hints of this one rule.
logger.warning(
"Unexpected exception when looking up containing package for "
+ relativePath
+ " (prodaccess expired?): "
+ e.getMessage());
continue;
}
if (!containingPackageLookupValue.hasContainingPackage()) {
logger.warning(relativePath + " not contained in any package: skipping");
continue;
}
PathFragment packageFragment =
containingPackageLookupValue.getContainingPackageName().getPackageFragment();
String pattern = findFilters.get(i);
try {
// The glob is requested for the containing package, with the search root expressed
// relative to that package.
globKeys.add(
GlobValue.key(
containingPackageLookupValue.getContainingPackageName(),
containingPackageLookupValue.getContainingPackageRoot(),
pattern,
/* excludeDirs= */ true,
relativePath.relativeTo(packageFragment)));
} catch (InvalidGlobPatternException e) {
env.getListener()
.handle(Event.warn("Error parsing pattern " + pattern + " for " + relativePath));
continue;
}
}
// Second Skyframe round: evaluate the globs.
Map<SkyKey, ValueOrException<IOException>> globResults =
env.getValuesOrThrow(globKeys, IOException.class);
if (env.valuesMissing()) {
return null;
}
for (Map.Entry<SkyKey, ValueOrException<IOException>> globEntry : globResults.entrySet()) {
GlobValue globValue;
GlobDescriptor globKey = (GlobDescriptor) globEntry.getKey();
PathFragment packageFragment = globKey.getPackageId().getPackageFragment();
try {
globValue = (GlobValue) globEntry.getValue().get();
} catch (IOException e) {
logger.warning("Error getting hints for " + packageFragment + ": " + e);
continue;
}
// hints is non-null here: a glob key only exists for a rule that matched, and the set is
// created on the first match.
for (PathFragment file : globValue.getMatches()) {
hints.add(
artifactFactory.getSourceArtifact(
packageFragment.getRelative(file), globKey.getPackageRoot()));
}
}
return hints == null || hints.isEmpty() ? ImmutableList.<Artifact>of() : hints;
}
/**
 * Performs the work of matching a given path against the hints and returns the expanded paths.
 * The above {@link #getHintedInclusionsWithSkyframe} should be used in preference, but if the
 * performance impact of Skyframe restarts is untenable, this can be used as a fallback.
 *
 * @param type the rule type to consider; rules of any other type are skipped
 * @param path the path to match against the rule patterns
 * @param sourceRoot the root below which hinted files are globbed and resolved
 * @return source artifacts for all files found by the matching rules, or an empty list
 */
private Collection<Artifact> getHintedInclusionsLegacy(
    Rule.Type type, PathFragment path, ArtifactRoot sourceRoot) {
  String pathString = path.getPathString();
  // Delay creation until we know we need one. Use a TreeSet to make sure that the results are
  // sorted with a stable order and unique.
  Set<Path> hints = null;
  for (final Rule rule : rules) {
    if (type != rule.type) {
      continue;
    }
    Matcher m = rule.pattern.matcher(pathString);
    if (!m.matches()) {
      continue;
    }
    if (hints == null) {
      hints = Sets.newTreeSet();
    }
    // Substitute the match (and its groups) into the rule's find-root to get the search root.
    String relativePath = m.replaceFirst(rule.findRoot);
    if (!relativePath.startsWith(ALLOWED_PREFIX)) {
      logger.warning(
          "Path "
              + relativePath
              + " to search after substitution does not start with "
              + ALLOWED_PREFIX);
      continue;
    }
    Path root = sourceRoot.getRoot().getRelative(relativePath);
    if (LOG_FINE) {
      logger.fine("hint for " + rule.type + " " + pathString + " root: " + root);
    }
    try {
      // The assumption is made here that all files specified by this hint are under the same
      // package path as the original file -- this filesystem tree traversal is completely
      // ignorant of package paths. This could be violated if there were a hint that resolved to
      // foo/**/*.h, there was a package foo/bar, and the packages foo and foo/bar were in
      // different package paths. In that case, this traversal would fail to pick up
      // foo/bar/**/*.h. No examples of this currently exist in the INCLUDE_HINTS
      // file.
      if (LOG_FINE) {
        logger.fine("Globbing: " + root + " " + rule.findFilter);
      }
      hints.addAll(new UnixGlob.Builder(root)
          .setFilesystemCalls(syscallCache)
          .addPattern(rule.findFilter)
          .glob());
    } catch (IOException e) {
      // Best effort: a failing glob only drops the hints of this one rule.
      logger.warning("Error in hint expansion: " + e);
    }
  }
  if (hints == null || hints.isEmpty()) {
    return ImmutableList.of();
  }
  // Transform paths into source artifacts (all hints must be to source artifacts).
  Root sourcePath = sourceRoot.getRoot();
  List<Artifact> result = new ArrayList<>(hints.size());
  for (Path hint : hints) {
    // The template has exactly one placeholder per argument; it previously had a fourth,
    // unfilled "%s".
    result.add(
        Preconditions.checkNotNull(
            artifactFactory.getSourceArtifact(sourcePath.relativize(hint), sourcePath),
            "%s %s %s",
            hint,
            sourcePath,
            path));
  }
  return result;
}
/**
 * Returns the inclusions contributed by INCLUDE_QUOTE and INCLUDE_ANGLE rules whose pattern
 * matches the exec path of the given artifact. The result has no duplicates and keeps the order
 * in which the rules first produced each inclusion.
 */
private Collection<Inclusion> getHintedInclusions(Artifact path) {
  String execPath = path.getExecPathString();
  // Created lazily, on the first matching rule. A LinkedHashSet keeps insertion order and
  // removes duplicates.
  Set<Inclusion> matches = null;
  for (final Rule rule : rules) {
    boolean isQuoteRule = rule.type == Rule.Type.INCLUDE_QUOTE;
    if (!isQuoteRule && rule.type != Rule.Type.INCLUDE_ANGLE) {
      continue;
    }
    if (!rule.pattern.matcher(execPath).matches()) {
      continue;
    }
    if (matches == null) {
      matches = Sets.newLinkedHashSet();
    }
    Inclusion hinted = new Inclusion(rule.findRoot, isQuoteRule ? Kind.QUOTE : Kind.ANGLE);
    matches.add(hinted);
    if (LOG_FINE) {
      logger.fine("hint for " + rule.type + " " + execPath + " root: " + hinted);
    }
  }
  if (matches == null || matches.isEmpty()) {
    return ImmutableList.of();
  }
  return ImmutableList.copyOf(matches);
}
}
/** Returns the hints helper this parser was constructed with; may be null. */
Hints getHints() {
return hints;
}
/**
 * An immutable inclusion tuple. This models an {@code #include} or {@code #include_next} line in
 * a file, without the context of how that file itself got included.
 */
public static class Inclusion {
  /** The format of the #include in the source file -- quoted, angle bracket, etc. */
  enum Kind {
    /** Quote includes: {@code #include "name"}. */
    QUOTE,
    /** Angle bracket includes: {@code #include <name>}. */
    ANGLE,
    /** Quote next includes: {@code #include_next "name"}. */
    NEXT_QUOTE,
    /** Angle next includes: {@code #include_next <name>}. */
    NEXT_ANGLE;

    /** Returns true if this is an {@code #include_next} inclusion. */
    boolean isNext() {
      switch (this) {
        case NEXT_QUOTE:
        case NEXT_ANGLE:
          return true;
        default:
          return false;
      }
    }
  }

  /** The kind of inclusion. */
  final Kind kind;
  /** The relative path of the inclusion. */
  final PathFragment pathFragment;

  /** Creates an inclusion from the textual include target. */
  Inclusion(String includeTarget, Kind kind) {
    this.kind = kind;
    this.pathFragment = PathFragment.create(includeTarget);
  }

  /** Creates an inclusion from an existing, non-null path fragment. */
  Inclusion(PathFragment pathFragment, Kind kind) {
    this.kind = kind;
    this.pathFragment = Preconditions.checkNotNull(pathFragment);
  }

  /** Returns the include target as a string. */
  String getPathString() {
    return pathFragment.getPathString();
  }

  @Override
  public String toString() {
    return kind + ":" + pathFragment.getPathString();
  }

  /** Two inclusions are equal when both their kind and their path are equal. */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o instanceof Inclusion) {
      Inclusion other = (Inclusion) o;
      return kind == other.kind && pathFragment.equals(other.pathFragment);
    }
    return false;
  }

  @Override
  public int hashCode() {
    int pathHash = pathFragment.hashCode();
    return pathHash * 37 + kind.hashCode();
  }
}
/** The externally-scoped immutable hints helper that is shared by all scanners. */
private final Hints hints;
/**
* Constructs a new IncludeParser.
*
* @param hints regexps for converting computed includes into simple strings; may be null, in
*     which case no hinted inclusions are added to scan results
*/
public IncludeParser(Hints hints) {
this.hints = hints;
}
/**
 * Advances past whitespace, backslash-newline pairs and block-style comments. Line comments are
 * assumed to be handled by the caller. Digraphs, trigraphs and the like are not recognized.
 *
 * @param chars characters to scan
 * @param pos the starting position
 * @param end the position after the last character to look at
 * @return the position of the first character that was not skipped, or the position at which
 *     scanning ran out of input
 */
protected static int skipWhitespace(byte[] chars, int pos, int end) {
  int cursor = pos;
  while (cursor < end) {
    byte current = chars[cursor];
    boolean hasNext = cursor + 1 < end;
    if (Character.isWhitespace(current & 0xff)) {
      cursor++;
      continue;
    }
    if (current == '\\' && hasNext && chars[cursor + 1] == '\n') {
      // Step over the backslash only; the newline is whitespace and goes on the next pass.
      cursor++;
      continue;
    }
    if (current != '/' || !hasNext || chars[cursor + 1] != '*') {
      return cursor; // not whitespace
    }
    // Inside a block comment: look for the closing "*/", never reading past end - 1.
    cursor += 2;
    for (int lastIndex = end - 1; cursor < lastIndex; ) {
      if (chars[cursor++] == '*' && chars[cursor] == '/') {
        cursor++;
        break; // proper comment end
      }
    }
  }
  return cursor; // ran out of input
}
private static final String HAS_INCLUDE = "__has_include";
private static final int HAS_INCLUDE_LENGTH = HAS_INCLUDE.length();
private static final int NECESSARY_HAS_INCLUDE_LENGTH = HAS_INCLUDE_LENGTH + 5;

/**
 * Finds the first occurrence of "__has_include" at or after {@code pos} and returns the index
 * just past it, or -1 if there is none. An occurrence only counts if at least 5 more characters
 * follow it before {@code end}: a pair of parentheses, a pair of angle brackets/quotes and a
 * filename.
 *
 * <p>This code runs on every line that starts with " *# *", so it should be as fast as possible.
 */
private static int skipThroughHasInclude(byte[] chars, int pos, int end) {
  final int lastStart = end - NECESSARY_HAS_INCLUDE_LENGTH;
  int start = pos;
  while (start <= lastStart) {
    int matched = 0;
    for (; matched < HAS_INCLUDE_LENGTH; matched++) {
      if ((chars[start + matched] & 0xff) != HAS_INCLUDE.charAt(matched)) {
        break;
      }
    }
    if (matched == HAS_INCLUDE_LENGTH) {
      return start + HAS_INCLUDE_LENGTH;
    }
    // We're looking for "__has_include" as a preprocessing token, so it can start neither
    // inside the characters that were just matched nor at the mismatching character.
    start += matched + 1;
  }
  return -1;
}
/**
 * Checks for and skips a given token.
 *
 * <p>The token is also found when it ends exactly at {@code end}; the returned position then
 * equals {@code end}.
 *
 * @param chars characters to scan
 * @param pos the starting position
 * @param end the position after the last character to look at
 * @param expected the expected token
 * @return the position just past the token if it starts at {@code pos}, otherwise -1
 */
protected static int expect(byte[] chars, int pos, int end, String expected) {
  int expectedLen = expected.length();
  if (end - pos < expectedLen) {
    return -1; // not enough input left to hold the token
  }
  for (int i = 0; i < expectedLen; i++) {
    if ((chars[pos + i] & 0xff) != expected.charAt(i)) {
      return -1;
    }
  }
  return pos + expectedLen;
}
/**
 * Searches forward for a single character.
 *
 * @param chars characters to scan
 * @param pos the position at which the search starts
 * @param end the position after the last character to look at
 * @param echar the character to find
 * @return the position of the first occurrence of echar in [pos, end), or -1 if there is none
 */
private static int indexOf(byte[] chars, int pos, int end, char echar) {
  for (int i = pos; i < end; i++) {
    if (chars[i] == echar) {
      return i;
    }
  }
  return -1;
}
// Matches a backslash immediately followed by a newline.
private static final Pattern BS_NL_PAT = Pattern.compile("\\\\" + "\n");
// Keep this in sync with the grep-includes binary's scanning output format.
// Maps the first character of an output line to the inclusion kind it stands for.
private static final ImmutableMap<Character, Kind> KIND_MAP = ImmutableMap.of(
'"', Kind.QUOTE,
'<', Kind.ANGLE,
'q', Kind.NEXT_QUOTE,
'a', Kind.NEXT_ANGLE);
/**
 * Processes the output generated by an auxiliary include-scanning binary. Closes the stream upon
 * completion.
 *
 * <p>If a source file has the following include statements:
 * <pre>
 * #include &lt;string&gt;
 * #include "directory/header.h"
 * </pre>
 *
 * <p>Then the output file has the following contents:
 * <pre>
 * "directory/header.h
 * &lt;string
 * </pre>
 * <p>Each line of the output is translated into an Inclusion object.
 *
 * @param streamName description of the stream, used only in error messages
 * @param is the stream to read; decoded as ISO-8859-1
 * @return one inclusion per line, in the order of the lines
 * @throws IOException if reading fails, a line is empty, or a line starts with a character that
 *     does not denote an inclusion kind; the underlying exception is attached as the cause
 */
public static List<Inclusion> processIncludes(Object streamName, InputStream is)
    throws IOException {
  List<Inclusion> inclusions = new ArrayList<>();
  try (InputStreamReader reader = new InputStreamReader(is, ISO_8859_1)) {
    for (String line : CharStreams.readLines(reader)) {
      if (line.isEmpty()) {
        // Report malformed output like any other format error instead of failing in charAt(0).
        throw new IOException("Illegal empty inclusion line");
      }
      char qchar = line.charAt(0);
      Kind kind = KIND_MAP.get(qchar);
      if (kind == null) {
        throw new IOException("Illegal inclusion kind '" + qchar + "'");
      }
      inclusions.add(new Inclusion(line.substring(1), kind));
    }
  } catch (IOException e) {
    throw new IOException("Error reading include file " + streamName + ": " + e.getMessage(), e);
  }
  return inclusions;
}
/** Parses a single line given as a string; the line is encoded as ISO-8859-1 before parsing. */
@VisibleForTesting
Inclusion extractInclusion(String line) {
  byte[] lineBytes = line.getBytes(ISO_8859_1);
  return extractInclusion(lineBytes, 0, line.length());
}
/**
 * Extracts a new, unresolved Inclusion from a line of source.
 *
 * <p>Bytes are decoded as ISO-8859-1, so the result does not depend on the platform's default
 * charset and matches how {@link #processIncludes} and {@link #extractInclusion(String)} treat
 * bytes.
 *
 * @param chars the byte array containing the line to parse
 * @param lineBegin the position of the first character in the line
 * @param lineEnd the position of the character after the last
 * @return the inclusion object if possible, null if none
 */
private Inclusion extractInclusion(byte[] chars, int lineBegin, int lineEnd) {
  // expect WS#WS(include|include_next|__has_include\(_next\)?)WS\(?("name"|<name>|<name>)\)?
  IncludesKeywordData data = expectIncludeKeyword(chars, lineBegin, lineEnd);
  int pos = data.pos;
  if (pos == -1 || pos == lineEnd) {
    return null;
  }
  boolean isNext = false;
  if (data.canHaveNext) {
    int npos = expect(chars, pos, lineEnd, "_next");
    if (npos >= 0) {
      isNext = true;
      pos = npos;
    }
  }
  if ((pos = skipWhitespace(chars, pos, lineEnd)) == lineEnd) {
    return null;
  }
  if (data.hasParens) {
    if (chars[pos] != '(') {
      return null;
    }
    pos++;
    if ((pos = skipWhitespace(chars, pos, lineEnd)) == lineEnd) {
      return null;
    }
  }
  if (chars[pos] == '"' || chars[pos] == '<') {
    char qchar = (char) (chars[pos++] & 0xff);
    int spos = pos;
    // The search for the closing delimiter starts one past the first name character, so an
    // empty name is never accepted.
    pos = indexOf(chars, pos + 1, lineEnd, qchar == '<' ? '>' : '"');
    if (pos < 0) {
      return null;
    }
    if (chars[spos] == '/') {
      return null; // disallow absolute paths
    }
    String name = new String(chars, spos, pos - spos, ISO_8859_1);
    if (name.contains("\n")) { // strip any \+NL pairs within name
      name = BS_NL_PAT.matcher(name).replaceAll("");
    }
    if (isNext) {
      return new Inclusion(name, qchar == '"' ? Kind.NEXT_QUOTE : Kind.NEXT_ANGLE);
    } else {
      return new Inclusion(name, qchar == '"' ? Kind.QUOTE : Kind.ANGLE);
    }
  } else {
    // Neither a quote nor an angle bracket: let the subclass hook decide.
    return createOtherInclusion(new String(chars, pos, lineEnd - pos, ISO_8859_1));
  }
}
/**
 * Extracts all inclusions from characters of a file.
 *
 * <p>The input is split into lines at newlines that are not preceded by a backslash; every line
 * is then parsed on its own. A backslash as the very last byte of the input is tolerated.
 *
 * @param chars the file contents to parse & extract inclusions from
 * @return a new list with the inclusions of all lines, in source order, restricted to the kinds
 *     accepted by {@link #isValidInclusionKind}
 */
@VisibleForTesting
List<Inclusion> extractInclusions(byte[] chars) {
  List<Inclusion> inclusions = new ArrayList<>();
  int lineBegin = 0; // the first char of each line
  int end = chars.length; // the file end
  while (lineBegin < end) {
    int lineEnd = lineBegin; // the char after the last non-\n in each line
    // skip to the next \n or after end of buffer, ignoring continuations
    while (lineEnd < end) {
      if (chars[lineEnd] == '\n') {
        break;
      } else if (chars[lineEnd] == '\\') {
        lineEnd++;
        // The backslash may be the last byte of the file, so check the bound before peeking.
        if (lineEnd < end && chars[lineEnd] == '\n') {
          lineEnd++;
        }
      } else {
        lineEnd++;
      }
    }
    // TODO(bazel-team) handle multiline block comments /* */ for the cases:
    //   /* blah blah blah
    //   lalala */ #include "foo.h"
    // and:
    //   /* blah
    //   #include "foo.h"
    //   */
    // extract the inclusion, and save only the kind we care about.
    Inclusion inclusion = extractInclusion(chars, lineBegin, lineEnd);
    if (inclusion != null && isValidInclusionKind(inclusion.kind)) {
      inclusions.add(inclusion);
    }
    lineBegin = lineEnd + 1; // next line starts after the previous line
  }
  return inclusions;
}
/**
 * Extracts all inclusions from a given source file.
 *
 * <p>The file is parsed by the remote scanner if one is given and it elects to parse this file;
 * otherwise it is read and parsed locally. If local reading fails with an {@link IOException}
 * and a remote scanner is available, the remote scanner is used as a fallback. Hinted inclusions
 * for the file are appended when hints are configured.
 *
 * @param file the file to parse & extract inclusions from
 * @param actionExecutionMetadata passed through to the remote scanner
 * @param actionExecutionContext Services in the scope of the action, like the stream to which
 *     scanning messages are printed
 * @param grepIncludes passed through to the remote scanner
 * @param remoteIncludeScanner scanner to delegate to, or null to always parse locally
 * @param isOutputFile passed through to the remote scanner
 * @return an immutable list holding the parsed inclusions followed by the hinted inclusions
 */
Collection<Inclusion> extractInclusions(
    Artifact file,
    ActionExecutionMetadata actionExecutionMetadata,
    ActionExecutionContext actionExecutionContext,
    Artifact grepIncludes,
    @Nullable SpawnIncludeScanner remoteIncludeScanner,
    boolean isOutputFile)
    throws IOException, ExecException, InterruptedException {
  Collection<Inclusion> inclusions;
  if (remoteIncludeScanner != null
      && remoteIncludeScanner.shouldParseRemotely(file, actionExecutionContext)) {
    inclusions =
        remoteIncludeScanner.extractInclusions(
            file,
            actionExecutionMetadata,
            actionExecutionContext,
            grepIncludes,
            getFileType(),
            isOutputFile);
  } else {
    try (SilentCloseable c =
        Profiler.instance().profile(ProfilerTask.SCANNER, file.getExecPathString())) {
      inclusions =
          extractInclusions(
              FileSystemUtils.readContent(actionExecutionContext.getInputPath(file)));
    } catch (IOException e) {
      if (remoteIncludeScanner != null) {
        logger.log(
            Level.WARNING,
            "Falling back on remote parsing of " + actionExecutionContext.getInputPath(file),
            e);
        inclusions =
            remoteIncludeScanner.extractInclusions(
                file,
                actionExecutionMetadata,
                actionExecutionContext,
                grepIncludes,
                getFileType(),
                isOutputFile);
      } else {
        throw e;
      }
    }
  }
  if (hints == null) {
    return ImmutableList.copyOf(inclusions);
  }
  // Build the result without touching 'inclusions': it may come from the remote scanner, and
  // this method cannot know whether that collection is mutable or shared.
  return ImmutableList.<Inclusion>builder()
      .addAll(inclusions)
      .addAll(hints.getHintedInclusions(file))
      .build();
}
/**
* Returns type of the scanned file.
*
* <p>Supported values are "c++" for standard c/c++ headers and sources, and "swig" for .swig
* files. Changes to this method must be synchronized with change to //tools/cpp:grep-includes.
*
* @return {@link GrepIncludesFileType#CPP} in this base implementation
*/
protected GrepIncludesFileType getFileType() {
return GrepIncludesFileType.CPP;
}
/**
 * Result of looking for an include keyword: the position right after the keyword, plus flags
 * telling the caller how the rest of the line has to be parsed.
 */
protected static class IncludesKeywordData {
  /** Marker for "no keyword found"; its position is -1. */
  protected static final IncludesKeywordData NONE = new IncludesKeywordData(-1, false, false);

  /** Position immediately after the keyword, or -1. */
  private final int pos;
  /** Whether the keyword may be followed by a "_next" suffix. */
  private final boolean canHaveNext;
  /** Whether the include target has to be wrapped in parentheses. */
  private final boolean hasParens;

  private IncludesKeywordData(int position, boolean nextAllowed, boolean parenthesized) {
    pos = position;
    canHaveNext = nextAllowed;
    hasParens = parenthesized;
  }

  /** Keyword that may carry a "_next" suffix and takes no parentheses. */
  protected static IncludesKeywordData normal(int pos) {
    return new IncludesKeywordData(pos, /* nextAllowed= */ true, /* parenthesized= */ false);
  }

  /** Keyword that takes neither a "_next" suffix nor parentheses. */
  protected static IncludesKeywordData importOrSwig(int pos) {
    return new IncludesKeywordData(pos, /* nextAllowed= */ false, /* parenthesized= */ false);
  }

  /** Keyword that may carry a "_next" suffix and requires parentheses. */
  protected static IncludesKeywordData hasInclude(int pos) {
    return new IncludesKeywordData(pos, /* nextAllowed= */ true, /* parenthesized= */ true);
  }
}
/**
 * Looks for {@code #} followed by an include keyword ("include", "import" or a later
 * "__has_include") and reports the position immediately after the keyword together with
 * information to aid further parsing. Returns {@link IncludesKeywordData#NONE}, whose position
 * is -1, if no keyword was found. Can be overridden by subclasses.
 */
protected IncludesKeywordData expectIncludeKeyword(byte[] chars, int position, int end) {
  int afterHash = expect(chars, skipWhitespace(chars, position, end), end, "#");
  if (afterHash <= 0) {
    return IncludesKeywordData.NONE;
  }
  int keywordStart = skipWhitespace(chars, afterHash, end);

  int afterKeyword = expect(chars, keywordStart, end, "include");
  if (afterKeyword > 0) {
    return IncludesKeywordData.normal(afterKeyword);
  }
  afterKeyword = expect(chars, keywordStart, end, "import");
  if (afterKeyword > 0) {
    return IncludesKeywordData.importOrSwig(afterKeyword);
  }
  afterKeyword = skipThroughHasInclude(chars, keywordStart, end);
  if (afterKeyword > 0) {
    return IncludesKeywordData.hasInclude(afterKeyword);
  }
  return IncludesKeywordData.NONE;
}
/**
* Returns true if we are interested in the given inclusion kind. Can be overridden by the
* subclass. This base implementation accepts every kind.
*
* @param kind the kind of a parsed inclusion
*/
protected boolean isValidInclusionKind(Kind kind) {
return true;
}
/**
* Returns inclusion object for non-standard inclusion cases or null if inclusion should be
* ignored. This base implementation always returns null.
*
* @param inclusionContent the text that follows the include keyword on the line
*/
@Nullable
protected Inclusion createOtherInclusion(String inclusionContent) {
return null;
}
}