add baseline functionality for not saving unused artifacts
We define unused artifacts as those that aren't consumed by any
action. This can be because an action produced more outputs than
a dependent action needed, or because it's a top level artifact
and we don't care about its contents, just that it was built
without issue. Actions may prevent outputs from being discarded
by declaring them as mandatory. This is particularly useful for
test outputs. The motivation behind this change is to reduce
storage overhead for things we can do without.
It is worth noting this change doesn't cover all cases. In particular
it has difficulty identifying *_binary artifacts as orphaned. This
is due to the insertion of a virtual runfiles artifact which depends
upon the rule's outputs.
--
MOS_MIGRATED_REVID=88467504
diff --git a/src/main/java/com/google/devtools/build/lib/actions/AbstractAction.java b/src/main/java/com/google/devtools/build/lib/actions/AbstractAction.java
index 82d9248..0945747 100644
--- a/src/main/java/com/google/devtools/build/lib/actions/AbstractAction.java
+++ b/src/main/java/com/google/devtools/build/lib/actions/AbstractAction.java
@@ -335,6 +335,11 @@
.setMnemonic(getMnemonic());
}
+ @Override
+ public ImmutableSet<Artifact> getMandatoryOutputs() {
+ return ImmutableSet.of();
+ }
+
/**
* Returns input files that need to be present to allow extra_action rules to shadow this action
* correctly when run remotely. This is at least the normal inputs of the action, but may include
diff --git a/src/main/java/com/google/devtools/build/lib/actions/ActionCacheChecker.java b/src/main/java/com/google/devtools/build/lib/actions/ActionCacheChecker.java
index 07b9977..f40003b 100644
--- a/src/main/java/com/google/devtools/build/lib/actions/ActionCacheChecker.java
+++ b/src/main/java/com/google/devtools/build/lib/actions/ActionCacheChecker.java
@@ -216,10 +216,12 @@
if (!key.equals(execPath)) {
actionCache.remove(key);
}
- // Output files *must* exist and be accessible after successful action execution.
- Metadata metadata = metadataHandler.getMetadata(output);
- Preconditions.checkState(metadata != null);
- entry.addFile(output.getExecPath(), metadata);
+ if (!metadataHandler.artifactOmitted(output)) {
+ // Output files *must* exist and be accessible after successful action execution.
+ Metadata metadata = metadataHandler.getMetadata(output);
+ Preconditions.checkState(metadata != null);
+ entry.addFile(output.getExecPath(), metadata);
+ }
}
for (Artifact input : action.getInputs()) {
entry.addFile(input.getExecPath(), metadataHandler.getMetadataMaybe(input));
diff --git a/src/main/java/com/google/devtools/build/lib/actions/ActionMetadata.java b/src/main/java/com/google/devtools/build/lib/actions/ActionMetadata.java
index 3569f07..64813ae 100644
--- a/src/main/java/com/google/devtools/build/lib/actions/ActionMetadata.java
+++ b/src/main/java/com/google/devtools/build/lib/actions/ActionMetadata.java
@@ -124,6 +124,14 @@
ImmutableSet<Artifact> getOutputs();
/**
+ * Returns the set of output Artifacts that are required to be saved. This is
+ * used to identify items that would otherwise be potentially identified as
+ * orphaned (not consumed by any downstream {@link Action}s and potentially
+ * discarded during the build process.
+ */
+ public ImmutableSet<Artifact> getMandatoryOutputs();
+
+ /**
* Returns the "primary" input of this action, if applicable.
*
* <p>For example, a C++ compile action would return the .cc file which is being compiled,
diff --git a/src/main/java/com/google/devtools/build/lib/actions/cache/MetadataHandler.java b/src/main/java/com/google/devtools/build/lib/actions/cache/MetadataHandler.java
index 9d288db..61a5d6d 100644
--- a/src/main/java/com/google/devtools/build/lib/actions/cache/MetadataHandler.java
+++ b/src/main/java/com/google/devtools/build/lib/actions/cache/MetadataHandler.java
@@ -48,11 +48,27 @@
*/
void injectDigest(ActionInput output, FileStatus statNoFollow, byte[] digest);
- /** Returns true iff artifact exists. */
+ /**
+ * Marks an artifact as intentionally omitted. Acknowledges that this Artifact could have
+ * existed, but was intentionally not saved, most likely as an optimization.
+ */
+ void markOmitted(ActionInput output);
+
+ /**
+ * Returns true iff artifact exists.
+ *
+ * <p>It is important to note that implementations may cache non-existence as a side effect
+ * of this method. If there is a possibility an artifact was intentionally omitted then
+ * {@link #artifactOmitted(Artifact)} should be checked first to avoid the side effect.</p>
+ */
boolean artifactExists(Artifact artifact);
+
/** Returns true iff artifact is a regular file. */
boolean isRegularFile(Artifact artifact);
+ /** Returns true iff artifact was intentionally omitted (not saved). */
+ boolean artifactOmitted(Artifact artifact);
+
/**
* @return Whether the artifact's data was injected.
* @throws IOException if implementation tried to stat artifact which threw an exception.
diff --git a/src/main/java/com/google/devtools/build/lib/rules/test/TestRunnerAction.java b/src/main/java/com/google/devtools/build/lib/rules/test/TestRunnerAction.java
index 13ec62c..9e9c180 100644
--- a/src/main/java/com/google/devtools/build/lib/rules/test/TestRunnerAction.java
+++ b/src/main/java/com/google/devtools/build/lib/rules/test/TestRunnerAction.java
@@ -17,6 +17,7 @@
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.devtools.build.lib.actions.AbstractAction;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
@@ -535,6 +536,11 @@
return "TestRunner";
}
+ @Override
+ public ImmutableSet<Artifact> getMandatoryOutputs() {
+ return getOutputs();
+ }
+
/**
* The same set of paths as the parent test action, resolved against a given exec root.
*/
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/FileAndMetadataCache.java b/src/main/java/com/google/devtools/build/lib/skyframe/FileAndMetadataCache.java
index 2a0de78..5014c0f 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/FileAndMetadataCache.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/FileAndMetadataCache.java
@@ -76,6 +76,7 @@
private final Map<ByteString, Artifact> reverseMap = new ConcurrentHashMap<>();
private final ConcurrentMap<Artifact, FileValue> outputArtifactData =
new ConcurrentHashMap<>();
+ private final Set<Artifact> omittedOutputs = Sets.newConcurrentHashSet();
// See #getAdditionalOutputData for documentation of this field.
private final ConcurrentMap<Artifact, FileArtifactValue> additionalOutputData =
new ConcurrentHashMap<>();
@@ -328,6 +329,20 @@
}
@Override
+ public void markOmitted(ActionInput output) {
+ if (output instanceof Artifact) {
+ Artifact artifact = (Artifact) output;
+ Preconditions.checkState(omittedOutputs.add(artifact), artifact);
+ additionalOutputData.put(artifact, FileArtifactValue.OMITTED_FILE_MARKER);
+ }
+ }
+
+ @Override
+ public boolean artifactOmitted(Artifact artifact) {
+ return omittedOutputs.contains(artifact);
+ }
+
+ @Override
public void discardMetadata(Collection<Artifact> artifactList) {
Preconditions.checkState(injectedArtifacts.isEmpty(),
"Artifacts cannot be injected before action execution: %s", injectedArtifacts);
@@ -337,6 +352,7 @@
@Override
public boolean artifactExists(Artifact artifact) {
+ Preconditions.checkState(!artifactOmitted(artifact), artifact);
return getMetadataMaybe(artifact) != null;
}
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/FileArtifactValue.java b/src/main/java/com/google/devtools/build/lib/skyframe/FileArtifactValue.java
index 7acc38c..f1f0b3e 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/FileArtifactValue.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/FileArtifactValue.java
@@ -36,6 +36,18 @@
/** Data that marks that a file is not present on the filesystem. */
static final FileArtifactValue MISSING_FILE_MARKER = new FileArtifactValue(null, 1, 0);
+ /**
+ * Represents an omitted file- we are aware of it but it doesn't exist. All access methods
+ * are unsupported.
+ */
+ static final FileArtifactValue OMITTED_FILE_MARKER = new FileArtifactValue(null, 2, 0) {
+ @Override public byte[] getDigest() { throw new UnsupportedOperationException(); }
+ @Override public long getSize() { throw new UnsupportedOperationException(); }
+ @Override public long getModifiedTime() { throw new UnsupportedOperationException(); }
+ @Override public boolean equals(Object o) { return this == o; }
+ @Override public String toString() { return "OMITTED_FILE_MARKER"; }
+ };
+
@Nullable private final byte[] digest;
private final long mtime;
private final long size;
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeActionExecutor.java b/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeActionExecutor.java
index ebae390..7b71fba 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeActionExecutor.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeActionExecutor.java
@@ -268,6 +268,17 @@
// intra-build.
perActionHandler.discardMetadata(artifactList);
}
+
+ @Override
+ public void markOmitted(ActionInput output) {
+ perActionHandler.markOmitted(output);
+ }
+
+ @Override
+ public boolean artifactOmitted(Artifact artifact) {
+ return perActionHandler.artifactOmitted(artifact);
+ }
+
}
/**
@@ -944,14 +955,17 @@
}
/**
- * Validates that all action outputs were created.
+ * Validates that all action outputs were created or intentionally omitted.
*
* @return false if some outputs are missing, true - otherwise.
*/
private boolean checkOutputs(Action action, MetadataHandler metadataHandler) {
boolean success = true;
for (Artifact output : action.getOutputs()) {
- if (!metadataHandler.artifactExists(output)) {
+ // artifactExists has the side effect of potentially adding the artifact to the cache,
+ // therefore we only call it if we know the artifact is indeed not omitted to avoid any
+ // unintended side effects.
+ if (!(metadataHandler.artifactOmitted(output) || metadataHandler.artifactExists(output))) {
reportMissingOutputFile(action, output, reporter, output.getPath().isSymbolicLink());
success = false;
}