Improve the performance of createFileSystem in WorkerExecRoot
The performance improvement is due to the reduction of the amount
of I/O performed when creating all necessary directories/symlinks.
Previously the code would delete all inputs and then recreate them,
even if some of the symlinks/directories were already pointing to
the right destinations. The new code will avoid deleting/recreating
anything that is already correct according to `SandboxInputs`.
For actions that have 1000-1500 inputs, this brings roughly a 2x
improvement: from about 100-150ms to 40-70ms. This is a non-trivial
amount for persistent workers that already cache a lot in-memory
and can perform some actions in less than 100ms.
RELNOTES: Improve the performance of creating a sandboxed execution root for workers when the number of inputs is large (>1000).
PiperOrigin-RevId: 295529113
diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerExecRoot.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerExecRoot.java
index fa90e33..3c39717 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/WorkerExecRoot.java
+++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerExecRoot.java
@@ -16,6 +16,7 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterables;
import com.google.devtools.build.lib.sandbox.SandboxHelpers.SandboxInputs;
import com.google.devtools.build.lib.sandbox.SandboxHelpers.SandboxOutputs;
import com.google.devtools.build.lib.sandbox.SymlinkedSandboxedSpawn;
@@ -26,12 +27,15 @@
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.lib.vfs.Symlinks;
import java.io.IOException;
-import java.util.Map;
+import java.util.LinkedHashSet;
+import java.util.Optional;
import java.util.Set;
/** Creates and manages the contents of a working directory of a persistent worker. */
final class WorkerExecRoot extends SymlinkedSandboxedSpawn {
private final Path workDir;
+ private final SandboxInputs inputs;
+ private final SandboxOutputs outputs;
private final Set<PathFragment> workerFiles;
public WorkerExecRoot(
@@ -47,63 +51,124 @@
new SynchronousTreeDeleter(),
/*statisticsPath=*/ null);
this.workDir = workDir;
+ this.inputs = inputs;
+ this.outputs = outputs;
this.workerFiles = workerFiles;
}
@Override
public void createFileSystem() throws IOException {
workDir.createDirectoryAndParents();
- deleteExceptAllowedFiles(workDir, workerFiles);
- super.createFileSystem();
+
+ // First compute all the inputs and directories that we need. This is based only on
+ // `workerFiles`, `inputs` and `outputs` and won't do any I/O.
+ Set<PathFragment> inputsToCreate = new LinkedHashSet<>();
+ Set<PathFragment> dirsToCreate = new LinkedHashSet<>();
+ populateInputsAndDirsToCreate(inputsToCreate, dirsToCreate);
+
+ // Then do a full traversal of the `workDir`. This will use what we computed above, delete
+ // anything unnecessary and update `inputsToCreate`/`dirsToCreate` if something is can be left
+ // without changes (e.g., a symlink that already points to the right destination).
+ cleanExisting(workDir, inputsToCreate, dirsToCreate);
+
+ // Finally, create anything that is still missing.
+ createDirectories(dirsToCreate);
+ createInputs(inputsToCreate);
}
- private void deleteExceptAllowedFiles(Path root, Set<PathFragment> allowedFiles)
+ /** Populates the provided sets with the inputs and directories than need to be created. */
+ private void populateInputsAndDirsToCreate(
+ Set<PathFragment> inputsToCreate, Set<PathFragment> dirsToCreate) {
+ // Add all worker files and the ancestor directories.
+ for (PathFragment path : workerFiles) {
+ inputsToCreate.add(path);
+ for (int i = 0; i < path.segmentCount(); i++) {
+ dirsToCreate.add(path.subFragment(0, i));
+ }
+ }
+
+ // Add all inputs files and the ancestor directories.
+ Iterable<PathFragment> allInputs =
+ Iterables.concat(inputs.getFiles().keySet(), inputs.getSymlinks().keySet());
+ for (PathFragment path : allInputs) {
+ inputsToCreate.add(path);
+ for (int i = 0; i < path.segmentCount(); i++) {
+ dirsToCreate.add(path.subFragment(0, i));
+ }
+ }
+
+ // Add all ouput directories.
+ dirsToCreate.addAll(outputs.dirs());
+
+ // And all ancestor directories of outputs. Note that we don't add the files themselves -- any
+ // pre-existing files that have the same path as an output should get deleted.
+ for (PathFragment path : Iterables.concat(outputs.files(), outputs.dirs())) {
+ for (int i = 0; i < path.segmentCount(); i++) {
+ dirsToCreate.add(path.subFragment(0, i));
+ }
+ }
+ }
+
+ /**
+ * Deletes unnecessary files/directories and updates the sets if something on disk is already
+ * correct and doesn't need any changes.
+ */
+ private void cleanExisting(
+ Path root, Set<PathFragment> inputsToCreate, Set<PathFragment> dirsToCreate)
throws IOException {
- for (Path p : root.getDirectoryEntries()) {
- FileStatus stat = p.stat(Symlinks.NOFOLLOW);
- if (!stat.isDirectory()) {
- if (!allowedFiles.contains(p.relativeTo(workDir))) {
- p.delete();
+ for (Path path : root.getDirectoryEntries()) {
+ FileStatus stat = path.stat(Symlinks.NOFOLLOW);
+ PathFragment pathRelativeToWorkDir = path.relativeTo(workDir);
+ Optional<PathFragment> destination = getExpectedSymlinkDestination(pathRelativeToWorkDir);
+ if (destination.isPresent()) {
+ if (stat.isSymbolicLink() && path.readSymbolicLink().equals(destination.get())) {
+ inputsToCreate.remove(pathRelativeToWorkDir);
+ } else {
+ path.delete();
}
- } else {
- deleteExceptAllowedFiles(p, allowedFiles);
- if (p.readdir(Symlinks.NOFOLLOW).isEmpty()) {
- p.delete();
+ } else if (stat.isDirectory()) {
+ if (dirsToCreate.contains(pathRelativeToWorkDir)) {
+ cleanExisting(path, inputsToCreate, dirsToCreate);
+ dirsToCreate.remove(pathRelativeToWorkDir);
+ } else {
+ path.deleteTree();
}
+ } else if (!inputsToCreate.contains(pathRelativeToWorkDir)) {
+ path.delete();
}
}
}
- @Override
- protected void createInputs(SandboxInputs inputs) throws IOException {
- // All input files are relative to the execroot.
- for (Map.Entry<PathFragment, Path> entry : inputs.getFiles().entrySet()) {
- Path key = workDir.getRelative(entry.getKey());
- FileStatus keyStat = key.statNullable(Symlinks.NOFOLLOW);
- if (keyStat != null) {
- if (keyStat.isSymbolicLink()
- && entry.getValue() != null
- && key.readSymbolicLink().equals(entry.getValue().asFragment())) {
- continue;
- }
- key.delete();
- }
- // A null value means that we're supposed to create an empty file as the input.
- if (entry.getValue() != null) {
- key.createSymbolicLink(entry.getValue());
- } else {
- FileSystemUtils.createEmptyFile(key);
- }
+ private Optional<PathFragment> getExpectedSymlinkDestination(PathFragment fragment) {
+ Path file = inputs.getFiles().get(fragment);
+ if (file != null) {
+ return Optional.of(file.asFragment());
}
+ return Optional.ofNullable(inputs.getSymlinks().get(fragment));
+ }
- for (Map.Entry<PathFragment, PathFragment> entry : inputs.getSymlinks().entrySet()) {
- Path key = workDir.getRelative(entry.getKey());
- FileStatus keyStat = key.statNullable(Symlinks.NOFOLLOW);
- if (keyStat != null) {
- // TODO(lberki): Why? Isn't this method supposed to be called on a fresh tree?
- key.delete();
+ private void createDirectories(Iterable<PathFragment> dirsToCreate) throws IOException {
+ for (PathFragment fragment : dirsToCreate) {
+ workDir.getRelative(fragment).createDirectory();
+ }
+ }
+
+ private void createInputs(Iterable<PathFragment> inputsToCreate) throws IOException {
+ for (PathFragment fragment : inputsToCreate) {
+ Path key = workDir.getRelative(fragment);
+ if (inputs.getFiles().containsKey(fragment)) {
+ Path fileDest = inputs.getFiles().get(fragment);
+ if (fileDest != null) {
+ key.createSymbolicLink(fileDest);
+ } else {
+ FileSystemUtils.createEmptyFile(key);
+ }
+ } else if (inputs.getSymlinks().containsKey(fragment)) {
+ PathFragment symlinkDest = inputs.getSymlinks().get(fragment);
+ if (symlinkDest != null) {
+ key.createSymbolicLink(symlinkDest);
+ }
}
- key.createSymbolicLink(entry.getValue());
}
}
}
diff --git a/src/test/java/com/google/devtools/build/lib/worker/WorkerExecRootTest.java b/src/test/java/com/google/devtools/build/lib/worker/WorkerExecRootTest.java
index 2654014..27f73e2 100644
--- a/src/test/java/com/google/devtools/build/lib/worker/WorkerExecRootTest.java
+++ b/src/test/java/com/google/devtools/build/lib/worker/WorkerExecRootTest.java
@@ -27,6 +27,7 @@
import com.google.devtools.build.lib.vfs.inmemoryfs.InMemoryFileSystem;
import java.io.IOException;
import java.nio.charset.Charset;
+import java.util.HashMap;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
@@ -154,4 +155,30 @@
assertThat(FileSystemUtils.readContent(otherWorkspaceFile, Charset.defaultCharset()))
.isEqualTo("other workspace content");
}
+
+ @Test
+ public void recreatesEmptyFiles() throws Exception {
+ // Simulate existing non-empty file in the exec root to check that `WorkerExecRoot` will clear
+ // the contents as requested by `SandboxInputs`.
+ FileSystemUtils.writeContentAsLatin1(execRoot.getRelative("some_file"), "some content");
+
+ HashMap<PathFragment, Path> inputs = new HashMap<>();
+ inputs.put(PathFragment.create("some_file"), null);
+ WorkerExecRoot workerExecRoot =
+ new WorkerExecRoot(
+ execRoot,
+ new SandboxInputs(inputs, ImmutableMap.of()),
+ SandboxOutputs.create(ImmutableSet.of(), ImmutableSet.of()),
+ ImmutableSet.of());
+
+ // This is interesting, because the filepath is a key in `SandboxInputs`, but its value is
+ // `null`, which means "create an empty file". So after `createFileSystem` the file should be
+ // empty.
+ workerExecRoot.createFileSystem();
+
+ assertThat(
+ FileSystemUtils.readContent(
+ execRoot.getRelative("some_file"), Charset.defaultCharset()))
+ .isEmpty();
+ }
}