Improve the performance of createFileSystem in WorkerExecRoot

The performance improvement is due to the reduction of the amount
of I/O performed when creating all necessary directories/symlinks.
Previously the code would delete all inputs and then recreate them,
even if some of the symlinks/directories were already pointing to
the right destinations. The new code will avoid deleting/recreating
anything that is already correct according to `SandboxInputs`.

For actions that have 1000-1500 inputs, this brings roughly a 2x
improvement: from about 100-150ms to 40-70ms. This is a non-trivial
amount for persistent workers that already cache a lot in-memory
and can perform some actions in less than 100ms.

RELNOTES: Improve the performance of creating a sandboxed execution root for workers when the number of inputs is large (>1000).
PiperOrigin-RevId: 295529113
diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerExecRoot.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerExecRoot.java
index fa90e33..3c39717 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/WorkerExecRoot.java
+++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerExecRoot.java
@@ -16,6 +16,7 @@
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterables;
 import com.google.devtools.build.lib.sandbox.SandboxHelpers.SandboxInputs;
 import com.google.devtools.build.lib.sandbox.SandboxHelpers.SandboxOutputs;
 import com.google.devtools.build.lib.sandbox.SymlinkedSandboxedSpawn;
@@ -26,12 +27,15 @@
 import com.google.devtools.build.lib.vfs.PathFragment;
 import com.google.devtools.build.lib.vfs.Symlinks;
 import java.io.IOException;
-import java.util.Map;
+import java.util.LinkedHashSet;
+import java.util.Optional;
 import java.util.Set;
 
 /** Creates and manages the contents of a working directory of a persistent worker. */
 final class WorkerExecRoot extends SymlinkedSandboxedSpawn {
   private final Path workDir;
+  private final SandboxInputs inputs;
+  private final SandboxOutputs outputs;
   private final Set<PathFragment> workerFiles;
 
   public WorkerExecRoot(
@@ -47,63 +51,124 @@
         new SynchronousTreeDeleter(),
         /*statisticsPath=*/ null);
     this.workDir = workDir;
+    this.inputs = inputs;
+    this.outputs = outputs;
     this.workerFiles = workerFiles;
   }
 
   @Override
   public void createFileSystem() throws IOException {
     workDir.createDirectoryAndParents();
-    deleteExceptAllowedFiles(workDir, workerFiles);
-    super.createFileSystem();
+
+    // First compute all the inputs and directories that we need. This is based only on
+    // `workerFiles`, `inputs` and `outputs` and won't do any I/O.
+    Set<PathFragment> inputsToCreate = new LinkedHashSet<>();
+    Set<PathFragment> dirsToCreate = new LinkedHashSet<>();
+    populateInputsAndDirsToCreate(inputsToCreate, dirsToCreate);
+
+    // Then do a full traversal of the `workDir`. This will use what we computed above, delete
+    // anything unnecessary and update `inputsToCreate`/`dirsToCreate` if something is can be left
+    // without changes (e.g., a symlink that already points to the right destination).
+    cleanExisting(workDir, inputsToCreate, dirsToCreate);
+
+    // Finally, create anything that is still missing.
+    createDirectories(dirsToCreate);
+    createInputs(inputsToCreate);
   }
 
-  private void deleteExceptAllowedFiles(Path root, Set<PathFragment> allowedFiles)
+  /** Populates the provided sets with the inputs and directories than need to be created. */
+  private void populateInputsAndDirsToCreate(
+      Set<PathFragment> inputsToCreate, Set<PathFragment> dirsToCreate) {
+    // Add all worker files and the ancestor directories.
+    for (PathFragment path : workerFiles) {
+      inputsToCreate.add(path);
+      for (int i = 0; i < path.segmentCount(); i++) {
+        dirsToCreate.add(path.subFragment(0, i));
+      }
+    }
+
+    // Add all inputs files and the ancestor directories.
+    Iterable<PathFragment> allInputs =
+        Iterables.concat(inputs.getFiles().keySet(), inputs.getSymlinks().keySet());
+    for (PathFragment path : allInputs) {
+      inputsToCreate.add(path);
+      for (int i = 0; i < path.segmentCount(); i++) {
+        dirsToCreate.add(path.subFragment(0, i));
+      }
+    }
+
+    // Add all ouput directories.
+    dirsToCreate.addAll(outputs.dirs());
+
+    // And all ancestor directories of outputs. Note that we don't add the files themselves -- any
+    // pre-existing files that have the same path as an output should get deleted.
+    for (PathFragment path : Iterables.concat(outputs.files(), outputs.dirs())) {
+      for (int i = 0; i < path.segmentCount(); i++) {
+        dirsToCreate.add(path.subFragment(0, i));
+      }
+    }
+  }
+
+  /**
+   * Deletes unnecessary files/directories and updates the sets if something on disk is already
+   * correct and doesn't need any changes.
+   */
+  private void cleanExisting(
+      Path root, Set<PathFragment> inputsToCreate, Set<PathFragment> dirsToCreate)
       throws IOException {
-    for (Path p : root.getDirectoryEntries()) {
-      FileStatus stat = p.stat(Symlinks.NOFOLLOW);
-      if (!stat.isDirectory()) {
-        if (!allowedFiles.contains(p.relativeTo(workDir))) {
-          p.delete();
+    for (Path path : root.getDirectoryEntries()) {
+      FileStatus stat = path.stat(Symlinks.NOFOLLOW);
+      PathFragment pathRelativeToWorkDir = path.relativeTo(workDir);
+      Optional<PathFragment> destination = getExpectedSymlinkDestination(pathRelativeToWorkDir);
+      if (destination.isPresent()) {
+        if (stat.isSymbolicLink() && path.readSymbolicLink().equals(destination.get())) {
+          inputsToCreate.remove(pathRelativeToWorkDir);
+        } else {
+          path.delete();
         }
-      } else {
-        deleteExceptAllowedFiles(p, allowedFiles);
-        if (p.readdir(Symlinks.NOFOLLOW).isEmpty()) {
-          p.delete();
+      } else if (stat.isDirectory()) {
+        if (dirsToCreate.contains(pathRelativeToWorkDir)) {
+          cleanExisting(path, inputsToCreate, dirsToCreate);
+          dirsToCreate.remove(pathRelativeToWorkDir);
+        } else {
+          path.deleteTree();
         }
+      } else if (!inputsToCreate.contains(pathRelativeToWorkDir)) {
+        path.delete();
       }
     }
   }
 
-  @Override
-  protected void createInputs(SandboxInputs inputs) throws IOException {
-    // All input files are relative to the execroot.
-    for (Map.Entry<PathFragment, Path> entry : inputs.getFiles().entrySet()) {
-      Path key = workDir.getRelative(entry.getKey());
-      FileStatus keyStat = key.statNullable(Symlinks.NOFOLLOW);
-      if (keyStat != null) {
-        if (keyStat.isSymbolicLink()
-            && entry.getValue() != null
-            && key.readSymbolicLink().equals(entry.getValue().asFragment())) {
-          continue;
-        }
-        key.delete();
-      }
-      // A null value means that we're supposed to create an empty file as the input.
-      if (entry.getValue() != null) {
-        key.createSymbolicLink(entry.getValue());
-      } else {
-        FileSystemUtils.createEmptyFile(key);
-      }
+  private Optional<PathFragment> getExpectedSymlinkDestination(PathFragment fragment) {
+    Path file = inputs.getFiles().get(fragment);
+    if (file != null) {
+      return Optional.of(file.asFragment());
     }
+    return Optional.ofNullable(inputs.getSymlinks().get(fragment));
+  }
 
-    for (Map.Entry<PathFragment, PathFragment> entry : inputs.getSymlinks().entrySet()) {
-      Path key = workDir.getRelative(entry.getKey());
-      FileStatus keyStat = key.statNullable(Symlinks.NOFOLLOW);
-      if (keyStat != null) {
-        // TODO(lberki): Why? Isn't this method supposed to be called on a fresh tree?
-        key.delete();
+  private void createDirectories(Iterable<PathFragment> dirsToCreate) throws IOException {
+    for (PathFragment fragment : dirsToCreate) {
+      workDir.getRelative(fragment).createDirectory();
+    }
+  }
+
+  private void createInputs(Iterable<PathFragment> inputsToCreate) throws IOException {
+    for (PathFragment fragment : inputsToCreate) {
+      Path key = workDir.getRelative(fragment);
+      if (inputs.getFiles().containsKey(fragment)) {
+        Path fileDest = inputs.getFiles().get(fragment);
+        if (fileDest != null) {
+          key.createSymbolicLink(fileDest);
+        } else {
+          FileSystemUtils.createEmptyFile(key);
+        }
+      } else if (inputs.getSymlinks().containsKey(fragment)) {
+        PathFragment symlinkDest = inputs.getSymlinks().get(fragment);
+        if (symlinkDest != null) {
+          key.createSymbolicLink(symlinkDest);
+        }
       }
-      key.createSymbolicLink(entry.getValue());
     }
   }
 }
diff --git a/src/test/java/com/google/devtools/build/lib/worker/WorkerExecRootTest.java b/src/test/java/com/google/devtools/build/lib/worker/WorkerExecRootTest.java
index 2654014..27f73e2 100644
--- a/src/test/java/com/google/devtools/build/lib/worker/WorkerExecRootTest.java
+++ b/src/test/java/com/google/devtools/build/lib/worker/WorkerExecRootTest.java
@@ -27,6 +27,7 @@
 import com.google.devtools.build.lib.vfs.inmemoryfs.InMemoryFileSystem;
 import java.io.IOException;
 import java.nio.charset.Charset;
+import java.util.HashMap;
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -154,4 +155,30 @@
     assertThat(FileSystemUtils.readContent(otherWorkspaceFile, Charset.defaultCharset()))
         .isEqualTo("other workspace content");
   }
+
+  @Test
+  public void recreatesEmptyFiles() throws Exception {
+    // Simulate existing non-empty file in the exec root to check that `WorkerExecRoot` will clear
+    // the contents as requested by `SandboxInputs`.
+    FileSystemUtils.writeContentAsLatin1(execRoot.getRelative("some_file"), "some content");
+
+    HashMap<PathFragment, Path> inputs = new HashMap<>();
+    inputs.put(PathFragment.create("some_file"), null);
+    WorkerExecRoot workerExecRoot =
+        new WorkerExecRoot(
+            execRoot,
+            new SandboxInputs(inputs, ImmutableMap.of()),
+            SandboxOutputs.create(ImmutableSet.of(), ImmutableSet.of()),
+            ImmutableSet.of());
+
+    // This is interesting, because the filepath is a key in `SandboxInputs`, but its value is
+    // `null`, which means "create an empty file". So after `createFileSystem` the file should be
+    // empty.
+    workerExecRoot.createFileSystem();
+
+    assertThat(
+            FileSystemUtils.readContent(
+                execRoot.getRelative("some_file"), Charset.defaultCharset()))
+        .isEmpty();
+  }
 }