workers: A multitude of bug fixes and improved logging.

I know this should have been split up, but I was a bit on fire today and did it all in one go ^^;

Fixes spurious "Stream closed: Stream closed" errors, by noticing dead workers and retrying with a fresh one. (Configurable with the --worker_max_retries flag.)

Fixes an "IllegalArgumentException" when a non-worker compatible Spawn is given to the strategy. We fall back to StandaloneSpawnStrategy now.

Redirect the stderr of worker processes to separate log files in a common sub-directory and print a message that tells you about the location on worker start-up for easier debugging. The log can be found in <output_base>/worker-logs/*.log.

Adds the mnemonic of the Spawn to log messages and the log filename.

Adds verbose messages on worker start-up and shutdown. (Enable it with --worker_verbose!)

Shuts down the worker pool after a build finished by default, until we sort out one last remaining correctness issue. This also conserves resources, though makes incremental builds a bit slower. Want the maximum performance anyway? Try --experimental_workers_keep_running.

Adds stack traces to errors that are caused by buggy workers to aid development.

Fixes weird dupli..tripli..quadruple error messages ("Compiling failed: Stream closed: Stream closed: Stream closed: Stream closed.").

--
MOS_MIGRATED_REVID=102983853
diff --git a/src/main/java/com/google/devtools/build/lib/worker/BUILD b/src/main/java/com/google/devtools/build/lib/worker/BUILD
index aee455c..4d6cb1a 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/worker/BUILD
@@ -9,11 +9,15 @@
     srcs = glob(["*.java"]),
     deps = [
         "//src/main/java:actions",
+        "//src/main/java:analysis-exec-rules-skyframe",
         "//src/main/java:buildtool-runtime",
         "//src/main/java:common",
         "//src/main/java:concurrent",
+        "//src/main/java:events",
         "//src/main/java:options",
+        "//src/main/java:packages",
         "//src/main/java:vfs",
+        "//src/main/java/com/google/devtools/build/lib/standalone",
         "//src/main/protobuf:proto_worker_protocol",
         "//third_party:apache_commons_pool2",
         "//third_party:guava",
diff --git a/src/main/java/com/google/devtools/build/lib/worker/Worker.java b/src/main/java/com/google/devtools/build/lib/worker/Worker.java
index 409394d..d76b9fe 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/Worker.java
+++ b/src/main/java/com/google/devtools/build/lib/worker/Worker.java
@@ -14,11 +14,15 @@
 package com.google.devtools.build.lib.worker;
 
 import com.google.common.base.Preconditions;
+import com.google.devtools.build.lib.events.Event;
+import com.google.devtools.build.lib.events.Reporter;
+import com.google.devtools.build.lib.vfs.Path;
 
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.lang.ProcessBuilder.Redirect;
+import java.util.concurrent.atomic.AtomicInteger;
 
 /**
  * Interface to a worker process running as a child process.
@@ -32,19 +36,29 @@
  * class.
  */
 final class Worker {
+  private static final AtomicInteger pidCounter = new AtomicInteger();
+  private final int workerId;
   private final Process process;
   private final Thread shutdownHook;
 
-  private Worker(Process process, Thread shutdownHook) {
+  private Worker(Process process, Thread shutdownHook, int pid) {
     this.process = process;
     this.shutdownHook = shutdownHook;
+    this.workerId = pid;
   }
 
-  static Worker create(WorkerKey key) throws IOException {
+  static Worker create(WorkerKey key, Path logDir, Reporter reporter, boolean verbose)
+      throws IOException {
     Preconditions.checkNotNull(key);
-    ProcessBuilder processBuilder = new ProcessBuilder(key.getArgs().toArray(new String[0]))
-        .directory(key.getWorkDir().getPathFile())
-        .redirectError(Redirect.INHERIT);
+    Preconditions.checkNotNull(logDir);
+
+    int workerId = pidCounter.getAndIncrement();
+    Path logFile = logDir.getRelative("worker-" + workerId + "-" + key.getMnemonic() + ".log");
+
+    ProcessBuilder processBuilder =
+        new ProcessBuilder(key.getArgs().toArray(new String[0]))
+            .directory(key.getWorkDir().getPathFile())
+            .redirectError(Redirect.appendTo(logFile.getPathFile()));
     processBuilder.environment().putAll(key.getEnv());
 
     final Process process = processBuilder.start();
@@ -57,7 +71,18 @@
     };
     Runtime.getRuntime().addShutdownHook(shutdownHook);
 
-    return new Worker(process, shutdownHook);
+    if (verbose) {
+      reporter.handle(
+          Event.info(
+              "Created new "
+                  + key.getMnemonic()
+                  + " worker (id "
+                  + workerId
+                  + "), logging to "
+                  + logFile));
+    }
+
+    return new Worker(process, shutdownHook, workerId);
   }
 
   void destroy() {
@@ -65,6 +90,14 @@
     process.destroy();
   }
 
+  /**
+   * Returns a unique id for this worker. This is used to distinguish different worker processes in
+   * logs and messages.
+   */
+  int getWorkerId() {
+    return this.workerId;
+  }
+
   boolean isAlive() {
     // This is horrible, but Process.isAlive() is only available from Java 8 on and this is the
     // best we can do prior to that.
diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextConsumer.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextConsumer.java
new file mode 100644
index 0000000..4269ed5
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextConsumer.java
@@ -0,0 +1,41 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.worker;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableMap.Builder;
+import com.google.devtools.build.lib.actions.ActionContextConsumer;
+import com.google.devtools.build.lib.actions.Executor.ActionContext;
+import com.google.devtools.build.lib.actions.SpawnActionContext;
+
+import java.util.Map;
+
+/**
+ * {@link ActionContextConsumer} that requests the action contexts necessary for worker process
+ * execution.
+ */
+public class WorkerActionContextConsumer implements ActionContextConsumer {
+
+  @Override
+  public Map<String, String> getSpawnActionContexts() {
+    return ImmutableMap.of();
+  }
+
+  @Override
+  public Map<Class<? extends ActionContext>, String> getActionContexts() {
+    Builder<Class<? extends ActionContext>, String> contexts = ImmutableMap.builder();
+    contexts.put(SpawnActionContext.class, "worker");
+    return contexts.build();
+  }
+}
diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextProvider.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextProvider.java
index 0db3a46..66d0208 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextProvider.java
+++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextProvider.java
@@ -18,6 +18,8 @@
 import com.google.devtools.build.lib.actions.ActionContextProvider;
 import com.google.devtools.build.lib.actions.Executor.ActionContext;
 import com.google.devtools.build.lib.buildtool.BuildRequest;
+import com.google.devtools.build.lib.exec.ExecutionOptions;
+import com.google.devtools.build.lib.runtime.BlazeRuntime;
 
 /**
  * Factory for the Worker-based execution strategy.
@@ -26,9 +28,19 @@
   private final ImmutableList<ActionContext> strategies;
 
   public WorkerActionContextProvider(
-      BuildRequest buildRequest, WorkerPool workers, EventBus eventBus) {
+      BlazeRuntime runtime, BuildRequest buildRequest, WorkerPool workers, EventBus eventBus) {
+    boolean verboseFailures = buildRequest.getOptions(ExecutionOptions.class).verboseFailures;
+    int maxRetries = buildRequest.getOptions(WorkerOptions.class).workerMaxRetries;
+
     this.strategies =
-        ImmutableList.<ActionContext>of(new WorkerSpawnStrategy(buildRequest, workers, eventBus));
+        ImmutableList.<ActionContext>of(
+            new WorkerSpawnStrategy(
+                runtime.getDirectories(),
+                buildRequest,
+                eventBus,
+                workers,
+                verboseFailures,
+                maxRetries));
   }
 
   @Override
diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerFactory.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerFactory.java
index 8248e1c..208b88c 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/WorkerFactory.java
+++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerFactory.java
@@ -13,6 +13,10 @@
 // limitations under the License.
 package com.google.devtools.build.lib.worker;
 
+import com.google.devtools.build.lib.events.Event;
+import com.google.devtools.build.lib.events.Reporter;
+import com.google.devtools.build.lib.vfs.Path;
+
 import org.apache.commons.pool2.BaseKeyedPooledObjectFactory;
 import org.apache.commons.pool2.PooledObject;
 import org.apache.commons.pool2.impl.DefaultPooledObject;
@@ -21,9 +25,25 @@
  * Factory used by the pool to create / destroy / validate worker processes.
  */
 final class WorkerFactory extends BaseKeyedPooledObjectFactory<WorkerKey, Worker> {
+  private Path logDir;
+  private Reporter reporter;
+  private boolean verbose;
+
+  public void setLogDirectory(Path logDir) {
+    this.logDir = logDir;
+  }
+
+  public void setReporter(Reporter reporter) {
+    this.reporter = reporter;
+  }
+
+  public void setVerbose(boolean verbose) {
+    this.verbose = verbose;
+  }
+
   @Override
   public Worker create(WorkerKey key) throws Exception {
-    return Worker.create(key);
+    return Worker.create(key, logDir, reporter, verbose);
   }
 
   /**
@@ -39,6 +59,15 @@
    */
   @Override
   public void destroyObject(WorkerKey key, PooledObject<Worker> p) throws Exception {
+    if (verbose) {
+      reporter.handle(
+          Event.info(
+              "Destroying "
+                  + key.getMnemonic()
+                  + " worker (id "
+                  + p.getObject().getWorkerId()
+                  + ")."));
+    }
     p.getObject().destroy();
   }
 
@@ -47,6 +76,15 @@
    */
   @Override
   public boolean validateObject(WorkerKey key, PooledObject<Worker> p) {
+    if (verbose) {
+      reporter.handle(
+          Event.info(
+              "Validating "
+                  + key.getMnemonic()
+                  + " worker (id "
+                  + p.getObject().getWorkerId()
+                  + ")."));
+    }
     return p.getObject().isAlive();
   }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerKey.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerKey.java
index e0ce5cf..b9247ce 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/WorkerKey.java
+++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerKey.java
@@ -30,11 +30,13 @@
   private final ImmutableList<String> args;
   private final ImmutableMap<String, String> env;
   private final Path workDir;
+  private final String mnemonic;
 
-  WorkerKey(List<String> args, Map<String, String> env, Path workDir) {
+  WorkerKey(List<String> args, Map<String, String> env, Path workDir, String mnemonic) {
     this.args = ImmutableList.copyOf(Preconditions.checkNotNull(args));
     this.env = ImmutableMap.copyOf(Preconditions.checkNotNull(env));
     this.workDir = Preconditions.checkNotNull(workDir);
+    this.mnemonic = Preconditions.checkNotNull(mnemonic);
   }
 
   public ImmutableList<String> getArgs() {
@@ -49,6 +51,10 @@
     return workDir;
   }
 
+  public String getMnemonic() {
+    return mnemonic;
+  }
+
   @Override
   public boolean equals(Object o) {
     if (this == o) {
@@ -66,7 +72,11 @@
     if (!env.equals(workerKey.env)) {
       return false;
     }
-    return workDir.equals(workerKey.workDir);
+    if (!workDir.equals(workerKey.workDir)) {
+      return false;
+    }
+    return mnemonic.equals(workerKey.mnemonic);
+
   }
 
   @Override
@@ -74,6 +84,7 @@
     int result = args.hashCode();
     result = 31 * result + env.hashCode();
     result = 31 * result + workDir.hashCode();
+    result = 31 * result + mnemonic.hashCode();
     return result;
   }
 
diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerModule.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerModule.java
index b7de57f..07943ec 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/WorkerModule.java
+++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerModule.java
@@ -13,38 +13,34 @@
 // limitations under the License.
 package com.google.devtools.build.lib.worker;
 
+import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import com.google.common.eventbus.Subscribe;
+import com.google.devtools.build.lib.actions.ActionContextConsumer;
 import com.google.devtools.build.lib.actions.ActionContextProvider;
 import com.google.devtools.build.lib.buildtool.BuildRequest;
+import com.google.devtools.build.lib.buildtool.buildevent.BuildCompleteEvent;
+import com.google.devtools.build.lib.buildtool.buildevent.BuildInterruptedEvent;
 import com.google.devtools.build.lib.buildtool.buildevent.BuildStartingEvent;
+import com.google.devtools.build.lib.events.Event;
 import com.google.devtools.build.lib.runtime.BlazeModule;
 import com.google.devtools.build.lib.runtime.BlazeRuntime;
 import com.google.devtools.build.lib.runtime.Command;
+import com.google.devtools.build.lib.vfs.Path;
 import com.google.devtools.common.options.OptionsBase;
 
 import org.apache.commons.pool2.impl.GenericKeyedObjectPoolConfig;
 
+import java.io.IOException;
+
 /**
  * A module that adds the WorkerActionContextProvider to the available action context providers.
  */
 public class WorkerModule extends BlazeModule {
-  private final WorkerPool workers;
-
-  private BuildRequest buildRequest;
   private BlazeRuntime blazeRuntime;
-
-  public WorkerModule() {
-    GenericKeyedObjectPoolConfig config = new GenericKeyedObjectPoolConfig();
-    config.setTimeBetweenEvictionRunsMillis(10 * 1000);
-    workers = new WorkerPool(new WorkerFactory(), config);
-  }
-
-  @Override
-  public Iterable<ActionContextProvider> getActionContextProviders() {
-    return ImmutableList.<ActionContextProvider>of(
-        new WorkerActionContextProvider(buildRequest, workers, blazeRuntime.getEventBus()));
-  }
+  private BuildRequest buildRequest;
+  private WorkerPool workers;
+  private boolean verbose;
 
   @Override
   public Iterable<Class<? extends OptionsBase>> getCommandOptions(Command command) {
@@ -55,17 +51,90 @@
 
   @Override
   public void beforeCommand(BlazeRuntime blazeRuntime, Command command) {
-    this.blazeRuntime = blazeRuntime;
+    this.blazeRuntime = Preconditions.checkNotNull(blazeRuntime);
     blazeRuntime.getEventBus().register(this);
+
+    if (workers == null) {
+      Path logDir = blazeRuntime.getOutputBase().getRelative("worker-logs");
+      try {
+        logDir.createDirectory();
+      } catch (IOException e) {
+        blazeRuntime
+            .getReporter()
+            .handle(Event.error("Could not create directory for worker logs: " + logDir));
+      }
+
+      GenericKeyedObjectPoolConfig config = new GenericKeyedObjectPoolConfig();
+      config.setTimeBetweenEvictionRunsMillis(10 * 1000);
+
+      workers = new WorkerPool(new WorkerFactory(), config);
+      workers.setReporter(blazeRuntime.getReporter());
+      workers.setLogDirectory(logDir);
+    }
   }
 
   @Subscribe
   public void buildStarting(BuildStartingEvent event) {
-    buildRequest = event.getRequest();
+    Preconditions.checkNotNull(workers);
+
+    this.buildRequest = event.getRequest();
+
+    WorkerOptions options = buildRequest.getOptions(WorkerOptions.class);
+    workers.setMaxTotalPerKey(options.workerMaxInstances);
+    workers.setMaxIdlePerKey(options.workerMaxInstances);
+    workers.setMinIdlePerKey(options.workerMaxInstances);
+    workers.setVerbose(options.workerVerbose);
+    this.verbose = options.workerVerbose;
+  }
+
+  @Override
+  public Iterable<ActionContextProvider> getActionContextProviders() {
+    Preconditions.checkNotNull(blazeRuntime);
+    Preconditions.checkNotNull(buildRequest);
+    Preconditions.checkNotNull(workers);
+
+    return ImmutableList.<ActionContextProvider>of(
+        new WorkerActionContextProvider(
+            blazeRuntime, buildRequest, workers, blazeRuntime.getEventBus()));
+  }
+
+  @Override
+  public Iterable<ActionContextConsumer> getActionContextConsumers() {
+    return ImmutableList.<ActionContextConsumer>of(new WorkerActionContextConsumer());
+  }
+
+  @Subscribe
+  public void buildComplete(BuildCompleteEvent event) {
+    if (workers != null && buildRequest.getOptions(WorkerOptions.class).workerQuitAfterBuild) {
+      if (verbose) {
+        blazeRuntime
+            .getReporter()
+            .handle(Event.info("Build completed, shutting down worker pool..."));
+      }
+      workers.close();
+      workers = null;
+    }
+  }
+
+  // Kill workers on Ctrl-C to quickly end the interrupted build.
+  // TODO(philwo) - make sure that this actually *kills* the workers and not just politely waits
+  // for them to finish.
+  @Subscribe
+  public void buildInterrupted(BuildInterruptedEvent event) {
+    if (workers != null) {
+      if (verbose) {
+        blazeRuntime
+            .getReporter()
+            .handle(Event.info("Build interrupted, shutting down worker pool..."));
+      }
+      workers.close();
+      workers = null;
+    }
   }
 
   @Override
   public void afterCommand() {
-    buildRequest = null;
+    this.blazeRuntime = null;
+    this.buildRequest = null;
   }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerOptions.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerOptions.java
index b4471c0..2947a66 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/WorkerOptions.java
+++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerOptions.java
@@ -23,21 +23,26 @@
 public class WorkerOptions extends OptionsBase {
   public static final WorkerOptions DEFAULTS = Options.getDefaults(WorkerOptions.class);
 
-  @Option(name = "worker_max_instances",
-      defaultValue = "4",
-      category = "strategy",
-      help = "How many instances of a worker process (like the persistent Java compiler) may be "
-          + "launched if you use the 'worker' strategy.")
-  public int workerMaxInstances;
-
-  @Option(name = "experimental_persistent_javac",
-      defaultValue = "null",
-      category = "undocumented",
-      help = "Enable the experimental persistent Java compiler.",
-      expansion = {"--strategy=Javac=worker", "--strategy=JavaIjar=local"})
+  @Option(
+    name = "experimental_persistent_javac",
+    defaultValue = "null",
+    category = "strategy",
+    help = "Enable the experimental persistent Java compiler.",
+    expansion = {"--strategy=Javac=worker", "--strategy=JavaIjar=local"}
+  )
   public Void experimentalPersistentJavac;
 
   @Option(
+    name = "worker_max_instances",
+    defaultValue = "4",
+    category = "strategy",
+    help =
+        "How many instances of a worker process (like the persistent Java compiler) may be "
+            + "launched if you use the 'worker' strategy."
+  )
+  public int workerMaxInstances;
+
+  @Option(
     name = "worker_max_changed_files",
     defaultValue = "0",
     category = "strategy",
@@ -46,4 +51,28 @@
             + "workers."
   )
   public int workerMaxChangedFiles;
+
+  @Option(
+    name = "worker_max_retries",
+    defaultValue = "3",
+    category = "strategy",
+    help = "If a worker fails during work, retry <worker_max_retries> times before giving up."
+  )
+  public int workerMaxRetries;
+
+  @Option(
+    name = "worker_quit_after_build",
+    defaultValue = "false",
+    category = "strategy",
+    help = "If enabled, all workers quit after a build is done."
+  )
+  public boolean workerQuitAfterBuild;
+
+  @Option(
+    name = "worker_verbose",
+    defaultValue = "true",
+    category = "strategy",
+    help = "If enabled, prints verbose messages when workers are started, shutdown, ..."
+  )
+  public boolean workerVerbose;
 }
diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerPool.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerPool.java
index b834d54b..4df8ddc 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/WorkerPool.java
+++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerPool.java
@@ -13,10 +13,15 @@
 // limitations under the License.
 package com.google.devtools.build.lib.worker;
 
-import org.apache.commons.pool2.KeyedPooledObjectFactory;
+import com.google.devtools.build.lib.events.Reporter;
+import com.google.devtools.build.lib.vfs.Path;
+
 import org.apache.commons.pool2.impl.GenericKeyedObjectPool;
 import org.apache.commons.pool2.impl.GenericKeyedObjectPoolConfig;
 
+import java.util.HashSet;
+import java.util.Set;
+
 import javax.annotation.concurrent.ThreadSafe;
 
 /**
@@ -27,12 +32,28 @@
  */
 @ThreadSafe
 final class WorkerPool extends GenericKeyedObjectPool<WorkerKey, Worker> {
-  public WorkerPool(KeyedPooledObjectFactory<WorkerKey, Worker> factory) {
+  final WorkerFactory workerFactory;
+  final Set<Worker> workers = new HashSet<>();
+
+  public WorkerPool(WorkerFactory factory) {
     super(factory);
+    this.workerFactory = factory;
   }
 
-  public WorkerPool(KeyedPooledObjectFactory<WorkerKey, Worker> factory,
-      GenericKeyedObjectPoolConfig config) {
+  public WorkerPool(WorkerFactory factory, GenericKeyedObjectPoolConfig config) {
     super(factory, config);
+    this.workerFactory = factory;
+  }
+
+  public void setLogDirectory(Path logDir) {
+    this.workerFactory.setLogDirectory(logDir);
+  }
+
+  public void setReporter(Reporter reporter) {
+    this.workerFactory.setReporter(reporter);
+  }
+
+  public void setVerbose(boolean verbose) {
+    this.workerFactory.setVerbose(verbose);
   }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnStrategy.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnStrategy.java
index bfac250..77b38c4 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnStrategy.java
+++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnStrategy.java
@@ -23,10 +23,17 @@
 import com.google.devtools.build.lib.actions.ChangedFilesMessage;
 import com.google.devtools.build.lib.actions.ExecException;
 import com.google.devtools.build.lib.actions.ExecutionStrategy;
+import com.google.devtools.build.lib.actions.Executor;
 import com.google.devtools.build.lib.actions.Spawn;
 import com.google.devtools.build.lib.actions.SpawnActionContext;
 import com.google.devtools.build.lib.actions.UserExecException;
+import com.google.devtools.build.lib.analysis.BlazeDirectories;
 import com.google.devtools.build.lib.concurrent.ThreadSafety;
+import com.google.devtools.build.lib.events.Event;
+import com.google.devtools.build.lib.events.EventHandler;
+import com.google.devtools.build.lib.standalone.StandaloneSpawnStrategy;
+import com.google.devtools.build.lib.syntax.Label;
+import com.google.devtools.build.lib.util.CommandFailureUtils;
 import com.google.devtools.build.lib.util.io.FileOutErr;
 import com.google.devtools.build.lib.vfs.Path;
 import com.google.devtools.build.lib.worker.WorkerProtocol.WorkRequest;
@@ -43,31 +50,44 @@
 final class WorkerSpawnStrategy implements SpawnActionContext {
   private final WorkerPool workers;
   private final IncrementalHeuristic incrementalHeuristic;
+  private final StandaloneSpawnStrategy standaloneStrategy;
+  private final boolean verboseFailures;
+  private final int maxRetries;
 
   public WorkerSpawnStrategy(
-      OptionsClassProvider optionsProvider, WorkerPool workers, EventBus eventBus) {
+      BlazeDirectories blazeDirs,
+      OptionsClassProvider optionsProvider,
+      EventBus eventBus,
+      WorkerPool workers,
+      boolean verboseFailures,
+      int maxRetries) {
     Preconditions.checkNotNull(optionsProvider);
     WorkerOptions options = optionsProvider.getOptions(WorkerOptions.class);
-    workers.setMaxTotalPerKey(options.workerMaxInstances);
-    workers.setMaxIdlePerKey(options.workerMaxInstances);
-    workers.setMinIdlePerKey(options.workerMaxInstances);
-    this.workers = workers;
     this.incrementalHeuristic = new IncrementalHeuristic(options.workerMaxChangedFiles);
     eventBus.register(incrementalHeuristic);
+    this.workers = Preconditions.checkNotNull(workers);
+    this.standaloneStrategy = new StandaloneSpawnStrategy(blazeDirs.getExecRoot(), verboseFailures);
+    this.verboseFailures = verboseFailures;
+    this.maxRetries = maxRetries;
   }
 
   @Override
   public void exec(Spawn spawn, ActionExecutionContext actionExecutionContext)
       throws ExecException, InterruptedException {
-    if (!incrementalHeuristic.shouldUseWorkers()) {
-      SpawnActionContext context = actionExecutionContext.getExecutor().getSpawnActionContext("");
-      if (context != this) {
-        context.exec(spawn, actionExecutionContext);
-        return;
-      }
+    Executor executor = actionExecutionContext.getExecutor();
+    if (executor.reportsSubcommands()) {
+      executor.reportSubcommand(
+          Label.print(spawn.getOwner().getLabel())
+              + " ["
+              + spawn.getResourceOwner().prettyPrint()
+              + "]",
+          spawn.asShellCommand(executor.getExecRoot()));
     }
 
-    String paramFile = Iterables.getLast(spawn.getArguments());
+    if (!incrementalHeuristic.shouldUseWorkers()) {
+      standaloneStrategy.exec(spawn, actionExecutionContext);
+      return;
+    }
 
     // We assume that the spawn to be executed always gets a single argument, which is a flagfile
     // prefixed with @ and that it will start in persistent mode when we don't pass it one.
@@ -75,10 +95,11 @@
     // persistent mode and then pass it the flagfile via a WorkRequest to make it actually do the
     // work.
     if (!Iterables.getLast(spawn.getArguments()).startsWith("@")) {
-      throw new IllegalStateException(
-          "Must have parameter file as last arg, got args: " + spawn.getArguments());
+      standaloneStrategy.exec(spawn, actionExecutionContext);
+      return;
     }
 
+    String paramFile = Iterables.getLast(spawn.getArguments());
     FileOutErr outErr = actionExecutionContext.getFileOutErr();
 
     ImmutableList<String> args = ImmutableList.<String>builder()
@@ -87,44 +108,78 @@
         .build();
     ImmutableMap<String, String> env = spawn.getEnvironment();
     Path workDir = actionExecutionContext.getExecutor().getExecRoot();
-    WorkerKey key = new WorkerKey(args, env, workDir);
+    WorkerKey key = new WorkerKey(args, env, workDir, spawn.getMnemonic());
 
     try {
-      Worker worker = workers.borrowObject(key);
-      try {
-        WorkRequest.newBuilder()
-            .addArguments(paramFile)
-            .build()
-            .writeDelimitedTo(worker.getOutputStream());
-        worker.getOutputStream().flush();
+      WorkResponse response = execInWorker(executor.getEventHandler(), paramFile, key, maxRetries);
 
-        WorkResponse response = WorkResponse.parseDelimitedFrom(worker.getInputStream());
+      outErr.getErrorStream().write(response.getOutputBytes().toByteArray());
 
-        if (response == null) {
-          throw new UserExecException(
-              "Worker process did not return a correct WorkResponse. This is probably caused by a "
-                  + "bug in the worker, writing unexpected other data to stdout.");
-        }
-
-        String trimmedOutput = response.getOutput().trim();
-        if (!trimmedOutput.isEmpty()) {
-          outErr.getErrorStream().write(trimmedOutput.getBytes());
-        }
-
-        if (response.getExitCode() != 0) {
-          throw new UserExecException(
-              String.format("Worker process failed with exit code: %d.", response.getExitCode()));
-        }
-      } finally {
-        if (worker != null) {
-          workers.returnObject(key, worker);
-        }
+      if (response.getExitCode() != 0) {
+        throw new UserExecException(
+            String.format("Worker process failed with exit code: %d.", response.getExitCode()));
       }
     } catch (Exception e) {
-      throw new UserExecException(e.getMessage(), e);
+      String message =
+          CommandFailureUtils.describeCommandFailure(
+              verboseFailures, spawn.getArguments(), env, workDir.getPathString());
+      throw new UserExecException(message, e);
     }
   }
 
+  private WorkResponse execInWorker(
+      EventHandler eventHandler, String paramFile, WorkerKey key, int retriesLeft)
+      throws Exception {
+    Worker worker = null;
+    WorkResponse response = null;
+
+    try {
+      worker = workers.borrowObject(key);
+      WorkRequest.newBuilder()
+          .addArguments(paramFile)
+          .build()
+          .writeDelimitedTo(worker.getOutputStream());
+      worker.getOutputStream().flush();
+
+      response = WorkResponse.parseDelimitedFrom(worker.getInputStream());
+
+      if (response == null) {
+        throw new UserExecException(
+            "Worker process did not return a correct WorkResponse. This is probably caused by a "
+                + "bug in the worker, writing unexpected other data to stdout.");
+      }
+    } catch (InterruptedException e) {
+      // The user pressed Ctrl-C. Get out here quick.
+      if (worker != null) {
+        workers.invalidateObject(key, worker);
+        worker = null;
+      }
+      throw e;
+    } catch (Exception e) {
+      // "Something" failed - let's retry with a fresh worker.
+      if (worker != null) {
+        workers.invalidateObject(key, worker);
+        worker = null;
+      }
+      if (retriesLeft > 0) {
+        eventHandler.handle(
+            Event.warn(
+                key.getMnemonic()
+                    + " worker failed ("
+                    + e
+                    + "), invalidating and retrying with new worker..."));
+        return execInWorker(eventHandler, paramFile, key, retriesLeft - 1);
+      } else {
+        throw e;
+      }
+    } finally {
+      if (worker != null) {
+        workers.returnObject(key, worker);
+      }
+    }
+    return response;
+  }
+
   @Override
   public String strategyLocality(String mnemonic, boolean remotable) {
     return "worker";