workers: Make sure to wait for worker processes to exit so that they don't become zombies.
--
MOS_MIGRATED_REVID=103541217
diff --git a/src/main/java/com/google/devtools/build/lib/concurrent/ThreadSafety.java b/src/main/java/com/google/devtools/build/lib/concurrent/ThreadSafety.java
index 0c67fd9..8d8be17 100644
--- a/src/main/java/com/google/devtools/build/lib/concurrent/ThreadSafety.java
+++ b/src/main/java/com/google/devtools/build/lib/concurrent/ThreadSafety.java
@@ -24,7 +24,7 @@
*<p>
* The names used here are adapted from those used in Joshua Bloch's book
* "Effective Java", which are also described at
- * <http://www-128.ibm.com/developerworks/java/library/j-jtp09263.html>.
+ * <http://www.ibm.com/developerworks/java/library/j-jtp09263/>.
*<p>
* These attributes are just documentation. They don't have any run-time
* effect. The main aim is mainly just to standardize the terminology.
diff --git a/src/main/java/com/google/devtools/build/lib/shell/Command.java b/src/main/java/com/google/devtools/build/lib/shell/Command.java
index 364373d..b0ed4a5 100644
--- a/src/main/java/com/google/devtools/build/lib/shell/Command.java
+++ b/src/main/java/com/google/devtools/build/lib/shell/Command.java
@@ -846,8 +846,7 @@
}
}
} finally {
- // Read this for detailed explanation:
- // http://www-128.ibm.com/developerworks/java/library/j-jtp05236.html
+ // Read this for detailed explanation: http://www.ibm.com/developerworks/library/j-jtp05236/
if (wasInterrupted) {
Thread.currentThread().interrupt(); // preserve interrupted status
}
diff --git a/src/main/java/com/google/devtools/build/lib/shell/Consumers.java b/src/main/java/com/google/devtools/build/lib/shell/Consumers.java
index 2e548f6..d6fe4d9 100644
--- a/src/main/java/com/google/devtools/build/lib/shell/Consumers.java
+++ b/src/main/java/com/google/devtools/build/lib/shell/Consumers.java
@@ -260,7 +260,7 @@
}
} finally {
// Read this for detailed explanation:
- // http://www-128.ibm.com/developerworks/java/library/j-jtp05236.html
+ // http://www.ibm.com/developerworks/java/library/j-jtp05236/
if (wasInterrupted) {
Thread.currentThread().interrupt(); // preserve interrupted status
}
diff --git a/src/main/java/com/google/devtools/build/lib/worker/Worker.java b/src/main/java/com/google/devtools/build/lib/worker/Worker.java
index d76b9fe..307761a 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/Worker.java
+++ b/src/main/java/com/google/devtools/build/lib/worker/Worker.java
@@ -63,12 +63,13 @@
final Process process = processBuilder.start();
- Thread shutdownHook = new Thread() {
- @Override
- public void run() {
- process.destroy();
- }
- };
+ Thread shutdownHook =
+ new Thread() {
+ @Override
+ public void run() {
+ destroyProcess(process);
+ }
+ };
Runtime.getRuntime().addShutdownHook(shutdownHook);
if (verbose) {
@@ -87,7 +88,33 @@
void destroy() {
Runtime.getRuntime().removeShutdownHook(shutdownHook);
- process.destroy();
+ destroyProcess(process);
+ }
+
+ /**
+ * Destroys a process and waits for it to exit. This is necessary for the child to not become a
+ * zombie.
+ *
+ * @param process the process to destroy.
+ */
+ private static void destroyProcess(Process process) {
+ boolean wasInterrupted = false;
+ try {
+ process.destroy();
+ while (true) {
+ try {
+ process.waitFor();
+ return;
+ } catch (InterruptedException ie) {
+ wasInterrupted = true;
+ }
+ }
+ } finally {
+ // Read this for detailed explanation: http://www.ibm.com/developerworks/library/j-jtp05236/
+ if (wasInterrupted) {
+ Thread.currentThread().interrupt(); // preserve interrupted status
+ }
+ }
}
/**
diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnStrategy.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnStrategy.java
index 4355338..dd0508f 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnStrategy.java
+++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnStrategy.java
@@ -148,20 +148,20 @@
"Worker process did not return a correct WorkResponse. This is probably caused by a "
+ "bug in the worker, writing unexpected other data to stdout.");
}
- } catch (InterruptedException e) {
- // The user pressed Ctrl-C. Get out here quick.
- if (worker != null) {
- workers.invalidateObject(key, worker);
- worker = null;
- }
- throw e;
} catch (Exception e) {
- // "Something" failed - let's retry with a fresh worker.
+ if (e instanceof InterruptedException) {
+ // The user pressed Ctrl-C. Get out here quick.
+ retriesLeft = 0;
+ }
+
if (worker != null) {
workers.invalidateObject(key, worker);
worker = null;
}
+
if (retriesLeft > 0) {
+ // The worker process failed, but we still have some retries left. Let's retry with a fresh
+ // worker.
eventHandler.handle(
Event.warn(
key.getMnemonic()
diff --git a/src/test/java/com/google/devtools/build/lib/shell/InterruptibleTest.java b/src/test/java/com/google/devtools/build/lib/shell/InterruptibleTest.java
index 8192bd9..4e864a9 100644
--- a/src/test/java/com/google/devtools/build/lib/shell/InterruptibleTest.java
+++ b/src/test/java/com/google/devtools/build/lib/shell/InterruptibleTest.java
@@ -27,7 +27,7 @@
/**
* Tests of the interaction of Thread.interrupt and Command.execute.
*
- * Read http://www-128.ibm.com/developerworks/java/library/j-jtp05236.html
+ * Read http://www.ibm.com/developerworks/java/library/j-jtp05236/
* for background material.
*
* NOTE: This test is dependent on thread timings. Under extreme machine load