Make sure to wait for all subprocesses in the process-wrapper.

This is take 3 of this change, following https://github.com/bazelbuild/bazel/commit/7828118ea8f4150aa1eae4ab3133935ffe221eb5 and then https://github.com/bazelbuild/bazel/commit/9c1853aa2fe49cf80dd467bb4020eb0822d53881, both of which
required a Bazel release rollback due to unforeseen problems (the
latter of which caused some tests to never complete).

This CL is an identical repeat of https://github.com/bazelbuild/bazel/commit/9c1853aa2fe49cf80dd467bb4020eb0822d53881, the only difference
being that the changes to the process-wrapper are now gated behind
a flag to allow us to roll them out in a controlled manner.  Therefore,
this is *still broken* in the same way that caused the second rollback,
but I feel more comfortable addressing that bug separately from this
roll forward (especially considering the many months that have passed
in between).

Original description:

When we kill the process group, make sure to wait until the results
of all subprocesses have been collected to ensure all subprocesses are
stopped before the process-wrapper exits.

I have been unable to come up with a test case to show that this does
anything useful though (in particular because the process-wrapper uses
SIGKILL against the processes instead of SIGTERM so I cannot install
any handlers to mess things up). Conceptually, the problem existed, but
it is a race condition that is very hard to trigger. But I'm adding
some unit tests for the internals.

Partially fixes https://github.com/bazelbuild/bazel/issues/10245.

RELNOTES: None.
PiperOrigin-RevId: 312093258
diff --git a/src/BUILD b/src/BUILD
index 926758f..85f695e 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -463,6 +463,7 @@
         "//src/test/py/bazel:srcs",
         "//src/test/shell:srcs",
         "//src/test/testdata/test_tls_certificate",
+        "//src/test/tools:srcs",
         "//src/tools/android/java/com/google/devtools/build/android:srcs",
         "//src/tools/execlog:srcs",
         "//src/tools/workspacelog:srcs",
diff --git a/src/main/tools/BUILD b/src/main/tools/BUILD
index cb2548c..f202f1f 100644
--- a/src/main/tools/BUILD
+++ b/src/main/tools/BUILD
@@ -15,7 +15,10 @@
 
 cc_library(
     name = "process-tools",
-    srcs = ["process-tools.cc"],
+    srcs = ["process-tools.cc"] + select({
+        "//src/conditions:darwin": ["process-tools-darwin.cc"],
+        "//conditions:default": ["process-tools-linux.cc"],
+    }),
     hdrs = ["process-tools.h"],
     deps = [
         ":logging",
diff --git a/src/main/tools/process-tools-darwin.cc b/src/main/tools/process-tools-darwin.cc
new file mode 100644
index 0000000..277796d
--- /dev/null
+++ b/src/main/tools/process-tools-darwin.cc
@@ -0,0 +1,116 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdint.h>
+#include <sys/event.h>
+#include <sys/sysctl.h>
+#include <unistd.h>
+
+#include "src/main/tools/logging.h"
+#include "src/main/tools/process-tools.h"
+
+// Blocks until `pid` exits without reaping it; returns 0, or -1 on failure.
+int WaitForProcessToTerminate(pid_t pid) {
+  const int kq = kqueue();
+  if (kq == -1) {
+    return -1;
+  }
+
+  // Per kqueue(2), registering for an event also reports pending ones, so this
+  // is not racy even if the process exited before the kevent was installed.
+  struct kevent kc;
+  EV_SET(&kc, pid, EVFILT_PROC, EV_ADD | EV_ENABLE, NOTE_EXIT, 0, 0);
+  int nev;
+  struct kevent ke;
+  do {
+    nev = kevent(kq, &kc, 1, &ke, 1, NULL);
+  } while (nev == -1 && errno == EINTR);  // Retry when interrupted.
+  if (nev == -1) {
+    const int saved_errno = errno;
+    close(kq);  // Do not leak the descriptor on the failure path.
+    errno = saved_errno;
+    return -1;
+  }
+  if (nev != 1) {
+    DIE("Expected only one event from the kevent call; got %d", nev);
+  }
+  if (ke.ident != (uintptr_t)pid) {
+    DIE("Expected PID in the kevent to be %" PRIdMAX " but got %" PRIdMAX,
+        (intmax_t)pid, (intmax_t)ke.ident);
+  }
+  if (!(ke.fflags & NOTE_EXIT)) {
+    DIE("Expected the kevent to be for an exit condition");
+  }
+  return close(kq);
+}
+
+// Polls until the leader is the only process left in group `pgid`, sending
+// SIGKILL to the group between polls.  Returns 0 on success, -1 on failure.
+int WaitForProcessGroupToTerminate(pid_t pgid) {
+  int name[] = {CTL_KERN, KERN_PROC, KERN_PROC_PGRP, pgid};
+  for (;;) {
+    // Query the list of processes in the group by using sysctl(3).  We do not
+    // know how big that list is, so first query the size of the output data
+    // and then cope with the size changing before the real query is issued.
+    struct kinfo_proc *procs = NULL;
+    size_t nprocs = 0;
+    do {
+      size_t len;
+      if (sysctl(name, 4, 0, &len, NULL, 0) == -1) {
+        return -1;
+      }
+      procs = (struct kinfo_proc *)malloc(len);
+      if (procs == NULL) {
+        return -1;  // Out of memory: never pass a null buffer to the query.
+      }
+      if (sysctl(name, 4, procs, &len, NULL, 0) == -1) {
+        if (errno != ENOMEM) {
+          DIE("Unexpected error code %d", errno);
+        }
+        free(procs);  // The buffer was too small; size it again.
+        procs = NULL;
+      } else {
+        nprocs = len / sizeof(struct kinfo_proc);
+      }
+    } while (procs == NULL);
+    if (nprocs < 1) {
+      DIE("Must have found the group leader at least");
+    }
+    if (nprocs == 1) {
+      // Found only one process, which must be the leader because we have
+      // purposely left it as a zombie with WaitForProcessToTerminate.
+      if (procs->kp_proc.p_pid != pgid) {
+        DIE("Process group leader must be the only process left");
+      }
+      free(procs);
+      return 0;
+    }
+    free(procs);
+
+    // More than one process left in the process group.  Kill the group again
+    // in case extra processes appeared just now, which would delay completion.
+    kill(-pgid, SIGKILL);
+
+    // Pause briefly to avoid burning CPU; EINTR just shortens the pause.
+    struct timespec ts;
+    ts.tv_sec = 0;
+    ts.tv_nsec = 1000000;
+    if (nanosleep(&ts, NULL) == -1 && errno != EINTR) {
+      return -1;
+    }
+  }
+}
diff --git a/src/main/tools/process-tools-linux.cc b/src/main/tools/process-tools-linux.cc
new file mode 100644
index 0000000..53d8641
--- /dev/null
+++ b/src/main/tools/process-tools-linux.cc
@@ -0,0 +1,54 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "src/main/tools/process-tools.h"
+
+int TerminateAndWaitForAll(pid_t pid) {
+  kill(-pid, SIGKILL);  // Signal the process group whose ID is `pid`.
+  // Fast path: reap, without blocking, every child that has already exited.
+  int res;
+  while ((res = waitpid(-1, nullptr, WNOHANG)) > 0) {
+    // Got one child; try again.
+  }
+  if (res == -1) {
+    // The fast path got all children, so there is nothing else to do.
+    return 0;
+  }
+
+  // Slow path (waitpid returned 0: children remain but have not exited): cope
+  // with children that escaped the process group or did not exit quickly.
+  FILE *f = fopen("/proc/thread-self/children", "r");
+  if (f == nullptr) {
+    // Oh oh. This feature may be disabled, in which case there is
+    // nothing we can do. Stop early and let any stale children be
+    // reparented to init.
+    return 0;
+  }
+  setbuf(f, nullptr);  // Unbuffered; presumably so each rewind re-reads.
+  int child_pid;  // First PID parsed from the children file on each pass.
+  while ((waitpid(-1, nullptr, WNOHANG) != -1 || errno != ECHILD) &&
+         (rewind(f), 1 == fscanf(f, "%d", &child_pid))) {
+    kill(child_pid, SIGKILL);
+    usleep(100);  // Brief pause before polling again.
+  }
+  fclose(f);
+
+  return 0;
+}
diff --git a/src/main/tools/process-tools.h b/src/main/tools/process-tools.h
index 6f6f494..eb551f2 100644
--- a/src/main/tools/process-tools.h
+++ b/src/main/tools/process-tools.h
@@ -65,4 +65,34 @@
 // Write execution statistics to a file.
 void WriteStatsToFile(struct rusage *rusage, const std::string &stats_path);
 
+// Waits for a process to terminate but does *not* collect its exit status.
+//
+// Note that the process' zombie status may not be available immediately after
+// this call returns.
+//
+// May not be implemented on all platforms.
+int WaitForProcessToTerminate(pid_t pid);
+
+// Waits for a process group to terminate.  Assumes that the process leader
+// still exists in the process table (though it may be a zombie), and allows
+// it to remain.
+//
+// Assumes that the pgid has been sent a termination signal on entry to
+// terminate quickly (or else this will send its own termination signal to
+// the group).
+//
+// May not be implemented on all platforms.
+int WaitForProcessGroupToTerminate(pid_t pgid);
+
+// Terminates and waits for all descendants of the given process to exit.
+//
+// Assumes that the caller has enabled the child subreaper feature before
+// spawning any subprocesses.
+//
+// Assumes that the caller has already waited for the process to collect its
+// exit code as this discards the exit code of all processes it encounters.
+//
+// May not be implemented on all platforms.
+int TerminateAndWaitForAll(pid_t pid);
+
 #endif  // PROCESS_TOOLS_H__
diff --git a/src/main/tools/process-wrapper-legacy.cc b/src/main/tools/process-wrapper-legacy.cc
index 03fdec5..9c4b482 100644
--- a/src/main/tools/process-wrapper-legacy.cc
+++ b/src/main/tools/process-wrapper-legacy.cc
@@ -14,16 +14,20 @@
 
 #include "src/main/tools/process-wrapper-legacy.h"
 
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/wait.h>
+#if defined(__linux__)
+#include <sys/prctl.h>
+#endif
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
 #include <unistd.h>
-#include <vector>
 
 #include "src/main/tools/logging.h"
 #include "src/main/tools/process-tools.h"
@@ -39,6 +43,14 @@
 }
 
 void LegacyProcessWrapper::SpawnChild() {
+  if (opt.wait_fix) {
+#if defined(__linux__)
+    if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) == -1) {
+      DIE("prctl");
+    }
+#endif
+  }
+
   child_pid = fork();
   if (child_pid < 0) {
     DIE("fork");
@@ -70,6 +82,25 @@
     SetTimeout(opt.timeout_secs);
   }
 
+  if (opt.wait_fix) {
+    // On macOS, we have to ensure the whole process group is terminated before
+    // collecting the status of the PID we are interested in. (Otherwise other
+    // processes could race us and grab the PGID.)
+#if defined(__APPLE__)
+    if (WaitForProcessToTerminate(child_pid) == -1) {
+      DIE("WaitForProcessToTerminate");
+    }
+
+    // The child is done for, but may have grandchildren that we still have to
+    // kill.
+    kill(-child_pid, SIGKILL);
+
+    if (WaitForProcessGroupToTerminate(child_pid) == -1) {
+      DIE("WaitForProcessGroupToTerminate");
+    }
+#endif
+  }
+
   int status;
   if (!opt.stats_path.empty()) {
     struct rusage child_rusage;
@@ -79,9 +110,25 @@
     status = WaitChild(child_pid);
   }
 
-  // The child is done for, but may have grandchildren that we still have to
-  // kill.
-  kill(-child_pid, SIGKILL);
+  if (opt.wait_fix) {
+    // On Linux, we enabled the child subreaper feature, so now that we have
+    // collected the status of the PID we were interested in, terminate the
+    // rest of the process group and wait until all the children are gone.
+    //
+    // If you are wondering why we don't use a PID namespace instead, it's
+    // because those can have subtle effects on the processes we spawn (like
+    // them assuming that the PIDs that they get are unique). The linux-sandbox
+    // offers this functionality.
+#if defined(__linux__)
+    if (TerminateAndWaitForAll(child_pid) == -1) {
+      DIE("TerminateAndWaitForAll");
+    }
+#endif
+  } else {
+    // The child is done for, but may have grandchildren that we still have to
+    // kill.
+    kill(-child_pid, SIGKILL);
+  }
 
   if (last_signal > 0) {
     // Don't trust the exit code if we got a timeout or signal.
diff --git a/src/main/tools/process-wrapper-options.cc b/src/main/tools/process-wrapper-options.cc
index 8238411..25ccc03 100644
--- a/src/main/tools/process-wrapper-options.cc
+++ b/src/main/tools/process-wrapper-options.cc
@@ -48,6 +48,7 @@
       "  -s/--stats <file>  if set, write stats in protobuf format to a file\n"
       "  -d/--debug  if set, debug info will be printed\n"
       "  --  command to run inside sandbox, followed by arguments\n");
+  // -W intentionally not documented.
   exit(EXIT_FAILURE);
 }
 
@@ -61,12 +62,13 @@
       {"stderr", required_argument, 0, 'e'},
       {"stats", required_argument, 0, 's'},
       {"debug", no_argument, 0, 'd'},
+      {"wait_fix", no_argument, 0, 'W'},
       {0, 0, 0, 0}};
   extern char *optarg;
   extern int optind, optopt;
   int c;
 
-  while ((c = getopt_long(args.size(), args.data(), "+:t:k:o:e:s:d",
+  while ((c = getopt_long(args.size(), args.data(), "+:t:k:o:e:s:dW",
                           long_options, nullptr)) != -1) {
     switch (c) {
       case 't':
@@ -106,6 +108,9 @@
       case 'd':
         opt.debug = true;
         break;
+      case 'W':
+        opt.wait_fix = true;
+        break;
       case '?':
         Usage(args.front(), "Unrecognized argument: -%c (%d)", optopt, optind);
         break;
diff --git a/src/main/tools/process-wrapper-options.h b/src/main/tools/process-wrapper-options.h
index e8156a8..d7f269f 100644
--- a/src/main/tools/process-wrapper-options.h
+++ b/src/main/tools/process-wrapper-options.h
@@ -30,6 +30,8 @@
   std::string stderr_path;
   // Whether to print debugging messages (-d)
   bool debug;
+  // Whether to apply the "wait fix" (-W)
+  bool wait_fix;
   // Where to write stats, in protobuf format (-s)
   std::string stats_path;
   // Command to run (--)
diff --git a/src/test/tools/BUILD b/src/test/tools/BUILD
new file mode 100644
index 0000000..4e70352
--- /dev/null
+++ b/src/test/tools/BUILD
@@ -0,0 +1,23 @@
+load("@rules_cc//cc:defs.bzl", "cc_test")
+
+package(
+    default_visibility = ["//src:__subpackages__"],
+)
+
+filegroup(
+    name = "srcs",
+    srcs = glob(["**"]),
+    visibility = ["//src:__subpackages__"],
+)
+
+cc_test(
+    name = "process-tools_test",
+    srcs = ["process-tools_test.cc"] + select({
+        "//src/conditions:darwin": ["process-tools-darwin_test.cc"],
+        "//conditions:default": ["process-tools-linux_test.cc"],
+    }),
+    deps = [
+        "//src/main/tools:process-tools",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
diff --git a/src/test/tools/process-tools-darwin_test.cc b/src/test/tools/process-tools-darwin_test.cc
new file mode 100644
index 0000000..5bc057c
--- /dev/null
+++ b/src/test/tools/process-tools-darwin_test.cc
@@ -0,0 +1,167 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <sys/sysctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <memory>
+
+#include "src/main/tools/process-tools.h"
+#include "googlemock/include/gmock/gmock.h"
+#include "googletest/include/gtest/gtest.h"
+
+namespace {
+
+// Queries the process table for `pid`. Returns the matching entry, or nullptr
+// when no such process is found. Aborts on error.
+std::unique_ptr<kinfo_proc> FindProcess(pid_t pid) {
+  int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, pid};
+  size_t size = sizeof(kinfo_proc);
+  std::unique_ptr<kinfo_proc> entry(new kinfo_proc);
+  if (sysctl(mib, 4, entry.get(), &size, nullptr, 0) == -1) {
+    abort();
+  }
+  const bool missing = size == 0 || entry->kp_proc.p_pid == 0;
+  if (missing) {
+    return nullptr;
+  }
+  if (entry->kp_proc.p_pid != pid) {
+    // Did not expect to get a process with a PID we did not ask for.
+    abort();
+  }
+  return entry;
+}
+
+class WaitForProcessToTerminateTest : public testing::Test {};
+
+TEST_F(WaitForProcessToTerminateTest, TestExit) {
+  const pid_t pid = fork();
+  ASSERT_NE(pid, -1);
+  // Child: exit immediately with a recognizable status.
+  if (pid == 0) {
+    _exit(42);
+  }
+  // Parent: wait for the child to finish without collecting its status.
+  ASSERT_NE(WaitForProcessToTerminate(pid), -1);
+  // The WaitForProcessToTerminate call guarantees that the process is done,
+  // so we should not be able to affect its exit status any longer.
+  kill(pid, SIGKILL);
+  // The original exit status must still be there for waitpid to collect.
+  int status;
+  ASSERT_NE(waitpid(pid, &status, 0), -1);
+  ASSERT_TRUE(WIFEXITED(status));
+  ASSERT_EQ(WEXITSTATUS(status), 42);
+}
+
+TEST_F(WaitForProcessToTerminateTest, TestSignal) {
+  const pid_t pid = fork();
+  ASSERT_NE(pid, -1);
+  // Child: sleep so that the SIGTERM sent below is what terminates it.
+  if (pid == 0) {
+    sleep(30);
+    _exit(0);
+  }
+  kill(pid, SIGTERM);
+  // Parent: wait for the child to finish without collecting its status.
+  ASSERT_NE(WaitForProcessToTerminate(pid), -1);
+  // The WaitForProcessToTerminate call guarantees that the process is done,
+  // so we should not be able to affect its exit status any longer.
+  kill(pid, SIGKILL);
+  // The reported termination signal must be the first one sent, SIGTERM.
+  int status;
+  ASSERT_NE(waitpid(pid, &status, 0), -1);
+  ASSERT_TRUE(WIFSIGNALED(status));
+  ASSERT_EQ(WTERMSIG(status), SIGTERM);
+}
+
+class WaitForProcessGroupToTerminateTest : public testing::Test {};
+
+TEST_F(WaitForProcessGroupToTerminateTest, TestOnlyLeader) {
+  const pid_t pid = fork();
+  ASSERT_NE(pid, -1);
+  // Child: become the leader of its own process group and stay alive.
+  if (pid == 0) {
+    setpgid(0, getpid());
+    sleep(30);
+    _exit(0);
+  }
+  setpgid(pid, pid);  // Repeated in the parent, presumably to avoid a race.
+  // With no other group members, the call must return and leave the leader.
+  ASSERT_NE(WaitForProcessGroupToTerminate(pid), -1);
+  kill(pid, SIGKILL);  // Abort sleep to finish test quickly.
+  ASSERT_NE(waitpid(pid, nullptr, 0), -1);
+}
+
+TEST_F(WaitForProcessGroupToTerminateTest, TestManyProcesses) {
+  int fds[2];  // Pipe used by the leader to report subprocess PIDs.
+  ASSERT_NE(pipe(fds), -1);
+
+  const size_t nprocs = 3;
+
+  pid_t pid = fork();
+  ASSERT_NE(pid, -1);
+  if (pid == 0) {
+    setpgid(0, getpid());
+
+    close(fds[0]);  // The leader only writes to the pipe.
+
+    // Spawn a bunch of subprocesses in the same process group as the leader
+    // and report their PIDs to the test before exiting.
+    for (size_t i = 0; i < nprocs; i++) {
+      const pid_t subpid = fork();
+      if (subpid == -1) {
+        abort();
+      } else if (subpid == 0) {
+        close(fds[1]);
+        // Sleep for a very long amount of time to ensure we actually wait for
+        // and terminate processes in the process group.
+        sleep(10000);
+        _exit(0);
+      }
+      if (write(fds[1], &subpid, sizeof(subpid)) != sizeof(subpid)) {
+        abort();
+      }
+    }
+    close(fds[1]);
+    // The leader exits here while its subprocesses keep sleeping.
+    _exit(0);
+  }
+  setpgid(pid, pid);
+
+  // Collect the PIDs of all subprocesses (except for the leader).
+  close(fds[1]);
+  pid_t pids[nprocs];
+  for (size_t i = 0; i < nprocs; i++) {
+    ASSERT_EQ(read(fds[0], &pids[i], sizeof(pids[i])), sizeof(pids[i]));
+  }
+  close(fds[0]);
+
+  ASSERT_NE(WaitForProcessGroupToTerminate(pid), -1);
+  // The process leader must still exist (as a zombie or not, we don't know)
+  // but all other processes in the group must be gone by now.
+  ASSERT_NE(FindProcess(pid), nullptr);
+  for (size_t i = 0; i < nprocs; i++) {
+    // This check is racy: some other process might have reclaimed the PID of
+    // the process we already terminated. But it's very unlikely because the
+    // kernel tries very hard to not reassign PIDs too quickly.
+    ASSERT_EQ(FindProcess(pids[i]), nullptr);
+  }
+  // Finally reap the leader, whose status was left uncollected above.
+  ASSERT_NE(waitpid(pid, nullptr, 0), -1);
+}
+
+}  // namespace
diff --git a/src/test/tools/process-tools-linux_test.cc b/src/test/tools/process-tools-linux_test.cc
new file mode 100644
index 0000000..6a0a121
--- /dev/null
+++ b/src/test/tools/process-tools-linux_test.cc
@@ -0,0 +1,112 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "src/main/tools/process-tools.h"
+#include "googlemock/include/gmock/gmock.h"
+#include "googletest/include/gtest/gtest.h"
+
+namespace {
+
+class TerminateAndWaitForAllTest : public testing::Test {
+  void SetUp(void) override {
+    // Fixture setup: TerminateAndWaitForAll requires the caller to have
+    // enabled the child subreaper feature before spawning any processes.
+    ASSERT_NE(prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0), -1);
+  }
+};
+
+TEST_F(TerminateAndWaitForAllTest, TestOnlyLeader) {
+  const pid_t pid = fork();
+  ASSERT_NE(pid, -1);
+  // Child: become the leader of its own process group and stay alive.
+  if (pid == 0) {
+    setpgid(0, getpid());
+    sleep(30);
+    _exit(0);
+  }
+  setpgid(pid, pid);
+  // Simplest case: the group leader is the only process to clean up.
+  kill(pid, SIGKILL);  // Abort sleep to finish test quickly.
+  ASSERT_NE(TerminateAndWaitForAll(pid), -1);
+  ASSERT_EQ(waitpid(pid, nullptr, 0), -1);  // Expect it to be reaped already.
+}
+
+TEST_F(TerminateAndWaitForAllTest, TestOutsideOfGroup) {
+  int fds[2];  // Pipe used by the leader to report subprocess PIDs.
+  ASSERT_NE(pipe(fds), -1);
+
+  const size_t nprocs = 32;
+
+  pid_t pid = fork();
+  ASSERT_NE(pid, -1);
+  if (pid == 0) {
+    setpgid(0, getpid());
+
+    close(fds[0]);  // The leader only writes to the pipe.
+
+    // Spawn a bunch of subprocesses and report their PIDs to the test before
+    // exiting.
+    for (size_t i = 0; i < nprocs; i++) {
+      const pid_t subpid = fork();
+      if (subpid == -1) {
+        abort();
+      } else if (subpid == 0) {
+        close(fds[1]);
+
+        // Keep some subprocesses in the process group and make others escape.
+        if (i % 2 == 0) {
+          setsid();
+        }
+
+        // Sleep for a very long amount of time to ensure we actually wait for
+        // and terminate processes in the process group.
+        sleep(10000);
+        _exit(0);
+      }
+      if (write(fds[1], &subpid, sizeof(subpid)) != sizeof(subpid)) {
+        abort();
+      }
+    }
+    close(fds[1]);
+    // The leader exits here while its subprocesses keep sleeping.
+    _exit(0);
+  }
+  setpgid(pid, pid);
+
+  // Collect the PIDs of all subprocesses (except for the leader).
+  close(fds[1]);
+  pid_t pids[nprocs];
+  for (size_t i = 0; i < nprocs; i++) {
+    ASSERT_EQ(read(fds[0], &pids[i], sizeof(pids[i])), sizeof(pids[i]));
+  }
+  close(fds[0]);
+  // Collect the leader's own status before the call discards exit codes.
+  ASSERT_NE(waitpid(pid, nullptr, 0), -1);
+
+  ASSERT_NE(TerminateAndWaitForAll(pid), -1);
+  for (size_t i = 0; i < nprocs; i++) {
+    // This check is racy: some other process might have reclaimed the PID of
+    // the process we already terminated. But it's very unlikely because the
+    // kernel tries very hard to not reassign PIDs too quickly.
+    ASSERT_EQ(kill(pids[i], 0), -1);  // Signal 0 only probes for existence.
+  }
+}
+
+}  // namespace
diff --git a/src/test/tools/process-tools_test.cc b/src/test/tools/process-tools_test.cc
new file mode 100644
index 0000000..af8283b
--- /dev/null
+++ b/src/test/tools/process-tools_test.cc
@@ -0,0 +1,23 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/main/tools/process-tools.h"
+
+namespace {
+
+// Intentionally left empty to allow the test program to exist on any
+// platform we build on. Add tests here and remove this comment once we
+// have any non-platform specific tests.
+
+}  // namespace