Implement graceful SIGTERM handling in the process-wrapper.

This is intended to be used with the new dynamic spawn scheduler (via the
--process_wrapper_extra_flags flag), which triggers lots more local process
cancellations, in an attempt to workaround an odd OSXFUSE bug for which we
have no diagnostic nor fix yet.

Part of #7818.

RELNOTES: None.
PiperOrigin-RevId: 314953746
diff --git a/src/main/java/com/google/devtools/build/lib/exec/local/LocalExecutionOptions.java b/src/main/java/com/google/devtools/build/lib/exec/local/LocalExecutionOptions.java
index 660b0bd..f404845 100644
--- a/src/main/java/com/google/devtools/build/lib/exec/local/LocalExecutionOptions.java
+++ b/src/main/java/com/google/devtools/build/lib/exec/local/LocalExecutionOptions.java
@@ -72,6 +72,17 @@
   public boolean localLockfreeOutput;
 
   @Option(
+      name = "experimental_process_wrapper_graceful_sigterm",
+      defaultValue = "false",
+      documentationCategory = OptionDocumentationCategory.UNDOCUMENTED,
+      effectTags = {OptionEffectTag.EXECUTION},
+      help =
+          "When true, make the process-wrapper propagate SIGTERMs (used by the dynamic scheduler "
+              + "to stop process trees) to the subprocesses themselves, giving them the grace "
+              + "period in --local_termination_grace_seconds before forcibly sending a SIGKILL.")
+  public boolean processWrapperGracefulSigterm;
+
+  @Option(
       name = "experimental_process_wrapper_wait_fix",
       defaultValue = "false",
       documentationCategory = OptionDocumentationCategory.UNDOCUMENTED,
diff --git a/src/main/java/com/google/devtools/build/lib/runtime/ProcessWrapper.java b/src/main/java/com/google/devtools/build/lib/runtime/ProcessWrapper.java
index 73dba33..e0ef4ec 100644
--- a/src/main/java/com/google/devtools/build/lib/runtime/ProcessWrapper.java
+++ b/src/main/java/com/google/devtools/build/lib/runtime/ProcessWrapper.java
@@ -61,6 +61,9 @@
 
     ImmutableList.Builder<String> extraFlags = ImmutableList.builder();
     if (options != null) {
+      if (options.processWrapperGracefulSigterm) {
+        extraFlags.add("--graceful_sigterm");
+      }
       if (options.processWrapperWaitFix) {
         extraFlags.add("--wait_fix");
       }
diff --git a/src/main/tools/process-wrapper-legacy.cc b/src/main/tools/process-wrapper-legacy.cc
index 0580291..60a01e6 100644
--- a/src/main/tools/process-wrapper-legacy.cc
+++ b/src/main/tools/process-wrapper-legacy.cc
@@ -72,12 +72,28 @@
   }
 }
 
+// Sets up signal handlers to kill all subprocesses when the given signal is
+// triggered. Whether subprocesses are abruptly terminated or not depends on
+// the signal type and the user configuration.
+void LegacyProcessWrapper::SetupSignalHandlers() {
+  // SIGALRM represents a timeout so we should give the process a bit of time
+  // to die gracefully if it needs it.
+  InstallSignalHandler(SIGALRM, OnGracefulSignal);
+
+  // Termination signals should kill the process quickly, as it's typically
+  // blocking the return of the prompt after a user hits "Ctrl-C". But we allow
+  // customizing the behavior of SIGTERM because it's used by the dynamic
+  // scheduler to terminate process trees in a controlled manner.
+  if (opt.graceful_sigterm) {
+    InstallSignalHandler(SIGTERM, OnGracefulSignal);
+  } else {
+    InstallSignalHandler(SIGTERM, OnAbruptSignal);
+  }
+  InstallSignalHandler(SIGINT, OnAbruptSignal);
+}
+
 void LegacyProcessWrapper::WaitForChild() {
-  // Set up a signal handler which kills all subprocesses when the given signal
-  // is triggered.
-  InstallSignalHandler(SIGALRM, OnSignal);
-  InstallSignalHandler(SIGTERM, OnSignal);
-  InstallSignalHandler(SIGINT, OnSignal);
+  SetupSignalHandlers();
   if (opt.timeout_secs > 0) {
     SetTimeout(opt.timeout_secs);
   }
@@ -144,16 +160,13 @@
 }
 
 // Called when timeout or signal occurs.
-void LegacyProcessWrapper::OnSignal(int sig) {
+void LegacyProcessWrapper::OnAbruptSignal(int sig) {
   last_signal = sig;
+  KillEverything(child_pid, false, opt.kill_delay_secs);
+}
 
-  if (sig == SIGALRM) {
-    // SIGALRM represents a timeout, so we should give the process a bit of time
-    // to die gracefully if it needs it.
-    KillEverything(child_pid, true, opt.kill_delay_secs);
-  } else {
-    // Signals should kill the process quickly, as it's typically blocking the
-    // return of the prompt after a user hits "Ctrl-C".
-    KillEverything(child_pid, false, opt.kill_delay_secs);
-  }
+// Called when timeout or signal occurs.
+void LegacyProcessWrapper::OnGracefulSignal(int sig) {
+  last_signal = sig;
+  KillEverything(child_pid, true, opt.kill_delay_secs);
 }
diff --git a/src/main/tools/process-wrapper-legacy.h b/src/main/tools/process-wrapper-legacy.h
index 943c4c1..2301a38 100644
--- a/src/main/tools/process-wrapper-legacy.h
+++ b/src/main/tools/process-wrapper-legacy.h
@@ -39,8 +39,10 @@
 
  private:
   static void SpawnChild();
+  static void SetupSignalHandlers();
   static void WaitForChild();
-  static void OnSignal(int sig);
+  static void OnAbruptSignal(int sig);
+  static void OnGracefulSignal(int sig);
 
   static pid_t child_pid;
   static volatile sig_atomic_t last_signal;
diff --git a/src/main/tools/process-wrapper-options.cc b/src/main/tools/process-wrapper-options.cc
index 837fd26..0c0e97e 100644
--- a/src/main/tools/process-wrapper-options.cc
+++ b/src/main/tools/process-wrapper-options.cc
@@ -41,6 +41,8 @@
   fprintf(
       stderr,
       "\nPossible arguments:\n"
+      "  -g/--graceful_sigterm  propagate SIGTERM to the subprocess and delay "
+      "the corresponding SIGKILL until --kill_delay has passed\n"
       "  -t/--timeout <timeout>  timeout after which the child process will be "
       "terminated with SIGTERM\n"
       "  -k/--kill_delay <timeout>  in case timeout occurs, how long to wait "
@@ -58,6 +60,7 @@
 // global `opt` struct.
 static void ParseCommandLine(const std::vector<char *> &args) {
   static struct option long_options[] = {
+      {"graceful_sigterm", no_argument, 0, 'g'},
       {"timeout", required_argument, 0, 't'},
       {"kill_delay", required_argument, 0, 'k'},
       {"stdout", required_argument, 0, 'o'},
@@ -70,9 +73,12 @@
   extern int optind, optopt;
   int c;
 
-  while ((c = getopt_long(args.size(), args.data(), "+:t:k:o:e:s:dW",
+  while ((c = getopt_long(args.size(), args.data(), "+:gt:k:o:e:s:dW",
                           long_options, nullptr)) != -1) {
     switch (c) {
+      case 'g':
+        opt.graceful_sigterm = true;
+        break;
       case 't':
         if (sscanf(optarg, "%lf", &opt.timeout_secs) != 1) {
           Usage(args.front(), "Invalid timeout (-t) value: %s", optarg);
diff --git a/src/main/tools/process-wrapper-options.h b/src/main/tools/process-wrapper-options.h
index d7f269f..d1587d6 100644
--- a/src/main/tools/process-wrapper-options.h
+++ b/src/main/tools/process-wrapper-options.h
@@ -20,6 +20,8 @@
 
 // Options parsing result.
 struct Options {
+  // Whether to gracefully terminate subprocesses on SIGTERM (-g)
+  bool graceful_sigterm;
   // How long to wait before killing the child (-t)
   double timeout_secs;
   // How long to wait before sending SIGKILL in case of timeout (-k)
diff --git a/src/test/shell/integration/process-wrapper_test.sh b/src/test/shell/integration/process-wrapper_test.sh
index 38d59ec..136edd6 100755
--- a/src/test/shell/integration/process-wrapper_test.sh
+++ b/src/test/shell/integration/process-wrapper_test.sh
@@ -34,6 +34,7 @@
 readonly EXIT_STATUS_SIGABRT=$((128 + 6))
 readonly EXIT_STATUS_SIGKILL=$((128 + 9))
 readonly EXIT_STATUS_SIGALRM=$((128 + 14))
+readonly EXIT_STATUS_SIGTERM=$((128 + 15))
 
 function set_up() {
   rm -rf $OUT_DIR
@@ -143,6 +144,80 @@
   assert_stdout 'ignoring signal'
 }
 
+# Tests that sending a SIGTERM causes the process to receive such SIGTERM if
+# graceful SIGTERM handling is enabled, but with the process exiting on its own
+# without the need for a SIGKILL. To make sure that this is the case, we pass a
+# very large kill delay to cause the outer test to fail if we violate this
+# expectation.
+function test_sigterm_grace() {
+  $process_wrapper --graceful_sigterm --kill_delay=100000 \
+    --stdout=$OUT --stderr=$ERR \
+    /bin/sh -c \
+    'trap "echo ignoring signal" INT TERM ARLM; \
+     for i in $(seq 5); do sleep 1; done; echo after' \
+    &> $TEST_log &
+  local pid=$!
+  sleep 1
+  kill -TERM "${pid}"
+  local code=0
+  wait "${pid}" || code=$?
+  assert_equals "${EXIT_STATUS_SIGTERM}" "$code"
+  assert_stdout 'ignoring signal
+after'
+}
+
+# Tests that sending a SIGTERM causes the process to receive such SIGTERM if
+# graceful SIGTERM handling is enabled and waits until the process has exited
+# on its own, even if that takes a little bit of time. To make sure that this is
+# the case, we pass a very large kill delay to cause the outer test to fail if
+# we violate this expectation.
+#
+# In the past, even though we would terminate the process quickly, we would
+# get stuck until the kill delay passed (because we'd be stuck waiting for a
+# zombie process without us actually collecting it). So this is a regression
+# test for that subtle bug.
+function test_sigterm_exits_as_soon_as_process_terminates() {
+  $process_wrapper --graceful_sigterm --kill_delay=100000 \
+    --stdout=$OUT --stderr=$ERR \
+    /bin/sh -c \
+    'trap "" INT TERM ARLM; \
+     for i in $(seq 5); do echo sleeping $i; sleep 1; done' \
+    &> $TEST_log &
+  local pid=$!
+  sleep 1
+  kill -TERM "${pid}"
+  local code=0
+  wait "${pid}" || code=$?
+  assert_equals "${EXIT_STATUS_SIGTERM}" "$code"
+  assert_stdout 'sleeping 1
+sleeping 2
+sleeping 3
+sleeping 4
+sleeping 5'
+}
+
+# Tests that sending a SIGTERM causes the process to receive such SIGTERM if
+# graceful SIGTERM handling is enabled and a SIGKILL later once a kill delay has
+# passed without the process exiting on its own. We make the process get stuck
+# indefinitely until killed, and we do this with individual calls to sleep
+# instead of a single one to ensure that a single termination of the sleep
+# subprocess doesn't cause us to spuriously exit and thus pass this test.
+function test_sigterm_kill() {
+  $process_wrapper --graceful_sigterm --kill_delay=5 \
+    --stdout=$OUT --stderr=$ERR \
+    /bin/sh -c \
+    'trap "echo ignoring signal" INT TERM ARLM; \
+     while :; do sleep 1; done; echo after' \
+    &> $TEST_log &
+  local pid=$!
+  sleep 1
+  kill -TERM "${pid}"
+  local code=0
+  wait "${pid}" || code=$?
+  assert_equals "${EXIT_STATUS_SIGTERM}" "$code"
+  assert_stdout 'ignoring signal'
+}
+
 function test_execvp_error_message() {
   local code=0
   $process_wrapper --stdout=$OUT --stderr=$ERR /bin/notexisting &> $TEST_log || code=$?