Update test-setup.sh to in fact forward signals to its children. It claims to in at least one mode, but actually implicitly relies on the fact that the sandbox does. Second attempt: this accounts for allowing test processes to receive SIGINT if they want to, which is harder than it sounds.

This is a different variant of the same fix as https://github.com/bazelbuild/bazel/commit/9fe02aa9c7081f5cb6b48a949a054f64f693ae22 , which implicitly relied on someone else to signal child processes. Will it be better, or will it introduce a new corner case? We'll see!

RELNOTES: None.
PiperOrigin-RevId: 303794822
diff --git a/tools/test/test-setup.sh b/tools/test/test-setup.sh
index 098a898..9ae1ebb 100755
--- a/tools/test/test-setup.sh
+++ b/tools/test/test-setup.sh
@@ -260,54 +260,109 @@
   TEST_PATH="${BASE}.exe"
 fi
 
+# Helper to kill a process and its entire group.
+function kill_group {
+  local signal="${1-}"
+  local pid="${2-}"
+  kill -$signal -$pid &> /dev/null
+}
+
+childPid=""
+function signal_children {
+  local signal="${1-}"
+  if [ "${signal}" = "SIGTERM" ] && [ -z "$no_echo" ]; then
+    echo "-- Test timed out at $(date +"%F %T %Z") --"
+  fi
+  if [ ! -z "$childPid" ]; then
+    # For consistency with historical bazel behaviour, send signal to all child
+    # processes, not just the first one. We use the process group for this
+    # purpose.
+    kill_group $signal $childPid
+  fi
+}
+
 exitCode=0
 signals="$(trap -l | sed -E 's/[0-9]+\)//g')"
 if [[ "${EXPERIMENTAL_SPLIT_XML_GENERATION}" == "1" ]]; then
-  # If we trap here, then bash forwards the signal to the subprocess, at least
-  # for bash version 4.4.12(1) on Linux. If we don't trap here, then bash does
-  # not forward the signal. This seems to contradict the bash documentation, and
-  # also seems to contradict bug #7119, which reports the opposite behavior.
-  trap 'echo "-- Test timed out at $(date +"%F %T %Z") --"' SIGTERM
+  for signal in $signals; do
+    # SIGCHLD is expected when a subprocess dies
+    [ "${signal}" = "SIGCHLD" ] && continue
+    trap "signal_children ${signal}" ${signal}
+  done
 else
   for signal in $signals; do
     # SIGCHLD is expected when a subprocess dies
     [ "${signal}" = "SIGCHLD" ] && continue
-    trap "write_xml_output_file ${signal}" ${signal}
+    trap "write_xml_output_file ${signal}; signal_children ${signal}" ${signal}
   done
 fi
 start=$(date +%s)
 
-# Check if we have tail --pid option
-dummy=1 &
-pid=$!
-has_tail=true
-tail -fq --pid $pid -s 0.001 /dev/null &> /dev/null || has_tail=false
-
+# We have a challenge here: we want to forward signals to our child processes,
+# but we also want them to send themselves signals like SIGINT. Catching signals
+# ourselves requires use of background processes, trap, and wait. But normally
+# background processes are themselves unable to receive signals like SIGINT,
+# since those signals are intended for interactive processes - the only way for
+# them to get SIGINT in bash is for us to run them in the foreground.
+# To achieve this, we have to use `set -m` to enable Job Control in bash. This
+# has the effect of putting the child processes and any children of their own
+# into their own process groups, which are then able to receive SIGINT, etc,
+# without our shell interfering. Of course, this has the new complication that
+# anyone trying to SIGKILL *us* by group (as we know bazel's legacy process
+# wrapper does) will only kill this process and not the children below it. Any
+# reasonable sandboxing uses at least a process namespace, but we don't have the
+# luxury of assuming one, so our children could be left behind in that
+# eventuality. So, what we do is spawn a *second* background process that
+# watches for us to be killed, and then chain-kills the test's process group.
+# Aren't processes fun?
+set -m
 if [[ "${EXPERIMENTAL_SPLIT_XML_GENERATION}" == "1" ]]; then
   if [ -z "$COVERAGE_DIR" ]; then
-    "${TEST_PATH}" "$@" 2>&1 || exitCode=$?
+    ("${TEST_PATH}" "$@" 2>&1) <&0 &
   else
-    "$1" "$TEST_PATH" "${@:3}" 2>&1 || exitCode=$?
+    ("$1" "$TEST_PATH" "${@:3}" 2>&1) <&0 &
   fi
-elif [ "$has_tail" == true ] && [  -z "$no_echo" ]; then
-  touch "${XML_OUTPUT_FILE}.log"
-  if [ -z "$COVERAGE_DIR" ]; then
-    ("${TEST_PATH}" "$@" &>"${XML_OUTPUT_FILE}.log") <&0 &
-    pid=$!
-  else
-    ("$1" "$TEST_PATH" "${@:3}" &> "${XML_OUTPUT_FILE}.log") <&0 &
-    pid=$!
-  fi
-  tail -fq --pid $pid -s 0.001 "${XML_OUTPUT_FILE}.log"
-  wait $pid
-  exitCode=$?
 else
   if [ -z "$COVERAGE_DIR" ]; then
-    "${TEST_PATH}" "$@" 2> >(tee -a "${XML_OUTPUT_FILE}.log" >&2) 1> >(tee -a "${XML_OUTPUT_FILE}.log") 2>&1 || exitCode=$?
+    ("${TEST_PATH}" "$@" 2> >(tee -a "${XML_OUTPUT_FILE}.log" >&2) 1> >(tee -a "${XML_OUTPUT_FILE}.log") 2>&1) <&0 &
   else
-    "$1" "$TEST_PATH" "${@:3}" 2> >(tee -a "${XML_OUTPUT_FILE}.log" >&2) 1> >(tee -a "${XML_OUTPUT_FILE}.log") 2>&1 || exitCode=$?
+    ("$1" "$TEST_PATH" "${@:3}" 2> >(tee -a "${XML_OUTPUT_FILE}.log" >&2) 1> >(tee -a "${XML_OUTPUT_FILE}.log") 2>&1) <&0 &
   fi
 fi
+childPid=$!
+
+# Cleanup helper
+( if ! (ps -p $$ &> /dev/null || [ "`pgrep -a -g $$ 2> /dev/null`" != "" ] ); then
+   # `ps` is known to be unrunnable in the darwin sandbox-exec environment due
+   # to being a set-uid root program. pgrep exists in most environments, but not
+   # universally. In the event that we find ourselves running in an environment
+   # where *neither* exists, we have no reliable way to check if our parent is
+   # still alive - so simply disable this cleanup routine entirely.
+   exit 0
+ fi
+ while ps -p $$ &> /dev/null || [ "`pgrep -a -g $$ 2> /dev/null`" != "" ]; do
+    sleep 10
+ done
+ # Parent process not found - we've been abandoned! Clean up test processes.
+ kill_group SIGKILL $childPid
+) &
+cleanupPid=$!
+
+set +m
+
+wait $childPid
+# If interrupted by a signal, use the signal as the exit code. But allow
+# the child to actually finish from the signal we sent _it_ via signal_child.
+# (Waiting on a stopped process is a no-op).
+# Only once - if we receive multiple signals (of any sort), give up.
+exitCode=$?
+wait $childPid
+
+# By this point, we have everything we're willing to wait for. Tidy up our own
+# processes and move on.
+kill_group SIGKILL $childPid
+kill -SIGKILL $cleanupPid &> /dev/null
+wait $cleanupPid
 
 for signal in $signals; do
   trap - ${signal}