Implement graceful SIGTERM handling in the process-wrapper.
This is intended to be used with the new dynamic spawn scheduler (via the
--process_wrapper_extra_flags flag), which triggers lots more local process
cancellations, in an attempt to workaround an odd OSXFUSE bug for which we
have no diagnostic nor fix yet.
Part of #7818.
RELNOTES: None.
PiperOrigin-RevId: 314953746
diff --git a/src/main/java/com/google/devtools/build/lib/exec/local/LocalExecutionOptions.java b/src/main/java/com/google/devtools/build/lib/exec/local/LocalExecutionOptions.java
index 660b0bd..f404845 100644
--- a/src/main/java/com/google/devtools/build/lib/exec/local/LocalExecutionOptions.java
+++ b/src/main/java/com/google/devtools/build/lib/exec/local/LocalExecutionOptions.java
@@ -72,6 +72,17 @@
public boolean localLockfreeOutput;
@Option(
+ name = "experimental_process_wrapper_graceful_sigterm",
+ defaultValue = "false",
+ documentationCategory = OptionDocumentationCategory.UNDOCUMENTED,
+ effectTags = {OptionEffectTag.EXECUTION},
+ help =
+ "When true, make the process-wrapper propagate SIGTERMs (used by the dynamic scheduler "
+ + "to stop process trees) to the subprocesses themselves, giving them the grace "
+ + "period in --local_termination_grace_seconds before forcibly sending a SIGKILL.")
+ public boolean processWrapperGracefulSigterm;
+
+ @Option(
name = "experimental_process_wrapper_wait_fix",
defaultValue = "false",
documentationCategory = OptionDocumentationCategory.UNDOCUMENTED,
diff --git a/src/main/java/com/google/devtools/build/lib/runtime/ProcessWrapper.java b/src/main/java/com/google/devtools/build/lib/runtime/ProcessWrapper.java
index 73dba33..e0ef4ec 100644
--- a/src/main/java/com/google/devtools/build/lib/runtime/ProcessWrapper.java
+++ b/src/main/java/com/google/devtools/build/lib/runtime/ProcessWrapper.java
@@ -61,6 +61,9 @@
ImmutableList.Builder<String> extraFlags = ImmutableList.builder();
if (options != null) {
+ if (options.processWrapperGracefulSigterm) {
+ extraFlags.add("--graceful_sigterm");
+ }
if (options.processWrapperWaitFix) {
extraFlags.add("--wait_fix");
}
diff --git a/src/main/tools/process-wrapper-legacy.cc b/src/main/tools/process-wrapper-legacy.cc
index 0580291..60a01e6 100644
--- a/src/main/tools/process-wrapper-legacy.cc
+++ b/src/main/tools/process-wrapper-legacy.cc
@@ -72,12 +72,28 @@
}
}
+// Sets up signal handlers to kill all subprocesses when the given signal is
+// triggered. Whether subprocesses are abruptly terminated or not depends on
+// the signal type and the user configuration.
+void LegacyProcessWrapper::SetupSignalHandlers() {
+ // SIGALRM represents a timeout so we should give the process a bit of time
+ // to die gracefully if it needs it.
+ InstallSignalHandler(SIGALRM, OnGracefulSignal);
+
+ // Termination signals should kill the process quickly, as it's typically
+ // blocking the return of the prompt after a user hits "Ctrl-C". But we allow
+ // customizing the behavior of SIGTERM because it's used by the dynamic
+ // scheduler to terminate process trees in a controlled manner.
+ if (opt.graceful_sigterm) {
+ InstallSignalHandler(SIGTERM, OnGracefulSignal);
+ } else {
+ InstallSignalHandler(SIGTERM, OnAbruptSignal);
+ }
+ InstallSignalHandler(SIGINT, OnAbruptSignal);
+}
+
void LegacyProcessWrapper::WaitForChild() {
- // Set up a signal handler which kills all subprocesses when the given signal
- // is triggered.
- InstallSignalHandler(SIGALRM, OnSignal);
- InstallSignalHandler(SIGTERM, OnSignal);
- InstallSignalHandler(SIGINT, OnSignal);
+ SetupSignalHandlers();
if (opt.timeout_secs > 0) {
SetTimeout(opt.timeout_secs);
}
@@ -144,16 +160,13 @@
}
// Called when timeout or signal occurs.
-void LegacyProcessWrapper::OnSignal(int sig) {
+void LegacyProcessWrapper::OnAbruptSignal(int sig) {
last_signal = sig;
+ KillEverything(child_pid, false, opt.kill_delay_secs);
+}
- if (sig == SIGALRM) {
- // SIGALRM represents a timeout, so we should give the process a bit of time
- // to die gracefully if it needs it.
- KillEverything(child_pid, true, opt.kill_delay_secs);
- } else {
- // Signals should kill the process quickly, as it's typically blocking the
- // return of the prompt after a user hits "Ctrl-C".
- KillEverything(child_pid, false, opt.kill_delay_secs);
- }
+// Called when timeout or signal occurs.
+void LegacyProcessWrapper::OnGracefulSignal(int sig) {
+ last_signal = sig;
+ KillEverything(child_pid, true, opt.kill_delay_secs);
}
diff --git a/src/main/tools/process-wrapper-legacy.h b/src/main/tools/process-wrapper-legacy.h
index 943c4c1..2301a38 100644
--- a/src/main/tools/process-wrapper-legacy.h
+++ b/src/main/tools/process-wrapper-legacy.h
@@ -39,8 +39,10 @@
private:
static void SpawnChild();
+ static void SetupSignalHandlers();
static void WaitForChild();
- static void OnSignal(int sig);
+ static void OnAbruptSignal(int sig);
+ static void OnGracefulSignal(int sig);
static pid_t child_pid;
static volatile sig_atomic_t last_signal;
diff --git a/src/main/tools/process-wrapper-options.cc b/src/main/tools/process-wrapper-options.cc
index 837fd26..0c0e97e 100644
--- a/src/main/tools/process-wrapper-options.cc
+++ b/src/main/tools/process-wrapper-options.cc
@@ -41,6 +41,8 @@
fprintf(
stderr,
"\nPossible arguments:\n"
+ " -g/--graceful_sigterm propagate SIGTERM to the subprocess and delay "
+ "the corresponding SIGKILL until --kill_delay has passed\n"
" -t/--timeout <timeout> timeout after which the child process will be "
"terminated with SIGTERM\n"
" -k/--kill_delay <timeout> in case timeout occurs, how long to wait "
@@ -58,6 +60,7 @@
// global `opt` struct.
static void ParseCommandLine(const std::vector<char *> &args) {
static struct option long_options[] = {
+ {"graceful_sigterm", no_argument, 0, 'g'},
{"timeout", required_argument, 0, 't'},
{"kill_delay", required_argument, 0, 'k'},
{"stdout", required_argument, 0, 'o'},
@@ -70,9 +73,12 @@
extern int optind, optopt;
int c;
- while ((c = getopt_long(args.size(), args.data(), "+:t:k:o:e:s:dW",
+ while ((c = getopt_long(args.size(), args.data(), "+:gt:k:o:e:s:dW",
long_options, nullptr)) != -1) {
switch (c) {
+ case 'g':
+ opt.graceful_sigterm = true;
+ break;
case 't':
if (sscanf(optarg, "%lf", &opt.timeout_secs) != 1) {
Usage(args.front(), "Invalid timeout (-t) value: %s", optarg);
diff --git a/src/main/tools/process-wrapper-options.h b/src/main/tools/process-wrapper-options.h
index d7f269f..d1587d6 100644
--- a/src/main/tools/process-wrapper-options.h
+++ b/src/main/tools/process-wrapper-options.h
@@ -20,6 +20,8 @@
// Options parsing result.
struct Options {
+ // Whether to gracefully terminate subprocesses on SIGTERM (-g)
+ bool graceful_sigterm;
// How long to wait before killing the child (-t)
double timeout_secs;
// How long to wait before sending SIGKILL in case of timeout (-k)
diff --git a/src/test/shell/integration/process-wrapper_test.sh b/src/test/shell/integration/process-wrapper_test.sh
index 38d59ec..136edd6 100755
--- a/src/test/shell/integration/process-wrapper_test.sh
+++ b/src/test/shell/integration/process-wrapper_test.sh
@@ -34,6 +34,7 @@
readonly EXIT_STATUS_SIGABRT=$((128 + 6))
readonly EXIT_STATUS_SIGKILL=$((128 + 9))
readonly EXIT_STATUS_SIGALRM=$((128 + 14))
+readonly EXIT_STATUS_SIGTERM=$((128 + 15))
function set_up() {
rm -rf $OUT_DIR
@@ -143,6 +144,80 @@
assert_stdout 'ignoring signal'
}
+# Tests that sending a SIGTERM causes the process to receive such SIGTERM if
+# graceful SIGTERM handling is enabled, but with the process exiting on its own
+# without the need for a SIGKILL. To make sure that this is the case, we pass a
+# very large kill delay to cause the outer test to fail if we violate this
+# expectation.
+function test_sigterm_grace() {
+ $process_wrapper --graceful_sigterm --kill_delay=100000 \
+ --stdout=$OUT --stderr=$ERR \
+ /bin/sh -c \
+ 'trap "echo ignoring signal" INT TERM ARLM; \
+ for i in $(seq 5); do sleep 1; done; echo after' \
+ &> $TEST_log &
+ local pid=$!
+ sleep 1
+ kill -TERM "${pid}"
+ local code=0
+ wait "${pid}" || code=$?
+ assert_equals "${EXIT_STATUS_SIGTERM}" "$code"
+ assert_stdout 'ignoring signal
+after'
+}
+
+# Tests that sending a SIGTERM causes the process to receive such SIGTERM if
+# graceful SIGTERM handling is enabled and waits until the process has exited
+# on its own, even if that takes a little bit of time. To make sure that this is
+# the case, we pass a very large kill delay to cause the outer test to fail if
+# we violate this expectation.
+#
+# In the past, even though we would terminate the process quickly, we would
+# get stuck until the kill delay passed (because we'd be stuck waiting for a
+# zombie process without us actually collecting it). So this is a regression
+# test for that subtle bug.
+function test_sigterm_exits_as_soon_as_process_terminates() {
+ $process_wrapper --graceful_sigterm --kill_delay=100000 \
+ --stdout=$OUT --stderr=$ERR \
+ /bin/sh -c \
+ 'trap "" INT TERM ARLM; \
+ for i in $(seq 5); do echo sleeping $i; sleep 1; done' \
+ &> $TEST_log &
+ local pid=$!
+ sleep 1
+ kill -TERM "${pid}"
+ local code=0
+ wait "${pid}" || code=$?
+ assert_equals "${EXIT_STATUS_SIGTERM}" "$code"
+ assert_stdout 'sleeping 1
+sleeping 2
+sleeping 3
+sleeping 4
+sleeping 5'
+}
+
+# Tests that sending a SIGTERM causes the process to receive such SIGTERM if
+# graceful SIGTERM handling is enabled and a SIGKILL later once a kill delay has
+# passed without the process exiting on its own. We make the process get stuck
+# indefinitely until killed, and we do this with individual calls to sleep
+# instead of a single one to ensure that a single termination of the sleep
+# subprocess doesn't cause us to spuriously exit and thus pass this test.
+function test_sigterm_kill() {
+ $process_wrapper --graceful_sigterm --kill_delay=5 \
+ --stdout=$OUT --stderr=$ERR \
+ /bin/sh -c \
+ 'trap "echo ignoring signal" INT TERM ARLM; \
+ while :; do sleep 1; done; echo after' \
+ &> $TEST_log &
+ local pid=$!
+ sleep 1
+ kill -TERM "${pid}"
+ local code=0
+ wait "${pid}" || code=$?
+ assert_equals "${EXIT_STATUS_SIGTERM}" "$code"
+ assert_stdout 'ignoring signal'
+}
+
function test_execvp_error_message() {
local code=0
$process_wrapper --stdout=$OUT --stderr=$ERR /bin/notexisting &> $TEST_log || code=$?