Use posix_spawn to start the Bazel server. Introduce a new "daemonize" helper tool that takes the heavy-lifting of starting the Bazel server process as a daemon. This tool is pure C and does not have any big dependencies like gRPC so we can ensure it's thread-free. As a result, the code in this tool doesn't have to take a lot of care when running fork(), which results in easier to read code overall. Then, change the Bazel client to simply use posix_spawn to invoke this helper tool, which in turn runs the Bazel server as a daemon. The code in the Bazel client becomes simpler as well as we don't have to worry about the implications of using fork() from a threaded process. The switch to posix_spawn is necessary to address https://github.com/bazelbuild/bazel/issues/7446 because the only way to customize the QoS of a process in macOS is by using this API. Such a change will come separately. This change is intended to be a no-op. However, this fixes a bug introduced in unknown commit by which we violated the requirement of not doing any memory management from a child process started with fork(). (I don't think this bug ever manifested itself, but it was there -- which is funny because this piece of code went to great extents to not do the wrong thing... and a one-line change let hell break loose.) RELNOTES: None. PiperOrigin-RevId: 234991943
diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc index 306f9cf..f31155f 100644 --- a/src/main/cpp/blaze.cc +++ b/src/main/cpp/blaze.cc
@@ -685,6 +685,8 @@ BlazeServerStartup **server_startup) { vector<string> jvm_args_vector = GetArgumentArray(workspace_layout); string argument_string = GetArgumentString(jvm_args_vector); + const string binaries_dir = + GetEmbeddedBinariesRoot(globals->options->install_base); string server_dir = blaze_util::JoinPath(globals->options->output_base, "server"); // Write the cmdline argument string to the server dir. If we get to this @@ -708,7 +710,7 @@ return ExecuteDaemon(exe, jvm_args_vector, PrepareEnvironmentForJvm(), globals->jvm_log_file, globals->jvm_log_file_append, - server_dir, server_startup); + binaries_dir, server_dir, server_startup); } // Replace this process with blaze in standalone/batch mode.
diff --git a/src/main/cpp/blaze_util_platform.h b/src/main/cpp/blaze_util_platform.h index 7ed5373..707d7d1 100644 --- a/src/main/cpp/blaze_util_platform.h +++ b/src/main/cpp/blaze_util_platform.h
@@ -154,6 +154,7 @@ const std::map<std::string, EnvVarValue>& env, const std::string& daemon_output, const bool daemon_output_append, + const std::string& binaries_dir, const std::string& server_dir, BlazeServerStartup** server_startup);
diff --git a/src/main/cpp/blaze_util_posix.cc b/src/main/cpp/blaze_util_posix.cc index 8f87338..c420658 100644 --- a/src/main/cpp/blaze_util_posix.cc +++ b/src/main/cpp/blaze_util_posix.cc
@@ -22,6 +22,7 @@ #include <poll.h> #include <pwd.h> #include <signal.h> +#include <spawn.h> #include <stdarg.h> #include <stdio.h> #include <string.h> @@ -34,8 +35,11 @@ #include <time.h> #include <unistd.h> +#include <algorithm> #include <cassert> #include <cinttypes> +#include <fstream> +#include <iterator> #include <set> #include <string> @@ -223,6 +227,30 @@ charpp_[i] = NULL; } + // Constructs a new CharPP from a list of environment variables. + explicit CharPP(const std::map<string, EnvVarValue>& env) { + charpp_ = static_cast<char**>(malloc(sizeof(char*) * (env.size() + 1))); + size_t i = 0; + for (auto iter : env) { + const string& var = iter.first; + const EnvVarValue& value = iter.second; + + switch (value.action) { + case SET: + charpp_[i] = strdup((var + "=" + value.value).c_str()); + i++; + break; + + case UNSET: + break; + + default: + assert(false); + } + } + charpp_[i] = NULL; + } + // Deletes all memory held by the CharPP. ~CharPP() { for (char** ptr = charpp_; *ptr != NULL; ptr++) { @@ -258,40 +286,6 @@ return symlink(target.c_str(), link.c_str()) == 0; } -// Causes the current process to become a daemon (i.e. a child of -// init, detached from the terminal, in its own session.) We don't -// change cwd, though. -static void Daemonize(const char* daemon_output, - const bool daemon_output_append) { - // Don't call BAZEL_DIE or exit() in this function; we're already in a child - // process so it won't work as expected. Just don't do anything that can - // possibly fail. :) - - signal(SIGHUP, SIG_IGN); - if (fork() > 0) { - // This second fork is required iff there's any chance cmd will - // open an specific tty explicitly, e.g., open("/dev/tty23"). If - // not, this fork can be removed. - _exit(blaze_exit_code::SUCCESS); - } - - setsid(); - - close(0); - close(1); - close(2); - - open("/dev/null", O_RDONLY); // stdin - // stdout: - int out_flags = - O_WRONLY | O_CREAT | (daemon_output_append ? O_APPEND : O_TRUNC); - if (open(daemon_output, out_flags, 0666) == -1) { - // In a daemon, no-one can hear you scream. - open("/dev/null", O_WRONLY); - } - (void) dup(STDOUT_FILENO); // stderr (2>&1) -} - // Notifies the client about the death of the server process by keeping a socket // open in the server. If the server dies for any reason, the socket will be // closed, which can be detected by the client. @@ -331,87 +325,30 @@ } } -// NB: There should only be system calls in this function. See the comment -// before ExecuteDaemon() to understand why. strerror() and strlen() are -// hopefully okay. -static void DieAfterFork(const char* message) { - char* error_string = strerror(errno); // strerror is hopefully okay - write(STDERR_FILENO, message, strlen(message)); // strlen should be OK - write(STDERR_FILENO, ": ", 2); - write(STDERR_FILENO, error_string, strlen(error_string)); - write(STDERR_FILENO, "\n", 1); - _exit(blaze_exit_code::INTERNAL_ERROR); -} - -// NB: There should only be system calls in this function. See the comment -// before ExecuteDaemon() to understand why. -static void ReadFromFdWithRetryEintr( - int fd, void *buf, size_t count, const char* error_message) { - ssize_t result; - do { - result = read(fd, buf, count); - } while (result < 0 && errno == EINTR); - if (result < 0 || static_cast<size_t>(result) != count) { - DieAfterFork(error_message); - } -} - -// NB: There should only be system calls in this function. See the comment -// before ExecuteDaemon() to understand why. -static void WriteToFdWithRetryEintr( - int fd, void *buf, size_t count, const char* error_message) { - ssize_t result; - do { - // Ideally, we'd use send(..., MSG_NOSIGNAL), but that's not available on - // Darwin. - result = write(fd, buf, count); - } while (result < 0 && errno == EINTR); - if (result < 0 || static_cast<size_t>(result) != count) { - DieAfterFork(error_message); - } -} - void WriteSystemSpecificProcessIdentifier( const string& server_dir, pid_t server_pid); -// We do a lot of seemingly-needless complications to avoid doing anything -// complex after a fork(). The reason is that forking in multi-threaded -// programs is fraught with peril. -// -// One root cause is that fork() only forks the thread it was called from. If -// another thread holds a lock, it will never be relinquished in the child -// process, and malloc() sometimes does lock. Thus, we need to avoid allocating -// any dynamic memory in the child process, which is hard if any C++ feature is -// used. -// -// We also don't know what libc does behind the scenes, so it's advisable to -// avoid anything that's not a system call. read(), write(), fork() and execv() -// aren't guaranteed to be pure system calls, either, but we can't get any -// closer to this ideal without writing logic specific to each POSIX system we -// run on. -// -// Another way to tackle this issue would be to pre-fork a child process before -// spawning any threads which is then used to fork all the other necessary -// child processes. However, then we'd need to invent a protocol that can handle -// sending stdout/stderr back and forking multiple times, which isn't trivial, -// either. -// -// Yet another fix would be not to use multiple threads before forking. However, -// we need to use gRPC to figure out if a server process is already present and -// gRPC currently cannot be convinced not to spawn any threads. -// -// Another way would be to use posix_spawn(). However, since we need to create a -// daemon process, we'd need to posix_spawn() a little child process that -// daemonizes then exec()s the actual JVM, which is also non-trivial. So I hope -// this will be good enough because for all its flaws, this solution is at least -// localized here. int ExecuteDaemon(const string& exe, const std::vector<string>& args_vector, const std::map<string, EnvVarValue>& env, const string& daemon_output, const bool daemon_output_append, + const string& binaries_dir, const string& server_dir, BlazeServerStartup** server_startup) { + const string pid_file = blaze_util::JoinPath(server_dir, kServerPidFile); + const string daemonize = blaze_util::JoinPath(binaries_dir, "daemonize"); + + std::vector<string> daemonize_args = + {"daemonize", "-l", daemon_output, "-p", pid_file}; + if (daemon_output_append) { + daemonize_args.push_back("-a"); + } + daemonize_args.push_back("--"); + daemonize_args.push_back(exe); + std::copy(args_vector.begin(), args_vector.end(), + std::back_inserter(daemonize_args)); + int fds[2]; if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) { @@ -419,63 +356,50 @@ << "socket creation failed: " << GetLastErrorString(); } - const char* daemon_output_chars = daemon_output.c_str(); - CharPP argv(args_vector); - const char* exe_chars = exe.c_str(); - - int child = fork(); - if (child == -1) { + posix_spawn_file_actions_t file_actions; + if (posix_spawn_file_actions_init(&file_actions) == -1) { BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR) - << "fork() failed: " << GetLastErrorString(); - return -1; - } else if (child > 0) { - // Parent process (i.e. the client) - close(fds[1]); // parent keeps one side... - int unused_status; - waitpid(child, &unused_status, 0); // child double-forks - pid_t server_pid = 0; - ReadFromFdWithRetryEintr(fds[0], &server_pid, sizeof server_pid, - "cannot read server PID from server"); - string pid_file = blaze_util::JoinPath(server_dir, kServerPidFile); - if (!blaze_util::WriteFile(ToString(server_pid), pid_file)) { - BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR) - << "cannot write PID file: " << GetLastErrorString(); - return -1; - } - - WriteSystemSpecificProcessIdentifier(server_dir, server_pid); - char dummy = 'a'; - WriteToFdWithRetryEintr(fds[0], &dummy, 1, - "cannot notify server about having written PID file"); - *server_startup = new SocketBlazeServerStartup(fds[0]); - return server_pid; - } else { - // Child process (i.e. the server) - // NB: There should only be system calls in this branch. See the comment - // before ExecuteDaemon() to understand why. - close(fds[0]); // ...child keeps the other. - - { - WithEnvVars env_obj(env); - Daemonize(daemon_output_chars, daemon_output_append); - pid_t server_pid = getpid(); - WriteToFdWithRetryEintr(fds[1], &server_pid, sizeof server_pid, - "cannot communicate server PID to client"); - // We wait until the client writes the PID file so that there is no race - // condition; the server expects the PID file to already be there so that - // it can read it and know its own PID (see the ctor GrpcServerImpl) and - // so that it can kill itself if the PID file is deleted (see - // GrpcServerImpl.PidFileWatcherThread) - char dummy; - ReadFromFdWithRetryEintr( - fds[1], &dummy, 1, - "cannot get PID file write acknowledgement from client"); - - execv(exe_chars, argv.get()); - DieAfterFork("Cannot execute daemon"); - return -1; - } + << "Failed to create posix_spawn_file_actions: " << GetLastErrorString(); } + if (posix_spawn_file_actions_addclose(&file_actions, fds[0]) == -1) { + BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR) + << "Failed to modify posix_spawn_file_actions: "<< GetLastErrorString(); + } + + pid_t transient_pid; + if (posix_spawn(&transient_pid, daemonize.c_str(), &file_actions, NULL, + CharPP(daemonize_args).get(), CharPP(env).get()) == -1) { + BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR) + << "Failed to execute JVM via " << daemonize + << ": " << GetLastErrorString(); + } + close(fds[1]); + + posix_spawn_file_actions_destroy(&file_actions); + + // Wait for daemonize to exit. This guarantees that the pid file exists. + int status; + if (waitpid(transient_pid, &status, 0) == -1) { + BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR) + << "waitpid failed: " << GetLastErrorString(); + } + if (!WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS) { + BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR) + << "daemonize didn't exit cleanly: " << GetLastErrorString(); + } + + std::ifstream pid_reader(pid_file); + if (!pid_reader) { + BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR) + << "Failed to open " << pid_file << ": " << GetLastErrorString(); + } + pid_t server_pid; + pid_reader >> server_pid; + + WriteSystemSpecificProcessIdentifier(server_dir, server_pid); + + *server_startup = new SocketBlazeServerStartup(fds[0]); + return server_pid; } string GetHashedBaseDir(const string& root, const string& hashable) {
diff --git a/src/main/cpp/blaze_util_windows.cc b/src/main/cpp/blaze_util_windows.cc index 3d01b44..b113c54 100644 --- a/src/main/cpp/blaze_util_windows.cc +++ b/src/main/cpp/blaze_util_windows.cc
@@ -648,6 +648,7 @@ const std::map<string, EnvVarValue>& env, const string& daemon_output, const bool daemon_out_append, + const string& binaries_dir, const string& server_dir, BlazeServerStartup** server_startup) { wstring wdaemon_output;
diff --git a/src/main/tools/BUILD b/src/main/tools/BUILD index 8362c0c..d1b45f8 100644 --- a/src/main/tools/BUILD +++ b/src/main/tools/BUILD
@@ -1,5 +1,10 @@ package(default_visibility = ["//src:__subpackages__"]) +cc_binary( + name = "daemonize", + srcs = ["daemonize.c"], +) + cc_library( name = "logging", srcs = ["logging.cc"],
diff --git a/src/main/tools/daemonize.c b/src/main/tools/daemonize.c new file mode 100644 index 0000000..16429f5 --- /dev/null +++ b/src/main/tools/daemonize.c
@@ -0,0 +1,210 @@ +// Copyright 2019 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// daemonize [-a] -l log_path -p pid_path -- binary_path binary_name [args] +// +// daemonize spawns a program as a daemon, redirecting all of its output to the +// given log_path and writing the daemon's PID to pid_path. binary_path +// specifies the full location of the program to execute and binary_name +// indicates its display name (aka argv[0], so the optional args do not have to +// specify it again). log_path is created/truncated unless the -a (append) flag +// is specified. Also note that pid_path is guaranteed to exists when this +// program terminates successfully. +// +// Some important details about the implementation of this program: +// +// * No threads to ensure the use of fork below does not cause trouble. +// +// * Pure C, no C++. This is intentional to keep the program low overhead +// and to avoid the accidental introduction of heavy dependencies that +// could spawn threads. +// +// * Error handling is extensive but there is no error propagation. Given +// that the goal of this program is just to spawn another one as a daemon, +// we take the freedom to immediatey exit from anywhere as soon as we +// hit an error. + +#include <sys/types.h> + +#include <assert.h> +#include <err.h> +#include <fcntl.h> +#include <getopt.h> +#include <inttypes.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +// Configures std{in,out,err} of the current process to serve as a daemon. +// +// stdin is configured to read from /dev/null. +// +// stdout and stderr are configured to write to log_path, which is created and +// truncated unless log_append is set to true, in which case it is open for +// append if it exists. +static void SetupStdio(const char* log_path, bool log_append) { + close(STDIN_FILENO); + int fd = open("/dev/null", O_RDONLY); + if (fd == -1) { + err(EXIT_FAILURE, "Failed to open /dev/null"); + } + assert(fd == STDIN_FILENO); + + close(STDOUT_FILENO); + int flags = O_WRONLY | O_CREAT | (log_append ? O_APPEND : O_TRUNC); + fd = open(log_path, flags, 0666); + if (fd == -1) { + err(EXIT_FAILURE, "Failed to create log file %s", log_path); + } + assert(fd == STDOUT_FILENO); + + close(STDERR_FILENO); + fd = dup(STDOUT_FILENO); + if (fd == -1) { + err(EXIT_FAILURE, "dup failed"); + } + assert(fd == STDERR_FILENO); +} + +// Writes the given pid to a new file at pid_path. +// +// Once the pid file has been created, this notifies pid_done_fd by writing a +// dummy character to it and closing it. +static void WritePidFile(pid_t pid, const char* pid_path, int pid_done_fd) { + FILE* pid_file = fopen(pid_path, "w"); + if (pid_file == NULL) { + err(EXIT_FAILURE, "Failed to create %s", pid_path); + } + fprintf(pid_file, "%" PRIdMAX, (intmax_t) pid); + fclose(pid_file); + + char dummy = '\0'; + write(pid_done_fd, &dummy, sizeof(dummy)); + close(pid_done_fd); +} + +static void ExecAsDaemon(const char* log_path, bool log_append, int pid_done_fd, + const char* exe, char** argv) + __attribute__((noreturn)); + +// Executes the requested binary configuring it to behave as a daemon. +// +// The stdout and stderr of the current process are redirected to the given +// log_path. See SetupStdio for details on how this is handled. +// +// This blocks execution until pid_done_fd receives a write. We do this +// because the Bazel server process (which is what we start with this helper +// binary) requires the PID file to be present at startup time so we must +// wait until the parent process has created it. +// +// This function never returns. +static void ExecAsDaemon(const char* log_path, bool log_append, int pid_done_fd, + const char* exe, char** argv) { + char dummy; + if (read(pid_done_fd, &dummy, sizeof(dummy)) == -1) { + err(EXIT_FAILURE, "Failed to wait for pid file creation"); + } + close(pid_done_fd); + + if (signal(SIGHUP, SIG_IGN) == SIG_ERR) { + err(EXIT_FAILURE, "Failed to install SIGHUP handler"); + } + + if (setsid() == -1) { + err(EXIT_FAILURE, "setsid failed"); + } + + SetupStdio(log_path, log_append); + + execv(exe, argv); + err(EXIT_FAILURE, "Failed to execute %s", exe); +} + +// Starts the given process as a daemon. +// +// This spawns a subprocess that will be configured to run the desired program +// as a daemon. The program to run is supplied in exe and the arguments to it +// are given in the NULL-terminated argv. argv[0] must be present and +// contain the program name (which may or may not match the basename of exe). +static void Daemonize(const char* log_path, bool log_append, + const char* pid_path, const char* exe, char** argv) { + assert(argv[0] != NULL); + + int pid_done_fds[2]; + if (pipe(pid_done_fds) == -1) { + err(EXIT_FAILURE, "pipe failed"); + } + + pid_t pid = fork(); + if (pid == -1) { + err(EXIT_FAILURE, "fork failed"); + } else if (pid == 0) { + close(pid_done_fds[1]); + ExecAsDaemon(log_path, log_append, pid_done_fds[0], exe, argv); + abort(); // NOLINT Unreachable. + } + close(pid_done_fds[0]); + + WritePidFile(pid, pid_path, pid_done_fds[1]); +} + +// Program entry point. +// +// The primary responsibility of this function is to parse program options. +// Once that is done, delegates all work to Daemonize. +int main(int argc, char** argv) { + bool log_append = false; + const char* log_path = NULL; + const char* pid_path = NULL; + int opt; + while ((opt = getopt(argc, argv, ":al:p:")) != -1) { + switch (opt) { + case 'a': + log_append = true; + break; + + case 'l': + log_path = optarg; + break; + + case 'p': + pid_path = optarg; + break; + + case ':': + errx(EXIT_FAILURE, "Option -%c requires an argument", optopt); + + case '?': + default: + errx(EXIT_FAILURE, "Unknown option -%c", optopt); + } + } + argc -= optind; + argv += optind; + + if (log_path == NULL) { + errx(EXIT_FAILURE, "Must specify a log file with -l"); + } + if (pid_path == NULL) { + errx(EXIT_FAILURE, "Must specify a pid file with -p"); + } + + if (argc < 2) { + errx(EXIT_FAILURE, "Must provide at least an executable name and arg0"); + } + Daemonize(log_path, log_append, pid_path, argv[0], argv + 1); + return EXIT_SUCCESS; +}