Use posix_spawn to start the Bazel server.
Introduce a new "daemonize" helper tool that takes the heavy-lifting
of starting the Bazel server process as a daemon. This tool is pure
C and does not have any big dependencies like gRPC so we can ensure
it's thread-free. As a result, the code in this tool doesn't have
to take a lot of care when running fork(), which results in easier
to read code overall.
Then, change the Bazel client to simply use posix_spawn to invoke
this helper tool, which in turn runs the Bazel server as a daemon.
The code in the Bazel client becomes simpler as well as we don't
have to worry about the implications of using fork() from a threaded
process.
The switch to posix_spawn is necessary to address
https://github.com/bazelbuild/bazel/issues/7446 because the only way
to customize the QoS of a process in macOS is by using this API.
Such a change will come separately.
This change is intended to be a no-op. However, this fixes a bug
introduced in unknown commit by which we violated the requirement of
not doing any memory management from a child process started with
fork(). (I don't think this bug ever manifested itself, but it was
there -- which is funny because this piece of code went to great
extents to not do the wrong thing... and a one-line change let
hell break loose.)
RELNOTES: None.
PiperOrigin-RevId: 234991943
diff --git a/src/BUILD b/src/BUILD
index e7fd73a..82c5f13 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -358,7 +358,12 @@
"//src/main/tools:jdk-support",
"//src/main/tools:linux-sandbox",
"//tools/osx:xcode-locator",
- ],
+ ] + select({
+ "//src/conditions:windows": [],
+ "//conditions:default": [
+ "//src/main/tools:daemonize",
+ ],
+ }),
outs = ["package" + suffix + ".zip"],
cmd = "$(location :package-bazel.sh) $@ " + ("" if embed else "''") + " $(SRCS)",
tools = ["package-bazel.sh"],
diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc
index 306f9cf..f31155f 100644
--- a/src/main/cpp/blaze.cc
+++ b/src/main/cpp/blaze.cc
@@ -685,6 +685,8 @@
BlazeServerStartup **server_startup) {
vector<string> jvm_args_vector = GetArgumentArray(workspace_layout);
string argument_string = GetArgumentString(jvm_args_vector);
+ const string binaries_dir =
+ GetEmbeddedBinariesRoot(globals->options->install_base);
string server_dir =
blaze_util::JoinPath(globals->options->output_base, "server");
// Write the cmdline argument string to the server dir. If we get to this
@@ -708,7 +710,7 @@
return ExecuteDaemon(exe, jvm_args_vector, PrepareEnvironmentForJvm(),
globals->jvm_log_file, globals->jvm_log_file_append,
- server_dir, server_startup);
+ binaries_dir, server_dir, server_startup);
}
// Replace this process with blaze in standalone/batch mode.
diff --git a/src/main/cpp/blaze_util_platform.h b/src/main/cpp/blaze_util_platform.h
index 7ed5373..707d7d1 100644
--- a/src/main/cpp/blaze_util_platform.h
+++ b/src/main/cpp/blaze_util_platform.h
@@ -154,6 +154,7 @@
const std::map<std::string, EnvVarValue>& env,
const std::string& daemon_output,
const bool daemon_output_append,
+ const std::string& binaries_dir,
const std::string& server_dir,
BlazeServerStartup** server_startup);
diff --git a/src/main/cpp/blaze_util_posix.cc b/src/main/cpp/blaze_util_posix.cc
index 8f87338..c420658 100644
--- a/src/main/cpp/blaze_util_posix.cc
+++ b/src/main/cpp/blaze_util_posix.cc
@@ -22,6 +22,7 @@
#include <poll.h>
#include <pwd.h>
#include <signal.h>
+#include <spawn.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
@@ -34,8 +35,11 @@
#include <time.h>
#include <unistd.h>
+#include <algorithm>
#include <cassert>
#include <cinttypes>
+#include <fstream>
+#include <iterator>
#include <set>
#include <string>
@@ -223,6 +227,30 @@
charpp_[i] = NULL;
}
+ // Constructs a new CharPP from a list of environment variables.
+ explicit CharPP(const std::map<string, EnvVarValue>& env) {
+ charpp_ = static_cast<char**>(malloc(sizeof(char*) * (env.size() + 1)));
+ size_t i = 0;
+ for (auto iter : env) {
+ const string& var = iter.first;
+ const EnvVarValue& value = iter.second;
+
+ switch (value.action) {
+ case SET:
+ charpp_[i] = strdup((var + "=" + value.value).c_str());
+ i++;
+ break;
+
+ case UNSET:
+ break;
+
+ default:
+ assert(false);
+ }
+ }
+ charpp_[i] = NULL;
+ }
+
// Deletes all memory held by the CharPP.
~CharPP() {
for (char** ptr = charpp_; *ptr != NULL; ptr++) {
@@ -258,40 +286,6 @@
return symlink(target.c_str(), link.c_str()) == 0;
}
-// Causes the current process to become a daemon (i.e. a child of
-// init, detached from the terminal, in its own session.) We don't
-// change cwd, though.
-static void Daemonize(const char* daemon_output,
- const bool daemon_output_append) {
- // Don't call BAZEL_DIE or exit() in this function; we're already in a child
- // process so it won't work as expected. Just don't do anything that can
- // possibly fail. :)
-
- signal(SIGHUP, SIG_IGN);
- if (fork() > 0) {
- // This second fork is required iff there's any chance cmd will
- // open an specific tty explicitly, e.g., open("/dev/tty23"). If
- // not, this fork can be removed.
- _exit(blaze_exit_code::SUCCESS);
- }
-
- setsid();
-
- close(0);
- close(1);
- close(2);
-
- open("/dev/null", O_RDONLY); // stdin
- // stdout:
- int out_flags =
- O_WRONLY | O_CREAT | (daemon_output_append ? O_APPEND : O_TRUNC);
- if (open(daemon_output, out_flags, 0666) == -1) {
- // In a daemon, no-one can hear you scream.
- open("/dev/null", O_WRONLY);
- }
- (void) dup(STDOUT_FILENO); // stderr (2>&1)
-}
-
// Notifies the client about the death of the server process by keeping a socket
// open in the server. If the server dies for any reason, the socket will be
// closed, which can be detected by the client.
@@ -331,87 +325,30 @@
}
}
-// NB: There should only be system calls in this function. See the comment
-// before ExecuteDaemon() to understand why. strerror() and strlen() are
-// hopefully okay.
-static void DieAfterFork(const char* message) {
- char* error_string = strerror(errno); // strerror is hopefully okay
- write(STDERR_FILENO, message, strlen(message)); // strlen should be OK
- write(STDERR_FILENO, ": ", 2);
- write(STDERR_FILENO, error_string, strlen(error_string));
- write(STDERR_FILENO, "\n", 1);
- _exit(blaze_exit_code::INTERNAL_ERROR);
-}
-
-// NB: There should only be system calls in this function. See the comment
-// before ExecuteDaemon() to understand why.
-static void ReadFromFdWithRetryEintr(
- int fd, void *buf, size_t count, const char* error_message) {
- ssize_t result;
- do {
- result = read(fd, buf, count);
- } while (result < 0 && errno == EINTR);
- if (result < 0 || static_cast<size_t>(result) != count) {
- DieAfterFork(error_message);
- }
-}
-
-// NB: There should only be system calls in this function. See the comment
-// before ExecuteDaemon() to understand why.
-static void WriteToFdWithRetryEintr(
- int fd, void *buf, size_t count, const char* error_message) {
- ssize_t result;
- do {
- // Ideally, we'd use send(..., MSG_NOSIGNAL), but that's not available on
- // Darwin.
- result = write(fd, buf, count);
- } while (result < 0 && errno == EINTR);
- if (result < 0 || static_cast<size_t>(result) != count) {
- DieAfterFork(error_message);
- }
-}
-
void WriteSystemSpecificProcessIdentifier(
const string& server_dir, pid_t server_pid);
-// We do a lot of seemingly-needless complications to avoid doing anything
-// complex after a fork(). The reason is that forking in multi-threaded
-// programs is fraught with peril.
-//
-// One root cause is that fork() only forks the thread it was called from. If
-// another thread holds a lock, it will never be relinquished in the child
-// process, and malloc() sometimes does lock. Thus, we need to avoid allocating
-// any dynamic memory in the child process, which is hard if any C++ feature is
-// used.
-//
-// We also don't know what libc does behind the scenes, so it's advisable to
-// avoid anything that's not a system call. read(), write(), fork() and execv()
-// aren't guaranteed to be pure system calls, either, but we can't get any
-// closer to this ideal without writing logic specific to each POSIX system we
-// run on.
-//
-// Another way to tackle this issue would be to pre-fork a child process before
-// spawning any threads which is then used to fork all the other necessary
-// child processes. However, then we'd need to invent a protocol that can handle
-// sending stdout/stderr back and forking multiple times, which isn't trivial,
-// either.
-//
-// Yet another fix would be not to use multiple threads before forking. However,
-// we need to use gRPC to figure out if a server process is already present and
-// gRPC currently cannot be convinced not to spawn any threads.
-//
-// Another way would be to use posix_spawn(). However, since we need to create a
-// daemon process, we'd need to posix_spawn() a little child process that
-// daemonizes then exec()s the actual JVM, which is also non-trivial. So I hope
-// this will be good enough because for all its flaws, this solution is at least
-// localized here.
int ExecuteDaemon(const string& exe,
const std::vector<string>& args_vector,
const std::map<string, EnvVarValue>& env,
const string& daemon_output,
const bool daemon_output_append,
+ const string& binaries_dir,
const string& server_dir,
BlazeServerStartup** server_startup) {
+ const string pid_file = blaze_util::JoinPath(server_dir, kServerPidFile);
+ const string daemonize = blaze_util::JoinPath(binaries_dir, "daemonize");
+
+ std::vector<string> daemonize_args =
+ {"daemonize", "-l", daemon_output, "-p", pid_file};
+ if (daemon_output_append) {
+ daemonize_args.push_back("-a");
+ }
+ daemonize_args.push_back("--");
+ daemonize_args.push_back(exe);
+ std::copy(args_vector.begin(), args_vector.end(),
+ std::back_inserter(daemonize_args));
+
int fds[2];
if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) {
@@ -419,63 +356,50 @@
<< "socket creation failed: " << GetLastErrorString();
}
- const char* daemon_output_chars = daemon_output.c_str();
- CharPP argv(args_vector);
- const char* exe_chars = exe.c_str();
-
- int child = fork();
- if (child == -1) {
+ posix_spawn_file_actions_t file_actions;
+ if (posix_spawn_file_actions_init(&file_actions) == -1) {
BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
- << "fork() failed: " << GetLastErrorString();
- return -1;
- } else if (child > 0) {
- // Parent process (i.e. the client)
- close(fds[1]); // parent keeps one side...
- int unused_status;
- waitpid(child, &unused_status, 0); // child double-forks
- pid_t server_pid = 0;
- ReadFromFdWithRetryEintr(fds[0], &server_pid, sizeof server_pid,
- "cannot read server PID from server");
- string pid_file = blaze_util::JoinPath(server_dir, kServerPidFile);
- if (!blaze_util::WriteFile(ToString(server_pid), pid_file)) {
- BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
- << "cannot write PID file: " << GetLastErrorString();
- return -1;
- }
-
- WriteSystemSpecificProcessIdentifier(server_dir, server_pid);
- char dummy = 'a';
- WriteToFdWithRetryEintr(fds[0], &dummy, 1,
- "cannot notify server about having written PID file");
- *server_startup = new SocketBlazeServerStartup(fds[0]);
- return server_pid;
- } else {
- // Child process (i.e. the server)
- // NB: There should only be system calls in this branch. See the comment
- // before ExecuteDaemon() to understand why.
- close(fds[0]); // ...child keeps the other.
-
- {
- WithEnvVars env_obj(env);
- Daemonize(daemon_output_chars, daemon_output_append);
- pid_t server_pid = getpid();
- WriteToFdWithRetryEintr(fds[1], &server_pid, sizeof server_pid,
- "cannot communicate server PID to client");
- // We wait until the client writes the PID file so that there is no race
- // condition; the server expects the PID file to already be there so that
- // it can read it and know its own PID (see the ctor GrpcServerImpl) and
- // so that it can kill itself if the PID file is deleted (see
- // GrpcServerImpl.PidFileWatcherThread)
- char dummy;
- ReadFromFdWithRetryEintr(
- fds[1], &dummy, 1,
- "cannot get PID file write acknowledgement from client");
-
- execv(exe_chars, argv.get());
- DieAfterFork("Cannot execute daemon");
- return -1;
- }
+ << "Failed to create posix_spawn_file_actions: " << GetLastErrorString();
}
+ if (posix_spawn_file_actions_addclose(&file_actions, fds[0]) == -1) {
+ BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
+ << "Failed to modify posix_spawn_file_actions: "<< GetLastErrorString();
+ }
+
+ pid_t transient_pid;
+ if (posix_spawn(&transient_pid, daemonize.c_str(), &file_actions, NULL,
+ CharPP(daemonize_args).get(), CharPP(env).get()) == -1) {
+ BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
+ << "Failed to execute JVM via " << daemonize
+ << ": " << GetLastErrorString();
+ }
+ close(fds[1]);
+
+ posix_spawn_file_actions_destroy(&file_actions);
+
+ // Wait for daemonize to exit. This guarantees that the pid file exists.
+ int status;
+ if (waitpid(transient_pid, &status, 0) == -1) {
+ BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
+ << "waitpid failed: " << GetLastErrorString();
+ }
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS) {
+ BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
+ << "daemonize didn't exit cleanly: " << GetLastErrorString();
+ }
+
+ std::ifstream pid_reader(pid_file);
+ if (!pid_reader) {
+ BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
+ << "Failed to open " << pid_file << ": " << GetLastErrorString();
+ }
+ pid_t server_pid;
+ pid_reader >> server_pid;
+
+ WriteSystemSpecificProcessIdentifier(server_dir, server_pid);
+
+ *server_startup = new SocketBlazeServerStartup(fds[0]);
+ return server_pid;
}
string GetHashedBaseDir(const string& root, const string& hashable) {
diff --git a/src/main/cpp/blaze_util_windows.cc b/src/main/cpp/blaze_util_windows.cc
index 3d01b44..b113c54 100644
--- a/src/main/cpp/blaze_util_windows.cc
+++ b/src/main/cpp/blaze_util_windows.cc
@@ -648,6 +648,7 @@
const std::map<string, EnvVarValue>& env,
const string& daemon_output,
const bool daemon_out_append,
+ const string& binaries_dir,
const string& server_dir,
BlazeServerStartup** server_startup) {
wstring wdaemon_output;
diff --git a/src/main/tools/BUILD b/src/main/tools/BUILD
index 8362c0c..d1b45f8 100644
--- a/src/main/tools/BUILD
+++ b/src/main/tools/BUILD
@@ -1,5 +1,10 @@
package(default_visibility = ["//src:__subpackages__"])
+cc_binary(
+ name = "daemonize",
+ srcs = ["daemonize.c"],
+)
+
cc_library(
name = "logging",
srcs = ["logging.cc"],
diff --git a/src/main/tools/daemonize.c b/src/main/tools/daemonize.c
new file mode 100644
index 0000000..16429f5
--- /dev/null
+++ b/src/main/tools/daemonize.c
@@ -0,0 +1,210 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// daemonize [-a] -l log_path -p pid_path -- binary_path binary_name [args]
+//
+// daemonize spawns a program as a daemon, redirecting all of its output to the
+// given log_path and writing the daemon's PID to pid_path. binary_path
+// specifies the full location of the program to execute and binary_name
+// indicates its display name (aka argv[0], so the optional args do not have to
+// specify it again). log_path is created/truncated unless the -a (append) flag
+// is specified. Also note that pid_path is guaranteed to exists when this
+// program terminates successfully.
+//
+// Some important details about the implementation of this program:
+//
+// * No threads to ensure the use of fork below does not cause trouble.
+//
+// * Pure C, no C++. This is intentional to keep the program low overhead
+// and to avoid the accidental introduction of heavy dependencies that
+// could spawn threads.
+//
+// * Error handling is extensive but there is no error propagation. Given
+// that the goal of this program is just to spawn another one as a daemon,
+// we take the freedom to immediatey exit from anywhere as soon as we
+// hit an error.
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <err.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+// Configures std{in,out,err} of the current process to serve as a daemon.
+//
+// stdin is configured to read from /dev/null.
+//
+// stdout and stderr are configured to write to log_path, which is created and
+// truncated unless log_append is set to true, in which case it is open for
+// append if it exists.
+static void SetupStdio(const char* log_path, bool log_append) {
+ close(STDIN_FILENO);
+ int fd = open("/dev/null", O_RDONLY);
+ if (fd == -1) {
+ err(EXIT_FAILURE, "Failed to open /dev/null");
+ }
+ assert(fd == STDIN_FILENO);
+
+ close(STDOUT_FILENO);
+ int flags = O_WRONLY | O_CREAT | (log_append ? O_APPEND : O_TRUNC);
+ fd = open(log_path, flags, 0666);
+ if (fd == -1) {
+ err(EXIT_FAILURE, "Failed to create log file %s", log_path);
+ }
+ assert(fd == STDOUT_FILENO);
+
+ close(STDERR_FILENO);
+ fd = dup(STDOUT_FILENO);
+ if (fd == -1) {
+ err(EXIT_FAILURE, "dup failed");
+ }
+ assert(fd == STDERR_FILENO);
+}
+
+// Writes the given pid to a new file at pid_path.
+//
+// Once the pid file has been created, this notifies pid_done_fd by writing a
+// dummy character to it and closing it.
+static void WritePidFile(pid_t pid, const char* pid_path, int pid_done_fd) {
+ FILE* pid_file = fopen(pid_path, "w");
+ if (pid_file == NULL) {
+ err(EXIT_FAILURE, "Failed to create %s", pid_path);
+ }
+ fprintf(pid_file, "%" PRIdMAX, (intmax_t) pid);
+ fclose(pid_file);
+
+ char dummy = '\0';
+ write(pid_done_fd, &dummy, sizeof(dummy));
+ close(pid_done_fd);
+}
+
+static void ExecAsDaemon(const char* log_path, bool log_append, int pid_done_fd,
+ const char* exe, char** argv)
+ __attribute__((noreturn));
+
+// Executes the requested binary configuring it to behave as a daemon.
+//
+// The stdout and stderr of the current process are redirected to the given
+// log_path. See SetupStdio for details on how this is handled.
+//
+// This blocks execution until pid_done_fd receives a write. We do this
+// because the Bazel server process (which is what we start with this helper
+// binary) requires the PID file to be present at startup time so we must
+// wait until the parent process has created it.
+//
+// This function never returns.
+static void ExecAsDaemon(const char* log_path, bool log_append, int pid_done_fd,
+ const char* exe, char** argv) {
+ char dummy;
+ if (read(pid_done_fd, &dummy, sizeof(dummy)) == -1) {
+ err(EXIT_FAILURE, "Failed to wait for pid file creation");
+ }
+ close(pid_done_fd);
+
+ if (signal(SIGHUP, SIG_IGN) == SIG_ERR) {
+ err(EXIT_FAILURE, "Failed to install SIGHUP handler");
+ }
+
+ if (setsid() == -1) {
+ err(EXIT_FAILURE, "setsid failed");
+ }
+
+ SetupStdio(log_path, log_append);
+
+ execv(exe, argv);
+ err(EXIT_FAILURE, "Failed to execute %s", exe);
+}
+
+// Starts the given process as a daemon.
+//
+// This spawns a subprocess that will be configured to run the desired program
+// as a daemon. The program to run is supplied in exe and the arguments to it
+// are given in the NULL-terminated argv. argv[0] must be present and
+// contain the program name (which may or may not match the basename of exe).
+static void Daemonize(const char* log_path, bool log_append,
+ const char* pid_path, const char* exe, char** argv) {
+ assert(argv[0] != NULL);
+
+ int pid_done_fds[2];
+ if (pipe(pid_done_fds) == -1) {
+ err(EXIT_FAILURE, "pipe failed");
+ }
+
+ pid_t pid = fork();
+ if (pid == -1) {
+ err(EXIT_FAILURE, "fork failed");
+ } else if (pid == 0) {
+ close(pid_done_fds[1]);
+ ExecAsDaemon(log_path, log_append, pid_done_fds[0], exe, argv);
+ abort(); // NOLINT Unreachable.
+ }
+ close(pid_done_fds[0]);
+
+ WritePidFile(pid, pid_path, pid_done_fds[1]);
+}
+
+// Program entry point.
+//
+// The primary responsibility of this function is to parse program options.
+// Once that is done, delegates all work to Daemonize.
+int main(int argc, char** argv) {
+ bool log_append = false;
+ const char* log_path = NULL;
+ const char* pid_path = NULL;
+ int opt;
+ while ((opt = getopt(argc, argv, ":al:p:")) != -1) {
+ switch (opt) {
+ case 'a':
+ log_append = true;
+ break;
+
+ case 'l':
+ log_path = optarg;
+ break;
+
+ case 'p':
+ pid_path = optarg;
+ break;
+
+ case ':':
+ errx(EXIT_FAILURE, "Option -%c requires an argument", optopt);
+
+ case '?':
+ default:
+ errx(EXIT_FAILURE, "Unknown option -%c", optopt);
+ }
+ }
+ argc -= optind;
+ argv += optind;
+
+ if (log_path == NULL) {
+ errx(EXIT_FAILURE, "Must specify a log file with -l");
+ }
+ if (pid_path == NULL) {
+ errx(EXIT_FAILURE, "Must specify a pid file with -p");
+ }
+
+ if (argc < 2) {
+ errx(EXIT_FAILURE, "Must provide at least an executable name and arg0");
+ }
+ Daemonize(log_path, log_append, pid_path, argv[0], argv + 1);
+ return EXIT_SUCCESS;
+}