| // Copyright 2016 The Bazel Authors. All rights reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| /** |
| * linux-sandbox runs commands in a restricted environment where they are |
| * subject to a few rules: |
| * |
| * - The entire filesystem is made read-only. |
| * - The working directory (-W) will be made read-write, though. |
| * - Individual files or directories can be made writable (but not deletable) |
| * (-w). |
| * - Individual files or directories can be made inaccessible / unreadable |
| * (-i). |
| * - tmpfs will be mounted on /tmp. |
| * - tmpfs can be mounted on top of existing directories (-e), too. |
| * - If the process takes longer than the timeout (-T), it will be killed with |
| * SIGTERM. If it does not exit within the grace period (-t), it and all of |
| * its children will be killed with SIGKILL. |
| * - If linux-sandbox itself gets killed, the process and all of its children |
| * will be killed. |
| * - If linux-sandbox's parent dies, it will kill itself, the process and all |
| * the children. |
| * - Network access is allowed, but can be disabled via -N. |
| * - The process runs as user "nobody", unless fakeroot is enabled (-R). |
| * - The hostname and domainname will be set to "sandbox". |
| * - The process runs in its own PID namespace, so other processes on the |
| * system are invisible. |
| */ |
| |
| #include "linux-sandbox-options.h" |
| #include "linux-sandbox-pid1.h" |
| #include "linux-sandbox-utils.h" |
| |
// Prints `<file>:<line>: "<formatted message>": <errno description>` to stderr
// and terminates the process with EXIT_FAILURE.
//
// The arguments are a printf-style format string literal followed by its
// values. S__LINE__ is not defined in this file; it is presumably the
// stringified __LINE__ provided by one of the linux-sandbox headers included
// above.
//
// errno is captured before the first fprintf() so that the description printed
// by perror() belongs to the call that actually failed and not to the
// diagnostics output itself. The body is wrapped in do { } while (0) so that
// `DIE(...);` behaves as exactly one statement wherever it is used.
#define DIE(args...)                                     \
  do {                                                   \
    const int die_saved_errno = errno;                   \
    fprintf(stderr, __FILE__ ":" S__LINE__ ": \"" args); \
    fprintf(stderr, "\": ");                             \
    errno = die_saved_errno;                             \
    perror(NULL);                                        \
    exit(EXIT_FAILURE);                                  \
  } while (0)
| |
| #include <ctype.h> |
| #include <dirent.h> |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <math.h> |
| #include <sched.h> |
| #include <signal.h> |
| #include <stdbool.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/prctl.h> |
| #include <sys/stat.h> |
| #include <sys/time.h> |
| #include <sys/types.h> |
| #include <sys/wait.h> |
| #include <unistd.h> |
| |
| #include <vector> |
| |
// User and group id this program was started with; main() fills them in from
// getuid() / getgid(). They have external linkage, presumably so that the
// pid1 code can read them (confirm against linux-sandbox-pid1).
int global_outer_uid = 0;
int global_outer_gid = 0;

// PID of the cloned child (linux-sandbox-pid1); stays 0 until SpawnPid1() ran.
static int global_child_pid = 0;

// mkdtemp() template for the sandbox root. It has to be a writable array,
// because mkdtemp() replaces the trailing XXXXXX in place.
static char global_sandbox_root[] = "/tmp/sandbox.XXXXXX";

// Signal number that made us kill the child (e.g. SIGALRM on timeout);
// remains 0 as long as no such signal was handled.
static volatile sig_atomic_t global_signal = 0;

// Signal that the next timeout delivers to the child.
static volatile sig_atomic_t global_next_timeout_signal = SIGTERM;
| |
| static void CloseFds() { |
| DIR *fds = opendir("/proc/self/fd"); |
| if (fds == NULL) { |
| DIE("opendir"); |
| } |
| |
| while (1) { |
| errno = 0; |
| struct dirent *dent = readdir(fds); |
| |
| if (dent == NULL) { |
| if (errno != 0) { |
| DIE("readdir"); |
| } |
| break; |
| } |
| |
| if (isdigit(dent->d_name[0])) { |
| errno = 0; |
| int fd = strtol(dent->d_name, nullptr, 10); |
| |
| // (1) Skip unparseable entries. |
| // (2) Close everything except stdin, stdout and stderr. |
| // (3) Do not accidentally close our directory handle. |
| if (errno == 0 && fd > STDERR_FILENO && fd != dirfd(fds)) { |
| if (close(fd) < 0) { |
| DIE("close"); |
| } |
| } |
| } |
| } |
| |
| if (closedir(fds) < 0) { |
| DIE("closedir"); |
| } |
| } |
| |
| static void RemoveSandboxRoot() { |
| if (rmdir(global_sandbox_root) < 0) { |
| DIE("rmdir(%s)", global_sandbox_root); |
| } |
| } |
| |
| static void SetupSandboxRoot() { |
| if (opt.sandbox_root_dir == NULL) { |
| if (mkdtemp(global_sandbox_root) == NULL) { |
| DIE("mkdtemp(%s)", global_sandbox_root); |
| } |
| atexit(RemoveSandboxRoot); |
| opt.sandbox_root_dir = global_sandbox_root; |
| } |
| } |
| |
| static void HandleSignal(int signum, void (*handler)(int)) { |
| struct sigaction sa; |
| memset(&sa, 0, sizeof(sa)); |
| sa.sa_handler = handler; |
| if (sigemptyset(&sa.sa_mask) < 0) { |
| DIE("sigemptyset"); |
| } |
| if (sigaction(signum, &sa, NULL) < 0) { |
| DIE("sigaction"); |
| } |
| } |
| |
| static void OnTimeout(int sig) { |
| global_signal = sig; |
| kill(global_child_pid, global_next_timeout_signal); |
| if (global_next_timeout_signal == SIGTERM && opt.kill_delay_secs > 0) { |
| global_next_timeout_signal = SIGKILL; |
| alarm(opt.kill_delay_secs); |
| } |
| } |
| |
| static void SpawnPid1() { |
| const int kStackSize = 1024 * 1024; |
| std::vector<char> child_stack(kStackSize); |
| |
| int sync_pipe[2]; |
| if (pipe(sync_pipe) < 0) { |
| DIE("pipe"); |
| } |
| |
| int clone_flags = CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | |
| CLONE_NEWPID | SIGCHLD; |
| if (opt.create_netns) { |
| clone_flags |= CLONE_NEWNET; |
| } |
| |
| // We use clone instead of unshare, because unshare sometimes fails with |
| // EINVAL due to a race condition in the Linux kernel (see |
| // https://lkml.org/lkml/2015/7/28/833). |
| global_child_pid = |
| clone(Pid1Main, child_stack.data() + kStackSize, clone_flags, sync_pipe); |
| if (global_child_pid < 0) { |
| DIE("clone"); |
| } |
| |
| PRINT_DEBUG("linux-sandbox-pid1 has PID %d", global_child_pid); |
| |
| // We close the write end of the sync pipe, read a byte and then close the |
| // pipe. This proves to the linux-sandbox-pid1 process that we still existed |
| // after it ran prctl(PR_SET_PDEATHSIG, SIGKILL), thus preventing a race |
| // condition where the parent is killed before that call was made. |
| char buf; |
| if (close(sync_pipe[1]) < 0) { |
| DIE("close"); |
| } |
| if (read(sync_pipe[0], &buf, 1) < 0) { |
| DIE("read"); |
| } |
| if (close(sync_pipe[0]) < 0) { |
| DIE("close"); |
| } |
| } |
| |
| static int WaitForPid1() { |
| int err, status; |
| do { |
| err = waitpid(global_child_pid, &status, 0); |
| } while (err < 0 && errno == EINTR); |
| |
| if (err < 0) { |
| DIE("waitpid"); |
| } |
| |
| if (global_signal > 0) { |
| // The child exited because we killed it due to receiving a signal |
| // ourselves. Do not trust the exitcode in this case, just calculate it from |
| // the signal. |
| PRINT_DEBUG("child exited due to us catching signal: %s", |
| strsignal(global_signal)); |
| return 128 + global_signal; |
| } else if (WIFSIGNALED(status)) { |
| PRINT_DEBUG("child exited due to receiving signal: %s", |
| strsignal(WTERMSIG(status))); |
| return 128 + WTERMSIG(status); |
| } else { |
| PRINT_DEBUG("child exited normally with exitcode %d", WEXITSTATUS(status)); |
| return WEXITSTATUS(status); |
| } |
| } |
| |
| static void Redirect(const char *target_path, int fd, const char *name) { |
| if (target_path != NULL && strcmp(target_path, "-") != 0) { |
| const int flags = O_WRONLY | O_CREAT | O_TRUNC | O_APPEND; |
| int fd_out = open(target_path, flags, 0666); |
| if (fd_out < 0) { |
| DIE("open(%s)", target_path); |
| } |
| // If we were launched with less than 3 fds (stdin, stdout, stderr) open, |
| // but redirection is still requested via a command-line flag, something is |
| // wacky and the following code would not do what we intend to do, so let's |
| // bail. |
| if (fd_out < 3) { |
| DIE("open(%s) returned a handle that is reserved for stdin / stdout / " |
| "stderr", |
| target_path); |
| } |
| if (dup2(fd_out, fd) < 0) { |
| DIE("dup2()"); |
| } |
| if (close(fd_out) < 0) { |
| DIE("close()"); |
| } |
| } |
| } |
| |
| static void RedirectStdout(const char *stdout_path) { |
| Redirect(stdout_path, STDOUT_FILENO, "stdout"); |
| } |
| |
| static void RedirectStderr(const char *stderr_path) { |
| Redirect(stderr_path, STDERR_FILENO, "stderr"); |
| } |
| |
| int main(int argc, char *argv[]) { |
| // Ask the kernel to kill us with SIGKILL if our parent dies. |
| if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) { |
| DIE("prctl"); |
| } |
| |
| ParseOptions(argc, argv); |
| |
| RedirectStdout(opt.stdout_path); |
| RedirectStderr(opt.stderr_path); |
| |
| // This should never be called as a setuid binary, drop privileges just in |
| // case. We don't need to be root, because we use user namespaces anyway. |
| if (setuid(getuid()) < 0) { |
| DIE("setuid"); |
| } |
| |
| global_outer_uid = getuid(); |
| global_outer_gid = getgid(); |
| |
| // Make sure the sandboxed process does not inherit any accidentally left open |
| // file handles from our parent. |
| CloseFds(); |
| |
| SetupSandboxRoot(); |
| |
| HandleSignal(SIGALRM, OnTimeout); |
| if (opt.timeout_secs > 0) { |
| alarm(opt.timeout_secs); |
| } |
| |
| SpawnPid1(); |
| return WaitForPid1(); |
| } |