| // Copyright 2016 The Bazel Authors. All rights reserved. | 
 | // | 
 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
 | // you may not use this file except in compliance with the License. | 
 | // You may obtain a copy of the License at | 
 | // | 
 | //    http://www.apache.org/licenses/LICENSE-2.0 | 
 | // | 
 | // Unless required by applicable law or agreed to in writing, software | 
 | // distributed under the License is distributed on an "AS IS" BASIS, | 
 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 | // See the License for the specific language governing permissions and | 
 | // limitations under the License. | 
 |  | 
 | /** | 
 |  * linux-sandbox runs commands in a restricted environment where they are | 
 |  * subject to a few rules: | 
 |  * | 
 |  *  - The entire filesystem is made read-only. | 
 |  *  - The working directory (-W) will be made read-write, though. | 
 |  *  - Individual files or directories can be made writable (but not deletable) | 
 |  *    (-w). | 
 |  *  - Individual files or directories can be made inaccessible / unreadable | 
 |  *    (-i). | 
 |  *  - tmpfs will be mounted on /tmp. | 
 |  *  - tmpfs can be mounted on top of existing directories (-e), too. | 
 *  - If the process takes longer than the timeout (-T), it will be killed with
 *    SIGTERM. If it does not exit within the grace period (-t), it and all of
 *    its children will be killed with SIGKILL.
 |  *  - If linux-sandbox itself gets killed, the process and all of its children | 
 |  *    will be killed. | 
 |  *  - If linux-sandbox's parent dies, it will kill itself, the process and all | 
 |  *    the children. | 
 |  *  - Network access is allowed, but can be disabled via -N. | 
 |  *  - The process runs as user "nobody", unless fakeroot is enabled (-R). | 
 |  *  - The hostname and domainname will be set to "sandbox". | 
 |  *  - The process runs in its own PID namespace, so other processes on the | 
 |  *    system are invisible. | 
 |  */ | 
 |  | 
 | #include "linux-sandbox-options.h" | 
 | #include "linux-sandbox-pid1.h" | 
 | #include "linux-sandbox-utils.h" | 
 |  | 
 | #define DIE(args...)                                     \ | 
 |   {                                                      \ | 
 |     fprintf(stderr, __FILE__ ":" S__LINE__ ": \"" args); \ | 
 |     fprintf(stderr, "\": ");                             \ | 
 |     perror(NULL);                                        \ | 
 |     exit(EXIT_FAILURE);                                  \ | 
 |   } | 
 |  | 
 | #include <ctype.h> | 
 | #include <dirent.h> | 
 | #include <errno.h> | 
 | #include <fcntl.h> | 
 | #include <math.h> | 
 | #include <sched.h> | 
 | #include <signal.h> | 
 | #include <stdbool.h> | 
 | #include <stdio.h> | 
 | #include <stdlib.h> | 
 | #include <string.h> | 
 | #include <sys/prctl.h> | 
 | #include <sys/stat.h> | 
 | #include <sys/time.h> | 
 | #include <sys/types.h> | 
 | #include <sys/wait.h> | 
 | #include <unistd.h> | 
 |  | 
 | #include <vector> | 
 |  | 
 | int global_outer_uid; | 
 | int global_outer_gid; | 
 |  | 
 | static char global_sandbox_root[] = "/tmp/sandbox.XXXXXX"; | 
 | static int global_child_pid; | 
 |  | 
 | // The signal that will be sent to the child when a timeout occurs. | 
 | static volatile sig_atomic_t global_next_timeout_signal = SIGTERM; | 
 |  | 
 | // The signal that caused us to kill the child (e.g. on timeout). | 
 | static volatile sig_atomic_t global_signal; | 
 |  | 
 | static void CloseFds() { | 
 |   DIR *fds = opendir("/proc/self/fd"); | 
 |   if (fds == NULL) { | 
 |     DIE("opendir"); | 
 |   } | 
 |  | 
 |   while (1) { | 
 |     errno = 0; | 
 |     struct dirent *dent = readdir(fds); | 
 |  | 
 |     if (dent == NULL) { | 
 |       if (errno != 0) { | 
 |         DIE("readdir"); | 
 |       } | 
 |       break; | 
 |     } | 
 |  | 
 |     if (isdigit(dent->d_name[0])) { | 
 |       errno = 0; | 
 |       int fd = strtol(dent->d_name, nullptr, 10); | 
 |  | 
 |       // (1) Skip unparseable entries. | 
 |       // (2) Close everything except stdin, stdout and stderr. | 
 |       // (3) Do not accidentally close our directory handle. | 
 |       if (errno == 0 && fd > STDERR_FILENO && fd != dirfd(fds)) { | 
 |         if (close(fd) < 0) { | 
 |           DIE("close"); | 
 |         } | 
 |       } | 
 |     } | 
 |   } | 
 |  | 
 |   if (closedir(fds) < 0) { | 
 |     DIE("closedir"); | 
 |   } | 
 | } | 
 |  | 
 | static void RemoveSandboxRoot() { | 
 |   if (rmdir(global_sandbox_root) < 0) { | 
 |     DIE("rmdir(%s)", global_sandbox_root); | 
 |   } | 
 | } | 
 |  | 
 | static void SetupSandboxRoot() { | 
 |   if (opt.sandbox_root_dir == NULL) { | 
 |     if (mkdtemp(global_sandbox_root) == NULL) { | 
 |       DIE("mkdtemp(%s)", global_sandbox_root); | 
 |     } | 
 |     atexit(RemoveSandboxRoot); | 
 |     opt.sandbox_root_dir = global_sandbox_root; | 
 |   } | 
 | } | 
 |  | 
 | static void HandleSignal(int signum, void (*handler)(int)) { | 
 |   struct sigaction sa; | 
 |   memset(&sa, 0, sizeof(sa)); | 
 |   sa.sa_handler = handler; | 
 |   if (sigemptyset(&sa.sa_mask) < 0) { | 
 |     DIE("sigemptyset"); | 
 |   } | 
 |   if (sigaction(signum, &sa, NULL) < 0) { | 
 |     DIE("sigaction"); | 
 |   } | 
 | } | 
 |  | 
 | static void OnTimeout(int sig) { | 
 |   global_signal = sig; | 
 |   kill(global_child_pid, global_next_timeout_signal); | 
 |   if (global_next_timeout_signal == SIGTERM && opt.kill_delay_secs > 0) { | 
 |     global_next_timeout_signal = SIGKILL; | 
 |     alarm(opt.kill_delay_secs); | 
 |   } | 
 | } | 
 |  | 
 | static void SpawnPid1() { | 
 |   const int kStackSize = 1024 * 1024; | 
 |   std::vector<char> child_stack(kStackSize); | 
 |  | 
 |   int sync_pipe[2]; | 
 |   if (pipe(sync_pipe) < 0) { | 
 |     DIE("pipe"); | 
 |   } | 
 |  | 
 |   int clone_flags = CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | | 
 |                     CLONE_NEWPID | SIGCHLD; | 
 |   if (opt.create_netns) { | 
 |     clone_flags |= CLONE_NEWNET; | 
 |   } | 
 |  | 
 |   // We use clone instead of unshare, because unshare sometimes fails with | 
 |   // EINVAL due to a race condition in the Linux kernel (see | 
 |   // https://lkml.org/lkml/2015/7/28/833). | 
 |   global_child_pid = | 
 |       clone(Pid1Main, child_stack.data() + kStackSize, clone_flags, sync_pipe); | 
 |   if (global_child_pid < 0) { | 
 |     DIE("clone"); | 
 |   } | 
 |  | 
 |   PRINT_DEBUG("linux-sandbox-pid1 has PID %d", global_child_pid); | 
 |  | 
 |   // We close the write end of the sync pipe, read a byte and then close the | 
 |   // pipe. This proves to the linux-sandbox-pid1 process that we still existed | 
 |   // after it ran prctl(PR_SET_PDEATHSIG, SIGKILL), thus preventing a race | 
 |   // condition where the parent is killed before that call was made. | 
 |   char buf; | 
 |   if (close(sync_pipe[1]) < 0) { | 
 |     DIE("close"); | 
 |   } | 
 |   if (read(sync_pipe[0], &buf, 1) < 0) { | 
 |     DIE("read"); | 
 |   } | 
 |   if (close(sync_pipe[0]) < 0) { | 
 |     DIE("close"); | 
 |   } | 
 | } | 
 |  | 
 | static int WaitForPid1() { | 
 |   int err, status; | 
 |   do { | 
 |     err = waitpid(global_child_pid, &status, 0); | 
 |   } while (err < 0 && errno == EINTR); | 
 |  | 
 |   if (err < 0) { | 
 |     DIE("waitpid"); | 
 |   } | 
 |  | 
 |   if (global_signal > 0) { | 
 |     // The child exited because we killed it due to receiving a signal | 
 |     // ourselves. Do not trust the exitcode in this case, just calculate it from | 
 |     // the signal. | 
 |     PRINT_DEBUG("child exited due to us catching signal: %s", | 
 |                 strsignal(global_signal)); | 
 |     return 128 + global_signal; | 
 |   } else if (WIFSIGNALED(status)) { | 
 |     PRINT_DEBUG("child exited due to receiving signal: %s", | 
 |                 strsignal(WTERMSIG(status))); | 
 |     return 128 + WTERMSIG(status); | 
 |   } else { | 
 |     PRINT_DEBUG("child exited normally with exitcode %d", WEXITSTATUS(status)); | 
 |     return WEXITSTATUS(status); | 
 |   } | 
 | } | 
 |  | 
 | static void Redirect(const char *target_path, int fd, const char *name) { | 
 |   if (target_path != NULL && strcmp(target_path, "-") != 0) { | 
 |     const int flags = O_WRONLY | O_CREAT | O_TRUNC | O_APPEND; | 
 |     int fd_out = open(target_path, flags, 0666); | 
 |     if (fd_out < 0) { | 
 |       DIE("open(%s)", target_path); | 
 |     } | 
 |     // If we were launched with less than 3 fds (stdin, stdout, stderr) open, | 
 |     // but redirection is still requested via a command-line flag, something is | 
 |     // wacky and the following code would not do what we intend to do, so let's | 
 |     // bail. | 
 |     if (fd_out < 3) { | 
 |       DIE("open(%s) returned a handle that is reserved for stdin / stdout / " | 
 |           "stderr", | 
 |           target_path); | 
 |     } | 
 |     if (dup2(fd_out, fd) < 0) { | 
 |       DIE("dup2()"); | 
 |     } | 
 |     if (close(fd_out) < 0) { | 
 |       DIE("close()"); | 
 |     } | 
 |   } | 
 | } | 
 |  | 
 | static void RedirectStdout(const char *stdout_path) { | 
 |   Redirect(stdout_path, STDOUT_FILENO, "stdout"); | 
 | } | 
 |  | 
 | static void RedirectStderr(const char *stderr_path) { | 
 |   Redirect(stderr_path, STDERR_FILENO, "stderr"); | 
 | } | 
 |  | 
 | int main(int argc, char *argv[]) { | 
 |   // Ask the kernel to kill us with SIGKILL if our parent dies. | 
 |   if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) { | 
 |     DIE("prctl"); | 
 |   } | 
 |  | 
 |   ParseOptions(argc, argv); | 
 |  | 
 |   RedirectStdout(opt.stdout_path); | 
 |   RedirectStderr(opt.stderr_path); | 
 |  | 
 |   // This should never be called as a setuid binary, drop privileges just in | 
 |   // case. We don't need to be root, because we use user namespaces anyway. | 
 |   if (setuid(getuid()) < 0) { | 
 |     DIE("setuid"); | 
 |   } | 
 |  | 
 |   global_outer_uid = getuid(); | 
 |   global_outer_gid = getgid(); | 
 |  | 
 |   // Make sure the sandboxed process does not inherit any accidentally left open | 
 |   // file handles from our parent. | 
 |   CloseFds(); | 
 |  | 
 |   SetupSandboxRoot(); | 
 |  | 
 |   HandleSignal(SIGALRM, OnTimeout); | 
 |   if (opt.timeout_secs > 0) { | 
 |     alarm(opt.timeout_secs); | 
 |   } | 
 |  | 
 |   SpawnPid1(); | 
 |   return WaitForPid1(); | 
 | } |