// blob: 5cf6b433bec684c0a8cbd7477a7114a84e1c7cd9 [file] [log] [blame]
#define _GNU_SOURCE
// Copyright 2014 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <linux/capability.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
// When non-zero (set by the -D flag), PRINT_DEBUG messages are emitted.
static int global_debug = 0;

// Print a printf-style debug message to stderr, prefixed with "sandbox.c: ",
// but only when global_debug is set. The format must be a string literal
// because it is concatenated with the prefix at compile time.
#define PRINT_DEBUG(...)                            \
  do {                                              \
    if (global_debug) {                             \
      fprintf(stderr, "sandbox.c: " __VA_ARGS__);   \
    }                                               \
  } while (0)

// Evaluate x exactly once; if it equals -1, report the failing expression
// with perror() and exit with status 1. Wrapped in do/while(0) so that the
// macro behaves like a single statement, including inside an unbraced
// if/else.
#define CHECK_CALL(x)  \
  do {                 \
    if ((x) == -1) {   \
      perror(#x);      \
      exit(1);         \
    }                  \
  } while (0)

// Evaluate x exactly once; if it is NULL, report the failing expression
// with perror() and exit with status 1.
#define CHECK_NOT_NULL(x)  \
  do {                     \
    if ((x) == NULL) {     \
      perror(#x);          \
      exit(1);             \
    }                      \
  } while (0)

// Print the current source line number and exit with status -1.
// No trailing semicolon here: the caller supplies it.
#define DIE()                                     \
  do {                                            \
    fprintf(stderr, "Error in %d\n", __LINE__);   \
    exit(-1);                                     \
  } while (0)

// Seconds to wait between forwarding a signal to the children and sending
// them SIGKILL.
const int kChildrenCleanupDelay = 1;

// Number of the last signal caught by SignalHandler (0 if none so far).
static volatile sig_atomic_t global_signal_received = 0;
//
// Options parsing result
//
struct Options {
  // Command to execute and its arguments: the tail of argv that follows
  // the flags (after -C / --).
  char **args;
  // Directory (-N) that is entered before the include directories are
  // bind-mounted.
  char *include_prefix;
  // Directory (-S) that becomes the root of the sandbox.
  char *sandbox_root;
  // Tools directory (-t), bind-mounted at "tools" inside the sandbox.
  char *tools;
  // Directories to bind-mount into the sandbox, one per -m flag.
  char **mounts;
  // Include directories to bind-mount, one per -n flag.
  char **includes;
  // Number of valid entries in mounts.
  int num_mounts;
  // Number of valid entries in includes.
  int num_includes;
  // Timeout in seconds (-T); 0 means no timeout is armed.
  int timeout;
};
// Print out a usage error and exit with status 1. argc and argv are the
// argument counter and vector, fmt is a printf-style format string for the
// error message to print.
void Usage(int argc, char **argv, char *fmt, ...);

// Parse the command line flags and return the result in an
// Options structure passed as argument.
void ParseCommandLine(int argc, char **argv, struct Options *opt);

// Signal handling
// Install a handler that records terminating signals so that they can be
// forwarded to the sandboxed processes.
void PropagateSignals(void);
// Arm a one-shot real-time timer that fires after timeout seconds; a
// timeout <= 0 arms nothing.
void EnableAlarm(int timeout);

// Sandbox setup
void SetupDirectories(struct Options *opt);
void SetupSlashDev(void);
void SetupUserNamespace(int uid, int gid);
void ChangeRoot(void);

// Write the file "filename" using a format string specified by "fmt".
// Returns -1 on failure.
int WriteFile(const char *filename, const char *fmt, ...);

// Run the command specified by the argv array and kill it after
// timeout seconds.
void SpawnCommand(char **argv, int timeout);
// Entry point: parses the command line, moves the process into fresh
// mount/UTS/IPC/user namespaces, builds the sandbox directory tree, maps
// the invoking user to root inside the namespace, pivots into the sandbox
// root and finally runs the requested command.
int main(int argc, char *argv[]) {
  // ParseCommandLine() initializes every field and allocates the
  // mounts/includes arrays itself, so nothing is allocated here (allocating
  // here as well would leak, because the pointers get overwritten).
  struct Options opt = {
    .args = NULL,
    .include_prefix = NULL,
    .sandbox_root = NULL,
    .tools = NULL,
    .mounts = NULL,
    .includes = NULL,
    .num_mounts = 0,
    .num_includes = 0,
    .timeout = 0
  };
  ParseCommandLine(argc, argv, &opt);

  // IDs of the invoking user, captured before the namespaces are created;
  // they are written into the uid/gid maps by SetupUserNamespace().
  int uid = getuid();
  int gid = getgid();

  // parsed all arguments, now prepare sandbox
  PRINT_DEBUG("%s\n", opt.sandbox_root);

  // create new namespaces in which this process and its children will live
  CHECK_CALL(unshare(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER));
  // make every mount private so that changes made here stay in this namespace
  CHECK_CALL(mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL));

  // Create the sandbox directory layout
  SetupDirectories(&opt);
  // Set the user namespace (user_namespaces(7))
  SetupUserNamespace(uid, gid);
  // make sandbox actually hermetic:
  ChangeRoot();

  // The mount lists are no longer needed; opt.args points into argv and is
  // unaffected by these frees.
  free(opt.mounts);
  free(opt.includes);

  // Finally call the command; SpawnCommand() exits the process itself.
  SpawnCommand(opt.args, opt.timeout);
  return 0;
}
// Fork, run argv in the child (in its own process group) and wait for it in
// the parent. This function does not return: the parent exits with
//   - the child's exit status when the child exits normally,
//   - 128 | signal when the child is terminated by a signal,
//   - 128 | signal when the parent itself catches a signal (including the
//     SIGALRM raised after `timeout` seconds); the signal is first forwarded
//     to the child's process group, followed by SIGKILL after
//     kChildrenCleanupDelay seconds,
//   - 1 on internal errors.
void SpawnCommand(char **argv, int timeout) {
  for (int i = 0; argv[i] != NULL; i++) {
    PRINT_DEBUG("arg: %s\n", argv[i]);
  }

  // spawn child and wait until it finishes
  pid_t cpid = fork();
  if (cpid == -1) {
    perror("fork");
    exit(1);
  }

  if (cpid == 0) {
    // CHILD: become the leader of a new process group so that the parent
    // can signal the whole sandbox with killpg().
    CHECK_CALL(setpgid(0, 0));

    // if the execvp below fails with "No such file or directory" it means that:
    // a) the binary is not in the sandbox (which means it wasn't included in
    //    the inputs)
    // b) the binary uses shared library which is not inside sandbox - you can
    //    check for that by running "ldd ./a.out" (by default directories
    //    starting with /lib* and /usr/lib* should be there)
    // c) the binary uses elf interpreter which is not inside sandbox - you can
    //    check for that by running "readelf -a a.out | grep interpreter" (the
    //    sandbox code assumes that it is either in /lib*/ or /usr/lib*/)
    execvp(argv[0], argv);
    // execvp() only returns on failure.
    perror("execvp(argv[0], argv)");
    PRINT_DEBUG("Exec failed near %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }

  // PARENT
  // make sure that all signals propagate to children (mostly useful to kill
  // entire sandbox)
  PropagateSignals();
  // after given timeout, kill children
  EnableAlarm(timeout);

  int status = 0;
  while (1) {
    PRINT_DEBUG("Waiting for the child...\n");
    pid_t pid = wait(&status);
    // Save errno right away: it is only meaningful if wait() failed, and
    // later library calls may overwrite it.
    int wait_errno = errno;

    if (global_signal_received) {
      PRINT_DEBUG("Received signal: %s\n", strsignal(global_signal_received));
      CHECK_CALL(killpg(cpid, global_signal_received));
      // give children some time for cleanup before they terminate
      sleep(kChildrenCleanupDelay);
      CHECK_CALL(killpg(cpid, SIGKILL));
      exit(128 | global_signal_received);
    }

    if (pid < 0) {
      if (wait_errno == EINTR) {
        // Interrupted before any child changed state; try again.
        continue;
      }
      errno = wait_errno;
      perror("Wait failed:");
      exit(1);
    }

    if (WIFEXITED(status)) {
      PRINT_DEBUG("Child exited with status: %d\n", WEXITSTATUS(status));
      exit(WEXITSTATUS(status));
    }
    if (WIFSIGNALED(status)) {
      PRINT_DEBUG("Child terminated by a signal: %d\n", WTERMSIG(status));
      // WEXITSTATUS() is meaningless for a signaled child; report the
      // signal the same way as for signals caught by the parent.
      exit(128 | WTERMSIG(status));
    }
    if (WIFSTOPPED(status)) {
      PRINT_DEBUG("Child stopped by a signal: %d\n", WSTOPSIG(status));
    }
  }
}
// Create or truncate "filename" and write the printf-style formatted text
// described by "fmt" and the following arguments into it.
//
// Returns 0 on success and -1 if the file could not be opened, formatted
// into, or flushed/closed. The stream is always closed. When formatting
// fails, errno is restored to the value left by that failure so that the
// caller can inspect it.
int WriteFile(const char *filename, const char *fmt, ...) {
  FILE *stream = fopen(filename, "w");
  if (stream == NULL) {
    return -1;
  }

  va_list ap;
  va_start(ap, fmt);
  int written = vfprintf(stream, fmt, ap);
  va_end(ap);
  int write_errno = errno;

  // Close unconditionally so that the stream is not leaked when formatting
  // fails. Output is buffered, so a write error may surface only here.
  int closed = fclose(stream);

  if (written < 0) {
    errno = write_errno;
    return -1;
  }
  return (closed == 0) ? 0 : -1;
}
//
// Signal handling
//
// SA_SIGINFO-style signal handler: remembers the number of the delivered
// signal in global_signal_received and does nothing else.
void SignalHandler(int signum, siginfo_t *info, void *uctxt) {
  // The extra SA_SIGINFO arguments are not needed.
  (void) info;
  (void) uctxt;
  global_signal_received = signum;
}
// Install SignalHandler for the signals whose default action would terminate
// this process, so that they can be noticed and forwarded to the processes in
// the sandbox, which makes it easier to terminate the entire sandbox.
// Exits with status 1 if a handler cannot be installed.
void PropagateSignals(void) {
  struct sigaction action;
  memset(&action, 0, sizeof(action));
  sigemptyset(&action.sa_mask);
  action.sa_flags = SA_SIGINFO;
  action.sa_sigaction = SignalHandler;

  // SIGKILL is intentionally absent: it cannot be caught, and asking for a
  // handler always fails.
  static const int signals[] = {
    SIGHUP, SIGINT, SIGPIPE, SIGALRM, SIGTERM, SIGPOLL, SIGPROF, SIGVTALRM,
    // signals below produce core dump by default, however at the moment we'll
    // just terminate
    SIGQUIT, SIGILL, SIGABRT, SIGFPE, SIGSEGV, SIGBUS, SIGSYS, SIGTRAP,
    SIGXCPU, SIGXFSZ
  };
  const size_t num_signals = sizeof(signals) / sizeof(signals[0]);

  for (size_t i = 0; i < num_signals; i++) {
    if (sigaction(signals[i], &action, NULL) == -1) {
      perror("sigaction");
      exit(1);
    }
  }
}
// Arm a one-shot ITIMER_REAL timer that expires after `timeout` seconds.
// Nothing is armed when timeout is zero or negative.
void EnableAlarm(int timeout) {
  if (timeout > 0) {
    // Only it_value is set; the interval stays zero, so the timer does not
    // repeat.
    struct itimerval timer = { .it_value = { .tv_sec = (long) timeout } };
    CHECK_CALL(setitimer(ITIMER_REAL, &timer, NULL));
  }
}
//
// Sandbox setup
//
// Create a "dev" directory in the current working directory and bind-mount
// a small fixed set of host device nodes into it. Exits with status 1 on
// the first failing call.
void SetupSlashDev() {
  // Absolute paths of the host devices; dropping the leading '/' yields the
  // matching path relative to the current directory.
  static const char *const devs[] = {
    "/dev/null",
    "/dev/random",
    "/dev/urandom",
    "/dev/zero"
  };
  const size_t num_devs = sizeof(devs) / sizeof(devs[0]);

  CHECK_CALL(mkdir("dev", 0755));

  for (size_t i = 0; i < num_devs; i++) {
    // open+close to create the file, which will become mount point for actual
    // device
    int handle = open(devs[i] + 1, O_CREAT | O_RDONLY, 0644);
    CHECK_CALL(handle);
    CHECK_CALL(close(handle));
    CHECK_CALL(mount(devs[i], devs[i] + 1, NULL, MS_BIND, NULL));
  }
}
// Build the sandbox directory tree described by `opt`: bind-mount the
// sandbox root onto itself and enter it, populate dev/, then mount the tools
// directory, the -m directories, the -n include directories and finally
// proc/. Any failing call terminates the process with status 1, except that
// include directories which do not exist are skipped.
void SetupDirectories(struct Options *opt) {
  // Mount the sandbox and go there.
  CHECK_CALL(mount(opt->sandbox_root, opt->sandbox_root, NULL, MS_BIND | MS_NOSUID, NULL));
  CHECK_CALL(chdir(opt->sandbox_root));

  SetupSlashDev();

  // Mount blaze specific directories - tools/ and build-runfiles/.
  // NOTE(review): mount(2) documents that flags other than MS_REC are ignored
  // on the initial MS_BIND call, so MS_RDONLY here and below may need a
  // separate remount to take effect - confirm.
  if (opt->tools != NULL) {
    PRINT_DEBUG("tools: %s\n", opt->tools);
    CHECK_CALL(mkdir("tools", 0755));
    CHECK_CALL(mount(opt->tools, "tools", NULL, MS_BIND | MS_RDONLY, NULL));
  }

  // Mount directories passed in argv; those are mostly dirs for shared libs.
  // Skipping the first character of the source path gives the mount point
  // relative to the sandbox root (assumes the paths start with '/').
  for (int i = 0; i < opt->num_mounts; i++) {
    CHECK_CALL(mount(opt->mounts[i], opt->mounts[i] + 1, NULL, MS_BIND | MS_RDONLY, NULL));
  }

  // C++ compilation
  // C++ headers go in a separate directory.
  if (opt->include_prefix != NULL) {
    CHECK_CALL(chdir(opt->include_prefix));
    for (int i = 0; i < opt->num_includes; i++) {
      // TODO(bazel-team): sometimes list of -iquote given by bazel contains
      // invalid (non-existing) entries, ideally we would like not to have them
      const char *source = opt->includes[i];
      PRINT_DEBUG("include: %s\n", source);
      // A missing entry (ENOENT) is tolerated; any other failure is fatal.
      if (mount(source, source + 1, NULL, MS_BIND, NULL) < 0 && errno != ENOENT) {
        CHECK_CALL(-1);
      }
    }
    // NOTE(review): this returns to the sandbox root only if include_prefix
    // is a single-level relative directory - verify against callers.
    CHECK_CALL(chdir(".."));
  }

  CHECK_CALL(mkdir("proc", 0755));
  CHECK_CALL(mount("/proc", "proc", NULL, MS_REC | MS_BIND, NULL));
}
// Configure the ID mappings of the current user namespace so that the given
// outer uid/gid appear as 0 inside it, then switch the real, effective and
// saved user and group IDs to 0. Exits on any failure.
void SetupUserNamespace(int uid, int gid) {
  // Disable needs for CAP_SETGID.
  // Writing to /proc/self/setgroups might fail on earlier version of linux
  // because setgroups does not exist; that case (ENOENT) is ignored.
  if (WriteFile("/proc/self/setgroups", "deny") < 0 && errno != ENOENT) {
    perror("WriteFile(\"/proc/self/setgroups\", \"deny\")");
    exit(-1);
  }

  // set group and user mapping from outer namespace to inner:
  // no changes in the parent, be root in the child
  CHECK_CALL(WriteFile("/proc/self/uid_map", "0 %d 1\n", uid));
  CHECK_CALL(WriteFile("/proc/self/gid_map", "0 %d 1\n", gid));

  CHECK_CALL(setresuid(0, 0, 0));
  CHECK_CALL(setresgid(0, 0, 0));
}
// Make the current working directory the root of the filesystem: the
// previous root is moved into a freshly created temporary directory, which
// is then lazily unmounted and removed. Exits with status 1 if any step
// fails.
void ChangeRoot() {
  // Template for mkdtemp(), which replaces the trailing XXXXXX in place.
  char old_root[] = "old-root-XXXXXX";
  CHECK_NOT_NULL(mkdtemp(old_root));

  // move the real root to old_root, then detach it
  // pivot_root has no wrapper in libc, so we need syscall()
  CHECK_CALL(syscall(SYS_pivot_root, ".", old_root));
  CHECK_CALL(chroot("."));

  // old_root is resolved relative to the new root from here on.
  CHECK_CALL(umount2(old_root, MNT_DETACH));
  CHECK_CALL(rmdir(old_root));
}
//
// Command line parsing
//
// Report a command line error and terminate. The printf-style message given
// by `fmt` and the following arguments is written to stderr first, followed
// by a synopsis, the arguments that were actually provided and a description
// of the flags. Always exits with status 1.
void Usage(int argc, char **argv, char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap);

  fprintf(stderr,
          "\nUsage: %s [-S sandbox-root] [-m mount] [-C|--] command arg1\n",
          argv[0]);

  // Echo the command line as it was received.
  fputs(" provided:", stderr);
  for (int arg = 0; arg < argc; arg++) {
    fprintf(stderr, " %s", argv[arg]);
  }

  fputs("\nMandatory arguments:\n"
        " [-C|--] command to run inside sandbox, followed by arguments\n"
        " -S directory which will become the root of the sandbox\n"
        "\n"
        "Optional arguments:\n"
        " -t absolute path to bazel tools directory\n"
        " -T timeout after which sandbox will be terminated\n"
        " -m system directory to mount inside the sandbox\n"
        " Multiple directories can be specified and each of them will\n"
        " be mount as readonly\n"
        " -D if set, debug info will be printed\n",
        stderr);
  exit(1);
}
// Parse the command line flags in argv and store the result in *opt.
//
// Every field of *opt is (re)initialized here. opt->mounts and opt->includes
// are allocated by this function and owned by the caller, who releases them
// with free(); all string fields and opt->args point into argv.
//
// Any invalid input (unknown flag, missing flag argument, repeated -S,
// malformed or out-of-range -T, missing command, missing -S) is reported
// through Usage(), which terminates the process.
void ParseCommandLine(int argc, char **argv, struct Options *opt) {
  int c;

  opt->args = NULL;
  opt->include_prefix = NULL;
  opt->sandbox_root = NULL;
  opt->tools = NULL;
  opt->num_mounts = 0;
  opt->num_includes = 0;
  opt->timeout = 0;

  // Each -m / -n occurrence consumes at least one element of argv, so argc
  // entries are always enough for either list.
  size_t capacity = (argc > 0) ? (size_t) argc : 1;
  opt->mounts = calloc(capacity, sizeof(*opt->mounts));
  opt->includes = calloc(capacity, sizeof(*opt->includes));
  if (opt->mounts == NULL || opt->includes == NULL) {
    perror("calloc");
    exit(1);
  }

  // Leading '+': stop at the first non-flag argument, so that flags of the
  // sandboxed command are left alone. Leading ':': report a missing flag
  // argument as ':' instead of '?'.
  while ((c = getopt(argc, argv, "+:S:t:T:m:N:n:DC")) != -1) {
    switch (c) {
      case 'S':
        if (opt->sandbox_root == NULL) {
          opt->sandbox_root = optarg;
        } else {
          Usage(argc, argv,
                "Multiple sandbox roots (-S) specified (expected one).");
        }
        break;
      case 'm':
        opt->mounts[opt->num_mounts++] = optarg;
        break;
      case 'D':
        global_debug = 1;
        break;
      case 'T': {
        // Accept only a complete, non-negative decimal number that fits in
        // an int; anything else is a usage error rather than a silent 0.
        char *end = NULL;
        errno = 0;
        long value = strtol(optarg, &end, 10);
        if (end == optarg || *end != '\0' || errno == ERANGE ||
            value < 0 || value > INT_MAX) {
          Usage(argc, argv, "Invalid timeout (-T) value: %s", optarg);
        }
        opt->timeout = (int) value;
        break;
      }
      case 'N':
        opt->include_prefix = optarg;
        break;
      case 'n':
        opt->includes[opt->num_includes++] = optarg;
        break;
      case 'C':
        break;  // deprecated, ignore.
      case 't':
        opt->tools = optarg;
        break;
      case '?':
        Usage(argc, argv, "Unrecognized argument: -%c (%d)", optopt, optind);
        break;
      case ':':
        Usage(argc, argv, "Flag -%c requires an argument", optopt);
        break;
      default:
        Usage(argc, argv, "Unexpected getopt() result: %d", c);
        break;
    }
  }

  // Everything after the flags is the command to run.
  opt->args = argv + optind;
  if (argc <= optind) {
    Usage(argc, argv, "No command specified");
  }
  // -S is documented as mandatory; without it there is nothing to mount.
  if (opt->sandbox_root == NULL) {
    Usage(argc, argv, "Sandbox root (-S) must be specified");
  }
}