blob: 56ca81e31c2ceca9fe2cdb65d9136b9aa891adbe [file] [log] [blame]
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001// Copyright 2014 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// blaze.cc: bootstrap and client code for Blaze server.
16//
17// Responsible for:
18// - extracting the Python, C++ and Java components.
19// - starting the server or finding the existing one.
20// - client options parsing.
21// - passing the argv array, and printing the out/err streams.
22// - signal handling.
23// - exiting with the right error/WTERMSIG code.
24// - debugger + profiler support.
25// - mutual exclusion between batch invocations.
26
27#include <assert.h>
28#include <ctype.h>
29#include <dirent.h>
30#include <errno.h>
31#include <fcntl.h>
32#include <limits.h>
33#include <poll.h>
34#include <sched.h>
35#include <signal.h>
36#include <stdarg.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <string.h>
40#include <sys/resource.h>
41#include <sys/select.h>
42#include <sys/socket.h>
43#include <sys/stat.h>
44#include <sys/statvfs.h>
45#include <sys/time.h>
46#include <sys/un.h>
47#include <time.h>
48#include <unistd.h>
49#include <utime.h>
50#include <algorithm>
51#include <set>
52#include <string>
53#include <utility>
54#include <vector>
55
56#include "blaze_exit_code.h"
57#include "blaze_startup_options.h"
58#include "blaze_util.h"
59#include "blaze_util_platform.h"
60#include "option_processor.h"
61#include "util/file.h"
62#include "util/md5.h"
63#include "util/numbers.h"
64#include "util/port.h"
65#include "util/strings.h"
66#include "archive.h"
67#include "archive_entry.h"
68
69using std::set;
70using std::vector;
71
// <sched.h> is expected to provide SCHED_BATCH; fall back to defining it
// ourselves when the header does not.
#if !defined(SCHED_BATCH)
#define SCHED_BATCH 3
#endif
76
77namespace blaze {
78
// True when VERBOSE_BLAZE_CLIENT is set in the environment; enables messages
// that are mostly of interest to developers.
static const bool SPAM = (getenv("VERBOSE_BLAZE_CLIENT") != NULL);

// True when TEST_TMPDIR is set in the environment, i.e. Blaze is being run by
// a test.
static const bool TESTING = (getenv("TEST_TMPDIR") != NULL);

// Environment array declaration (its users are outside this excerpt).
extern char **environ;
86
87////////////////////////////////////////////////////////////////////////
88// Global Variables
89
// Why the blaze server had to be (re)started for this invocation.
// Keep in sync with logging.proto.
// The numeric values are also used as indices into the name table in
// AddLoggingArgs(), so the order of the enumerators matters.
enum RestartReason {
  NO_RESTART = 0,
  NO_DAEMON = 1,
  NEW_VERSION = 2,
  NEW_OPTIONS = 3
};
98
99struct GlobalVariables {
100 // Used to make concurrent invocations of this program safe.
101 string lockfile; // = <output_base>/lock
102 int lockfd;
103
104 string jvm_log_file; // = <output_base>/server/jvm.out
105
106 string cwd;
107
108 // The nearest enclosing workspace directory, starting from cwd.
109 // If not under a workspace directory, this is equal to cwd.
110 string workspace;
111
112 // Option processor responsible for parsing RC files and converting them into
113 // the argument list passed on to the server.
114 OptionProcessor option_processor;
115
116 pid_t server_pid;
117
118 volatile sig_atomic_t sigint_count;
119
120 // The number of the last received signal that should cause the client
121 // to shutdown. This is saved so that the client's WTERMSIG can be set
122 // correctly. (Currently only SIGPIPE uses this mechanism.)
123 volatile sig_atomic_t received_signal;
124
125 // Contains the relative paths of all the files in the attached zip, and is
126 // populated during GetInstallDir().
127 vector<string> extracted_binaries;
128
129 // Parsed startup options
130 BlazeStartupOptions options;
131
132 // The time in ms the launcher spends before sending the request to the Blaze
133 uint64 startup_time;
134
135 // The time spent on extracting the new blaze version
136 // This is part of startup_time
137 uint64 extract_data_time;
138
139 // The time in ms if a command had to wait on a busy Blaze server process
140 // This is part of startup_time
141 uint64 command_wait_time;
142
143 RestartReason restart_reason;
144
145 // Absolute path of the blaze binary
146 string binary_path;
147};
148
149static GlobalVariables *globals;
150
151void InitGlobals() {
152 globals = new GlobalVariables;
153 globals->sigint_count = 0;
154 globals->startup_time = 0;
155 globals->extract_data_time = 0;
156 globals->command_wait_time = 0;
157 globals->restart_reason = NO_RESTART;
158}
159
160////////////////////////////////////////////////////////////////////////
161// Logic
162
163
164// Returns the canonical form of the base dir given a root and a hashable
165// string. The resulting dir is composed of the root + md5(hashable)
166static string GetHashedBaseDir(const string &root,
167 const string &hashable) {
168 unsigned char buf[17];
169 blaze_util::Md5Digest digest;
170 digest.Update(hashable.data(), hashable.size());
171 digest.Finish(buf);
172 return root + "/" + digest.String();
173}
174
175// Returns the install base (the root concatenated with the contents of the file
176// 'install_base_key' contained as a ZIP entry in the Blaze binary); as a side
177// effect, it also populates the extracted_binaries global variable.
178static string GetInstallBase(const string &root, const string &self_path) {
179 string key_file = "install_base_key";
180 struct archive *blaze_zip = archive_read_new();
181 archive_read_support_format_zip(blaze_zip);
182 int retval = archive_read_open_filename(blaze_zip, self_path.c_str(), 10240);
183 if (retval != ARCHIVE_OK) {
184 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000185 "\nFailed to open %s as a zip file: (%d) %s",
186 globals->options.GetProductName().c_str(), archive_errno(blaze_zip),
187 archive_error_string(blaze_zip));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100188 }
189
190 struct archive_entry *entry;
191 string install_base_key;
192 while (archive_read_next_header(blaze_zip, &entry) == ARCHIVE_OK) {
193 string pathname = archive_entry_pathname(entry);
194 globals->extracted_binaries.push_back(pathname);
195
196 if (key_file == pathname) {
197 const int size = 32;
198 char buf[size];
199 int bytesRead = archive_read_data(blaze_zip, &buf, size);
200 if (bytesRead < 0) {
201 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
202 "\nFailed to extract install_base_key: (%d) %s",
203 archive_errno(blaze_zip), archive_error_string(blaze_zip));
204 }
205 if (bytesRead < 32) {
206 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
207 "\nFailed to extract install_base_key: file too short");
208 }
209 install_base_key = string(buf, bytesRead);
210 }
211 }
212 retval = archive_read_free(blaze_zip);
213 if (retval != ARCHIVE_OK) {
214 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
215 "\nFailed to close install_base_key's containing zip file");
216 }
217
218 return root + "/" + install_base_key;
219}
220
221// Escapes colons by replacing them with '_C' and underscores by replacing them
222// with '_U'. E.g. "name:foo_bar" becomes "name_Cfoo_Ubar"
223static string EscapeForOptionSource(const string& input) {
224 string result = input;
225 blaze_util::Replace("_", "_U", &result);
226 blaze_util::Replace(":", "_C", &result);
227 return result;
228}
229
230// Returns the JVM command argument array.
231static vector<string> GetArgumentArray() {
232 vector<string> result;
233
234 // e.g. A Blaze server process running in ~/src/build_root (where there's a
235 // ~/src/build_root/WORKSPACE file) will appear in ps(1) as "blaze(src)".
236 string workspace =
237 blaze_util::Basename(blaze_util::Dirname(globals->workspace));
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000238 string product = globals->options.GetProductName();
239 blaze_util::ToLower(&product);
240 result.push_back(product + "(" + workspace + ")");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100241 if (globals->options.batch) {
242 result.push_back("-client");
243 result.push_back("-Xms256m");
244 result.push_back("-XX:NewRatio=4");
245 } else {
246 result.push_back("-server");
247 }
248
249 result.push_back("-XX:+HeapDumpOnOutOfMemoryError");
250 string heap_crash_path = globals->options.output_base;
251 result.push_back("-XX:HeapDumpPath=" + heap_crash_path);
252
253 result.push_back("-Xverify:none");
254
255 // Add JVM arguments particular to building blaze64 and particular JVM
256 // versions.
257 string error;
258 blaze_exit_code::ExitCode jvm_args_exit_code =
259 globals->options.AddJVMArguments(globals->options.GetHostJavabase(),
260 &result, &error);
261 if (jvm_args_exit_code != blaze_exit_code::SUCCESS) {
262 die(jvm_args_exit_code, "%s", error.c_str());
263 }
264
265 // We put all directories on the java.library.path that contain .so files.
266 string java_library_path = "-Djava.library.path=";
267 string real_install_dir = blaze_util::JoinPath(globals->options.install_base,
268 "_embedded_binaries");
269 bool first = true;
270 for (const auto& it : globals->extracted_binaries) {
271 if (blaze::IsSharedLibrary(it)) {
272 if (!first) {
273 java_library_path += ":";
274 }
275 first = false;
276 java_library_path += blaze_util::JoinPath(real_install_dir,
277 blaze_util::Dirname(it));
278 }
279 }
280 result.push_back(java_library_path);
281
282 // Force use of latin1 for file names.
283 result.push_back("-Dfile.encoding=ISO-8859-1");
284
285 if (globals->options.host_jvm_debug) {
286 fprintf(stderr,
287 "Running host JVM under debugger (listening on TCP port 5005).\n");
288 // Start JVM so that it listens for a connection from a
289 // JDWP-compliant debugger:
290 result.push_back("-Xdebug");
291 result.push_back("-Xrunjdwp:transport=dt_socket,server=y,address=5005");
292 }
293
294 blaze_util::SplitQuotedStringUsing(globals->options.host_jvm_args, ' ',
295 &result);
296
297 result.push_back("-jar");
298 result.push_back(blaze_util::JoinPath(real_install_dir,
299 globals->extracted_binaries[0]));
300
301 if (!globals->options.batch) {
302 result.push_back("--max_idle_secs");
303 result.push_back(std::to_string(globals->options.max_idle_secs));
304 } else {
305 result.push_back("--batch");
306 }
307 result.push_back("--install_base=" + globals->options.install_base);
308 result.push_back("--output_base=" + globals->options.output_base);
309 result.push_back("--workspace_directory=" + globals->workspace);
310 if (!globals->options.skyframe.empty()) {
311 result.push_back("--skyframe=" + globals->options.skyframe);
312 }
313 if (globals->options.allow_configurable_attributes) {
314 result.push_back("--allow_configurable_attributes");
315 }
316 if (globals->options.watchfs) {
317 result.push_back("--watchfs");
318 }
319 if (globals->options.fatal_event_bus_exceptions) {
320 result.push_back("--fatal_event_bus_exceptions");
321 } else {
322 result.push_back("--nofatal_event_bus_exceptions");
323 }
324 if (globals->options.webstatus_port) {
325 result.push_back("--use_webstatusserver=" + \
326 std::to_string(globals->options.webstatus_port));
327 }
328
329 // This is only for Blaze reporting purposes; the real interpretation of the
330 // jvm flags occurs when we set up the java command line.
331 if (globals->options.host_jvm_debug) {
332 result.push_back("--host_jvm_debug");
333 }
334 if (!globals->options.host_jvm_profile.empty()) {
335 result.push_back("--host_jvm_profile=" + globals->options.host_jvm_profile);
336 }
337 if (!globals->options.host_jvm_args.empty()) {
338 result.push_back("--host_jvm_args=" + globals->options.host_jvm_args);
339 }
340 globals->options.AddExtraOptions(&result);
341
342 // The option sources are transmitted in the following format:
343 // --option_sources=option1:source1:option2:source2:...
344 string option_sources = "--option_sources=";
345 first = true;
346 for (const auto& it : globals->options.option_sources) {
347 if (!first) {
348 option_sources += ":";
349 }
350
351 first = false;
352 option_sources += EscapeForOptionSource(it.first) + ":" +
353 EscapeForOptionSource(it.second);
354 }
355
356 result.push_back(option_sources);
357 return result;
358}
359
360// Add commom command options for logging to the given argument array.
361static void AddLoggingArgs(vector<string>* args) {
362 args->push_back("--startup_time=" + std::to_string(globals->startup_time));
363 if (globals->command_wait_time != 0) {
364 args->push_back("--command_wait_time=" +
365 std::to_string(globals->command_wait_time));
366 }
367 if (globals->extract_data_time != 0) {
368 args->push_back("--extract_data_time=" +
369 std::to_string(globals->extract_data_time));
370 }
371 if (globals->restart_reason != NO_RESTART) {
372 const char *reasons[] = {
373 "no_restart", "no_daemon", "new_version", "new_options"
374 };
375 args->push_back(
376 string("--restart_reason=") + reasons[globals->restart_reason]);
377 }
378 args->push_back(
379 string("--binary_path=") + globals->binary_path);
380}
381
382
383// Join the elements of the specified array with NUL's (\0's), akin to the
384// format of /proc/$PID/cmdline.
385string GetArgumentString(const vector<string>& argument_array) {
386 string result;
387 blaze_util::JoinStrings(argument_array, '\0', &result);
388 return result;
389}
390
391// Causes the current process to become a daemon (i.e. a child of
392// init, detached from the terminal, in its own session.) We don't
393// change cwd, though.
394static void Daemonize(int socket) {
395 // Don't call die() or exit() in this function; we're already in a
396 // child process so it won't work as expected. Just don't do
397 // anything that can possibly fail. :)
398
399 signal(SIGHUP, SIG_IGN);
400 if (fork() > 0) {
401 // This second fork is required iff there's any chance cmd will
402 // open an specific tty explicitly, e.g., open("/dev/tty23"). If
403 // not, this fork can be removed.
404 _exit(blaze_exit_code::SUCCESS);
405 }
406
407 setsid();
408
409 close(0);
410 close(1);
411 close(2);
412 close(socket);
413
414 open("/dev/null", O_RDONLY); // stdin
415 // stdout:
416 if (open(globals->jvm_log_file.c_str(),
417 O_WRONLY | O_CREAT | O_TRUNC, 0666) == -1) {
418 // In a daemon, no-one can hear you scream.
419 open("/dev/null", O_WRONLY);
420 }
421 dup(STDOUT_FILENO); // stderr (2>&1)
422
423 // Keep server from inheriting a useless fd.
424 // The file lock was already lost at fork().
425 close(globals->lockfd);
426}
427
428// Do a chdir into the workspace, and die if it fails.
429static void GoToWorkspace() {
430 if (BlazeStartupOptions::InWorkspace(globals->workspace) &&
431 chdir(globals->workspace.c_str()) != 0) {
432 pdie(blaze_exit_code::INTERNAL_ERROR,
433 "chdir() into %s failed", globals->workspace.c_str());
434 }
435}
436
437// Check the java version if a java version specification is bundled. On
438// success,
439// return the executable path of the java command.
440static string VerifyJavaVersionAndGetJvm() {
441 string exe = globals->options.GetJvm();
442
443 string version_spec_file = blaze_util::JoinPath(
444 blaze_util::JoinPath(globals->options.install_base, "_embedded_binaries"),
445 "java.version");
446 string version_spec = "";
447 if (ReadFile(version_spec_file, &version_spec)) {
448 blaze_util::StripWhitespace(&version_spec);
449 // A version specification is given, get version of java.
450 string jvm_version = GetJvmVersion(exe);
451
452 // Compare that jvm_version is found and at least the one specified.
453 if (jvm_version.size() == 0) {
454 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
455 "Java version not detected while at least %s is needed.\n"
456 "Please set JAVA_HOME.", version_spec.c_str());
457 } else if (!CheckJavaVersionIsAtLeast(jvm_version, version_spec)) {
458 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
459 "Java version is %s while at least %s is needed.\n"
460 "Please set JAVA_HOME.",
461 jvm_version.c_str(), version_spec.c_str());
462 }
463 }
464
465 return exe;
466}
467
468// Starts the Blaze server. Returns a readable fd connected to the server.
469// This is currently used only to detect liveness.
470static int StartServer(int socket) {
471 vector<string> jvm_args_vector = GetArgumentArray();
472 string argument_string = GetArgumentString(jvm_args_vector);
473
474 // Write the cmdline argument string to the server dir. If we get to this
475 // point, there is no server running, so we don't overwrite the cmdline file
476 // for the existing server. If might be that the server dies and the cmdline
477 // file stays there, but that is not a problem, since we always check the
478 // server, too.
479 WriteFile(argument_string, globals->options.output_base + "/server/cmdline");
480
481 // unless we restarted for a new-version, mark this as initial start
482 if (globals->restart_reason == NO_RESTART) {
483 globals->restart_reason = NO_DAEMON;
484 }
485
486 // Computing this path may report a fatal error, so do it before forking.
487 string exe = VerifyJavaVersionAndGetJvm();
488
489 // Go to the workspace before we daemonize, so
490 // we can still print errors to the terminal.
491 GoToWorkspace();
492
493 int fds[2];
494 if (pipe(fds)) {
495 pdie(blaze_exit_code::INTERNAL_ERROR, "pipe creation failed");
496 }
497 int child = fork();
498 if (child == -1) {
499 pdie(blaze_exit_code::INTERNAL_ERROR, "fork() failed");
500 } else if (child > 0) { // we're the parent
501 close(fds[1]); // parent keeps only the reading side
502 return fds[0];
503 } else {
504 close(fds[0]); // child keeps only the writing side
505 }
506
507 Daemonize(socket);
508 ExecuteProgram(exe, jvm_args_vector);
509 pdie(blaze_exit_code::INTERNAL_ERROR, "execv of '%s' failed", exe.c_str());
510}
511
512static bool KillRunningServerIfAny();
513
514// Replace this process with blaze in standalone/batch mode.
515// The batch mode blaze process handles the command and exits.
516//
517// This function passes the commands array to the blaze process.
518// This array should start with a command ("build", "info", etc.).
519static void StartStandalone() {
520 KillRunningServerIfAny();
521
522 // Wall clock time since process startup.
523 globals->startup_time = ProcessClock() / 1000000LL;
524
525 if (VerboseLogging()) {
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000526 fprintf(stderr, "Starting %s in batch mode.\n",
527 globals->options.GetProductName().c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100528 }
529 string command = globals->option_processor.GetCommand();
530 vector<string> command_arguments;
531 globals->option_processor.GetCommandArguments(&command_arguments);
532
533 if (!command_arguments.empty() && command == "shutdown") {
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000534 string product = globals->options.GetProductName();
535 blaze_util::ToLower(&product);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100536 fprintf(stderr,
537 "WARNING: Running command \"shutdown\" in batch mode. Batch mode "
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000538 "is triggered\nwhen not running %s within a workspace. If you "
539 "intend to shutdown an\nexisting %s server, run \"%s "
540 "shutdown\" from the directory where\nit was started.\n",
541 globals->options.GetProductName().c_str(),
542 globals->options.GetProductName().c_str(), product.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100543 }
544 vector<string> jvm_args_vector = GetArgumentArray();
545 if (command != "") {
546 jvm_args_vector.push_back(command);
547 AddLoggingArgs(&jvm_args_vector);
548 }
549
550 jvm_args_vector.insert(jvm_args_vector.end(),
551 command_arguments.begin(),
552 command_arguments.end());
553
554 GoToWorkspace();
555
556 string exe = VerifyJavaVersionAndGetJvm();
557 ExecuteProgram(exe, jvm_args_vector);
558 pdie(blaze_exit_code::INTERNAL_ERROR, "execv of '%s' failed", exe.c_str());
559}
560
561// Like connect(2), but uses the AF_UNIX address denoted by socket_file,
562// resolving symbolic links. (The server may make "socket_file" a
563// symlink, to avoid ENAMETOOLONG, in which case the client must
564// resolve it in userspace before connecting.)
565static int Connect(int socket, const string &socket_file) {
566 struct sockaddr_un addr;
567 addr.sun_family = AF_UNIX;
568
569 char *resolved_path = realpath(socket_file.c_str(), NULL);
570 if (resolved_path != NULL) {
571 strncpy(addr.sun_path, resolved_path, sizeof addr.sun_path);
572 addr.sun_path[sizeof addr.sun_path - 1] = '\0';
573 free(resolved_path);
574 sockaddr *paddr = reinterpret_cast<sockaddr *>(&addr);
575 return connect(socket, paddr, sizeof addr);
576 } else if (errno == ENOENT) { // No socket means no server to connect to
577 errno = ECONNREFUSED;
578 return -1;
579 } else {
580 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
581 "realpath('%s') failed", socket_file.c_str());
582 }
583}
584
585// Write the contents of file_name to stream.
586static void WriteFileToStreamOrDie(FILE *stream, const char *file_name) {
587 FILE *fp = fopen(file_name, "r");
588 if (fp == NULL) {
589 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
590 "opening %s failed", file_name);
591 }
592 char buffer[255];
593 int num_read;
594 while ((num_read = fread(buffer, 1, sizeof buffer, fp)) > 0) {
595 if (ferror(fp)) {
596 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
597 "failed to read from '%s'", file_name);
598 }
599 fwrite(buffer, 1, num_read, stream);
600 }
601 fclose(fp);
602}
603
604// Connects to the Blaze server, returning the socket, or -1 if no
605// server is running and !start. If start, attempts to start a new
606// server, and exits on failure.
607static int ConnectToServer(bool start) {
608 int s = socket(PF_UNIX, SOCK_STREAM, 0);
609 if (s == -1) {
610 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
611 "can't create AF_UNIX socket");
612 }
613
614 string server_dir = globals->options.output_base + "/server";
615
616 // The server dir has the socket, so we don't allow access by other
617 // users.
618 if (MakeDirectories(server_dir, 0700) == -1) {
619 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
620 "server directory '%s' could not be created", server_dir.c_str());
621 }
622
623 string socket_file = server_dir + "/server.socket";
624
625 if (Connect(s, socket_file) == 0) {
626 return s;
627 }
628 if (!start) {
629 return -1;
630 } else {
631 SetScheduling(
632 globals->options.batch_cpu_scheduling,
633 globals->options.io_nice_level);
634
635 int fd = StartServer(s);
636 if (fcntl(fd, F_SETFL, O_NONBLOCK | fcntl(fd, F_GETFL))) {
637 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
638 "Failed: fcntl to enable O_NONBLOCK on pipe");
639 }
640 // Give the server one minute to start up.
641 for (int ii = 0; ii < 600; ++ii) { // 60s; enough time to connect
642 // with debugger
643 if (Connect(s, socket_file) == 0) {
644 if (ii) {
645 fputc('\n', stderr);
646 fflush(stderr);
647 }
648 return s;
649 }
650 fputc('.', stderr);
651 fflush(stderr);
652 poll(NULL, 0, 100); // sleep 100ms. (usleep(3) is obsolete.)
653 char c;
654 if (read(fd, &c, 1) != -1 || errno != EAGAIN) {
655 fprintf(stderr, "\nunexpected pipe read status: %s\n"
656 "Server presumed dead. Now printing '%s':\n",
657 strerror(errno), globals->jvm_log_file.c_str());
658 WriteFileToStreamOrDie(stderr, globals->jvm_log_file.c_str());
659 exit(blaze_exit_code::INTERNAL_ERROR);
660 }
661 }
662 die(blaze_exit_code::INTERNAL_ERROR,
663 "\nError: couldn't connect to server at '%s' after 60 seconds.",
664 socket_file.c_str());
665 }
666}
667
668
669// Kills the specified running Blaze server.
670static void KillRunningServer(pid_t server_pid) {
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000671 fprintf(stderr, "Sending SIGTERM to previous %s server (pid=%d)... ",
672 globals->options.GetProductName().c_str(), server_pid);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100673 fflush(stderr);
674 for (int ii = 0; ii < 100; ++ii) { // wait up to 10s
675 if (kill(server_pid, SIGTERM) == -1) {
676 fprintf(stderr, "done.\n");
677 return; // Ding! Dong! The witch is dead!
678 }
679 poll(NULL, 0, 100); // sleep 100ms. (usleep(3) is obsolete.)
680 }
681
682 // If the previous attempt did not suceeded, kill the whole group.
683 fprintf(stderr,
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000684 "Sending SIGKILL to previous %s server process group (pid=%d)... ",
685 globals->options.GetProductName().c_str(), server_pid);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100686 fflush(stderr);
687 killpg(server_pid, SIGKILL);
688 if (kill(server_pid, 0) == -1) { // (probe)
689 fprintf(stderr, "could not be killed.\n"); // task state 'Z' or 'D'?
690 exit(1); // TODO(bazel-team): confirm whether this is an internal error.
691 } else {
692 fprintf(stderr, "killed.\n");
693 }
694}
695
696
697// Kills the running Blaze server, if any. Finds the pid from the socket.
698static bool KillRunningServerIfAny() {
699 int socket = ConnectToServer(false);
700 if (socket != -1) {
701 KillRunningServer(GetPeerProcessId(socket));
702 return true;
703 }
704 return false;
705}
706
707
708// Calls fsync() on the file (or directory) specified in 'file_path'.
709// pdie()'s if syncing fails.
710static void SyncFile(const char *file_path) {
711 // fsync always fails on Cygwin with "Permission denied" for some reason.
712#ifndef __CYGWIN__
713 int fd = open(file_path, O_RDONLY);
714 if (fd < 0) {
715 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
716 "failed to open '%s' for syncing", file_path);
717 }
718 if (fsync(fd) < 0) {
719 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
720 "failed to sync '%s'", file_path);
721 }
722 close(fd);
723#endif
724}
725
726// Walks the temporary directory recursively and collects full file paths.
727static void CollectExtractedFiles(const string &dir_path, vector<string> &files) {
728 DIR *dir;
729 struct dirent *ent;
730
731 if ((dir = opendir(dir_path.c_str())) == NULL) {
732 die(blaze_exit_code::INTERNAL_ERROR, "opendir failed");
733 }
734
735 while ((ent = readdir(dir)) != NULL) {
736 if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) {
737 continue;
738 }
739
740 string filename(blaze_util::JoinPath(dir_path, ent->d_name));
741 bool is_directory;
742 if (ent->d_type == DT_UNKNOWN) {
743 struct stat buf;
744 if (lstat(filename.c_str(), &buf) == -1) {
745 die(blaze_exit_code::INTERNAL_ERROR, "stat failed");
746 }
747 is_directory = S_ISDIR(buf.st_mode);
748 } else {
749 is_directory = (ent->d_type == DT_DIR);
750 }
751
752 if (is_directory) {
753 CollectExtractedFiles(filename, files);
754 } else {
755 files.push_back(filename);
756 }
757 }
758
759 closedir(dir);
760}
761
762// Actually extracts the embedded data files into the tree whose root
763// is 'embedded_binaries'.
764static void ActuallyExtractData(const string &argv0,
765 const string &embedded_binaries) {
766 if (MakeDirectories(embedded_binaries, 0777) == -1) {
767 pdie(blaze_exit_code::INTERNAL_ERROR,
768 "couldn't create '%s'", embedded_binaries.c_str());
769 }
770
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000771 fprintf(stderr, "Extracting %s installation...\n",
772 globals->options.GetProductName().c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100773
774 struct archive *blaze_zip = archive_read_new();
775 archive_read_support_format_zip(blaze_zip);
776 int retval = archive_read_open_filename(blaze_zip, argv0.c_str(), 10240);
777 if (retval != ARCHIVE_OK) {
778 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000779 "\nFailed to open %s as a zip file",
780 globals->options.GetProductName().c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100781 }
782
783 struct archive_entry *entry;
784 string install_base_key;
785 while (archive_read_next_header(blaze_zip, &entry) == ARCHIVE_OK) {
786 string path = blaze_util::JoinPath(
787 embedded_binaries, archive_entry_pathname(entry));
788 if (MakeDirectories(blaze_util::Dirname(path), 0777) == -1) {
789 pdie(blaze_exit_code::INTERNAL_ERROR,
790 "couldn't create '%s'", path.c_str());
791 }
792 int fd = open(path.c_str(), O_CREAT | O_WRONLY, archive_entry_perm(entry));
793 if (fd < 0) {
794 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
795 "\nFailed to open extraction file: %s", strerror(errno));
796 }
797
798 const void *buf;
799 size_t size;
800 off_t offset;
801 while (true) {
802 retval = archive_read_data_block(blaze_zip, &buf, &size, &offset);
803 if (retval == ARCHIVE_EOF) {
804 break;
805 } else if (retval != ARCHIVE_OK) {
806 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000807 "\nFailed to extract data from %s zip: (%d) %s",
808 globals->options.GetProductName().c_str(), archive_errno(blaze_zip),
809 archive_error_string(blaze_zip));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100810 }
811 if (write(fd, buf, size) != size) {
812 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
813 "\nError writing zipped file to %s", path.c_str());
814 }
815 }
816 if (close(fd) != 0) {
817 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
818 "\nCould not close file %s", path.c_str());
819 }
820 }
821 retval = archive_read_free(blaze_zip);
822 if (retval != ARCHIVE_OK) {
823 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000824 "\nFailed to close %s zip", globals->options.GetProductName().c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100825 }
826
827 const time_t TEN_YEARS_IN_SEC = 3600 * 24 * 365 * 10;
828 time_t future_time = time(NULL) + TEN_YEARS_IN_SEC;
829
830 // Set the timestamps of the extracted files to the future and make sure (or
831 // at least as sure as we can...) that the files we have written are actually
832 // on the disk.
833
834 vector<string> extracted_files;
835 CollectExtractedFiles(embedded_binaries, extracted_files);
836
837 set<string> synced_directories;
838 for (vector<string>::iterator it = extracted_files.begin(); it != extracted_files.end(); it++) {
839
840 const char *extracted_path = it->c_str();
841
842 // Set the time to a distantly futuristic value so we can observe tampering.
843 // Note that keeping the default timestamp set by unzip (1970-01-01) and using
844 // that to detect tampering is not enough, because we also need the timestamp
845 // to change between Blaze releases so that the metadata cache knows that
846 // the files may have changed. This is important for actions that use
847 // embedded binaries as artifacts.
848 struct utimbuf times = { future_time, future_time };
849 if (utime(extracted_path, &times) == -1) {
850 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
851 "failed to set timestamp on '%s'", extracted_path);
852 }
853
854 SyncFile(extracted_path);
855
856 string directory = blaze_util::Dirname(extracted_path);
857
858 // Now walk up until embedded_binaries and sync every directory in between.
859 // synced_directories is used to avoid syncing the same directory twice.
860 // The !directory.empty() and directory != "/" conditions are not strictly
861 // needed, but it makes this loop more robust, because otherwise, if due to
862 // some glitch, directory was not under embedded_binaries, it would get
863 // into an infinite loop.
864 while (directory != embedded_binaries &&
865 synced_directories.count(directory) == 0 &&
866 !directory.empty() &&
867 directory != "/") {
868 SyncFile(directory.c_str());
869 synced_directories.insert(directory);
870 directory = blaze_util::Dirname(directory);
871 }
872 }
873
874 SyncFile(embedded_binaries.c_str());
875}
876
877// Installs Blaze by extracting the embedded data files, iff necessary.
878// The MD5-named install_base directory on disk is trusted; we assume
879// no-one has modified the extracted files beneath this directory once
880// it is in place. Concurrency during extraction is handled by
881// extracting in a tmp dir and then renaming it into place where it
882// becomes visible automically at the new path.
883// Populates globals->extracted_binaries with their extracted locations.
884static void ExtractData(const string &self_path) {
885 // If the install dir doesn't exist, create it, if it does, we know it's good.
886 struct stat buf;
887 if (stat(globals->options.install_base.c_str(), &buf) == -1) {
888 uint64 st = MonotonicClock();
889 // Work in a temp dir to avoid races.
890 string tmp_install = globals->options.install_base + ".tmp." +
891 std::to_string(getpid());
892 string tmp_binaries = tmp_install + "/_embedded_binaries";
893 ActuallyExtractData(self_path, tmp_binaries);
894
895 uint64 et = MonotonicClock();
896 globals->extract_data_time = (et - st) / 1000000LL;
897
898 // Now rename the completed installation to its final name. If this
899 // fails due to an ENOTEMPTY then we assume another good
900 // installation snuck in before us.
901 if (rename(tmp_install.c_str(), globals->options.install_base.c_str()) == -1
902 && errno != ENOTEMPTY) {
903 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
904 "install base directory '%s' could not be renamed into place",
905 tmp_install.c_str());
906 }
907 } else {
908 if (!S_ISDIR(buf.st_mode)) {
909 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
910 "Error: Install base directory '%s' could not be created. "
911 "It exists but is not a directory.",
912 globals->options.install_base.c_str());
913 }
914
915 const time_t time_now = time(NULL);
916 string real_install_dir = blaze_util::JoinPath(
917 globals->options.install_base,
918 "_embedded_binaries");
919 for (const auto& it : globals->extracted_binaries) {
920 string path = blaze_util::JoinPath(real_install_dir, it);
921 // Check that the file exists and is readable.
922 if (stat(path.c_str(), &buf) == -1) {
923 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
924 "Error: corrupt installation: file '%s' missing."
925 " Please remove '%s' and try again.",
926 path.c_str(), globals->options.install_base.c_str());
927 }
928 // Check that the timestamp is in the future. A past timestamp would indicate
929 // that the file has been tampered with. See ActuallyExtractData().
930 if (buf.st_mtime <= time_now) {
931 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
932 "Error: corrupt installation: file '%s' "
933 "modified. Please remove '%s' and try again.",
934 path.c_str(), globals->options.install_base.c_str());
935 }
936 }
937 }
938}
939
// Returns true if the server needs to be restarted to accommodate changes
// between the two argument lists.
//
// The lists are compared position by position, with two exceptions:
//  - a position where both sides start with "--option_sources=" is ignored;
//  - the element following a matching "--max_idle_secs" is ignored.
// Lists of different length always require a restart.
static bool ServerNeedsToBeKilled(const vector<string>& args1,
                                  const vector<string>& args2) {
  // We need not worry about one side missing an argument and the other side
  // having the default value, since this command line is already the
  // canonicalized one that always contains every switch (with default values
  // if it was not present on the real command line). Same applies for argument
  // ordering.
  if (args1.size() != args2.size()) {
    return true;
  }

  static const char kOptionSources[] = "--option_sources=";
  const size_t prefix_len = sizeof(kOptionSources) - 1;

  for (size_t i = 0; i < args1.size(); i++) {
    // compare() clamps the range to the string's length, so arguments shorter
    // than the prefix simply do not match.
    if (args1[i].compare(0, prefix_len, kOptionSources) == 0 &&
        args2[i].compare(0, prefix_len, kOptionSources) == 0) {
      continue;
    }

    if (args1[i] != args2[i]) {
      return true;
    }

    if (args1[i] == "--max_idle_secs") {
      // Skip the argument of --max_idle_secs.
      i++;
    }
  }

  return false;
}
972
973// Kills the running Blaze server, if any, if the startup options do not match.
974static void KillRunningServerIfDifferentStartupOptions() {
975 int socket = ConnectToServer(false);
976
977 if (socket == -1) {
978 return;
979 }
980
981 pid_t server_pid = GetPeerProcessId(socket);
982 close(socket);
983 string cmdline_path = globals->options.output_base + "/server/cmdline";
984 string joined_arguments;
985
986 // No, /proc/$PID/cmdline does not work, because it is limited to 4K. Even
987 // worse, its behavior differs slightly between kernels (in some, when longer
988 // command lines are truncated, the last 4 bytes are replaced with
989 // "..." + NUL.
990 ReadFile(cmdline_path, &joined_arguments);
991 vector<string> arguments = blaze_util::Split(joined_arguments, '\0');
992
993 // These strings contain null-separated command line arguments. If they are
994 // the same, the server can stay alive, otherwise, it needs shuffle off this
995 // mortal coil.
996 if (ServerNeedsToBeKilled(arguments, GetArgumentArray())) {
997 globals->restart_reason = NEW_OPTIONS;
998 fprintf(stderr,
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000999 "WARNING: Running %s server needs to be killed, because the "
1000 "startup options are different.\n",
1001 globals->options.GetProductName().c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001002 KillRunningServer(server_pid);
1003 }
1004}
1005
1006
1007// Kills the old running server if it is not the same version as us,
1008// dealing with various combinations of installation scheme
1009// (installation symlink and older MD5_MANIFEST contents).
1010// This function requires that the installation be complete, and the
1011// server lock acquired.
1012static void EnsureCorrectRunningVersion() {
1013 // Read the previous installation's semaphore symlink in output_base. If the
1014 // target dirs don't match, or if the symlink was not present, then kill any
1015 // running servers. Lastly, symlink to our installation so others know which
1016 // installation is running.
1017 string installation_path = globals->options.output_base + "/install";
1018 char prev_installation[PATH_MAX + 1] = ""; // NULs the whole array
1019 if (readlink(installation_path.c_str(),
1020 prev_installation, PATH_MAX) == -1 ||
1021 prev_installation != globals->options.install_base) {
1022 if (KillRunningServerIfAny()) {
1023 globals->restart_reason = NEW_VERSION;
1024 }
1025 unlink(installation_path.c_str());
1026 if (symlink(globals->options.install_base.c_str(),
1027 installation_path.c_str())) {
1028 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1029 "failed to create installation symlink '%s'",
1030 installation_path.c_str());
1031 }
1032 const time_t time_now = time(NULL);
1033 struct utimbuf times = { time_now, time_now };
1034 if (utime(globals->options.install_base.c_str(), &times) == -1) {
1035 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1036 "failed to set timestamp on '%s'",
1037 globals->options.install_base.c_str());
1038 }
1039 }
1040}
1041
1042
// A signal-safe version of fprintf(stderr, ...).
//
// The message is formatted into a fixed 1024-byte stack buffer and emitted
// with a single write(2) to STDERR_FILENO; messages that do not fit are
// truncated to the buffer size. Write errors are ignored.
//
// WARNING: any output from the blaze client may be interleaved
// with output from the blaze server.  In --curses mode,
// the Blaze server often erases the previous line of output.
// So, be sure to end each such message with TWO newlines,
// otherwise it may be erased by the next message from the
// Blaze server.
// Also, it's a good idea to start each message with a newline,
// in case the Blaze server has written a partial line.
static void sigprintf(const char *format, ...) {
  char buf[1024];
  va_list ap;
  va_start(ap, format);
  int r = vsnprintf(buf, sizeof buf, format, ap);
  va_end(ap);
  if (r <= 0) {
    return;  // formatting error or empty message: nothing to print
  }

  // vsnprintf() returns the length the full message would have had, which
  // exceeds what was actually stored when the output was truncated. Never
  // hand write() more bytes than the buffer holds (excluding the NUL).
  size_t len = static_cast<size_t>(r);
  if (len >= sizeof buf) {
    len = sizeof buf - 1;
  }

  // Best effort only: there is nowhere to report a failed write to stderr.
  ssize_t ignored = write(STDERR_FILENO, buf, len);
  (void) ignored;
}
1061
1062
1063// Signal handler.
1064static void handler(int signum) {
1065 // A defensive measure:
1066 if (kill(globals->server_pid, 0) == -1 && errno == ESRCH) {
Kristina Chodorow11d40d22015-03-17 18:26:59 +00001067 sigprintf("\n%s server has died; client exiting.\n\n",
1068 globals->options.GetProductName().c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001069 _exit(1);
1070 }
1071
1072 switch (signum) {
1073 case SIGINT:
1074 if (++globals->sigint_count >= 3) {
Kristina Chodorow11d40d22015-03-17 18:26:59 +00001075 sigprintf("\n%s caught third interrupt signal; killed.\n\n",
1076 globals->options.GetProductName().c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001077 kill(globals->server_pid, SIGKILL);
1078 _exit(1);
1079 }
Kristina Chodorow11d40d22015-03-17 18:26:59 +00001080 sigprintf("\n%s caught interrupt signal; shutting down.\n\n",
1081 globals->options.GetProductName().c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001082 kill(globals->server_pid, SIGINT);
1083 break;
1084 case SIGTERM:
Kristina Chodorow11d40d22015-03-17 18:26:59 +00001085 sigprintf("\n%s caught terminate signal; shutting down.\n\n",
1086 globals->options.GetProductName().c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001087 kill(globals->server_pid, SIGINT);
1088 break;
1089 case SIGPIPE:
1090 // Don't bother the user with a message in this case; they're
1091 // probably using head(1) or more(1).
1092 kill(globals->server_pid, SIGINT);
1093 signal(SIGPIPE, SIG_IGN); // ignore subsequent SIGPIPE signals
1094 globals->received_signal = SIGPIPE;
1095 break;
1096 case SIGQUIT:
1097 sigprintf("\nSending SIGQUIT to JVM process %d (see %s).\n\n",
1098 globals->server_pid,
1099 globals->jvm_log_file.c_str());
1100 kill(globals->server_pid, SIGQUIT);
1101 break;
1102 }
1103}
1104
1105
1106// Reads a single char from the specified stream.
1107static char read_server_char(FILE *fp) {
1108 int c = getc(fp);
1109 if (c == EOF) {
1110 // e.g. external SIGKILL of server, misplaced System.exit() in the server,
1111 // or a JVM crash. Print out the jvm.out file in case there's something
1112 // useful.
Kristina Chodorow11d40d22015-03-17 18:26:59 +00001113 fprintf(stderr, "Error: unexpected EOF from %s server.\n"
1114 "Contents of '%s':\n", globals->options.GetProductName().c_str(),
1115 globals->jvm_log_file.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001116 WriteFileToStreamOrDie(stderr, globals->jvm_log_file.c_str());
1117 exit(blaze_exit_code::INTERNAL_ERROR);
1118 }
1119 return static_cast<char>(c);
1120}
1121
1122// Constructs the command line for a server request,
1123static string BuildServerRequest() {
1124 vector<string> arg_vector;
1125 string command = globals->option_processor.GetCommand();
1126 if (command != "") {
1127 arg_vector.push_back(command);
1128 AddLoggingArgs(&arg_vector);
1129 }
1130
1131 globals->option_processor.GetCommandArguments(&arg_vector);
1132
1133 string request("blaze");
1134 for (vector<string>::iterator it = arg_vector.begin();
1135 it != arg_vector.end(); it++) {
1136 request.push_back('\0');
1137 request.append(*it);
1138 }
1139 return request;
1140}
1141
1142// Performs all I/O for a single client request to the server, and
1143// shuts down the client (by exit or signal).
1144static void SendServerRequest(void) ATTRIBUTE_NORETURN;
1145static void SendServerRequest(void) {
1146 int socket = -1;
1147 while (true) {
1148 socket = ConnectToServer(true);
1149 globals->server_pid = GetPeerProcessId(socket);
1150
1151 // Check for deleted server cwd:
1152 string server_cwd = GetProcessCWD(globals->server_pid);
1153 if (server_cwd.empty() || // GetProcessCWD failed
1154 server_cwd != globals->workspace || // changed
1155 server_cwd.find(" (deleted)") != string::npos) { // deleted.
1156 // There's a distant possibility that the two paths look the same yet are
1157 // actually different because the two processes have different mount
1158 // tables.
1159 if (VerboseLogging()) {
1160 fprintf(stderr, "Server's cwd moved or deleted (%s).\n",
1161 server_cwd.c_str());
1162 }
1163 close(socket);
1164 KillRunningServer(globals->server_pid);
1165 } else {
1166 break;
1167 }
1168 }
1169
1170 FILE *fp = fdopen(socket, "r"); // use buffering for reads--it's faster
1171
1172 if (VerboseLogging()) {
1173 fprintf(stderr, "Connected (server pid=%d).\n", globals->server_pid);
1174 }
1175
1176 // Wall clock time since process startup.
1177 globals->startup_time = ProcessClock() / 1000000LL;
1178 const string request = BuildServerRequest();
1179
1180 // Unblock all signals.
1181 sigset_t sigset;
1182 sigemptyset(&sigset);
1183 sigprocmask(SIG_SETMASK, &sigset, NULL);
1184
1185 signal(SIGINT, handler);
1186 signal(SIGTERM, handler);
1187 signal(SIGPIPE, handler);
1188 signal(SIGQUIT, handler);
1189
1190 // Send request and shutdown the write half of the connection:
1191 // (Request is written in a single chunk.)
1192 if (write(socket, request.data(), request.size()) != request.size()) {
1193 pdie(blaze_exit_code::INTERNAL_ERROR, "write() to server failed");
1194 }
1195 // In this (totally bizarre) protocol, this is the
1196 // client's way of saying "um, that's the end of the request".
1197 if (shutdown(socket, SHUT_WR) == -1) {
1198 pdie(blaze_exit_code::INTERNAL_ERROR, "shutdown(WR) failed");
1199 }
1200
1201 // Wait until we receive some response from the server.
1202 // (We do this by calling select() with a timeout.)
1203 // If we don't receive a response within 3 seconds, print a message,
1204 // so that the user has some idea what is going on.
1205 while (true) {
1206 fd_set fdset;
1207 FD_ZERO(&fdset);
1208 FD_SET(socket, &fdset);
1209 struct timeval timeout;
1210 timeout.tv_sec = 3;
1211 timeout.tv_usec = 0;
1212 int result = select(socket + 1, &fdset, NULL, &fdset, &timeout);
1213 if (result > 0) {
1214 // Data is ready on socket. Go ahead and read it.
1215 break;
1216 } else if (result == 0) {
1217 // Timeout. Print a message, then go ahead and read from
1218 // the socket (the read will usually block).
1219 fprintf(stderr,
Kristina Chodorow11d40d22015-03-17 18:26:59 +00001220 "INFO: Waiting for response from %s server (pid %d)...\n",
1221 globals->options.GetProductName().c_str(), globals->server_pid);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001222 break;
1223 } else { // result < 0
1224 // Error. For EINTR we try again, all other errors are fatal.
1225 if (errno != EINTR) {
1226 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1227 "select() on server socket failed");
1228 }
1229 }
1230 }
1231
1232 // Read and demux the response. This protocol is awful.
1233 for (;;) {
1234 // Read one line:
1235 char at = read_server_char(fp);
1236 assert(at == '@');
1237 (void) at; // avoid warning about unused variable
1238 char tag = read_server_char(fp);
1239 assert(tag == '1' || tag == '2' || tag == '3');
1240 char at_or_newline = read_server_char(fp);
1241 bool second_at = at_or_newline == '@';
1242 if (second_at) {
1243 at_or_newline = read_server_char(fp);
1244 }
1245 assert(at_or_newline == '\n');
1246
1247 if (tag == '3') {
1248 // In this (totally bizarre) protocol, this is the
1249 // server's way of saying "um, that's the end of the response".
1250 break;
1251 }
1252 FILE *stream = tag == '1' ? stdout : stderr;
1253 for (;;) {
1254 char c = read_server_char(fp);
1255 if (c == '\n') {
1256 if (!second_at) fputc(c, stream);
1257 fflush(stream);
1258 break;
1259 } else {
1260 fputc(c, stream);
1261 }
1262 }
1263 }
1264
1265 char line[255];
1266 if (fgets(line, sizeof line, fp) == NULL ||
1267 !isdigit(line[0])) {
1268 die(blaze_exit_code::INTERNAL_ERROR,
1269 "Error: can't read exit code from server.");
1270 }
1271 int exit_code;
1272 blaze_util::safe_strto32(line, &exit_code);
1273
1274 close(socket); // might fail EINTR, just ignore.
1275
1276 if (globals->received_signal) { // Kill ourselves with the same signal, so
1277 // that callers see the right WTERMSIG value.
1278 signal(globals->received_signal, SIG_DFL);
1279 raise(globals->received_signal);
1280 exit(1); // (in case raise didn't kill us for some reason)
1281 }
1282
1283 exit(exit_code);
1284}
1285
1286// Parse the options, storing parsed values in globals.
1287// Returns the index of the first non-option argument.
1288static void ParseOptions(int argc, const char *argv[]) {
1289 string error;
1290 blaze_exit_code::ExitCode parse_exit_code =
1291 globals->option_processor.ParseOptions(argc, argv, globals->workspace,
1292 globals->cwd, &error);
1293 if (parse_exit_code != blaze_exit_code::SUCCESS) {
1294 die(parse_exit_code, "%s", error.c_str());
1295 }
1296 globals->options = globals->option_processor.GetParsedStartupOptions();
1297}
1298
1299// Returns the canonical form of a path.
1300static string MakeCanonical(const char *path) {
1301 char *resolved_path = realpath(path, NULL);
1302 if (resolved_path == NULL) {
1303 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1304 "realpath('%s') failed", path);
1305 }
1306
1307 string ret = resolved_path;
1308 free(resolved_path);
1309 return ret;
1310}
1311
1312// Compute the globals globals->cwd and globals->workspace.
1313static void ComputeWorkspace() {
1314 char cwdbuf[PATH_MAX];
1315 if (getcwd(cwdbuf, sizeof cwdbuf) == NULL) {
1316 pdie(blaze_exit_code::INTERNAL_ERROR, "getcwd() failed");
1317 }
1318 globals->cwd = MakeCanonical(cwdbuf);
1319 globals->workspace = BlazeStartupOptions::GetWorkspace(globals->cwd);
1320}
1321
1322// Figure out the base directories based on embedded data, username, cwd, etc.
1323// Sets globals->options.install_base, globals->options.output_base,
1324// globals->lock_file, globals->jvm_log_file.
1325static void ComputeBaseDirectories(const string self_path) {
1326 // Only start a server when in a workspace because otherwise we won't do more
1327 // than emit a help message.
1328 if (!BlazeStartupOptions::InWorkspace(globals->workspace)) {
1329 globals->options.batch = true;
1330 }
1331
1332 // The default install_base is <output_user_root>/install/<md5(blaze)>
1333 // but if an install_base is specified on the command line, we use that as
1334 // the base instead.
1335 if (globals->options.install_base.empty()) {
1336 string install_user_root = globals->options.output_user_root + "/install";
1337 globals->options.install_base =
1338 GetInstallBase(install_user_root, self_path);
1339 } else {
1340 // We call GetInstallBase anyway to populate extracted_binaries.
1341 GetInstallBase("", self_path);
1342 }
1343
1344 if (globals->options.output_base.empty()) {
1345 globals->options.output_base = GetHashedBaseDir(
1346 globals->options.output_user_root, globals->workspace);
1347 }
1348
1349 struct stat buf;
1350 if (stat(globals->options.output_base.c_str(), &buf) == -1) {
1351 if (MakeDirectories(globals->options.output_base, 0777) == -1) {
1352 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1353 "Output base directory '%s' could not be created",
1354 globals->options.output_base.c_str());
1355 }
1356 } else {
1357 if (!S_ISDIR(buf.st_mode)) {
1358 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1359 "Error: Output base directory '%s' could not be created. "
1360 "It exists but is not a directory.",
1361 globals->options.output_base.c_str());
1362 }
1363 }
1364 if (access(globals->options.output_base.c_str(), R_OK | W_OK | X_OK) != 0) {
1365 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1366 "Error: Output base directory '%s' must be readable and writable.",
1367 globals->options.output_base.c_str());
1368 }
1369
1370 globals->options.output_base =
1371 MakeCanonical(globals->options.output_base.c_str());
1372 globals->lockfile = globals->options.output_base + "/lock";
1373 globals->jvm_log_file = globals->options.output_base + "/server/jvm.out";
1374}
1375
1376static void CheckEnvironment() {
1377 char pthread_impl[512];
1378#ifndef _CS_GNU_LIBPTHREAD_VERSION
1379#define _CS_GNU_LIBPTHREAD_VERSION 3
1380#endif
1381 if (confstr(_CS_GNU_LIBPTHREAD_VERSION, pthread_impl, sizeof pthread_impl) &&
1382 strprefix(pthread_impl, "linuxthreads")) {
1383 fprintf(stderr, "Warning: LinuxThreads detected. NPTL is preferred.\n"
1384 " (Perhaps unset LD_ASSUME_KERNEL or LD_LIBRARY_PATH.)\n");
1385 }
1386
1387 if (getenv("LD_ASSUME_KERNEL") != NULL) {
1388 // Fix for bug: if ulimit -s and LD_ASSUME_KERNEL are both
1389 // specified, the JVM fails to create threads. See thread_stack_regtest.
1390 // This is also provoked by LD_LIBRARY_PATH=/usr/lib/debug,
1391 // or anything else that causes the JVM to use LinuxThreads.
1392 fprintf(stderr, "Warning: ignoring LD_ASSUME_KERNEL in environment.\n");
1393 unsetenv("LD_ASSUME_KERNEL");
1394 }
1395
1396 if (getenv("LD_PRELOAD") != NULL) {
1397 fprintf(stderr, "Warning: ignoring LD_PRELOAD in environment.\n");
1398 unsetenv("LD_PRELOAD");
1399 }
1400
1401 if (getenv("_JAVA_OPTIONS") != NULL) {
1402 // This would override --host_jvm_args
1403 fprintf(stderr, "Warning: ignoring _JAVA_OPTIONS in environment.\n");
1404 unsetenv("_JAVA_OPTIONS");
1405 }
1406
1407 if (TESTING) {
1408 fprintf(stderr, "INFO: $TEST_TMPDIR defined: output root default is "
1409 "'%s'.\n", globals->options.output_root.c_str());
1410 }
1411
1412 // TODO(bazel-team): We've also seen a failure during loading (creating
1413 // threads?) when ulimit -Hs 8192. Characterize that and check for it here.
1414
1415 // Make the JVM use ISO-8859-1 for parsing its command line because "blaze
1416 // run" doesn't handle non-ASCII command line arguments. This is apparently
1417 // the most reliable way to select the platform default encoding.
1418 setenv("LANG", "en_US.ISO-8859-1", 1);
1419 setenv("LANGUAGE", "en_US.ISO-8859-1", 1);
1420 setenv("LC_ALL", "en_US.ISO-8859-1", 1);
1421 setenv("LC_CTYPE", "en_US.ISO-8859-1", 1);
1422}
1423
1424// Create the lockfile and take an exclusive lock on a region within it. This
1425// lock is inherited with the file descriptor across execve(), but not fork().
1426// So in the batch case, the JVM holds the lock until exit; otherwise, this
1427// program holds it until exit.
1428static void AcquireLock() {
1429 globals->lockfd = open(globals->lockfile.c_str(), O_CREAT|O_RDWR, 0644);
1430 if (globals->lockfd < 0) {
1431 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1432 "cannot open lockfile '%s' for writing", globals->lockfile.c_str());
1433 }
1434
1435 struct flock lock;
1436 lock.l_type = F_WRLCK;
1437 lock.l_whence = SEEK_SET;
1438 lock.l_start = 0;
1439 // This doesn't really matter now, but allows us to subdivide the lock
1440 // later if that becomes meaningful. (Ranges beyond EOF can be locked.)
1441 lock.l_len = 4096;
1442
1443 // Try to take the lock, without blocking.
1444 if (fcntl(globals->lockfd, F_SETLK, &lock) == -1) {
1445 if (errno != EACCES && errno != EAGAIN) {
1446 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1447 "unexpected result from F_SETLK");
1448 }
1449
1450 // We didn't get the lock. Find out who has it.
1451 struct flock probe = lock;
1452 probe.l_pid = 0;
1453 if (fcntl(globals->lockfd, F_GETLK, &probe) == -1) {
1454 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1455 "unexpected result from F_GETLK");
1456 }
1457 if (!globals->options.block_for_lock) {
1458 die(blaze_exit_code::BAD_ARGV,
Kristina Chodorow11d40d22015-03-17 18:26:59 +00001459 "Another %s command is running (pid=%d). Exiting immediately.",
1460 globals->options.GetProductName().c_str(), probe.l_pid);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001461 }
Kristina Chodorow11d40d22015-03-17 18:26:59 +00001462 fprintf(stderr, "Another %s command is running (pid = %d). "
1463 "Waiting for it to complete...",
1464 globals->options.GetProductName().c_str(), probe.l_pid);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001465 fflush(stderr);
1466
1467 // Take a clock sample for that start of the waiting time
1468 uint64 st = MonotonicClock();
1469 // Try to take the lock again (blocking).
1470 int r;
1471 do {
1472 r = fcntl(globals->lockfd, F_SETLKW, &lock);
1473 } while (r == -1 && errno == EINTR);
1474 fprintf(stderr, "\n");
1475 if (r == -1) {
1476 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1477 "couldn't acquire file lock");
1478 }
1479 // Take another clock sample, calculate elapsed
1480 uint64 et = MonotonicClock();
1481 globals->command_wait_time = (et - st) / 1000000LL;
1482 }
1483
1484 // Identify ourselves in the lockfile.
1485 ftruncate(globals->lockfd, 0);
1486 const char *tty = ttyname(STDIN_FILENO); // NOLINT (single-threaded)
Kristina Chodorow11d40d22015-03-17 18:26:59 +00001487 string msg = "owner=" + globals->options.GetProductName() + " launcher\npid="
1488 + std::to_string(getpid()) + "\ntty=" + (tty ? tty : "") + "\n";
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001489 // Don't bother checking for error, since it's unlikely and unimportant.
1490 // The contents are currently meant only for debugging.
1491 write(globals->lockfd, msg.data(), msg.size());
1492}
1493
1494// Returns the mountpoint containing the specified directory, which
1495// must exist. Fails if any parent path could not be statted or
1496// canonicalised.
1497static string GetMountpoint(string dir) {
1498 dev_t initial_device = -1;
1499 ino_t prev_inode = -1;
1500 string prev_dir = dir;
1501 for (;;) {
1502 struct stat buf;
1503 if (stat(dir.c_str(), &buf) == -1) {
1504 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1505 "stat('%s') failed", dir.c_str());
1506 } else if (initial_device == -1 && prev_inode == -1) { // first time
1507 initial_device = buf.st_dev;
1508 } else if (initial_device != buf.st_dev) { // we crossed file systems
1509 char *resolved_path = realpath(prev_dir.c_str(), NULL);
1510 if (resolved_path == NULL) {
1511 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1512 "realpath('%s') failed", prev_dir.c_str());
1513 }
1514 dir = resolved_path;
1515 free(resolved_path);
1516 return dir;
1517 } else if (prev_inode == buf.st_ino) { // ".." had no effect => root.
1518 return "/";
1519 }
1520
1521 prev_inode = buf.st_ino;
1522 prev_dir = dir;
1523 dir += "/..";
1524 }
1525
1526 return "/";
1527}
1528
1529// Issue a warning if disk has less than 10% free blocks or inodes.
1530static void WarnIfFullDisk() {
1531 struct statvfs buf;
1532 if (statvfs(globals->options.output_base.c_str(), &buf) < 0) {
1533 fprintf(stderr, "WARNING: couldn't get file system information for '%s': "
1534 "%s\n", globals->options.output_base.c_str(), strerror(errno));
1535 return;
1536 }
1537
1538 if (10LL * buf.f_favail < buf.f_files) {
1539 fprintf(stderr,
1540 "WARNING: build volume %s is nearly full "
1541 "(%llu inodes remain).\n",
1542 GetMountpoint(globals->options.output_base).c_str(),
1543 static_cast<int64>(buf.f_favail));
1544 }
1545 if (10LL * buf.f_bavail < buf.f_blocks) {
1546 fprintf(stderr,
1547 "WARNING: build volume %s is nearly full "
1548 "(%.1fGB remain).\n",
1549 GetMountpoint(globals->options.output_base).c_str(),
1550 (1.0 * buf.f_bavail) * buf.f_frsize / 1E9);
1551 }
1552}
1553
// Prepares the standard streams: makes stderr line-buffered and guarantees
// that file descriptors 0, 1 and 2 are all open.
void SetupStreams() {
  // Line-buffer stderr, since we always flush at the end of a server
  // message.  This saves lots of single-char calls to write(2).
  // This doesn't work if any writes to stderr have already occurred!
  setlinebuf(stderr);

  // Ensure we have three open fds.  Otherwise we can end up with
  // bizarre things like stdout going to the lock file, etc.
  // open() hands out the lowest free descriptor, so going in ascending order
  // fills exactly the slot that was found closed.
  for (int fd = 0; fd <= 2; ++fd) {
    if (fcntl(fd, F_GETFL) == -1) {
      open("/dev/null", fd == 0 ? O_RDONLY : O_WRONLY);
    }
  }
}
1566
1567// Set an 8MB stack for Blaze. When the stack max is unbounded, it changes the
1568// layout in the JVM's address space, and we are unable to instantiate the
1569// default 3000MB heap.
1570static void EnsureFiniteStackLimit() {
1571 struct rlimit limit;
1572 const int default_stack = 8 * 1024 * 1024; // 8MB.
1573 if (getrlimit(RLIMIT_STACK, &limit)) {
1574 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, "getrlimit() failed");
1575 }
1576
1577 if (default_stack < limit.rlim_cur) {
1578 limit.rlim_cur = default_stack;
1579 if (setrlimit(RLIMIT_STACK, &limit)) {
1580 perror("setrlimit() failed: If the stack limit is too high, "
1581 "this can cause the JVM to be unable to allocate enough "
1582 "contiguous address space for its heap");
1583 }
1584 }
1585}
1586
1587static void CheckBinaryPath(const string& argv0) {
1588 if (argv0[0] == '/') {
1589 globals->binary_path = argv0;
1590 } else {
1591 string abs_path = globals->cwd + '/' + argv0;
1592 char *resolved_path = realpath(abs_path.c_str(), NULL);
1593 if (resolved_path) {
1594 globals->binary_path = resolved_path;
1595 free(resolved_path);
1596 } else {
1597 // This happens during our integration tests, but thats okay, as we won't
1598 // log the invocation anyway.
1599 globals->binary_path = abs_path;
1600 }
1601 }
1602}
1603
1604// Create the user's directory where we keep state, installations etc.
1605// Typically, this happens inside a temp directory, so we have to be
1606// careful about symlink attacks.
1607static void CreateSecureOutputRoot() {
1608 const char* root = globals->options.output_user_root.c_str();
1609 struct stat fileinfo = {};
1610
1611 if (mkdir(root, 0755) == 0) {
1612 return; // mkdir succeeded, no need to verify ownership/mode.
1613 }
1614 if (errno != EEXIST) {
1615 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, "mkdir('%s')", root);
1616 }
1617
1618 // The path already exists.
1619 // Check ownership and mode, and verify that it is a directory.
1620
1621 if (lstat(root, &fileinfo) < 0) {
1622 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, "lstat('%s')", root);
1623 }
1624
1625 if (fileinfo.st_uid != geteuid()) {
1626 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, "'%s' is not owned by me",
1627 root);
1628 }
1629
1630 if ((fileinfo.st_mode & 022) != 0) {
1631 int new_mode = fileinfo.st_mode & (~022);
1632 if (chmod(root, new_mode) < 0) {
1633 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1634 "'%s' has mode %o, chmod to %o failed", root,
1635 fileinfo.st_mode & 07777, new_mode);
1636 }
1637 }
1638
1639 if (stat(root, &fileinfo) < 0) {
1640 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, "stat('%s')", root);
1641 }
1642
1643 if (!S_ISDIR(fileinfo.st_mode)) {
1644 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, "'%s' is not a directory",
1645 root);
1646 }
1647}
1648
1649// TODO(bazel-team): Execute the server as a child process and write its exit
1650// code to a file. In case the server becomes unresonsive or terminates
1651// unexpectedly (in a way that isn't already handled), we can observe the file,
1652// if it exists. (If it doesn't, then we know something went horribly wrong.)
1653int main(int argc, const char *argv[]) {
1654 InitGlobals();
1655 SetupStreams();
1656
1657 // Must be done before command line parsing.
1658 ComputeWorkspace();
1659 CheckBinaryPath(argv[0]);
1660 ParseOptions(argc, argv);
1661 string error;
1662 blaze_exit_code::ExitCode reexec_options_exit_code =
1663 globals->options.CheckForReExecuteOptions(argc, argv, &error);
1664 if (reexec_options_exit_code != blaze_exit_code::SUCCESS) {
1665 die(reexec_options_exit_code, "%s", error.c_str());
1666 }
1667 CheckEnvironment();
1668 CreateSecureOutputRoot();
1669
1670 const string self_path = GetSelfPath();
1671 ComputeBaseDirectories(self_path);
1672
1673 AcquireLock();
1674
1675 WarnIfFullDisk();
1676 WarnFilesystemType(globals->options.output_base);
1677 EnsureFiniteStackLimit();
1678
1679 ExtractData(self_path);
1680 EnsureCorrectRunningVersion();
1681 KillRunningServerIfDifferentStartupOptions();
1682
1683 if (globals->options.batch) {
1684 SetScheduling(globals->options.batch_cpu_scheduling,
1685 globals->options.io_nice_level);
1686 StartStandalone();
1687 } else {
1688 SendServerRequest();
1689 }
1690 return 0;
1691}
1692} // namespace blaze
1693
1694int main(int argc, const char *argv[]) {
1695 return blaze::main(argc, argv);
1696}