blob: 47011dc9e02b442fe883dbe0a1b5f34ab7b14ed4 [file] [log] [blame]
Damien Martin-Guillerezf88f4d82015-09-25 13:56:55 +00001// Copyright 2014 The Bazel Authors. All rights reserved.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01002//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// blaze.cc: bootstrap and client code for Blaze server.
16//
17// Responsible for:
18// - extracting the Python, C++ and Java components.
19// - starting the server or finding the existing one.
20// - client options parsing.
21// - passing the argv array, and printing the out/err streams.
22// - signal handling.
23// - exiting with the right error/WTERMSIG code.
24// - debugger + profiler support.
25// - mutual exclusion between batch invocations.
Julio Merino28774852016-09-14 16:59:46 +000026#include "src/main/cpp/blaze.h"
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010027
28#include <assert.h>
29#include <ctype.h>
30#include <dirent.h>
31#include <errno.h>
32#include <fcntl.h>
33#include <limits.h>
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010034#include <sched.h>
35#include <signal.h>
36#include <stdarg.h>
Thiago Farina8a67da42015-05-05 18:04:50 +000037#include <stdint.h>
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010038#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
41#include <sys/resource.h>
42#include <sys/select.h>
43#include <sys/socket.h>
44#include <sys/stat.h>
45#include <sys/statvfs.h>
46#include <sys/time.h>
47#include <sys/un.h>
48#include <time.h>
49#include <unistd.h>
50#include <utime.h>
Lukacs Berkie21e5922016-04-12 12:22:20 +000051
52#include <grpc/grpc.h>
Googler197547b2016-09-26 22:25:14 +000053#include <grpc/support/log.h>
Lukacs Berkie21e5922016-04-12 12:22:20 +000054#include <grpc++/channel.h>
55#include <grpc++/client_context.h>
56#include <grpc++/create_channel.h>
57#include <grpc++/security/credentials.h>
58
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010059#include <algorithm>
Lukacs Berki1b25ce22016-04-15 13:11:21 +000060#include <chrono> // NOLINT (gRPC requires this)
Lukacs Berkif1df38a2016-04-19 07:42:22 +000061#include <mutex> // NOLINT
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010062#include <set>
63#include <string>
Lukacs Berkif1df38a2016-04-19 07:42:22 +000064#include <thread> // NOLINT
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010065#include <utility>
66#include <vector>
67
Lukacs Berkie21e5922016-04-12 12:22:20 +000068
Nathan Harmatabf98f392016-01-07 22:58:29 +000069#include "src/main/cpp/blaze_abrupt_exit.h"
Han-Wen Nienhuys36fbe632015-04-21 13:58:08 +000070#include "src/main/cpp/blaze_util.h"
71#include "src/main/cpp/blaze_util_platform.h"
Thiago Farina676cb9f2016-10-06 11:00:43 +000072#include "src/main/cpp/global_variables.h"
Han-Wen Nienhuys36fbe632015-04-21 13:58:08 +000073#include "src/main/cpp/option_processor.h"
Julio Merino28774852016-09-14 16:59:46 +000074#include "src/main/cpp/startup_options.h"
Han-Wen Nienhuys36fbe632015-04-21 13:58:08 +000075#include "src/main/cpp/util/errors.h"
Thiago Farina7f9357f2015-04-23 13:57:43 +000076#include "src/main/cpp/util/exit_code.h"
Han-Wen Nienhuys36fbe632015-04-21 13:58:08 +000077#include "src/main/cpp/util/file.h"
78#include "src/main/cpp/util/md5.h"
79#include "src/main/cpp/util/numbers.h"
80#include "src/main/cpp/util/port.h"
81#include "src/main/cpp/util/strings.h"
Julio Merino211a95c2016-08-29 11:01:35 +000082#include "src/main/cpp/workspace_layout.h"
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +000083#include "third_party/ijar/zip.h"
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010084
Lukacs Berkie21e5922016-04-12 12:22:20 +000085#include "src/main/protobuf/command_server.grpc.pb.h"
86
Thiago Farina7390ddb2015-04-09 13:53:53 +000087using blaze_util::Md5Digest;
Thiago Farina241f46c2015-04-13 14:33:30 +000088using blaze_util::die;
89using blaze_util::pdie;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010090
91// This should already be defined in sched.h, but it's not.
92#ifndef SCHED_BATCH
93#define SCHED_BATCH 3
94#endif
95
96namespace blaze {
97
Thiago Farina80bb0f22016-10-17 15:57:13 +000098using std::set;
99using std::string;
100using std::vector;
101
Lukacs Berki907dbbf2016-04-15 11:30:12 +0000102static void WriteFileToStreamOrDie(FILE *stream, const char *file_name);
Lukacs Berki1977d922016-05-02 09:31:37 +0000103static int GetServerPid(const string &server_dir);
Eric Fellheimer3a695f32016-05-11 17:26:30 +0000104static void VerifyJavaVersionAndSetJvm();
Lukacs Berki907dbbf2016-04-15 11:30:12 +0000105
Lukacs Berki1977d922016-05-02 09:31:37 +0000106// The following is a treatise on how the interaction between the client and the
107// server works.
108//
109// First, the client unconditionally acquires an flock() lock on
110// $OUTPUT_BASE/lock then verifies if it has already extracted itself by
111// checking if the directory it extracts itself to (install base + a checksum)
112// is present. If not, then it does the extraction. Care is taken that this
113// process is atomic so that Blazen in multiple output bases do not clash.
114//
115// Then the client tries to connect to the currently executing server and kills
116// it if at least one of the following conditions is true:
117//
118// - The server is of the wrong version (as determined by the
119// $OUTPUT_BASE/install symlink)
120// - The server has different startup options than the client wants
121// - The client wants to run the command in batch mode
122//
123// Then, if needed, the client adjusts the install link to indicate which
124// version of the server it is running.
125//
126// In batch mode, the client then simply executes the server while taking care
127// that the output base lock is kept until it finishes.
128//
129// If in server mode, the client starts up a server if needed then sends the
// command to the server and streams back stdout and stderr. The output base
131// lock is released after the command is sent to the server (the server
132// implements its own locking mechanism).
Lukacs Berki1977d922016-05-02 09:31:37 +0000133
134// Synchronization between the client and the server is a little precarious
135// because the client needs to know the PID of the server and it is not
136// available using a Java API and we don't have JNI on Windows at the moment,
137// so the server can't just communicate this over the communication channel.
138// Thus, a PID file is used, but care needs to be taken that the contents of
139// this PID file are right.
140//
141// Upon server startup, the PID file is written before the client spawns the
142// server. Thus, when the client can connect, it can be certain that the PID
143// file is up to date.
144//
145// Upon server shutdown, the PID file is deleted using a server shutdown hook.
146// However, this happens *after* the server stopped listening, so it's possible
147// that a client has already started up a server and written a new PID file.
148// In order to avoid this, when the client starts up a new server, it reads the
149// contents of the PID file and kills the process indicated in it (it could do
150// with a bit more care, since PIDs can be reused, but for now, we just believe
151// the PID file)
152//
153// Some more interesting scenarios:
154//
155// - The server receives a kill signal and it does not have a chance to delete
156// the PID file: the client cannot connect, reads the PID file, kills the
157// process indicated in it and starts up a new server.
158//
159// - The server stopped accepting connections but hasn't quit yet and a new
160// client comes around: the new client will kill the server based on the
161// PID file before a new server is started up.
162//
163// Alternative implementations:
164//
165// - Don't deal with PIDs at all. This would make it impossible for the client
166// to deliver a SIGKILL to the server after three SIGINTs. It would only be
167// possible with gRPC anyway.
168//
// - Have the server check that the PID file contains the correct things
170// before deleting them: there is a window of time between checking the file
171// and deleting it in which a new server can overwrite the PID file. The
172// output base lock cannot be acquired, either, because when starting up a
173// new server, the client already holds it.
174//
175// - Delete the PID file before stopping to accept connections: then a client
176// could come about after deleting the PID file but before stopping accepting
177// connections. It would also not be resilient against a dead server that
178// left a PID file around.
Lukacs Berkif1df38a2016-04-19 07:42:22 +0000179class BlazeServer {
180 public:
181 virtual ~BlazeServer() {}
182
Lukacs Berki1977d922016-05-02 09:31:37 +0000183 // Acquire a lock for the server running in this output base. Returns the
184 // number of milliseconds spent waiting for the lock.
Lukacs Berki415d39a2016-04-28 13:18:54 +0000185 uint64_t AcquireLock();
186
Lukacs Berki1977d922016-05-02 09:31:37 +0000187 // Whether there is an active connection to a server.
188 bool Connected() const { return connected_; }
189
Lukacs Berkie6a34f62016-04-25 12:16:04 +0000190 // Connect to the server. Returns if the connection was successful. Only
191 // call this when this object is in disconnected state. If it returns true,
192 // this object will be in connected state.
Lukacs Berkif1df38a2016-04-19 07:42:22 +0000193 virtual bool Connect() = 0;
Lukacs Berkie6a34f62016-04-25 12:16:04 +0000194
195 // Disconnects from an existing server. Only call this when this object is in
196 // connected state. After this call returns, the object will be in connected
197 // state.
Lukacs Berkif1df38a2016-04-19 07:42:22 +0000198 virtual void Disconnect() = 0;
Lukacs Berkie6a34f62016-04-25 12:16:04 +0000199
200 // Send the command line to the server and forward whatever it says to stdout
201 // and stderr. Returns the desired exit code. Only call this when the server
202 // is in connected state.
203 virtual unsigned int Communicate() = 0;
204
205 // Disconnects and kills an existing server. Only call this when this object
206 // is in connected state.
Lukacs Berki1977d922016-05-02 09:31:37 +0000207 virtual void KillRunningServer() = 0;
Lukacs Berkie6a34f62016-04-25 12:16:04 +0000208
209 // Cancel the currently running command. If there is no command currently
Lukacs Berki1977d922016-05-02 09:31:37 +0000210 // running, the result is unspecified. When called, this object must be in
211 // connected state.
Lukacs Berkif1df38a2016-04-19 07:42:22 +0000212 virtual void Cancel() = 0;
Thiago Farina69dac862016-11-02 09:48:27 +0000213
214 protected:
215 BlazeLock blaze_lock_;
216 bool connected_;
Lukacs Berkif1df38a2016-04-19 07:42:22 +0000217};
218
Lukacs Berki415d39a2016-04-28 13:18:54 +0000219////////////////////////////////////////////////////////////////////////
220// Global Variables
221static GlobalVariables *globals;
222static BlazeServer *blaze_server;
223
Lukacs Berki415d39a2016-04-28 13:18:54 +0000224uint64_t BlazeServer::AcquireLock() {
225 return blaze::AcquireLock(
Julio Merino28774852016-09-14 16:59:46 +0000226 globals->options->output_base, globals->options->batch,
227 globals->options->block_for_lock, &blaze_lock_);
Lukacs Berki415d39a2016-04-28 13:18:54 +0000228}
229
Lukacs Berki1977d922016-05-02 09:31:37 +0000230// Communication method that uses gRPC on a socket bound to localhost. More
231// documentation is in command_server.proto .
Lukacs Berki00cfb7d2016-04-20 09:01:52 +0000232class GrpcBlazeServer : public BlazeServer {
233 public:
234 GrpcBlazeServer();
Lukacs Berki6dd29092016-05-30 14:05:33 +0000235 virtual ~GrpcBlazeServer();
Lukacs Berki00cfb7d2016-04-20 09:01:52 +0000236
Lukacs Berki9d52bc52016-06-07 11:11:04 +0000237 virtual bool Connect();
238 virtual void Disconnect();
239 virtual unsigned int Communicate();
240 virtual void KillRunningServer();
241 virtual void Cancel();
Lukacs Berki00cfb7d2016-04-20 09:01:52 +0000242
243 private:
Lukacs Berki6dd29092016-05-30 14:05:33 +0000244 enum CancelThreadAction { NOTHING, JOIN, CANCEL, COMMAND_ID_RECEIVED };
Lukacs Berki00cfb7d2016-04-20 09:01:52 +0000245
246 std::unique_ptr<command_server::CommandServer::Stub> client_;
247 std::string request_cookie_;
248 std::string response_cookie_;
249 std::string command_id_;
250
Lukacs Berki6dd29092016-05-30 14:05:33 +0000251 // protects command_id_ . Although we always set it before making the cancel
252 // thread do something with it, the mutex is still useful because it provides
253 // a memory fence.
254 std::mutex cancel_thread_mutex_;
Lukacs Berki8b999982016-04-26 15:40:38 +0000255
Lukacs Berki6dd29092016-05-30 14:05:33 +0000256 int recv_socket_; // Socket the cancel thread reads actions from
257 int send_socket_; // Socket the main thread writes actions to
Lukacs Berki00cfb7d2016-04-20 09:01:52 +0000258
259 void CancelThread();
Lukacs Berki6dd29092016-05-30 14:05:33 +0000260 void SendAction(CancelThreadAction action);
261 void SendCancelMessage();
Lukacs Berki00cfb7d2016-04-20 09:01:52 +0000262};
263
264
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100265////////////////////////////////////////////////////////////////////////
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100266// Logic
267
268
Dmitry Lomov8e2e4b32016-04-20 08:04:17 +0000269#if !defined(__CYGWIN__)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100270// Returns the canonical form of the base dir given a root and a hashable
271// string. The resulting dir is composed of the root + md5(hashable)
272static string GetHashedBaseDir(const string &root,
273 const string &hashable) {
Thiago Farina7390ddb2015-04-09 13:53:53 +0000274 unsigned char buf[Md5Digest::kDigestLength];
275 Md5Digest digest;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100276 digest.Update(hashable.data(), hashable.size());
277 digest.Finish(buf);
278 return root + "/" + digest.String();
279}
280
Dmitry Lomov8e2e4b32016-04-20 08:04:17 +0000281#else
Dmitry Lomovbc84cc82016-04-15 14:05:24 +0000282// Builds a shorter output base dir name for Windows.
283// This MD5s together user name and workspace directory,
284// and only uses 1/3 of the bits to get 8-char alphanumeric
285// file name.
286static string GetHashedBaseDirForWindows(const string &root,
287 const string &product_name,
288 const string &user_name,
289 const string &workspace_directory) {
290 static const char* alphabet
291 // Exactly 64 characters.
292 = "abcdefghigklmnopqrstuvwxyzABCDEFGHIGKLMNOPQRSTUVWXYZ0123456789$-";
293
294 // The length of the resulting filename (8 characters).
295 static const int filename_length = Md5Digest::kDigestLength / 2;
296 unsigned char buf[Md5Digest::kDigestLength];
297 char coded_name[filename_length + 1];
298 Md5Digest digest;
299 digest.Update(user_name.data(), user_name.size());
300 digest.Update(workspace_directory.data(), workspace_directory.size());
301 digest.Finish(buf);
302 for (int i = 0; i < filename_length; i++) {
303 coded_name[i] = alphabet[buf[i] & 0x3F];
304 }
305 coded_name[filename_length] = '\0';
306 return root + "/" + product_name + "/" + string(coded_name);
307}
Dmitry Lomov8e2e4b32016-04-20 08:04:17 +0000308#endif
Dmitry Lomovbc84cc82016-04-15 14:05:24 +0000309
310
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000311// A devtools_ijar::ZipExtractorProcessor to extract the InstallKeyFile
312class GetInstallKeyFileProcessor : public devtools_ijar::ZipExtractorProcessor {
313 public:
Thiago Farina9cb32752015-06-03 15:34:19 +0000314 explicit GetInstallKeyFileProcessor(string *install_base_key)
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000315 : install_base_key_(install_base_key) {}
316
317 virtual bool Accept(const char *filename, const devtools_ijar::u4 attr) {
318 globals->extracted_binaries.push_back(filename);
319 return strcmp(filename, "install_base_key") == 0;
320 }
321
322 virtual void Process(const char *filename, const devtools_ijar::u4 attr,
323 const devtools_ijar::u1 *data, const size_t size) {
324 string str(reinterpret_cast<const char *>(data), size);
325 blaze_util::StripWhitespace(&str);
Lukacs Berki58c29ae2015-10-16 14:48:33 +0000326 if (str.size() != 32) {
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000327 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
Lukacs Berki58c29ae2015-10-16 14:48:33 +0000328 "\nFailed to extract install_base_key: file size mismatch "
329 "(should be 32, is %zd)", str.size());
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000330 }
331 *install_base_key_ = str;
332 }
333
334 private:
335 string *install_base_key_;
336};
337
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100338// Returns the install base (the root concatenated with the contents of the file
339// 'install_base_key' contained as a ZIP entry in the Blaze binary); as a side
340// effect, it also populates the extracted_binaries global variable.
341static string GetInstallBase(const string &root, const string &self_path) {
Eric Fellheimer4c5eb0f2015-08-12 15:02:24 +0000342 GetInstallKeyFileProcessor processor(&globals->install_md5);
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000343 std::unique_ptr<devtools_ijar::ZipExtractor> extractor(
344 devtools_ijar::ZipExtractor::Create(self_path.c_str(), &processor));
345 if (extractor.get() == NULL) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100346 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000347 "\nFailed to open %s as a zip file: (%d) %s",
Julio Merino28774852016-09-14 16:59:46 +0000348 globals->options->product_name.c_str(), errno, strerror(errno));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100349 }
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000350 if (extractor->ProcessAll() < 0) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100351 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000352 "\nFailed to extract install_base_key: %s", extractor->GetError());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100353 }
354
Eric Fellheimer4c5eb0f2015-08-12 15:02:24 +0000355 if (globals->install_md5.empty()) {
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000356 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
357 "\nFailed to find install_base_key's in zip file");
358 }
Eric Fellheimer4c5eb0f2015-08-12 15:02:24 +0000359 return root + "/" + globals->install_md5;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100360}
361
362// Escapes colons by replacing them with '_C' and underscores by replacing them
363// with '_U'. E.g. "name:foo_bar" becomes "name_Cfoo_Ubar"
364static string EscapeForOptionSource(const string& input) {
365 string result = input;
366 blaze_util::Replace("_", "_U", &result);
367 blaze_util::Replace(":", "_C", &result);
368 return result;
369}
370
Thiago Farina6a2dc2b2016-10-28 13:05:22 +0000371// Returns the installed embedded binaries directory, under the shared
372// install_base location.
373string GetEmbeddedBinariesRoot(const string &install_base) {
374 return blaze_util::JoinPath(install_base, "_embedded_binaries");
375}
376
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100377// Returns the JVM command argument array.
378static vector<string> GetArgumentArray() {
379 vector<string> result;
380
381 // e.g. A Blaze server process running in ~/src/build_root (where there's a
382 // ~/src/build_root/WORKSPACE file) will appear in ps(1) as "blaze(src)".
383 string workspace =
384 blaze_util::Basename(blaze_util::Dirname(globals->workspace));
Julio Merino28774852016-09-14 16:59:46 +0000385 string product = globals->options->product_name;
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000386 blaze_util::ToLower(&product);
387 result.push_back(product + "(" + workspace + ")");
Julio Merino28774852016-09-14 16:59:46 +0000388 globals->options->AddJVMArgumentPrefix(
Eric Fellheimer3a695f32016-05-11 17:26:30 +0000389 blaze_util::Dirname(blaze_util::Dirname(globals->jvm_path)),
390 &result);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100391
392 result.push_back("-XX:+HeapDumpOnOutOfMemoryError");
Julio Merino28774852016-09-14 16:59:46 +0000393 string heap_crash_path = globals->options->output_base;
Dmitry Lomov7608db52016-07-14 11:27:10 +0000394 result.push_back("-XX:HeapDumpPath=" + ConvertPath(heap_crash_path));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100395
396 result.push_back("-Xverify:none");
397
Janak Ramakrishnande735c02015-06-02 16:38:57 +0000398 vector<string> user_options;
399
Janak Ramakrishnan0acd1542016-01-06 18:42:30 +0000400 user_options.insert(user_options.begin(),
Julio Merino28774852016-09-14 16:59:46 +0000401 globals->options->host_jvm_args.begin(),
402 globals->options->host_jvm_args.end());
Janak Ramakrishnande735c02015-06-02 16:38:57 +0000403
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100404 // Add JVM arguments particular to building blaze64 and particular JVM
405 // versions.
406 string error;
407 blaze_exit_code::ExitCode jvm_args_exit_code =
Julio Merino28774852016-09-14 16:59:46 +0000408 globals->options->AddJVMArguments(globals->options->GetHostJavabase(),
Janak Ramakrishnande735c02015-06-02 16:38:57 +0000409 &result, user_options, &error);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100410 if (jvm_args_exit_code != blaze_exit_code::SUCCESS) {
411 die(jvm_args_exit_code, "%s", error.c_str());
412 }
413
Julio Merino28774852016-09-14 16:59:46 +0000414 if (globals->options->batch && globals->options->oom_more_eagerly) {
Janak Ramakrishnan70c57902016-03-10 00:58:59 +0000415 // Put this OOM trigger with kill after --host_jvm_args, in case
416 // --host_jvm_args contains user-specified OOM triggers since we want those
417 // to execute first.
418 result.push_back("-XX:OnOutOfMemoryError=kill -USR2 %p");
419 }
420
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100421 // We put all directories on the java.library.path that contain .so files.
422 string java_library_path = "-Djava.library.path=";
Thiago Farina6a2dc2b2016-10-28 13:05:22 +0000423 string real_install_dir =
424 GetEmbeddedBinariesRoot(globals->options->install_base);
425
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100426 bool first = true;
427 for (const auto& it : globals->extracted_binaries) {
Thiago Farina01f36002015-04-08 15:59:08 +0000428 if (IsSharedLibrary(it)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100429 if (!first) {
Dmitry Lomov78c0cc72015-08-11 16:44:21 +0000430 java_library_path += blaze::ListSeparator();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100431 }
432 first = false;
Dmitry Lomov78c0cc72015-08-11 16:44:21 +0000433 java_library_path += blaze::ConvertPath(
434 blaze_util::JoinPath(real_install_dir, blaze_util::Dirname(it)));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100435 }
436 }
437 result.push_back(java_library_path);
438
439 // Force use of latin1 for file names.
440 result.push_back("-Dfile.encoding=ISO-8859-1");
441
Julio Merino28774852016-09-14 16:59:46 +0000442 if (globals->options->host_jvm_debug) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100443 fprintf(stderr,
444 "Running host JVM under debugger (listening on TCP port 5005).\n");
445 // Start JVM so that it listens for a connection from a
446 // JDWP-compliant debugger:
447 result.push_back("-Xdebug");
448 result.push_back("-Xrunjdwp:transport=dt_socket,server=y,address=5005");
449 }
Janak Ramakrishnande735c02015-06-02 16:38:57 +0000450 result.insert(result.end(), user_options.begin(), user_options.end());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100451
Julio Merino28774852016-09-14 16:59:46 +0000452 globals->options->AddJVMArgumentSuffix(real_install_dir,
Eric Fellheimer3a695f32016-05-11 17:26:30 +0000453 globals->extracted_binaries[0],
454 &result);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100455
Lukacs Berki3d486832016-10-26 12:51:38 +0000456 // JVM arguments are complete. Now pass in Blaze startup options.
457 // Note that we always use the --flag=ARG form (instead of the --flag ARG one)
458 // so that BlazeRuntime#splitStartupOptions has an easy job.
Julio Merino28774852016-09-14 16:59:46 +0000459 if (!globals->options->batch) {
Lukacs Berki3d486832016-10-26 12:51:38 +0000460 result.push_back("--max_idle_secs=" +
461 ToString(globals->options->max_idle_secs));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100462 } else {
Googlerc8c64e72015-03-23 23:22:18 +0000463 // --batch must come first in the arguments to Java main() because
464 // the code expects it to be at args[0] if it's been set.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100465 result.push_back("--batch");
466 }
Lukacs Berkice1445f2016-04-19 15:52:55 +0000467
Julio Merino28774852016-09-14 16:59:46 +0000468 if (globals->options->command_port != 0) {
Lukacs Berki7e0249e2016-04-21 08:14:08 +0000469 result.push_back(
Julio Merino28774852016-09-14 16:59:46 +0000470 "--command_port=" + ToString(globals->options->command_port));
Lukacs Berkice1445f2016-04-19 15:52:55 +0000471 }
472
Dmitry Lomov78c0cc72015-08-11 16:44:21 +0000473 result.push_back("--install_base=" +
Julio Merino28774852016-09-14 16:59:46 +0000474 blaze::ConvertPath(globals->options->install_base));
Eric Fellheimer4c5eb0f2015-08-12 15:02:24 +0000475 result.push_back("--install_md5=" + globals->install_md5);
Dmitry Lomov78c0cc72015-08-11 16:44:21 +0000476 result.push_back("--output_base=" +
Julio Merino28774852016-09-14 16:59:46 +0000477 blaze::ConvertPath(globals->options->output_base));
Dmitry Lomov78c0cc72015-08-11 16:44:21 +0000478 result.push_back("--workspace_directory=" +
479 blaze::ConvertPath(globals->workspace));
Marian Lobur6dcdd602015-04-09 09:28:40 +0000480
Julio Merino28774852016-09-14 16:59:46 +0000481 if (globals->options->allow_configurable_attributes) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100482 result.push_back("--allow_configurable_attributes");
483 }
Julio Merino28774852016-09-14 16:59:46 +0000484 if (globals->options->deep_execroot) {
Lukacs Berki5fb98d12015-12-09 15:29:46 +0000485 result.push_back("--deep_execroot");
486 } else {
487 result.push_back("--nodeep_execroot");
488 }
Julio Merino28774852016-09-14 16:59:46 +0000489 if (globals->options->oom_more_eagerly) {
Janak Ramakrishnanadc706f2016-03-07 19:12:48 +0000490 result.push_back("--experimental_oom_more_eagerly");
491 }
Janak Ramakrishnan19fde1f2016-05-23 21:20:16 +0000492 result.push_back("--experimental_oom_more_eagerly_threshold=" +
Julio Merino28774852016-09-14 16:59:46 +0000493 ToString(globals->options->oom_more_eagerly_threshold));
Janak Ramakrishnan8cc772e2016-03-23 17:26:12 +0000494
Michajlo Matijkiwaf79a322016-09-16 15:44:35 +0000495 if (!globals->options->write_command_log) {
496 result.push_back("--nowrite_command_log");
497 }
498
Julio Merino28774852016-09-14 16:59:46 +0000499 if (globals->options->watchfs) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100500 result.push_back("--watchfs");
501 }
Julio Merino28774852016-09-14 16:59:46 +0000502 if (globals->options->fatal_event_bus_exceptions) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100503 result.push_back("--fatal_event_bus_exceptions");
504 } else {
505 result.push_back("--nofatal_event_bus_exceptions");
506 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100507
508 // This is only for Blaze reporting purposes; the real interpretation of the
509 // jvm flags occurs when we set up the java command line.
Julio Merino28774852016-09-14 16:59:46 +0000510 if (globals->options->host_jvm_debug) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100511 result.push_back("--host_jvm_debug");
512 }
Julio Merino28774852016-09-14 16:59:46 +0000513 if (!globals->options->host_jvm_profile.empty()) {
514 result.push_back("--host_jvm_profile=" +
515 globals->options->host_jvm_profile);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100516 }
Julio Merino28774852016-09-14 16:59:46 +0000517 if (!globals->options->host_jvm_args.empty()) {
518 for (const auto &arg : globals->options->host_jvm_args) {
Janak Ramakrishnan533657e2015-11-13 23:34:14 +0000519 result.push_back("--host_jvm_args=" + arg);
520 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100521 }
Alex Humesky2f3f4cf2015-09-29 01:42:00 +0000522
Julio Merino28774852016-09-14 16:59:46 +0000523 if (globals->options->invocation_policy != NULL &&
524 strlen(globals->options->invocation_policy) > 0) {
Alex Humesky2f3f4cf2015-09-29 01:42:00 +0000525 result.push_back(string("--invocation_policy=") +
Julio Merino28774852016-09-14 16:59:46 +0000526 globals->options->invocation_policy);
Alex Humesky2f3f4cf2015-09-29 01:42:00 +0000527 }
528
Julio Merino28774852016-09-14 16:59:46 +0000529 result.push_back("--product_name=" + globals->options->product_name);
Luis Fernando Pino Duque623cdf82016-05-31 16:21:46 +0000530
Julio Merino28774852016-09-14 16:59:46 +0000531 globals->options->AddExtraOptions(&result);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100532
533 // The option sources are transmitted in the following format:
534 // --option_sources=option1:source1:option2:source2:...
535 string option_sources = "--option_sources=";
536 first = true;
Julio Merino28774852016-09-14 16:59:46 +0000537 for (const auto& it : globals->options->option_sources) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100538 if (!first) {
539 option_sources += ":";
540 }
541
542 first = false;
543 option_sources += EscapeForOptionSource(it.first) + ":" +
544 EscapeForOptionSource(it.second);
545 }
546
547 result.push_back(option_sources);
548 return result;
549}
550
Thiago Farina5735c252016-04-27 16:16:27 +0000551// Add common command options for logging to the given argument array.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100552static void AddLoggingArgs(vector<string>* args) {
Googler9588b812015-07-23 11:49:37 +0000553 args->push_back("--startup_time=" + ToString(globals->startup_time));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100554 if (globals->command_wait_time != 0) {
555 args->push_back("--command_wait_time=" +
Googler9588b812015-07-23 11:49:37 +0000556 ToString(globals->command_wait_time));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100557 }
558 if (globals->extract_data_time != 0) {
559 args->push_back("--extract_data_time=" +
Googler9588b812015-07-23 11:49:37 +0000560 ToString(globals->extract_data_time));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100561 }
562 if (globals->restart_reason != NO_RESTART) {
563 const char *reasons[] = {
564 "no_restart", "no_daemon", "new_version", "new_options"
565 };
566 args->push_back(
567 string("--restart_reason=") + reasons[globals->restart_reason]);
568 }
569 args->push_back(
570 string("--binary_path=") + globals->binary_path);
571}
572
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100573// Join the elements of the specified array with NUL's (\0's), akin to the
574// format of /proc/$PID/cmdline.
Thiago Farina0b6963e2015-04-28 20:26:45 +0000575static string GetArgumentString(const vector<string>& argument_array) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100576 string result;
577 blaze_util::JoinStrings(argument_array, '\0', &result);
578 return result;
579}
580
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100581// Do a chdir into the workspace, and die if it fails.
582static void GoToWorkspace() {
Julio Merino211a95c2016-08-29 11:01:35 +0000583 if (WorkspaceLayout::InWorkspace(globals->workspace) &&
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100584 chdir(globals->workspace.c_str()) != 0) {
585 pdie(blaze_exit_code::INTERNAL_ERROR,
586 "chdir() into %s failed", globals->workspace.c_str());
587 }
588}
589
590// Check the java version if a java version specification is bundled. On
Thiago Farina5735c252016-04-27 16:16:27 +0000591// success, returns the executable path of the java command.
Eric Fellheimer3a695f32016-05-11 17:26:30 +0000592static void VerifyJavaVersionAndSetJvm() {
Julio Merino28774852016-09-14 16:59:46 +0000593 string exe = globals->options->GetJvm();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100594
595 string version_spec_file = blaze_util::JoinPath(
Thiago Farina6a2dc2b2016-10-28 13:05:22 +0000596 GetEmbeddedBinariesRoot(globals->options->install_base), "java.version");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100597 string version_spec = "";
598 if (ReadFile(version_spec_file, &version_spec)) {
599 blaze_util::StripWhitespace(&version_spec);
600 // A version specification is given, get version of java.
601 string jvm_version = GetJvmVersion(exe);
602
603 // Compare that jvm_version is found and at least the one specified.
604 if (jvm_version.size() == 0) {
605 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
606 "Java version not detected while at least %s is needed.\n"
607 "Please set JAVA_HOME.", version_spec.c_str());
608 } else if (!CheckJavaVersionIsAtLeast(jvm_version, version_spec)) {
609 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
610 "Java version is %s while at least %s is needed.\n"
611 "Please set JAVA_HOME.",
612 jvm_version.c_str(), version_spec.c_str());
613 }
614 }
615
Eric Fellheimer3a695f32016-05-11 17:26:30 +0000616 globals->jvm_path = exe;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100617}
618
619// Starts the Blaze server. Returns a readable fd connected to the server.
620// This is currently used only to detect liveness.
Lukacs Berki1977d922016-05-02 09:31:37 +0000621static void StartServer(BlazeServerStartup** server_startup) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100622 vector<string> jvm_args_vector = GetArgumentArray();
623 string argument_string = GetArgumentString(jvm_args_vector);
Julio Merino28774852016-09-14 16:59:46 +0000624 string server_dir = globals->options->output_base + "/server";
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100625 // Write the cmdline argument string to the server dir. If we get to this
626 // point, there is no server running, so we don't overwrite the cmdline file
627 // for the existing server. If might be that the server dies and the cmdline
628 // file stays there, but that is not a problem, since we always check the
629 // server, too.
Lukacs Berki5a781662016-04-25 11:17:31 +0000630 WriteFile(argument_string, server_dir + "/cmdline");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100631
632 // unless we restarted for a new-version, mark this as initial start
633 if (globals->restart_reason == NO_RESTART) {
634 globals->restart_reason = NO_DAEMON;
635 }
636
Julio Merino28774852016-09-14 16:59:46 +0000637 string exe = globals->options->GetExe(globals->jvm_path,
638 globals->extracted_binaries[0]);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100639 // Go to the workspace before we daemonize, so
640 // we can still print errors to the terminal.
641 GoToWorkspace();
642
Lukacs Berki1977d922016-05-02 09:31:37 +0000643 ExecuteDaemon(exe, jvm_args_vector, globals->jvm_log_file.c_str(),
644 server_dir, server_startup);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100645}
646
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100647// Replace this process with blaze in standalone/batch mode.
648// The batch mode blaze process handles the command and exits.
649//
650// This function passes the commands array to the blaze process.
651// This array should start with a command ("build", "info", etc.).
Lukacs Berki907dbbf2016-04-15 11:30:12 +0000652static void StartStandalone(BlazeServer* server) {
Lukacs Berki1977d922016-05-02 09:31:37 +0000653 if (server->Connected()) {
654 server->KillRunningServer();
655 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100656
657 // Wall clock time since process startup.
658 globals->startup_time = ProcessClock() / 1000000LL;
659
660 if (VerboseLogging()) {
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000661 fprintf(stderr, "Starting %s in batch mode.\n",
Julio Merino28774852016-09-14 16:59:46 +0000662 globals->options->product_name.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100663 }
Julio Merino28774852016-09-14 16:59:46 +0000664 string command = globals->option_processor->GetCommand();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100665 vector<string> command_arguments;
Julio Merino28774852016-09-14 16:59:46 +0000666 globals->option_processor->GetCommandArguments(&command_arguments);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100667
668 if (!command_arguments.empty() && command == "shutdown") {
Julio Merino28774852016-09-14 16:59:46 +0000669 string product = globals->options->product_name;
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000670 blaze_util::ToLower(&product);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100671 fprintf(stderr,
672 "WARNING: Running command \"shutdown\" in batch mode. Batch mode "
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000673 "is triggered\nwhen not running %s within a workspace. If you "
674 "intend to shutdown an\nexisting %s server, run \"%s "
675 "shutdown\" from the directory where\nit was started.\n",
Julio Merino28774852016-09-14 16:59:46 +0000676 globals->options->product_name.c_str(),
677 globals->options->product_name.c_str(), product.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100678 }
679 vector<string> jvm_args_vector = GetArgumentArray();
680 if (command != "") {
681 jvm_args_vector.push_back(command);
682 AddLoggingArgs(&jvm_args_vector);
683 }
684
685 jvm_args_vector.insert(jvm_args_vector.end(),
686 command_arguments.begin(),
687 command_arguments.end());
688
689 GoToWorkspace();
690
Julio Merino28774852016-09-14 16:59:46 +0000691 string exe = globals->options->GetExe(globals->jvm_path,
Eric Fellheimer3a695f32016-05-11 17:26:30 +0000692 globals->extracted_binaries[0]);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100693 ExecuteProgram(exe, jvm_args_vector);
694 pdie(blaze_exit_code::INTERNAL_ERROR, "execv of '%s' failed", exe.c_str());
695}
696
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100697// Write the contents of file_name to stream.
698static void WriteFileToStreamOrDie(FILE *stream, const char *file_name) {
699 FILE *fp = fopen(file_name, "r");
700 if (fp == NULL) {
701 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
702 "opening %s failed", file_name);
703 }
704 char buffer[255];
705 int num_read;
706 while ((num_read = fread(buffer, 1, sizeof buffer, fp)) > 0) {
707 if (ferror(fp)) {
708 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
709 "failed to read from '%s'", file_name);
710 }
711 fwrite(buffer, 1, num_read, stream);
712 }
713 fclose(fp);
714}
715
Lukacs Berki4912f7f2016-06-17 16:12:22 +0000716// After connecting to the Blaze server, return its PID, or -1 if there was an
717// error.
Lukacs Berkid9da60f2016-04-26 11:40:24 +0000718static int GetServerPid(const string &server_dir) {
Lukacs Berki907dbbf2016-04-15 11:30:12 +0000719 // Note: there is no race here on startup since the server creates
720 // the pid file strictly before it binds the socket.
Lukacs Berkiea4c42e2016-04-25 07:22:11 +0000721 char buf[33];
722
723 // The server writes a file, but we need to handle old servers that still
724 // write a symlink.
Lukacs Berkiea4c42e2016-04-25 07:22:11 +0000725 int len;
Thiago Farina048bbfc2016-09-21 08:20:41 +0000726 string pid_file = blaze_util::JoinPath(server_dir, kServerPidFile);
727 string pid_symlink = blaze_util::JoinPath(server_dir, kServerPidSymlink);
Lukacs Berkid9da60f2016-04-26 11:40:24 +0000728 len = readlink(pid_symlink.c_str(), buf, sizeof(buf) - 1);
Lukacs Berkiea4c42e2016-04-25 07:22:11 +0000729 if (len < 0) {
730 int fd = open(pid_file.c_str(), O_RDONLY);
731 if (fd < 0) {
732 return -1;
733 }
734 len = read(fd, buf, 32);
735 close(fd);
736 if (len < 0) {
737 return -1;
738 }
Doug Rabsond655f2a2015-08-13 14:41:50 +0000739 }
Lukacs Berkiea4c42e2016-04-25 07:22:11 +0000740
741 int result;
742 buf[len] = 0;
743 if (!blaze_util::safe_strto32(string(buf), &result)) {
744 return -1;
745 }
746
747 return result;
Doug Rabsond655f2a2015-08-13 14:41:50 +0000748}
749
Lukacs Berki1977d922016-05-02 09:31:37 +0000750// Starts up a new server and connects to it. Exits if it didn't work not.
751static void StartServerAndConnect(BlazeServer *server) {
Julio Merino28774852016-09-14 16:59:46 +0000752 string server_dir = globals->options->output_base + "/server";
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100753
754 // The server dir has the socket, so we don't allow access by other
755 // users.
756 if (MakeDirectories(server_dir, 0700) == -1) {
757 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
758 "server directory '%s' could not be created", server_dir.c_str());
759 }
760
Lukacs Berki1977d922016-05-02 09:31:37 +0000761 // If we couldn't connect to the server check if there is still a PID file
762 // and if so, kill the server that wrote it. This can happen e.g. if the
763 // server is in a GC pause and therefore cannot respond to ping requests and
764 // having two server instances running in the same output base is a
765 // disaster.
766 int server_pid = GetServerPid(server_dir);
767 if (server_pid > 0) {
Julio Merino28774852016-09-14 16:59:46 +0000768 if (VerifyServerProcess(server_pid, globals->options->output_base,
769 globals->options->install_base) &&
Lukacs Berkiee44c382016-09-14 10:53:37 +0000770 KillServerProcess(server_pid)) {
Lukacs Berki119dd4b2016-07-13 15:28:42 +0000771 fprintf(stderr, "Killed non-responsive server process (pid=%d)\n",
772 server_pid);
773 }
Lukacs Berki7e0249e2016-04-21 08:14:08 +0000774 }
775
Julio Merino28774852016-09-14 16:59:46 +0000776 SetScheduling(globals->options->batch_cpu_scheduling,
777 globals->options->io_nice_level);
Lukacs Berkif1df38a2016-04-19 07:42:22 +0000778
Lukacs Berki1977d922016-05-02 09:31:37 +0000779 BlazeServerStartup* server_startup;
780 StartServer(&server_startup);
781 // Give the server one minute to start up.
782 for (int ii = 0; ii < 600; ++ii) {
783 // 60s; enough time to connect with debugger
784 if (server->Connect()) {
785 if (ii) {
786 fputc('\n', stderr);
787 fflush(stderr);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100788 }
Lukacs Berki1977d922016-05-02 09:31:37 +0000789 delete server_startup;
790 return;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100791 }
Lukacs Berki1977d922016-05-02 09:31:37 +0000792 fputc('.', stderr);
793 fflush(stderr);
Chongyu Zhufefd2322016-09-21 11:01:14 +0000794 struct timespec ts;
795 ts.tv_sec = 0;
796 ts.tv_nsec = 100 * 1000 * 1000;
797 nanosleep(&ts, NULL);
Lukacs Berki1977d922016-05-02 09:31:37 +0000798 if (!server_startup->IsStillAlive()) {
799 fprintf(stderr, "\nunexpected pipe read status: %s\n"
800 "Server presumed dead. Now printing '%s':\n",
801 strerror(errno), globals->jvm_log_file.c_str());
802 WriteFileToStreamOrDie(stderr, globals->jvm_log_file.c_str());
803 exit(blaze_exit_code::INTERNAL_ERROR);
804 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100805 }
Lukacs Berki1977d922016-05-02 09:31:37 +0000806 die(blaze_exit_code::INTERNAL_ERROR,
Thiago Farina8fc795a2016-10-31 08:54:25 +0000807 "\nError: couldn't connect to server after 60 seconds.");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100808}
809
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100810// Calls fsync() on the file (or directory) specified in 'file_path'.
811// pdie()'s if syncing fails.
812static void SyncFile(const char *file_path) {
813 // fsync always fails on Cygwin with "Permission denied" for some reason.
814#ifndef __CYGWIN__
815 int fd = open(file_path, O_RDONLY);
816 if (fd < 0) {
817 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
818 "failed to open '%s' for syncing", file_path);
819 }
820 if (fsync(fd) < 0) {
821 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
822 "failed to sync '%s'", file_path);
823 }
824 close(fd);
825#endif
826}
827
828// Walks the temporary directory recursively and collects full file paths.
829static void CollectExtractedFiles(const string &dir_path, vector<string> &files) {
830 DIR *dir;
831 struct dirent *ent;
832
833 if ((dir = opendir(dir_path.c_str())) == NULL) {
834 die(blaze_exit_code::INTERNAL_ERROR, "opendir failed");
835 }
836
837 while ((ent = readdir(dir)) != NULL) {
838 if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) {
839 continue;
840 }
841
842 string filename(blaze_util::JoinPath(dir_path, ent->d_name));
843 bool is_directory;
844 if (ent->d_type == DT_UNKNOWN) {
845 struct stat buf;
846 if (lstat(filename.c_str(), &buf) == -1) {
847 die(blaze_exit_code::INTERNAL_ERROR, "stat failed");
848 }
849 is_directory = S_ISDIR(buf.st_mode);
850 } else {
851 is_directory = (ent->d_type == DT_DIR);
852 }
853
854 if (is_directory) {
855 CollectExtractedFiles(filename, files);
856 } else {
857 files.push_back(filename);
858 }
859 }
860
861 closedir(dir);
862}
863
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000864// A devtools_ijar::ZipExtractorProcessor to extract the files from the blaze
865// zip.
866class ExtractBlazeZipProcessor : public devtools_ijar::ZipExtractorProcessor {
867 public:
Thiago Farina9cb32752015-06-03 15:34:19 +0000868 explicit ExtractBlazeZipProcessor(const string &embedded_binaries)
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000869 : embedded_binaries_(embedded_binaries) {}
870
871 virtual bool Accept(const char *filename, const devtools_ijar::u4 attr) {
872 return !devtools_ijar::zipattr_is_dir(attr);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100873 }
874
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000875 virtual void Process(const char *filename, const devtools_ijar::u4 attr,
876 const devtools_ijar::u1 *data, const size_t size) {
877 string path = blaze_util::JoinPath(embedded_binaries_, filename);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100878 if (MakeDirectories(blaze_util::Dirname(path), 0777) == -1) {
879 pdie(blaze_exit_code::INTERNAL_ERROR,
880 "couldn't create '%s'", path.c_str());
881 }
Damien Martin-Guillereze20e5442015-03-26 09:04:23 +0000882 int fd = open(path.c_str(), O_CREAT | O_WRONLY, 0755);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100883 if (fd < 0) {
884 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
885 "\nFailed to open extraction file: %s", strerror(errno));
886 }
887
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000888 if (write(fd, data, size) != size) {
889 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
890 "\nError writing zipped file to %s", path.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100891 }
892 if (close(fd) != 0) {
893 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
894 "\nCould not close file %s", path.c_str());
895 }
896 }
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000897
898 private:
899 const string embedded_binaries_;
900};
901
902// Actually extracts the embedded data files into the tree whose root
903// is 'embedded_binaries'.
904static void ActuallyExtractData(const string &argv0,
905 const string &embedded_binaries) {
906 ExtractBlazeZipProcessor processor(embedded_binaries);
907 if (MakeDirectories(embedded_binaries, 0777) == -1) {
908 pdie(blaze_exit_code::INTERNAL_ERROR, "couldn't create '%s'",
909 embedded_binaries.c_str());
910 }
911
912 fprintf(stderr, "Extracting %s installation...\n",
Julio Merino28774852016-09-14 16:59:46 +0000913 globals->options->product_name.c_str());
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000914 std::unique_ptr<devtools_ijar::ZipExtractor> extractor(
915 devtools_ijar::ZipExtractor::Create(argv0.c_str(), &processor));
916 if (extractor.get() == NULL) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100917 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000918 "\nFailed to open %s as a zip file: (%d) %s",
Julio Merino28774852016-09-14 16:59:46 +0000919 globals->options->product_name.c_str(), errno, strerror(errno));
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000920 }
921 if (extractor->ProcessAll() < 0) {
922 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
923 "\nFailed to extract %s as a zip file: %s",
Julio Merino28774852016-09-14 16:59:46 +0000924 globals->options->product_name.c_str(), extractor->GetError());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100925 }
926
927 const time_t TEN_YEARS_IN_SEC = 3600 * 24 * 365 * 10;
928 time_t future_time = time(NULL) + TEN_YEARS_IN_SEC;
929
930 // Set the timestamps of the extracted files to the future and make sure (or
931 // at least as sure as we can...) that the files we have written are actually
932 // on the disk.
933
934 vector<string> extracted_files;
935 CollectExtractedFiles(embedded_binaries, extracted_files);
936
937 set<string> synced_directories;
938 for (vector<string>::iterator it = extracted_files.begin(); it != extracted_files.end(); it++) {
939
940 const char *extracted_path = it->c_str();
941
942 // Set the time to a distantly futuristic value so we can observe tampering.
943 // Note that keeping the default timestamp set by unzip (1970-01-01) and using
944 // that to detect tampering is not enough, because we also need the timestamp
945 // to change between Blaze releases so that the metadata cache knows that
946 // the files may have changed. This is important for actions that use
947 // embedded binaries as artifacts.
948 struct utimbuf times = { future_time, future_time };
949 if (utime(extracted_path, &times) == -1) {
950 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
951 "failed to set timestamp on '%s'", extracted_path);
952 }
953
954 SyncFile(extracted_path);
955
956 string directory = blaze_util::Dirname(extracted_path);
957
958 // Now walk up until embedded_binaries and sync every directory in between.
959 // synced_directories is used to avoid syncing the same directory twice.
960 // The !directory.empty() and directory != "/" conditions are not strictly
961 // needed, but it makes this loop more robust, because otherwise, if due to
962 // some glitch, directory was not under embedded_binaries, it would get
963 // into an infinite loop.
964 while (directory != embedded_binaries &&
965 synced_directories.count(directory) == 0 &&
966 !directory.empty() &&
967 directory != "/") {
968 SyncFile(directory.c_str());
969 synced_directories.insert(directory);
970 directory = blaze_util::Dirname(directory);
971 }
972 }
973
974 SyncFile(embedded_binaries.c_str());
975}
976
977// Installs Blaze by extracting the embedded data files, iff necessary.
978// The MD5-named install_base directory on disk is trusted; we assume
979// no-one has modified the extracted files beneath this directory once
980// it is in place. Concurrency during extraction is handled by
981// extracting in a tmp dir and then renaming it into place where it
982// becomes visible automically at the new path.
983// Populates globals->extracted_binaries with their extracted locations.
984static void ExtractData(const string &self_path) {
985 // If the install dir doesn't exist, create it, if it does, we know it's good.
986 struct stat buf;
Julio Merino28774852016-09-14 16:59:46 +0000987 if (stat(globals->options->install_base.c_str(), &buf) == -1) {
Thiago Farina8a67da42015-05-05 18:04:50 +0000988 uint64_t st = MonotonicClock();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100989 // Work in a temp dir to avoid races.
Julio Merino28774852016-09-14 16:59:46 +0000990 string tmp_install = globals->options->install_base + ".tmp." +
Googler9588b812015-07-23 11:49:37 +0000991 ToString(getpid());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100992 string tmp_binaries = tmp_install + "/_embedded_binaries";
993 ActuallyExtractData(self_path, tmp_binaries);
994
Thiago Farina8a67da42015-05-05 18:04:50 +0000995 uint64_t et = MonotonicClock();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100996 globals->extract_data_time = (et - st) / 1000000LL;
997
998 // Now rename the completed installation to its final name. If this
999 // fails due to an ENOTEMPTY then we assume another good
1000 // installation snuck in before us.
Julio Merino28774852016-09-14 16:59:46 +00001001 if (rename(tmp_install.c_str(), globals->options->install_base.c_str()) == -1
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001002 && errno != ENOTEMPTY) {
1003 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1004 "install base directory '%s' could not be renamed into place",
1005 tmp_install.c_str());
1006 }
1007 } else {
1008 if (!S_ISDIR(buf.st_mode)) {
1009 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1010 "Error: Install base directory '%s' could not be created. "
1011 "It exists but is not a directory.",
Julio Merino28774852016-09-14 16:59:46 +00001012 globals->options->install_base.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001013 }
1014
1015 const time_t time_now = time(NULL);
1016 string real_install_dir = blaze_util::JoinPath(
Julio Merino28774852016-09-14 16:59:46 +00001017 globals->options->install_base,
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001018 "_embedded_binaries");
1019 for (const auto& it : globals->extracted_binaries) {
1020 string path = blaze_util::JoinPath(real_install_dir, it);
1021 // Check that the file exists and is readable.
1022 if (stat(path.c_str(), &buf) == -1) {
1023 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1024 "Error: corrupt installation: file '%s' missing."
1025 " Please remove '%s' and try again.",
Julio Merino28774852016-09-14 16:59:46 +00001026 path.c_str(), globals->options->install_base.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001027 }
1028 // Check that the timestamp is in the future. A past timestamp would indicate
1029 // that the file has been tampered with. See ActuallyExtractData().
Damien Martin-Guillerez12997672015-09-03 21:54:08 +00001030 if (!S_ISDIR(buf.st_mode) && buf.st_mtime <= time_now) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001031 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1032 "Error: corrupt installation: file '%s' "
1033 "modified. Please remove '%s' and try again.",
Julio Merino28774852016-09-14 16:59:46 +00001034 path.c_str(), globals->options->install_base.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001035 }
1036 }
1037 }
1038}
1039
// Returns true if the server needs to be restarted to accommodate changes
// between the two argument lists.
//
// The lists are compared position by position. A differing pair is tolerated
// only when both arguments start with "--option_sources=" or both start with
// "--max_idle_secs="; any other difference, or a difference in length,
// requires a restart.
static bool ServerNeedsToBeKilled(const vector<string>& args1,
                                  const vector<string>& args2) {
  // We need not worry about one side missing an argument and the other side
  // having the default value, since this command line is already the
  // canonicalized one that always contains every switch (with default values
  // if it was not present on the real command line). Same applies for argument
  // ordering.
  if (args1.size() != args2.size()) {
    return true;
  }

  // Options whose value may change without restarting the server.
  static const char* const kIgnoredOptions[] = {
    "--option_sources=", "--max_idle_secs="
  };

  for (size_t i = 0; i < args1.size(); ++i) {
    if (args1[i] == args2[i]) {
      continue;
    }

    bool difference_is_ignorable = false;
    for (const char* option : kIgnoredOptions) {
      const size_t length = strlen(option);
      if (args1[i].compare(0, length, option) == 0 &&
          args2[i].compare(0, length, option) == 0) {
        difference_is_ignorable = true;
        break;
      }
    }
    if (!difference_is_ignorable) {
      return true;
    }
  }

  return false;
}
1073
1074// Kills the running Blaze server, if any, if the startup options do not match.
Lukacs Berki907dbbf2016-04-15 11:30:12 +00001075static void KillRunningServerIfDifferentStartupOptions(BlazeServer* server) {
Lukacs Berki1977d922016-05-02 09:31:37 +00001076 if (!server->Connected()) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001077 return;
1078 }
1079
Julio Merino28774852016-09-14 16:59:46 +00001080 string cmdline_path = globals->options->output_base + "/server/cmdline";
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001081 string joined_arguments;
1082
1083 // No, /proc/$PID/cmdline does not work, because it is limited to 4K. Even
1084 // worse, its behavior differs slightly between kernels (in some, when longer
1085 // command lines are truncated, the last 4 bytes are replaced with
1086 // "..." + NUL.
1087 ReadFile(cmdline_path, &joined_arguments);
1088 vector<string> arguments = blaze_util::Split(joined_arguments, '\0');
1089
1090 // These strings contain null-separated command line arguments. If they are
1091 // the same, the server can stay alive, otherwise, it needs shuffle off this
1092 // mortal coil.
1093 if (ServerNeedsToBeKilled(arguments, GetArgumentArray())) {
1094 globals->restart_reason = NEW_OPTIONS;
1095 fprintf(stderr,
Kristina Chodorow11d40d22015-03-17 18:26:59 +00001096 "WARNING: Running %s server needs to be killed, because the "
1097 "startup options are different.\n",
Julio Merino28774852016-09-14 16:59:46 +00001098 globals->options->product_name.c_str());
Lukacs Berki1977d922016-05-02 09:31:37 +00001099 server->KillRunningServer();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001100 }
1101}
1102
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001103// Kills the old running server if it is not the same version as us,
1104// dealing with various combinations of installation scheme
1105// (installation symlink and older MD5_MANIFEST contents).
1106// This function requires that the installation be complete, and the
1107// server lock acquired.
Lukacs Berki907dbbf2016-04-15 11:30:12 +00001108static void EnsureCorrectRunningVersion(BlazeServer* server) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001109 // Read the previous installation's semaphore symlink in output_base. If the
1110 // target dirs don't match, or if the symlink was not present, then kill any
1111 // running servers. Lastly, symlink to our installation so others know which
1112 // installation is running.
Julio Merino28774852016-09-14 16:59:46 +00001113 string installation_path = globals->options->output_base + "/install";
Lukacs Berki497d8242016-04-28 07:21:26 +00001114 string prev_installation;
1115 bool ok = ReadDirectorySymlink(installation_path.c_str(), &prev_installation);
1116 if (!ok || !CompareAbsolutePaths(
Julio Merino28774852016-09-14 16:59:46 +00001117 prev_installation, globals->options->install_base)) {
Lukacs Berki1977d922016-05-02 09:31:37 +00001118 if (server->Connected()) {
1119 server->KillRunningServer();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001120 }
Lukacs Berki1977d922016-05-02 09:31:37 +00001121
1122 globals->restart_reason = NEW_VERSION;
Thiago Farina4e4ffd22016-03-09 17:02:28 +00001123 UnlinkPath(installation_path.c_str());
Julio Merino28774852016-09-14 16:59:46 +00001124 if (!SymlinkDirectories(globals->options->install_base.c_str(),
Dmitry Lomov47afaab2016-02-19 08:21:13 +00001125 installation_path.c_str())) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001126 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1127 "failed to create installation symlink '%s'",
1128 installation_path.c_str());
1129 }
1130 const time_t time_now = time(NULL);
1131 struct utimbuf times = { time_now, time_now };
Julio Merino28774852016-09-14 16:59:46 +00001132 if (utime(globals->options->install_base.c_str(), &times) == -1) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001133 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1134 "failed to set timestamp on '%s'",
Julio Merino28774852016-09-14 16:59:46 +00001135 globals->options->install_base.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001136 }
1137 }
1138}
1139
// A variant of fprintf(stderr, ...) intended for use in signal handlers: the
// message is formatted into a fixed 1024-byte stack buffer and emitted with a
// single write() to STDERR_FILENO. Messages that do not fit are truncated to
// the buffer size; a failed write is ignored.
//
// WARNING: any output from the blaze client may be interleaved
// with output from the blaze server. In --curses mode,
// the Blaze server often erases the previous line of output.
// So, be sure to end each such message with TWO newlines,
// otherwise it may be erased by the next message from the
// Blaze server.
// Also, it's a good idea to start each message with a newline,
// in case the Blaze server has written a partial line.
static void sigprintf(const char *format, ...) {
  char buf[1024];
  va_list ap;
  va_start(ap, format);
  int r = vsnprintf(buf, sizeof buf, format, ap);
  va_end(ap);
  if (r <= 0) {
    // Formatting failed or produced nothing: there is nothing to write.
    return;
  }
  // vsnprintf() returns the length the full message would have had, which
  // exceeds what is actually in 'buf' when the output was truncated. Never
  // write more than the buffer holds (excluding the terminating NUL).
  size_t length = static_cast<size_t>(r);
  if (length >= sizeof buf) {
    length = sizeof buf - 1;
  }
  if (write(STDERR_FILENO, buf, length) <= 0) {
    // We don't care, just placate the compiler.
  }
}
1160
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001161// Signal handler.
1162static void handler(int signum) {
Lukacs Berkiee44c382016-09-14 10:53:37 +00001163 int saved_errno = errno;
1164
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001165 switch (signum) {
1166 case SIGINT:
1167 if (++globals->sigint_count >= 3) {
Kristina Chodorow11d40d22015-03-17 18:26:59 +00001168 sigprintf("\n%s caught third interrupt signal; killed.\n\n",
Julio Merino28774852016-09-14 16:59:46 +00001169 globals->options->product_name.c_str());
Lukacs Berki793cd012016-06-20 09:48:47 +00001170 if (globals->server_pid != -1) {
Lukacs Berkiee44c382016-09-14 10:53:37 +00001171 KillServerProcess(globals->server_pid);
Lukacs Berki793cd012016-06-20 09:48:47 +00001172 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001173 _exit(1);
1174 }
Kristina Chodorow11d40d22015-03-17 18:26:59 +00001175 sigprintf("\n%s caught interrupt signal; shutting down.\n\n",
Julio Merino28774852016-09-14 16:59:46 +00001176 globals->options->product_name.c_str());
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001177 blaze_server->Cancel();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001178 break;
1179 case SIGTERM:
Kristina Chodorow11d40d22015-03-17 18:26:59 +00001180 sigprintf("\n%s caught terminate signal; shutting down.\n\n",
Julio Merino28774852016-09-14 16:59:46 +00001181 globals->options->product_name.c_str());
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001182 blaze_server->Cancel();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001183 break;
1184 case SIGPIPE:
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001185 globals->received_signal = SIGPIPE;
1186 break;
1187 case SIGQUIT:
1188 sigprintf("\nSending SIGQUIT to JVM process %d (see %s).\n\n",
1189 globals->server_pid,
1190 globals->jvm_log_file.c_str());
1191 kill(globals->server_pid, SIGQUIT);
1192 break;
1193 }
Lukacs Berkiee44c382016-09-14 10:53:37 +00001194
1195 errno = saved_errno;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001196}
1197
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001198// Performs all I/O for a single client request to the server, and
1199// shuts down the client (by exit or signal).
Lukacs Berki907dbbf2016-04-15 11:30:12 +00001200static ATTRIBUTE_NORETURN void SendServerRequest(BlazeServer* server) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001201 while (true) {
Lukacs Berki1977d922016-05-02 09:31:37 +00001202 if (!server->Connected()) {
1203 StartServerAndConnect(server);
1204 }
1205
Lukacs Berki4de98942016-09-09 09:23:36 +00001206 // Check for the case when the workspace directory deleted and then gets
1207 // recreated while the server is running
1208
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001209 string server_cwd = GetProcessCWD(globals->server_pid);
Lukacs Berki4be230a2015-10-15 13:43:03 +00001210 // If server_cwd is empty, GetProcessCWD failed. This notably occurs when
1211 // running under Docker because then readlink(/proc/[pid]/cwd) returns
1212 // EPERM.
1213 // Docker issue #6687 (https://github.com/docker/docker/issues/6687) fixed
1214 // this, but one still needs the --cap-add SYS_PTRACE command line flag, at
1215 // least according to the discussion on Docker issue #6800
1216 // (https://github.com/docker/docker/issues/6687), and even then, it's a
1217 // non-default Docker flag. Given that this occurs only in very weird
1218 // cases, it's better to assume that everything is alright if we can't get
1219 // the cwd.
1220
1221 if (!server_cwd.empty() &&
1222 (server_cwd != globals->workspace || // changed
1223 server_cwd.find(" (deleted)") != string::npos)) { // deleted.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001224 // There's a distant possibility that the two paths look the same yet are
1225 // actually different because the two processes have different mount
1226 // tables.
1227 if (VerboseLogging()) {
1228 fprintf(stderr, "Server's cwd moved or deleted (%s).\n",
1229 server_cwd.c_str());
1230 }
Lukacs Berki1977d922016-05-02 09:31:37 +00001231 server->KillRunningServer();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001232 } else {
1233 break;
1234 }
1235 }
1236
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001237 if (VerboseLogging()) {
1238 fprintf(stderr, "Connected (server pid=%d).\n", globals->server_pid);
1239 }
1240
1241 // Wall clock time since process startup.
1242 globals->startup_time = ProcessClock() / 1000000LL;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001243
1244 // Unblock all signals.
1245 sigset_t sigset;
1246 sigemptyset(&sigset);
1247 sigprocmask(SIG_SETMASK, &sigset, NULL);
1248
1249 signal(SIGINT, handler);
1250 signal(SIGTERM, handler);
1251 signal(SIGPIPE, handler);
1252 signal(SIGQUIT, handler);
1253
Lukacs Berkie6a34f62016-04-25 12:16:04 +00001254 int exit_code = server->Communicate();
1255 if (globals->received_signal) {
1256 // Kill ourselves with the same signal, so that callers see the
1257 // right WTERMSIG value.
1258 signal(globals->received_signal, SIG_DFL);
1259 raise(globals->received_signal);
1260 exit(1); // (in case raise didn't kill us for some reason)
1261 } else {
1262 exit(exit_code);
1263 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001264}
1265
1266// Parse the options, storing parsed values in globals.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001267static void ParseOptions(int argc, const char *argv[]) {
1268 string error;
1269 blaze_exit_code::ExitCode parse_exit_code =
Julio Merino28774852016-09-14 16:59:46 +00001270 globals->option_processor->ParseOptions(argc, argv, globals->workspace,
1271 globals->cwd, &error);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001272 if (parse_exit_code != blaze_exit_code::SUCCESS) {
1273 die(parse_exit_code, "%s", error.c_str());
1274 }
Julio Merino28774852016-09-14 16:59:46 +00001275 globals->options = globals->option_processor->GetParsedStartupOptions();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001276}
1277
1278// Returns the canonical form of a path.
1279static string MakeCanonical(const char *path) {
1280 char *resolved_path = realpath(path, NULL);
1281 if (resolved_path == NULL) {
1282 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1283 "realpath('%s') failed", path);
1284 }
1285
1286 string ret = resolved_path;
1287 free(resolved_path);
1288 return ret;
1289}
1290
1291// Compute the globals globals->cwd and globals->workspace.
1292static void ComputeWorkspace() {
1293 char cwdbuf[PATH_MAX];
1294 if (getcwd(cwdbuf, sizeof cwdbuf) == NULL) {
1295 pdie(blaze_exit_code::INTERNAL_ERROR, "getcwd() failed");
1296 }
1297 globals->cwd = MakeCanonical(cwdbuf);
Julio Merino211a95c2016-08-29 11:01:35 +00001298 globals->workspace = WorkspaceLayout::GetWorkspace(globals->cwd);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001299}
1300
1301// Figure out the base directories based on embedded data, username, cwd, etc.
Julio Merino28774852016-09-14 16:59:46 +00001302// Sets globals->options->install_base, globals->options->output_base,
Thiago Farina6fd9bf12016-04-26 09:02:18 +00001303// globals->lockfile, globals->jvm_log_file.
Thiago Farina2fd78902015-05-18 11:37:59 +00001304static void ComputeBaseDirectories(const string &self_path) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001305 // Only start a server when in a workspace because otherwise we won't do more
1306 // than emit a help message.
Julio Merino211a95c2016-08-29 11:01:35 +00001307 if (!WorkspaceLayout::InWorkspace(globals->workspace)) {
Julio Merino28774852016-09-14 16:59:46 +00001308 globals->options->batch = true;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001309 }
1310
1311 // The default install_base is <output_user_root>/install/<md5(blaze)>
1312 // but if an install_base is specified on the command line, we use that as
1313 // the base instead.
Julio Merino28774852016-09-14 16:59:46 +00001314 if (globals->options->install_base.empty()) {
1315 string install_user_root = globals->options->output_user_root + "/install";
1316 globals->options->install_base =
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001317 GetInstallBase(install_user_root, self_path);
1318 } else {
Eric Fellheimer4c5eb0f2015-08-12 15:02:24 +00001319 // We call GetInstallBase anyway to populate extracted_binaries and
1320 // install_md5.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001321 GetInstallBase("", self_path);
1322 }
1323
Julio Merino28774852016-09-14 16:59:46 +00001324 if (globals->options->output_base.empty()) {
Dmitry Lomovbc84cc82016-04-15 14:05:24 +00001325#if !defined(__CYGWIN__)
Julio Merino28774852016-09-14 16:59:46 +00001326 globals->options->output_base = GetHashedBaseDir(
1327 globals->options->output_user_root, globals->workspace);
Dmitry Lomovbc84cc82016-04-15 14:05:24 +00001328#else
Julio Merino28774852016-09-14 16:59:46 +00001329 globals->options->output_base = GetHashedBaseDirForWindows(
1330 blaze::GetOutputRoot(), globals->options->product_name,
Dmitry Lomovbc84cc82016-04-15 14:05:24 +00001331 blaze::GetUserName(), globals->workspace);
1332#endif
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001333 }
1334
1335 struct stat buf;
Julio Merino28774852016-09-14 16:59:46 +00001336 const char *output_base = globals->options->output_base.c_str();
Dave MacLachlan6b747ee2016-07-20 10:00:44 +00001337 if (stat(output_base, &buf) == -1) {
Julio Merino28774852016-09-14 16:59:46 +00001338 if (MakeDirectories(globals->options->output_base, 0777) == -1) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001339 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1340 "Output base directory '%s' could not be created",
Dave MacLachlan6b747ee2016-07-20 10:00:44 +00001341 output_base);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001342 }
1343 } else {
1344 if (!S_ISDIR(buf.st_mode)) {
1345 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1346 "Error: Output base directory '%s' could not be created. "
1347 "It exists but is not a directory.",
Dave MacLachlan6b747ee2016-07-20 10:00:44 +00001348 output_base);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001349 }
1350 }
Dave MacLachlan6b747ee2016-07-20 10:00:44 +00001351 if (access(output_base, R_OK | W_OK | X_OK) != 0) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001352 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1353 "Error: Output base directory '%s' must be readable and writable.",
Dave MacLachlan6b747ee2016-07-20 10:00:44 +00001354 output_base);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001355 }
Dave MacLachlan6b747ee2016-07-20 10:00:44 +00001356 ExcludePathFromBackup(output_base);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001357
Julio Merino28774852016-09-14 16:59:46 +00001358 globals->options->output_base = MakeCanonical(output_base);
1359 globals->lockfile = globals->options->output_base + "/lock";
1360 globals->jvm_log_file = globals->options->output_base + "/server/jvm.out";
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001361}
1362
1363static void CheckEnvironment() {
Lukacs Berki86a28b02016-10-25 10:34:45 +00001364 if (getenv("http_proxy") != NULL) {
1365 fprintf(stderr, "Warning: ignoring http_proxy in environment.\n");
1366 unsetenv("http_proxy");
1367 }
1368
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001369 if (getenv("LD_ASSUME_KERNEL") != NULL) {
1370 // Fix for bug: if ulimit -s and LD_ASSUME_KERNEL are both
1371 // specified, the JVM fails to create threads. See thread_stack_regtest.
1372 // This is also provoked by LD_LIBRARY_PATH=/usr/lib/debug,
1373 // or anything else that causes the JVM to use LinuxThreads.
1374 fprintf(stderr, "Warning: ignoring LD_ASSUME_KERNEL in environment.\n");
1375 unsetenv("LD_ASSUME_KERNEL");
1376 }
1377
1378 if (getenv("LD_PRELOAD") != NULL) {
1379 fprintf(stderr, "Warning: ignoring LD_PRELOAD in environment.\n");
1380 unsetenv("LD_PRELOAD");
1381 }
1382
1383 if (getenv("_JAVA_OPTIONS") != NULL) {
1384 // This would override --host_jvm_args
1385 fprintf(stderr, "Warning: ignoring _JAVA_OPTIONS in environment.\n");
1386 unsetenv("_JAVA_OPTIONS");
1387 }
1388
Thiago Farinadfe43a22015-04-07 13:48:49 +00001389 if (getenv("TEST_TMPDIR") != NULL) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001390 fprintf(stderr, "INFO: $TEST_TMPDIR defined: output root default is "
Julio Merino28774852016-09-14 16:59:46 +00001391 "'%s'.\n", globals->options->output_root.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001392 }
1393
1394 // TODO(bazel-team): We've also seen a failure during loading (creating
1395 // threads?) when ulimit -Hs 8192. Characterize that and check for it here.
1396
1397 // Make the JVM use ISO-8859-1 for parsing its command line because "blaze
1398 // run" doesn't handle non-ASCII command line arguments. This is apparently
1399 // the most reliable way to select the platform default encoding.
1400 setenv("LANG", "en_US.ISO-8859-1", 1);
1401 setenv("LANGUAGE", "en_US.ISO-8859-1", 1);
1402 setenv("LC_ALL", "en_US.ISO-8859-1", 1);
1403 setenv("LC_CTYPE", "en_US.ISO-8859-1", 1);
1404}
1405
// Prepares the standard streams of the client: switches stderr to line
// buffering and makes sure that file descriptors 0, 1 and 2 are all open.
static void SetupStreams() {
  // Line-buffer stderr, since we always flush at the end of a server
  // message.  This saves lots of single-char calls to write(2).
  // This doesn't work if any writes to stderr have already occurred!
  setlinebuf(stderr);

  // Ensure we have three open fds.  Otherwise we can end up with
  // bizarre things like stdout going to the lock file, etc.
  static const struct {
    int fd;
    int open_flags;
  } kStandardStreams[] = {
      {STDIN_FILENO, O_RDONLY},
      {STDOUT_FILENO, O_WRONLY},
      {STDERR_FILENO, O_WRONLY},
  };
  for (const auto &stream : kStandardStreams) {
    // A failing F_GETFL is taken to mean the descriptor is not open.
    if (fcntl(stream.fd, F_GETFL) == -1) {
      open("/dev/null", stream.open_flags);
    }
  }
}
1418
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001419static void CheckBinaryPath(const string& argv0) {
1420 if (argv0[0] == '/') {
1421 globals->binary_path = argv0;
1422 } else {
1423 string abs_path = globals->cwd + '/' + argv0;
1424 char *resolved_path = realpath(abs_path.c_str(), NULL);
1425 if (resolved_path) {
1426 globals->binary_path = resolved_path;
1427 free(resolved_path);
1428 } else {
1429 // This happens during our integration tests, but thats okay, as we won't
1430 // log the invocation anyway.
1431 globals->binary_path = abs_path;
1432 }
1433 }
1434}
1435
1436// Create the user's directory where we keep state, installations etc.
1437// Typically, this happens inside a temp directory, so we have to be
1438// careful about symlink attacks.
1439static void CreateSecureOutputRoot() {
Julio Merino28774852016-09-14 16:59:46 +00001440 const char* root = globals->options->output_user_root.c_str();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001441 struct stat fileinfo = {};
1442
Kristina Chodorow46af79d2015-03-20 22:35:37 +00001443 if (MakeDirectories(root, 0755) == -1) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001444 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, "mkdir('%s')", root);
1445 }
1446
1447 // The path already exists.
1448 // Check ownership and mode, and verify that it is a directory.
1449
1450 if (lstat(root, &fileinfo) < 0) {
1451 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, "lstat('%s')", root);
1452 }
1453
1454 if (fileinfo.st_uid != geteuid()) {
1455 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, "'%s' is not owned by me",
1456 root);
1457 }
1458
1459 if ((fileinfo.st_mode & 022) != 0) {
1460 int new_mode = fileinfo.st_mode & (~022);
1461 if (chmod(root, new_mode) < 0) {
1462 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1463 "'%s' has mode %o, chmod to %o failed", root,
1464 fileinfo.st_mode & 07777, new_mode);
1465 }
1466 }
1467
1468 if (stat(root, &fileinfo) < 0) {
1469 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, "stat('%s')", root);
1470 }
1471
1472 if (!S_ISDIR(fileinfo.st_mode)) {
1473 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, "'%s' is not a directory",
1474 root);
1475 }
Dave MacLachlan6b747ee2016-07-20 10:00:44 +00001476
1477 ExcludePathFromBackup(root);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001478}
1479
1480// TODO(bazel-team): Execute the server as a child process and write its exit
1481// code to a file. In case the server becomes unresonsive or terminates
1482// unexpectedly (in a way that isn't already handled), we can observe the file,
1483// if it exists. (If it doesn't, then we know something went horribly wrong.)
Julio Merino28774852016-09-14 16:59:46 +00001484int Main(int argc, const char *argv[], OptionProcessor *option_processor) {
Thiago Farina676cb9f2016-10-06 11:00:43 +00001485 globals = new GlobalVariables(option_processor);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001486 SetupStreams();
1487
1488 // Must be done before command line parsing.
1489 ComputeWorkspace();
1490 CheckBinaryPath(argv[0]);
1491 ParseOptions(argc, argv);
Lukacs Berkibb2230f2016-04-27 14:19:25 +00001492
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001493 string error;
1494 blaze_exit_code::ExitCode reexec_options_exit_code =
Julio Merino28774852016-09-14 16:59:46 +00001495 globals->options->CheckForReExecuteOptions(argc, argv, &error);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001496 if (reexec_options_exit_code != blaze_exit_code::SUCCESS) {
1497 die(reexec_options_exit_code, "%s", error.c_str());
1498 }
1499 CheckEnvironment();
1500 CreateSecureOutputRoot();
1501
1502 const string self_path = GetSelfPath();
1503 ComputeBaseDirectories(self_path);
1504
Lukacs Berki3a3c4832016-10-06 14:20:04 +00001505 blaze_server = static_cast<BlazeServer *>(new GrpcBlazeServer());
Lukacs Berki907dbbf2016-04-15 11:30:12 +00001506
Lukacs Berki415d39a2016-04-28 13:18:54 +00001507 globals->command_wait_time = blaze_server->AcquireLock();
Lukacs Berkice1445f2016-04-19 15:52:55 +00001508
Julio Merino28774852016-09-14 16:59:46 +00001509 WarnFilesystemType(globals->options->output_base);
Lukacs Berkice1445f2016-04-19 15:52:55 +00001510
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001511 ExtractData(self_path);
Lukacs Berki949c8762016-07-08 12:17:28 +00001512 VerifyJavaVersionAndSetJvm();
1513
Lukacs Berki1977d922016-05-02 09:31:37 +00001514 blaze_server->Connect();
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001515 EnsureCorrectRunningVersion(blaze_server);
1516 KillRunningServerIfDifferentStartupOptions(blaze_server);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001517
Julio Merino28774852016-09-14 16:59:46 +00001518 if (globals->options->batch) {
1519 SetScheduling(globals->options->batch_cpu_scheduling,
1520 globals->options->io_nice_level);
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001521 StartStandalone(blaze_server);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001522 } else {
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001523 SendServerRequest(blaze_server);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001524 }
1525 return 0;
1526}
Thiago Farina0b6963e2015-04-28 20:26:45 +00001527
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001528static void null_grpc_log_function(gpr_log_func_args *args) {
1529}
1530
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001531GrpcBlazeServer::GrpcBlazeServer() {
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001532 gpr_set_log_function(null_grpc_log_function);
Lukacs Berki1977d922016-05-02 09:31:37 +00001533 connected_ = false;
Lukacs Berki6dd29092016-05-30 14:05:33 +00001534 int fd[2];
1535 if (pipe(fd) < 0) {
1536 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1537 "pipe()");
1538 }
1539 recv_socket_ = fd[0];
1540 send_socket_ = fd[1];
1541
1542 if (fcntl(recv_socket_, F_SETFD, FD_CLOEXEC) == -1) {
1543 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1544 "fcntl(F_SETFD, FD_CLOEXEC) failed");
1545 }
1546
1547 if (fcntl(send_socket_, F_SETFD, FD_CLOEXEC) == -1) {
1548 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1549 "fcntl(F_SETFD, FD_CLOEXEC) failed");
1550 }
1551}
1552
1553GrpcBlazeServer::~GrpcBlazeServer() {
1554 close(send_socket_);
1555 close(recv_socket_);
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001556}
1557
1558bool GrpcBlazeServer::Connect() {
Lukacs Berki1977d922016-05-02 09:31:37 +00001559 assert(!connected_);
1560
Julio Merino28774852016-09-14 16:59:46 +00001561 std::string server_dir = globals->options->output_base + "/server";
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001562 std::string port;
Lukacs Berkib7caf9d2016-04-25 09:44:14 +00001563 std::string ipv4_prefix = "127.0.0.1:";
Lukacs Berkic8e74242016-04-28 08:32:04 +00001564 std::string ipv6_prefix_1 = "[0:0:0:0:0:0:0:1]:";
1565 std::string ipv6_prefix_2 = "[::1]:";
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001566
Lukacs Berki7e0249e2016-04-21 08:14:08 +00001567 if (!ReadFile(server_dir + "/command_port", &port)) {
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001568 return false;
1569 }
1570
Lukacs Berkib7caf9d2016-04-25 09:44:14 +00001571 // Make sure that we are being directed to localhost
1572 if (port.compare(0, ipv4_prefix.size(), ipv4_prefix)
Lukacs Berkic8e74242016-04-28 08:32:04 +00001573 && port.compare(0, ipv6_prefix_1.size(), ipv6_prefix_1)
1574 && port.compare(0, ipv6_prefix_2.size(), ipv6_prefix_2)) {
Lukacs Berkib7caf9d2016-04-25 09:44:14 +00001575 return false;
1576 }
1577
Lukacs Berki7e0249e2016-04-21 08:14:08 +00001578 if (!ReadFile(server_dir + "/request_cookie", &request_cookie_)) {
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001579 return false;
1580 }
1581
Lukacs Berki7e0249e2016-04-21 08:14:08 +00001582 if (!ReadFile(server_dir + "/response_cookie", &response_cookie_)) {
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001583 return false;
1584 }
1585
1586 std::shared_ptr<grpc::Channel> channel(grpc::CreateChannel(
Lukacs Berkib7caf9d2016-04-25 09:44:14 +00001587 port, grpc::InsecureChannelCredentials()));
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001588 std::unique_ptr<command_server::CommandServer::Stub> client(
1589 command_server::CommandServer::NewStub(channel));
1590
1591 grpc::ClientContext context;
1592 context.set_deadline(
Michajlo Matijkiw641c1322016-08-12 18:05:29 +00001593 std::chrono::system_clock::now() + std::chrono::seconds(10));
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001594
1595 command_server::PingRequest request;
1596 command_server::PingResponse response;
Lukacs Berki00cfb7d2016-04-20 09:01:52 +00001597 request.set_cookie(request_cookie_);
Lukacs Berki7494c922016-04-27 11:17:51 +00001598
Lukacs Berki7e0249e2016-04-21 08:14:08 +00001599 grpc::Status status = client->Ping(&context, request, &response);
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001600
Lukacs Berkic55e9c72016-04-25 13:43:40 +00001601 if (!status.ok() || response.cookie() != response_cookie_) {
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001602 return false;
1603 }
1604
Lukacs Berki1977d922016-05-02 09:31:37 +00001605 globals->server_pid = GetServerPid(server_dir);
1606 if (globals->server_pid <= 0) {
1607 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1608 "can't get PID of existing server (server dir=%s)",
1609 server_dir.c_str());
1610 }
1611
Lukacs Berki00cfb7d2016-04-20 09:01:52 +00001612 this->client_ = std::move(client);
Lukacs Berki1977d922016-05-02 09:31:37 +00001613 connected_ = true;
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001614 return true;
1615}
1616
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001617// Cancellation works as follows:
1618//
1619// When the user presses Ctrl-C, a SIGINT is delivered to the client, which is
1620// translated into a BlazeServer::Cancel() call. Since it's not a good idea to
Lukacs Berki6dd29092016-05-30 14:05:33 +00001621// do significant work in signal handlers, all it does is write a byte to an
1622// unnamed pipe.
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001623//
Lukacs Berki6dd29092016-05-30 14:05:33 +00001624// This unnamed pipe is used to communicate with the cancel thread. Whenever
1625// something interesting happens, a byte is written into it, which is read by
1626// the cancel thread. These commands are available:
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001627//
Lukacs Berki6dd29092016-05-30 14:05:33 +00001628// - NOP
1629// - JOIN. The cancel thread needs to be terminated.
1630// - CANCEL. If the command ID is already available, a cancel request is sent.
1631// - COMMAND_ID_RECEIVED. The client learned the command ID from the server.
1632// If there is a pending cancellation request, it is acted upon.
1633//
1634// The only data the cancellation thread shares with the main thread is the
1635// file descriptor for receiving commands and command_id_, the latter of which
1636// is protected by a mutex, which mainly serves as a memory fence.
1637//
1638// The cancellation thread is joined at the end of the execution of the command.
1639// The main thread wakes it up just so that it can finish (using the JOIN
1640// action)
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001641//
1642// It's conceivable that the server is busy and thus it cannot service the
1643// cancellation request. In that case, we simply ignore the failure and the both
1644// the server and the client go on as if nothing had happened (except that this
Lukacs Berkie6a34f62016-04-25 12:16:04 +00001645// Ctrl-C still counts as a SIGINT, three of which result in a SIGKILL being
1646// delivered to the server)
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001647void GrpcBlazeServer::CancelThread() {
1648 bool running = true;
Lukacs Berki6dd29092016-05-30 14:05:33 +00001649 bool cancel = false;
1650 bool command_id_received = false;
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001651 while (running) {
Lukacs Berki6dd29092016-05-30 14:05:33 +00001652 char buf;
1653 int bytes_read = read(recv_socket_, &buf, 1);
1654 if (bytes_read == -1 && errno == EINTR) {
1655 continue;
1656 } else if (bytes_read != 1) {
1657 pdie(blaze_exit_code::INTERNAL_ERROR,
1658 "Cannot communicate with cancel thread");
1659 }
1660
1661 switch (buf) {
1662 case CancelThreadAction::NOTHING:
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001663 break;
1664
Lukacs Berki6dd29092016-05-30 14:05:33 +00001665 case CancelThreadAction::JOIN:
1666 running = false;
1667 break;
1668
1669 case CancelThreadAction::COMMAND_ID_RECEIVED:
1670 command_id_received = true;
1671 if (cancel) {
1672 SendCancelMessage();
1673 cancel = false;
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001674 }
1675 break;
1676
Lukacs Berki6dd29092016-05-30 14:05:33 +00001677 case CancelThreadAction::CANCEL:
1678 if (command_id_received) {
1679 SendCancelMessage();
1680 } else {
1681 cancel = true;
1682 }
1683 break;
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001684 }
1685 }
1686}
1687
Lukacs Berki6dd29092016-05-30 14:05:33 +00001688void GrpcBlazeServer::SendCancelMessage() {
1689 std::unique_lock<std::mutex> lock(cancel_thread_mutex_);
1690
1691 command_server::CancelRequest request;
1692 request.set_cookie(request_cookie_);
1693 request.set_command_id(command_id_);
1694 grpc::ClientContext context;
1695 context.set_deadline(std::chrono::system_clock::now() +
Lukacs Berki3ace3002016-08-31 08:55:34 +00001696 std::chrono::seconds(10));
Lukacs Berki6dd29092016-05-30 14:05:33 +00001697 command_server::CancelResponse response;
1698 // There isn't a lot we can do if this request fails
Lukacs Berki3ace3002016-08-31 08:55:34 +00001699 grpc::Status status = client_->Cancel(&context, request, &response);
1700 if (!status.ok()) {
1701 fprintf(stderr, "\nCould not interrupt server (%s)\n\n",
1702 status.error_message().c_str());
1703 }
Lukacs Berki6dd29092016-05-30 14:05:33 +00001704}
1705
Lukacs Berki1977d922016-05-02 09:31:37 +00001706// This will wait indefinitely until the server shuts down
1707void GrpcBlazeServer::KillRunningServer() {
1708 assert(connected_);
1709 assert(globals->server_pid > 0);
1710
Lukacs Berkie6a34f62016-04-25 12:16:04 +00001711 grpc::ClientContext context;
1712 command_server::RunRequest request;
1713 command_server::RunResponse response;
1714 request.set_cookie(request_cookie_);
Julio Merino28774852016-09-14 16:59:46 +00001715 request.set_block_for_lock(globals->options->block_for_lock);
Lukacs Berkie6a34f62016-04-25 12:16:04 +00001716 request.set_client_description(
1717 "pid=" + ToString(getpid()) + " (for shutdown)");
1718 request.add_arg("shutdown");
1719 std::unique_ptr<grpc::ClientReader<command_server::RunResponse>> reader(
1720 client_->Run(&context, request));
1721
1722 while (reader->Read(&response)) {}
1723
Lukacs Berki1977d922016-05-02 09:31:37 +00001724 // Kill the server process for good measure.
Julio Merino28774852016-09-14 16:59:46 +00001725 if (VerifyServerProcess(globals->server_pid, globals->options->output_base,
1726 globals->options->install_base)) {
Lukacs Berkiee44c382016-09-14 10:53:37 +00001727 KillServerProcess(globals->server_pid);
1728 }
Lukacs Berki1977d922016-05-02 09:31:37 +00001729
1730 connected_ = false;
Lukacs Berkie6a34f62016-04-25 12:16:04 +00001731}
1732
1733unsigned int GrpcBlazeServer::Communicate() {
Lukacs Berki1977d922016-05-02 09:31:37 +00001734 assert(connected_);
1735
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001736 vector<string> arg_vector;
Julio Merino28774852016-09-14 16:59:46 +00001737 string command = globals->option_processor->GetCommand();
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001738 if (command != "") {
1739 arg_vector.push_back(command);
1740 AddLoggingArgs(&arg_vector);
1741 }
1742
Julio Merino28774852016-09-14 16:59:46 +00001743 globals->option_processor->GetCommandArguments(&arg_vector);
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001744
1745 command_server::RunRequest request;
Lukacs Berki00cfb7d2016-04-20 09:01:52 +00001746 request.set_cookie(request_cookie_);
Julio Merino28774852016-09-14 16:59:46 +00001747 request.set_block_for_lock(globals->options->block_for_lock);
Lukacs Berkice1445f2016-04-19 15:52:55 +00001748 request.set_client_description("pid=" + ToString(getpid()));
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001749 for (const string& arg : arg_vector) {
1750 request.add_arg(arg);
1751 }
1752
1753 grpc::ClientContext context;
1754 command_server::RunResponse response;
1755 std::unique_ptr<grpc::ClientReader<command_server::RunResponse>> reader(
Lukacs Berki00cfb7d2016-04-20 09:01:52 +00001756 client_->Run(&context, request));
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001757
Lukacs Berki415d39a2016-04-28 13:18:54 +00001758 // Release the server lock because the gRPC handles concurrent clients just
1759 // fine. Note that this may result in two "waiting for other client" messages
1760 // (one during server startup and one emitted by the server)
Lukacs Berki1977d922016-05-02 09:31:37 +00001761 blaze::ReleaseLock(&blaze_lock_);
Lukacs Berki415d39a2016-04-28 13:18:54 +00001762
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001763 std::thread cancel_thread(&GrpcBlazeServer::CancelThread, this);
1764 bool command_id_set = false;
Laurent Le Brun08849b22016-09-20 12:21:32 +00001765 bool pipe_broken = false;
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001766 while (reader->Read(&response)) {
Lukacs Berkic55e9c72016-04-25 13:43:40 +00001767 if (response.cookie() != response_cookie_) {
1768 fprintf(stderr, "\nServer response cookie invalid, exiting\n");
1769 return blaze_exit_code::INTERNAL_ERROR;
1770 }
1771
Laurent Le Brun08849b22016-09-20 12:21:32 +00001772 bool pipe_broken_now = false;
Lukacs Berkiedeb7532016-04-18 10:23:36 +00001773 if (response.standard_output().size() > 0) {
Laurent Le Brun08849b22016-09-20 12:21:32 +00001774 int result = write(STDOUT_FILENO, response.standard_output().c_str(),
1775 response.standard_output().size());
1776 if (result < 0 && errno == EPIPE) {
1777 pipe_broken_now = true;
1778 }
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001779 }
1780
Lukacs Berkiedeb7532016-04-18 10:23:36 +00001781 if (response.standard_error().size() > 0) {
Laurent Le Brun08849b22016-09-20 12:21:32 +00001782 int result = write(STDERR_FILENO, response.standard_error().c_str(),
1783 response.standard_error().size());
1784 if (result < 0 && errno == EPIPE) {
1785 pipe_broken_now = true;
1786 }
1787 }
1788
1789 if (pipe_broken_now && !pipe_broken) {
1790 pipe_broken = true;
1791 Cancel();
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001792 }
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001793
1794 if (!command_id_set && response.command_id().size() > 0) {
Lukacs Berki6dd29092016-05-30 14:05:33 +00001795 std::unique_lock<std::mutex> lock(cancel_thread_mutex_);
Lukacs Berki00cfb7d2016-04-20 09:01:52 +00001796 command_id_ = response.command_id();
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001797 command_id_set = true;
Lukacs Berki6dd29092016-05-30 14:05:33 +00001798 SendAction(CancelThreadAction::COMMAND_ID_RECEIVED);
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001799 }
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001800 }
1801
Lukacs Berki6dd29092016-05-30 14:05:33 +00001802 SendAction(CancelThreadAction::JOIN);
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001803 cancel_thread.join();
1804
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001805 if (!response.finished()) {
Lukacs Berki3ace3002016-08-31 08:55:34 +00001806 fprintf(stderr, "\nServer finished RPC without an explicit exit code\n\n");
Lukacs Berki2896dc02016-07-07 07:55:04 +00001807 return GetExitCodeForAbruptExit(*globals);
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001808 }
1809
Lukacs Berkie6a34f62016-04-25 12:16:04 +00001810 return response.exit_code();
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001811}
1812
1813void GrpcBlazeServer::Disconnect() {
Lukacs Berki1977d922016-05-02 09:31:37 +00001814 assert(connected_);
1815
Lukacs Berki00cfb7d2016-04-20 09:01:52 +00001816 client_.reset();
1817 request_cookie_ = "";
1818 response_cookie_ = "";
Lukacs Berki1977d922016-05-02 09:31:37 +00001819 connected_ = false;
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001820}
1821
Lukacs Berki6dd29092016-05-30 14:05:33 +00001822void GrpcBlazeServer::SendAction(CancelThreadAction action) {
1823 char msg = action;
Sasha Smundak1fdd31d2016-07-25 17:54:00 +00001824 if (write(send_socket_, &msg, 1) <= 0) {
Lukacs Berki3ace3002016-08-31 08:55:34 +00001825 sigprintf("\nCould not interrupt server (cannot write to client pipe)\n\n");
Sasha Smundak1fdd31d2016-07-25 17:54:00 +00001826 }
Lukacs Berki6dd29092016-05-30 14:05:33 +00001827}
1828
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001829void GrpcBlazeServer::Cancel() {
Lukacs Berki1977d922016-05-02 09:31:37 +00001830 assert(connected_);
Lukacs Berki6dd29092016-05-30 14:05:33 +00001831 SendAction(CancelThreadAction::CANCEL);
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001832}
1833
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001834} // namespace blaze