// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// blaze.cc: bootstrap and client code for Blaze server.
//
// Responsible for:
// - extracting the Python, C++ and Java components.
// - starting the server or finding the existing one.
// - client options parsing.
// - passing the argv array, and printing the out/err streams.
// - signal handling.
// - exiting with the right error/WTERMSIG code.
// - debugger + profiler support.
// - mutual exclusion between batch invocations.
Julio Merino28774852016-09-14 16:59:46 +000026#include "src/main/cpp/blaze.h"
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010027
28#include <assert.h>
29#include <ctype.h>
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010030#include <errno.h>
31#include <fcntl.h>
32#include <limits.h>
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010033#include <stdarg.h>
Thiago Farina8a67da42015-05-05 18:04:50 +000034#include <stdint.h>
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010035#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
Lukacs Berkie21e5922016-04-12 12:22:20 +000038
39#include <grpc/grpc.h>
Googler197547b2016-09-26 22:25:14 +000040#include <grpc/support/log.h>
Lukacs Berkie21e5922016-04-12 12:22:20 +000041#include <grpc++/channel.h>
42#include <grpc++/client_context.h>
43#include <grpc++/create_channel.h>
44#include <grpc++/security/credentials.h>
45
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010046#include <algorithm>
Lukacs Berki1b25ce22016-04-15 13:11:21 +000047#include <chrono> // NOLINT (gRPC requires this)
Lukacs Berkif1df38a2016-04-19 07:42:22 +000048#include <mutex> // NOLINT
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010049#include <set>
50#include <string>
Lukacs Berkif1df38a2016-04-19 07:42:22 +000051#include <thread> // NOLINT
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010052#include <utility>
53#include <vector>
54
Lukacs Berkie21e5922016-04-12 12:22:20 +000055
Han-Wen Nienhuys36fbe632015-04-21 13:58:08 +000056#include "src/main/cpp/blaze_util.h"
57#include "src/main/cpp/blaze_util_platform.h"
Thiago Farina676cb9f2016-10-06 11:00:43 +000058#include "src/main/cpp/global_variables.h"
Han-Wen Nienhuys36fbe632015-04-21 13:58:08 +000059#include "src/main/cpp/option_processor.h"
Julio Merino28774852016-09-14 16:59:46 +000060#include "src/main/cpp/startup_options.h"
Han-Wen Nienhuys36fbe632015-04-21 13:58:08 +000061#include "src/main/cpp/util/errors.h"
Thiago Farina7f9357f2015-04-23 13:57:43 +000062#include "src/main/cpp/util/exit_code.h"
Han-Wen Nienhuys36fbe632015-04-21 13:58:08 +000063#include "src/main/cpp/util/file.h"
Laszlo Csomor9c951962016-11-10 13:31:27 +000064#include "src/main/cpp/util/file_platform.h"
Chloe Calvarin78f1c852016-11-22 21:58:50 +000065#include "src/main/cpp/util/logging.h"
Han-Wen Nienhuys36fbe632015-04-21 13:58:08 +000066#include "src/main/cpp/util/numbers.h"
67#include "src/main/cpp/util/port.h"
68#include "src/main/cpp/util/strings.h"
Julio Merino211a95c2016-08-29 11:01:35 +000069#include "src/main/cpp/workspace_layout.h"
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +000070#include "third_party/ijar/zip.h"
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010071
Lukacs Berkie21e5922016-04-12 12:22:20 +000072#include "src/main/protobuf/command_server.grpc.pb.h"
73
Thiago Farina241f46c2015-04-13 14:33:30 +000074using blaze_util::die;
75using blaze_util::pdie;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010076
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010077namespace blaze {
78
Thiago Farina80bb0f22016-10-17 15:57:13 +000079using std::set;
80using std::string;
81using std::vector;
82
Lukacs Berki1977d922016-05-02 09:31:37 +000083static int GetServerPid(const string &server_dir);
Eric Fellheimer3a695f32016-05-11 17:26:30 +000084static void VerifyJavaVersionAndSetJvm();
Lukacs Berki907dbbf2016-04-15 11:30:12 +000085
Lukacs Berki1977d922016-05-02 09:31:37 +000086// The following is a treatise on how the interaction between the client and the
87// server works.
88//
89// First, the client unconditionally acquires an flock() lock on
90// $OUTPUT_BASE/lock then verifies if it has already extracted itself by
91// checking if the directory it extracts itself to (install base + a checksum)
92// is present. If not, then it does the extraction. Care is taken that this
93// process is atomic so that Blazen in multiple output bases do not clash.
94//
95// Then the client tries to connect to the currently executing server and kills
96// it if at least one of the following conditions is true:
97//
98// - The server is of the wrong version (as determined by the
99// $OUTPUT_BASE/install symlink)
100// - The server has different startup options than the client wants
101// - The client wants to run the command in batch mode
102//
103// Then, if needed, the client adjusts the install link to indicate which
104// version of the server it is running.
105//
106// In batch mode, the client then simply executes the server while taking care
107// that the output base lock is kept until it finishes.
108//
// If in server mode, the client starts up a server if needed then sends the
// command to the server and streams back stdout and stderr. The output base
// lock is released after the command is sent to the server (the server
// implements its own locking mechanism).
Lukacs Berki1977d922016-05-02 09:31:37 +0000113
114// Synchronization between the client and the server is a little precarious
115// because the client needs to know the PID of the server and it is not
116// available using a Java API and we don't have JNI on Windows at the moment,
117// so the server can't just communicate this over the communication channel.
118// Thus, a PID file is used, but care needs to be taken that the contents of
119// this PID file are right.
120//
121// Upon server startup, the PID file is written before the client spawns the
122// server. Thus, when the client can connect, it can be certain that the PID
123// file is up to date.
124//
125// Upon server shutdown, the PID file is deleted using a server shutdown hook.
126// However, this happens *after* the server stopped listening, so it's possible
127// that a client has already started up a server and written a new PID file.
128// In order to avoid this, when the client starts up a new server, it reads the
129// contents of the PID file and kills the process indicated in it (it could do
130// with a bit more care, since PIDs can be reused, but for now, we just believe
131// the PID file)
132//
133// Some more interesting scenarios:
134//
135// - The server receives a kill signal and it does not have a chance to delete
136// the PID file: the client cannot connect, reads the PID file, kills the
137// process indicated in it and starts up a new server.
138//
139// - The server stopped accepting connections but hasn't quit yet and a new
140// client comes around: the new client will kill the server based on the
141// PID file before a new server is started up.
142//
143// Alternative implementations:
144//
145// - Don't deal with PIDs at all. This would make it impossible for the client
146// to deliver a SIGKILL to the server after three SIGINTs. It would only be
147// possible with gRPC anyway.
148//
// - Have the server check that the PID file contains the correct things
//   before deleting them: there is a window of time between checking the file
//   and deleting it in which a new server can overwrite the PID file. The
//   output base lock cannot be acquired, either, because when starting up a
//   new server, the client already holds it.
154//
155// - Delete the PID file before stopping to accept connections: then a client
156// could come about after deleting the PID file but before stopping accepting
157// connections. It would also not be resilient against a dead server that
158// left a PID file around.
Lukacs Berkif1df38a2016-04-19 07:42:22 +0000159class BlazeServer {
160 public:
161 virtual ~BlazeServer() {}
162
Lukacs Berki1977d922016-05-02 09:31:37 +0000163 // Acquire a lock for the server running in this output base. Returns the
164 // number of milliseconds spent waiting for the lock.
Lukacs Berki415d39a2016-04-28 13:18:54 +0000165 uint64_t AcquireLock();
166
Lukacs Berki1977d922016-05-02 09:31:37 +0000167 // Whether there is an active connection to a server.
168 bool Connected() const { return connected_; }
169
Lukacs Berkie6a34f62016-04-25 12:16:04 +0000170 // Connect to the server. Returns if the connection was successful. Only
171 // call this when this object is in disconnected state. If it returns true,
172 // this object will be in connected state.
Lukacs Berkif1df38a2016-04-19 07:42:22 +0000173 virtual bool Connect() = 0;
Lukacs Berkie6a34f62016-04-25 12:16:04 +0000174
175 // Disconnects from an existing server. Only call this when this object is in
176 // connected state. After this call returns, the object will be in connected
177 // state.
Lukacs Berkif1df38a2016-04-19 07:42:22 +0000178 virtual void Disconnect() = 0;
Lukacs Berkie6a34f62016-04-25 12:16:04 +0000179
180 // Send the command line to the server and forward whatever it says to stdout
181 // and stderr. Returns the desired exit code. Only call this when the server
182 // is in connected state.
183 virtual unsigned int Communicate() = 0;
184
185 // Disconnects and kills an existing server. Only call this when this object
186 // is in connected state.
Lukacs Berki1977d922016-05-02 09:31:37 +0000187 virtual void KillRunningServer() = 0;
Lukacs Berkie6a34f62016-04-25 12:16:04 +0000188
189 // Cancel the currently running command. If there is no command currently
Lukacs Berki1977d922016-05-02 09:31:37 +0000190 // running, the result is unspecified. When called, this object must be in
191 // connected state.
Lukacs Berkif1df38a2016-04-19 07:42:22 +0000192 virtual void Cancel() = 0;
Thiago Farina69dac862016-11-02 09:48:27 +0000193
194 protected:
195 BlazeLock blaze_lock_;
196 bool connected_;
Lukacs Berkif1df38a2016-04-19 07:42:22 +0000197};
198
Lukacs Berki415d39a2016-04-28 13:18:54 +0000199////////////////////////////////////////////////////////////////////////
200// Global Variables
201static GlobalVariables *globals;
202static BlazeServer *blaze_server;
203
Laszlo Csomor32086b22016-11-24 15:23:55 +0000204// TODO(laszlocsomor) 2016-11-24: release the `globals` and `blaze_server`
205// objects. Currently nothing deletes them. Be careful that some functions may
206// call exit(2) or _exit(2) (attributed with ATTRIBUTE_NORETURN) meaning we have
207// to delete the objects before those.
208
Lukacs Berki415d39a2016-04-28 13:18:54 +0000209uint64_t BlazeServer::AcquireLock() {
210 return blaze::AcquireLock(
Julio Merino28774852016-09-14 16:59:46 +0000211 globals->options->output_base, globals->options->batch,
212 globals->options->block_for_lock, &blaze_lock_);
Lukacs Berki415d39a2016-04-28 13:18:54 +0000213}
214
Lukacs Berki1977d922016-05-02 09:31:37 +0000215// Communication method that uses gRPC on a socket bound to localhost. More
216// documentation is in command_server.proto .
Lukacs Berki00cfb7d2016-04-20 09:01:52 +0000217class GrpcBlazeServer : public BlazeServer {
218 public:
Lukacs Berki71675a52016-11-08 09:48:27 +0000219 GrpcBlazeServer(int connect_timeout_secs);
Lukacs Berki6dd29092016-05-30 14:05:33 +0000220 virtual ~GrpcBlazeServer();
Lukacs Berki00cfb7d2016-04-20 09:01:52 +0000221
Lukacs Berki9d52bc52016-06-07 11:11:04 +0000222 virtual bool Connect();
223 virtual void Disconnect();
224 virtual unsigned int Communicate();
225 virtual void KillRunningServer();
226 virtual void Cancel();
Lukacs Berki00cfb7d2016-04-20 09:01:52 +0000227
228 private:
Lukacs Berki6dd29092016-05-30 14:05:33 +0000229 enum CancelThreadAction { NOTHING, JOIN, CANCEL, COMMAND_ID_RECEIVED };
Lukacs Berki00cfb7d2016-04-20 09:01:52 +0000230
231 std::unique_ptr<command_server::CommandServer::Stub> client_;
232 std::string request_cookie_;
233 std::string response_cookie_;
234 std::string command_id_;
235
Lukacs Berki6dd29092016-05-30 14:05:33 +0000236 // protects command_id_ . Although we always set it before making the cancel
237 // thread do something with it, the mutex is still useful because it provides
238 // a memory fence.
239 std::mutex cancel_thread_mutex_;
Lukacs Berki8b999982016-04-26 15:40:38 +0000240
Lukacs Berki71675a52016-11-08 09:48:27 +0000241 int connect_timeout_secs_;
Laszlo Csomoref5ceef2016-11-18 11:19:02 +0000242
Thiago Farina0bba4c92016-12-14 15:29:11 +0000243 // Pipe that the main thread sends actions to and the cancel thread receives
Laszlo Csomoref5ceef2016-11-18 11:19:02 +0000244 // actions from.
Thiago Farina0bba4c92016-12-14 15:29:11 +0000245 blaze_util::IPipe *pipe_;
Lukacs Berki00cfb7d2016-04-20 09:01:52 +0000246
Lukacs Berki10dd6382017-01-11 09:08:54 +0000247 bool TryConnect(command_server::CommandServer::Stub* client);
Lukacs Berki00cfb7d2016-04-20 09:01:52 +0000248 void CancelThread();
Lukacs Berki6dd29092016-05-30 14:05:33 +0000249 void SendAction(CancelThreadAction action);
250 void SendCancelMessage();
Lukacs Berki00cfb7d2016-04-20 09:01:52 +0000251};
252
253
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100254////////////////////////////////////////////////////////////////////////
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100255// Logic
256
Lukacs Berki71675a52016-11-08 09:48:27 +0000257void debug_log(const char* format, ...) {
258 if (!globals->options->client_debug) {
259 return;
260 }
261
262 fprintf(stderr, "CLIENT: ");
263 va_list arglist;
264 va_start(arglist, format);
265 vfprintf(stderr, format, arglist);
266 va_end(arglist);
267 fprintf(stderr, "%s", "\n");
268 fflush(stderr);
269}
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100270
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000271// A devtools_ijar::ZipExtractorProcessor to extract the InstallKeyFile
272class GetInstallKeyFileProcessor : public devtools_ijar::ZipExtractorProcessor {
273 public:
Thiago Farina9cb32752015-06-03 15:34:19 +0000274 explicit GetInstallKeyFileProcessor(string *install_base_key)
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000275 : install_base_key_(install_base_key) {}
276
277 virtual bool Accept(const char *filename, const devtools_ijar::u4 attr) {
278 globals->extracted_binaries.push_back(filename);
279 return strcmp(filename, "install_base_key") == 0;
280 }
281
282 virtual void Process(const char *filename, const devtools_ijar::u4 attr,
283 const devtools_ijar::u1 *data, const size_t size) {
284 string str(reinterpret_cast<const char *>(data), size);
285 blaze_util::StripWhitespace(&str);
Lukacs Berki58c29ae2015-10-16 14:48:33 +0000286 if (str.size() != 32) {
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000287 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
Lukacs Berki58c29ae2015-10-16 14:48:33 +0000288 "\nFailed to extract install_base_key: file size mismatch "
289 "(should be 32, is %zd)", str.size());
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000290 }
291 *install_base_key_ = str;
292 }
293
294 private:
295 string *install_base_key_;
296};
297
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100298// Returns the install base (the root concatenated with the contents of the file
299// 'install_base_key' contained as a ZIP entry in the Blaze binary); as a side
300// effect, it also populates the extracted_binaries global variable.
301static string GetInstallBase(const string &root, const string &self_path) {
Eric Fellheimer4c5eb0f2015-08-12 15:02:24 +0000302 GetInstallKeyFileProcessor processor(&globals->install_md5);
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000303 std::unique_ptr<devtools_ijar::ZipExtractor> extractor(
304 devtools_ijar::ZipExtractor::Create(self_path.c_str(), &processor));
305 if (extractor.get() == NULL) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100306 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
László Csomor6f1e31a2017-01-27 11:01:41 +0000307 "\nFailed to open %s as a zip file: %s",
308 globals->options->product_name.c_str(),
309 blaze_util::GetLastErrorString().c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100310 }
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000311 if (extractor->ProcessAll() < 0) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100312 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000313 "\nFailed to extract install_base_key: %s", extractor->GetError());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100314 }
315
Eric Fellheimer4c5eb0f2015-08-12 15:02:24 +0000316 if (globals->install_md5.empty()) {
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000317 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
318 "\nFailed to find install_base_key's in zip file");
319 }
Laszlo Csomor760f7862016-12-19 15:46:47 +0000320 return blaze_util::JoinPath(root, globals->install_md5);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100321}
322
323// Escapes colons by replacing them with '_C' and underscores by replacing them
324// with '_U'. E.g. "name:foo_bar" becomes "name_Cfoo_Ubar"
325static string EscapeForOptionSource(const string& input) {
326 string result = input;
327 blaze_util::Replace("_", "_U", &result);
328 blaze_util::Replace(":", "_C", &result);
329 return result;
330}
331
Thiago Farina6a2dc2b2016-10-28 13:05:22 +0000332// Returns the installed embedded binaries directory, under the shared
333// install_base location.
334string GetEmbeddedBinariesRoot(const string &install_base) {
335 return blaze_util::JoinPath(install_base, "_embedded_binaries");
336}
337
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100338// Returns the JVM command argument array.
339static vector<string> GetArgumentArray() {
340 vector<string> result;
341
342 // e.g. A Blaze server process running in ~/src/build_root (where there's a
343 // ~/src/build_root/WORKSPACE file) will appear in ps(1) as "blaze(src)".
344 string workspace =
345 blaze_util::Basename(blaze_util::Dirname(globals->workspace));
Julio Merino28774852016-09-14 16:59:46 +0000346 string product = globals->options->product_name;
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000347 blaze_util::ToLower(&product);
348 result.push_back(product + "(" + workspace + ")");
Julio Merino28774852016-09-14 16:59:46 +0000349 globals->options->AddJVMArgumentPrefix(
Eric Fellheimer3a695f32016-05-11 17:26:30 +0000350 blaze_util::Dirname(blaze_util::Dirname(globals->jvm_path)),
351 &result);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100352
353 result.push_back("-XX:+HeapDumpOnOutOfMemoryError");
Julio Merino28774852016-09-14 16:59:46 +0000354 string heap_crash_path = globals->options->output_base;
Dmitry Lomov7608db52016-07-14 11:27:10 +0000355 result.push_back("-XX:HeapDumpPath=" + ConvertPath(heap_crash_path));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100356
357 result.push_back("-Xverify:none");
358
Janak Ramakrishnande735c02015-06-02 16:38:57 +0000359 vector<string> user_options;
360
Janak Ramakrishnan0acd1542016-01-06 18:42:30 +0000361 user_options.insert(user_options.begin(),
Julio Merino28774852016-09-14 16:59:46 +0000362 globals->options->host_jvm_args.begin(),
363 globals->options->host_jvm_args.end());
Janak Ramakrishnande735c02015-06-02 16:38:57 +0000364
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100365 // Add JVM arguments particular to building blaze64 and particular JVM
366 // versions.
367 string error;
368 blaze_exit_code::ExitCode jvm_args_exit_code =
Julio Merino28774852016-09-14 16:59:46 +0000369 globals->options->AddJVMArguments(globals->options->GetHostJavabase(),
Janak Ramakrishnande735c02015-06-02 16:38:57 +0000370 &result, user_options, &error);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100371 if (jvm_args_exit_code != blaze_exit_code::SUCCESS) {
372 die(jvm_args_exit_code, "%s", error.c_str());
373 }
374
375 // We put all directories on the java.library.path that contain .so files.
376 string java_library_path = "-Djava.library.path=";
Thiago Farina6a2dc2b2016-10-28 13:05:22 +0000377 string real_install_dir =
378 GetEmbeddedBinariesRoot(globals->options->install_base);
379
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100380 bool first = true;
381 for (const auto& it : globals->extracted_binaries) {
Thiago Farina01f36002015-04-08 15:59:08 +0000382 if (IsSharedLibrary(it)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100383 if (!first) {
Dmitry Lomov78c0cc72015-08-11 16:44:21 +0000384 java_library_path += blaze::ListSeparator();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100385 }
386 first = false;
Dmitry Lomov78c0cc72015-08-11 16:44:21 +0000387 java_library_path += blaze::ConvertPath(
388 blaze_util::JoinPath(real_install_dir, blaze_util::Dirname(it)));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100389 }
390 }
391 result.push_back(java_library_path);
392
393 // Force use of latin1 for file names.
394 result.push_back("-Dfile.encoding=ISO-8859-1");
395
Julio Merino28774852016-09-14 16:59:46 +0000396 if (globals->options->host_jvm_debug) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100397 fprintf(stderr,
398 "Running host JVM under debugger (listening on TCP port 5005).\n");
399 // Start JVM so that it listens for a connection from a
400 // JDWP-compliant debugger:
401 result.push_back("-Xdebug");
402 result.push_back("-Xrunjdwp:transport=dt_socket,server=y,address=5005");
403 }
Janak Ramakrishnande735c02015-06-02 16:38:57 +0000404 result.insert(result.end(), user_options.begin(), user_options.end());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100405
Julio Merino28774852016-09-14 16:59:46 +0000406 globals->options->AddJVMArgumentSuffix(real_install_dir,
Eric Fellheimer3a695f32016-05-11 17:26:30 +0000407 globals->extracted_binaries[0],
408 &result);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100409
Lukacs Berki3d486832016-10-26 12:51:38 +0000410 // JVM arguments are complete. Now pass in Blaze startup options.
411 // Note that we always use the --flag=ARG form (instead of the --flag ARG one)
412 // so that BlazeRuntime#splitStartupOptions has an easy job.
Lukacs Berki71675a52016-11-08 09:48:27 +0000413
414 // TODO(lberki): Test that whatever the list constructed after this line is
415 // actually a list of parseable startup options.
Julio Merino28774852016-09-14 16:59:46 +0000416 if (!globals->options->batch) {
Lukacs Berki3d486832016-10-26 12:51:38 +0000417 result.push_back("--max_idle_secs=" +
418 ToString(globals->options->max_idle_secs));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100419 } else {
Googlerc8c64e72015-03-23 23:22:18 +0000420 // --batch must come first in the arguments to Java main() because
421 // the code expects it to be at args[0] if it's been set.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100422 result.push_back("--batch");
423 }
Lukacs Berkice1445f2016-04-19 15:52:55 +0000424
Julio Merino28774852016-09-14 16:59:46 +0000425 if (globals->options->command_port != 0) {
Lukacs Berki7e0249e2016-04-21 08:14:08 +0000426 result.push_back(
Julio Merino28774852016-09-14 16:59:46 +0000427 "--command_port=" + ToString(globals->options->command_port));
Lukacs Berkice1445f2016-04-19 15:52:55 +0000428 }
429
Lukacs Berki71675a52016-11-08 09:48:27 +0000430 result.push_back(
431 "--connect_timeout_secs=" +
432 ToString(globals->options->connect_timeout_secs));
433
Dmitry Lomov78c0cc72015-08-11 16:44:21 +0000434 result.push_back("--install_base=" +
Julio Merino28774852016-09-14 16:59:46 +0000435 blaze::ConvertPath(globals->options->install_base));
Eric Fellheimer4c5eb0f2015-08-12 15:02:24 +0000436 result.push_back("--install_md5=" + globals->install_md5);
Dmitry Lomov78c0cc72015-08-11 16:44:21 +0000437 result.push_back("--output_base=" +
Julio Merino28774852016-09-14 16:59:46 +0000438 blaze::ConvertPath(globals->options->output_base));
Dmitry Lomov78c0cc72015-08-11 16:44:21 +0000439 result.push_back("--workspace_directory=" +
440 blaze::ConvertPath(globals->workspace));
Marian Lobur6dcdd602015-04-09 09:28:40 +0000441
Julio Merino28774852016-09-14 16:59:46 +0000442 if (globals->options->allow_configurable_attributes) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100443 result.push_back("--allow_configurable_attributes");
444 }
Julio Merino28774852016-09-14 16:59:46 +0000445 if (globals->options->deep_execroot) {
Lukacs Berki5fb98d12015-12-09 15:29:46 +0000446 result.push_back("--deep_execroot");
447 } else {
448 result.push_back("--nodeep_execroot");
449 }
Julio Merino28774852016-09-14 16:59:46 +0000450 if (globals->options->oom_more_eagerly) {
Janak Ramakrishnanadc706f2016-03-07 19:12:48 +0000451 result.push_back("--experimental_oom_more_eagerly");
452 }
Janak Ramakrishnan19fde1f2016-05-23 21:20:16 +0000453 result.push_back("--experimental_oom_more_eagerly_threshold=" +
Julio Merino28774852016-09-14 16:59:46 +0000454 ToString(globals->options->oom_more_eagerly_threshold));
Janak Ramakrishnan8cc772e2016-03-23 17:26:12 +0000455
Michajlo Matijkiwaf79a322016-09-16 15:44:35 +0000456 if (!globals->options->write_command_log) {
457 result.push_back("--nowrite_command_log");
458 }
459
Julio Merino28774852016-09-14 16:59:46 +0000460 if (globals->options->watchfs) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100461 result.push_back("--watchfs");
462 }
Julio Merino28774852016-09-14 16:59:46 +0000463 if (globals->options->fatal_event_bus_exceptions) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100464 result.push_back("--fatal_event_bus_exceptions");
465 } else {
466 result.push_back("--nofatal_event_bus_exceptions");
467 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100468
Lukacs Berki71675a52016-11-08 09:48:27 +0000469 // We use this syntax so that the logic in ServerNeedsToBeKilled() that
470 // decides whether the server needs killing is simpler. This is parsed by the
471 // Java code where --noclient_debug and --client_debug=false are equivalent.
472 // Note that --client_debug false (separated by space) won't work either,
473 // because the logic in ServerNeedsToBeKilled() assumes that every argument
474 // is in the --arg=value form.
475 if (globals->options->client_debug) {
476 result.push_back("--client_debug=true");
477 } else {
478 result.push_back("--client_debug=false");
479 }
Chloe Calvarineaa3be72016-12-13 19:48:34 +0000480 if (globals->options->use_custom_exit_code_on_abrupt_exit) {
481 result.push_back("--use_custom_exit_code_on_abrupt_exit=true");
482 } else {
483 result.push_back("--use_custom_exit_code_on_abrupt_exit=false");
484 }
Lukacs Berki71675a52016-11-08 09:48:27 +0000485
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100486 // This is only for Blaze reporting purposes; the real interpretation of the
487 // jvm flags occurs when we set up the java command line.
Julio Merino28774852016-09-14 16:59:46 +0000488 if (globals->options->host_jvm_debug) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100489 result.push_back("--host_jvm_debug");
490 }
Julio Merino28774852016-09-14 16:59:46 +0000491 if (!globals->options->host_jvm_profile.empty()) {
492 result.push_back("--host_jvm_profile=" +
493 globals->options->host_jvm_profile);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100494 }
Julio Merino28774852016-09-14 16:59:46 +0000495 if (!globals->options->host_jvm_args.empty()) {
496 for (const auto &arg : globals->options->host_jvm_args) {
Janak Ramakrishnan533657e2015-11-13 23:34:14 +0000497 result.push_back("--host_jvm_args=" + arg);
498 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100499 }
Alex Humesky2f3f4cf2015-09-29 01:42:00 +0000500
Julio Merino28774852016-09-14 16:59:46 +0000501 if (globals->options->invocation_policy != NULL &&
502 strlen(globals->options->invocation_policy) > 0) {
Alex Humesky2f3f4cf2015-09-29 01:42:00 +0000503 result.push_back(string("--invocation_policy=") +
Julio Merino28774852016-09-14 16:59:46 +0000504 globals->options->invocation_policy);
Alex Humesky2f3f4cf2015-09-29 01:42:00 +0000505 }
506
Julio Merino28774852016-09-14 16:59:46 +0000507 result.push_back("--product_name=" + globals->options->product_name);
Luis Fernando Pino Duque623cdf82016-05-31 16:21:46 +0000508
Julio Merino28774852016-09-14 16:59:46 +0000509 globals->options->AddExtraOptions(&result);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100510
511 // The option sources are transmitted in the following format:
512 // --option_sources=option1:source1:option2:source2:...
513 string option_sources = "--option_sources=";
514 first = true;
Julio Merino28774852016-09-14 16:59:46 +0000515 for (const auto& it : globals->options->option_sources) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100516 if (!first) {
517 option_sources += ":";
518 }
519
520 first = false;
521 option_sources += EscapeForOptionSource(it.first) + ":" +
522 EscapeForOptionSource(it.second);
523 }
524
525 result.push_back(option_sources);
526 return result;
527}
528
Thiago Farina5735c252016-04-27 16:16:27 +0000529// Add common command options for logging to the given argument array.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100530static void AddLoggingArgs(vector<string>* args) {
Googler9588b812015-07-23 11:49:37 +0000531 args->push_back("--startup_time=" + ToString(globals->startup_time));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100532 if (globals->command_wait_time != 0) {
533 args->push_back("--command_wait_time=" +
Googler9588b812015-07-23 11:49:37 +0000534 ToString(globals->command_wait_time));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100535 }
536 if (globals->extract_data_time != 0) {
537 args->push_back("--extract_data_time=" +
Googler9588b812015-07-23 11:49:37 +0000538 ToString(globals->extract_data_time));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100539 }
540 if (globals->restart_reason != NO_RESTART) {
541 const char *reasons[] = {
542 "no_restart", "no_daemon", "new_version", "new_options"
543 };
544 args->push_back(
545 string("--restart_reason=") + reasons[globals->restart_reason]);
546 }
547 args->push_back(
548 string("--binary_path=") + globals->binary_path);
549}
550
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100551// Join the elements of the specified array with NUL's (\0's), akin to the
552// format of /proc/$PID/cmdline.
Thiago Farina0b6963e2015-04-28 20:26:45 +0000553static string GetArgumentString(const vector<string>& argument_array) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100554 string result;
555 blaze_util::JoinStrings(argument_array, '\0', &result);
556 return result;
557}
558
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100559// Do a chdir into the workspace, and die if it fails.
Julio Merinoe3e3bfa2016-12-08 22:22:12 +0000560static void GoToWorkspace(const WorkspaceLayout* workspace_layout) {
561 if (workspace_layout->InWorkspace(globals->workspace) &&
Laszlo Csomor9c951962016-11-10 13:31:27 +0000562 !blaze_util::ChangeDirectory(globals->workspace)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100563 pdie(blaze_exit_code::INTERNAL_ERROR,
Laszlo Csomor9c951962016-11-10 13:31:27 +0000564 "changing directory into %s failed", globals->workspace.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100565 }
566}
567
568// Check the java version if a java version specification is bundled. On
Thiago Farina5735c252016-04-27 16:16:27 +0000569// success, returns the executable path of the java command.
Eric Fellheimer3a695f32016-05-11 17:26:30 +0000570static void VerifyJavaVersionAndSetJvm() {
Julio Merino28774852016-09-14 16:59:46 +0000571 string exe = globals->options->GetJvm();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100572
573 string version_spec_file = blaze_util::JoinPath(
Thiago Farina6a2dc2b2016-10-28 13:05:22 +0000574 GetEmbeddedBinariesRoot(globals->options->install_base), "java.version");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100575 string version_spec = "";
Laszlo Csomor49970e02016-11-28 08:55:47 +0000576 if (blaze_util::ReadFile(version_spec_file, &version_spec)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100577 blaze_util::StripWhitespace(&version_spec);
578 // A version specification is given, get version of java.
579 string jvm_version = GetJvmVersion(exe);
580
581 // Compare that jvm_version is found and at least the one specified.
582 if (jvm_version.size() == 0) {
583 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
584 "Java version not detected while at least %s is needed.\n"
585 "Please set JAVA_HOME.", version_spec.c_str());
586 } else if (!CheckJavaVersionIsAtLeast(jvm_version, version_spec)) {
587 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
588 "Java version is %s while at least %s is needed.\n"
589 "Please set JAVA_HOME.",
590 jvm_version.c_str(), version_spec.c_str());
591 }
592 }
593
Eric Fellheimer3a695f32016-05-11 17:26:30 +0000594 globals->jvm_path = exe;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100595}
596
597// Starts the Blaze server. Returns a readable fd connected to the server.
598// This is currently used only to detect liveness.
Julio Merinoe3e3bfa2016-12-08 22:22:12 +0000599static void StartServer(const WorkspaceLayout* workspace_layout,
600 BlazeServerStartup** server_startup) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100601 vector<string> jvm_args_vector = GetArgumentArray();
602 string argument_string = GetArgumentString(jvm_args_vector);
Laszlo Csomor760f7862016-12-19 15:46:47 +0000603 string server_dir =
604 blaze_util::JoinPath(globals->options->output_base, "server");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100605 // Write the cmdline argument string to the server dir. If we get to this
606 // point, there is no server running, so we don't overwrite the cmdline file
607 // for the existing server. If might be that the server dies and the cmdline
608 // file stays there, but that is not a problem, since we always check the
609 // server, too.
Laszlo Csomor760f7862016-12-19 15:46:47 +0000610 blaze_util::WriteFile(argument_string,
611 blaze_util::JoinPath(server_dir, "cmdline"));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100612
613 // unless we restarted for a new-version, mark this as initial start
614 if (globals->restart_reason == NO_RESTART) {
615 globals->restart_reason = NO_DAEMON;
616 }
617
Julio Merino28774852016-09-14 16:59:46 +0000618 string exe = globals->options->GetExe(globals->jvm_path,
619 globals->extracted_binaries[0]);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100620 // Go to the workspace before we daemonize, so
621 // we can still print errors to the terminal.
Julio Merinoe3e3bfa2016-12-08 22:22:12 +0000622 GoToWorkspace(workspace_layout);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100623
Laszlo Csomor49970e02016-11-28 08:55:47 +0000624 ExecuteDaemon(exe, jvm_args_vector, globals->jvm_log_file, server_dir,
625 server_startup);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100626}
627
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100628// Replace this process with blaze in standalone/batch mode.
629// The batch mode blaze process handles the command and exits.
630//
631// This function passes the commands array to the blaze process.
632// This array should start with a command ("build", "info", etc.).
Julio Merinoe3e3bfa2016-12-08 22:22:12 +0000633static void StartStandalone(const WorkspaceLayout* workspace_layout,
634 BlazeServer* server) {
Lukacs Berki1977d922016-05-02 09:31:37 +0000635 if (server->Connected()) {
636 server->KillRunningServer();
637 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100638
639 // Wall clock time since process startup.
Laszlo Csomor943d3cf2016-11-07 14:27:21 +0000640 globals->startup_time = GetMillisecondsSinceProcessStart();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100641
642 if (VerboseLogging()) {
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000643 fprintf(stderr, "Starting %s in batch mode.\n",
Julio Merino28774852016-09-14 16:59:46 +0000644 globals->options->product_name.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100645 }
Julio Merino28774852016-09-14 16:59:46 +0000646 string command = globals->option_processor->GetCommand();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100647 vector<string> command_arguments;
Julio Merino28774852016-09-14 16:59:46 +0000648 globals->option_processor->GetCommandArguments(&command_arguments);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100649
650 if (!command_arguments.empty() && command == "shutdown") {
Julio Merino28774852016-09-14 16:59:46 +0000651 string product = globals->options->product_name;
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000652 blaze_util::ToLower(&product);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100653 fprintf(stderr,
654 "WARNING: Running command \"shutdown\" in batch mode. Batch mode "
Kristina Chodorow11d40d22015-03-17 18:26:59 +0000655 "is triggered\nwhen not running %s within a workspace. If you "
656 "intend to shutdown an\nexisting %s server, run \"%s "
657 "shutdown\" from the directory where\nit was started.\n",
Julio Merino28774852016-09-14 16:59:46 +0000658 globals->options->product_name.c_str(),
659 globals->options->product_name.c_str(), product.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100660 }
661 vector<string> jvm_args_vector = GetArgumentArray();
662 if (command != "") {
663 jvm_args_vector.push_back(command);
664 AddLoggingArgs(&jvm_args_vector);
665 }
666
667 jvm_args_vector.insert(jvm_args_vector.end(),
668 command_arguments.begin(),
669 command_arguments.end());
670
Julio Merinoe3e3bfa2016-12-08 22:22:12 +0000671 GoToWorkspace(workspace_layout);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100672
Julio Merino28774852016-09-14 16:59:46 +0000673 string exe = globals->options->GetExe(globals->jvm_path,
Eric Fellheimer3a695f32016-05-11 17:26:30 +0000674 globals->extracted_binaries[0]);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100675 ExecuteProgram(exe, jvm_args_vector);
676 pdie(blaze_exit_code::INTERNAL_ERROR, "execv of '%s' failed", exe.c_str());
677}
678
Laszlo Csomorae16e762016-11-18 10:16:08 +0000679static void WriteFileToStderrOrDie(const char *file_name) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100680 FILE *fp = fopen(file_name, "r");
681 if (fp == NULL) {
682 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
683 "opening %s failed", file_name);
684 }
685 char buffer[255];
686 int num_read;
687 while ((num_read = fread(buffer, 1, sizeof buffer, fp)) > 0) {
688 if (ferror(fp)) {
689 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
690 "failed to read from '%s'", file_name);
691 }
Laszlo Csomorae16e762016-11-18 10:16:08 +0000692 fwrite(buffer, 1, num_read, stderr);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100693 }
694 fclose(fp);
695}
696
Lukacs Berki4912f7f2016-06-17 16:12:22 +0000697// After connecting to the Blaze server, return its PID, or -1 if there was an
698// error.
Lukacs Berkid9da60f2016-04-26 11:40:24 +0000699static int GetServerPid(const string &server_dir) {
Lukacs Berki907dbbf2016-04-15 11:30:12 +0000700 // Note: there is no race here on startup since the server creates
701 // the pid file strictly before it binds the socket.
Thiago Farina048bbfc2016-09-21 08:20:41 +0000702 string pid_file = blaze_util::JoinPath(server_dir, kServerPidFile);
Laszlo Csomorae16e762016-11-18 10:16:08 +0000703 string bufstr;
Lukacs Berkiea4c42e2016-04-25 07:22:11 +0000704 int result;
Laszlo Csomor49970e02016-11-28 08:55:47 +0000705 if (!blaze_util::ReadFile(pid_file, &bufstr, 32) ||
Laszlo Csomor6450c182016-11-24 10:28:20 +0000706 !blaze_util::safe_strto32(bufstr, &result)) {
Lukacs Berkiea4c42e2016-04-25 07:22:11 +0000707 return -1;
708 }
709
710 return result;
Doug Rabsond655f2a2015-08-13 14:41:50 +0000711}
712
Lukacs Berki1977d922016-05-02 09:31:37 +0000713// Starts up a new server and connects to it. Exits if it didn't work not.
Julio Merinoe3e3bfa2016-12-08 22:22:12 +0000714static void StartServerAndConnect(const WorkspaceLayout* workspace_layout,
715 BlazeServer *server) {
Laszlo Csomor760f7862016-12-19 15:46:47 +0000716 string server_dir =
717 blaze_util::JoinPath(globals->options->output_base, "server");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100718
719 // The server dir has the socket, so we don't allow access by other
720 // users.
Thiago Farina227369a2016-12-07 12:40:40 +0000721 if (!blaze_util::MakeDirectories(server_dir, 0700)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100722 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
723 "server directory '%s' could not be created", server_dir.c_str());
724 }
725
Laszlo Csomor6450c182016-11-24 10:28:20 +0000726 // TODO(laszlocsomor) 2016-11-21: remove `pid_symlink` and the `remove` call
727 // after 2017-05-01 (~half a year from writing this comment). By that time old
728 // Bazel clients that used to write PID symlinks will probably no longer be in
729 // use.
730 // Until then, defensively delete old PID symlinks that older clients may have
731 // left behind.
732 string pid_symlink = blaze_util::JoinPath(server_dir, kServerPidSymlink);
733 remove(pid_symlink.c_str());
734
Lukacs Berki1977d922016-05-02 09:31:37 +0000735 // If we couldn't connect to the server check if there is still a PID file
736 // and if so, kill the server that wrote it. This can happen e.g. if the
737 // server is in a GC pause and therefore cannot respond to ping requests and
738 // having two server instances running in the same output base is a
739 // disaster.
740 int server_pid = GetServerPid(server_dir);
741 if (server_pid > 0) {
Julio Merino28774852016-09-14 16:59:46 +0000742 if (VerifyServerProcess(server_pid, globals->options->output_base,
743 globals->options->install_base) &&
Lukacs Berkiee44c382016-09-14 10:53:37 +0000744 KillServerProcess(server_pid)) {
Lukacs Berki119dd4b2016-07-13 15:28:42 +0000745 fprintf(stderr, "Killed non-responsive server process (pid=%d)\n",
746 server_pid);
747 }
Lukacs Berki7e0249e2016-04-21 08:14:08 +0000748 }
749
Julio Merino28774852016-09-14 16:59:46 +0000750 SetScheduling(globals->options->batch_cpu_scheduling,
751 globals->options->io_nice_level);
Lukacs Berkif1df38a2016-04-19 07:42:22 +0000752
Lukacs Berki1977d922016-05-02 09:31:37 +0000753 BlazeServerStartup* server_startup;
Julio Merinoe3e3bfa2016-12-08 22:22:12 +0000754 StartServer(workspace_layout, &server_startup);
Lukacs Berki5570bcc2016-11-15 15:45:58 +0000755
756 // Give the server two minutes to start up. That's enough to connect with a
757 // debugger.
758 auto try_until_time(
759 std::chrono::system_clock::now() + std::chrono::seconds(120));
760 bool had_to_wait = false;
761 while (std::chrono::system_clock::now() < try_until_time) {
762 auto next_attempt_time(
763 std::chrono::system_clock::now() + std::chrono::milliseconds(100));
Lukacs Berki1977d922016-05-02 09:31:37 +0000764 if (server->Connect()) {
Lukacs Berki5570bcc2016-11-15 15:45:58 +0000765 if (had_to_wait && !globals->options->client_debug) {
Lukacs Berki1977d922016-05-02 09:31:37 +0000766 fputc('\n', stderr);
767 fflush(stderr);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100768 }
Lukacs Berki1977d922016-05-02 09:31:37 +0000769 delete server_startup;
770 return;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100771 }
Lukacs Berki5570bcc2016-11-15 15:45:58 +0000772
773 had_to_wait = true;
Lukacs Berki71675a52016-11-08 09:48:27 +0000774 if (!globals->options->client_debug) {
775 fputc('.', stderr);
776 fflush(stderr);
777 }
778
Lukacs Berki5570bcc2016-11-15 15:45:58 +0000779 std::this_thread::sleep_until(next_attempt_time);
Lukacs Berki1977d922016-05-02 09:31:37 +0000780 if (!server_startup->IsStillAlive()) {
László Csomor6f1e31a2017-01-27 11:01:41 +0000781 fprintf(stderr,
782 "\nunexpected pipe read status: %s\n"
783 "Server presumed dead. Now printing '%s':\n",
784 blaze_util::GetLastErrorString().c_str(),
785 globals->jvm_log_file.c_str());
Laszlo Csomorae16e762016-11-18 10:16:08 +0000786 WriteFileToStderrOrDie(globals->jvm_log_file.c_str());
Lukacs Berki1977d922016-05-02 09:31:37 +0000787 exit(blaze_exit_code::INTERNAL_ERROR);
788 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100789 }
Lukacs Berki1977d922016-05-02 09:31:37 +0000790 die(blaze_exit_code::INTERNAL_ERROR,
Lukacs Berki5570bcc2016-11-15 15:45:58 +0000791 "\nError: couldn't connect to server after 120 seconds.");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100792}
793
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000794// A devtools_ijar::ZipExtractorProcessor to extract the files from the blaze
795// zip.
796class ExtractBlazeZipProcessor : public devtools_ijar::ZipExtractorProcessor {
797 public:
Thiago Farina9cb32752015-06-03 15:34:19 +0000798 explicit ExtractBlazeZipProcessor(const string &embedded_binaries)
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000799 : embedded_binaries_(embedded_binaries) {}
800
801 virtual bool Accept(const char *filename, const devtools_ijar::u4 attr) {
802 return !devtools_ijar::zipattr_is_dir(attr);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100803 }
804
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000805 virtual void Process(const char *filename, const devtools_ijar::u4 attr,
806 const devtools_ijar::u1 *data, const size_t size) {
807 string path = blaze_util::JoinPath(embedded_binaries_, filename);
Thiago Farina227369a2016-12-07 12:40:40 +0000808 if (!blaze_util::MakeDirectories(blaze_util::Dirname(path), 0777)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100809 pdie(blaze_exit_code::INTERNAL_ERROR,
810 "couldn't create '%s'", path.c_str());
811 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100812
Laszlo Csomor49970e02016-11-28 08:55:47 +0000813 if (!blaze_util::WriteFile(data, size, path)) {
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000814 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
Laszlo Csomorae16e762016-11-18 10:16:08 +0000815 "\nFailed to write zipped file \"%s\": %s", path.c_str(),
László Csomor6f1e31a2017-01-27 11:01:41 +0000816 blaze_util::GetLastErrorString().c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100817 }
818 }
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000819
820 private:
821 const string embedded_binaries_;
822};
823
824// Actually extracts the embedded data files into the tree whose root
825// is 'embedded_binaries'.
826static void ActuallyExtractData(const string &argv0,
827 const string &embedded_binaries) {
828 ExtractBlazeZipProcessor processor(embedded_binaries);
Thiago Farina227369a2016-12-07 12:40:40 +0000829 if (!blaze_util::MakeDirectories(embedded_binaries, 0777)) {
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000830 pdie(blaze_exit_code::INTERNAL_ERROR, "couldn't create '%s'",
831 embedded_binaries.c_str());
832 }
833
834 fprintf(stderr, "Extracting %s installation...\n",
Julio Merino28774852016-09-14 16:59:46 +0000835 globals->options->product_name.c_str());
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000836 std::unique_ptr<devtools_ijar::ZipExtractor> extractor(
837 devtools_ijar::ZipExtractor::Create(argv0.c_str(), &processor));
838 if (extractor.get() == NULL) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100839 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
László Csomor6f1e31a2017-01-27 11:01:41 +0000840 "\nFailed to open %s as a zip file: %s",
841 globals->options->product_name.c_str(),
842 blaze_util::GetLastErrorString().c_str());
Damien Martin-Guillerezeb6e9032015-06-01 14:45:21 +0000843 }
844 if (extractor->ProcessAll() < 0) {
845 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
846 "\nFailed to extract %s as a zip file: %s",
Julio Merino28774852016-09-14 16:59:46 +0000847 globals->options->product_name.c_str(), extractor->GetError());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100848 }
849
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100850 // Set the timestamps of the extracted files to the future and make sure (or
851 // at least as sure as we can...) that the files we have written are actually
852 // on the disk.
853
854 vector<string> extracted_files;
Laszlo Csomor251bf032016-11-16 11:01:32 +0000855
856 // Walks the temporary directory recursively and collects full file paths.
857 blaze_util::GetAllFilesUnder(embedded_binaries, &extracted_files);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100858
Laszlo Csomorce1b3e12017-01-19 14:56:30 +0000859 std::unique_ptr<blaze_util::IFileMtime> mtime(blaze_util::CreateFileMtime());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100860 set<string> synced_directories;
Laszlo Csomor251bf032016-11-16 11:01:32 +0000861 for (const auto &it : extracted_files) {
862 const char *extracted_path = it.c_str();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100863
864 // Set the time to a distantly futuristic value so we can observe tampering.
Laszlo Csomorce1b3e12017-01-19 14:56:30 +0000865 // Note that keeping a static, deterministic timestamp, such as the default
866 // timestamp set by unzip (1970-01-01) and using that to detect tampering is
867 // not enough, because we also need the timestamp to change between Bazel
868 // releases so that the metadata cache knows that the files may have
869 // changed. This is essential for the correctness of actions that use
870 // embedded binaries as artifacts.
871 if (!mtime.get()->SetToDistantFuture(it)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100872 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
873 "failed to set timestamp on '%s'", extracted_path);
874 }
875
Laszlo Csomorae16e762016-11-18 10:16:08 +0000876 blaze_util::SyncFile(it);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100877
878 string directory = blaze_util::Dirname(extracted_path);
879
880 // Now walk up until embedded_binaries and sync every directory in between.
881 // synced_directories is used to avoid syncing the same directory twice.
Laszlo Csomor760f7862016-12-19 15:46:47 +0000882 // The !directory.empty() and !blaze_util::IsRootDirectory(directory)
883 // conditions are not strictly needed, but it makes this loop more robust,
884 // because otherwise, if due to some glitch, directory was not under
885 // embedded_binaries, it would get into an infinite loop.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100886 while (directory != embedded_binaries &&
Laszlo Csomor760f7862016-12-19 15:46:47 +0000887 synced_directories.count(directory) == 0 && !directory.empty() &&
888 !blaze_util::IsRootDirectory(directory)) {
Laszlo Csomorae16e762016-11-18 10:16:08 +0000889 blaze_util::SyncFile(directory);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100890 synced_directories.insert(directory);
891 directory = blaze_util::Dirname(directory);
892 }
893 }
894
Laszlo Csomorae16e762016-11-18 10:16:08 +0000895 blaze_util::SyncFile(embedded_binaries);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100896}
897
898// Installs Blaze by extracting the embedded data files, iff necessary.
899// The MD5-named install_base directory on disk is trusted; we assume
900// no-one has modified the extracted files beneath this directory once
901// it is in place. Concurrency during extraction is handled by
902// extracting in a tmp dir and then renaming it into place where it
903// becomes visible automically at the new path.
904// Populates globals->extracted_binaries with their extracted locations.
905static void ExtractData(const string &self_path) {
906 // If the install dir doesn't exist, create it, if it does, we know it's good.
Laszlo Csomor8a48f612016-11-17 10:18:34 +0000907 if (!blaze_util::PathExists(globals->options->install_base)) {
Laszlo Csomor943d3cf2016-11-07 14:27:21 +0000908 uint64_t st = GetMillisecondsMonotonic();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100909 // Work in a temp dir to avoid races.
Julio Merino28774852016-09-14 16:59:46 +0000910 string tmp_install = globals->options->install_base + ".tmp." +
Laszlo Csomorae16e762016-11-18 10:16:08 +0000911 blaze::GetProcessIdAsString();
Laszlo Csomor760f7862016-12-19 15:46:47 +0000912 string tmp_binaries =
913 blaze_util::JoinPath(tmp_install, "_embedded_binaries");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100914 ActuallyExtractData(self_path, tmp_binaries);
915
Laszlo Csomor943d3cf2016-11-07 14:27:21 +0000916 uint64_t et = GetMillisecondsMonotonic();
917 globals->extract_data_time = et - st;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100918
919 // Now rename the completed installation to its final name. If this
920 // fails due to an ENOTEMPTY then we assume another good
921 // installation snuck in before us.
Laszlo Csomor49970e02016-11-28 08:55:47 +0000922 if (rename(tmp_install.c_str(), globals->options->install_base.c_str()) ==
923 -1 &&
924 errno != ENOTEMPTY) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100925 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
926 "install base directory '%s' could not be renamed into place",
927 tmp_install.c_str());
928 }
929 } else {
Laszlo Csomor8a48f612016-11-17 10:18:34 +0000930 if (!blaze_util::IsDirectory(globals->options->install_base)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100931 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
932 "Error: Install base directory '%s' could not be created. "
933 "It exists but is not a directory.",
Julio Merino28774852016-09-14 16:59:46 +0000934 globals->options->install_base.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100935 }
936
Laszlo Csomorce1b3e12017-01-19 14:56:30 +0000937 std::unique_ptr<blaze_util::IFileMtime> mtime(
938 blaze_util::CreateFileMtime());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100939 string real_install_dir = blaze_util::JoinPath(
Julio Merino28774852016-09-14 16:59:46 +0000940 globals->options->install_base,
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100941 "_embedded_binaries");
942 for (const auto& it : globals->extracted_binaries) {
943 string path = blaze_util::JoinPath(real_install_dir, it);
944 // Check that the file exists and is readable.
Laszlo Csomor00549b42017-01-11 09:12:10 +0000945 if (blaze_util::IsDirectory(path)) {
946 continue;
947 }
948 if (!blaze_util::CanReadFile(path)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100949 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
950 "Error: corrupt installation: file '%s' missing."
951 " Please remove '%s' and try again.",
Julio Merino28774852016-09-14 16:59:46 +0000952 path.c_str(), globals->options->install_base.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100953 }
Laszlo Csomor8a48f612016-11-17 10:18:34 +0000954 // Check that the timestamp is in the future. A past timestamp would
955 // indicate that the file has been tampered with.
956 // See ActuallyExtractData().
Laszlo Csomorce1b3e12017-01-19 14:56:30 +0000957 bool is_in_future = false;
958 if (!mtime.get()->GetIfInDistantFuture(path, &is_in_future)) {
Laszlo Csomor00549b42017-01-11 09:12:10 +0000959 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
960 "Error: could not retrieve mtime of file '%s'. "
961 "Please remove '%s' and try again.",
962 path.c_str(), globals->options->install_base.c_str());
Laszlo Csomorce1b3e12017-01-19 14:56:30 +0000963 }
964 if (!is_in_future) {
Laszlo Csomor00549b42017-01-11 09:12:10 +0000965 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
966 "Error: corrupt installation: file '%s' "
967 "modified. Please remove '%s' and try again.",
968 path.c_str(), globals->options->install_base.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100969 }
970 }
971 }
972}
973
// Prefixes of the startup options that are allowed to differ between the
// command line of the running server and that of the current invocation
// without forcing a server restart. The list is terminated by a null pointer.
const char *volatile_startup_options[] = {
  "--option_sources=",
  "--max_idle_secs=",
  "--connect_timeout_secs=",
  "--client_debug=",
  nullptr,
};

// Returns true if the server needs to be restarted to accommodate changes
// between the two argument lists.
//
// Two lists are compatible when they have the same length and, position by
// position, the arguments are either equal or both start with the same prefix
// from volatile_startup_options.
static bool ServerNeedsToBeKilled(const std::vector<std::string>& args1,
                                  const std::vector<std::string>& args2) {
  // We need not worry about one side missing an argument and the other side
  // having the default value, since this command line is already the
  // canonicalized one that always contains every switch (with default values
  // if it was not present on the real command line). Same applies for argument
  // ordering.
  if (args1.size() != args2.size()) {
    return true;
  }

  for (size_t i = 0; i < args1.size(); ++i) {
    if (args1[i] == args2[i]) {
      continue;
    }

    // The arguments differ; that is only acceptable if both of them set the
    // same volatile option.
    bool option_volatile = false;
    for (const char* const* candidate = volatile_startup_options;
         *candidate != nullptr;
         ++candidate) {
      const size_t prefix_length = strlen(*candidate);
      // compare() looks at (at most) the first prefix_length characters in
      // place, so an argument shorter than the prefix never matches.
      if (args1[i].compare(0, prefix_length, *candidate) == 0 &&
          args2[i].compare(0, prefix_length, *candidate) == 0) {
        option_volatile = true;
        break;
      }
    }

    if (!option_volatile) {
      return true;
    }
  }

  return false;
}
1015
1016// Kills the running Blaze server, if any, if the startup options do not match.
Lukacs Berki907dbbf2016-04-15 11:30:12 +00001017static void KillRunningServerIfDifferentStartupOptions(BlazeServer* server) {
Lukacs Berki1977d922016-05-02 09:31:37 +00001018 if (!server->Connected()) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001019 return;
1020 }
1021
Laszlo Csomor760f7862016-12-19 15:46:47 +00001022 string cmdline_path =
1023 blaze_util::JoinPath(globals->options->output_base, "server/cmdline");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001024 string joined_arguments;
1025
1026 // No, /proc/$PID/cmdline does not work, because it is limited to 4K. Even
1027 // worse, its behavior differs slightly between kernels (in some, when longer
1028 // command lines are truncated, the last 4 bytes are replaced with
1029 // "..." + NUL.
Laszlo Csomor49970e02016-11-28 08:55:47 +00001030 blaze_util::ReadFile(cmdline_path, &joined_arguments);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001031 vector<string> arguments = blaze_util::Split(joined_arguments, '\0');
1032
1033 // These strings contain null-separated command line arguments. If they are
1034 // the same, the server can stay alive, otherwise, it needs shuffle off this
1035 // mortal coil.
1036 if (ServerNeedsToBeKilled(arguments, GetArgumentArray())) {
1037 globals->restart_reason = NEW_OPTIONS;
1038 fprintf(stderr,
Kristina Chodorow11d40d22015-03-17 18:26:59 +00001039 "WARNING: Running %s server needs to be killed, because the "
1040 "startup options are different.\n",
Julio Merino28774852016-09-14 16:59:46 +00001041 globals->options->product_name.c_str());
Lukacs Berki1977d922016-05-02 09:31:37 +00001042 server->KillRunningServer();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001043 }
1044}
1045
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001046// Kills the old running server if it is not the same version as us,
1047// dealing with various combinations of installation scheme
1048// (installation symlink and older MD5_MANIFEST contents).
1049// This function requires that the installation be complete, and the
1050// server lock acquired.
Lukacs Berki907dbbf2016-04-15 11:30:12 +00001051static void EnsureCorrectRunningVersion(BlazeServer* server) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001052 // Read the previous installation's semaphore symlink in output_base. If the
1053 // target dirs don't match, or if the symlink was not present, then kill any
1054 // running servers. Lastly, symlink to our installation so others know which
1055 // installation is running.
Laszlo Csomor760f7862016-12-19 15:46:47 +00001056 string installation_path =
1057 blaze_util::JoinPath(globals->options->output_base, "install");
Lukacs Berki497d8242016-04-28 07:21:26 +00001058 string prev_installation;
Laszlo Csomor49970e02016-11-28 08:55:47 +00001059 bool ok = ReadDirectorySymlink(installation_path, &prev_installation);
Lukacs Berki497d8242016-04-28 07:21:26 +00001060 if (!ok || !CompareAbsolutePaths(
Julio Merino28774852016-09-14 16:59:46 +00001061 prev_installation, globals->options->install_base)) {
Lukacs Berki1977d922016-05-02 09:31:37 +00001062 if (server->Connected()) {
1063 server->KillRunningServer();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001064 }
Lukacs Berki1977d922016-05-02 09:31:37 +00001065
1066 globals->restart_reason = NEW_VERSION;
Laszlo Csomor49970e02016-11-28 08:55:47 +00001067 blaze_util::UnlinkPath(installation_path);
1068 if (!SymlinkDirectories(globals->options->install_base,
1069 installation_path)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001070 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1071 "failed to create installation symlink '%s'",
1072 installation_path.c_str());
1073 }
Lukacs Berkib762afd2017-01-17 10:41:52 +00001074
1075 // Update the mtime of the install base so that cleanup tools can
1076 // find install bases that haven't been used for a long time
Laszlo Csomorce1b3e12017-01-19 14:56:30 +00001077 std::unique_ptr<blaze_util::IFileMtime> mtime(
1078 blaze_util::CreateFileMtime());
1079 if (!mtime.get()->SetToNow(globals->options->install_base)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001080 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1081 "failed to set timestamp on '%s'",
Julio Merino28774852016-09-14 16:59:46 +00001082 globals->options->install_base.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001083 }
1084 }
1085}
1086
Laszlo Csomor32086b22016-11-24 15:23:55 +00001087static void CancelServer() {
1088 blaze_server->Cancel();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001089}
1090
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001091// Performs all I/O for a single client request to the server, and
1092// shuts down the client (by exit or signal).
Julio Merinoe3e3bfa2016-12-08 22:22:12 +00001093static ATTRIBUTE_NORETURN void SendServerRequest(
1094 const WorkspaceLayout* workspace_layout, BlazeServer* server) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001095 while (true) {
Lukacs Berki1977d922016-05-02 09:31:37 +00001096 if (!server->Connected()) {
Julio Merinoe3e3bfa2016-12-08 22:22:12 +00001097 StartServerAndConnect(workspace_layout, server);
Lukacs Berki1977d922016-05-02 09:31:37 +00001098 }
1099
Lukacs Berki4de98942016-09-09 09:23:36 +00001100 // Check for the case when the workspace directory deleted and then gets
1101 // recreated while the server is running
1102
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001103 string server_cwd = GetProcessCWD(globals->server_pid);
Lukacs Berki4be230a2015-10-15 13:43:03 +00001104 // If server_cwd is empty, GetProcessCWD failed. This notably occurs when
1105 // running under Docker because then readlink(/proc/[pid]/cwd) returns
1106 // EPERM.
1107 // Docker issue #6687 (https://github.com/docker/docker/issues/6687) fixed
1108 // this, but one still needs the --cap-add SYS_PTRACE command line flag, at
1109 // least according to the discussion on Docker issue #6800
1110 // (https://github.com/docker/docker/issues/6687), and even then, it's a
1111 // non-default Docker flag. Given that this occurs only in very weird
1112 // cases, it's better to assume that everything is alright if we can't get
1113 // the cwd.
1114
1115 if (!server_cwd.empty() &&
1116 (server_cwd != globals->workspace || // changed
1117 server_cwd.find(" (deleted)") != string::npos)) { // deleted.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001118 // There's a distant possibility that the two paths look the same yet are
1119 // actually different because the two processes have different mount
1120 // tables.
1121 if (VerboseLogging()) {
1122 fprintf(stderr, "Server's cwd moved or deleted (%s).\n",
1123 server_cwd.c_str());
1124 }
Lukacs Berki1977d922016-05-02 09:31:37 +00001125 server->KillRunningServer();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001126 } else {
1127 break;
1128 }
1129 }
1130
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001131 if (VerboseLogging()) {
1132 fprintf(stderr, "Connected (server pid=%d).\n", globals->server_pid);
1133 }
1134
1135 // Wall clock time since process startup.
Laszlo Csomor943d3cf2016-11-07 14:27:21 +00001136 globals->startup_time = GetMillisecondsSinceProcessStart();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001137
Laszlo Csomor32086b22016-11-24 15:23:55 +00001138 SignalHandler::Get().Install(globals, CancelServer);
1139 SignalHandler::Get().PropagateSignalOrExit(server->Communicate());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001140}
1141
1142// Parse the options, storing parsed values in globals.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001143static void ParseOptions(int argc, const char *argv[]) {
1144 string error;
1145 blaze_exit_code::ExitCode parse_exit_code =
Julio Merino28774852016-09-14 16:59:46 +00001146 globals->option_processor->ParseOptions(argc, argv, globals->workspace,
1147 globals->cwd, &error);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001148 if (parse_exit_code != blaze_exit_code::SUCCESS) {
1149 die(parse_exit_code, "%s", error.c_str());
1150 }
Julio Merino28774852016-09-14 16:59:46 +00001151 globals->options = globals->option_processor->GetParsedStartupOptions();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001152}
1153
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001154// Compute the globals globals->cwd and globals->workspace.
Julio Merinoe3e3bfa2016-12-08 22:22:12 +00001155static void ComputeWorkspace(const WorkspaceLayout* workspace_layout) {
Laszlo Csomorc3545392016-11-24 13:33:28 +00001156 globals->cwd = blaze_util::MakeCanonical(blaze_util::GetCwd().c_str());
1157 if (globals->cwd.empty()) {
1158 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1159 "blaze_util::MakeCanonical('%s') failed",
1160 blaze_util::GetCwd().c_str());
1161 }
Julio Merinoe3e3bfa2016-12-08 22:22:12 +00001162 globals->workspace = workspace_layout->GetWorkspace(globals->cwd);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001163}
1164
1165// Figure out the base directories based on embedded data, username, cwd, etc.
Julio Merino28774852016-09-14 16:59:46 +00001166// Sets globals->options->install_base, globals->options->output_base,
Thiago Farina6fd9bf12016-04-26 09:02:18 +00001167// globals->lockfile, globals->jvm_log_file.
Julio Merinoe3e3bfa2016-12-08 22:22:12 +00001168static void ComputeBaseDirectories(const WorkspaceLayout* workspace_layout,
1169 const string &self_path) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001170 // Only start a server when in a workspace because otherwise we won't do more
1171 // than emit a help message.
Julio Merinoe3e3bfa2016-12-08 22:22:12 +00001172 if (!workspace_layout->InWorkspace(globals->workspace)) {
Julio Merino28774852016-09-14 16:59:46 +00001173 globals->options->batch = true;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001174 }
1175
1176 // The default install_base is <output_user_root>/install/<md5(blaze)>
1177 // but if an install_base is specified on the command line, we use that as
1178 // the base instead.
Julio Merino28774852016-09-14 16:59:46 +00001179 if (globals->options->install_base.empty()) {
Laszlo Csomor760f7862016-12-19 15:46:47 +00001180 string install_user_root =
1181 blaze_util::JoinPath(globals->options->output_user_root, "install");
Julio Merino28774852016-09-14 16:59:46 +00001182 globals->options->install_base =
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001183 GetInstallBase(install_user_root, self_path);
1184 } else {
Eric Fellheimer4c5eb0f2015-08-12 15:02:24 +00001185 // We call GetInstallBase anyway to populate extracted_binaries and
1186 // install_md5.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001187 GetInstallBase("", self_path);
1188 }
1189
Julio Merino28774852016-09-14 16:59:46 +00001190 if (globals->options->output_base.empty()) {
Laszlo Csomor6bf95762016-11-16 13:29:22 +00001191 globals->options->output_base = blaze::GetHashedBaseDir(
Julio Merino28774852016-09-14 16:59:46 +00001192 globals->options->output_user_root, globals->workspace);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001193 }
1194
Julio Merino28774852016-09-14 16:59:46 +00001195 const char *output_base = globals->options->output_base.c_str();
Laszlo Csomor8a48f612016-11-17 10:18:34 +00001196 if (!blaze_util::PathExists(globals->options->output_base)) {
Thiago Farina227369a2016-12-07 12:40:40 +00001197 if (!blaze_util::MakeDirectories(globals->options->output_base, 0777)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001198 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1199 "Output base directory '%s' could not be created",
Dave MacLachlan6b747ee2016-07-20 10:00:44 +00001200 output_base);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001201 }
1202 } else {
Laszlo Csomor8a48f612016-11-17 10:18:34 +00001203 if (!blaze_util::IsDirectory(globals->options->output_base)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001204 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1205 "Error: Output base directory '%s' could not be created. "
1206 "It exists but is not a directory.",
Dave MacLachlan6b747ee2016-07-20 10:00:44 +00001207 output_base);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001208 }
1209 }
Laszlo Csomor00549b42017-01-11 09:12:10 +00001210 if (!blaze_util::CanAccessDirectory(globals->options->output_base)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001211 die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1212 "Error: Output base directory '%s' must be readable and writable.",
Dave MacLachlan6b747ee2016-07-20 10:00:44 +00001213 output_base);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001214 }
Dave MacLachlan6b747ee2016-07-20 10:00:44 +00001215 ExcludePathFromBackup(output_base);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001216
Laszlo Csomorc3545392016-11-24 13:33:28 +00001217 globals->options->output_base = blaze_util::MakeCanonical(output_base);
1218 if (globals->options->output_base.empty()) {
1219 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
1220 "blaze_util::MakeCanonical('%s') failed", output_base);
1221 }
Chloe Calvarin78f1c852016-11-22 21:58:50 +00001222
Laszlo Csomor760f7862016-12-19 15:46:47 +00001223 globals->lockfile =
1224 blaze_util::JoinPath(globals->options->output_base, "lock");
1225 globals->jvm_log_file =
1226 blaze_util::JoinPath(globals->options->output_base, "server/jvm.out");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001227}
1228
1229static void CheckEnvironment() {
Laszlo Csomorcefa9a22016-11-22 10:50:07 +00001230 if (!blaze::GetEnv("http_proxy").empty()) {
Lukacs Berki86a28b02016-10-25 10:34:45 +00001231 fprintf(stderr, "Warning: ignoring http_proxy in environment.\n");
Laszlo Csomorcefa9a22016-11-22 10:50:07 +00001232 blaze::UnsetEnv("http_proxy");
Lukacs Berki86a28b02016-10-25 10:34:45 +00001233 }
1234
Laszlo Csomorcefa9a22016-11-22 10:50:07 +00001235 if (!blaze::GetEnv("LD_ASSUME_KERNEL").empty()) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001236 // Fix for bug: if ulimit -s and LD_ASSUME_KERNEL are both
1237 // specified, the JVM fails to create threads. See thread_stack_regtest.
1238 // This is also provoked by LD_LIBRARY_PATH=/usr/lib/debug,
1239 // or anything else that causes the JVM to use LinuxThreads.
1240 fprintf(stderr, "Warning: ignoring LD_ASSUME_KERNEL in environment.\n");
Laszlo Csomorcefa9a22016-11-22 10:50:07 +00001241 blaze::UnsetEnv("LD_ASSUME_KERNEL");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001242 }
1243
Laszlo Csomorcefa9a22016-11-22 10:50:07 +00001244 if (!blaze::GetEnv("LD_PRELOAD").empty()) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001245 fprintf(stderr, "Warning: ignoring LD_PRELOAD in environment.\n");
Laszlo Csomorcefa9a22016-11-22 10:50:07 +00001246 blaze::UnsetEnv("LD_PRELOAD");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001247 }
1248
Laszlo Csomorcefa9a22016-11-22 10:50:07 +00001249 if (!blaze::GetEnv("_JAVA_OPTIONS").empty()) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001250 // This would override --host_jvm_args
1251 fprintf(stderr, "Warning: ignoring _JAVA_OPTIONS in environment.\n");
Laszlo Csomorcefa9a22016-11-22 10:50:07 +00001252 blaze::UnsetEnv("_JAVA_OPTIONS");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001253 }
1254
Laszlo Csomorcefa9a22016-11-22 10:50:07 +00001255 if (!blaze::GetEnv("TEST_TMPDIR").empty()) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001256 fprintf(stderr, "INFO: $TEST_TMPDIR defined: output root default is "
Julio Merino28774852016-09-14 16:59:46 +00001257 "'%s'.\n", globals->options->output_root.c_str());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001258 }
1259
1260 // TODO(bazel-team): We've also seen a failure during loading (creating
1261 // threads?) when ulimit -Hs 8192. Characterize that and check for it here.
1262
1263 // Make the JVM use ISO-8859-1 for parsing its command line because "blaze
1264 // run" doesn't handle non-ASCII command line arguments. This is apparently
1265 // the most reliable way to select the platform default encoding.
Laszlo Csomorcefa9a22016-11-22 10:50:07 +00001266 blaze::SetEnv("LANG", "en_US.ISO-8859-1");
1267 blaze::SetEnv("LANGUAGE", "en_US.ISO-8859-1");
1268 blaze::SetEnv("LC_ALL", "en_US.ISO-8859-1");
1269 blaze::SetEnv("LC_CTYPE", "en_US.ISO-8859-1");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001270}
1271
Laszlo Csomorc3545392016-11-24 13:33:28 +00001272static string CheckAndGetBinaryPath(const string& argv0) {
Laszlo Csomor760f7862016-12-19 15:46:47 +00001273 if (blaze_util::IsAbsolute(argv0)) {
Laszlo Csomorc3545392016-11-24 13:33:28 +00001274 return argv0;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001275 } else {
Laszlo Csomor760f7862016-12-19 15:46:47 +00001276 string abs_path = blaze_util::JoinPath(globals->cwd, argv0);
Laszlo Csomorc3545392016-11-24 13:33:28 +00001277 string resolved_path = blaze_util::MakeCanonical(abs_path.c_str());
1278 if (!resolved_path.empty()) {
1279 return resolved_path;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001280 } else {
1281 // This happens during our integration tests, but thats okay, as we won't
1282 // log the invocation anyway.
Laszlo Csomorc3545392016-11-24 13:33:28 +00001283 return abs_path;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001284 }
1285 }
1286}
1287
Chloe Calvarineaa3be72016-12-13 19:48:34 +00001288int GetExitCodeForAbruptExit(const GlobalVariables &globals) {
1289 const StartupOptions *startup_options = globals.options;
1290 if (startup_options->use_custom_exit_code_on_abrupt_exit) {
1291 BAZEL_LOG(INFO) << "Looking for a custom exit-code.";
1292 std::string filename = blaze_util::JoinPath(
1293 globals.options->output_base, "exit_code_to_use_on_abrupt_exit");
1294 std::string content;
1295 if (!blaze_util::ReadFile(filename, &content)) {
1296 BAZEL_LOG(INFO) << "Unable to read the custom exit-code file. "
1297 << "Exiting with an INTERNAL_ERROR.";
1298 return blaze_exit_code::INTERNAL_ERROR;
1299 }
1300 if (!blaze_util::UnlinkPath(filename)) {
1301 BAZEL_LOG(INFO) << "Unable to delete the custom exit-code file. "
1302 << "Exiting with an INTERNAL_ERROR.";
1303 return blaze_exit_code::INTERNAL_ERROR;
1304 }
1305 int custom_exit_code;
1306 if (!blaze_util::safe_strto32(content, &custom_exit_code)) {
1307 BAZEL_LOG(INFO) << "Content of custom exit-code file not an int: "
1308 << content << "Exiting with an INTERNAL_ERROR.";
1309 return blaze_exit_code::INTERNAL_ERROR;
1310 }
1311 BAZEL_LOG(INFO) << "Read exit code " << custom_exit_code
1312 << " from custom exit-code file. Exiting accordingly.";
1313 return custom_exit_code;
1314 }
1315 return blaze_exit_code::INTERNAL_ERROR;
1316}
1317
Julio Merinoe3e3bfa2016-12-08 22:22:12 +00001318int Main(int argc, const char *argv[], WorkspaceLayout* workspace_layout,
1319 OptionProcessor *option_processor,
Chloe Calvarin78f1c852016-11-22 21:58:50 +00001320 std::unique_ptr<blaze_util::LogHandler> log_handler) {
1321 // Logging must be set first to assure no log statements are missed.
1322 blaze_util::SetLogHandler(std::move(log_handler));
Julio Merinoe3e3bfa2016-12-08 22:22:12 +00001323
Thiago Farina676cb9f2016-10-06 11:00:43 +00001324 globals = new GlobalVariables(option_processor);
Laszlo Csomor74ffaf72016-11-24 12:17:20 +00001325 blaze::SetupStdStreams();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001326
1327 // Must be done before command line parsing.
Julio Merinoe3e3bfa2016-12-08 22:22:12 +00001328 ComputeWorkspace(workspace_layout);
Laszlo Csomorc3545392016-11-24 13:33:28 +00001329 globals->binary_path = CheckAndGetBinaryPath(argv[0]);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001330 ParseOptions(argc, argv);
Lukacs Berkibb2230f2016-04-27 14:19:25 +00001331
Lukacs Berki71675a52016-11-08 09:48:27 +00001332 debug_log("Debug logging active");
1333
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001334 CheckEnvironment();
Laszlo Csomor8a48f612016-11-17 10:18:34 +00001335 blaze::CreateSecureOutputRoot(globals->options->output_user_root);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001336
1337 const string self_path = GetSelfPath();
Julio Merinoe3e3bfa2016-12-08 22:22:12 +00001338 ComputeBaseDirectories(workspace_layout, self_path);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001339
Lukacs Berki71675a52016-11-08 09:48:27 +00001340 blaze_server = static_cast<BlazeServer *>(new GrpcBlazeServer(
1341 globals->options->connect_timeout_secs));
Lukacs Berki907dbbf2016-04-15 11:30:12 +00001342
Lukacs Berki415d39a2016-04-28 13:18:54 +00001343 globals->command_wait_time = blaze_server->AcquireLock();
Lukacs Berkice1445f2016-04-19 15:52:55 +00001344
Julio Merino28774852016-09-14 16:59:46 +00001345 WarnFilesystemType(globals->options->output_base);
Lukacs Berkice1445f2016-04-19 15:52:55 +00001346
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001347 ExtractData(self_path);
Lukacs Berki949c8762016-07-08 12:17:28 +00001348 VerifyJavaVersionAndSetJvm();
1349
Lukacs Berki1977d922016-05-02 09:31:37 +00001350 blaze_server->Connect();
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001351 EnsureCorrectRunningVersion(blaze_server);
1352 KillRunningServerIfDifferentStartupOptions(blaze_server);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001353
Julio Merino28774852016-09-14 16:59:46 +00001354 if (globals->options->batch) {
1355 SetScheduling(globals->options->batch_cpu_scheduling,
1356 globals->options->io_nice_level);
Julio Merinoe3e3bfa2016-12-08 22:22:12 +00001357 StartStandalone(workspace_layout, blaze_server);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001358 } else {
Julio Merinoe3e3bfa2016-12-08 22:22:12 +00001359 SendServerRequest(workspace_layout, blaze_server);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001360 }
1361 return 0;
1362}
Thiago Farina0b6963e2015-04-28 20:26:45 +00001363
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001364static void null_grpc_log_function(gpr_log_func_args *args) {
1365}
1366
Lukacs Berki71675a52016-11-08 09:48:27 +00001367GrpcBlazeServer::GrpcBlazeServer(int connect_timeout_secs) {
Lukacs Berki1977d922016-05-02 09:31:37 +00001368 connected_ = false;
Lukacs Berki71675a52016-11-08 09:48:27 +00001369 connect_timeout_secs_ = connect_timeout_secs;
1370
1371 gpr_set_log_function(null_grpc_log_function);
1372
Thiago Farina0bba4c92016-12-14 15:29:11 +00001373 pipe_ = blaze_util::CreatePipe();
1374 if (pipe_ == NULL) {
Laszlo Csomoref5ceef2016-11-18 11:19:02 +00001375 pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, "Couldn't create pipe");
Lukacs Berki6dd29092016-05-30 14:05:33 +00001376 }
1377}
1378
1379GrpcBlazeServer::~GrpcBlazeServer() {
Thiago Farina0bba4c92016-12-14 15:29:11 +00001380 delete pipe_;
1381 pipe_ = NULL;
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001382}
1383
Lukacs Berki10dd6382017-01-11 09:08:54 +00001384bool GrpcBlazeServer::TryConnect(command_server::CommandServer::Stub* client) {
1385 grpc::ClientContext context;
1386 context.set_deadline(
1387 std::chrono::system_clock::now() +
1388 std::chrono::seconds(connect_timeout_secs_));
1389
1390 command_server::PingRequest request;
1391 command_server::PingResponse response;
1392 request.set_cookie(request_cookie_);
1393
1394 debug_log("Trying to connect to server (timeout: %d secs)...",
1395 connect_timeout_secs_);
1396 grpc::Status status = client->Ping(&context, request, &response);
1397
1398 if (!status.ok() || response.cookie() != response_cookie_) {
1399 debug_log("Connection to server failed: %s",
1400 status.error_message().c_str());
1401 return false;
1402 }
1403
1404 return true;
1405}
1406
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001407bool GrpcBlazeServer::Connect() {
Lukacs Berki1977d922016-05-02 09:31:37 +00001408 assert(!connected_);
1409
Laszlo Csomor760f7862016-12-19 15:46:47 +00001410 std::string server_dir =
1411 blaze_util::JoinPath(globals->options->output_base, "server");
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001412 std::string port;
Lukacs Berkib7caf9d2016-04-25 09:44:14 +00001413 std::string ipv4_prefix = "127.0.0.1:";
Lukacs Berkic8e74242016-04-28 08:32:04 +00001414 std::string ipv6_prefix_1 = "[0:0:0:0:0:0:0:1]:";
1415 std::string ipv6_prefix_2 = "[::1]:";
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001416
Laszlo Csomor760f7862016-12-19 15:46:47 +00001417 if (!blaze_util::ReadFile(blaze_util::JoinPath(server_dir, "command_port"),
1418 &port)) {
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001419 return false;
1420 }
1421
Lukacs Berkib7caf9d2016-04-25 09:44:14 +00001422 // Make sure that we are being directed to localhost
1423 if (port.compare(0, ipv4_prefix.size(), ipv4_prefix)
Lukacs Berkic8e74242016-04-28 08:32:04 +00001424 && port.compare(0, ipv6_prefix_1.size(), ipv6_prefix_1)
1425 && port.compare(0, ipv6_prefix_2.size(), ipv6_prefix_2)) {
Lukacs Berkib7caf9d2016-04-25 09:44:14 +00001426 return false;
1427 }
1428
Laszlo Csomor760f7862016-12-19 15:46:47 +00001429 if (!blaze_util::ReadFile(blaze_util::JoinPath(server_dir, "request_cookie"),
1430 &request_cookie_)) {
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001431 return false;
1432 }
1433
Laszlo Csomor760f7862016-12-19 15:46:47 +00001434 if (!blaze_util::ReadFile(blaze_util::JoinPath(server_dir, "response_cookie"),
Laszlo Csomor49970e02016-11-28 08:55:47 +00001435 &response_cookie_)) {
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001436 return false;
1437 }
1438
1439 std::shared_ptr<grpc::Channel> channel(grpc::CreateChannel(
Lukacs Berkib7caf9d2016-04-25 09:44:14 +00001440 port, grpc::InsecureChannelCredentials()));
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001441 std::unique_ptr<command_server::CommandServer::Stub> client(
1442 command_server::CommandServer::NewStub(channel));
1443
Lukacs Berki10dd6382017-01-11 09:08:54 +00001444 if (!TryConnect(client.get())) {
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001445 return false;
1446 }
1447
Lukacs Berki00cfb7d2016-04-20 09:01:52 +00001448 this->client_ = std::move(client);
Lukacs Berki1977d922016-05-02 09:31:37 +00001449 connected_ = true;
Lukacs Berki10dd6382017-01-11 09:08:54 +00001450
1451 globals->server_pid = GetServerPid(server_dir);
1452 if (globals->server_pid <= 0) {
1453 fprintf(stderr, "Can't get PID of existing server (server dir=%s). "
1454 "Shutting it down and starting a new one...\n",
1455 server_dir.c_str());
1456 // This means that we have a server we could connect to but without a PID
1457 // file, which in turn means that something went wrong before. Kill the
1458 // server so that we can start with as clean a slate as possible. This may
1459 // happen if someone (e.g. a client or server that's very old and uses an
1460 // AF_UNIX socket instead of gRPC) deletes the server.pid.txt file.
1461 KillRunningServer();
1462 // Then wait until it actually dies
1463 do {
1464 auto next_attempt_time(
1465 std::chrono::system_clock::now() + std::chrono::milliseconds(1000));
1466 std::this_thread::sleep_until(next_attempt_time);
1467 } while (TryConnect(client_.get()));
1468
1469 return false;
1470 }
1471
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001472 return true;
1473}
1474
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001475// Cancellation works as follows:
1476//
1477// When the user presses Ctrl-C, a SIGINT is delivered to the client, which is
1478// translated into a BlazeServer::Cancel() call. Since it's not a good idea to
Lukacs Berki6dd29092016-05-30 14:05:33 +00001479// do significant work in signal handlers, all it does is write a byte to an
1480// unnamed pipe.
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001481//
Lukacs Berki6dd29092016-05-30 14:05:33 +00001482// This unnamed pipe is used to communicate with the cancel thread. Whenever
1483// something interesting happens, a byte is written into it, which is read by
1484// the cancel thread. These commands are available:
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001485//
Lukacs Berki6dd29092016-05-30 14:05:33 +00001486// - NOP
1487// - JOIN. The cancel thread needs to be terminated.
1488// - CANCEL. If the command ID is already available, a cancel request is sent.
1489// - COMMAND_ID_RECEIVED. The client learned the command ID from the server.
1490// If there is a pending cancellation request, it is acted upon.
1491//
1492// The only data the cancellation thread shares with the main thread is the
1493// file descriptor for receiving commands and command_id_, the latter of which
1494// is protected by a mutex, which mainly serves as a memory fence.
1495//
1496// The cancellation thread is joined at the end of the execution of the command.
1497// The main thread wakes it up just so that it can finish (using the JOIN
1498// action)
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001499//
1500// It's conceivable that the server is busy and thus it cannot service the
1501// cancellation request. In that case, we simply ignore the failure and the both
1502// the server and the client go on as if nothing had happened (except that this
Lukacs Berkie6a34f62016-04-25 12:16:04 +00001503// Ctrl-C still counts as a SIGINT, three of which result in a SIGKILL being
1504// delivered to the server)
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001505void GrpcBlazeServer::CancelThread() {
1506 bool running = true;
Lukacs Berki6dd29092016-05-30 14:05:33 +00001507 bool cancel = false;
1508 bool command_id_received = false;
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001509 while (running) {
Lukacs Berki6dd29092016-05-30 14:05:33 +00001510 char buf;
Laszlo Csomoref5ceef2016-11-18 11:19:02 +00001511
Thiago Farina0bba4c92016-12-14 15:29:11 +00001512 int bytes_read = pipe_->Receive(&buf, 1);
Laszlo Csomoref5ceef2016-11-18 11:19:02 +00001513 if (bytes_read < 0 && errno == EINTR) {
Lukacs Berki6dd29092016-05-30 14:05:33 +00001514 continue;
1515 } else if (bytes_read != 1) {
1516 pdie(blaze_exit_code::INTERNAL_ERROR,
1517 "Cannot communicate with cancel thread");
1518 }
1519
1520 switch (buf) {
1521 case CancelThreadAction::NOTHING:
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001522 break;
1523
Lukacs Berki6dd29092016-05-30 14:05:33 +00001524 case CancelThreadAction::JOIN:
1525 running = false;
1526 break;
1527
1528 case CancelThreadAction::COMMAND_ID_RECEIVED:
1529 command_id_received = true;
1530 if (cancel) {
1531 SendCancelMessage();
1532 cancel = false;
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001533 }
1534 break;
1535
Lukacs Berki6dd29092016-05-30 14:05:33 +00001536 case CancelThreadAction::CANCEL:
1537 if (command_id_received) {
1538 SendCancelMessage();
1539 } else {
1540 cancel = true;
1541 }
1542 break;
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001543 }
1544 }
1545}
1546
Lukacs Berki6dd29092016-05-30 14:05:33 +00001547void GrpcBlazeServer::SendCancelMessage() {
1548 std::unique_lock<std::mutex> lock(cancel_thread_mutex_);
1549
1550 command_server::CancelRequest request;
1551 request.set_cookie(request_cookie_);
1552 request.set_command_id(command_id_);
1553 grpc::ClientContext context;
1554 context.set_deadline(std::chrono::system_clock::now() +
Lukacs Berki3ace3002016-08-31 08:55:34 +00001555 std::chrono::seconds(10));
Lukacs Berki6dd29092016-05-30 14:05:33 +00001556 command_server::CancelResponse response;
1557 // There isn't a lot we can do if this request fails
Lukacs Berki3ace3002016-08-31 08:55:34 +00001558 grpc::Status status = client_->Cancel(&context, request, &response);
1559 if (!status.ok()) {
1560 fprintf(stderr, "\nCould not interrupt server (%s)\n\n",
1561 status.error_message().c_str());
1562 }
Lukacs Berki6dd29092016-05-30 14:05:33 +00001563}
1564
Lukacs Berki1977d922016-05-02 09:31:37 +00001565// This will wait indefinitely until the server shuts down
1566void GrpcBlazeServer::KillRunningServer() {
1567 assert(connected_);
Lukacs Berki1977d922016-05-02 09:31:37 +00001568
Lukacs Berkie6a34f62016-04-25 12:16:04 +00001569 grpc::ClientContext context;
1570 command_server::RunRequest request;
1571 command_server::RunResponse response;
1572 request.set_cookie(request_cookie_);
Julio Merino28774852016-09-14 16:59:46 +00001573 request.set_block_for_lock(globals->options->block_for_lock);
Laszlo Csomorae16e762016-11-18 10:16:08 +00001574 request.set_client_description("pid=" + blaze::GetProcessIdAsString() +
1575 " (for shutdown)");
Lukacs Berkie6a34f62016-04-25 12:16:04 +00001576 request.add_arg("shutdown");
1577 std::unique_ptr<grpc::ClientReader<command_server::RunResponse>> reader(
1578 client_->Run(&context, request));
1579
1580 while (reader->Read(&response)) {}
1581
Lukacs Berki10dd6382017-01-11 09:08:54 +00001582 // Kill the server process for good measure (if we know the server PID)
1583 if (globals->server_pid > 0 &&
1584 VerifyServerProcess(globals->server_pid, globals->options->output_base,
Julio Merino28774852016-09-14 16:59:46 +00001585 globals->options->install_base)) {
Lukacs Berkiee44c382016-09-14 10:53:37 +00001586 KillServerProcess(globals->server_pid);
1587 }
Lukacs Berki1977d922016-05-02 09:31:37 +00001588
1589 connected_ = false;
Lukacs Berkie6a34f62016-04-25 12:16:04 +00001590}
1591
1592unsigned int GrpcBlazeServer::Communicate() {
Lukacs Berki1977d922016-05-02 09:31:37 +00001593 assert(connected_);
1594
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001595 vector<string> arg_vector;
Julio Merino28774852016-09-14 16:59:46 +00001596 string command = globals->option_processor->GetCommand();
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001597 if (command != "") {
1598 arg_vector.push_back(command);
1599 AddLoggingArgs(&arg_vector);
1600 }
1601
Julio Merino28774852016-09-14 16:59:46 +00001602 globals->option_processor->GetCommandArguments(&arg_vector);
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001603
1604 command_server::RunRequest request;
Lukacs Berki00cfb7d2016-04-20 09:01:52 +00001605 request.set_cookie(request_cookie_);
Julio Merino28774852016-09-14 16:59:46 +00001606 request.set_block_for_lock(globals->options->block_for_lock);
Laszlo Csomorae16e762016-11-18 10:16:08 +00001607 request.set_client_description("pid=" + blaze::GetProcessIdAsString());
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001608 for (const string& arg : arg_vector) {
1609 request.add_arg(arg);
1610 }
1611
1612 grpc::ClientContext context;
1613 command_server::RunResponse response;
1614 std::unique_ptr<grpc::ClientReader<command_server::RunResponse>> reader(
Lukacs Berki00cfb7d2016-04-20 09:01:52 +00001615 client_->Run(&context, request));
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001616
Lukacs Berki415d39a2016-04-28 13:18:54 +00001617 // Release the server lock because the gRPC handles concurrent clients just
1618 // fine. Note that this may result in two "waiting for other client" messages
1619 // (one during server startup and one emitted by the server)
Lukacs Berki1977d922016-05-02 09:31:37 +00001620 blaze::ReleaseLock(&blaze_lock_);
Lukacs Berki415d39a2016-04-28 13:18:54 +00001621
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001622 std::thread cancel_thread(&GrpcBlazeServer::CancelThread, this);
1623 bool command_id_set = false;
Laurent Le Brun08849b22016-09-20 12:21:32 +00001624 bool pipe_broken = false;
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001625 while (reader->Read(&response)) {
Lukacs Berkic55e9c72016-04-25 13:43:40 +00001626 if (response.cookie() != response_cookie_) {
1627 fprintf(stderr, "\nServer response cookie invalid, exiting\n");
1628 return blaze_exit_code::INTERNAL_ERROR;
1629 }
1630
Laurent Le Brun08849b22016-09-20 12:21:32 +00001631 bool pipe_broken_now = false;
Lukacs Berki3c1e4042017-01-11 13:30:29 +00001632 const char* broken_pipe_name;
Laszlo Csomor74ffaf72016-11-24 12:17:20 +00001633
1634 if (!response.standard_output().empty()) {
1635 size_t size = response.standard_output().size();
1636 size_t r = fwrite(response.standard_output().c_str(), 1, size, stdout);
1637 if (r < size && errno == EPIPE) {
Laurent Le Brun08849b22016-09-20 12:21:32 +00001638 pipe_broken_now = true;
Lukacs Berki3c1e4042017-01-11 13:30:29 +00001639 broken_pipe_name = "standard output";
Laurent Le Brun08849b22016-09-20 12:21:32 +00001640 }
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001641 }
1642
Laszlo Csomor74ffaf72016-11-24 12:17:20 +00001643 if (!response.standard_error().empty()) {
1644 size_t size = response.standard_error().size();
1645 size_t r = fwrite(response.standard_error().c_str(), 1, size, stderr);
1646 if (r < size && errno == EPIPE) {
Laurent Le Brun08849b22016-09-20 12:21:32 +00001647 pipe_broken_now = true;
Lukacs Berki3c1e4042017-01-11 13:30:29 +00001648 broken_pipe_name = "standard error";
Laurent Le Brun08849b22016-09-20 12:21:32 +00001649 }
1650 }
1651
1652 if (pipe_broken_now && !pipe_broken) {
1653 pipe_broken = true;
Lukacs Berki3c1e4042017-01-11 13:30:29 +00001654 fprintf(stderr, "\nCannot write to %s; exiting...\n\n", broken_pipe_name);
Laurent Le Brun08849b22016-09-20 12:21:32 +00001655 Cancel();
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001656 }
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001657
1658 if (!command_id_set && response.command_id().size() > 0) {
Lukacs Berki6dd29092016-05-30 14:05:33 +00001659 std::unique_lock<std::mutex> lock(cancel_thread_mutex_);
Lukacs Berki00cfb7d2016-04-20 09:01:52 +00001660 command_id_ = response.command_id();
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001661 command_id_set = true;
Lukacs Berki6dd29092016-05-30 14:05:33 +00001662 SendAction(CancelThreadAction::COMMAND_ID_RECEIVED);
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001663 }
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001664 }
1665
Lukacs Berki6dd29092016-05-30 14:05:33 +00001666 SendAction(CancelThreadAction::JOIN);
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001667 cancel_thread.join();
1668
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001669 if (!response.finished()) {
Lukacs Berki4e250942017-01-13 13:11:10 +00001670 fprintf(stderr, "\nServer finished RPC without an explicit exit code "
1671 "(log file: '%s')\n\n", globals->jvm_log_file.c_str());
Lukacs Berki2896dc02016-07-07 07:55:04 +00001672 return GetExitCodeForAbruptExit(*globals);
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001673 }
1674
Lukacs Berki3c1e4042017-01-11 13:30:29 +00001675 // We'll exit with exit code SIGPIPE on Unixes due to PropagateSignalOnExit()
1676 return pipe_broken
1677 ? blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR
1678 : response.exit_code();
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001679}
1680
1681void GrpcBlazeServer::Disconnect() {
Lukacs Berki1977d922016-05-02 09:31:37 +00001682 assert(connected_);
1683
Lukacs Berki00cfb7d2016-04-20 09:01:52 +00001684 client_.reset();
1685 request_cookie_ = "";
1686 response_cookie_ = "";
Lukacs Berki1977d922016-05-02 09:31:37 +00001687 connected_ = false;
Lukacs Berki1b25ce22016-04-15 13:11:21 +00001688}
1689
Lukacs Berki6dd29092016-05-30 14:05:33 +00001690void GrpcBlazeServer::SendAction(CancelThreadAction action) {
1691 char msg = action;
Thiago Farina0bba4c92016-12-14 15:29:11 +00001692 if (!pipe_->Send(&msg, 1)) {
Laszlo Csomor3b89d2d2016-11-28 14:04:27 +00001693 blaze::SigPrintf(
1694 "\nCould not interrupt server (cannot write to client pipe)\n\n");
Sasha Smundak1fdd31d2016-07-25 17:54:00 +00001695 }
Lukacs Berki6dd29092016-05-30 14:05:33 +00001696}
1697
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001698void GrpcBlazeServer::Cancel() {
Lukacs Berki1977d922016-05-02 09:31:37 +00001699 assert(connected_);
Lukacs Berki6dd29092016-05-30 14:05:33 +00001700 SendAction(CancelThreadAction::CANCEL);
Lukacs Berkif1df38a2016-04-19 07:42:22 +00001701}
1702
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001703} // namespace blaze