blob: a93bb5bb545112334a2d487c315b7be68a64a6c7 [file] [log] [blame]
// Copyright 2023 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.worker;
import com.google.common.collect.ImmutableSet;
import com.google.devtools.build.lib.buildeventstream.BuildEventStreamProtos.BuildMetrics.WorkerMetrics.WorkerStatus;
import com.google.devtools.build.lib.server.FailureDetails.Worker.Code;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import java.util.Optional;
/**
* This is a state machine instance that encapsulates the status of the worker process, and provides
* a mechanism to signal to Bazel when to kill a worker.
*/
public class WorkerProcessStatus {
/** The status of the worker process. */
public enum Status {
/**
* Used as a starting value, before the worker process has been created by Bazel.
*
* <p>This state is not logged in the BEP as {@code WorkerSpawnRunner#initializeMetrics} is only
* called after {@code #prepareExecution} (which sets the status to ALIVE).
*/
NOT_STARTED(/* priority= */ 0, WorkerStatus.NOT_STARTED),
/**
* Worker process has been created by Bazel (the process might not be immediately measurable).
*/
ALIVE(/* priority= */ 1, WorkerStatus.ALIVE),
/** Intermediate states: Bazel has marked this worker process to be killed. */
PENDING_KILL_DUE_TO_UNKNOWN(2, WorkerStatus.ALIVE, "KILLED_UNKNOWN"),
PENDING_KILL_DUE_TO_INTERRUPTED_EXCEPTION(
/* priority= */ 3, WorkerStatus.ALIVE, "KILLED_DUE_TO_INTERRUPTED_EXCEPTION"),
PENDING_KILL_DUE_TO_USER_EXEC_EXCEPTION(
/* priority= */ 3, WorkerStatus.ALIVE, "KILLED_DUE_TO_USER_EXEC_EXCEPTION"),
PENDING_KILL_DUE_TO_IO_EXCEPTION(
/* priority= */ 3, WorkerStatus.ALIVE, "KILLED_DUE_TO_IO_EXCEPTION"),
PENDING_KILL_DUE_TO_MEMORY_PRESSURE(
/* priority= */ 4, WorkerStatus.ALIVE, "KILLED_DUE_TO_MEMORY_PRESSURE"),
/**
* Semi-terminal status: Bazel has determined that worker process has already been killed due to
* some unknown reason; if a more specific reason (below) comes along, we can transition to
* those statuses.
*/
KILLED_UNKNOWN(/* priority= */ 5, WorkerStatus.KILLED_UNKNOWN),
/** Terminal statuses: The worker process has been killed and Bazel is aware of the reason. */
// Bazel killed the worker due to an InterruptedException, can happen when the remote branch
// interrupts the local branch after winning the race in dynamic execution.
KILLED_DUE_TO_INTERRUPTED_EXCEPTION(
/* priority= */ 6, WorkerStatus.KILLED_DUE_TO_INTERRUPTED_EXCEPTION),
// Bazel killed the worker due to a UserExecException.
KILLED_DUE_TO_USER_EXEC_EXCEPTION(
/* priority= */ 6, WorkerStatus.KILLED_DUE_TO_USER_EXEC_EXCEPTION),
// Bazel killed the worker due to an IOException.
KILLED_DUE_TO_IO_EXCEPTION(/* priority= */ 6, WorkerStatus.KILLED_DUE_TO_IO_EXCEPTION),
// This can be a result of Bazel forcibly killing the worker process, which might result in
// other exceptions caught in the execution threads (as enumerated by the other killed statuses
// above). Thus, this has the highest priority so that we can override that and set the correct
// reason why the worker was killed.
KILLED_DUE_TO_MEMORY_PRESSURE(/* priority= */ 7, WorkerStatus.KILLED_DUE_TO_MEMORY_PRESSURE);
// The priority of a status determines whether another status can be used to override and
// update it.
private final int priority;
private final Optional<String> killedStatus;
private final WorkerStatus workerStatus;
Status(final int priority, WorkerStatus workerStatus, String killedStatus) {
this.priority = priority;
this.workerStatus = workerStatus;
this.killedStatus = Optional.of(killedStatus);
}
Status(final int priority, WorkerStatus workerStatus) {
this.priority = priority;
this.workerStatus = workerStatus;
this.killedStatus = Optional.empty();
}
Status killedStatus() {
if (killedStatus.isEmpty()) {
return this;
}
return valueOf(killedStatus.get());
}
WorkerStatus toWorkerStatus() {
return workerStatus;
}
}
private Status status;
private Optional<Code> workerCode = Optional.empty();
public WorkerProcessStatus() {
this.status = Status.NOT_STARTED;
}
public Status get() {
return status;
}
// A status is invalid if it is killed or marked to be killed later.
public synchronized boolean isValid() {
return status == Status.NOT_STARTED || status == Status.ALIVE;
}
public boolean isPendingEviction() {
return status == Status.PENDING_KILL_DUE_TO_MEMORY_PRESSURE;
}
public Optional<Code> getWorkerCode() {
return workerCode;
}
private static final ImmutableSet<Status> PENDING_KILL_STATUSES =
ImmutableSet.of(
Status.PENDING_KILL_DUE_TO_UNKNOWN,
Status.PENDING_KILL_DUE_TO_IO_EXCEPTION,
Status.PENDING_KILL_DUE_TO_INTERRUPTED_EXCEPTION,
Status.PENDING_KILL_DUE_TO_USER_EXEC_EXCEPTION,
Status.PENDING_KILL_DUE_TO_MEMORY_PRESSURE);
private boolean isPendingKill() {
return PENDING_KILL_STATUSES.contains(status);
}
private static final ImmutableSet<Status> KILLED_STATUSES =
ImmutableSet.of(
Status.KILLED_UNKNOWN,
Status.KILLED_DUE_TO_IO_EXCEPTION,
Status.KILLED_DUE_TO_INTERRUPTED_EXCEPTION,
Status.KILLED_DUE_TO_USER_EXEC_EXCEPTION,
Status.KILLED_DUE_TO_MEMORY_PRESSURE);
public boolean isKilled() {
return KILLED_STATUSES.contains(status);
}
/**
* Attempts to update the status to its corresponding kill status. Should be called **after** the
* process is destroyed by Bazel.
*/
@CanIgnoreReturnValue
public synchronized boolean setKilled() {
if (isPendingKill()) {
status = status.killedStatus();
return true;
}
return false;
}
/**
* Returns whether the WorkerStatus was successfully updated after attempting to update it to a
* given Status.
*/
@CanIgnoreReturnValue
public synchronized boolean maybeUpdateStatus(Status toStatus) {
if (canTransitionTo(toStatus)) {
this.status = toStatus;
return true;
}
return false;
}
@CanIgnoreReturnValue
public synchronized boolean maybeUpdateStatus(Status toStatus, Code workerCode) {
this.workerCode = Optional.of(workerCode);
return maybeUpdateStatus(toStatus);
}
/**
* Returns whether a state transition can occur.
*
* @param toStatus the next state attempted to transition to.
*/
private boolean canTransitionTo(Status toStatus) {
return status.priority < toStatus.priority;
}
public WorkerStatus toWorkerStatus() {
return status.toWorkerStatus();
}
}