// blob: d1cef48a6874d5b5699c397577f44a79d13fe03d [file] [log] [blame]
// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.actions;
import static com.google.devtools.build.lib.profiler.AutoProfiler.profiled;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
import com.google.devtools.build.lib.profiler.AutoProfiler;
import com.google.devtools.build.lib.profiler.ProfilerTask;
import com.google.devtools.build.lib.unix.ProcMeminfoParser;
import com.google.devtools.build.lib.util.OS;
import com.google.devtools.build.lib.util.Pair;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
/**
 * Used to keep track of resources consumed by the Blaze action execution threads and throttle them
 * when necessary.
 *
 * <p>Threads which are known to consume a significant amount of resources should call the
 * {@link #acquireResources} method. This method will check whether requested resources are
 * available and will either mark them as used and allow the thread to proceed or will block the
 * thread until the requested resources become available. When the thread completes its task, it
 * must release allocated resources by calling the {@link #releaseResources} method (normally by
 * closing the {@link ResourceHandle} returned from the acquisition).
 *
 * <p>Available resources can be calculated using one of two ways:
 * <ol>
 * <li>They can be preset using {@link #setAvailableResources(ResourceSet)} method. This is used
 *     mainly by the unit tests (however it is possible to provide a future option that would
 *     artificially limit amount of CPU/RAM consumed by the Blaze).
 * <li>They can be preset based on the /proc/cpuinfo and /proc/meminfo information. Blaze will
 *     calculate amount of available CPU cores (adjusting for hyperthreading logical cores) and
 *     amount of the total available memory and will limit itself to the number of effective cores
 *     and 2/3 of the available memory. For details, please look at the {@link
 *     LocalHostCapacity#getLocalHostCapacity} method.
 * </ol>
 *
 * <p>The resource manager also allows a slight overallocation of the resources to account for the
 * fact that requested resources are usually estimated using a pessimistic approximation. It also
 * guarantees that at least one thread will always be able to acquire any amount of requested
 * resources (even if it is greater than amount of available resources). Therefore, assuming that
 * threads correctly release acquired resources, Blaze will never be fully blocked.
 */
@ThreadSafe
public class ResourceManager {

  /**
   * A handle returned by {@link #acquireResources(ActionExecutionMetadata, ResourceSet)} that must
   * be closed in order to free the resources again.
   */
  public static class ResourceHandle implements AutoCloseable {
    final ResourceManager rm;
    final ActionExecutionMetadata actionMetadata;
    final ResourceSet resourceSet;

    /**
     * @param rm the manager the resources were acquired from and will be released to
     * @param actionMetadata the action on whose behalf the resources are held
     * @param resources the resources that {@link #close} gives back
     */
    public ResourceHandle(
        ResourceManager rm, ActionExecutionMetadata actionMetadata, ResourceSet resources) {
      this.rm = rm;
      this.actionMetadata = actionMetadata;
      this.resourceSet = resources;
    }

    /**
     * Closing the ResourceHandle releases the resources associated with it.
     */
    @Override
    public void close() {
      rm.releaseResources(actionMetadata, resourceSet);
    }
  }

  // Per-thread flag: true while the current thread holds resources obtained from this manager.
  private final ThreadLocal<Boolean> threadLocked =
      new ThreadLocal<Boolean>() {
        @Override
        protected Boolean initialValue() {
          return false;
        }
      };

  /**
   * Singleton reference defined in a separate class to ensure thread-safe lazy
   * initialization.
   */
  private static class Singleton {
    // final: the singleton must never be re-pointed after class initialization.
    static final ResourceManager instance = new ResourceManager();
  }

  /**
   * Returns singleton instance of the resource manager.
   */
  public static ResourceManager instance() {
    return Singleton.instance;
  }

  // Allocated resources are allowed to exceed the available amount: when checking whether a
  // request fits, only MIN_NECESSARY_CPU_RATIO of the requested CPU and MIN_NECESSARY_RAM_RATIO
  // of the requested RAM are compared against what is still free.
  // Please note that these values are purely empirical - we assume that generally
  // requested resources are somewhat pessimistic and thread would end up
  // using less than requested amount.
  private static final double MIN_NECESSARY_CPU_RATIO = 0.6;
  private static final double MIN_NECESSARY_RAM_RATIO = 1.0;

  // Used CPU/RAM amounts below this threshold are treated as accumulated rounding error and are
  // snapped back to exactly zero on release.
  private static final double EPSILON = 0.0001;

  // List of blocked threads. Associated CountDownLatch object will always
  // be initialized to 1 during creation in the acquire() method.
  private final List<Pair<ResourceSet, CountDownLatch>> requestList;

  // The total amount of resources on the local host. Must be set by
  // an explicit call to setAvailableResources(), often using
  // LocalHostCapacity.getLocalHostCapacity() as an argument.
  private ResourceSet staticResources = null;

  // The portion of staticResources that may be handed out; computed in setAvailableResources().
  @VisibleForTesting public ResourceSet availableResources = null;

  // Used amount of CPU capacity (where 1.0 corresponds to the one fully
  // occupied CPU core). Corresponds to the CPU resource definition in the
  // ResourceSet class.
  private double usedCpu;

  // Used amount of RAM capacity in MB. Corresponds to the RAM resource
  // definition in the ResourceSet class.
  private double usedRam;

  // Used local test count. Corresponds to the local test count definition in the ResourceSet class.
  private int usedLocalTestCount;

  // Specifies how much of the RAM in staticResources we should allow to be used.
  public static final int DEFAULT_RAM_UTILIZATION_PERCENTAGE = 67;
  private int ramUtilizationPercentage = DEFAULT_RAM_UTILIZATION_PERCENTAGE;

  // Determines if local memory estimates are used.
  private boolean localMemoryEstimate = false;

  private ResourceManager() {
    requestList = new LinkedList<>();
  }

  /** Creates a fresh, independent manager (not the singleton) for use in tests. */
  @VisibleForTesting public static ResourceManager instanceForTestingOnly() {
    return new ResourceManager();
  }

  /**
   * Resets resource manager state and releases all thread locks.
   * Note - it does not reset available resources. Use separate call to setAvailableResources().
   */
  public synchronized void resetResourceUsage() {
    usedCpu = 0;
    usedRam = 0;
    usedLocalTestCount = 0;
    for (Pair<ResourceSet, CountDownLatch> request : requestList) {
      // CountDownLatch can be set only to 0 or 1.
      request.second.countDown();
    }
    requestList.clear();
  }

  /**
   * Sets available resources using given resource set. Must be called
   * at least once before using resource manager.
   *
   * <p>The usable RAM is the given RAM scaled by the current RAM utilization percentage; CPU and
   * local test count are taken over unchanged. Waiting requests are re-examined afterwards.
   */
  public synchronized void setAvailableResources(ResourceSet resources) {
    Preconditions.checkNotNull(resources);
    staticResources = resources;
    availableResources = ResourceSet.create(
        staticResources.getMemoryMb() * this.ramUtilizationPercentage / 100.0,
        staticResources.getCpuUsage(),
        staticResources.getLocalTestCount());
    processWaitingThreads();
  }

  /**
   * Specify how much of the available RAM we should allow to be used.
   *
   * <p>This does not recompute the available resources; the new percentage is applied to them on
   * the next call to {@link #setAvailableResources(ResourceSet)}.
   */
  public synchronized void setRamUtilizationPercentage(int percentage) {
    ramUtilizationPercentage = percentage;
  }

  /**
   * If set to true, then resource acquisition will query the currently available memory, rather
   * than counting it against the fixed maximum size. The estimate is only consulted when running
   * on Linux.
   */
  public synchronized void setUseLocalMemoryEstimate(boolean value) {
    // Synchronized because the flag is read while holding this monitor in areResourcesAvailable();
    // an unsynchronized write would not be guaranteed to become visible to those readers.
    localMemoryEstimate = value;
  }

  /**
   * Acquires requested resource set. Will block if resource is not available.
   * NB! This method must be thread-safe!
   *
   * @param owner the action requesting the resources; used for profiling and error messages
   * @param resources the resources to acquire; must not be null
   * @return a handle that releases the resources when closed
   * @throws InterruptedException if the thread is interrupted while waiting; in that case no
   *     resources remain held on behalf of this call
   */
  public ResourceHandle acquireResources(ActionExecutionMetadata owner, ResourceSet resources)
      throws InterruptedException {
    Preconditions.checkNotNull(
        resources, "acquireResources called with resources == NULL during %s", owner);
    Preconditions.checkState(
        !threadHasResources(), "acquireResources with existing resource lock during %s", owner);

    AutoProfiler p = profiled(owner.describe(), ProfilerTask.ACTION_LOCK);
    // A null latch means the resources were granted immediately; otherwise the request was queued
    // and the latch is opened once processWaitingThreads() grants it.
    CountDownLatch latch = acquire(resources);
    if (latch != null) {
      try {
        latch.await();
      } catch (InterruptedException e) {
        // Synchronize on this to avoid any racing with #processWaitingThreads
        synchronized (this) {
          if (latch.getCount() == 0) {
            // Resources already acquired by other side. Give them back. release() re-enters this
            // object's monitor, which is permitted, so calling it here cannot deadlock.
            release(resources);
          } else {
            // Inform other side that resources shouldn't be acquired: processWaitingThreads()
            // treats a request whose latch is already open as cancelled.
            latch.countDown();
          }
        }
        throw e;
      }
    }

    threadLocked.set(true);
    // Profile acquisition only if it waited for resource to become available.
    if (latch != null) {
      p.complete();
    }
    return new ResourceHandle(this, owner, resources);
  }

  /**
   * Acquires the given resources if available immediately. Does not block.
   *
   * @return a ResourceHandle iff the given resources were locked (all or nothing), null otherwise.
   */
  @VisibleForTesting
  ResourceHandle tryAcquire(ActionExecutionMetadata owner, ResourceSet resources) {
    Preconditions.checkNotNull(
        resources, "tryAcquire called with resources == NULL during %s", owner);
    Preconditions.checkState(
        !threadHasResources(), "tryAcquire with existing resource lock during %s", owner);

    boolean acquired = false;
    synchronized (this) {
      if (areResourcesAvailable(resources)) {
        incrementResources(resources);
        acquired = true;
      }
    }

    if (acquired) {
      // NOTE(review): for the ResourceSet.ZERO instance the thread is left marked as unlocked, so
      // closing the returned handle fails the checkState in releaseResources(). This differs from
      // acquireResources(), which always marks the thread as locked - confirm which is intended.
      threadLocked.set(resources != ResourceSet.ZERO);
      return new ResourceHandle(this, owner, resources);
    }
    return null;
  }

  /** Adds the given resources to the used totals. Callers must hold this object's monitor. */
  private void incrementResources(ResourceSet resources) {
    usedCpu += resources.getCpuUsage();
    usedRam += resources.getMemoryMb();
    usedLocalTestCount += resources.getLocalTestCount();
  }

  /**
   * Return true if any resources have been claimed through this manager or if any acquisition
   * request is still queued.
   */
  public synchronized boolean inUse() {
    return usedCpu != 0.0 || usedRam != 0.0 || usedLocalTestCount != 0 || !requestList.isEmpty();
  }

  /**
   * Return true iff this thread is currently marked as holding resources from this manager.
   */
  public boolean threadHasResources() {
    return threadLocked.get();
  }

  /**
   * Releases previously requested resources.
   *
   * <p>NB! This method must be thread-safe!
   *
   * @param owner the action that held the resources; used for profiling and error messages
   * @param resources the resources to give back; must not be null
   */
  @VisibleForTesting
  void releaseResources(ActionExecutionMetadata owner, ResourceSet resources) {
    Preconditions.checkNotNull(
        resources, "releaseResources called with resources == NULL during %s", owner);
    Preconditions.checkState(
        threadHasResources(), "releaseResources without resource lock during %s", owner);

    boolean isConflict = false;
    AutoProfiler p = profiled(owner.describe(), ProfilerTask.ACTION_RELEASE);
    try {
      isConflict = release(resources);
    } finally {
      // Clear the per-thread marker even if release() failed, so the thread can acquire again.
      threadLocked.set(false);
      // Profile resource release only if there were pending allocation requests to examine.
      if (isConflict) {
        p.complete();
      }
    }
  }

  /**
   * Grants the resources immediately if possible, otherwise queues the request.
   *
   * @return null if the resources were granted, or the latch to wait on if the request was queued
   */
  private synchronized CountDownLatch acquire(ResourceSet resources) {
    if (areResourcesAvailable(resources)) {
      incrementResources(resources);
      return null;
    }
    Pair<ResourceSet, CountDownLatch> request =
        new Pair<>(resources, new CountDownLatch(1));
    requestList.add(request);
    return request.second;
  }

  /**
   * Subtracts the given resources from the used totals and re-examines queued requests.
   *
   * @return true if there were queued requests to examine, false otherwise
   */
  private synchronized boolean release(ResourceSet resources) {
    usedCpu -= resources.getCpuUsage();
    usedRam -= resources.getMemoryMb();
    usedLocalTestCount -= resources.getLocalTestCount();

    // TODO(bazel-team): (2010) rounding error can accumulate and value below can end up being
    // e.g. 1E-15. So if it is small enough, we set it to 0. But maybe there is a better solution.
    if (usedCpu < EPSILON) {
      usedCpu = 0;
    }
    if (usedRam < EPSILON) {
      usedRam = 0;
    }
    // The checks above also clamp negative CPU/RAM totals, which arise when resources are released
    // after resetResourceUsage() has zeroed the counters. Clamp the test count the same way;
    // otherwise it stays negative, inUse() keeps returning true and the "nothing in use" fast path
    // in areResourcesAvailable() is never taken again.
    if (usedLocalTestCount < 0) {
      usedLocalTestCount = 0;
    }

    if (!requestList.isEmpty()) {
      processWaitingThreads();
      return true;
    }
    return false;
  }

  /**
   * Tries to unblock one or more waiting threads if there are sufficient resources available.
   * Requests whose latch is already open were cancelled by the waiting thread and are dropped.
   */
  private synchronized void processWaitingThreads() {
    Iterator<Pair<ResourceSet, CountDownLatch>> iterator = requestList.iterator();
    while (iterator.hasNext()) {
      Pair<ResourceSet, CountDownLatch> request = iterator.next();
      if (request.second.getCount() != 0) {
        if (areResourcesAvailable(request.first)) {
          incrementResources(request.first);
          request.second.countDown();
          iterator.remove();
        }
      } else {
        // Cancelled by other side.
        iterator.remove();
      }
    }
  }

  // Method will return true if all requested resources are considered to be available.
  // Callers must hold this object's monitor.
  private boolean areResourcesAvailable(ResourceSet resources) {
    Preconditions.checkNotNull(availableResources);
    // Comparison below is robust, since any calculation errors will be fixed
    // by the release() method.
    if (usedCpu == 0.0 && usedRam == 0.0 && usedLocalTestCount == 0) {
      return true;
    }
    // Use only MIN_NECESSARY_???_RATIO of the resource value to check for
    // allocation. This is necessary to account for the fact that most of the
    // requested resource sets use pessimistic estimations. Note that this
    // ratio is used only during comparison - for tracking we will actually
    // mark whole requested amount as used.
    double cpu = resources.getCpuUsage() * MIN_NECESSARY_CPU_RATIO;
    double ram = resources.getMemoryMb() * MIN_NECESSARY_RAM_RATIO;
    int localTestCount = resources.getLocalTestCount();

    double availableCpu = availableResources.getCpuUsage();
    double availableRam = availableResources.getMemoryMb();
    int availableLocalTestCount = availableResources.getLocalTestCount();

    double remainingRam = availableRam - usedRam;

    if (localMemoryEstimate && OS.getCurrent() == OS.LINUX) {
      try {
        ProcMeminfoParser memInfo = new ProcMeminfoParser();
        // Divide as floating point so that a fractional megabyte is not truncated should
        // getFreeRamKb() return an integral type.
        double totalFreeRam = memInfo.getFreeRamKb() / 1024.0;
        double reserveMemory =
            staticResources.getMemoryMb() * (100.0 - this.ramUtilizationPercentage) / 100.0;
        remainingRam = totalFreeRam - reserveMemory;
      } catch (IOException ignored) {
        // If we get an error trying to determine the currently free system memory for any reason,
        // just continue on. It is not terribly clear what could cause this, aside from an
        // unexpected ABI breakage in the linux kernel or an OS-level misconfiguration such as not
        // having permissions to read /proc/meminfo.
        //
        // remainingRam is initialized to a value that results in behavior as if localMemoryEstimate
        // was disabled.
      }
    }

    // Resources are considered available if any one of the conditions below is true:
    // 1) If resource is not requested at all, it is available.
    // 2) If resource is not used at the moment, it is considered to be
    // available regardless of how much is requested. This is necessary to
    // ensure that at any given time, at least one thread is able to acquire
    // resources even if it requests more than available.
    // 3) If used resource amount is less than total available resource amount.
    boolean cpuIsAvailable = cpu == 0.0 || usedCpu == 0.0 || usedCpu + cpu <= availableCpu;
    boolean ramIsAvailable = ram == 0.0 || usedRam == 0.0 || ram <= remainingRam;
    boolean localTestCountIsAvailable = localTestCount == 0 || usedLocalTestCount == 0
        || usedLocalTestCount + localTestCount <= availableLocalTestCount;
    return cpuIsAvailable && ramIsAvailable && localTestCountIsAvailable;
  }

  /** Returns the number of requests currently held in the wait queue. */
  @VisibleForTesting
  synchronized int getWaitCount() {
    return requestList.size();
  }

  /** Returns whether a request for the given amounts would be granted right now. */
  @VisibleForTesting
  synchronized boolean isAvailable(double ram, double cpu, int localTestCount) {
    return areResourcesAvailable(ResourceSet.create(ram, cpu, localTestCount));
  }
}