|  | // Copyright 2018 The Bazel Authors. All rights reserved. | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | // you may not use this file except in compliance with the License. | 
|  | // You may obtain a copy of the License at | 
|  | // | 
|  | //    http://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, software | 
|  | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | // See the License for the specific language governing permissions and | 
|  | // limitations under the License. | 
|  | package com.google.devtools.build.lib.profiler; | 
|  |  | 
|  | import static com.google.common.base.Preconditions.checkNotNull; | 
|  |  | 
|  | import com.google.common.base.Preconditions; | 
|  | import com.google.common.base.Stopwatch; | 
|  | import com.google.common.collect.ImmutableList; | 
|  | import com.google.common.collect.ImmutableMap; | 
|  | import com.google.devtools.build.lib.actions.ResourceEstimator; | 
|  | import com.google.devtools.build.lib.bugreport.BugReporter; | 
|  | import com.google.devtools.build.lib.profiler.NetworkMetricsCollector.SystemNetworkUsages; | 
|  | import com.google.devtools.build.lib.unix.ProcMeminfoParser; | 
|  | import com.google.devtools.build.lib.util.OS; | 
|  | import com.google.devtools.build.lib.util.ResourceUsage; | 
|  | import com.google.devtools.build.lib.worker.WorkerProcessMetrics; | 
|  | import com.google.devtools.build.lib.worker.WorkerProcessMetricsCollector; | 
|  | import com.google.errorprone.annotations.concurrent.GuardedBy; | 
|  | import com.sun.management.OperatingSystemMXBean; | 
|  | import java.io.IOException; | 
|  | import java.lang.management.ManagementFactory; | 
|  | import java.lang.management.MemoryMXBean; | 
|  | import java.time.Duration; | 
|  | import java.util.ArrayList; | 
|  | import java.util.HashMap; | 
|  | import java.util.List; | 
|  | import java.util.Map; | 
|  | import java.util.concurrent.TimeUnit; | 
|  | import javax.annotation.Nullable; | 
|  |  | 
|  | /** Thread to collect local resource usage data and log into JSON profile. */ | 
|  | public class CollectLocalResourceUsage extends Thread { | 
|  |  | 
|  | // TODO(twerth): Make these configurable. | 
|  | private static final Duration BUCKET_DURATION = Duration.ofSeconds(1); | 
|  | private static final Duration LOCAL_RESOURCES_COLLECT_SLEEP_INTERVAL = Duration.ofMillis(200); | 
|  |  | 
|  | private final BugReporter bugReporter; | 
|  | private final boolean collectWorkerDataInProfiler; | 
|  | private final boolean collectLoadAverage; | 
|  | private final boolean collectSystemNetworkUsage; | 
|  | private final boolean collectResourceManagerEstimation; | 
|  |  | 
|  | private volatile boolean stopLocalUsageCollection; | 
|  | private volatile boolean profilingStarted; | 
|  |  | 
|  | @GuardedBy("this") | 
|  | @Nullable | 
|  | private Map<ProfilerTask, TimeSeries> timeSeries; | 
|  |  | 
|  | private Stopwatch stopwatch; | 
|  |  | 
|  | private final WorkerProcessMetricsCollector workerProcessMetricsCollector; | 
|  |  | 
|  | private final ResourceEstimator resourceEstimator; | 
|  | private final boolean collectPressureStallIndicators; | 
|  |  | 
|  | CollectLocalResourceUsage( | 
|  | BugReporter bugReporter, | 
|  | WorkerProcessMetricsCollector workerProcessMetricsCollector, | 
|  | ResourceEstimator resourceEstimator, | 
|  | boolean collectWorkerDataInProfiler, | 
|  | boolean collectLoadAverage, | 
|  | boolean collectSystemNetworkUsage, | 
|  | boolean collectResourceManagerEstimation, | 
|  | boolean collectPressureStallIndicators) { | 
|  | super("collect-local-resources"); | 
|  | this.bugReporter = checkNotNull(bugReporter); | 
|  | this.collectWorkerDataInProfiler = collectWorkerDataInProfiler; | 
|  | this.workerProcessMetricsCollector = workerProcessMetricsCollector; | 
|  | this.collectLoadAverage = collectLoadAverage; | 
|  | this.collectSystemNetworkUsage = collectSystemNetworkUsage; | 
|  | this.collectResourceManagerEstimation = collectResourceManagerEstimation; | 
|  | this.resourceEstimator = resourceEstimator; | 
|  | this.collectPressureStallIndicators = collectPressureStallIndicators; | 
|  | } | 
|  |  | 
|  | @Override | 
|  | public void run() { | 
|  | int numProcessors = Runtime.getRuntime().availableProcessors(); | 
|  | stopwatch = Stopwatch.createStarted(); | 
|  | synchronized (this) { | 
|  | timeSeries = new HashMap<>(); | 
|  | Duration startTime = stopwatch.elapsed(); | 
|  | List<ProfilerTask> enabledCounters = new ArrayList<>(); | 
|  | enabledCounters.addAll( | 
|  | ImmutableList.of( | 
|  | ProfilerTask.LOCAL_CPU_USAGE, | 
|  | ProfilerTask.LOCAL_MEMORY_USAGE, | 
|  | ProfilerTask.SYSTEM_CPU_USAGE, | 
|  | ProfilerTask.SYSTEM_MEMORY_USAGE)); | 
|  |  | 
|  | if (collectWorkerDataInProfiler) { | 
|  | enabledCounters.add(ProfilerTask.WORKERS_MEMORY_USAGE); | 
|  | } | 
|  | if (collectLoadAverage) { | 
|  | enabledCounters.add(ProfilerTask.SYSTEM_LOAD_AVERAGE); | 
|  | } | 
|  | if (collectSystemNetworkUsage) { | 
|  | enabledCounters.add(ProfilerTask.SYSTEM_NETWORK_UP_USAGE); | 
|  | enabledCounters.add(ProfilerTask.SYSTEM_NETWORK_DOWN_USAGE); | 
|  | } | 
|  | if (collectResourceManagerEstimation) { | 
|  | enabledCounters.add(ProfilerTask.MEMORY_USAGE_ESTIMATION); | 
|  | enabledCounters.add(ProfilerTask.CPU_USAGE_ESTIMATION); | 
|  | } | 
|  | if (collectPressureStallIndicators) { | 
|  | enabledCounters.add(ProfilerTask.PRESSURE_STALL_IO); | 
|  | enabledCounters.add(ProfilerTask.PRESSURE_STALL_MEMORY); | 
|  | } | 
|  |  | 
|  | for (ProfilerTask counter : enabledCounters) { | 
|  | timeSeries.put(counter, new TimeSeries(startTime, BUCKET_DURATION)); | 
|  | } | 
|  | } | 
|  | OperatingSystemMXBean osBean = | 
|  | (OperatingSystemMXBean) ManagementFactory.getOperatingSystemMXBean(); | 
|  | MemoryMXBean memoryBean = ManagementFactory.getMemoryMXBean(); | 
|  | Duration previousElapsed = stopwatch.elapsed(); | 
|  | long previousCpuTimeNanos = osBean.getProcessCpuTime(); | 
|  | profilingStarted = true; | 
|  | while (!stopLocalUsageCollection) { | 
|  | try { | 
|  | Thread.sleep(LOCAL_RESOURCES_COLLECT_SLEEP_INTERVAL.toMillis()); | 
|  | } catch (InterruptedException e) { | 
|  | return; | 
|  | } | 
|  | Duration nextElapsed = stopwatch.elapsed(); | 
|  | long nextCpuTimeNanos = osBean.getProcessCpuTime(); | 
|  |  | 
|  | double systemCpuLoad = osBean.getSystemCpuLoad(); | 
|  | if (Double.isNaN(systemCpuLoad)) { | 
|  | // Unlike advertised, on Mac the system CPU load is NaN sometimes. | 
|  | // There is no good way to handle this, so to avoid any downstream method crashing on this, | 
|  | // we reset the CPU value here. | 
|  | systemCpuLoad = 0; | 
|  | } | 
|  | double systemUsage = systemCpuLoad * numProcessors; | 
|  |  | 
|  | long systemMemoryUsageMb = -1; | 
|  | if (OS.getCurrent() == OS.LINUX) { | 
|  | // On Linux we get a better estimate by using /proc/meminfo. See | 
|  | // https://www.linuxatemyram.com/ for more info on buffer caches. | 
|  | try { | 
|  | ProcMeminfoParser procMeminfoParser = new ProcMeminfoParser("/proc/meminfo"); | 
|  | systemMemoryUsageMb = | 
|  | (procMeminfoParser.getTotalKb() - procMeminfoParser.getFreeRamKb()) / 1024; | 
|  | } catch (IOException e) { | 
|  | // Silently ignore and fallback. | 
|  | } | 
|  | } | 
|  | if (systemMemoryUsageMb <= 0) { | 
|  | // In case we aren't running on Linux or /proc/meminfo parsing went wrong, fall back to the | 
|  | // OS bean. | 
|  | systemMemoryUsageMb = | 
|  | (osBean.getTotalPhysicalMemorySize() - osBean.getFreePhysicalMemorySize()) | 
|  | / (1024 * 1024); | 
|  | } | 
|  |  | 
|  | long memoryUsage; | 
|  | try { | 
|  | memoryUsage = | 
|  | memoryBean.getHeapMemoryUsage().getUsed() | 
|  | + memoryBean.getNonHeapMemoryUsage().getUsed(); | 
|  | } catch (IllegalArgumentException e) { | 
|  | // The JVM may report committed > max. See b/180619163. | 
|  | bugReporter.sendBugReport(e); | 
|  | memoryUsage = -1; | 
|  | } | 
|  |  | 
|  | int workerMemoryUsageMb = 0; | 
|  | if (collectWorkerDataInProfiler) { | 
|  | try (SilentCloseable c = Profiler.instance().profile("Worker metrics collection")) { | 
|  | workerMemoryUsageMb = | 
|  | this.workerProcessMetricsCollector.getLiveWorkerProcessMetrics().stream() | 
|  | .mapToInt(WorkerProcessMetrics::getUsedMemoryInKb) | 
|  | .sum() | 
|  | / 1024; | 
|  | } | 
|  | } | 
|  | double loadAverage = 0; | 
|  | if (collectLoadAverage) { | 
|  | loadAverage = osBean.getSystemLoadAverage(); | 
|  | } | 
|  | double pressureStallIo = 0; | 
|  | double pressureStallMemory = 0; | 
|  | if (collectPressureStallIndicators) { | 
|  | pressureStallIo = ResourceUsage.readPressureStallIndicator("io"); | 
|  | pressureStallMemory = ResourceUsage.readPressureStallIndicator("memory"); | 
|  | } | 
|  |  | 
|  | double deltaNanos = nextElapsed.minus(previousElapsed).toNanos(); | 
|  | double cpuLevel = (nextCpuTimeNanos - previousCpuTimeNanos) / deltaNanos; | 
|  |  | 
|  | SystemNetworkUsages systemNetworkUsages = null; | 
|  | if (collectSystemNetworkUsage) { | 
|  | systemNetworkUsages = | 
|  | NetworkMetricsCollector.instance().collectSystemNetworkUsages(deltaNanos); | 
|  | } | 
|  |  | 
|  | double estimatedCpuUsage = 0; | 
|  | double estimatedMemoryUsageInMb = 0; | 
|  | if (collectResourceManagerEstimation) { | 
|  | estimatedCpuUsage = resourceEstimator.getUsedCPU(); | 
|  | estimatedMemoryUsageInMb = resourceEstimator.getUsedMemoryInMb(); | 
|  | } | 
|  |  | 
|  | synchronized (this) { | 
|  | addRange(ProfilerTask.LOCAL_CPU_USAGE, previousElapsed, nextElapsed, cpuLevel); | 
|  | if (memoryUsage != -1) { | 
|  | double memoryUsageMb = (double) memoryUsage / (1024 * 1024); | 
|  | addRange(ProfilerTask.LOCAL_MEMORY_USAGE, previousElapsed, nextElapsed, memoryUsageMb); | 
|  | } | 
|  | addRange(ProfilerTask.SYSTEM_CPU_USAGE, previousElapsed, nextElapsed, systemUsage); | 
|  | addRange( | 
|  | ProfilerTask.SYSTEM_MEMORY_USAGE, | 
|  | previousElapsed, | 
|  | nextElapsed, | 
|  | (double) systemMemoryUsageMb); | 
|  | addRange( | 
|  | ProfilerTask.WORKERS_MEMORY_USAGE, previousElapsed, nextElapsed, workerMemoryUsageMb); | 
|  | if (loadAverage > 0) { | 
|  | addRange(ProfilerTask.SYSTEM_LOAD_AVERAGE, previousElapsed, nextElapsed, loadAverage); | 
|  | } | 
|  | if (pressureStallIo >= 0) { | 
|  | addRange(ProfilerTask.PRESSURE_STALL_IO, previousElapsed, nextElapsed, pressureStallIo); | 
|  | } | 
|  | if (pressureStallMemory >= 0) { | 
|  | addRange( | 
|  | ProfilerTask.PRESSURE_STALL_MEMORY, | 
|  | previousElapsed, | 
|  | nextElapsed, | 
|  | pressureStallMemory); | 
|  | } | 
|  | if (systemNetworkUsages != null) { | 
|  | addRange( | 
|  | ProfilerTask.SYSTEM_NETWORK_UP_USAGE, | 
|  | previousElapsed, | 
|  | nextElapsed, | 
|  | systemNetworkUsages.megabitsSentPerSec()); | 
|  | addRange( | 
|  | ProfilerTask.SYSTEM_NETWORK_DOWN_USAGE, | 
|  | previousElapsed, | 
|  | nextElapsed, | 
|  | systemNetworkUsages.megabitsRecvPerSec()); | 
|  | } | 
|  |  | 
|  | addRange( | 
|  | ProfilerTask.MEMORY_USAGE_ESTIMATION, | 
|  | previousElapsed, | 
|  | nextElapsed, | 
|  | estimatedMemoryUsageInMb); | 
|  | addRange( | 
|  | ProfilerTask.CPU_USAGE_ESTIMATION, previousElapsed, nextElapsed, estimatedCpuUsage); | 
|  | } | 
|  | previousElapsed = nextElapsed; | 
|  | previousCpuTimeNanos = nextCpuTimeNanos; | 
|  | } | 
|  | } | 
|  |  | 
|  | public void stopCollecting() { | 
|  | Preconditions.checkArgument(!stopLocalUsageCollection); | 
|  | stopLocalUsageCollection = true; | 
|  | interrupt(); | 
|  | } | 
|  |  | 
|  | synchronized void logCollectedData() { | 
|  | if (!profilingStarted) { | 
|  | return; | 
|  | } | 
|  | Preconditions.checkArgument(stopLocalUsageCollection); | 
|  | long endTimeNanos = System.nanoTime(); | 
|  | long elapsedNanos = stopwatch.elapsed(TimeUnit.NANOSECONDS); | 
|  | long startTimeNanos = endTimeNanos - elapsedNanos; | 
|  | Duration profileStart = Duration.ofNanos(startTimeNanos); | 
|  | int len = (int) (elapsedNanos / BUCKET_DURATION.toNanos()) + 1; | 
|  | Profiler profiler = Profiler.instance(); | 
|  |  | 
|  | for (ProfilerTask task : timeSeries.keySet()) { | 
|  | profiler.logCounters( | 
|  | ImmutableMap.ofEntries(Map.entry(task, timeSeries.get(task).toDoubleArray(len))), | 
|  | profileStart, | 
|  | BUCKET_DURATION); | 
|  | } | 
|  | timeSeries = null; | 
|  | } | 
|  |  | 
|  | private void addRange( | 
|  | ProfilerTask type, Duration previousElapsed, Duration nextElapsed, double value) { | 
|  | synchronized (this) { | 
|  | if (timeSeries == null) { | 
|  | return; | 
|  | } | 
|  | TimeSeries series = timeSeries.get(type); | 
|  | if (series != null) { | 
|  | series.addRange(previousElapsed, nextElapsed, value); | 
|  | } | 
|  | } | 
|  | } | 
|  | } |