// Copyright 2018 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
| package com.google.devtools.build.lib.actions; |
| |
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
| |
| /** Timing, size, and memory statistics for a Spawn execution. */ |
| public final class SpawnMetrics { |
| |
| /** Indicates whether the metrics correspond to the remote, local or worker execution. */ |
| public static enum ExecKind { |
| REMOTE("Remote"), |
| LOCAL("Local"), |
| WORKER("Worker"), |
| /** |
| * Other kinds of execution (or when it's not clear whether something happened locally or |
| * remotely). |
| */ |
| OTHER("Other"); |
| |
| private final String name; |
| |
| private ExecKind(String name) { |
| this.name = name; |
| } |
| |
| @Override |
| public String toString() { |
| return name; |
| } |
| } |
| |
| /** Any non important stats < than 10% will not be shown in the summary. */ |
| private static final double STATS_SHOW_THRESHOLD = 0.10; |
| |
| public static SpawnMetrics forLocalExecution(Duration wallTime) { |
| return Builder.forLocalExec().setTotalTime(wallTime).setExecutionWallTime(wallTime).build(); |
| } |
| |
| private final ExecKind execKind; |
| private final Duration totalTime; |
| private final Duration parseTime; |
| private final Duration fetchTime; |
| private final Duration queueTime; |
| private final Duration uploadTime; |
| private final Duration setupTime; |
| private final Duration executionWallTime; |
| private final Duration processOutputsTime; |
| private final Duration networkTime; |
| private final Map<Integer, Duration> retryTime; |
| private final long inputBytes; |
| private final long inputFiles; |
| private final long memoryEstimateBytes; |
| private final long inputBytesLimit; |
| private final long inputFilesLimit; |
| private final long outputBytesLimit; |
| private final long outputFilesLimit; |
| private final long memoryBytesLimit; |
| private final Duration timeLimit; |
| |
| private SpawnMetrics(Builder builder) { |
| this.execKind = builder.execKind; |
| this.totalTime = builder.totalTime; |
| this.parseTime = builder.parseTime; |
| this.networkTime = builder.networkTime; |
| this.fetchTime = builder.fetchTime; |
| this.queueTime = builder.queueTime; |
| this.setupTime = builder.setupTime; |
| this.uploadTime = builder.uploadTime; |
| this.executionWallTime = builder.executionWallTime; |
| this.retryTime = builder.retryTime; |
| this.processOutputsTime = builder.processOutputsTime; |
| this.inputBytes = builder.inputBytes; |
| this.inputFiles = builder.inputFiles; |
| this.memoryEstimateBytes = builder.memoryEstimateBytes; |
| this.inputBytesLimit = builder.inputBytesLimit; |
| this.inputFilesLimit = builder.inputFilesLimit; |
| this.outputBytesLimit = builder.outputBytesLimit; |
| this.outputFilesLimit = builder.outputFilesLimit; |
| this.memoryBytesLimit = builder.memoryBytesLimit; |
| this.timeLimit = builder.timeLimit; |
| } |
| |
| /** The kind of execution the metrics refer to (remote/local/worker). */ |
| public ExecKind execKind() { |
| return execKind; |
| } |
| |
| /** Returns true if {@link #totalTime()} is zero. */ |
| public boolean isEmpty() { |
| return totalTime.isZero(); |
| } |
| |
| /** |
| * Total (measured locally) wall time spent running a spawn. This should be at least as large as |
| * all the other times summed together. |
| */ |
| public Duration totalTime() { |
| return totalTime; |
| } |
| |
| /** |
| * Total time spent getting on network. This includes time getting network-side errors and the |
| * time of the round-trip, found by taking the difference of wall time here and the server time |
| * reported by the RPC. This is 0 for locally executed spawns. |
| */ |
| public Duration networkTime() { |
| return networkTime; |
| } |
| |
| /** Total time waiting in queues. Includes queue time for any failed attempts. */ |
| public Duration queueTime() { |
| return queueTime; |
| } |
| |
| /** The time spent transferring files to the backends. This is 0 for locally executed spawns. */ |
| public Duration uploadTime() { |
| return uploadTime; |
| } |
| |
| /** |
| * The time required to setup the environment in which the spawn is run. This may be 0 for locally |
| * executed spawns, or may include time to setup a sandbox or other environment. Does not include |
| * failed attempts. |
| */ |
| public Duration setupTime() { |
| return setupTime; |
| } |
| |
| /** Time spent running the subprocess. */ |
| public Duration executionWallTime() { |
| return executionWallTime; |
| } |
| |
| /** |
| * The time taken to convert the spawn into a network request, e.g., collecting runfiles, and |
| * digests for all input files. |
| */ |
| public Duration parseTime() { |
| return parseTime; |
| } |
| |
| /** Total time spent fetching remote outputs. */ |
| public Duration fetchTime() { |
| return fetchTime; |
| } |
| |
| /** Time spent in previous failed attempts. Does not include queue time. */ |
| public Duration retryTime() { |
| return retryTime.values().stream().reduce(Duration.ZERO, Duration::plus); |
| } |
| |
| /** Time spent in previous failed attempts, keyed by error code. Does not include queue time. */ |
| public Map<Integer, Duration> retryTimeByError() { |
| return retryTime; |
| } |
| |
| |
| /** Time spend by the execution framework on processing outputs. */ |
| public Duration processOutputsTime() { |
| return processOutputsTime; |
| } |
| |
| /** Any time that is not measured by a more specific component, out of {@code totalTime()}. */ |
| public Duration otherTime() { |
| return totalTime |
| .minus(parseTime) |
| .minus(networkTime) |
| .minus(queueTime) |
| .minus(uploadTime) |
| .minus(setupTime) |
| .minus(executionWallTime) |
| .minus(fetchTime) |
| .minus(retryTime()) |
| .minus(processOutputsTime); |
| } |
| |
| /** Total size in bytes of inputs or 0 if unavailable. */ |
| public long inputBytes() { |
| return inputBytes; |
| } |
| |
| /** Total number of input files or 0 if unavailable. */ |
| public long inputFiles() { |
| return inputFiles; |
| } |
| |
| /** Estimated memory usage or 0 if unavailable. */ |
| public long memoryEstimate() { |
| return memoryEstimateBytes; |
| } |
| |
| /** Limit of total size in bytes of inputs or 0 if unavailable. */ |
| public long inputBytesLimit() { |
| return inputBytesLimit; |
| } |
| |
| /** Limit of total number of input files or 0 if unavailable. */ |
| public long inputFilesLimit() { |
| return inputFilesLimit; |
| } |
| |
| /** Limit of total size in bytes of outputs or 0 if unavailable. */ |
| public long outputBytesLimit() { |
| return outputBytesLimit; |
| } |
| |
| /** Limit of total number of output files or 0 if unavailable. */ |
| public long outputFilesLimit() { |
| return outputFilesLimit; |
| } |
| |
| /** Memory limit or 0 if unavailable. */ |
| public long memoryLimit() { |
| return memoryBytesLimit; |
| } |
| |
| /** Time limit or 0 if unavailable. */ |
| public Duration timeLimit() { |
| return timeLimit; |
| } |
| |
| /** |
| * Generates a String representation of the stats. |
| * |
| * @param total total time used to compute the percentages |
| * @param summary whether to exclude input file count and sizes, and memory estimates |
| */ |
| public String toString(Duration total, boolean summary) { |
| StringBuilder sb = new StringBuilder(); |
| sb.append("("); |
| sb.append(prettyPercentage(totalTime, total)); |
| sb.append(" of the time): ["); |
| List<String> stats = new ArrayList<>(8); |
| addStatToString(stats, "parse", !summary, parseTime, total); |
| addStatToString(stats, "queue", true, queueTime, total); |
| addStatToString(stats, "network", !summary, networkTime, total); |
| addStatToString(stats, "upload", !summary, uploadTime, total); |
| addStatToString(stats, "setup", true, setupTime, total); |
| addStatToString(stats, "process", true, executionWallTime, total); |
| addStatToString(stats, "fetch", !summary, fetchTime, total); |
| addStatToString(stats, "retry", !summary, retryTime(), total); |
| addStatToString(stats, "processOutputs", !summary, processOutputsTime, total); |
| addStatToString(stats, "other", !summary, otherTime(), total); |
| if (!summary) { |
| stats.add("input files: " + inputFiles); |
| stats.add("input bytes: " + inputBytes); |
| stats.add("memory bytes: " + memoryEstimateBytes); |
| stats.add("input files limit: " + inputFilesLimit); |
| stats.add("input bytes limit: " + inputBytesLimit); |
| stats.add("output files limit: " + outputFilesLimit); |
| stats.add("output bytes limit: " + outputBytesLimit); |
| stats.add("memory limit: " + memoryBytesLimit); |
| stats.add("time limit: " + timeLimit.getSeconds() + " seconds"); |
| } |
| Joiner.on(", ").appendTo(sb, stats); |
| sb.append("]"); |
| return sb.toString(); |
| } |
| |
| /** |
| * Add to {@code strings} the string representation of {@code name} component. If {@code |
| * forceShow} is set to false it will only show if it is above certain threshold. |
| */ |
| private static void addStatToString( |
| List<String> strings, String name, boolean forceShow, Duration time, Duration totalTime) { |
| if (forceShow || isAboveThreshold(time, totalTime)) { |
| strings.add(name + ": " + prettyPercentage(time, totalTime)); |
| } |
| } |
| |
| private static boolean isAboveThreshold(Duration time, Duration totalTime) { |
| return totalTime.toMillis() > 0 |
| && (((float) time.toMillis() / totalTime.toMillis()) >= STATS_SHOW_THRESHOLD); |
| } |
| |
| /** |
| * Converts relative duration to the percentage string. |
| * |
| * @return formatted percentage string or "N/A" if result is undefined |
| */ |
| private static String prettyPercentage(Duration duration, Duration total) { |
| // Duration.toMillis() != 0 does not imply !Duration.isZero() (due to truncation). |
| if (total.toMillis() == 0) { |
| // Return "not available" string if total is 0 and result is undefined. |
| return "N/A"; |
| } |
| return String.format(Locale.US, "%.2f%%", duration.toMillis() * 100.0 / total.toMillis()); |
| } |
| |
| /** Builder class for SpawnMetrics. */ |
| public static class Builder { |
| private ExecKind execKind = null; |
| private Duration totalTime = Duration.ZERO; |
| private Duration parseTime = Duration.ZERO; |
| private Duration networkTime = Duration.ZERO; |
| private Duration fetchTime = Duration.ZERO; |
| private Duration queueTime = Duration.ZERO; |
| private Duration setupTime = Duration.ZERO; |
| private Duration uploadTime = Duration.ZERO; |
| private Duration executionWallTime = Duration.ZERO; |
| private Duration processOutputsTime = Duration.ZERO; |
| private Map<Integer, Duration> retryTime = new HashMap<>(); |
| private long inputBytes = 0; |
| private long inputFiles = 0; |
| private long memoryEstimateBytes = 0; |
| private long inputBytesLimit = 0; |
| private long inputFilesLimit = 0; |
| private long outputBytesLimit = 0; |
| private long outputFilesLimit = 0; |
| private long memoryBytesLimit = 0; |
| private Duration timeLimit = Duration.ZERO; |
| |
| public static Builder forLocalExec() { |
| return forExec(ExecKind.LOCAL); |
| } |
| |
| public static Builder forRemoteExec() { |
| return forExec(ExecKind.REMOTE); |
| } |
| |
| public static Builder forWorkerExec() { |
| return forExec(ExecKind.WORKER); |
| } |
| |
| public static Builder forOtherExec() { |
| return forExec(ExecKind.OTHER); |
| } |
| |
| public static Builder forExec(ExecKind kind) { |
| return new Builder().setExecKind(kind); |
| } |
| |
| // Make the constructor private to force users to set the ExecKind by using one of the factory |
| // methods. |
| private Builder() {} |
| |
| public SpawnMetrics build() { |
| Preconditions.checkNotNull(execKind, "ExecKind must be explicitly set using `setExecKind`"); |
| // TODO(ulfjack): Add consistency checks here? |
| return new SpawnMetrics(this); |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setExecKind(ExecKind execKind) { |
| this.execKind = execKind; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setTotalTime(Duration totalTime) { |
| this.totalTime = totalTime; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setParseTime(Duration parseTime) { |
| this.parseTime = parseTime; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setNetworkTime(Duration networkTime) { |
| this.networkTime = networkTime; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setFetchTime(Duration fetchTime) { |
| this.fetchTime = fetchTime; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setQueueTime(Duration queueTime) { |
| this.queueTime = queueTime; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setSetupTime(Duration setupTime) { |
| this.setupTime = setupTime; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder addSetupTime(Duration setupTime) { |
| this.setupTime = this.setupTime.plus(setupTime); |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setUploadTime(Duration uploadTime) { |
| this.uploadTime = uploadTime; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setExecutionWallTime(Duration executionWallTime) { |
| this.executionWallTime = executionWallTime; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder addRetryTime(int errorCode, Duration retryTime) { |
| Duration d = this.retryTime.getOrDefault(errorCode, Duration.ZERO); |
| this.retryTime.put(errorCode, d.plus(retryTime)); |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setRetryTime(Map<Integer, Duration> retryTime) { |
| this.retryTime = new HashMap<>(retryTime); |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setProcessOutputsTime(Duration processOutputsTime) { |
| this.processOutputsTime = processOutputsTime; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setInputBytes(long inputBytes) { |
| this.inputBytes = inputBytes; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setInputFiles(long inputFiles) { |
| this.inputFiles = inputFiles; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setMemoryEstimateBytes(long memoryEstimateBytes) { |
| this.memoryEstimateBytes = memoryEstimateBytes; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setInputBytesLimit(long inputBytesLimit) { |
| this.inputBytesLimit = inputBytesLimit; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setInputFilesLimit(long inputFilesLimit) { |
| this.inputFilesLimit = inputFilesLimit; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setOutputBytesLimit(long outputBytesLimit) { |
| this.outputBytesLimit = outputBytesLimit; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setOutputFilesLimit(long outputFilesLimit) { |
| this.outputFilesLimit = outputFilesLimit; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setMemoryBytesLimit(long memoryBytesLimit) { |
| this.memoryBytesLimit = memoryBytesLimit; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder setTimeLimit(Duration timeLimit) { |
| this.timeLimit = timeLimit; |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder addDurations(SpawnMetrics metric) { |
| totalTime = totalTime.plus(metric.totalTime()); |
| parseTime = parseTime.plus(metric.parseTime()); |
| networkTime = networkTime.plus(metric.networkTime()); |
| fetchTime = fetchTime.plus(metric.fetchTime()); |
| queueTime = queueTime.plus(metric.queueTime()); |
| uploadTime = uploadTime.plus(metric.uploadTime()); |
| setupTime = setupTime.plus(metric.setupTime()); |
| executionWallTime = executionWallTime.plus(metric.executionWallTime()); |
| for (Map.Entry<Integer, Duration> entry : metric.retryTime.entrySet()) { |
| addRetryTime(entry.getKey().intValue(), entry.getValue()); |
| } |
| processOutputsTime = processOutputsTime.plus(metric.processOutputsTime()); |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder addNonDurations(SpawnMetrics metric) { |
| inputFiles += metric.inputFiles(); |
| inputBytes += metric.inputBytes(); |
| memoryEstimateBytes += metric.memoryEstimate(); |
| inputFilesLimit += metric.inputFilesLimit(); |
| inputBytesLimit += metric.inputBytesLimit(); |
| outputFilesLimit += metric.outputFilesLimit(); |
| outputBytesLimit += metric.outputBytesLimit(); |
| memoryBytesLimit += metric.memoryLimit(); |
| timeLimit = timeLimit.plus(metric.timeLimit()); |
| return this; |
| } |
| |
| @CanIgnoreReturnValue |
| public Builder maxNonDurations(SpawnMetrics metric) { |
| inputFiles = Long.max(inputFiles, metric.inputFiles()); |
| inputBytes = Long.max(inputBytes, metric.inputBytes()); |
| memoryEstimateBytes = Long.max(memoryEstimateBytes, metric.memoryEstimate()); |
| inputFilesLimit = Long.max(inputFilesLimit, metric.inputFilesLimit()); |
| inputBytesLimit = Long.max(inputBytesLimit, metric.inputBytesLimit()); |
| outputFilesLimit = Long.max(outputFilesLimit, metric.outputFilesLimit()); |
| outputBytesLimit = Long.max(outputBytesLimit, metric.outputBytesLimit()); |
| memoryBytesLimit = Long.max(memoryBytesLimit, metric.memoryLimit()); |
| timeLimit = |
| Duration.ofSeconds(Long.max(timeLimit.getSeconds(), metric.timeLimit().getSeconds())); |
| return this; |
| } |
| } |
| } |