Implement CgroupsInfoCollector that collects information from the CgroupsInfo about each process.
TODO: Extract out common interface between CgroupsInfoCollector and PsInfoCollector - the WorkerProcessMetricsCollector should be passed an interface and should not care what resource collector it is using.
PiperOrigin-RevId: 605269981
Change-Id: Ie2afedf01a84a6bc046aab072d8ef9d35fe3517d
diff --git a/src/main/java/com/google/devtools/build/lib/metrics/BUILD b/src/main/java/com/google/devtools/build/lib/metrics/BUILD
index 6015565..a4f89c1 100644
--- a/src/main/java/com/google/devtools/build/lib/metrics/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/metrics/BUILD
@@ -49,6 +49,7 @@
"//src/main/java/com/google/devtools/build/lib/packages/metrics:package_load_metrics_java_proto",
"//src/main/java/com/google/devtools/build/lib/profiler",
"//src/main/java/com/google/devtools/build/lib/profiler:network_metrics_collector",
+ "//src/main/java/com/google/devtools/build/lib/sandbox:cgroups_info",
"//src/main/java/com/google/devtools/build/lib/skyframe:execution_finished_event",
"//src/main/java/com/google/devtools/build/lib/skyframe:top_level_status_events",
"//src/main/java/com/google/devtools/build/lib/worker:worker_process_metrics",
@@ -88,13 +89,15 @@
)
java_library(
- name = "ps_info_collector",
+ name = "resource_collector",
srcs = [
+ "CgroupsInfoCollector.java",
"PsInfoCollector.java",
"ResourceSnapshot.java",
],
deps = [
"//src/main/java/com/google/devtools/build/lib/clock",
+ "//src/main/java/com/google/devtools/build/lib/sandbox:cgroups_info",
"//third_party:auto_value",
"//third_party:flogger",
"//third_party:guava",
diff --git a/src/main/java/com/google/devtools/build/lib/metrics/CgroupsInfoCollector.java b/src/main/java/com/google/devtools/build/lib/metrics/CgroupsInfoCollector.java
new file mode 100644
index 0000000..684d875
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/metrics/CgroupsInfoCollector.java
@@ -0,0 +1,51 @@
+// Copyright 2024 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.metrics;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.devtools.build.lib.clock.Clock;
+import com.google.devtools.build.lib.sandbox.CgroupsInfo;
+import java.util.Map;
+
+/** Collects resource usage of processes from their cgroups {@code CgroupsInfo}. */
+public class CgroupsInfoCollector {
+ // TODO(b/292634407, b/323341972): Extract a common interface between CgroupsInfoCollector and
+ // PsInfoCollector to be passed to the WorkerProcessMetricsCollector. Then make both classes
+ // final.
+
+ // Mainly a singleton for mocking purposes. But also useful if we want to persist additional
+ // state as in the PsInfoCollector.
+ private static final CgroupsInfoCollector instance = new CgroupsInfoCollector();
+
+ private CgroupsInfoCollector() {}
+
+ public static CgroupsInfoCollector instance() {
+ return instance;
+ }
+
+ public ResourceSnapshot collectResourceUsage(Map<Long, CgroupsInfo> pidToCgroups, Clock clock) {
+ ImmutableMap.Builder<Long, Integer> pidToMemoryInKb = ImmutableMap.builder();
+
+ for (Map.Entry<Long, CgroupsInfo> entry : pidToCgroups.entrySet()) {
+ CgroupsInfo cgroup = entry.getValue();
+ // TODO(b/292634407): Consider how to handle the unlikely case where only some cgroups are
+ // invalid.
+ if (cgroup.exists()) {
+ pidToMemoryInKb.put(entry.getKey(), entry.getValue().getMemoryUsageInKb());
+ }
+ }
+ return ResourceSnapshot.create(pidToMemoryInKb.buildOrThrow(), clock.now());
+ }
+}
diff --git a/src/main/java/com/google/devtools/build/lib/metrics/PsInfoCollector.java b/src/main/java/com/google/devtools/build/lib/metrics/PsInfoCollector.java
index 00cd48f..f39bc70 100644
--- a/src/main/java/com/google/devtools/build/lib/metrics/PsInfoCollector.java
+++ b/src/main/java/com/google/devtools/build/lib/metrics/PsInfoCollector.java
@@ -24,7 +24,6 @@
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.flogger.GoogleLogger;
-import com.google.devtools.build.lib.clock.BlazeClock;
import com.google.devtools.build.lib.clock.Clock;
import java.io.BufferedReader;
import java.io.IOException;
@@ -42,7 +41,6 @@
// Updates snapshots no more than once per interval. Running ps is somewhat slow and should not be
// done too often.
private static final Duration MIN_COLLECTION_INTERVAL = Duration.ofMillis(500);
- private static final Clock clock = BlazeClock.instance();
private static final PsInfoCollector instance = new PsInfoCollector();
public static PsInfoCollector instance() {
@@ -58,14 +56,15 @@
* If ps snapshot was outdated will update it, and then returns resource consumption snapshot of
* processes subtrees based on collected ps snapshot.
*/
- public synchronized ResourceSnapshot collectResourceUsage(ImmutableSet<Long> processIds) {
+ public synchronized ResourceSnapshot collectResourceUsage(
+ ImmutableSet<Long> processIds, Clock clock) {
Instant now = clock.now();
if (currentPsSnapshot == null
|| Duration.between(currentPsSnapshot.getCollectionTime(), now)
.compareTo(MIN_COLLECTION_INTERVAL)
> 0) {
- updatePsSnapshot();
+ updatePsSnapshot(clock);
}
ImmutableMap.Builder<Long, Integer> pidToMemoryInKb = ImmutableMap.builder();
@@ -82,7 +81,7 @@
}
/** Updates current snapshot of all processes state, using ps command. */
- private void updatePsSnapshot() {
+ private void updatePsSnapshot(Clock clock) {
// TODO(b/279003887): add exception if we couldn't collect the metrics.
ImmutableMap<Long, PsInfo> pidToPsInfo = collectDataFromPs();
diff --git a/src/main/java/com/google/devtools/build/lib/metrics/ResourceSnapshot.java b/src/main/java/com/google/devtools/build/lib/metrics/ResourceSnapshot.java
index 99009f7..5a3d693 100644
--- a/src/main/java/com/google/devtools/build/lib/metrics/ResourceSnapshot.java
+++ b/src/main/java/com/google/devtools/build/lib/metrics/ResourceSnapshot.java
@@ -32,7 +32,7 @@
return new AutoValue_ResourceSnapshot(pidToMemoryInKb, collectionTime);
}
- public static ResourceSnapshot createEmpty() {
- return new AutoValue_ResourceSnapshot(ImmutableMap.of(), Instant.now());
+ public static ResourceSnapshot createEmpty(Instant collectionTime) {
+ return new AutoValue_ResourceSnapshot(ImmutableMap.of(), collectionTime);
}
}
diff --git a/src/main/java/com/google/devtools/build/lib/sandbox/BUILD b/src/main/java/com/google/devtools/build/lib/sandbox/BUILD
index 88714c3..25421fc 100644
--- a/src/main/java/com/google/devtools/build/lib/sandbox/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/sandbox/BUILD
@@ -194,15 +194,13 @@
java_library(
name = "linux_sandbox",
srcs = [
- "CgroupsInfo.java",
- "CgroupsInfoV1.java",
- "CgroupsInfoV2.java",
"LinuxSandboxedSpawnRunner.java",
"LinuxSandboxedStrategy.java",
],
data = ["//src/main/tools:linux-sandbox"],
deps = [
":abstract_sandbox_spawn_runner",
+ ":cgroups_info",
":linux_sandbox_command_line_builder",
":linux_sandbox_util",
":sandbox_helpers",
@@ -226,6 +224,20 @@
"//src/main/java/com/google/devtools/build/lib/util:os",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
+ "//third_party:guava",
+ "//third_party:jsr305",
+ ],
+)
+
+java_library(
+ name = "cgroups_info",
+ srcs = [
+ "CgroupsInfo.java",
+ "CgroupsInfoV1.java",
+ "CgroupsInfoV2.java",
+ ],
+ deps = [
+ "//src/main/java/com/google/devtools/build/lib/util:os",
"//third_party:flogger",
"//third_party:guava",
"//third_party:jsr305",
diff --git a/src/main/java/com/google/devtools/build/lib/worker/BUILD b/src/main/java/com/google/devtools/build/lib/worker/BUILD
index cf14e15..13ff305 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/worker/BUILD
@@ -158,7 +158,7 @@
":worker_process_status",
"//src/main/java/com/google/devtools/build/lib/buildeventstream/proto:build_event_stream_java_proto",
"//src/main/java/com/google/devtools/build/lib/clock",
- "//src/main/java/com/google/devtools/build/lib/metrics:ps_info_collector",
+ "//src/main/java/com/google/devtools/build/lib/metrics:resource_collector",
"//src/main/java/com/google/devtools/build/lib/util:os",
"//third_party:guava",
],
@@ -272,7 +272,7 @@
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/exec:tree_deleter",
"//src/main/java/com/google/devtools/build/lib/profiler",
- "//src/main/java/com/google/devtools/build/lib/sandbox:linux_sandbox",
+ "//src/main/java/com/google/devtools/build/lib/sandbox:cgroups_info",
"//src/main/java/com/google/devtools/build/lib/sandbox:linux_sandbox_command_line_builder",
"//src/main/java/com/google/devtools/build/lib/sandbox:linux_sandbox_util",
"//src/main/java/com/google/devtools/build/lib/sandbox:sandbox_helpers",
diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerProcessMetricsCollector.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerProcessMetricsCollector.java
index ba998a2..d80bb78 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/WorkerProcessMetricsCollector.java
+++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerProcessMetricsCollector.java
@@ -76,12 +76,12 @@
ResourceSnapshot collectResourceUsage(OS os, ImmutableSet<Long> processIds) {
// TODO(b/181317827): Support Windows.
if (processIds.isEmpty()) {
- return ResourceSnapshot.createEmpty();
+ return ResourceSnapshot.createEmpty(clock.now());
}
if (os == OS.LINUX || os == OS.DARWIN) {
- return PsInfoCollector.instance().collectResourceUsage(processIds);
+ return PsInfoCollector.instance().collectResourceUsage(processIds, clock);
}
- return ResourceSnapshot.createEmpty();
+ return ResourceSnapshot.createEmpty(clock.now());
}
public ImmutableList<WorkerProcessMetrics> getLiveWorkerProcessMetrics() {