Implement CgroupsInfoCollector that collects information from the CgroupsInfo about each process.
TODO: Extract out common interface between CgroupsInfoCollector and PsInfoCollector - the WorkerProcessMetricsCollector should be passed an interface and should not care what resource collector it is using.
PiperOrigin-RevId: 605269981
Change-Id: Ie2afedf01a84a6bc046aab072d8ef9d35fe3517d
diff --git a/src/main/java/com/google/devtools/build/lib/metrics/BUILD b/src/main/java/com/google/devtools/build/lib/metrics/BUILD
index 6015565..a4f89c1 100644
--- a/src/main/java/com/google/devtools/build/lib/metrics/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/metrics/BUILD
@@ -49,6 +49,7 @@
"//src/main/java/com/google/devtools/build/lib/packages/metrics:package_load_metrics_java_proto",
"//src/main/java/com/google/devtools/build/lib/profiler",
"//src/main/java/com/google/devtools/build/lib/profiler:network_metrics_collector",
+ "//src/main/java/com/google/devtools/build/lib/sandbox:cgroups_info",
"//src/main/java/com/google/devtools/build/lib/skyframe:execution_finished_event",
"//src/main/java/com/google/devtools/build/lib/skyframe:top_level_status_events",
"//src/main/java/com/google/devtools/build/lib/worker:worker_process_metrics",
@@ -88,13 +89,15 @@
)
java_library(
- name = "ps_info_collector",
+ name = "resource_collector",
srcs = [
+ "CgroupsInfoCollector.java",
"PsInfoCollector.java",
"ResourceSnapshot.java",
],
deps = [
"//src/main/java/com/google/devtools/build/lib/clock",
+ "//src/main/java/com/google/devtools/build/lib/sandbox:cgroups_info",
"//third_party:auto_value",
"//third_party:flogger",
"//third_party:guava",
diff --git a/src/main/java/com/google/devtools/build/lib/metrics/CgroupsInfoCollector.java b/src/main/java/com/google/devtools/build/lib/metrics/CgroupsInfoCollector.java
new file mode 100644
index 0000000..684d875
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/metrics/CgroupsInfoCollector.java
@@ -0,0 +1,51 @@
+// Copyright 2024 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.metrics;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.devtools.build.lib.clock.Clock;
+import com.google.devtools.build.lib.sandbox.CgroupsInfo;
+import java.util.Map;
+
+/** Collects resource usage of processes from their cgroups {@code CgroupsInfo}. */
+public class CgroupsInfoCollector {
+ // TODO(b/292634407, b/323341972): Extract a common interface between CgroupsInfoCollector and
+ // PsInfoCollector to be passed to the WorkerProcessMetricsCollector. Then make both classes
+ // final.
+
+ // Mainly a singleton for mocking purposes. But also useful if we want to persist additional
+ // state as in the PsInfoCollector.
+ private static final CgroupsInfoCollector instance = new CgroupsInfoCollector();
+
+ private CgroupsInfoCollector() {}
+
+ public static CgroupsInfoCollector instance() {
+ return instance;
+ }
+
+ public ResourceSnapshot collectResourceUsage(Map<Long, CgroupsInfo> pidToCgroups, Clock clock) {
+ ImmutableMap.Builder<Long, Integer> pidToMemoryInKb = ImmutableMap.builder();
+
+ for (Map.Entry<Long, CgroupsInfo> entry : pidToCgroups.entrySet()) {
+ CgroupsInfo cgroup = entry.getValue();
+ // TODO(b/292634407): Consider how to handle the unlikely case where only some cgroups are
+ // invalid.
+ if (cgroup.exists()) {
+ pidToMemoryInKb.put(entry.getKey(), entry.getValue().getMemoryUsageInKb());
+ }
+ }
+ return ResourceSnapshot.create(pidToMemoryInKb.buildOrThrow(), clock.now());
+ }
+}
diff --git a/src/main/java/com/google/devtools/build/lib/metrics/PsInfoCollector.java b/src/main/java/com/google/devtools/build/lib/metrics/PsInfoCollector.java
index 00cd48f..f39bc70 100644
--- a/src/main/java/com/google/devtools/build/lib/metrics/PsInfoCollector.java
+++ b/src/main/java/com/google/devtools/build/lib/metrics/PsInfoCollector.java
@@ -24,7 +24,6 @@
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.flogger.GoogleLogger;
-import com.google.devtools.build.lib.clock.BlazeClock;
import com.google.devtools.build.lib.clock.Clock;
import java.io.BufferedReader;
import java.io.IOException;
@@ -42,7 +41,6 @@
// Updates snapshots no more than once per interval. Running ps is somewhat slow and should not be
// done too often.
private static final Duration MIN_COLLECTION_INTERVAL = Duration.ofMillis(500);
- private static final Clock clock = BlazeClock.instance();
private static final PsInfoCollector instance = new PsInfoCollector();
public static PsInfoCollector instance() {
@@ -58,14 +56,15 @@
* If ps snapshot was outdated will update it, and then returns resource consumption snapshot of
* processes subtrees based on collected ps snapshot.
*/
- public synchronized ResourceSnapshot collectResourceUsage(ImmutableSet<Long> processIds) {
+ public synchronized ResourceSnapshot collectResourceUsage(
+ ImmutableSet<Long> processIds, Clock clock) {
Instant now = clock.now();
if (currentPsSnapshot == null
|| Duration.between(currentPsSnapshot.getCollectionTime(), now)
.compareTo(MIN_COLLECTION_INTERVAL)
> 0) {
- updatePsSnapshot();
+ updatePsSnapshot(clock);
}
ImmutableMap.Builder<Long, Integer> pidToMemoryInKb = ImmutableMap.builder();
@@ -82,7 +81,7 @@
}
/** Updates current snapshot of all processes state, using ps command. */
- private void updatePsSnapshot() {
+ private void updatePsSnapshot(Clock clock) {
// TODO(b/279003887): add exception if we couldn't collect the metrics.
ImmutableMap<Long, PsInfo> pidToPsInfo = collectDataFromPs();
diff --git a/src/main/java/com/google/devtools/build/lib/metrics/ResourceSnapshot.java b/src/main/java/com/google/devtools/build/lib/metrics/ResourceSnapshot.java
index 99009f7..5a3d693 100644
--- a/src/main/java/com/google/devtools/build/lib/metrics/ResourceSnapshot.java
+++ b/src/main/java/com/google/devtools/build/lib/metrics/ResourceSnapshot.java
@@ -32,7 +32,7 @@
return new AutoValue_ResourceSnapshot(pidToMemoryInKb, collectionTime);
}
- public static ResourceSnapshot createEmpty() {
- return new AutoValue_ResourceSnapshot(ImmutableMap.of(), Instant.now());
+ public static ResourceSnapshot createEmpty(Instant collectionTime) {
+ return new AutoValue_ResourceSnapshot(ImmutableMap.of(), collectionTime);
}
}
diff --git a/src/main/java/com/google/devtools/build/lib/sandbox/BUILD b/src/main/java/com/google/devtools/build/lib/sandbox/BUILD
index 88714c3..25421fc 100644
--- a/src/main/java/com/google/devtools/build/lib/sandbox/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/sandbox/BUILD
@@ -194,15 +194,13 @@
java_library(
name = "linux_sandbox",
srcs = [
- "CgroupsInfo.java",
- "CgroupsInfoV1.java",
- "CgroupsInfoV2.java",
"LinuxSandboxedSpawnRunner.java",
"LinuxSandboxedStrategy.java",
],
data = ["//src/main/tools:linux-sandbox"],
deps = [
":abstract_sandbox_spawn_runner",
+ ":cgroups_info",
":linux_sandbox_command_line_builder",
":linux_sandbox_util",
":sandbox_helpers",
@@ -226,6 +224,20 @@
"//src/main/java/com/google/devtools/build/lib/util:os",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
+ "//third_party:guava",
+ "//third_party:jsr305",
+ ],
+)
+
+java_library(
+ name = "cgroups_info",
+ srcs = [
+ "CgroupsInfo.java",
+ "CgroupsInfoV1.java",
+ "CgroupsInfoV2.java",
+ ],
+ deps = [
+ "//src/main/java/com/google/devtools/build/lib/util:os",
"//third_party:flogger",
"//third_party:guava",
"//third_party:jsr305",
diff --git a/src/main/java/com/google/devtools/build/lib/worker/BUILD b/src/main/java/com/google/devtools/build/lib/worker/BUILD
index cf14e15..13ff305 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/worker/BUILD
@@ -158,7 +158,7 @@
":worker_process_status",
"//src/main/java/com/google/devtools/build/lib/buildeventstream/proto:build_event_stream_java_proto",
"//src/main/java/com/google/devtools/build/lib/clock",
- "//src/main/java/com/google/devtools/build/lib/metrics:ps_info_collector",
+ "//src/main/java/com/google/devtools/build/lib/metrics:resource_collector",
"//src/main/java/com/google/devtools/build/lib/util:os",
"//third_party:guava",
],
@@ -272,7 +272,7 @@
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/exec:tree_deleter",
"//src/main/java/com/google/devtools/build/lib/profiler",
- "//src/main/java/com/google/devtools/build/lib/sandbox:linux_sandbox",
+ "//src/main/java/com/google/devtools/build/lib/sandbox:cgroups_info",
"//src/main/java/com/google/devtools/build/lib/sandbox:linux_sandbox_command_line_builder",
"//src/main/java/com/google/devtools/build/lib/sandbox:linux_sandbox_util",
"//src/main/java/com/google/devtools/build/lib/sandbox:sandbox_helpers",
diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerProcessMetricsCollector.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerProcessMetricsCollector.java
index ba998a2..d80bb78 100644
--- a/src/main/java/com/google/devtools/build/lib/worker/WorkerProcessMetricsCollector.java
+++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerProcessMetricsCollector.java
@@ -76,12 +76,12 @@
ResourceSnapshot collectResourceUsage(OS os, ImmutableSet<Long> processIds) {
// TODO(b/181317827): Support Windows.
if (processIds.isEmpty()) {
- return ResourceSnapshot.createEmpty();
+ return ResourceSnapshot.createEmpty(clock.now());
}
if (os == OS.LINUX || os == OS.DARWIN) {
- return PsInfoCollector.instance().collectResourceUsage(processIds);
+ return PsInfoCollector.instance().collectResourceUsage(processIds, clock);
}
- return ResourceSnapshot.createEmpty();
+ return ResourceSnapshot.createEmpty(clock.now());
}
public ImmutableList<WorkerProcessMetrics> getLiveWorkerProcessMetrics() {
diff --git a/src/test/java/com/google/devtools/build/lib/metrics/BUILD b/src/test/java/com/google/devtools/build/lib/metrics/BUILD
index 98d6cbc..10026ef 100644
--- a/src/test/java/com/google/devtools/build/lib/metrics/BUILD
+++ b/src/test/java/com/google/devtools/build/lib/metrics/BUILD
@@ -55,11 +55,31 @@
)
java_test(
+ name = "CgroupsInfoCollectorTest",
+ size = "small",
+ srcs = [
+ "CgroupsInfoCollectorTest.java",
+ ],
+ deps = [
+ "//src/main/java/com/google/devtools/build/lib/clock",
+ "//src/main/java/com/google/devtools/build/lib/metrics:resource_collector",
+ "//src/main/java/com/google/devtools/build/lib/sandbox:cgroups_info",
+ "//third_party:guava",
+ "//third_party:junit4",
+ "//third_party:mockito",
+ "//third_party:truth",
+ ],
+)
+
+java_test(
name = "PsInfoCollectorTest",
size = "small",
- srcs = ["PsInfoCollectorTest.java"],
+ srcs = [
+ "PsInfoCollectorTest.java",
+ ],
deps = [
- "//src/main/java/com/google/devtools/build/lib/metrics:ps_info_collector",
+ "//src/main/java/com/google/devtools/build/lib/clock",
+ "//src/main/java/com/google/devtools/build/lib/metrics:resource_collector",
"//third_party:guava",
"//third_party:junit4",
"//third_party:mockito",
diff --git a/src/test/java/com/google/devtools/build/lib/metrics/CgroupsInfoCollectorTest.java b/src/test/java/com/google/devtools/build/lib/metrics/CgroupsInfoCollectorTest.java
new file mode 100644
index 0000000..5308ea1
--- /dev/null
+++ b/src/test/java/com/google/devtools/build/lib/metrics/CgroupsInfoCollectorTest.java
@@ -0,0 +1,53 @@
+// Copyright 2024 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.metrics;
+
+import static com.google.common.truth.Truth.assertThat;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.devtools.build.lib.clock.BlazeClock;
+import com.google.devtools.build.lib.clock.Clock;
+import com.google.devtools.build.lib.sandbox.CgroupsInfo;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class CgroupsInfoCollectorTest {
+
+ @Test
+ public void testCollectResourceUsage_returnsValidCgroupMemoryUsage() {
+ Clock clock = BlazeClock.instance();
+ CgroupsInfo cgroupsInfo1 = mock(CgroupsInfo.class);
+ when(cgroupsInfo1.getMemoryUsageInKb()).thenReturn(1000);
+ when(cgroupsInfo1.exists()).thenReturn(true);
+ CgroupsInfo cgroupsInfo2 = mock(CgroupsInfo.class);
+ when(cgroupsInfo2.exists()).thenReturn(false);
+ when(cgroupsInfo2.getMemoryUsageInKb()).thenReturn(2000);
+ CgroupsInfo cgroupsInfo3 = mock(CgroupsInfo.class);
+ when(cgroupsInfo3.exists()).thenReturn(true);
+ when(cgroupsInfo3.getMemoryUsageInKb()).thenReturn(3000);
+
+ ResourceSnapshot snapshot =
+ CgroupsInfoCollector.instance()
+ .collectResourceUsage(
+ ImmutableMap.of(1L, cgroupsInfo1, 2L, cgroupsInfo2, 3L, cgroupsInfo3), clock);
+
+ // Results from cgroups 2 should not be in the snapshot since it doesn't exist.
+ assertThat(snapshot.getPidToMemoryInKb()).containsExactly(1L, 1000, 3L, 3000);
+ }
+}
diff --git a/src/test/java/com/google/devtools/build/lib/metrics/PsInfoCollectorTest.java b/src/test/java/com/google/devtools/build/lib/metrics/PsInfoCollectorTest.java
index 3372fdc..790318c 100644
--- a/src/test/java/com/google/devtools/build/lib/metrics/PsInfoCollectorTest.java
+++ b/src/test/java/com/google/devtools/build/lib/metrics/PsInfoCollectorTest.java
@@ -21,13 +21,14 @@
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
+import com.google.devtools.build.lib.clock.BlazeClock;
+import com.google.devtools.build.lib.clock.Clock;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
-/** Unit tests for the PsInfoCollector. */
@RunWith(JUnit4.class)
public final class PsInfoCollectorTest {
@@ -54,6 +55,7 @@
@Test
public void testCollectStats_multipleSubprocesses() {
+ Clock clock = BlazeClock.instance();
// pstree of these processes
// 0-+-1---3-+-7
// | `-8
@@ -87,7 +89,7 @@
ImmutableSet<Long> pids = ImmutableSet.of(1L, 2L, 5L, 6L);
when(spyCollector.collectDataFromPs()).thenReturn(psInfos);
- ResourceSnapshot resourceSnapshot = spyCollector.collectResourceUsage(pids);
+ ResourceSnapshot resourceSnapshot = spyCollector.collectResourceUsage(pids, clock);
ImmutableMap<Long, Integer> expectedMemoryUsageByPid =
ImmutableMap.of(1L, 3216 + 1234 + 2345 + 3456, 2L, 4232 + 1001 + 1032, 5L, 40000);
diff --git a/src/test/java/com/google/devtools/build/lib/sandbox/BUILD b/src/test/java/com/google/devtools/build/lib/sandbox/BUILD
index 671997d..2645ed3 100644
--- a/src/test/java/com/google/devtools/build/lib/sandbox/BUILD
+++ b/src/test/java/com/google/devtools/build/lib/sandbox/BUILD
@@ -136,8 +136,7 @@
srcs = ["CgroupsInfoTest.java"],
tags = ["no_windows"],
deps = [
- "//src/main/java/com/google/devtools/build/lib/sandbox:linux_sandbox",
- "//src/main/java/com/google/devtools/build/lib/util:pair",
+ "//src/main/java/com/google/devtools/build/lib/sandbox:cgroups_info",
"//src/test/java/com/google/devtools/build/lib/vfs/util",
"//third_party:guava",
"//third_party:junit4",
diff --git a/src/test/java/com/google/devtools/build/lib/worker/BUILD b/src/test/java/com/google/devtools/build/lib/worker/BUILD
index c1ece17..73d83aa 100644
--- a/src/test/java/com/google/devtools/build/lib/worker/BUILD
+++ b/src/test/java/com/google/devtools/build/lib/worker/BUILD
@@ -99,7 +99,7 @@
"//src/main/java/com/google/devtools/build/lib/exec:bin_tools",
"//src/main/java/com/google/devtools/build/lib/exec:spawn_runner",
"//src/main/java/com/google/devtools/build/lib/exec/local",
- "//src/main/java/com/google/devtools/build/lib/metrics:ps_info_collector",
+ "//src/main/java/com/google/devtools/build/lib/metrics:resource_collector",
"//src/main/java/com/google/devtools/build/lib/sandbox:sandbox_helpers",
"//src/main/java/com/google/devtools/build/lib/sandbox:tree_deleter",
"//src/main/java/com/google/devtools/build/lib/util:abrupt_exit_exception",