Delete `RetainedHeapLimiter`.
It has been superseded by `GcThrashingDetector`, which is simpler and has proven to be more effective.
Rename the flags without the `experimental_` prefix, keeping the old names to aid in migration. A default value for `--gc_thrashing_limits` is applied in order to keep the existing behavior that specifying a value < 100 for the threshold activates detection.
PiperOrigin-RevId: 552809809
Change-Id: Iee53eaa28632da0750ccc09e815eebc479d13890
diff --git a/src/main/java/com/google/devtools/build/lib/runtime/CommonCommandOptions.java b/src/main/java/com/google/devtools/build/lib/runtime/CommonCommandOptions.java
index 4412969..9657df3 100644
--- a/src/main/java/com/google/devtools/build/lib/runtime/CommonCommandOptions.java
+++ b/src/main/java/com/google/devtools/build/lib/runtime/CommonCommandOptions.java
@@ -360,18 +360,16 @@
+ " by 4 GCs with zero second pause")
public MemoryProfileStableHeapParameters memoryProfileStableHeapParameters;
-
@Option(
name = "heap_dump_on_oom",
defaultValue = "false",
documentationCategory = OptionDocumentationCategory.LOGGING,
effectTags = {OptionEffectTag.BAZEL_MONITORING},
help =
- "Whether to manually output a heap dump if an OOM is thrown (including OOMs due to"
- + " --experimental_oom_more_eagerly_threshold). The dump will be written to"
+ "Whether to manually output a heap dump if an OOM is thrown (including manual OOMs due to"
+ + " reaching --gc_thrashing_limits). The dump will be written to"
+ " <output_base>/<invocation_id>.heapdump.hprof. This option effectively replaces"
- + " -XX:+HeapDumpOnOutOfMemoryError, which has no effect because OOMs are caught and"
- + " redirected to Runtime#halt.")
+ + " -XX:+HeapDumpOnOutOfMemoryError, which has no effect for manual OOMs.")
public boolean heapDumpOnOom;
@Option(
diff --git a/src/main/java/com/google/devtools/build/lib/runtime/GcThrashingDetector.java b/src/main/java/com/google/devtools/build/lib/runtime/GcThrashingDetector.java
index 5c64839..68d6989 100644
--- a/src/main/java/com/google/devtools/build/lib/runtime/GcThrashingDetector.java
+++ b/src/main/java/com/google/devtools/build/lib/runtime/GcThrashingDetector.java
@@ -66,12 +66,12 @@
/** If enabled in {@link MemoryPressureOptions}, creates a {@link GcThrashingDetector}. */
@Nullable
static GcThrashingDetector createForCommand(MemoryPressureOptions options) {
- if (options.gcThrashingLimits.isEmpty() || options.oomMoreEagerlyThreshold == 100) {
+ if (options.gcThrashingLimits.isEmpty() || options.gcThrashingThreshold == 100) {
return null;
}
return new GcThrashingDetector(
- options.oomMoreEagerlyThreshold,
+ options.gcThrashingThreshold,
options.gcThrashingLimits,
BlazeClock.instance(),
BugReporter.defaultInstance());
diff --git a/src/main/java/com/google/devtools/build/lib/runtime/MemoryPressureListener.java b/src/main/java/com/google/devtools/build/lib/runtime/MemoryPressureListener.java
index b85eeff..5003978 100644
--- a/src/main/java/com/google/devtools/build/lib/runtime/MemoryPressureListener.java
+++ b/src/main/java/com/google/devtools/build/lib/runtime/MemoryPressureListener.java
@@ -42,19 +42,16 @@
final class MemoryPressureListener implements NotificationListener {
private final AtomicReference<EventBus> eventBus = new AtomicReference<>();
- private final RetainedHeapLimiter retainedHeapLimiter;
private final AtomicReference<GcThrashingDetector> gcThrashingDetector = new AtomicReference<>();
private final Executor executor;
- private MemoryPressureListener(RetainedHeapLimiter retainedHeapLimiter, Executor executor) {
- this.retainedHeapLimiter = retainedHeapLimiter;
+ private MemoryPressureListener(Executor executor) {
this.executor = executor;
}
- static MemoryPressureListener create(RetainedHeapLimiter retainedHeapLimiter) {
+ static MemoryPressureListener create() {
return createFromBeans(
ManagementFactory.getGarbageCollectorMXBeans(),
- retainedHeapLimiter,
// Use a dedicated thread to broadcast memory pressure events. The service thread that calls
// handleNotification for GC events is not a typical Java thread - it doesn't work with
// debugger breakpoints and may not show up in thread dumps.
@@ -65,7 +62,6 @@
@VisibleForTesting
static MemoryPressureListener createFromBeans(
List<GarbageCollectorMXBean> gcBeans,
- RetainedHeapLimiter retainedHeapLimiter,
Executor executor) {
ImmutableList<NotificationEmitter> tenuredGcEmitters = findTenuredCollectorBeans(gcBeans);
if (tenuredGcEmitters.isEmpty()) {
@@ -79,8 +75,7 @@
"Unable to find tenured collector from %s: names were %s.", gcBeans, names));
}
- MemoryPressureListener memoryPressureListener =
- new MemoryPressureListener(retainedHeapLimiter, executor);
+ MemoryPressureListener memoryPressureListener = new MemoryPressureListener(executor);
tenuredGcEmitters.forEach(e -> e.addNotificationListener(memoryPressureListener, null, null));
return memoryPressureListener;
}
@@ -150,14 +145,10 @@
}
// A null EventBus implies memory pressure event between commands with no active EventBus.
- // In such cases, notify RetainedHeapLimiter but do not publish event.
EventBus eventBus = this.eventBus.get();
if (eventBus != null) {
eventBus.post(event);
}
- // Post to EventBus first so memory pressure subscribers have a chance to make things
- // eligible for GC before RetainedHeapLimiter would trigger a full GC.
- this.retainedHeapLimiter.handle(event);
}
void setEventBus(@Nullable EventBus eventBus) {
diff --git a/src/main/java/com/google/devtools/build/lib/runtime/MemoryPressureModule.java b/src/main/java/com/google/devtools/build/lib/runtime/MemoryPressureModule.java
index 0ba6a8b..d1850ca 100644
--- a/src/main/java/com/google/devtools/build/lib/runtime/MemoryPressureModule.java
+++ b/src/main/java/com/google/devtools/build/lib/runtime/MemoryPressureModule.java
@@ -17,7 +17,6 @@
import com.google.common.collect.ImmutableList;
import com.google.common.eventbus.EventBus;
import com.google.common.eventbus.Subscribe;
-import com.google.devtools.build.lib.analysis.BlazeDirectories;
import com.google.devtools.build.lib.runtime.MemoryPressure.MemoryPressureStats;
import com.google.devtools.build.lib.skyframe.HighWaterMarkLimiter;
import com.google.devtools.common.options.OptionsBase;
@@ -28,19 +27,11 @@
* pressure events.
*/
public final class MemoryPressureModule extends BlazeModule {
- private RetainedHeapLimiter retainedHeapLimiter;
- private MemoryPressureListener memoryPressureListener;
+ private final MemoryPressureListener memoryPressureListener = MemoryPressureListener.create();
private HighWaterMarkLimiter highWaterMarkLimiter;
private EventBus eventBus;
@Override
- public void workspaceInit(
- BlazeRuntime runtime, BlazeDirectories directories, WorkspaceBuilder builder) {
- retainedHeapLimiter = RetainedHeapLimiter.create(runtime.getBugReporter());
- memoryPressureListener = MemoryPressureListener.create(retainedHeapLimiter);
- }
-
- @Override
public ImmutableList<Class<? extends OptionsBase>> getCommandOptions(Command command) {
return ImmutableList.of(MemoryPressureOptions.class);
}
@@ -54,7 +45,6 @@
highWaterMarkLimiter =
new HighWaterMarkLimiter(env.getSkyframeExecutor(), env.getSyscallCache(), options);
memoryPressureListener.setGcThrashingDetector(GcThrashingDetector.createForCommand(options));
- retainedHeapLimiter.setOptions(options);
eventBus.register(this);
eventBus.register(highWaterMarkLimiter);
@@ -77,7 +67,6 @@
private void postStats() {
MemoryPressureStats.Builder stats = MemoryPressureStats.newBuilder();
- retainedHeapLimiter.addStatsAndReset(stats);
highWaterMarkLimiter.addStatsAndReset(stats);
eventBus.post(stats.build());
}
diff --git a/src/main/java/com/google/devtools/build/lib/runtime/MemoryPressureOptions.java b/src/main/java/com/google/devtools/build/lib/runtime/MemoryPressureOptions.java
index aa7f138..d9222f7 100644
--- a/src/main/java/com/google/devtools/build/lib/runtime/MemoryPressureOptions.java
+++ b/src/main/java/com/google/devtools/build/lib/runtime/MemoryPressureOptions.java
@@ -30,25 +30,6 @@
public final class MemoryPressureOptions extends OptionsBase {
@Option(
- name = "experimental_oom_more_eagerly_threshold",
- defaultValue = "100",
- documentationCategory = OptionDocumentationCategory.EXECUTION_STRATEGY,
- effectTags = {OptionEffectTag.HOST_MACHINE_RESOURCE_OPTIMIZATIONS},
- converter = PercentageConverter.class,
- help =
- "If this flag is set to a value less than 100, Bazel will OOM if, after two full GC's, "
- + "more than this percentage of the (old gen) heap is still occupied.")
- public int oomMoreEagerlyThreshold;
-
- @Option(
- name = "min_time_between_triggered_gc",
- defaultValue = "1m",
- documentationCategory = OptionDocumentationCategory.UNDOCUMENTED,
- effectTags = {OptionEffectTag.BAZEL_INTERNAL_CONFIGURATION},
- help = "The minimum amount of time between GCs triggered by RetainedHeapLimiter.")
- public Duration minTimeBetweenTriggeredGc;
-
- @Option(
name = "skyframe_high_water_mark_threshold",
defaultValue = "85",
documentationCategory = OptionDocumentationCategory.BUILD_TIME_OPTIMIZATION,
@@ -97,30 +78,35 @@
public int skyframeHighWaterMarkFullGcDropsPerInvocation;
@Option(
- name = "experimental_gc_thrashing_limits",
- defaultValue = "",
+ name = "gc_thrashing_limits",
+ oldName = "experimental_gc_thrashing_limits",
+ oldNameWarning = false,
+ defaultValue = "1s:2,20s:3,1m:5",
documentationCategory = OptionDocumentationCategory.BUILD_TIME_OPTIMIZATION,
effectTags = {OptionEffectTag.HOST_MACHINE_RESOURCE_OPTIMIZATIONS},
converter = GcThrashingLimitsConverter.class,
help =
"Limits which, if reached, cause GcThrashingDetector to crash Bazel with an OOM. Each"
+ " limit is specified as <period>:<count> where period is a duration and count is a"
- + " positive integer. If more than --experimental_oom_more_eagerly_threshold percent"
- + " of tenured space (old gen heap) remains occupied after <count> consecutive full"
- + " GCs within <period>, an OOM is triggered. Multiple limits can be specified"
- + " separated by commas.")
+ + " positive integer. If more than --gc_thrashing_threshold percent of tenured space"
+ + " (old gen heap) remains occupied after <count> consecutive full GCs within"
+ + " <period>, an OOM is triggered. Multiple limits can be specified separated by"
+ + " commas.")
public ImmutableList<GcThrashingDetector.Limit> gcThrashingLimits;
@Option(
- name = "gc_thrashing_limits_retained_heap_limiter_mutually_exclusive",
- defaultValue = "true",
- documentationCategory = OptionDocumentationCategory.BUILD_TIME_OPTIMIZATION,
+ name = "gc_thrashing_threshold",
+ oldName = "experimental_oom_more_eagerly_threshold",
+ oldNameWarning = false,
+ defaultValue = "100",
+ documentationCategory = OptionDocumentationCategory.EXECUTION_STRATEGY,
effectTags = {OptionEffectTag.HOST_MACHINE_RESOURCE_OPTIMIZATIONS},
+ converter = PercentageConverter.class,
help =
- "If true, specifying non-empty --experimental_gc_thrashing_limits deactivates"
- + " RetainedHeapLimiter to make it mutually exclusive with GcThrashingDetector."
- + " Setting to false permits both to be active for the same command.")
- public boolean gcThrashingLimitsRetainedHeapLimiterMutuallyExclusive;
+ "The percent of tenured space occupied (0-100) above which GcThrashingDetector considers"
+ + " memory pressure events against its limits (--gc_thrashing_limits). If set to 100,"
+ + " GcThrashingDetector is disabled.")
+ public int gcThrashingThreshold;
static final class NonNegativeIntegerConverter extends RangeConverter {
NonNegativeIntegerConverter() {
diff --git a/src/main/java/com/google/devtools/build/lib/runtime/RetainedHeapLimiter.java b/src/main/java/com/google/devtools/build/lib/runtime/RetainedHeapLimiter.java
deleted file mode 100644
index a59b54e..0000000
--- a/src/main/java/com/google/devtools/build/lib/runtime/RetainedHeapLimiter.java
+++ /dev/null
@@ -1,183 +0,0 @@
-// Copyright 2016 The Bazel Authors. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package com.google.devtools.build.lib.runtime;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.flogger.GoogleLogger;
-import com.google.devtools.build.lib.bugreport.BugReporter;
-import com.google.devtools.build.lib.bugreport.Crash;
-import com.google.devtools.build.lib.bugreport.CrashContext;
-import com.google.devtools.build.lib.clock.BlazeClock;
-import com.google.devtools.build.lib.clock.Clock;
-import com.google.devtools.build.lib.concurrent.ThreadSafety;
-import com.google.devtools.build.lib.runtime.MemoryPressure.MemoryPressureStats;
-import com.google.devtools.common.options.Options;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-
-/**
- * Monitors the size of the retained heap and exit promptly if it grows too large.
- *
- * <p>Specifically, checks the size of the tenured space after each major GC; if it exceeds {@link
- * MemoryPressureOptions#oomMoreEagerlyThreshold}%, call {@link System#gc()} to trigger a
- * stop-the-world collection; if it's still more than {@link
- * MemoryPressureOptions#oomMoreEagerlyThreshold}% full, exit with an {@link OutOfMemoryError}.
- */
-final class RetainedHeapLimiter implements MemoryPressureStatCollector {
-
- private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();
-
- private final BugReporter bugReporter;
- private final Clock clock;
-
- private volatile MemoryPressureOptions options = inactiveOptions();
-
- private final AtomicBoolean throwingOom = new AtomicBoolean(false);
- private final AtomicBoolean heapLimiterTriggeredGc = new AtomicBoolean(false);
- private final AtomicInteger consecutiveIgnoredFullGcsOverThreshold = new AtomicInteger(0);
- private final AtomicBoolean loggedIgnoreWarningSinceLastGc = new AtomicBoolean(false);
- private final AtomicLong lastTriggeredGcMillis = new AtomicLong();
- private final AtomicInteger gcsTriggered = new AtomicInteger(0);
- private final AtomicInteger maxConsecutiveIgnoredFullGcsOverThreshold = new AtomicInteger(0);
-
- static RetainedHeapLimiter create(BugReporter bugReporter) {
- return new RetainedHeapLimiter(bugReporter, BlazeClock.instance());
- }
-
- @VisibleForTesting
- static RetainedHeapLimiter createForTest(BugReporter bugReporter, Clock clock) {
- return new RetainedHeapLimiter(bugReporter, clock);
- }
-
- private RetainedHeapLimiter(BugReporter bugReporter, Clock clock) {
- this.bugReporter = checkNotNull(bugReporter);
- this.clock = checkNotNull(clock);
- }
-
- @ThreadSafety.ThreadCompatible // Can only be called on the logical main Bazel thread.
- void setOptions(MemoryPressureOptions options) {
- if (options.gcThrashingLimitsRetainedHeapLimiterMutuallyExclusive
- && !options.gcThrashingLimits.isEmpty()) {
- this.options = inactiveOptions();
- } else {
- this.options = options;
- }
- }
-
- // Can be called concurrently, handles concurrent calls with #setThreshold gracefully.
- @ThreadSafety.ThreadSafe
- public void handle(MemoryPressureEvent event) {
- if (throwingOom.get()) {
- return; // Do nothing if a crash is already in progress.
- }
-
- boolean wasHeapLimiterTriggeredGc = false;
- boolean wasGcLockerDeferredHeapLimiterTriggeredGc = false;
- if (event.wasManualGc()) {
- wasHeapLimiterTriggeredGc = heapLimiterTriggeredGc.getAndSet(false);
- if (!wasHeapLimiterTriggeredGc) {
- // This was a manually triggered GC, but not from us earlier: short-circuit.
- logger.atInfo().log("Ignoring manual GC from other source");
- return;
- }
- } else if (event.wasGcLockerInitiatedGc() && heapLimiterTriggeredGc.getAndSet(false)) {
- // If System.gc() is called was while there are JNI thread(s) in the critical region, GCLocker
- // defers the GC until those threads exit the critical region. However, all GCLocker initiated
- // GCs are minor evacuation pauses, so we won't get the full GC we requested. Cancel the
- // timeout so we can attempt System.gc() again if we're still over the threshold. See full
- // explanation in b/263405096#comment14.
- logger.atWarning().log(
- "Observed a GCLocker initiated GC without observing a manual GC since the last call to"
- + " System.gc(), cancelling timeout to permit a retry");
- wasGcLockerDeferredHeapLimiterTriggeredGc = true;
- lastTriggeredGcMillis.set(0);
- }
-
- // Get a local reference to guard against concurrent modifications.
- MemoryPressureOptions options = this.options;
- int threshold = options.oomMoreEagerlyThreshold;
-
- if (threshold == 100) {
- return; // Inactive.
- }
-
- int actual = event.percentTenuredSpaceUsed();
- if (actual < threshold) {
- if (wasHeapLimiterTriggeredGc || wasGcLockerDeferredHeapLimiterTriggeredGc) {
- logger.atInfo().log("Back under threshold (%s%% of tenured space)", actual);
- }
- consecutiveIgnoredFullGcsOverThreshold.set(0);
- return;
- }
-
- if (wasHeapLimiterTriggeredGc) {
- if (!throwingOom.getAndSet(true)) {
- // We got here from a GC initiated by the other branch.
- OutOfMemoryError oom =
- new OutOfMemoryError(
- String.format(
- "RetainedHeapLimiter forcing exit due to GC thrashing: After back-to-back full"
- + " GCs, the tenured space is more than %s%% occupied (%s out of a tenured"
- + " space size of %s).",
- threshold, event.tenuredSpaceUsedBytes(), event.tenuredSpaceMaxBytes()));
- logger.atInfo().log("Calling handleCrash");
- // Exits the runtime.
- bugReporter.handleCrash(Crash.from(oom), CrashContext.halt());
- }
- } else if (clock.currentTimeMillis() - lastTriggeredGcMillis.get()
- > options.minTimeBetweenTriggeredGc.toMillis()) {
- logger.atInfo().log(
- "Triggering a full GC (%s%% of tenured space after %s GC)",
- actual, event.wasFullGc() ? "full" : "minor");
- heapLimiterTriggeredGc.set(true);
- gcsTriggered.incrementAndGet();
- // Force a full stop-the-world GC and see if it can get us below the threshold.
- System.gc();
- lastTriggeredGcMillis.set(clock.currentTimeMillis());
- consecutiveIgnoredFullGcsOverThreshold.set(0);
- loggedIgnoreWarningSinceLastGc.set(false);
- } else if (event.wasFullGc()) {
- int consecutiveIgnored = consecutiveIgnoredFullGcsOverThreshold.incrementAndGet();
- maxConsecutiveIgnoredFullGcsOverThreshold.accumulateAndGet(consecutiveIgnored, Math::max);
- logger.atWarning().log(
- "Ignoring possible GC thrashing x%s (%s%% of tenured space after full GC) because of"
- + " recently triggered GC",
- consecutiveIgnored, actual);
- } else if (!loggedIgnoreWarningSinceLastGc.getAndSet(true)) {
- logger.atWarning().log(
- "Ignoring possible GC thrashing (%s%% of tenured space after minor GC) because of"
- + " recently triggered GC",
- actual);
- }
- }
-
- @Override
- public void addStatsAndReset(MemoryPressureStats.Builder stats) {
- stats
- .setManuallyTriggeredGcs(gcsTriggered.getAndSet(0))
- .setMaxConsecutiveIgnoredGcsOverThreshold(
- maxConsecutiveIgnoredFullGcsOverThreshold.getAndSet(0));
- consecutiveIgnoredFullGcsOverThreshold.set(0);
- }
-
- private static MemoryPressureOptions inactiveOptions() {
- var options = Options.getDefaults(MemoryPressureOptions.class);
- options.oomMoreEagerlyThreshold = 100;
- return options;
- }
-}