// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
| 14 | package com.google.devtools.build.lib.skyframe; |
| 15 | |
| 16 | import com.google.common.annotations.VisibleForTesting; |
tomlu | a155b53 | 2017-11-08 20:12:47 +0100 | [diff] [blame] | 17 | import com.google.common.base.Preconditions; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 18 | import com.google.devtools.build.lib.actions.ActionExecutionStatusReporter; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 19 | import java.util.concurrent.atomic.AtomicBoolean; |
| 20 | |
| 21 | /** |
| 22 | * An object that can monitor whether actions are getting completed in a timely manner. |
| 23 | * |
| 24 | * <p>If there's nothing happening for a while, a background thread will print (and update) the |
| 25 | * "Still waiting for N actions to complete..." message. |
| 26 | */ |
| 27 | public final class ActionExecutionInactivityWatchdog { |
| 28 | |
| 29 | /** An object used in monitoring action execution inactivity. */ |
| 30 | public interface InactivityMonitor { |
| 31 | |
| 32 | /** Returns whether action execution has started. */ |
| 33 | boolean hasStarted(); |
| 34 | |
| 35 | /** Returns the number of enqueued but not yet completed actions. */ |
| 36 | int getPending(); |
| 37 | |
| 38 | /** |
| 39 | * Waits for any action to complete, or the timeout to elapse. |
| 40 | * |
| 41 | * <p>The thread must wait at least for the specified timeout, unless some action completes in |
| 42 | * the meantime. It's not allowed to return 0 too early. |
| 43 | * |
| 44 | * <p>Note that it's acceptable to return (any value) later than specified by the timeout. |
| 45 | * |
| 46 | * @return the number of actions completed during the wait |
| 47 | */ |
Googler | 10e7f3e | 2018-08-17 00:31:57 -0700 | [diff] [blame] | 48 | int waitForNextCompletion(int timeoutSeconds) throws InterruptedException; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 49 | } |
| 50 | |
| 51 | /** An object that the watchdog can report inactivity to. */ |
| 52 | public interface InactivityReporter { |
| 53 | |
| 54 | /** |
| 55 | * Report that actions are not getting completed in a timely manner. |
| 56 | * |
| 57 | * <p>Inactivity is typically not reported if tests with streaming output are being run. |
| 58 | */ |
| 59 | void maybeReportInactivity(); |
| 60 | } |
| 61 | |
| 62 | @VisibleForTesting |
| 63 | interface Sleep { |
| 64 | void sleep(int durationMilliseconds) throws InterruptedException; |
| 65 | } |
| 66 | |
| 67 | private static final class WaitTime { |
| 68 | private final int progressIntervalFlagValue; |
| 69 | private int prev; |
| 70 | |
| 71 | public WaitTime(int progressIntervalFlagValue) { |
| 72 | this.progressIntervalFlagValue = progressIntervalFlagValue; |
| 73 | } |
| 74 | |
| 75 | public void reset() { |
| 76 | prev = 0; |
| 77 | } |
| 78 | |
| 79 | public int next() { |
| 80 | prev = ActionExecutionStatusReporter.getWaitTime(progressIntervalFlagValue, prev); |
| 81 | return prev; |
| 82 | } |
| 83 | } |
| 84 | |
| 85 | private final AtomicBoolean isRunning = new AtomicBoolean(false); |
| 86 | private final InactivityMonitor monitor; |
| 87 | private final InactivityReporter reporter; |
| 88 | private final Sleep sleeper; |
| 89 | private final Thread thread; |
| 90 | private final WaitTime waitTime; |
| 91 | |
| 92 | public ActionExecutionInactivityWatchdog(InactivityMonitor monitor, InactivityReporter reporter, |
| 93 | int progressIntervalFlagValue) { |
| 94 | this(monitor, reporter, progressIntervalFlagValue, new Sleep() { |
| 95 | @Override |
| 96 | public void sleep(int durationMilliseconds) throws InterruptedException { |
| 97 | Thread.sleep(durationMilliseconds); |
| 98 | } |
| 99 | }); |
| 100 | } |
| 101 | |
| 102 | @VisibleForTesting |
| 103 | public ActionExecutionInactivityWatchdog(InactivityMonitor monitor, InactivityReporter reporter, |
| 104 | int progressIntervalFlagValue, Sleep sleeper) { |
| 105 | this.monitor = Preconditions.checkNotNull(monitor); |
| 106 | this.reporter = Preconditions.checkNotNull(reporter); |
| 107 | this.sleeper = Preconditions.checkNotNull(sleeper); |
| 108 | this.waitTime = new WaitTime(progressIntervalFlagValue); |
ulfjack | 2602a17 | 2018-11-05 07:39:43 -0800 | [diff] [blame] | 109 | this.thread = new Thread(() -> enterWatchdogLoop(), "action-execution-watchdog"); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 110 | this.thread.setDaemon(true); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 111 | } |
| 112 | |
| 113 | /** Starts the watchdog thread. This method should only be called once. */ |
| 114 | public void start() { |
| 115 | Preconditions.checkState(!isRunning.getAndSet(true)); |
| 116 | thread.start(); |
| 117 | } |
| 118 | |
| 119 | /** |
| 120 | * Stops the watchdog thread. This method should only be called once. |
| 121 | * |
| 122 | * <p>The method waits for the thread to terminate. If the caller thread is interrupted |
| 123 | * in the meantime, the interrupted status will be set. |
| 124 | */ |
| 125 | public void stop() { |
| 126 | Preconditions.checkState(isRunning.getAndSet(false)); |
| 127 | thread.interrupt(); |
| 128 | try { |
| 129 | thread.join(); |
| 130 | } catch (InterruptedException e) { |
| 131 | // When Thread.join throws, the interrupted status is cleared. We need to set it again. |
| 132 | Thread.currentThread().interrupt(); |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | private void enterWatchdogLoop() { |
| 137 | while (isRunning.get()) { |
| 138 | try { |
| 139 | // Wait a while for any SkyFunction to finish. The returned number indicates how many |
| 140 | // actions completed during the wait. It's possible that this is more than 1, since |
| 141 | // this thread may not immediately regain control. |
Googler | 10e7f3e | 2018-08-17 00:31:57 -0700 | [diff] [blame] | 142 | int completedActions = monitor.waitForNextCompletion(waitTime.next()); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 143 | if (!isRunning.get()) { |
| 144 | break; |
| 145 | } |
| 146 | |
| 147 | int pending = monitor.getPending(); |
| 148 | if (!monitor.hasStarted() || completedActions > 0 || pending == 0) { |
| 149 | // If no keys have been enqueued yet (execution hasn't started), or some actions |
| 150 | // were completed since this thread was notified (we are making visible progress), |
| 151 | // or there are currently no enqueued actions waiting to be processed (perhaps all |
| 152 | // have completed and we are about to stop monitoring), then there's no need to |
| 153 | // display any messages. |
| 154 | waitTime.reset(); |
| 155 | |
| 156 | // Sleep a while before checking again. Actions might be executing at a nice rate, no |
| 157 | // need to worry about inactivity. This extra sleep isn't required but it's nice to |
| 158 | // have: without it we would, at times of high action completion rate, unnecessarily |
| 159 | // put the monitor into a fast sleep-wake cycle --- not a big problem but wasteful. |
| 160 | sleeper.sleep(1000); |
| 161 | } else { |
| 162 | // If actions are executing but we haven't made any progress in a while (no new |
| 163 | // action completion), then reassure the user that we're still running. Next time |
| 164 | // wait a little longer. |
| 165 | reporter.maybeReportInactivity(); |
| 166 | } |
| 167 | } catch (InterruptedException ie) { |
| 168 | Thread.currentThread().interrupt(); |
| 169 | return; |
| 170 | } |
| 171 | } |
| 172 | } |
| 173 | } |