src/main/java/com/google/devtools/build/lib/profiler/Profiler.java - bazel - Git at Google

 // Copyright 2014 The Bazel Authors. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 package com.google.devtools.build.lib.profiler;

 import static com.google.devtools.build.lib.profiler.ProfilerTask.TASK_COUNT;

 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicate;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;
 import com.google.devtools.build.lib.clock.Clock;
 import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadCompatible;
 import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
 import com.google.devtools.build.lib.profiler.PredicateBasedStatRecorder.RecorderAndPredicate;
 import com.google.devtools.build.lib.profiler.StatRecorder.VfsHeuristics;
 import com.google.devtools.build.lib.util.VarInt;
 import java.io.BufferedOutputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
 import java.time.Duration;
 import java.util.ArrayList;
 import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.PriorityQueue;
 import java.util.Queue;
 import java.util.Timer;
 import java.util.TimerTask;
 import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.logging.Logger;
 import java.util.zip.Deflater;
 import java.util.zip.DeflaterOutputStream;

 /**
  * Blaze internal profiler. Provides facility to report various Blaze tasks and store them
  * (asynchronously) in the file for future analysis.
  *
  * <p>Implemented as singleton so any caller should use Profiler.instance() to obtain reference.
  *
  * <p>Internally, profiler uses two data structures - ThreadLocal task stack to track nested tasks
  * and single ConcurrentLinkedQueue to gather all completed tasks.
  *
  * <p>Also, due to the nature of the provided functionality (instrumentation of all Blaze
  * components), build.lib.profiler package will be used by almost every other Blaze package, so
  * special attention should be paid to avoid any dependencies on the rest of the Blaze code,
  * including build.lib.util and build.lib.vfs. This is important because build.lib.util and
  * build.lib.vfs contain Profiler invocations and any dependency on those two packages would create
  * circular relationship.
  *
  * <p>All gathered instrumentation data will be stored in the file. Please, note, that while file
  * format is described here it is considered internal and can change at any time. For scripting,
  * using blaze analyze-profile --dump=raw would be more robust and stable solution.
  *
  * <p>
  *
  * <pre>
  * Profiler file consists of the deflated stream with following overall structure:
  *   HEADER
  *   TASK_TYPE_TABLE
  *   TASK_RECORD...
  *   EOF_MARKER
  *
  * HEADER:
  *   int32: magic token (Profiler.MAGIC)
  *   int32: version format (Profiler.VERSION)
  *   string: file comment
  *
  * TASK_TYPE_TABLE:
  *   int32: number of type names below
  *   string... : type names. Each of the type names is assigned id according to
  *               their position in this table starting from 0.
  *
  * TASK_RECORD:
  *   int32 size: size of the encoded task record
  *   byte[size] encoded_task_record:
  *     varint64: thread id - as was returned by Thread.getId()
  *     varint32: task id - starting from 1.
  *     varint32: parent task id for subtasks or 0 for root tasks
  *     varint64: start time in ns, relative to the Profiler.start() invocation
  *     varint64: task duration in ns
  *     byte:     task type id (see TASK_TYPE_TABLE)
  *     varint32: description string index incremented by 1 (>0) or 0 this is
  *               a first occurrence of the description string
  *     AGGREGATED_STAT...: remainder of the field (if present) represents
  *                         aggregated stats for that task
  *   string: *optional* description string, will appear only if description
  *           string index above was 0. In that case this string will be
  *           assigned next sequential id so every unique description string
  *           will appear in the file only once - after that it will be
  *           referenced by id.
  *
  * AGGREGATE_STAT:
  *   byte:     stat type
  *   varint32: total number of subtask invocations
  *   varint64: cumulative duration of subtask invocations in ns.
  *
  * EOF_MARKER:
  *   int64: -1 - please note that this corresponds to the thread id in the
  *               TASK_RECORD which is always > 0
  * </pre>
  *
  * @see ProfilerTask enum for recognized task types.
  */
 @ThreadSafe
 public final class Profiler {
   private static final Logger logger = Logger.getLogger(Profiler.class.getName());

   public static final int MAGIC = 0x11223344;

   // File version number. Note that merely adding new record types in
   // the ProfilerTask does not require bumping version number as long as original
   // enum values are not renamed or deleted.
   public static final int VERSION = 0x03;

   // EOF marker. Must be < 0.
   public static final int EOF_MARKER = -1;

   // Profiler will check for gathered data and persist all of it in the
   // separate thread every SAVE_DELAY ms.
   private static final int SAVE_DELAY = 2000; // ms

   /**
    * The profiler (a static singleton instance). Inactive by default.
    */
   private static final Profiler instance = new Profiler();

   private static final int HISTOGRAM_BUCKETS = 20;

   /** A task that was very slow. */
   public static final class SlowTask implements Comparable<SlowTask> {
     final long durationNanos;
     final Object object;
     ProfilerTask type;

     private SlowTask(TaskData taskData) {
       this.durationNanos = taskData.duration;
       this.object = taskData.object;
       this.type = taskData.type;
     }

     @Override
     public int compareTo(SlowTask other) {
       long delta = durationNanos - other.durationNanos;
       if (delta < 0) {  // Very clumsy
         return -1;
       } else if (delta > 0) {
         return 1;
       } else {
         return 0;
       }
     }

     public long getDurationNanos() {
       return durationNanos;
     }

     public String getDescription() {
       return toDescription(object);
     }

     public ProfilerTask getType() {
       return type;
     }
   }

   /**
    * Container for the single task record.
    * Should never be instantiated directly - use TaskStack.create() instead.
    *
    * Class itself is not thread safe, but all access to it from Profiler
    * methods is.
    */
   @ThreadCompatible
   private final class TaskData {
     final long threadId;
     final long startTime;
     long duration = 0L;
     final int id;
     final int parentId;
     int[] counts; // number of invocations per ProfilerTask type
     long[] durations; // time spend in the task per ProfilerTask type
     final ProfilerTask type;
     final Object object;

     TaskData(long startTime, TaskData parent,
              ProfilerTask eventType, Object object) {
       threadId = Thread.currentThread().getId();
       counts = null;
       durations = null;
       id = taskId.incrementAndGet();
       parentId = (parent == null  ? 0 : parent.id);
       this.startTime = startTime;
       this.type = eventType;
       this.object = Preconditions.checkNotNull(object);
     }

     /**
      * Aggregates information about an *immediate* subtask.
      */
     public void aggregateChild(ProfilerTask type, long duration) {
       int index = type.ordinal();
       if (counts == null) {
         // one entry for each ProfilerTask type
         counts = new int[TASK_COUNT];
         durations = new long[TASK_COUNT];
       }
       counts[index]++;
       durations[index] += duration;
     }

     @Override
     public String toString() {
       return "Thread " + threadId + ", task " + id + ", type " + type + ", " + object;
     }
   }

   /**
    * Tracks nested tasks for each thread.
    *
    * java.util.ArrayDeque is the most efficient stack implementation in the
    * Java Collections Framework (java.util.Stack class is older synchronized
    * alternative). It is, however, used here strictly for LIFO operations.
    * However, ArrayDeque is 1.6 only. For 1.5 best approach would be to utilize
    * ArrayList and emulate stack using it.
    */
   @ThreadSafe
   private final class TaskStack extends ThreadLocal<List<TaskData>> {

     @Override
     public List<TaskData> initialValue() {
       return new ArrayList<>();
     }

     public TaskData peek() {
       List<TaskData> list = get();
       if (list.isEmpty()) {
         return null;
       }
       return list.get(list.size() - 1);
     }

     public TaskData pop() {
       List<TaskData> list = get();
       return list.remove(list.size() - 1);
     }

     public boolean isEmpty() {
       return get().isEmpty();
     }

     public void push(ProfilerTask eventType, Object object) {
       get().add(create(clock.nanoTime(), eventType, object));
     }

     public TaskData create(long startTime, ProfilerTask eventType, Object object) {
       return new TaskData(startTime, peek(), eventType, object);
     }

     @Override
     public String toString() {
       StringBuilder builder = new StringBuilder(
           "Current task stack for thread " + Thread.currentThread().getName() + ":\n");
       List<TaskData> list = get();
       for (int i = list.size() - 1; i >= 0; i--) {
         builder.append(list.get(i));
         builder.append("\n");
       }
       return builder.toString();
     }
   }

   private static String toDescription(Object object) {
     return (object instanceof Describable)
         ? ((Describable) object).describe()
         : object.toString();
   }

   /**
    * Implements datastore for object description indices. Intended to be used only by the
    * Profiler.save() method.
    */
   @ThreadCompatible
   private static final class ObjectDescriber {
     private Map<Object, Integer> descMap = new IdentityHashMap<>(2000);
     private int indexCounter = 0;

     ObjectDescriber() { }

     int getDescriptionIndex(Object object) {
       Integer index = descMap.get(object);
       return (index != null) ? index : -1;
     }

     String getDescription(Object object) {
       String description = toDescription(object);

       Integer oldIndex = descMap.put(object, indexCounter++);
       // Do not use Preconditions class below due to the rather expensive
       // toString() calls used in the message.
       if (oldIndex != null) {
         throw new IllegalStateException(" Object '" + description + "' @ "
             + System.identityHashCode(object) + " already had description index "
             + oldIndex + " while assigning index " + descMap.get(object));
       } else if (description.length() > 20000) {
         // Note size 64k byte limitation in DataOutputStream#writeUTF().
         description = description.substring(0, 20000);
       }
       return description;
     }

     boolean isUnassigned(int index) {
       return (index < 0);
     }
   }

   /**
    * Aggregator class that keeps track of the slowest tasks of the specified type.
    *
    * <p><code>priorityQueues</p> is sharded so that all threads need not compete for the same
    * lock if they do the same operation at the same time. Access to the individual queues is
    * synchronized on the queue objects themselves.
    */
   private static final class SlowestTaskAggregator {
     private static final int SHARDS = 16;
     private final int size;

     @SuppressWarnings({"unchecked", "rawtypes"})
     private final PriorityQueue<SlowTask>[] priorityQueues = new PriorityQueue[SHARDS];

     SlowestTaskAggregator(int size) {
       this.size = size;

       for (int i = 0; i < SHARDS; i++) {
         priorityQueues[i] = new PriorityQueue<>(size + 1);
       }
     }

     // @ThreadSafe
     void add(TaskData taskData) {
       PriorityQueue<SlowTask> queue =
           priorityQueues[(int) (Thread.currentThread().getId() % SHARDS)];
       synchronized (queue) {
         if (queue.size() == size) {
           // Optimization: check if we are faster than the fastest element. If we are, we would
           // be the ones to fall off the end of the queue, therefore, we can safely return early.
           if (queue.peek().getDurationNanos() > taskData.duration) {
             return;
           }

           queue.add(new SlowTask(taskData));
           queue.remove();
         } else {
           queue.add(new SlowTask(taskData));
         }
       }
     }

     // @ThreadSafe
     void clear() {
       for (int i = 0; i < SHARDS; i++) {
         PriorityQueue<SlowTask> queue = priorityQueues[i];
         synchronized (queue) {
           queue.clear();
         }
       }
     }

     // @ThreadSafe
     Iterable<SlowTask> getSlowestTasks() {
       // This is slow, but since it only happens during the end of the invocation, it's OK
       PriorityQueue<SlowTask> merged = new PriorityQueue<>(size * SHARDS);
       for (int i = 0; i < SHARDS; i++) {
         PriorityQueue<SlowTask> queue = priorityQueues[i];
         synchronized (queue) {
           merged.addAll(queue);
         }
       }

       while (merged.size() > size) {
         merged.remove();
       }

       return merged;
     }
   }

   /**
    * Which {@link ProfilerTask}s are profiled.
    */
   public enum ProfiledTaskKinds {
     /**
      * Do not profile anything.
      *
      * <p>Performance is best with this case, but we lose critical path analysis and slowest
      * operation tracking.
      */
     NONE {
       @Override
       boolean isProfiling(ProfilerTask type) {
         return false;
       }
     },

     /**
      * Profile on a few, known-to-be-slow tasks.
      *
      * <p>Performance is somewhat decreased in comparison to {@link #NONE}, but we still track the
      * slowest operations (VFS).
      */
     SLOWEST {
       @Override
       boolean isProfiling(ProfilerTask type) {
         return type.collectsSlowestInstances();
       }
     },

     /**
      * Profile all tasks.
      *
      * <p>This is in use when {@code --profile} is specified.
      */
     ALL {
       @Override
       boolean isProfiling(ProfilerTask type) {
         return true;
       }
     };

     /** Whether the Profiler collects data for the given task type. */
     abstract boolean isProfiling(ProfilerTask type);
   }

   private Clock clock;
   private ProfiledTaskKinds profiledTaskKinds;
   private volatile long profileStartTime = 0L;
   private volatile boolean recordAllDurations = false;
   private AtomicInteger taskId = new AtomicInteger();

   private TaskStack taskStack;
   private Queue<TaskData> taskQueue;
   private DataOutputStream out;
   private Timer timer;
   private IOException saveException;
   private ObjectDescriber describer;
   @SuppressWarnings("unchecked")
   private final SlowestTaskAggregator[] slowestTasks =
   new SlowestTaskAggregator[ProfilerTask.values().length];

   private final StatRecorder[] tasksHistograms = new StatRecorder[ProfilerTask.values().length];

   private Profiler() {
     initHistograms();
     for (ProfilerTask task : ProfilerTask.values()) {
       if (task.slowestInstancesCount != 0) {
         slowestTasks[task.ordinal()] = new SlowestTaskAggregator(task.slowestInstancesCount);
       }
     }
   }

   private void initHistograms() {
     for (ProfilerTask task : ProfilerTask.values()) {
       if (task.isVfs()) {
         Map<String, ? extends Predicate<? super String>> vfsHeuristics =
             VfsHeuristics.vfsTypeHeuristics;
         List<RecorderAndPredicate> recorders = new ArrayList<>(vfsHeuristics.size());
         for (Entry<String, ? extends Predicate<? super String>> e : vfsHeuristics.entrySet()) {
           recorders.add(new RecorderAndPredicate(
               new SingleStatRecorder(task + " " + e.getKey(), HISTOGRAM_BUCKETS), e.getValue()));
         }
         tasksHistograms[task.ordinal()] = new PredicateBasedStatRecorder(recorders);
       } else {
         tasksHistograms[task.ordinal()] = new SingleStatRecorder(task, HISTOGRAM_BUCKETS);
       }
     }
   }

   public ImmutableList<StatRecorder> getTasksHistograms() {
     return ImmutableList.copyOf(tasksHistograms);
   }

   public static Profiler instance() {
     return instance;
   }

   /**
    * Returns the nanoTime of the current profiler instance, or an arbitrary
    * constant if not active.
    */
   public static long nanoTimeMaybe() {
     if (instance.isActive()) {
       return instance.clock.nanoTime();
     }
     return -1;
   }

   /**
    * Enable profiling.
    *
    * <p>Subsequent calls to beginTask/endTask will be recorded
    * in the provided output stream. Please note that stream performance is
    * extremely important and buffered streams should be utilized.
    *
    * @param profiledTaskKinds which kinds of {@link ProfilerTask}s to track
    * @param stream output stream to store profile data. Note: passing unbuffered stream object
    *     reference may result in significant performance penalties
    * @param comment a comment to insert in the profile data
    * @param recordAllDurations iff true, record all tasks regardless of their duration; otherwise
    *     some tasks may get aggregated if they finished quick enough
    * @param clock a {@code BlazeClock.instance()}
    * @param execStartTimeNanos execution start time in nanos obtained from {@code clock.nanoTime()}
    */
   public synchronized void start(ProfiledTaskKinds profiledTaskKinds, OutputStream stream,
       String comment, boolean recordAllDurations, Clock clock, long execStartTimeNanos)
       throws IOException {
     Preconditions.checkState(!isActive(), "Profiler already active");
     taskStack = new TaskStack();
     taskQueue = new ConcurrentLinkedQueue<>();
     describer = new ObjectDescriber();

     this.profiledTaskKinds = profiledTaskKinds;
     this.clock = clock;

     // sanity check for current limitation on the number of supported types due
     // to using enum.ordinal() to store them instead of EnumSet for performance reasons.
     Preconditions.checkState(TASK_COUNT < 256,
         "The profiler implementation supports only up to 255 different ProfilerTask values.");

     // reset state for the new profiling session
     taskId.set(0);
     this.recordAllDurations = recordAllDurations;
     this.saveException = null;
     if (stream != null) {
       this.timer = new Timer("ProfilerTimer", true);
       // Wrapping deflater stream in the buffered stream proved to reduce CPU consumption caused by
       // the save() method. Values for buffer sizes were chosen by running small amount of tests
       // and identifying point of diminishing returns - but I have not really tried to optimize
       // them.
       this.out = new DataOutputStream(new BufferedOutputStream(new DeflaterOutputStream(
           stream, new Deflater(Deflater.BEST_SPEED, false), 65536), 262144));

       this.out.writeInt(MAGIC); // magic
       this.out.writeInt(VERSION); // protocol_version
       this.out.writeUTF(comment);
       // ProfileTask.values() method sorts enums using their ordinal() value, so
       // there there is no need to store ordinal() value for each entry.
       this.out.writeInt(TASK_COUNT);
       for (ProfilerTask type : ProfilerTask.values()) {
         this.out.writeUTF(type.toString());
       }

       // Start save thread
       timer.schedule(new TimerTask() {
         @Override public void run() { save(); }
       }, SAVE_DELAY, SAVE_DELAY);
     } else {
       this.out = null;
     }

     // activate profiler
     profileStartTime = execStartTimeNanos;
   }

   public synchronized Iterable<SlowTask> getSlowestTasks() {
     List<Iterable<SlowTask>> slowestTasksByType = new ArrayList<>();

     for (SlowestTaskAggregator aggregator : slowestTasks) {
       if (aggregator != null) {
         slowestTasksByType.add(aggregator.getSlowestTasks());
       }
     }

     return Iterables.concat(slowestTasksByType);
   }

   /**
    * Disable profiling and complete profile file creation.
    * Subsequent calls to beginTask/endTask will no longer
    * be recorded in the profile.
    */
   public synchronized void stop() throws IOException {
     if (saveException != null) {
       throw saveException;
     }
     if (!isActive()) {
       return;
     }
     // Log a final event to update the duration of ProfilePhase.FINISH.
     logEvent(ProfilerTask.INFO, "Finishing");
     save();
     clear();

     for (SlowestTaskAggregator aggregator : slowestTasks) {
       if (aggregator != null) {
         aggregator.clear();
       }
     }

     if (saveException != null) {
       throw saveException;
     }
     if (out != null) {
       out.writeInt(EOF_MARKER);
       out.close();
       out = null;
     }
   }

   /**
    *  Returns true iff profiling is currently enabled.
    */
   public boolean isActive() {
     return profileStartTime != 0L;
   }

   public boolean isProfiling(ProfilerTask type) {
     return profiledTaskKinds.isProfiling(type);
   }

   /**
    * Saves all gathered information from taskQueue queue to the file.
    * Method is invoked internally by the Timer-based thread and at the end of
    * profiling session.
    */
   private synchronized void save() {
     if (out == null) {
       return;
     }
     try {
       // Allocate the sink once to avoid GC
       ByteBuffer sink = ByteBuffer.allocate(1024);
       while (!taskQueue.isEmpty()) {
         sink.clear();
         TaskData data = taskQueue.poll();

         VarInt.putVarLong(data.threadId, sink);
         VarInt.putVarInt(data.id, sink);
         VarInt.putVarInt(data.parentId, sink);
         VarInt.putVarLong(data.startTime - profileStartTime, sink);
         VarInt.putVarLong(data.duration, sink);

         // To save space (and improve performance), convert all description
         // strings to the canonical object and use IdentityHashMap to assign
         // unique numbers for each string.
         int descIndex = describer.getDescriptionIndex(data.object);
         VarInt.putVarInt(descIndex + 1, sink); // Add 1 to avoid encoding negative values.

         // Save types using their ordinal() value
         sink.put((byte) data.type.ordinal());

         // Save aggregated data stats.
         if (data.counts != null) {
           for (int i = 0; i < TASK_COUNT; i++) {
             if (data.counts[i] > 0) {
               sink.put((byte) i); // aggregated type ordinal value
               VarInt.putVarInt(data.counts[i], sink);
               VarInt.putVarLong(data.durations[i], sink);
             }
           }
         }

         this.out.writeInt(sink.position());
         this.out.write(sink.array(), 0, sink.position());
         if (describer.isUnassigned(descIndex)) {
           this.out.writeUTF(describer.getDescription(data.object));
         }
       }
       this.out.flush();
     } catch (IOException e) {
       saveException = e;
       clear();
       try {
         out.close();
       } catch (IOException e2) {
         // ignore it
       }
     }
   }

   private synchronized void clear() {
     initHistograms();
     profileStartTime = 0L;
     if (timer != null) {
       timer.cancel();
       timer = null;
     }
     taskStack = null;
     taskQueue = null;
     describer = null;

     // Note that slowest task aggregator are not cleared here because clearing happens
     // periodically over the course of a command invocation.
   }

   /**
    * Unless --record_full_profiler_data is given we drop small tasks and add their time to the
    * parents duration.
    */
   private boolean wasTaskSlowEnoughToRecord(ProfilerTask type, long duration) {
     return (recordAllDurations || duration >= type.minDuration);
   }

   /**
    * Adds task directly to the main queue bypassing task stack. Used for simple
    * tasks that are known to not have any subtasks.
    *
    * @param startTime task start time (obtained through {@link Profiler#nanoTimeMaybe()})
    * @param duration task duration
    * @param type task type
    * @param object object associated with that task. Can be String object that
    *               describes it.
    */
   private void logTask(long startTime, long duration, ProfilerTask type, Object object) {
     Preconditions.checkNotNull(object);
     Preconditions.checkState(startTime > 0, "startTime was %s", startTime);
     if (duration < 0) {
       // See note in Clock#nanoTime, which is used by Profiler#nanoTimeMaybe.
       duration = 0;
     }

     tasksHistograms[type.ordinal()].addStat((int) TimeUnit.NANOSECONDS.toMillis(duration), object);
     // Store instance fields as local variables so they are not nulled out from under us by #clear.
     TaskStack localStack = taskStack;
     Queue<TaskData> localQueue = taskQueue;
     if (localStack == null || localQueue == null) {
       // Variables have been nulled out by #clear in between the check the caller made and this
       // point in the code. Probably due to an asynchronous crash.
       logger.severe("Variables null in profiler for " + type + ", probably due to async crash");
       return;
     }
     TaskData parent = localStack.peek();
     if (parent != null) {
       parent.aggregateChild(type, duration);
     }
     if (wasTaskSlowEnoughToRecord(type, duration)) {
       TaskData data = localStack.create(startTime, type, object);
       data.duration = duration;
       if (out != null) {
         localQueue.add(data);
       }

       SlowestTaskAggregator aggregator = slowestTasks[type.ordinal()];

       if (aggregator != null) {
         aggregator.add(data);
       }
     }
   }

   /**
    * Used externally to submit simple task (one that does not have any subtasks).
    * Depending on the minDuration attribute of the task type, task may be
    * just aggregated into the parent task and not stored directly.
    *
    * @param startTime task start time (obtained through {@link
    *        Profiler#nanoTimeMaybe()})
    * @param type task type
    * @param object object associated with that task. Can be String object that
    *               describes it.
    */
   public void logSimpleTask(long startTime, ProfilerTask type, Object object) {
     if (isActive() && isProfiling(type)) {
       logTask(startTime, clock.nanoTime() - startTime, type, object);
     }
   }

   /**
    * Used externally to submit simple task (one that does not have any subtasks). Depending on the
    * minDuration attribute of the task type, task may be just aggregated into the parent task and
    * not stored directly.
    *
    * <p>Note that start and stop time must both be acquired from the same clock instance.
    *
    * @param startTimeNanos task start time
    * @param stopTimeNanos task stop time
    * @param type task type
    * @param object object associated with that task. Can be String object that describes it.
    */
   public void logSimpleTask(
       long startTimeNanos, long stopTimeNanos, ProfilerTask type, Object object) {
     if (isActive() && isProfiling(type)) {
       logTask(startTimeNanos, stopTimeNanos - startTimeNanos, type, object);
     }
   }

   /**
    * Used externally to submit simple task (one that does not have any subtasks). Depending on the
    * minDuration attribute of the task type, task may be just aggregated into the parent task and
    * not stored directly.
    *
    * @param startTimeNanos task start time (obtained through {@link Profiler#nanoTimeMaybe()})
    * @param duration the duration of the task
    * @param type task type
    * @param object object associated with that task. Can be String object that describes it.
    */
   public void logSimpleTaskDuration(
       long startTimeNanos, Duration duration, ProfilerTask type, Object object) {
     if (isActive() && isProfiling(type)) {
       logTask(startTimeNanos, duration.toNanos(), type, object);
     }
   }

   /**
    * Used to log "events" - tasks with zero duration.
    */
   public void logEvent(ProfilerTask type, Object object) {
     if (isActive() && isProfiling(type)) {
       logTask(clock.nanoTime(), 0, type, object);
     }
   }

   /**
    * Records the beginning of the task specified by the parameters. This method
    * should always be followed by completeTask() invocation to mark the end of
    * task execution (usually ensured by try {} finally {} block). Failure to do
    * so will result in task stack corruption.
    *
    * Use of this method allows to support nested task monitoring. For tasks that
    * are known to not have any subtasks, logSimpleTask() should be used instead.
    *
    * @param type predefined task type - see ProfilerTask for available types.
    * @param object object associated with that task. Can be String object that
    *               describes it.
    */
   public void startTask(ProfilerTask type, Object object) {
     // ProfilerInfo.allTasksById is supposed to be an id -> Task map, but it is in fact a List,
     // which means that we cannot drop tasks to which we had already assigned ids. Therefore,
     // non-leaf tasks must not have a minimum duration. However, we don't quite consistently
     // enforce this, and Blaze only works because we happen not to add child tasks to those parent
     // tasks that have a minimum duration.
     Preconditions.checkNotNull(object);
     if (isActive() && isProfiling(type)) {
       taskStack.push(type, object);
     }
   }

   /**
    * Records the end of the task and moves tasks from the thread-local stack to
    * the main queue. Will validate that given task type matches task at the top
    * of the stack.
    *
    * @param type task type.
    */
   public void completeTask(ProfilerTask type) {
     if (isActive() && isProfiling(type)) {
       long endTime = clock.nanoTime();
       TaskData data = taskStack.pop();
       Preconditions.checkState(
           data.type == type,
           "Inconsistent Profiler.completeTask() call: should have been %s but got %s (%s, %s)",
           data.type,
           type,
           data,
           taskStack);
       data.duration = endTime - data.startTime;
       if (data.parentId > 0) {
         taskStack.peek().aggregateChild(data.type, data.duration);
       }
       boolean shouldRecordTask = wasTaskSlowEnoughToRecord(type, data.duration);
       if (out != null && (shouldRecordTask || data.counts != null)) {
         taskQueue.add(data);
       }

       if (shouldRecordTask) {
         SlowestTaskAggregator aggregator = slowestTasks[type.ordinal()];

         if (aggregator != null) {
           aggregator.add(data);
         }
       }
     }
   }

   /**
    * Convenience method to log phase marker tasks.
    */
   public void markPhase(ProfilePhase phase) {
     MemoryProfiler.instance().markPhase(phase);
     if (isActive() && isProfiling(ProfilerTask.PHASE)) {
       Preconditions.checkState(taskStack.isEmpty(), "Phase tasks must not be nested");
       logEvent(ProfilerTask.PHASE, phase.description);
     }
   }

   /**
    * Convenience method to log spawn tasks.
    *
    * TODO(bazel-team): Right now method expects single string of the spawn action
    * as task description (usually either argv[0] or a name of the main executable
    * in case of complex shell commands). Maybe it should accept Command object
    * and create more user friendly description.
    */
   public void logSpawn(long startTime, String arg0) {
     if (isActive() && isProfiling(ProfilerTask.SPAWN)) {
       logTask(startTime, clock.nanoTime() - startTime, ProfilerTask.SPAWN, arg0);
     }
   }

 }