| // Copyright 2014 The Bazel Authors. All rights reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| package com.google.devtools.build.lib.profiler; |
| |
| import static com.google.devtools.build.lib.profiler.ProfilerTask.TASK_COUNT; |
| |
| import com.google.common.annotations.VisibleForTesting; |
| import com.google.common.base.Preconditions; |
| import com.google.common.base.Predicate; |
| import com.google.common.collect.ImmutableList; |
| import com.google.common.collect.ImmutableSet; |
| import com.google.common.collect.Iterables; |
| import com.google.devtools.build.lib.clock.Clock; |
| import com.google.devtools.build.lib.collect.Extrema; |
| import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadCompatible; |
| import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; |
| import com.google.devtools.build.lib.profiler.PredicateBasedStatRecorder.RecorderAndPredicate; |
| import com.google.devtools.build.lib.profiler.StatRecorder.VfsHeuristics; |
| import com.google.devtools.build.lib.util.VarInt; |
| import com.google.gson.stream.JsonWriter; |
| import java.io.BufferedOutputStream; |
| import java.io.DataOutputStream; |
| import java.io.IOException; |
| import java.io.OutputStream; |
| import java.io.OutputStreamWriter; |
| import java.nio.Buffer; |
| import java.nio.ByteBuffer; |
| import java.nio.charset.StandardCharsets; |
| import java.time.Duration; |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.IdentityHashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.concurrent.BlockingQueue; |
| import java.util.concurrent.LinkedBlockingDeque; |
| import java.util.concurrent.TimeUnit; |
| import java.util.concurrent.atomic.AtomicInteger; |
| import java.util.concurrent.atomic.AtomicReference; |
| import java.util.logging.Logger; |
| import java.util.zip.Deflater; |
| import java.util.zip.DeflaterOutputStream; |
| import java.util.zip.GZIPOutputStream; |
| |
| /** |
 * Blaze internal profiler. Provides a facility to record various Blaze tasks and store them
 * (asynchronously) in a file for later analysis.
| * |
 * <p>Implemented as a singleton, so callers should use Profiler.instance() to obtain a reference.
| * |
 * <p>Internally, the profiler uses two data structures: a ThreadLocal task stack to track nested
 * tasks, and a single blocking queue (owned by the file writer) to gather all completed tasks.
| * |
 * <p>Also, due to the nature of the provided functionality (instrumentation of all Blaze
 * components), the build.lib.profiler package is used by almost every other Blaze package, so
 * special attention should be paid to avoid any dependencies on the rest of the Blaze code,
 * including build.lib.util and build.lib.vfs. This is important because build.lib.util and
 * build.lib.vfs contain Profiler invocations, and any dependency on those two packages would
 * create a circular relationship.
| * |
 * <p>All gathered instrumentation data is stored in a file. Please note that while the file
 * format is described here, it is considered internal and can change at any time. For scripting,
 * using blaze analyze-profile --dump=raw is the more robust and stable solution.
| * |
| * <p> |
| * |
| * <pre> |
 *   Profiler file consists of a deflated stream with the following overall structure:
 *     HEADER
 *     TASK_TYPE_TABLE
 *     TASK_RECORD...
 *     EOF_MARKER
 *
 *   HEADER:
 *     int32: magic token (Profiler.MAGIC)
 *     int32: version format (Profiler.VERSION)
 *     string: file comment
 *
 *   TASK_TYPE_TABLE:
 *     int32: number of type names below
 *     string... : type names. Each type name is assigned an id according to
 *                 its position in this table, starting from 0.
 *
 *   TASK_RECORD:
 *     int32 size: size of the encoded task record
 *     byte[size] encoded_task_record:
 *       varint64: thread id - as was returned by Thread.getId()
 *       varint32: task id - starting from 1.
 *       varint32: parent task id for subtasks or 0 for root tasks
 *       varint64: start time in ns, relative to the Profiler.start() invocation
 *       varint64: task duration in ns
 *       varint32: description string index incremented by 1 (>0), or 0 if this is
 *                 the first occurrence of the description string
 *       byte: task type id (see TASK_TYPE_TABLE)
 *       AGGREGATED_STAT...: remainder of the record (if present) represents
 *                 aggregated stats for that task
 *     string: *optional* description string, present only if the description
 *             string index above was 0. In that case this string is assigned
 *             the next sequential id, so every unique description string
 *             appears in the file only once - after that it is referenced
 *             by id.
 *
 *   AGGREGATED_STAT:
 *     byte: stat type
 *     varint32: total number of subtask invocations
 *     varint64: cumulative duration of subtask invocations in ns.
 *
 *   EOF_MARKER:
 *     int32: -1 - written where the size of the next TASK_RECORD would be; a real
 *            record size is always > 0
| * </pre> |
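 *
 * <p>For example (a sketch of the description interning described above): the first record whose
 * description is "foo" is written with description index 0 and is followed by the string "foo",
 * which is assigned id 0; every later record with the same description is written with index 1
 * (the assigned id plus one) and omits the string.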
| * |
| * @see ProfilerTask enum for recognized task types. |
| */ |
| @ThreadSafe |
| public final class Profiler { |
| private static final Logger logger = Logger.getLogger(Profiler.class.getName()); |
| |
| public static final int MAGIC = 0x11223344; |
| |
  // File version number. Note that merely adding new record types to
  // ProfilerTask does not require bumping the version number as long as the original
  // enum values are not renamed or deleted.
| public static final int VERSION = 0x03; |
| |
| // EOF marker. Must be < 0. |
| public static final int EOF_MARKER = -1; |
| |
| /** The profiler (a static singleton instance). Inactive by default. */ |
| private static final Profiler instance = new Profiler(); |
| |
| private static final int HISTOGRAM_BUCKETS = 20; |
| |
| private static final TaskData POISON_PILL = new TaskData(0, 0, null, null, "poison pill"); |
| |
| /** File format enum. */ |
| public enum Format { |
| BINARY_BAZEL_FORMAT, |
| JSON_TRACE_FILE_FORMAT, |
| JSON_TRACE_FILE_COMPRESSED_FORMAT; |
| } |
| |
| /** A task that was very slow. */ |
| public static final class SlowTask implements Comparable<SlowTask> { |
| final long durationNanos; |
| final String description; |
| final ProfilerTask type; |
| |
| private SlowTask(TaskData taskData) { |
| this.durationNanos = taskData.duration; |
| this.description = taskData.description; |
| this.type = taskData.type; |
| } |
| |
| @Override |
    public int compareTo(SlowTask other) {
      return Long.compare(durationNanos, other.durationNanos);
    }
| |
| public long getDurationNanos() { |
| return durationNanos; |
| } |
| |
| public String getDescription() { |
| return description; |
| } |
| |
| public ProfilerTask getType() { |
| return type; |
| } |
| } |
| |
| /** |
   * Container for a single task record.
   * Should never be instantiated directly - use TaskStack.create() instead.
| * |
| * Class itself is not thread safe, but all access to it from Profiler |
| * methods is. |
| */ |
| @ThreadCompatible |
| private static final class TaskData { |
| final long threadId; |
| final long startTimeNanos; |
| final int id; |
| final int parentId; |
| final ProfilerTask type; |
| final String description; |
| |
| long duration; |
| int[] counts; // number of invocations per ProfilerTask type |
| long[] durations; // time spend in the task per ProfilerTask type |
| |
| TaskData( |
| int id, long startTimeNanos, TaskData parent, ProfilerTask eventType, String description) { |
| this.id = id; |
| this.threadId = Thread.currentThread().getId(); |
| this.parentId = (parent == null ? 0 : parent.id); |
| this.startTimeNanos = startTimeNanos; |
| this.type = eventType; |
| this.description = Preconditions.checkNotNull(description); |
| } |
| |
| TaskData(long threadId, long startTimeNanos, long duration, String description) { |
| this.id = -1; |
| this.parentId = 0; |
| this.type = ProfilerTask.UNKNOWN; |
| this.threadId = threadId; |
| this.startTimeNanos = startTimeNanos; |
| this.duration = duration; |
| this.description = description; |
| } |
| |
| /** Aggregates information about an *immediate* subtask. */ |
| public void aggregateChild(ProfilerTask type, long duration) { |
| int index = type.ordinal(); |
| if (counts == null) { |
| // one entry for each ProfilerTask type |
| counts = new int[TASK_COUNT]; |
| durations = new long[TASK_COUNT]; |
| } |
| counts[index]++; |
| durations[index] += duration; |
| } |
| |
| @Override |
| public String toString() { |
| return "Thread " + threadId + ", task " + id + ", type " + type + ", " + description; |
| } |
| } |
| |
| /** |
| * Tracks nested tasks for each thread. |
| * |
   * Each thread's stack is a plain ArrayList used strictly for LIFO operations (push/pop/peek at
   * the end of the list); java.util.ArrayDeque or the older, synchronized java.util.Stack would
   * work as well, but an ArrayList is sufficient here.
| */ |
| @ThreadSafe |
| private final class TaskStack extends ThreadLocal<List<TaskData>> { |
| @Override |
| public List<TaskData> initialValue() { |
| return new ArrayList<>(); |
| } |
| |
| public TaskData peek() { |
| List<TaskData> list = get(); |
| if (list.isEmpty()) { |
| return null; |
| } |
| return list.get(list.size() - 1); |
| } |
| |
| public TaskData pop() { |
| List<TaskData> list = get(); |
| return list.remove(list.size() - 1); |
| } |
| |
| public boolean isEmpty() { |
| return get().isEmpty(); |
| } |
| |
| public void push(ProfilerTask eventType, String description) { |
| get().add(create(clock.nanoTime(), eventType, description)); |
| } |
| |
| public TaskData create(long startTimeNanos, ProfilerTask eventType, String description) { |
| return new TaskData(taskId.incrementAndGet(), startTimeNanos, peek(), eventType, description); |
| } |
| |
| @Override |
| public String toString() { |
| StringBuilder builder = new StringBuilder( |
| "Current task stack for thread " + Thread.currentThread().getName() + ":\n"); |
| List<TaskData> list = get(); |
| for (int i = list.size() - 1; i >= 0; i--) { |
| builder.append(list.get(i)); |
| builder.append("\n"); |
| } |
| return builder.toString(); |
| } |
| } |
| |
| /** |
   * Implements a data store for object description indices. Intended to be used only by the
   * binary format writer.
| */ |
| @ThreadCompatible |
| private static final class ObjectDescriber { |
| private Map<Object, Integer> descMap = new IdentityHashMap<>(2000); |
| private int indexCounter = 0; |
| |
| ObjectDescriber() { } |
| |
| int getDescriptionIndex(String description) { |
| Integer index = descMap.get(description); |
| return (index != null) ? index : -1; |
| } |
| |
| String memoizeDescription(String description) { |
| Integer oldIndex = descMap.put(description, indexCounter++); |
| // Do not use Preconditions class below due to the rather expensive |
| // toString() calls used in the message. |
| if (oldIndex != null) { |
        throw new IllegalStateException(
            "'"
                + description
                + "' @ "
                + System.identityHashCode(description)
                + " already had description index "
                + oldIndex
                + " while assigning index "
                + descMap.get(description));
| } else if (description.length() > 20000) { |
| // Note size 64k byte limitation in DataOutputStream#writeUTF(). |
| description = description.substring(0, 20000); |
| } |
| return description; |
| } |
| |
| boolean isUnassigned(int index) { |
| return (index < 0); |
| } |
| } |
| |
| /** |
| * Aggregator class that keeps track of the slowest tasks of the specified type. |
| * |
   * <p><code>extremaAggregators</code> is sharded so that all threads need not compete for the same
| * lock if they do the same operation at the same time. Access to an individual {@link Extrema} |
| * is synchronized on the {@link Extrema} instance itself. |
| */ |
| private static final class SlowestTaskAggregator { |
| private static final int SHARDS = 16; |
| private final int size; |
| |
| @SuppressWarnings({"unchecked", "rawtypes"}) |
| private final Extrema<SlowTask>[] extremaAggregators = new Extrema[SHARDS]; |
| |
| SlowestTaskAggregator(int size) { |
| this.size = size; |
| |
| for (int i = 0; i < SHARDS; i++) { |
| extremaAggregators[i] = Extrema.max(size); |
| } |
| } |
| |
| // @ThreadSafe |
| void add(TaskData taskData) { |
| Extrema<SlowTask> extrema = |
| extremaAggregators[(int) (Thread.currentThread().getId() % SHARDS)]; |
| synchronized (extrema) { |
| extrema.aggregate(new SlowTask(taskData)); |
| } |
| } |
| |
| // @ThreadSafe |
| void clear() { |
| for (int i = 0; i < SHARDS; i++) { |
| Extrema<SlowTask> extrema = extremaAggregators[i]; |
| synchronized (extrema) { |
| extrema.clear(); |
| } |
| } |
| } |
| |
| // @ThreadSafe |
| Iterable<SlowTask> getSlowestTasks() { |
| // This is slow, but since it only happens during the end of the invocation, it's OK |
| Extrema<SlowTask> mergedExtrema = Extrema.max(size); |
| for (int i = 0; i < SHARDS; i++) { |
| Extrema<SlowTask> extrema = extremaAggregators[i]; |
| synchronized (extrema) { |
| for (SlowTask task : extrema.getExtremeElements()) { |
| mergedExtrema.aggregate(task); |
| } |
| } |
| } |
| return mergedExtrema.getExtremeElements(); |
| } |
| } |
| |
| private Clock clock; |
| private ImmutableSet<ProfilerTask> profiledTasks; |
| private volatile long profileStartTime; |
| private volatile boolean recordAllDurations = false; |
| |
| /** This counter provides a unique id for every task, used to provide a parent/child relation. */ |
| private AtomicInteger taskId = new AtomicInteger(); |
| |
| /** |
| * The reference to the current writer, if any. If the referenced writer is null, then disk writes |
| * are disabled. This can happen when slowest task recording is enabled. |
| */ |
| private AtomicReference<FileWriter> writerRef = new AtomicReference<>(); |
| |
| /** |
| * This is a per-thread data structure that's used to track the current stack of open tasks, the |
| * purpose of which is to track the parent id of every task. This is also used to ensure that |
| * {@link #profile} and {@link #completeTask} calls always occur in pairs. |
| */ |
| // TODO(ulfjack): We can infer the parent/child relationship after the fact instead of tracking it |
| // at runtime. That would allow us to remove this data structure entirely. |
| private TaskStack taskStack; |
| |
| private final SlowestTaskAggregator[] slowestTasks = |
| new SlowestTaskAggregator[ProfilerTask.values().length]; |
| |
| private final StatRecorder[] tasksHistograms = new StatRecorder[ProfilerTask.values().length]; |
| |
| /** Thread that collects local cpu usage data (if enabled). */ |
| private CollectLocalCpuUsage cpuUsageThread; |
| |
| private Profiler() { |
| initHistograms(); |
| for (ProfilerTask task : ProfilerTask.values()) { |
| if (task.slowestInstancesCount != 0) { |
| slowestTasks[task.ordinal()] = new SlowestTaskAggregator(task.slowestInstancesCount); |
| } |
| } |
| } |
| |
| private void initHistograms() { |
| for (ProfilerTask task : ProfilerTask.values()) { |
| if (task.isVfs()) { |
| Map<String, ? extends Predicate<? super String>> vfsHeuristics = |
| VfsHeuristics.vfsTypeHeuristics; |
| List<RecorderAndPredicate> recorders = new ArrayList<>(vfsHeuristics.size()); |
| for (Map.Entry<String, ? extends Predicate<? super String>> e : vfsHeuristics.entrySet()) { |
| recorders.add(new RecorderAndPredicate( |
| new SingleStatRecorder(task + " " + e.getKey(), HISTOGRAM_BUCKETS), e.getValue())); |
| } |
| tasksHistograms[task.ordinal()] = new PredicateBasedStatRecorder(recorders); |
| } else { |
| tasksHistograms[task.ordinal()] = new SingleStatRecorder(task, HISTOGRAM_BUCKETS); |
| } |
| } |
| } |
| |
| /** |
   * Returns task histograms. This must be called between calls to {@link #start} and {@link #stop},
   * otherwise the returned recorders are all empty. Note that the returned recorders may still be
   * modified concurrently; they are, however, thread-safe.
| * |
| * <p>The stat recorders are indexed by {@code ProfilerTask#ordinal}. |
| */ |
| // TODO(ulfjack): This returns incomplete data by design. Maybe we should return the histograms on |
| // stop instead? However, this is currently only called from one location in a module, and that |
| // can't call stop itself. What to do? |
| public ImmutableList<StatRecorder> getTasksHistograms() { |
| return ImmutableList.copyOf(tasksHistograms); |
| } |
| |
| public static Profiler instance() { |
| return instance; |
| } |
| |
| /** |
| * Returns the nanoTime of the current profiler instance, or an arbitrary |
| * constant if not active. |
| */ |
| public static long nanoTimeMaybe() { |
| if (instance.isActive()) { |
| return instance.clock.nanoTime(); |
| } |
| return -1; |
| } |
| |
| /** |
| * Enable profiling. |
| * |
   * <p>Subsequent calls to {@code profile} and {@code logSimpleTask} will be recorded in the
   * provided output stream. Please note that stream performance is extremely important and
   * buffered streams should be used.
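   *
   * <p>For example (a minimal sketch; the output stream, path, and clock construction shown here
   * are illustrative only):
   *
   * <pre>{@code
   * OutputStream out = new BufferedOutputStream(Files.newOutputStream(profilePath));
   * Profiler.instance().start(
   *     ImmutableSet.copyOf(ProfilerTask.values()),
   *     out,
   *     Format.JSON_TRACE_FILE_FORMAT,
   *     "build profile",
   *     false, // recordAllDurations
   *     BlazeClock.instance(),
   *     BlazeClock.instance().nanoTime(),
   *     false, // enabledCpuUsageProfiling
   *     false); // slimProfile
   * }</pre>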
| * |
| * @param profiledTasks which of {@link ProfilerTask}s to track |
| * @param stream output stream to store profile data. Note: passing unbuffered stream object |
| * reference may result in significant performance penalties |
| * @param comment a comment to insert in the profile data |
   * @param recordAllDurations if true, record all tasks regardless of their duration; otherwise
   *     some tasks may get aggregated if they finish quickly enough
| * @param clock a {@code BlazeClock.instance()} |
| * @param execStartTimeNanos execution start time in nanos obtained from {@code clock.nanoTime()} |
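   * @param format the format in which to write the profile data; if this or {@code stream} is
   *     null, no profile file is written
   * @param enabledCpuUsageProfiling if true, start a background thread that collects local CPU
   *     usage data while the profiler is active
   * @param slimProfile if true, merge many short events when writing large JSON trace profiles to
   *     keep the output file small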
| */ |
| public synchronized void start( |
| ImmutableSet<ProfilerTask> profiledTasks, |
| OutputStream stream, |
| Format format, |
| String comment, |
| boolean recordAllDurations, |
| Clock clock, |
| long execStartTimeNanos, |
| boolean enabledCpuUsageProfiling, |
| boolean slimProfile) |
| throws IOException { |
| Preconditions.checkState(!isActive(), "Profiler already active"); |
| initHistograms(); |
| |
| this.profiledTasks = profiledTasks; |
| this.clock = clock; |
| |
| // sanity check for current limitation on the number of supported types due |
| // to using enum.ordinal() to store them instead of EnumSet for performance reasons. |
| Preconditions.checkState(TASK_COUNT < 256, |
| "The profiler implementation supports only up to 255 different ProfilerTask values."); |
| |
| // reset state for the new profiling session |
| taskId.set(0); |
| this.recordAllDurations = recordAllDurations; |
| this.taskStack = new TaskStack(); |
| FileWriter writer = null; |
| if (stream != null && format != null) { |
| switch (format) { |
| case BINARY_BAZEL_FORMAT: |
| writer = new BinaryFormatWriter(stream, execStartTimeNanos, comment); |
| break; |
| case JSON_TRACE_FILE_FORMAT: |
| writer = new JsonTraceFileWriter(stream, execStartTimeNanos, slimProfile); |
| break; |
| case JSON_TRACE_FILE_COMPRESSED_FORMAT: |
| writer = |
| new JsonTraceFileWriter( |
| new GZIPOutputStream(stream), execStartTimeNanos, slimProfile); |
| } |
| writer.start(); |
| } |
| this.writerRef.set(writer); |
| |
| // activate profiler |
| profileStartTime = execStartTimeNanos; |
| |
| if (enabledCpuUsageProfiling) { |
| cpuUsageThread = new CollectLocalCpuUsage(); |
| cpuUsageThread.setDaemon(true); |
| cpuUsageThread.start(); |
| } |
| } |
| |
| /** |
| * Returns task histograms. This must be called between calls to {@link #start} and {@link #stop}, |
| * or the returned list is empty. |
| */ |
| // TODO(ulfjack): This returns incomplete data by design. Also see getTasksHistograms. |
| public synchronized Iterable<SlowTask> getSlowestTasks() { |
| List<Iterable<SlowTask>> slowestTasksByType = new ArrayList<>(); |
| |
| for (SlowestTaskAggregator aggregator : slowestTasks) { |
| if (aggregator != null) { |
| slowestTasksByType.add(aggregator.getSlowestTasks()); |
| } |
| } |
| |
| return Iterables.concat(slowestTasksByType); |
| } |
| |
| /** |
| * Disable profiling and complete profile file creation. |
| * Subsequent calls to beginTask/endTask will no longer |
| * be recorded in the profile. |
| */ |
| public synchronized void stop() throws IOException { |
| if (!isActive()) { |
| return; |
| } |
| |
| if (cpuUsageThread != null) { |
| cpuUsageThread.stopCollecting(); |
| try { |
| cpuUsageThread.join(); |
| } catch (InterruptedException e) { |
| Thread.currentThread().interrupt(); |
| } |
| cpuUsageThread.logCollectedData(); |
| cpuUsageThread = null; |
| } |
| |
| // Log a final event to update the duration of ProfilePhase.FINISH. |
| logEvent(ProfilerTask.INFO, "Finishing"); |
| FileWriter writer = writerRef.getAndSet(null); |
| if (writer != null) { |
| writer.shutdown(); |
| writer = null; |
| } |
| taskStack = null; |
| initHistograms(); |
| profileStartTime = 0L; |
| |
| for (SlowestTaskAggregator aggregator : slowestTasks) { |
| if (aggregator != null) { |
| aggregator.clear(); |
| } |
| } |
| } |
| |
| /** |
| * Returns true iff profiling is currently enabled. |
| */ |
| public boolean isActive() { |
| return profileStartTime != 0L; |
| } |
| |
| public boolean isProfiling(ProfilerTask type) { |
| return profiledTasks.contains(type); |
| } |
| |
| /** |
   * Unless --record_full_profiler_data is given, we drop small tasks and add their time to the
   * parent's duration.
| */ |
| private boolean wasTaskSlowEnoughToRecord(ProfilerTask type, long duration) { |
| return (recordAllDurations || duration >= type.minDuration); |
| } |
| |
| /** |
   * Adds a task directly to the main queue, bypassing the task stack. Used for simple tasks that
   * are known to not have any subtasks.
| * |
| * @param startTimeNanos task start time (obtained through {@link Profiler#nanoTimeMaybe()}) |
| * @param duration task duration |
| * @param type task type |
| * @param description task description. May be stored until end of build. |
| */ |
| private void logTask(long startTimeNanos, long duration, ProfilerTask type, String description) { |
| Preconditions.checkNotNull(description); |
| Preconditions.checkState(startTimeNanos > 0, "startTime was %s", startTimeNanos); |
| Preconditions.checkState(!"".equals(description), "No description -> not helpful"); |
| if (duration < 0) { |
| // See note in Clock#nanoTime, which is used by Profiler#nanoTimeMaybe. |
| duration = 0; |
| } |
| |
| tasksHistograms[type.ordinal()].addStat( |
| (int) TimeUnit.NANOSECONDS.toMillis(duration), description); |
    // Store instance fields as local variables so they are not nulled out from under us by #stop.
| TaskStack localStack = taskStack; |
| FileWriter currentWriter = writerRef.get(); |
| if (localStack == null) { |
      // Variables have been nulled out by #stop in between the check the caller made and this
      // point in the code. Probably due to an asynchronous crash.
| logger.severe("Variables null in profiler for " + type + ", probably due to async crash"); |
| return; |
| } |
| TaskData parent = localStack.peek(); |
| if (parent != null) { |
| parent.aggregateChild(type, duration); |
| } |
| if (wasTaskSlowEnoughToRecord(type, duration)) { |
| TaskData data = localStack.create(startTimeNanos, type, description); |
| data.duration = duration; |
| if (currentWriter != null) { |
| currentWriter.enqueue(data); |
| } |
| |
| SlowestTaskAggregator aggregator = slowestTasks[type.ordinal()]; |
| |
| if (aggregator != null) { |
| aggregator.add(data); |
| } |
| } |
| } |
| |
| private boolean shouldProfile(long startTime, ProfilerTask type) { |
| return isActive() && startTime > 0 && isProfiling(type); |
| } |
| |
| /** |
   * Used externally to submit a simple task (one that does not have any subtasks). Depending on
   * the minDuration attribute of the task type, the task may just be aggregated into the parent
   * task rather than stored directly.
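   *
   * <p>Typical call pattern (a sketch; {@code doTheWork()} stands for the code being measured and
   * {@code ProfilerTask.INFO} for the actual task type):
   *
   * <pre>{@code
   * long startTime = Profiler.nanoTimeMaybe();
   * doTheWork();
   * Profiler.instance().logSimpleTask(startTime, ProfilerTask.INFO, "doing the work");
   * }</pre>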
| * |
| * @param startTime task start time (obtained through {@link Profiler#nanoTimeMaybe()}) |
| * @param type task type |
| * @param description task description. May be stored until the end of the build. |
| */ |
| public void logSimpleTask(long startTime, ProfilerTask type, String description) { |
| if (shouldProfile(startTime, type)) { |
| logTask(startTime, clock.nanoTime() - startTime, type, description); |
| } |
| } |
| |
| /** |
   * Used externally to submit a simple task (one that does not have any subtasks). Depending on
   * the minDuration attribute of the task type, the task may just be aggregated into the parent
   * task rather than stored directly.
| * |
| * <p>Note that start and stop time must both be acquired from the same clock instance. |
| * |
| * @param startTimeNanos task start time |
| * @param stopTimeNanos task stop time |
| * @param type task type |
| * @param description task description. May be stored until the end of the build. |
| */ |
| public void logSimpleTask( |
| long startTimeNanos, long stopTimeNanos, ProfilerTask type, String description) { |
| if (shouldProfile(startTimeNanos, type)) { |
| logTask(startTimeNanos, stopTimeNanos - startTimeNanos, type, description); |
| } |
| } |
| |
| /** |
   * Used externally to submit a simple task (one that does not have any subtasks). Depending on
   * the minDuration attribute of the task type, the task may just be aggregated into the parent
   * task rather than stored directly.
| * |
| * @param startTimeNanos task start time (obtained through {@link Profiler#nanoTimeMaybe()}) |
| * @param duration the duration of the task |
| * @param type task type |
| * @param description task description. May be stored until the end of the build. |
| */ |
| public void logSimpleTaskDuration( |
| long startTimeNanos, Duration duration, ProfilerTask type, String description) { |
| if (shouldProfile(startTimeNanos, type)) { |
| logTask(startTimeNanos, duration.toNanos(), type, description); |
| } |
| } |
| |
| /** Used to log "events" happening at a specific time - tasks with zero duration. */ |
| public void logEventAtTime(long atTimeNanos, ProfilerTask type, String description) { |
| if (isActive() && isProfiling(type)) { |
| logTask(atTimeNanos, 0, type, description); |
| } |
| } |
| |
| /** Used to log "events" - tasks with zero duration. */ |
| @VisibleForTesting |
| void logEvent(ProfilerTask type, String description) { |
| logEventAtTime(clock.nanoTime(), type, description); |
| } |
| |
| /** |
| * Records the beginning of a task as specified, and returns a {@link SilentCloseable} instance |
| * that ends the task. This lets the system do the work of ending the task, with the compiler |
| * giving a warning if the returned instance is not closed. |
| * |
   * <p>Use of this method supports nested task monitoring. For tasks that are known to not have
   * any subtasks, logSimpleTask() should be used instead.
| * |
| * <p>Use like this: |
| * <pre> |
| * {@code |
| * try (SilentCloseable c = Profiler.instance().profile(type, "description")) { |
| * // Your code here. |
| * } |
| * } |
| * </pre> |
| * |
| * @param type predefined task type - see ProfilerTask for available types. |
| * @param description task description. May be stored until the end of the build. |
| */ |
| public SilentCloseable profile(ProfilerTask type, String description) { |
| // ProfilerInfo.allTasksById is supposed to be an id -> Task map, but it is in fact a List, |
| // which means that we cannot drop tasks to which we had already assigned ids. Therefore, |
| // non-leaf tasks must not have a minimum duration. However, we don't quite consistently |
| // enforce this, and Blaze only works because we happen not to add child tasks to those parent |
| // tasks that have a minimum duration. |
| Preconditions.checkNotNull(description); |
| if (isActive() && isProfiling(type)) { |
| taskStack.push(type, description); |
| return () -> completeTask(type); |
| } else { |
| return () -> {}; |
| } |
| } |
| |
| /** |
| * Records the beginning of a task as specified, and returns a {@link SilentCloseable} instance |
| * that ends the task. This lets the system do the work of ending the task, with the compiler |
| * giving a warning if the returned instance is not closed. |
| * |
   * <p>Use of this method supports nested task monitoring. For tasks that are known to not have
   * any subtasks, logSimpleTask() should be used instead.
| * |
| * <p>This is a convenience method that uses {@link ProfilerTask#INFO}. |
| * |
| * <p>Use like this: |
| * <pre> |
| * {@code |
| * try (SilentCloseable c = Profiler.instance().profile("description")) { |
| * // Your code here. |
| * } |
| * } |
| * </pre> |
| * |
| * @param description task description. May be stored until the end of the build. |
| */ |
| public SilentCloseable profile(String description) { |
| return profile(ProfilerTask.INFO, description); |
| } |
| |
| /** |
   * Records the end of the task and moves it from the thread-local stack to the main queue. Will
   * validate that the given task type matches the task at the top of the stack.
| * |
| * @param type task type. |
| */ |
| private void completeTask(ProfilerTask type) { |
| if (isActive() && isProfiling(type)) { |
| long endTime = clock.nanoTime(); |
| TaskData data = taskStack.pop(); |
| Preconditions.checkState( |
| data.type == type, |
| "Inconsistent Profiler.completeTask() call: should have been %s but got %s (%s, %s)", |
| data.type, |
| type, |
| data, |
| taskStack); |
| data.duration = endTime - data.startTimeNanos; |
| if (data.parentId > 0) { |
| taskStack.peek().aggregateChild(data.type, data.duration); |
| } |
| boolean shouldRecordTask = wasTaskSlowEnoughToRecord(type, data.duration); |
| FileWriter writer = writerRef.get(); |
| if ((shouldRecordTask || data.counts != null) && writer != null) { |
| writer.enqueue(data); |
| } |
| |
| if (shouldRecordTask) { |
| SlowestTaskAggregator aggregator = slowestTasks[type.ordinal()]; |
| if (aggregator != null) { |
| aggregator.add(data); |
| } |
| } |
| } |
| } |
| |
| /** Convenience method to log phase marker tasks. */ |
| public void markPhase(ProfilePhase phase) throws InterruptedException { |
| MemoryProfiler.instance().markPhase(phase); |
| if (isActive() && isProfiling(ProfilerTask.PHASE)) { |
| Preconditions.checkState(taskStack.isEmpty(), "Phase tasks must not be nested"); |
| logEvent(ProfilerTask.PHASE, phase.description); |
| } |
| } |
| |
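  /**
   * Base class for profile writers. Tasks are enqueued from many threads and drained by a single
   * dedicated writer thread; {@link #shutdown} posts the poison pill, joins that thread, and
   * rethrows any {@code IOException} that the thread saved while writing.
   */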
| private abstract static class FileWriter implements Runnable { |
| protected final BlockingQueue<TaskData> queue; |
| protected final Thread thread; |
| protected IOException savedException; |
| |
| FileWriter() { |
| this.queue = new LinkedBlockingDeque<>(); |
| this.thread = new Thread(this, "profile-writer-thread"); |
| } |
| |
| public void shutdown() throws IOException { |
| // Add poison pill to queue and then wait for writer thread to shut down. |
| queue.add(POISON_PILL); |
| try { |
| thread.join(); |
| } catch (InterruptedException e) { |
| thread.interrupt(); |
| Thread.currentThread().interrupt(); |
| } |
| if (savedException != null) { |
| throw savedException; |
| } |
| } |
| |
| public void start() { |
| thread.start(); |
| } |
| |
| public void enqueue(TaskData data) { |
| queue.add(data); |
| } |
| } |
| |
| /** Writes the profile in the binary Bazel profile format. */ |
| private static class BinaryFormatWriter extends FileWriter { |
| private final OutputStream outStream; |
| private final long profileStartTime; |
| private final String comment; |
| |
| BinaryFormatWriter(OutputStream outStream, long profileStartTime, String comment) { |
      // Wrapping the deflater stream in a buffered stream proved to reduce the CPU consumption
      // caused by the write() method. Values for the buffer sizes were chosen by running a small
      // number of tests and identifying the point of diminishing returns - but I have not really
      // tried to optimize them.
| this.outStream = outStream; |
| this.profileStartTime = profileStartTime; |
| this.comment = comment; |
| } |
| |
| private static void writeHeader(DataOutputStream out, String comment) throws IOException { |
| out.writeInt(MAGIC); // magic |
| out.writeInt(VERSION); // protocol_version |
| out.writeUTF(comment); |
      // ProfilerTask.values() returns the enum constants in ordinal() order, so
      // there is no need to store the ordinal() value for each entry.
| out.writeInt(TASK_COUNT); |
| for (ProfilerTask type : ProfilerTask.values()) { |
| out.writeUTF(type.toString()); |
| } |
| } |
| |
| /** |
| * Saves all gathered information from taskQueue queue to the file. |
| * Method is invoked internally by the Timer-based thread and at the end of |
| * profiling session. |
| */ |
| @Override |
| public void run() { |
| try { |
| boolean receivedPoisonPill = false; |
| try (DataOutputStream out = |
| new DataOutputStream( |
| new BufferedOutputStream( |
| new DeflaterOutputStream( |
| // the DeflaterOutputStream has its own output buffer of 65k, chosen at |
| // random |
| outStream, new Deflater(Deflater.BEST_SPEED, false), 65536), |
| // buffer size, basically chosen at random |
| 262144))) { |
| writeHeader(out, comment); |
| // Allocate the sink once to avoid GC |
| ByteBuffer sink = ByteBuffer.allocate(1024); |
| ObjectDescriber describer = new ObjectDescriber(); |
| TaskData data; |
| while ((data = queue.take()) != POISON_PILL) { |
| ((Buffer) sink).clear(); |
| |
| VarInt.putVarLong(data.threadId, sink); |
| VarInt.putVarInt(data.id, sink); |
| VarInt.putVarInt(data.parentId, sink); |
| VarInt.putVarLong(data.startTimeNanos - profileStartTime, sink); |
| VarInt.putVarLong(data.duration, sink); |
| |
| // To save space (and improve performance), convert all description |
| // strings to the canonical object and use IdentityHashMap to assign |
| // unique numbers for each string. |
| int descIndex = describer.getDescriptionIndex(data.description); |
| VarInt.putVarInt(descIndex + 1, sink); // Add 1 to avoid encoding negative values. |
| |
| // Save types using their ordinal() value |
| sink.put((byte) data.type.ordinal()); |
| |
| // Save aggregated data stats. |
| if (data.counts != null) { |
| for (int i = 0; i < TASK_COUNT; i++) { |
| if (data.counts[i] > 0) { |
| sink.put((byte) i); // aggregated type ordinal value |
| VarInt.putVarInt(data.counts[i], sink); |
| VarInt.putVarLong(data.durations[i], sink); |
| } |
| } |
| } |
| |
| out.writeInt(sink.position()); |
| out.write(sink.array(), 0, sink.position()); |
| if (describer.isUnassigned(descIndex)) { |
| out.writeUTF(describer.memoizeDescription(data.description)); |
| } |
| } |
| receivedPoisonPill = true; |
| out.writeInt(EOF_MARKER); |
| } catch (IOException e) { |
| this.savedException = e; |
| if (!receivedPoisonPill) { |
| while (queue.take() != POISON_PILL) { |
| // We keep emptying the queue, but we can't write anything. |
| } |
| } |
| } |
| } catch (InterruptedException e) { |
| // Exit silently. |
| } |
| } |
| } |
| |
| /** Writes the profile in Json Trace file format. */ |
| private static class JsonTraceFileWriter extends FileWriter { |
| private final OutputStream outStream; |
| private final long profileStartTimeNanos; |
| private final ThreadLocal<Boolean> metadataPosted = |
| ThreadLocal.withInitial(() -> Boolean.FALSE); |
| private final boolean slimProfile; |
| |
| // The JDK never returns 0 as thread id so we use that as fake thread id for the critical path. |
| private static final long CRITICAL_PATH_THREAD_ID = 0; |
| |
| private static final long SLIM_PROFILE_EVENT_THRESHOLD = 10_000; |
| private static final long SLIM_PROFILE_MAXIMAL_PAUSE_NS = Duration.ofMillis(100).toNanos(); |
| private static final long SLIM_PROFILE_MAXIMAL_DURATION_NS = Duration.ofMillis(250).toNanos(); |
| |
| JsonTraceFileWriter(OutputStream outStream, long profileStartTimeNanos, boolean slimProfile) { |
| this.outStream = outStream; |
| this.profileStartTimeNanos = profileStartTimeNanos; |
| this.slimProfile = slimProfile; |
| } |
| |
| @Override |
| public void enqueue(TaskData data) { |
| if (!metadataPosted.get().booleanValue()) { |
| metadataPosted.set(Boolean.TRUE); |
| // Create a TaskData object that is special-cased below. |
| queue.add( |
| new TaskData( |
| /* id= */ 0, |
| /* startTimeNanos= */ -1, |
| /* parent= */ null, |
| ProfilerTask.THREAD_NAME, |
| Thread.currentThread().getName())); |
| } |
| queue.add(data); |
| } |
| |
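    /**
     * Accumulates temporally adjacent short events of a single thread into one synthetic "merged
     * N events" task; used when writing slim profiles.
     */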
| private static final class MergedEvent { |
| int count = 0; |
| long startTimeNanos; |
| long endTimeNanos; |
| TaskData data; |
| |
| /* |
| * Tries to merge an additional event, i.e. if the event is close enough to the already merged |
| * event. |
| * |
| * Returns null, if merging was possible. |
| * If not mergeable, returns the TaskData of the previously merged events and clears the |
| * internal data structures. |
| */ |
| TaskData maybeMerge(TaskData data) { |
| long startTimeNanos = data.startTimeNanos; |
| long endTimeNanos = startTimeNanos + data.duration; |
| if (count > 0 |
| && startTimeNanos >= this.startTimeNanos |
| && endTimeNanos <= this.endTimeNanos) { |
| // Skips child tasks. |
| return null; |
| } |
| if (count == 0) { |
| this.data = data; |
| this.startTimeNanos = startTimeNanos; |
| this.endTimeNanos = endTimeNanos; |
| count++; |
| return null; |
| } else if (startTimeNanos <= this.endTimeNanos + SLIM_PROFILE_MAXIMAL_PAUSE_NS) { |
| this.endTimeNanos = endTimeNanos; |
| count++; |
| return null; |
| } else { |
| TaskData ret = getAndReset(); |
| this.startTimeNanos = startTimeNanos; |
| this.endTimeNanos = endTimeNanos; |
| this.data = data; |
| count = 1; |
| return ret; |
| } |
| } |
| |
| // Returns a TaskData object representing the merged data and clears internal data structures. |
| TaskData getAndReset() { |
| TaskData ret; |
| if (count <= 1) { |
| ret = data; |
        } else {
          // count > 1 implies data != null, so dereferencing data here is safe.
          ret =
              new TaskData(
                  data.threadId,
                  this.startTimeNanos,
                  this.endTimeNanos - this.startTimeNanos,
                  "merged " + count + " events");
        }
| count = 0; |
| data = null; |
| return ret; |
| } |
| } |
| |
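    // Writes one Chrome trace event object per task, e.g. (a sketch; the values are illustrative):
    //   {"cat":"<type description>","name":"<description>","ph":"X","ts":123,"dur":45,"pid":1,"tid":8}
    // Zero-duration tasks are emitted as instant events ("ph":"i") without a "dur" field.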
| private void writeTask(JsonWriter writer, TaskData data) throws IOException { |
      // Neither the task nor its type may be null when writing an event.
      Preconditions.checkNotNull(data);
      Preconditions.checkNotNull(data.type);
      String eventType = data.duration == 0 ? "i" : "X";
      writer.setIndent("  ");
      writer.beginObject();
      writer.setIndent("");
| writer.name("cat").value(data.type.description); |
| writer.name("name").value(data.description); |
| writer.name("ph").value(eventType); |
| writer |
| .name("ts") |
| .value(TimeUnit.NANOSECONDS.toMicros(data.startTimeNanos - profileStartTimeNanos)); |
| if (data.duration != 0) { |
| writer.name("dur").value(TimeUnit.NANOSECONDS.toMicros(data.duration)); |
| } |
| writer.name("pid").value(1); |
| long threadId = |
| data.type == ProfilerTask.CRITICAL_PATH_COMPONENT |
| ? CRITICAL_PATH_THREAD_ID |
| : data.threadId; |
| writer.name("tid").value(threadId); |
| writer.endObject(); |
| } |
| |
| /** |
| * Saves all gathered information from taskQueue queue to the file. |
| * Method is invoked internally by the Timer-based thread and at the end of |
| * profiling session. |
| */ |
| @Override |
| public void run() { |
| try { |
| boolean receivedPoisonPill = false; |
| try (JsonWriter writer = |
| new JsonWriter( |
| // The buffer size of 262144 is chosen at random. |
| new OutputStreamWriter( |
| new BufferedOutputStream(outStream, 262144), StandardCharsets.UTF_8))) { |
| writer.beginArray(); |
| TaskData data; |
| |
| // Generate metadata event for the critical path as thread 0 in disguise. |
| writer.setIndent(" "); |
| writer.beginObject(); |
| writer.setIndent(""); |
| writer.name("name").value("thread_name"); |
| writer.name("ph").value("M"); |
| writer.name("pid").value(1); |
| writer.name("tid").value(CRITICAL_PATH_THREAD_ID); |
| writer.name("args"); |
| writer.beginObject(); |
| writer.name("name").value("Critical Path"); |
| writer.endObject(); |
| writer.endObject(); |
| |
| HashMap<Long, MergedEvent> eventsPerThread = new HashMap<>(); |
| int eventCount = 0; |
| while ((data = queue.take()) != POISON_PILL) { |
| eventCount++; |
| if (data.type == ProfilerTask.THREAD_NAME) { |
| writer.setIndent(" "); |
| writer.beginObject(); |
| writer.setIndent(""); |
| writer.name("name").value("thread_name"); |
| writer.name("ph").value("M"); |
| writer.name("pid").value(1); |
| writer.name("tid").value(data.threadId); |
| writer.name("args"); |
| |
| writer.beginObject(); |
| writer.name("name").value(data.description); |
| writer.endObject(); |
| |
| writer.endObject(); |
| continue; |
| } |
| if (data.type == ProfilerTask.LOCAL_CPU_USAGE) { |
| writer.setIndent(" "); |
| writer.beginObject(); |
| writer.setIndent(""); |
| writer.name("name").value(data.type.description); |
| writer.name("ph").value("C"); |
| writer |
| .name("ts") |
| .value( |
| TimeUnit.NANOSECONDS.toMicros(data.startTimeNanos - profileStartTimeNanos)); |
| writer.name("pid").value(1); |
| writer.name("tid").value(data.threadId); |
| writer.name("args"); |
| |
| writer.beginObject(); |
| writer.name("cpu").value(data.description); |
| writer.endObject(); |
| |
| writer.endObject(); |
| continue; |
| } |
| if (slimProfile |
| && eventCount > SLIM_PROFILE_EVENT_THRESHOLD |
| && data.duration > 0 |
| && data.duration < SLIM_PROFILE_MAXIMAL_DURATION_NS |
| && data.type != ProfilerTask.CRITICAL_PATH_COMPONENT) { |
| eventsPerThread.putIfAbsent(data.threadId, new MergedEvent()); |
| TaskData taskData = eventsPerThread.get(data.threadId).maybeMerge(data); |
| if (taskData != null) { |
| writeTask(writer, taskData); |
| } |
| } else { |
| writeTask(writer, data); |
| } |
| } |
        for (MergedEvent value : eventsPerThread.values()) {
| TaskData taskData = value.getAndReset(); |
| if (taskData != null) { |
| writeTask(writer, taskData); |
| } |
| } |
| receivedPoisonPill = true; |
| writer.setIndent(" "); |
| writer.endArray(); |
| } catch (IOException e) { |
| this.savedException = e; |
| if (!receivedPoisonPill) { |
| while (queue.take() != POISON_PILL) { |
| // We keep emptying the queue, but we can't write anything. |
| } |
| } |
| } |
| } catch (InterruptedException e) { |
| // Exit silently. |
| } |
| } |
| } |
| } |