starlark: a simple benchmark runner

This change adds a simple benchmark runner, modelled on Go's testing.B
(https://golang.org/pkg/testing/#B). See also discussion in
https://github.com/bazelbuild/starlark/pull/75#pullrequestreview-275604129.
And https://github.com/bazelbuild/bazel/pull/9195, a JMH-based approach.
(Although JMH's measurement logic is of high quality, sadly it cannot be used
without annotations, which makes it unsuitable for a completely dynamic suite
such as this one.)

Each function named bench_* in a file named bench_*.star is executed repeatedly
by the runner. The runner provides a parameter, b, that gives, among other things,
the number b.n of operations to attempt, and operations to stop, start,
or restart the timer.

Output:
$ bazel run src/test/java/net/starlark/java/eval/Benchmarks
File src/test/java/net/starlark/java/eval/testdata/bench_list.star:
benchmark                   ops     cpu/op    wall/op   steps/op
bench_append               8191      186µs      183µs       5004
bench_extend            2097151      664ns      618ns          7

File src/test/java/net/starlark/java/eval/testdata/bench_sorted.star:
benchmark                   ops     cpu/op    wall/op   steps/op
bench_sort_large             31  40.2619ms  37.3016ms          7
bench_sort_small        4194303      346ns      346ns          7
PiperOrigin-RevId: 342904761
diff --git a/src/test/java/net/starlark/java/eval/BUILD b/src/test/java/net/starlark/java/eval/BUILD
index 98b066f..62579c9 100644
--- a/src/test/java/net/starlark/java/eval/BUILD
+++ b/src/test/java/net/starlark/java/eval/BUILD
@@ -1,4 +1,4 @@
-load("@rules_java//java:defs.bzl", "java_test")
+load("@rules_java//java:defs.bzl", "java_binary", "java_test")
 
 package(default_testonly = 1)
 
@@ -56,3 +56,18 @@
         "//third_party:guava",
     ],
 )
+
+# Script-based benchmarks of the Starlark interpreter.
+java_binary(
+    name = "Benchmarks",
+    srcs = ["Benchmarks.java"],
+    data = glob(["testdata/bench_*.star"]),
+    jvm_flags = ["-Dfile.encoding=UTF8"],
+    deps = [
+        "//src/main/java/net/starlark/java/annot",
+        "//src/main/java/net/starlark/java/eval",
+        "//src/main/java/net/starlark/java/lib/json",
+        "//src/main/java/net/starlark/java/syntax",
+        "//third_party:guava",
+    ],
+)
diff --git a/src/test/java/net/starlark/java/eval/Benchmarks.java b/src/test/java/net/starlark/java/eval/Benchmarks.java
new file mode 100644
index 0000000..f130a32
--- /dev/null
+++ b/src/test/java/net/starlark/java/eval/Benchmarks.java
@@ -0,0 +1,349 @@
+// Copyright 2020 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package net.starlark.java.eval;
+
+import com.google.common.collect.ImmutableMap;
+import com.sun.management.ThreadMXBean;
+import java.io.File;
+import java.lang.management.ManagementFactory;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+import net.starlark.java.annot.StarlarkBuiltin;
+import net.starlark.java.annot.StarlarkMethod;
+import net.starlark.java.lib.json.Json;
+import net.starlark.java.syntax.FileOptions;
+import net.starlark.java.syntax.ParserInput;
+import net.starlark.java.syntax.SyntaxError;
+
+// TODO(adonovan): document how to obtain a Java CPU profile.
+
+// TODO(adonovan): mitigate the effects of JVM warmup.
+// (See Oracle's JMH; we can't use it directly because it
+// seems to be entirely driven by Java annotations,
+// which is no good for a dynamic suite.)
+
+/**
+ * Script-based benchmarks of the Starlark evaluator.
+ *
+ * <p>Scripts in testdata/bench_*.star are executed, and then each function named {@code bench_*} is
+ * repeatedly called and measured. The function has one parameter, b, a Benchmark, that provides
+ * b.n, the number of iterations to execute. The function must have resource costs linear in b.n.
+ * Typically, the function body is a loop of the form {@code for _ in range(b.n): ...}. Using b.n
+ * for other purposes leads to meaningless results. For example, it would be a mistake to use it as
+ * the length of a random list to be sorted, because sorting does not run in linear time.
+ *
+ * <p>A benchmark with significant set-up costs may reset the timer ({@code b.restart()}) before
+ * entering its loop. Example:
+ *
+ * <pre>
+ * def bench_my_func(b):
+ *     """Description goes here."""
+ *     my_setup()
+ *     b.restart()
+ *     for _ in range(b.n):
+ *         my_func()
+ * </pre>
+ */
+public final class Benchmarks {
+
+  private static final String HELP =
+      "Usage: Benchmarks [--help] [--filter regex] [--seconds float]\n"
+          + "Runs Starlark benchmarks matching the filter for the specified (approximate) time,\n"
+          + "and reports various performance measures.";
+
+  private static boolean ok = true;
+
+  public static void main(String[] args) throws Exception {
+    Pattern filter = null; // default: all
+    long budgetNanos = 1_000_000_000;
+
+    // parse flags
+    int i;
+    for (i = 0; i < args.length; i++) {
+      if (args[i].equals("--")) {
+        i++;
+        break;
+
+      } else if (args[i].equals("--help")) {
+        System.out.println(HELP);
+        System.exit(0);
+
+      } else if (args[i].equals("--filter")) {
+        if (++i == args.length) {
+          fail("--filter needs an argument");
+        }
+        try {
+          filter = Pattern.compile(args[i]);
+        } catch (PatternSyntaxException ex) {
+          fail("for --filter, invalid regexp: %s", ex.getMessage());
+        }
+
+      } else if (args[i].equals("--seconds")) {
+        if (++i == args.length) {
+          fail("--seconds needs an argument");
+        }
+        try {
+          budgetNanos = (long) (1e9 * Double.parseDouble(args[i]));
+        } catch (NumberFormatException unused) {
+          fail("for --seconds, got '%s', want floating-point number of seconds", args[i]);
+        }
+        if (!(0 <= budgetNanos && budgetNanos <= 1e13)) {
+          fail("--seconds out of range");
+        }
+
+      } else {
+        fail("unknown flag: %s", args[i]);
+      }
+    }
+    if (i < args.length) {
+      fail("unexpected arguments");
+    }
+
+    // Read testdata/bench_* files.
+    File src = new File("third_party/bazel/src"); // blaze
+    if (!src.exists()) {
+      src = new File("src"); // bazel
+    }
+    File testdata = new File(src, "test/java/net/starlark/java/eval/testdata");
+    for (File file : testdata.listFiles()) {
+      if (!(file.getName().startsWith("bench_") && file.getName().endsWith(".star"))) {
+        continue;
+      }
+
+      // parse & execute
+      ParserInput input = ParserInput.readFile(file.toString());
+      ImmutableMap.Builder<String, Object> predeclared = ImmutableMap.builder();
+      predeclared.put("json", Json.INSTANCE);
+
+      Module module = Module.withPredeclared(semantics, predeclared.build());
+      try (Mutability mu = Mutability.create("test")) {
+        StarlarkThread thread = new StarlarkThread(mu, semantics);
+        Starlark.execFile(input, FileOptions.DEFAULT, module, thread);
+
+      } catch (SyntaxError.Exception ex) {
+        for (SyntaxError err : ex.errors()) {
+          System.err.println(err); // includes location
+          ok = false;
+          continue;
+        }
+      } catch (EvalException ex) {
+        System.err.println(ex.getMessageWithStack());
+        ok = false;
+        continue;
+
+      } catch (
+          @SuppressWarnings("InterruptedExceptionSwallowed")
+          Throwable ex) {
+        // unhandled exception (incl. InterruptedException)
+        System.err.printf("in %s: %s\n", file, ex.getMessage());
+        ex.printStackTrace();
+        ok = false;
+        continue;
+      }
+
+      // Sort bench_* functions by name.
+      TreeMap<String, StarlarkFunction> benchmarks = new TreeMap<>();
+      for (Map.Entry<String, Object> e : module.getExportedGlobals().entrySet()) {
+        if (e.getKey().startsWith("bench_") && e.getValue() instanceof StarlarkFunction) {
+          if (filter == null || filter.matcher(e.getKey()).find()) {
+            benchmarks.put(e.getKey(), (StarlarkFunction) e.getValue());
+          }
+        }
+      }
+      if (benchmarks.isEmpty()) {
+        if (filter == null) {
+          System.err.printf("File %s: no bench_* functions\n", file);
+          ok = false;
+        }
+        continue;
+      }
+
+      // Run benchmarks.
+      System.out.printf("File %s:\n", file);
+      System.out.printf(
+          "%-20s %10s %10s %10s %10s %10s\n", //
+          "benchmark", "ops", "cpu/op", "wall/op", "steps/op", "alloc/op");
+      for (Map.Entry<String, StarlarkFunction> e : benchmarks.entrySet()) {
+        String name = e.getKey();
+        System.out.flush(); // help user identify a slow benchmark
+        Benchmark b = run(name, e.getValue(), budgetNanos);
+        if (b == null) {
+          ok = false;
+          continue;
+        }
+        System.out.printf(
+            "%-20s %10d %10s %10s %10d %10s\n",
+            name,
+            b.count,
+            formatDuration(((double) b.time) / b.count),
+            formatDuration(((double) b.cpu) / b.count),
+            b.steps / b.count,
+            formatBytes(b.alloc / b.count));
+      }
+      System.out.println();
+    }
+    if (!ok) {
+      System.exit(1);
+    }
+  }
+
+  private static void fail(String format, Object... args) {
+    System.err.printf(format, args);
+    System.err.println();
+    System.exit(1);
+  }
+
+  // Runs benchmark function f for the specified time budget
+  // (which we may exceed by a factor of two).
+  private static Benchmark run(String name, StarlarkFunction f, long budgetNanos) {
+    Mutability mu = Mutability.create("test");
+    StarlarkThread thread = new StarlarkThread(mu, semantics);
+
+    Benchmark b = new Benchmark();
+
+    // Keep doubling the number of iterations until we exceed the deadline.
+    // TODO(adonovan): opt: extrapolate and predict the number of iterations
+    // in the remaining time budget, being wary of extrapolation error.
+    for (b.n = 1; b.time < budgetNanos; b.n <<= 1) {
+      try {
+        b.start(thread);
+        Starlark.fastcall(thread, f, new Object[] {b}, new Object[0]);
+        b.stop(thread);
+
+      } catch (EvalException ex) {
+        System.err.println(ex.getMessageWithStack());
+        return null;
+
+      } catch (
+          @SuppressWarnings("InterruptedExceptionSwallowed")
+          Throwable ex) {
+        // unhandled exception (incl. InterruptedException)
+        System.err.printf("In %s: %s\n", name, ex.getMessage());
+        ex.printStackTrace();
+        return null;
+      }
+    }
+
+    return b;
+  }
+
+  // The type of the parameter to each bench(b) function.
+  // Provides n, the number of iterations.
+  @StarlarkBuiltin(name = "Benchmark")
+  private static class Benchmark implements StarlarkValue {
+
+    // The cast assumes we use the "Sun" JVM, which measures per-thread allocation and CPU.
+    private final ThreadMXBean threadMX = (ThreadMXBean) ManagementFactory.getThreadMXBean();
+
+    // Starlark attributes
+    private int n; // requested number of iterations
+
+    // current span  (time0 != 0 => started)
+    private long cpu0;
+    private long alloc0;
+    private long time0;
+    private long steps0;
+
+    // accumulators
+    private int count; // iterations
+    private long cpu; // CPU time (ns) in this thread
+    private long alloc; // bytes allocated by this thread
+    private long time; // wall time (ns)
+    private long steps; // Starlark computation steps
+
+    @StarlarkMethod(name = "n", doc = "Requested number of iterations.", structField = true)
+    public int n() {
+      return n;
+    }
+
+    @StarlarkMethod(name = "start", doc = "Starts the timer.", useStarlarkThread = true)
+    public void start(StarlarkThread thread) throws EvalException {
+      if (time0 != 0) {
+        throw Starlark.errorf("timer already started");
+      }
+
+      this.cpu0 = threadMX.getCurrentThreadCpuTime();
+      this.alloc0 = threadMX.getThreadAllocatedBytes(Thread.currentThread().getId());
+      this.steps0 = thread.getExecutedSteps();
+      this.time0 = System.nanoTime();
+    }
+
+    @StarlarkMethod(name = "stop", doc = "Starts the timer.", useStarlarkThread = true)
+    public void stop(StarlarkThread thread) throws EvalException {
+      if (time0 == 0) {
+        throw Starlark.errorf("timer already stopped");
+      }
+      long time1 = System.nanoTime();
+      long steps1 = thread.getExecutedSteps();
+      long alloc1 = threadMX.getThreadAllocatedBytes(Thread.currentThread().getId());
+      long cpu1 = threadMX.getCurrentThreadCpuTime();
+
+      this.time += time1 - this.time0;
+      this.steps += steps1 - this.steps0;
+      this.alloc += alloc1 - this.alloc0;
+      this.cpu += cpu1 - this.cpu0;
+
+      this.count += this.n;
+
+      time0 = 0; // stopped
+    }
+
+    @StarlarkMethod(name = "restart", doc = "Restarts the timer.", useStarlarkThread = true)
+    public void restart(StarlarkThread thread) throws EvalException {
+      time0 = 0; // stop, and discard current span
+      start(thread);
+    }
+
+    @Override
+    public void repr(Printer p) {
+      p.append("<Benchmark>");
+    }
+  }
+
+  private static String formatDuration(double ns) {
+    // (Similar format to Go's time.Duration.)
+    if (ns == 0) {
+      return "0s";
+    } else if (ns < 1e3) {
+      return String.format("%dns", (long) ns);
+    } else if (ns < 1e6) {
+      return String.format("%.3gµs", ns / 1e3);
+    } else if (ns < 1e9) {
+      return String.format("%.6gms", ns / 1e6);
+    } else {
+      return String.format("%.3gs", ns / 1e9);
+    }
+  }
+
+  private static String formatBytes(long bytes) {
+    if (bytes == 0) {
+      return "0B";
+    } else if (bytes < 1e3) {
+      return String.format("%dB", bytes);
+    } else if (bytes < 1e6) {
+      return String.format("%.3gKB", bytes / 1e3);
+    } else if (bytes < 1e9) {
+      return String.format("%.6gMB", bytes / 1e6);
+    } else {
+      return String.format("%.3gGB", bytes / 1e9);
+    }
+  }
+
+  private static final StarlarkSemantics semantics = StarlarkSemantics.DEFAULT;
+
+  private Benchmarks() {}
+}
diff --git a/src/test/java/net/starlark/java/eval/testdata/bench_list.star b/src/test/java/net/starlark/java/eval/testdata/bench_list.star
new file mode 100644
index 0000000..f571222
--- /dev/null
+++ b/src/test/java/net/starlark/java/eval/testdata/bench_list.star
@@ -0,0 +1,14 @@
+_list1000 = list(range(1000))
+
+def bench_extend(b):
+    "Extends an empty list by 1000 items."
+    for _ in range(b.n):
+        x = []
+        x.extend(_list1000)
+
+def bench_append(b):
+    "Appends 1000 items to an empty list."
+    for _ in range(b.n):
+        x = []
+        for y in _list1000:
+            x.append(y)
diff --git a/src/test/java/net/starlark/java/eval/testdata/bench_sorted.star b/src/test/java/net/starlark/java/eval/testdata/bench_sorted.star
new file mode 100644
index 0000000..2c55735
--- /dev/null
+++ b/src/test/java/net/starlark/java/eval/testdata/bench_sorted.star
@@ -0,0 +1,21 @@
+def random(seed):
+    "Updates seed[0] and returns the next pseudorandom number."
+    x = seed[0]
+    seed[0] = x + 9319 * 125 % 6011
+    return x
+
+def _bench_sort(b, size):
+    seed = [0]
+    orig = [random(seed) for x in range(size)]
+    b.restart()
+    for _ in range(b.n):
+        copy = orig[:]  # TODO(adonovan): move allocation outside loop
+        sorted(copy)
+
+def bench_sort_large(b):
+    "Sort an array of a million ints."
+    _bench_sort(b, 1000000)
+
+def bench_sort_small(b):
+    "Sort an array of ten ints."
+    _bench_sort(b, 10)