bazel syntax: avoid allocation in str.{starts,ends}with

Also, tweak doc strings.

Saves 1% of CPU on the usual loading+analysis benchmark.

PiperOrigin-RevId: 331576094
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/StringModule.java b/src/main/java/com/google/devtools/build/lib/syntax/StringModule.java
index 350ae02..bb28acb 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/StringModule.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/StringModule.java
@@ -83,28 +83,32 @@
     }
   }
 
-  // Emulate Python substring function
-  // It converts out of range indices, and never fails
-  //
-  // TODO(adonovan): opt: avoid this function, as String.substring now allocates a copy (!)
-  private static String pythonSubstring(String str, int start, Object end, String what)
+  // Returns the substring denoted by str[start:end], which is never out of bounds.
+  // For speed, we don't return str.substring(start, end), as substring allocates a copy.
+  // Instead we return the (start, end) indices, packed into the lo/hi arms of a long.
+  private static long substringIndices(String str, int start, Object end, String name)
       throws EvalException {
-    if (start == 0 && Starlark.isNullOrNone(end)) {
-      return str;
+    // This function duplicates the logic of Starlark.slice for strings.
+    int n = str.length();
+    // TODO(adonovan): allow start to be None, as required by spec.
+    start = EvalUtils.toIndex(start, n);
+    int stop = end == Starlark.NONE ? n : EvalUtils.toIndex(Starlark.toInt(end, name), n);
+    if (stop < start) {
+      stop = start; // => empty result
     }
-    start = EvalUtils.toIndex(start, str.length());
-    int stop;
-    if (Starlark.isNullOrNone(end)) {
-      stop = str.length();
-    } else if (end instanceof Integer) {
-      stop = EvalUtils.toIndex((Integer) end, str.length());
-    } else {
-      throw Starlark.errorf("expected int for %s, got %s", what, Starlark.type(end));
-    }
-    if (start >= stop) {
-      return "";
-    }
-    return str.substring(start, stop);
+    return pack(start, stop); // = str.substring(start, stop)
+  }
+
+  private static long pack(int lo, int hi) {
+    return (((long) hi) << 32) | (lo & 0xffffffffL);
+  }
+
+  private static int lo(long x) {
+    return (int) x;
+  }
+
+  private static int hi(long x) {
+    return (int) (x >>> 32);
   }
 
   @StarlarkMethod(
@@ -473,6 +477,7 @@
     if (self.isEmpty()) {
       return self;
     }
+    // TODO(adonovan): fix: support non-ASCII characters. Requires that Bazel stop abusing Latin1.
     return Character.toUpperCase(self.charAt(0)) + Ascii.toLowerCase(self.substring(1));
   }
 
@@ -513,11 +518,15 @@
   private static int stringFind(
       boolean forward, String self, String sub, int start, Object end, String msg)
       throws EvalException {
-    String substr = pythonSubstring(self, start, end, msg);
+    long indices = substringIndices(self, start, end, msg);
+    // Unfortunately Java forces us to allocate here, even though
+    // String has a private indexOf method that accepts indices.
+    // Fortunately the common case is self[0:n].
+    String substr = self.substring(lo(indices), hi(indices));
     int subpos = forward ? substr.indexOf(sub) : substr.lastIndexOf(sub);
     return subpos < 0
         ? subpos //
-        : subpos + EvalUtils.toIndex(start, self.length());
+        : subpos + lo(indices);
   }
 
   private static final Pattern SPLIT_LINES_PATTERN =
@@ -816,10 +825,14 @@
             doc = "optional position before which to restrict to search.")
       })
   public Integer count(String self, String sub, Integer start, Object end) throws EvalException {
-    String str = pythonSubstring(self, start, end, "'end' operand of 'find'");
+    long indices = substringIndices(self, start, end, "'end' operand of 'count'");
     if (sub.isEmpty()) {
-      return str.length() + 1;
+      return hi(indices) - lo(indices) + 1; // str.length() + 1
     }
+    // Unfortunately Java forces us to allocate here, even though
+    // String has a private indexOf method that accepts indices.
+    // Fortunately the common case is self[0:n].
+    String str = self.substring(lo(indices), hi(indices));
     int count = 0;
     int index = 0;
     while ((index = str.indexOf(sub, index)) >= 0) {
@@ -858,7 +871,7 @@
               @ParamType(type = String.class),
               @ParamType(type = Tuple.class, generic1 = String.class),
             },
-            doc = "The substring to check."),
+            doc = "The suffix (or tuple of alternative suffixes) to match."),
         @Param(
             name = "start",
             type = Integer.class,
@@ -872,18 +885,24 @@
             doc = "optional position at which to stop comparing.")
       })
   public Boolean endsWith(String self, Object sub, Integer start, Object end) throws EvalException {
-    String str = pythonSubstring(self, start, end, "'end' operand of 'endswith'");
+    long indices = substringIndices(self, start, end, "'end' operand of 'endswith'");
     if (sub instanceof String) {
-      return str.endsWith((String) sub);
+      return substringEndsWith(self, lo(indices), hi(indices), (String) sub);
     }
     for (String s : Sequence.cast(sub, String.class, "sub")) {
-      if (str.endsWith(s)) {
+      if (substringEndsWith(self, lo(indices), hi(indices), s)) {
         return true;
       }
     }
     return false;
   }
 
+  // Computes str.substring(start, end).endsWith(suffix) without allocation.
+  private static boolean substringEndsWith(String str, int start, int end, String suffix) {
+    int n = suffix.length();
+    return start + n <= end && str.regionMatches(end - n, suffix, 0, n);
+  }
+
   // In Python, formatting is very complex.
   // We handle here the simplest case which provides most of the value of the function.
   // https://docs.python.org/3/library/string.html#formatstrings
@@ -940,7 +959,7 @@
               @ParamType(type = String.class),
               @ParamType(type = Tuple.class, generic1 = String.class),
             },
-            doc = "The substring(s) to check."),
+            doc = "The prefix (or tuple of alternative prefixes) to match."),
         @Param(
             name = "start",
             type = Integer.class,
@@ -955,15 +974,20 @@
       })
   public Boolean startsWith(String self, Object sub, Integer start, Object end)
       throws EvalException {
-    String str = pythonSubstring(self, start, end, "'end' operand of 'startswith'");
+    long indices = substringIndices(self, start, end, "'end' operand of 'startswith'");
     if (sub instanceof String) {
-      return str.startsWith((String) sub);
+      return substringStartsWith(self, lo(indices), hi(indices), (String) sub);
     }
     for (String s : Sequence.cast(sub, String.class, "sub")) {
-      if (str.startsWith(s)) {
+      if (substringStartsWith(self, lo(indices), hi(indices), s)) {
         return true;
       }
     }
     return false;
   }
+
+  // Computes str.substring(start, end).startsWith(prefix) without allocation.
+  private static boolean substringStartsWith(String str, int start, int end, String prefix) {
+    return start + prefix.length() <= end && str.startsWith(prefix, start);
+  }
 }