bazel syntax: avoid allocation in str.{starts,ends}with
Also, tweak doc strings.
Saves 1% of CPU on the usual loading+analysis benchmark.
PiperOrigin-RevId: 331576094
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/StringModule.java b/src/main/java/com/google/devtools/build/lib/syntax/StringModule.java
index 350ae02..bb28acb 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/StringModule.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/StringModule.java
@@ -83,28 +83,32 @@
}
}
- // Emulate Python substring function
- // It converts out of range indices, and never fails
- //
- // TODO(adonovan): opt: avoid this function, as String.substring now allocates a copy (!)
- private static String pythonSubstring(String str, int start, Object end, String what)
+ // Returns the substring denoted by str[start:end], which is never out of bounds.
+ // For speed, we don't return str.substring(start, end), as substring allocates a copy.
+ // Instead we return the (start, end) indices, packed into the lo/hi arms of a long.
+ private static long substringIndices(String str, int start, Object end, String name)
throws EvalException {
- if (start == 0 && Starlark.isNullOrNone(end)) {
- return str;
+ // This function duplicates the logic of Starlark.slice for strings.
+ int n = str.length();
+ // TODO(adonovan): allow start to be None, as required by spec.
+ start = EvalUtils.toIndex(start, n);
+ int stop = end == Starlark.NONE ? n : EvalUtils.toIndex(Starlark.toInt(end, name), n);
+ if (stop < start) {
+ stop = start; // => empty result
}
- start = EvalUtils.toIndex(start, str.length());
- int stop;
- if (Starlark.isNullOrNone(end)) {
- stop = str.length();
- } else if (end instanceof Integer) {
- stop = EvalUtils.toIndex((Integer) end, str.length());
- } else {
- throw Starlark.errorf("expected int for %s, got %s", what, Starlark.type(end));
- }
- if (start >= stop) {
- return "";
- }
- return str.substring(start, stop);
+ return pack(start, stop); // = str.substring(start, stop)
+ }
+
+ private static long pack(int lo, int hi) {
+ return (((long) hi) << 32) | (lo & 0xffffffffL);
+ }
+
+ private static int lo(long x) {
+ return (int) x;
+ }
+
+ private static int hi(long x) {
+ return (int) (x >>> 32);
}
@StarlarkMethod(
@@ -473,6 +477,7 @@
if (self.isEmpty()) {
return self;
}
+ // TODO(adonovan): fix: support non-ASCII characters. Requires that Bazel stop abusing Latin1.
return Character.toUpperCase(self.charAt(0)) + Ascii.toLowerCase(self.substring(1));
}
@@ -513,11 +518,15 @@
private static int stringFind(
boolean forward, String self, String sub, int start, Object end, String msg)
throws EvalException {
- String substr = pythonSubstring(self, start, end, msg);
+ long indices = substringIndices(self, start, end, msg);
+ // Unfortunately Java forces us to allocate here, even though
+ // String has a private indexOf method that accepts indices.
+ // Fortunately the common case is self[0:n].
+ String substr = self.substring(lo(indices), hi(indices));
int subpos = forward ? substr.indexOf(sub) : substr.lastIndexOf(sub);
return subpos < 0
? subpos //
- : subpos + EvalUtils.toIndex(start, self.length());
+ : subpos + lo(indices);
}
private static final Pattern SPLIT_LINES_PATTERN =
@@ -816,10 +825,14 @@
doc = "optional position before which to restrict to search.")
})
public Integer count(String self, String sub, Integer start, Object end) throws EvalException {
- String str = pythonSubstring(self, start, end, "'end' operand of 'find'");
+ long indices = substringIndices(self, start, end, "'end' operand of 'count'");
if (sub.isEmpty()) {
- return str.length() + 1;
+ return hi(indices) - lo(indices) + 1; // str.length() + 1
}
+ // Unfortunately Java forces us to allocate here, even though
+ // String has a private indexOf method that accepts indices.
+ // Fortunately the common case is self[0:n].
+ String str = self.substring(lo(indices), hi(indices));
int count = 0;
int index = 0;
while ((index = str.indexOf(sub, index)) >= 0) {
@@ -858,7 +871,7 @@
@ParamType(type = String.class),
@ParamType(type = Tuple.class, generic1 = String.class),
},
- doc = "The substring to check."),
+ doc = "The suffix (or tuple of alternative suffixes) to match."),
@Param(
name = "start",
type = Integer.class,
@@ -872,18 +885,24 @@
doc = "optional position at which to stop comparing.")
})
public Boolean endsWith(String self, Object sub, Integer start, Object end) throws EvalException {
- String str = pythonSubstring(self, start, end, "'end' operand of 'endswith'");
+ long indices = substringIndices(self, start, end, "'end' operand of 'endswith'");
if (sub instanceof String) {
- return str.endsWith((String) sub);
+ return substringEndsWith(self, lo(indices), hi(indices), (String) sub);
}
for (String s : Sequence.cast(sub, String.class, "sub")) {
- if (str.endsWith(s)) {
+ if (substringEndsWith(self, lo(indices), hi(indices), s)) {
return true;
}
}
return false;
}
+ // Computes str.substring(start, end).endsWith(suffix) without allocation.
+ private static boolean substringEndsWith(String str, int start, int end, String suffix) {
+ int n = suffix.length();
+ return start + n <= end && str.regionMatches(end - n, suffix, 0, n);
+ }
+
// In Python, formatting is very complex.
// We handle here the simplest case which provides most of the value of the function.
// https://docs.python.org/3/library/string.html#formatstrings
@@ -940,7 +959,7 @@
@ParamType(type = String.class),
@ParamType(type = Tuple.class, generic1 = String.class),
},
- doc = "The substring(s) to check."),
+ doc = "The prefix (or tuple of alternative prefixes) to match."),
@Param(
name = "start",
type = Integer.class,
@@ -955,15 +974,20 @@
})
public Boolean startsWith(String self, Object sub, Integer start, Object end)
throws EvalException {
- String str = pythonSubstring(self, start, end, "'end' operand of 'startswith'");
+ long indices = substringIndices(self, start, end, "'end' operand of 'startswith'");
if (sub instanceof String) {
- return str.startsWith((String) sub);
+ return substringStartsWith(self, lo(indices), hi(indices), (String) sub);
}
for (String s : Sequence.cast(sub, String.class, "sub")) {
- if (str.startsWith(s)) {
+ if (substringStartsWith(self, lo(indices), hi(indices), s)) {
return true;
}
}
return false;
}
+
+ // Computes str.substring(start, end).startsWith(prefix) without allocation.
+ private static boolean substringStartsWith(String str, int start, int end, String prefix) {
+ return start + prefix.length() <= end && str.startsWith(prefix, start);
+ }
}