Starlark: microoptimize str[index]
Preallocate single-char ASCII strings.
```
def test():
for i in range(1000000):
s = "abcdefghijklmnopqrstuvwxyz"
for j in range(len(s)):
s[j]
test()
```
```
A: N=48, r=3.896+-0.475
B: N=48, r=3.516+-0.571
B/A: 0.902
```
Closes #12490.
PiperOrigin-RevId: 342728476
diff --git a/src/main/java/net/starlark/java/eval/EvalUtils.java b/src/main/java/net/starlark/java/eval/EvalUtils.java
index 8577630..678b926 100644
--- a/src/main/java/net/starlark/java/eval/EvalUtils.java
+++ b/src/main/java/net/starlark/java/eval/EvalUtils.java
@@ -443,7 +443,7 @@
String string = (String) object;
int index = Starlark.toInt(key, "string index");
index = getSequenceIndex(index, string.length());
- return string.substring(index, index + 1);
+ return StringModule.memoizedCharToString(string.charAt(index));
} else {
throw Starlark.errorf(
"type '%s' has no operator [](%s)", Starlark.type(object), Starlark.type(key));
diff --git a/src/main/java/net/starlark/java/eval/StringModule.java b/src/main/java/net/starlark/java/eval/StringModule.java
index 3202aed..2fe748a 100644
--- a/src/main/java/net/starlark/java/eval/StringModule.java
+++ b/src/main/java/net/starlark/java/eval/StringModule.java
@@ -69,7 +69,11 @@
static String slice(String s, int start, int stop, int step) {
RangeList indices = new RangeList(start, stop, step);
int n = indices.size();
- if (step == 1) { // common case
+ if (n == 0) {
+ return "";
+ } else if (n == 1) {
+ return memoizedCharToString(s.charAt(indices.at(0)));
+ } else if (step == 1) { // common case
return s.substring(indices.at(0), indices.at(n));
} else {
char[] res = new char[n];
@@ -80,6 +84,27 @@
}
}
+ // Nearly all chars in Starlark strings are ASCII.
+ // This is a cache of single-char strings to avoid allocation in the s[i] operation.
+ private static final String[] ASCII_CHAR_STRINGS = initCharStrings();
+
+ private static String[] initCharStrings() {
+ String[] a = new String[0x80];
+ for (int i = 0; i < a.length; ++i) {
+ a[i] = String.valueOf((char) i);
+ }
+ return a;
+ }
+
+ /** Semantically equivalent to {@link String#valueOf(char)} but faster for ASCII strings. */
+ static String memoizedCharToString(char c) {
+ if (c < ASCII_CHAR_STRINGS.length) {
+ return ASCII_CHAR_STRINGS[c];
+ } else {
+ return String.valueOf(c);
+ }
+ }
+
// Returns the substring denoted by str[start:end], which is never out of bounds.
// For speed, we don't return str.substring(start, end), as substring allocates a copy.
// Instead we return the (start, end) indices, packed into the lo/hi arms of a long.
@@ -861,7 +886,7 @@
char[] chars = self.toCharArray();
Object[] strings = new Object[chars.length];
for (int i = 0; i < chars.length; i++) {
- strings[i] = String.valueOf(chars[i]);
+ strings[i] = memoizedCharToString(chars[i]);
}
return StarlarkList.wrap(null, strings);
}