blob: 48293d67fffe1673b8d92b8580c1b3b1e3eeb837 [file] [log] [blame]
// Copyright 2018 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package net.starlark.java.eval;
import com.google.common.base.Ascii;
import com.google.common.base.CharMatcher;
import com.google.common.base.Joiner;
import java.util.ArrayList;
import java.util.Collections;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.starlark.java.annot.Param;
import net.starlark.java.annot.ParamType;
import net.starlark.java.annot.StarlarkBuiltin;
import net.starlark.java.annot.StarlarkMethod;
/**
* Starlark String module.
*
* <p>This module has special treatment in Starlark, as its methods represent methods represent for
* any 'string' objects in the language.
*
* <p>Methods of this class annotated with {@link StarlarkMethod} must have a positional-only
* 'String self' parameter as the first parameter of the method.
*/
@StarlarkBuiltin(
name = "string",
category = "core",
doc =
"A language built-in type to support strings. "
+ "Examples of string literals:<br>"
+ "<pre class=\"language-python\">a = 'abc\\ndef'\n"
+ "b = \"ab'cd\"\n"
+ "c = \"\"\"multiline string\"\"\"\n"
+ "\n"
+ "# Strings support slicing (negative index starts from the end):\n"
+ "x = \"hello\"[2:4] # \"ll\"\n"
+ "y = \"hello\"[1:-1] # \"ell\"\n"
+ "z = \"hello\"[:4] # \"hell\""
+ "# Slice steps can be used, too:\n"
+ "s = \"hello\"[::2] # \"hlo\"\n"
+ "t = \"hello\"[3:0:-1] # \"lle\"\n</pre>"
+ "Strings are not directly iterable, use the <code>.elems()</code> "
+ "method to iterate over their characters. Examples:<br>"
+ "<pre class=\"language-python\">\"bc\" in \"abcd\" # evaluates to True\n"
+ "x = [s for s.elems() in \"abc\"] # x == [\"a\", \"b\", \"c\"]</pre>\n"
+ "Implicit concatenation of strings is not allowed; use the <code>+</code> "
+ "operator instead. Comparison operators perform a lexicographical comparison; "
+ "use <code>==</code> to test for equality.")
final class StringModule implements StarlarkValue {
static final StringModule INSTANCE = new StringModule();
private StringModule() {}
// Returns s[start:stop:step], as if by Sequence.getSlice.
static String slice(String s, int start, int stop, int step) {
RangeList indices = new RangeList(start, stop, step);
int n = indices.size();
if (n == 0) {
return "";
} else if (n == 1) {
return memoizedCharToString(s.charAt(indices.at(0)));
} else if (step == 1) { // common case
return s.substring(indices.at(0), indices.at(n));
} else {
char[] res = new char[n];
for (int i = 0; i < n; ++i) {
res[i] = s.charAt(indices.at(i));
}
return new String(res);
}
}
// Nearly all chars in Starlark strings are ASCII.
// This is a cache of single-char strings to avoid allocation in the s[i] operation.
private static final String[] ASCII_CHAR_STRINGS = initCharStrings();
private static String[] initCharStrings() {
String[] a = new String[0x80];
for (int i = 0; i < a.length; ++i) {
a[i] = String.valueOf((char) i);
}
return a;
}
/** Semantically equivalent to {@link String#valueOf(char)} but faster for ASCII strings. */
static String memoizedCharToString(char c) {
if (c < ASCII_CHAR_STRINGS.length) {
return ASCII_CHAR_STRINGS[c];
} else {
return String.valueOf(c);
}
}
// Returns the substring denoted by str[start:end], which is never out of bounds.
// For speed, we don't return str.substring(start, end), as substring allocates a copy.
// Instead we return the (start, end) indices, packed into the lo/hi arms of a long.
private static long substringIndices(String str, Object start, Object end) throws EvalException {
// This function duplicates the logic of Starlark.slice for strings.
int n = str.length();
int istart = 0;
if (start != Starlark.NONE) {
istart = EvalUtils.toIndex(Starlark.toInt(start, "start"), n);
}
int iend = n;
if (end != Starlark.NONE) {
iend = EvalUtils.toIndex(Starlark.toInt(end, "end"), n);
}
if (iend < istart) {
iend = istart; // => empty result
}
return pack(istart, iend); // = str.substring(start, end)
}
private static long pack(int lo, int hi) {
return (((long) hi) << 32) | (lo & 0xffffffffL);
}
private static int lo(long x) {
return (int) x;
}
private static int hi(long x) {
return (int) (x >>> 32);
}
@StarlarkMethod(
name = "join",
doc =
"Returns a string in which the string elements of the argument have been "
+ "joined by this string as a separator. Example:<br>"
+ "<pre class=\"language-python\">\"|\".join([\"a\", \"b\", \"c\"]) == \"a|b|c\""
+ "</pre>",
parameters = {@Param(name = "self"), @Param(name = "elements", doc = "The objects to join.")})
public String join(String self, Object elements) throws EvalException {
Iterable<?> items = Starlark.toIterable(elements);
int i = 0;
for (Object item : items) {
if (!(item instanceof String)) {
throw Starlark.errorf(
"expected string for sequence element %d, got '%s'", i, Starlark.type(item));
}
i++;
}
return Joiner.on(self).join(items);
}
@StarlarkMethod(
name = "lower",
doc = "Returns the lower case version of this string.",
parameters = {@Param(name = "self")})
public String lower(String self) {
return Ascii.toLowerCase(self);
}
@StarlarkMethod(
name = "upper",
doc = "Returns the upper case version of this string.",
parameters = {@Param(name = "self")})
public String upper(String self) {
return Ascii.toUpperCase(self);
}
/**
* For consistency with Python we recognize the same whitespace characters as they do over the
* range 0x00-0xFF. See https://hg.python.org/cpython/file/3.6/Objects/unicodetype_db.h#l5738 This
* list is a consequence of Unicode character information.
*
* <p>Note that this differs from Python 2.7, which uses ctype.h#isspace(), and from
* java.lang.Character#isWhitespace(), which does not recognize U+00A0.
*/
private static final String LATIN1_WHITESPACE =
("\u0009" + "\n" + "\u000B" + "\u000C" + "\r" + "\u001C" + "\u001D" + "\u001E" + "\u001F"
+ "\u0020" + "\u0085" + "\u00A0");
private static String stringLStrip(String self, String chars) {
CharMatcher matcher = CharMatcher.anyOf(chars);
for (int i = 0; i < self.length(); i++) {
if (!matcher.matches(self.charAt(i))) {
return self.substring(i);
}
}
return ""; // All characters were stripped.
}
private static String stringRStrip(String self, String chars) {
CharMatcher matcher = CharMatcher.anyOf(chars);
for (int i = self.length() - 1; i >= 0; i--) {
if (!matcher.matches(self.charAt(i))) {
return self.substring(0, i + 1);
}
}
return ""; // All characters were stripped.
}
private static String stringStrip(String self, String chars) {
return stringLStrip(stringRStrip(self, chars), chars);
}
@StarlarkMethod(
name = "lstrip",
doc =
"Returns a copy of the string where leading characters that appear in "
+ "<code>chars</code> are removed. Note that <code>chars</code> "
+ "is not a prefix: all combinations of its value are removed:"
+ "<pre class=\"language-python\">"
+ "\"abcba\".lstrip(\"ba\") == \"cba\""
+ "</pre>",
parameters = {
@Param(name = "self"),
@Param(
name = "chars",
allowedTypes = {
@ParamType(type = String.class),
@ParamType(type = NoneType.class),
},
doc = "The characters to remove, or all whitespace if None.",
defaultValue = "None")
})
public String lstrip(String self, Object charsOrNone) {
String chars = charsOrNone != Starlark.NONE ? (String) charsOrNone : LATIN1_WHITESPACE;
return stringLStrip(self, chars);
}
@StarlarkMethod(
name = "rstrip",
doc =
"Returns a copy of the string where trailing characters that appear in "
+ "<code>chars</code> are removed. Note that <code>chars</code> "
+ "is not a suffix: all combinations of its value are removed:"
+ "<pre class=\"language-python\">"
+ "\"abcbaa\".rstrip(\"ab\") == \"abc\""
+ "</pre>",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(
name = "chars",
allowedTypes = {
@ParamType(type = String.class),
@ParamType(type = NoneType.class),
},
doc = "The characters to remove, or all whitespace if None.",
defaultValue = "None")
})
public String rstrip(String self, Object charsOrNone) {
String chars = charsOrNone != Starlark.NONE ? (String) charsOrNone : LATIN1_WHITESPACE;
return stringRStrip(self, chars);
}
@StarlarkMethod(
name = "strip",
doc =
"Returns a copy of the string where leading or trailing characters that appear in "
+ "<code>chars</code> are removed. Note that <code>chars</code> "
+ "is neither a prefix nor a suffix: all combinations of its value "
+ "are removed:"
+ "<pre class=\"language-python\">"
+ "\"aabcbcbaa\".strip(\"ab\") == \"cbc\""
+ "</pre>",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(
name = "chars",
allowedTypes = {
@ParamType(type = String.class),
@ParamType(type = NoneType.class),
},
doc = "The characters to remove, or all whitespace if None.",
defaultValue = "None")
})
public String strip(String self, Object charsOrNone) {
String chars = charsOrNone != Starlark.NONE ? (String) charsOrNone : LATIN1_WHITESPACE;
return stringStrip(self, chars);
}
@StarlarkMethod(
name = "replace",
doc =
"Returns a copy of the string in which the occurrences "
+ "of <code>old</code> have been replaced with <code>new</code>, optionally "
+ "restricting the number of replacements to <code>maxsplit</code>.",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(name = "old", doc = "The string to be replaced."),
@Param(name = "new", doc = "The string to replace with."),
@Param(
name = "count",
defaultValue = "-1",
doc =
"The maximum number of replacements. If omitted, or if the value is negative, "
+ "there is no limit.")
},
useStarlarkThread = true)
public String replace(
String self, String oldString, String newString, StarlarkInt countI, StarlarkThread thread)
throws EvalException {
int count = countI.toInt("count");
if (count < 0) {
count = Integer.MAX_VALUE;
}
StringBuilder sb = new StringBuilder();
int start = 0;
for (int i = 0; i < count; i++) {
if (oldString.isEmpty()) {
sb.append(newString);
if (start < self.length()) {
sb.append(self.charAt(start++));
} else {
break;
}
} else {
int end = self.indexOf(oldString, start);
if (end < 0) {
break;
}
sb.append(self, start, end).append(newString);
start = end + oldString.length();
}
}
sb.append(self, start, self.length());
return sb.toString();
}
@StarlarkMethod(
name = "split",
doc =
"Returns a list of all the words in the string, using <code>sep</code> as the "
+ "separator, optionally limiting the number of splits to <code>maxsplit</code>.",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(name = "sep", doc = "The string to split on."),
@Param(
name = "maxsplit",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "None",
doc = "The maximum number of splits.")
},
useStarlarkThread = true)
public StarlarkList<String> split(
String self, String sep, Object maxSplitO, StarlarkThread thread) throws EvalException {
if (sep.isEmpty()) {
throw Starlark.errorf("Empty separator");
}
int maxSplit = Integer.MAX_VALUE;
if (maxSplitO != Starlark.NONE) {
maxSplit = Starlark.toInt(maxSplitO, "maxsplit");
}
StarlarkList<String> res = StarlarkList.newList(thread.mutability());
int start = 0;
while (true) {
int end = self.indexOf(sep, start);
if (end < 0 || maxSplit-- == 0) {
res.addElement(self.substring(start));
break;
}
res.addElement(self.substring(start, end));
start = end + sep.length();
}
return res;
}
@StarlarkMethod(
name = "rsplit",
doc =
"Returns a list of all the words in the string, using <code>sep</code> as the "
+ "separator, optionally limiting the number of splits to <code>maxsplit</code>. "
+ "Except for splitting from the right, this method behaves like split().",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(name = "sep", doc = "The string to split on."),
@Param(
name = "maxsplit",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "None",
doc = "The maximum number of splits.")
},
useStarlarkThread = true)
public StarlarkList<String> rsplit(
String self, String sep, Object maxSplitO, StarlarkThread thread) throws EvalException {
if (sep.isEmpty()) {
throw Starlark.errorf("Empty separator");
}
int maxSplit = Integer.MAX_VALUE;
if (maxSplitO != Starlark.NONE) {
maxSplit = Starlark.toInt(maxSplitO, "maxsplit");
}
ArrayList<String> res = new ArrayList<>();
int end = self.length();
while (true) {
int start = self.lastIndexOf(sep, end - 1);
if (start < 0 || maxSplit-- == 0) {
res.add(self.substring(0, end));
break;
}
res.add(self.substring(start + sep.length(), end));
end = start;
}
Collections.reverse(res);
return StarlarkList.copyOf(thread.mutability(), res);
}
@StarlarkMethod(
name = "partition",
doc =
"Splits the input string at the first occurrence of the separator <code>sep</code> and"
+ " returns the resulting partition as a three-element tuple of the form (before,"
+ " separator, after). If the input string does not contain the separator, partition"
+ " returns (self, '', '').",
parameters = {@Param(name = "self"), @Param(name = "sep", doc = "The string to split on.")})
public Tuple partition(String self, String sep) throws EvalException {
return partitionCommon(self, sep, /*first=*/ true);
}
@StarlarkMethod(
name = "rpartition",
doc =
"Splits the input string at the last occurrence of the separator <code>sep</code> and"
+ " returns the resulting partition as a three-element tuple of the form (before,"
+ " separator, after). If the input string does not contain the separator,"
+ " rpartition returns ('', '', self).",
parameters = {@Param(name = "self"), @Param(name = "sep", doc = "The string to split on.")})
public Tuple rpartition(String self, String sep) throws EvalException {
return partitionCommon(self, sep, /*first=*/ false);
}
// Splits input at the first or last occurrence of the given separator,
// and returns a triple of substrings (before, separator, after).
// If the input does not contain the separator,
// it returns (input, "", "") if first, or ("", "", input), if !first.
private static Tuple partitionCommon(String input, String separator, boolean first)
throws EvalException {
if (separator.isEmpty()) {
throw Starlark.errorf("empty separator");
}
String a = "";
String b = "";
String c = "";
int pos = first ? input.indexOf(separator) : input.lastIndexOf(separator);
if (pos < 0) {
if (first) {
a = input;
} else {
c = input;
}
} else {
a = input.substring(0, pos);
b = separator;
c = input.substring(pos + separator.length());
}
return Tuple.triple(a, b, c);
}
@StarlarkMethod(
name = "capitalize",
doc =
"Returns a copy of the string with its first character (if any) capitalized and the rest "
+ "lowercased. This method does not support non-ascii characters. ",
parameters = {@Param(name = "self", doc = "This string.")})
public String capitalize(String self) throws EvalException {
if (self.isEmpty()) {
return self;
}
// TODO(adonovan): fix: support non-ASCII characters. Requires that Bazel stop abusing Latin1.
return Character.toUpperCase(self.charAt(0)) + Ascii.toLowerCase(self.substring(1));
}
@StarlarkMethod(
name = "title",
doc =
"Converts the input string into title case, i.e. every word starts with an "
+ "uppercase letter while the remaining letters are lowercase. In this "
+ "context, a word means strictly a sequence of letters. This method does "
+ "not support supplementary Unicode characters.",
parameters = {@Param(name = "self", doc = "This string.")})
public String title(String self) throws EvalException {
char[] data = self.toCharArray();
boolean previousWasLetter = false;
for (int pos = 0; pos < data.length; ++pos) {
char current = data[pos];
boolean currentIsLetter = Character.isLetter(current);
if (currentIsLetter) {
if (previousWasLetter && Character.isUpperCase(current)) {
data[pos] = Character.toLowerCase(current);
} else if (!previousWasLetter && Character.isLowerCase(current)) {
data[pos] = Character.toUpperCase(current);
}
}
previousWasLetter = currentIsLetter;
}
return new String(data);
}
/**
* Common implementation for find, rfind, index, rindex.
*
* @param forward true if we want to return the last matching index.
*/
private static int stringFind(boolean forward, String self, String sub, Object start, Object end)
throws EvalException {
long indices = substringIndices(self, start, end);
// Unfortunately Java forces us to allocate here, even though
// String has a private indexOf method that accepts indices.
// Fortunately the common case is self[0:n].
String substr = self.substring(lo(indices), hi(indices));
int subpos = forward ? substr.indexOf(sub) : substr.lastIndexOf(sub);
return subpos < 0
? subpos //
: subpos + lo(indices);
}
private static final Pattern SPLIT_LINES_PATTERN =
Pattern.compile("(?<line>.*)(?<break>(\\r\\n|\\r|\\n)?)");
@StarlarkMethod(
name = "rfind",
doc =
"Returns the last index where <code>sub</code> is found, or -1 if no such index exists, "
+ "optionally restricting to <code>[start:end]</code>, "
+ "<code>start</code> being inclusive and <code>end</code> being exclusive.",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(name = "sub", doc = "The substring to find."),
@Param(
name = "start",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "0",
doc = "Restrict to search from this position."),
@Param(
name = "end",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "None",
doc = "optional position before which to restrict to search.")
})
public int rfind(String self, String sub, Object start, Object end) throws EvalException {
return stringFind(false, self, sub, start, end);
}
@StarlarkMethod(
name = "find",
doc =
"Returns the first index where <code>sub</code> is found, or -1 if no such index exists, "
+ "optionally restricting to <code>[start:end]</code>, "
+ "<code>start</code> being inclusive and <code>end</code> being exclusive.",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(name = "sub", doc = "The substring to find."),
@Param(
name = "start",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "0",
doc = "Restrict to search from this position."),
@Param(
name = "end",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "None",
doc = "optional position before which to restrict to search.")
})
public int find(String self, String sub, Object start, Object end) throws EvalException {
return stringFind(true, self, sub, start, end);
}
@StarlarkMethod(
name = "rindex",
doc =
"Returns the last index where <code>sub</code> is found, or raises an error if no such "
+ "index exists, optionally restricting to <code>[start:end]</code>, "
+ "<code>start</code> being inclusive and <code>end</code> being exclusive.",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(name = "sub", doc = "The substring to find."),
@Param(
name = "start",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "0",
doc = "Restrict to search from this position."),
@Param(
name = "end",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "None",
doc = "optional position before which to restrict to search.")
})
public int rindex(String self, String sub, Object start, Object end) throws EvalException {
int res = stringFind(false, self, sub, start, end);
if (res < 0) {
throw Starlark.errorf("substring not found");
}
return res;
}
@StarlarkMethod(
name = "index",
doc =
"Returns the first index where <code>sub</code> is found, or raises an error if no such "
+ " index exists, optionally restricting to <code>[start:end]</code>"
+ "<code>start</code> being inclusive and <code>end</code> being exclusive.",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(name = "sub", doc = "The substring to find."),
@Param(
name = "start",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "0",
doc = "Restrict to search from this position."),
@Param(
name = "end",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "None",
doc = "optional position before which to restrict to search.")
})
public int index(String self, String sub, Object start, Object end) throws EvalException {
int res = stringFind(true, self, sub, start, end);
if (res < 0) {
throw Starlark.errorf("substring not found");
}
return res;
}
@StarlarkMethod(
name = "splitlines",
doc =
"Splits the string at line boundaries ('\\n', '\\r\\n', '\\r') "
+ "and returns the result as a new mutable list.",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(
// TODO(b/67740837): clarify whether this is named or positional.
name = "keepends",
defaultValue = "False",
doc = "Whether the line breaks should be included in the resulting list.")
},
useStarlarkThread = true)
public Sequence<String> splitLines(String self, boolean keepEnds, StarlarkThread thread)
throws EvalException {
StarlarkList<String> result = StarlarkList.newList(thread.mutability());
Matcher matcher = SPLIT_LINES_PATTERN.matcher(self);
while (matcher.find()) {
String line = matcher.group("line");
String lineBreak = matcher.group("break");
boolean trailingBreak = lineBreak.isEmpty();
if (line.isEmpty() && trailingBreak) {
break;
}
if (keepEnds && !trailingBreak) {
result.addElement(line + lineBreak);
} else {
result.addElement(line);
}
}
// TODO(adonovan): spec should state that result is mutable,
// as in Python[23] and go.starlark.net.
return result;
}
@StarlarkMethod(
name = "isalpha",
doc =
"Returns True if all characters in the string are alphabetic ([a-zA-Z]) and there is "
+ "at least one character.",
parameters = {@Param(name = "self", doc = "This string.")})
public boolean isAlpha(String self) throws EvalException {
return matches(self, ALPHA, false);
}
@StarlarkMethod(
name = "isalnum",
doc =
"Returns True if all characters in the string are alphanumeric ([a-zA-Z0-9]) and there "
+ "is at least one character.",
parameters = {@Param(name = "self", doc = "This string.")})
public boolean isAlnum(String self) throws EvalException {
return matches(self, ALNUM, false);
}
@StarlarkMethod(
name = "isdigit",
doc =
"Returns True if all characters in the string are digits ([0-9]) and there is "
+ "at least one character.",
parameters = {@Param(name = "self", doc = "This string.")})
public boolean isDigit(String self) throws EvalException {
return matches(self, DIGIT, false);
}
@StarlarkMethod(
name = "isspace",
doc =
"Returns True if all characters are white space characters and the string "
+ "contains at least one character.",
parameters = {@Param(name = "self", doc = "This string.")})
public boolean isSpace(String self) throws EvalException {
return matches(self, SPACE, false);
}
@StarlarkMethod(
name = "islower",
doc =
"Returns True if all cased characters in the string are lowercase and there is "
+ "at least one character.",
parameters = {@Param(name = "self", doc = "This string.")})
public boolean isLower(String self) throws EvalException {
// Python also accepts non-cased characters, so we cannot use LOWER.
return matches(self, UPPER.negate(), true);
}
@StarlarkMethod(
name = "isupper",
doc =
"Returns True if all cased characters in the string are uppercase and there is "
+ "at least one character.",
parameters = {@Param(name = "self", doc = "This string.")})
public boolean isUpper(String self) throws EvalException {
// Python also accepts non-cased characters, so we cannot use UPPER.
return matches(self, LOWER.negate(), true);
}
@StarlarkMethod(
name = "istitle",
doc =
"Returns True if the string is in title case and it contains at least one character. "
+ "This means that every uppercase character must follow an uncased one (e.g. "
+ "whitespace) and every lowercase character must follow a cased one (e.g. "
+ "uppercase or lowercase).",
parameters = {@Param(name = "self", doc = "This string.")})
public boolean isTitle(String self) throws EvalException {
if (self.isEmpty()) {
return false;
}
// From the Python documentation: "uppercase characters may only follow uncased characters
// and lowercase characters only cased ones".
char[] data = self.toCharArray();
CharMatcher matcher = CharMatcher.any();
char leftMostCased = ' ';
for (int pos = data.length - 1; pos >= 0; --pos) {
char current = data[pos];
// 1. Check condition that was determined by the right neighbor.
if (!matcher.matches(current)) {
return false;
}
// 2. Determine condition for the left neighbor.
if (LOWER.matches(current)) {
matcher = CASED;
} else if (UPPER.matches(current)) {
matcher = CASED.negate();
} else {
matcher = CharMatcher.any();
}
// 3. Store character if it is cased.
if (CASED.matches(current)) {
leftMostCased = current;
}
}
// The leftmost cased letter must be uppercase. If leftMostCased is not a cased letter here,
// then the string doesn't have any cased letter, so UPPER.test will return false.
return UPPER.matches(leftMostCased);
}
private static boolean matches(
String str, CharMatcher matcher, boolean requiresAtLeastOneCasedLetter) {
if (str.isEmpty()) {
return false;
} else if (!requiresAtLeastOneCasedLetter) {
return matcher.matchesAllOf(str);
}
int casedLetters = 0;
for (char current : str.toCharArray()) {
if (!matcher.matches(current)) {
return false;
} else if (requiresAtLeastOneCasedLetter && CASED.matches(current)) {
++casedLetters;
}
}
return casedLetters > 0;
}
private static final CharMatcher DIGIT = CharMatcher.javaDigit();
private static final CharMatcher LOWER = CharMatcher.inRange('a', 'z');
private static final CharMatcher UPPER = CharMatcher.inRange('A', 'Z');
private static final CharMatcher ALPHA = LOWER.or(UPPER);
private static final CharMatcher ALNUM = ALPHA.or(DIGIT);
private static final CharMatcher CASED = ALPHA;
private static final CharMatcher SPACE = CharMatcher.whitespace();
@StarlarkMethod(
name = "count",
doc =
"Returns the number of (non-overlapping) occurrences of substring <code>sub</code> in "
+ "string, optionally restricting to <code>[start:end]</code>, <code>start</code> "
+ "being inclusive and <code>end</code> being exclusive.",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(name = "sub", doc = "The substring to count."),
@Param(
name = "start",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "0",
doc = "Restrict to search from this position."),
@Param(
name = "end",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "None",
doc = "optional position before which to restrict to search.")
})
public int count(String self, String sub, Object start, Object end) throws EvalException {
long indices = substringIndices(self, start, end);
if (sub.isEmpty()) {
return hi(indices) - lo(indices) + 1; // str.length() + 1
}
// Unfortunately Java forces us to allocate here, even though
// String has a private indexOf method that accepts indices.
// Fortunately the common case is self[0:n].
String str = self.substring(lo(indices), hi(indices));
int count = 0;
int index = 0;
while ((index = str.indexOf(sub, index)) >= 0) {
count++;
index += sub.length();
}
return count;
}
@StarlarkMethod(
name = "elems",
doc =
"Returns an iterable value containing successive 1-element substrings of the string. "
+ "Equivalent to <code>[s[i] for i in range(len(s))]</code>, except that the "
+ "returned value might not be a list.",
parameters = {@Param(name = "self", doc = "This string.")})
public Sequence<String> elems(String self) {
// TODO(adonovan): opt: return a new type that is lazily iterable.
char[] chars = self.toCharArray();
Object[] strings = new Object[chars.length];
for (int i = 0; i < chars.length; i++) {
strings[i] = memoizedCharToString(chars[i]);
}
return StarlarkList.wrap(null, strings);
}
@StarlarkMethod(
name = "endswith",
doc =
"Returns True if the string ends with <code>sub</code>, otherwise False, optionally "
+ "restricting to <code>[start:end]</code>, <code>start</code> being inclusive "
+ "and <code>end</code> being exclusive.",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(
name = "sub",
allowedTypes = {
@ParamType(type = String.class),
@ParamType(type = Tuple.class, generic1 = String.class),
},
doc = "The suffix (or tuple of alternative suffixes) to match."),
@Param(
name = "start",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "0",
doc = "Test beginning at this position."),
@Param(
name = "end",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "None",
doc = "optional position at which to stop comparing.")
})
public boolean endsWith(String self, Object sub, Object start, Object end) throws EvalException {
long indices = substringIndices(self, start, end);
if (sub instanceof String) {
return substringEndsWith(self, lo(indices), hi(indices), (String) sub);
}
for (String s : Sequence.cast(sub, String.class, "sub")) {
if (substringEndsWith(self, lo(indices), hi(indices), s)) {
return true;
}
}
return false;
}
// Computes str.substring(start, end).endsWith(suffix) without allocation.
private static boolean substringEndsWith(String str, int start, int end, String suffix) {
int n = suffix.length();
return start + n <= end && str.regionMatches(end - n, suffix, 0, n);
}
// In Python, formatting is very complex.
// We handle here the simplest case which provides most of the value of the function.
// https://docs.python.org/3/library/string.html#formatstrings
@StarlarkMethod(
name = "format",
doc =
"Perform string interpolation. Format strings contain replacement fields "
+ "surrounded by curly braces <code>{}</code>. Anything that is not contained "
+ "in braces is considered literal text, which is copied unchanged to the output."
+ "If you need to include a brace character in the literal text, it can be "
+ "escaped by doubling: <code>{{</code> and <code>}}</code>"
+ "A replacement field can be either a name, a number, or empty. Values are "
+ "converted to strings using the <a href=\"globals.html#str\">str</a> function."
+ "<pre class=\"language-python\">"
+ "# Access in order:\n"
+ "\"{} < {}\".format(4, 5) == \"4 < 5\"\n"
+ "# Access by position:\n"
+ "\"{1}, {0}\".format(2, 1) == \"1, 2\"\n"
+ "# Access by name:\n"
+ "\"x{key}x\".format(key = 2) == \"x2x\"</pre>\n",
parameters = {
@Param(name = "self", doc = "This string."),
},
extraPositionals = @Param(name = "args", defaultValue = "()", doc = "List of arguments."),
extraKeywords =
@Param(name = "kwargs", defaultValue = "{}", doc = "Dictionary of arguments."))
public String format(String self, Tuple args, Dict<String, Object> kwargs) throws EvalException {
return new FormatParser().format(self, args, kwargs);
}
@StarlarkMethod(
name = "startswith",
doc =
"Returns True if the string starts with <code>sub</code>, otherwise False, optionally "
+ "restricting to <code>[start:end]</code>, <code>start</code> being inclusive and "
+ "<code>end</code> being exclusive.",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(
name = "sub",
allowedTypes = {
@ParamType(type = String.class),
@ParamType(type = Tuple.class, generic1 = String.class),
},
doc = "The prefix (or tuple of alternative prefixes) to match."),
@Param(
name = "start",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "0",
doc = "Test beginning at this position."),
@Param(
name = "end",
allowedTypes = {
@ParamType(type = StarlarkInt.class),
@ParamType(type = NoneType.class),
},
defaultValue = "None",
doc = "Stop comparing at this position.")
})
public boolean startsWith(String self, Object sub, Object start, Object end)
throws EvalException {
long indices = substringIndices(self, start, end);
if (sub instanceof String) {
return substringStartsWith(self, lo(indices), hi(indices), (String) sub);
}
for (String s : Sequence.cast(sub, String.class, "sub")) {
if (substringStartsWith(self, lo(indices), hi(indices), s)) {
return true;
}
}
return false;
}
// Computes str.substring(start, end).startsWith(prefix) without allocation.
private static boolean substringStartsWith(String str, int start, int end, String prefix) {
return start + prefix.length() <= end && str.startsWith(prefix, start);
}
@StarlarkMethod(
name = "removeprefix",
doc =
"If the string starts with <code>prefix</code>, returns a new string with the prefix "
+ "removed. Otherwise, returns the string.",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(name = "prefix", doc = "The prefix to remove if present."),
})
public String removePrefix(String self, String prefix) {
if (self.startsWith(prefix)) {
return self.substring(prefix.length());
}
return self;
}
@StarlarkMethod(
name = "removesuffix",
doc =
"If the string ends with <code>suffix</code>, returns a new string with the suffix "
+ "removed. Otherwise, returns the string.",
parameters = {
@Param(name = "self", doc = "This string."),
@Param(name = "suffix", doc = "The suffix to remove if present."),
})
public String removeSuffix(String self, String suffix) {
if (self.endsWith(suffix)) {
return self.substring(0, self.length() - suffix.length());
}
return self;
}
}