blob: 8223c29a175b067c5145d2dba75f935cf7944d1d [file] [log] [blame] [edit]
// Copyright 2014 The Bazel Authors. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
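// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and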
// limitations under the License.
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;

import java.util.Collection;
import java.util.Iterator;
import java.util.Objects;
/** Various utility methods operating on strings. */
public class StringUtil {
  /**
   * Creates a comma-separated list of words as in English.
   *
   * <p>Example: ["a", "b", "c"] -&gt; "a, b or c".
   *
   * @param choices the items to list; each is rendered with {@link String#valueOf(Object)}
   * @return the joined list, or {@code "nothing"} if the result would be empty
   */
  public static String joinEnglishList(Iterable<?> choices) {
    return joinEnglishList(choices, "or", "");
  }

  /**
   * Creates a comma-separated list of words as in English with the given last-separator.
   *
   * <p>Example with lastSeparator="then": ["a", "b", "c"] -&gt; "a, b then c".
   *
   * @param choices the items to list; each is rendered with {@link String#valueOf(Object)}
   * @param lastSeparator the word placed between the final two items
   * @return the joined list, or {@code "nothing"} if the result would be empty
   */
  public static String joinEnglishList(Iterable<?> choices, String lastSeparator) {
    return joinEnglishList(choices, lastSeparator, "");
  }

  /**
   * Creates a comma-separated list of words as in English with the given last-separator and quotes.
   *
   * <p>Example with lastSeparator="then", quote="'": ["a", "b", "c"] -&gt; "'a', 'b' then 'c'".
   *
   * @param choices the items to list; each is rendered with {@link String#valueOf(Object)}
   * @param lastSeparator the word placed between the final two items
   * @param quote the string written immediately before and after every item
   * @return the joined list, or {@code "nothing"} if the result would be empty
   */
  public static String joinEnglishList(Iterable<?> choices, String lastSeparator, String quote) {
    StringBuilder buf = new StringBuilder();
    for (Iterator<?> ii = choices.iterator(); ii.hasNext(); ) {
      Object choice = ii.next();
      if (buf.length() > 0) {
        // After consuming the current item, hasNext() tells us whether it is the last one: all
        // earlier gaps get a comma, the final gap gets the caller-supplied separator word.
        buf.append(ii.hasNext() ? "," : " " + lastSeparator);
        buf.append(" ");
      }
      buf.append(quote).append(choice).append(quote);
    }
    return buf.length() == 0 ? "nothing" : buf.toString();
  }

  /**
   * Lists items up to a given limit, then prints how many were omitted.
   *
   * <p>Items are separated by {@code ", "}. If {@code items} holds more than {@code limit}
   * elements, a trailing {@code " ...(omitting N more item(s))"} note is appended.
   *
   * @param appendTo the builder that receives the output
   * @param limit the maximum number of items to print; must be positive
   * @param items the items to list
   * @return {@code appendTo}, to allow call chaining
   * @throws IllegalStateException if {@code limit} is not positive
   * @throws NullPointerException if one of the printed items is {@code null}
   */
  public static StringBuilder listItemsWithLimit(
      StringBuilder appendTo, int limit, Collection<?> items) {
    if (limit <= 0) {
      throw new IllegalStateException("limit must be positive, but was " + limit);
    }
    int printed = 0;
    for (Object item : items) {
      if (printed == limit) {
        break;
      }
      if (printed > 0) {
        appendTo.append(", ");
      }
      // Null items are rejected rather than silently rendered as "null".
      appendTo.append(Objects.requireNonNull(item));
      printed++;
    }
    if (items.size() > limit) {
      appendTo.append(" ...(omitting ")
          .append(items.size() - limit)
          .append(" more item(s))");
    }
    return appendTo;
  }

  /**
   * Returns the ordinal representation of the number.
   *
   * <p>Examples: 1 -&gt; "1st", 2 -&gt; "2nd", 3 -&gt; "3rd", 4 -&gt; "4th", 11 -&gt; "11th",
   * 21 -&gt; "21st", 112 -&gt; "112th".
   *
   * @param number the number to render
   * @return the decimal number followed by its English ordinal suffix
   */
  public static String ordinal(int number) {
    // The remainder is in (-100, 100), so Math.abs cannot overflow, even for Integer.MIN_VALUE.
    int lastTwoDigits = Math.abs(number % 100);
    if (lastTwoDigits >= 11 && lastTwoDigits <= 13) {
      // 11th, 12th and 13th are the exceptions to the last-digit rule below.
      return number + "th";
    }
    switch (lastTwoDigits % 10) {
      case 1:
        return number + "st";
      case 2:
        return number + "nd";
      case 3:
        return number + "rd";
      default:
        return number + "th";
    }
  }

  /**
   * Decode a String that might actually be UTF-8, in which case each input character will be
   * treated as a byte.
   *
   * <p>Several Bazel subsystems, including Starlark, store bytes in {@code String} values where
   * each {@code char} stores one {@code byte} in its lower 8 bits. This function converts its
   * input to a {@code byte[]}, then decodes that byte array as UTF-8.
   *
   * <p>Using U+2049 (EXCLAMATION QUESTION MARK) as an example:
   *
   * <p>"\u2049".getBytes(UTF_8) == [0xE2, 0x81, 0x89]
   *
   * <p>decodeBytestringUtf8("\u00E2\u0081\u0089") == "\u2049"
   *
   * <p>The return value is suitable for passing to Protobuf string fields or printing to the
   * terminal.
   *
   * @param maybeUtf8 a string that is either a UTF-8 "bytestring" or already decoded Unicode
   * @return the decoded string, or {@code maybeUtf8} itself if it is pure ASCII, contains a
   *     {@code char} above 0xFF, or does not decode cleanly as UTF-8
   */
  public static String decodeBytestringUtf8(String maybeUtf8) {
    if (maybeUtf8.chars().allMatch(c -> c < 128)) {
      // Pure ASCII is identical in both representations.
      return maybeUtf8;
    }

    // Try our best to get a valid Unicode string, assuming that the input
    // is either UTF-8 (from Starlark or a UNIX file path) or already valid
    // Unicode (from a Windows file path).
    if (maybeUtf8.chars().anyMatch(c -> c > 0xFF)) {
      // A char that does not fit in one byte cannot come from a bytestring.
      return maybeUtf8;
    }

    final byte[] utf8 = maybeUtf8.getBytes(ISO_8859_1);
    final String decoded = new String(utf8, UTF_8);

    // If the input was Unicode that happens to contain only codepoints in
    // the ISO-8859-1 range, then it will probably have a partial decoding
    // failure.
    if (decoded.chars().anyMatch(c -> c == 0xFFFD)) {
      return maybeUtf8;
    }

    return decoded;
  }

  /**
   * Encodes a String to UTF-8, then converts those UTF-8 bytes to a String by zero-extending each
   * {@code byte} into a {@code char}.
   *
   * <p>Using U+2049 (EXCLAMATION QUESTION MARK) as an example:
   *
   * <p>"\u2049".getBytes(UTF_8) == [0xE2, 0x81, 0x89]
   *
   * <p>encodeBytestringUtf8("\u2049") == "\u00E2\u0081\u0089"
   *
   * <p>See {@link #decodeBytestringUtf8} for motivation.
   *
   * @param unicode the string to encode
   * @return a string with one {@code char} per UTF-8 byte of {@code unicode}; pure-ASCII input is
   *     returned as is
   */
  public static String encodeBytestringUtf8(String unicode) {
    if (unicode.chars().allMatch(c -> c < 128)) {
      // Pure ASCII is identical in both representations.
      return unicode;
    }
    final byte[] utf8 = unicode.getBytes(UTF_8);
    return new String(utf8, ISO_8859_1);
  }
}