Accept any high byte in the "target" part of a label. Currently this code
rejects some valid UTF-8 because it examines the name one byte at a time, as the
chars of a latin1-decoded String, so UTF-8 bytes in the range 0x80-0x9F are
mistaken for ISO control characters.
Bazel doesn't need to police what's in a label as long as it can find characters
with special meaning to itself, the operating system, and the shell. Those are
all ASCII.
RELNOTES: None.
PiperOrigin-RevId: 244699716
diff --git a/src/main/java/com/google/devtools/build/lib/cmdline/LabelValidator.java b/src/main/java/com/google/devtools/build/lib/cmdline/LabelValidator.java
index e63b096..6080cb3 100644
--- a/src/main/java/com/google/devtools/build/lib/cmdline/LabelValidator.java
+++ b/src/main/java/com/google/devtools/build/lib/cmdline/LabelValidator.java
@@ -65,12 +65,16 @@
CharMatcher.javaLetterOrDigit()
.or(PUNCTUATION_REQUIRING_QUOTING)
.or(PUNCTUATION_NOT_REQUIRING_QUOTING)
+ // On unix platforms, strings obtained from readdir() are bytes "decoded" as latin1. But
+ // the user is likely to have stored UTF-8 in them. So we permit all non-ASCII characters
+ // in UTF-8 by allowing all high bytes.
+ .or(CharMatcher.inRange((char) 128, (char) 255))
.precomputed();
@VisibleForTesting
static final String PACKAGE_NAME_ERROR =
"package names may contain A-Z, a-z, 0-9, or any of ' !\"#$%&'()*+,-./;<=>?[]^_`{|}~'"
- + " (most 127-bit ascii characters except 0-31, 127, ':', or '\\')";
+ + " (most 7-bit ascii characters except 0-31, 127, ':', or '\\')";
@VisibleForTesting
static final String PACKAGE_NAME_DOT_ERROR =
@@ -183,7 +187,7 @@
}
continue;
}
- if (CharMatcher.javaIsoControl().matches(c)) {
+ if (c <= '\u001f' || c == '\u007f') {
return "target names may not contain non-printable characters: '" +
String.format("\\x%02X", (int) c) + "'";
}
diff --git a/src/test/shell/integration/loading_phase_posix_test.sh b/src/test/shell/integration/loading_phase_posix_test.sh
index 21abb59..18ef93e 100755
--- a/src/test/shell/integration/loading_phase_posix_test.sh
+++ b/src/test/shell/integration/loading_phase_posix_test.sh
@@ -58,6 +58,30 @@
#### TESTS #############################################################
+function test_glob_control_chars() {
+  # A globbed file named with an ASCII control char (1-31, DEL) must be rejected.
+  local char raw
+  for char in {1..31} 127; do
+    local pkg="$FUNCNAME/char$char"
+    mkdir -p "$pkg" || fail "could not create \"$pkg\""
+    echo "filegroup(name='t', srcs=glob(['*']))" > "$pkg/BUILD"
+    printf -v raw %b "$(printf '\\%03o' "$char")"  # octal escape -> raw byte
+    touch "$pkg/$raw" || fail "could not create file named with char $char"
+    bazel query "//$pkg:*" >& "$TEST_log" && fail "Expected failure"
+    expect_log 'invalid label'
+  done
+}
+
+function test_glob_utf8() {
+  local -r pkg="$FUNCNAME"
+  mkdir -p "$pkg" || fail "could not create \"$pkg\""
+  echo "filegroup(name='t', srcs=glob(['*']))" > "$pkg/BUILD"
+  # Name files with U+00A0..U+D7FF (up to 80 per name); the subshell keeps our cwd.
+  (cd "$pkg" && perl -CS -e 'for $i (160..0xd7ff) {print chr $i, $i%80?"":"\n"}' | xargs touch) \
+    || fail "could not create UTF-8 named files in \"$pkg\""
+  bazel query "//$pkg:*" >& "$TEST_log" || fail "Expected success"
+}
+
function test_glob_with_io_error() {
local -r pkg="${FUNCNAME}"
mkdir -p "$pkg" || fail "could not create \"$pkg\""