Accept any high byte in the "target" part of a label.  Currently this code
rejects some valid UTF-8 because it examines the characters as bytes stored in
a Latin-1 String.

Bazel doesn't need to police what's in a label as long as it can find characters
with special meaning to itself, the operating system, and the shell.  Those are
all ASCII.

RELNOTES: None.
PiperOrigin-RevId: 244699716
diff --git a/src/main/java/com/google/devtools/build/lib/cmdline/LabelValidator.java b/src/main/java/com/google/devtools/build/lib/cmdline/LabelValidator.java
index e63b096..6080cb3 100644
--- a/src/main/java/com/google/devtools/build/lib/cmdline/LabelValidator.java
+++ b/src/main/java/com/google/devtools/build/lib/cmdline/LabelValidator.java
@@ -65,12 +65,16 @@
       CharMatcher.javaLetterOrDigit()
           .or(PUNCTUATION_REQUIRING_QUOTING)
           .or(PUNCTUATION_NOT_REQUIRING_QUOTING)
+          // On unix platforms, strings obtained from readdir() are bytes "decoded" as latin1.  But
+          // the user is likely to have stored UTF-8 in them.  So we permit all non-ASCII characters
+          // in UTF-8 by allowing all high bytes.
+          .or(CharMatcher.inRange((char) 128, (char) 255))
           .precomputed();
 
   @VisibleForTesting
   static final String PACKAGE_NAME_ERROR =
       "package names may contain A-Z, a-z, 0-9, or any of ' !\"#$%&'()*+,-./;<=>?[]^_`{|}~'"
-          + " (most 127-bit ascii characters except 0-31, 127, ':', or '\\')";
+          + " (most 7-bit ascii characters except 0-31, 127, ':', or '\\')";
 
   @VisibleForTesting
   static final String PACKAGE_NAME_DOT_ERROR =
@@ -183,7 +187,7 @@
         }
         continue;
       }
-      if (CharMatcher.javaIsoControl().matches(c)) {
+      if (c <= '\u001f' || c == '\u007f') {
         return "target names may not contain non-printable characters: '" +
                String.format("\\x%02X", (int) c) + "'";
       }
diff --git a/src/test/shell/integration/loading_phase_posix_test.sh b/src/test/shell/integration/loading_phase_posix_test.sh
index 21abb59..18ef93e 100755
--- a/src/test/shell/integration/loading_phase_posix_test.sh
+++ b/src/test/shell/integration/loading_phase_posix_test.sh
@@ -58,6 +58,30 @@
 
 #### TESTS #############################################################
 
+function test_glob_control_chars() {  # a globbed file whose name is a single control byte must make the query fail
+  local char escape raw
+  for char in {1..31} 127; do  # byte values 1-31 and 127 (0 is not tried)
+    local pkg="$FUNCNAME/char$char"  # one package directory per byte value
+    mkdir -p $pkg
+    echo "filegroup(name='t', srcs=glob(['*']))" > $pkg/BUILD
+    printf -v escape \\%03o $char  # backslash + 3-digit octal form of the value, e.g. \001
+    printf -v raw %b "$escape"  # %b expands that escape into the raw byte
+    touch "$pkg/$raw"  # file whose entire name is the raw byte
+    bazel query "//$pkg:*" >& $TEST_log && fail "Expected failure"  # fail the test if the query succeeds
+    expect_log 'invalid label'
+  done
+}
+
+function test_glob_utf8() {  # file names made of UTF-8 encoded non-ASCII characters must be accepted by the query
+  local -r pkg="$FUNCNAME"
+  mkdir $pkg
+  echo "filegroup(name='t', srcs=glob(['*']))" > $pkg/BUILD
+  cd $pkg  # touch below receives bare names, so run it from inside the package
+  perl -CS -e 'for $i (160..0xd7ff) {print chr $i, $i%80?"":"\n"}' | xargs touch  # code points 160..0xd7ff written as UTF-8 (-CS), newline after each multiple of 80; xargs passes the resulting words to touch
+  cd ..
+  bazel query "//$pkg:*" >& $TEST_log || fail "Expected success"
+}
+
 function test_glob_with_io_error() {
   local -r pkg="${FUNCNAME}"
   mkdir -p "$pkg" || fail "could not create \"$pkg\""