Add comprehensive testing for network sandboxing.

This uncovers a divergence in localhost sandboxing between Linux and macOS:
with network access blocked, localhost is unreachable on Linux but remains
reachable on macOS. Based on the code this divergence should not exist, so a
TODO has been added to investigate it later.

Prerequisite for https://github.com/bazelbuild/bazel/issues/10068.

RELNOTES: None.
PiperOrigin-RevId: 279949248
diff --git a/.bazelci/postsubmit.yml b/.bazelci/postsubmit.yml
index 373a9a0..dffcfe8 100644
--- a/.bazelci/postsubmit.yml
+++ b/.bazelci/postsubmit.yml
@@ -74,6 +74,8 @@
       - "//src:bazel_jdk_minimal"
     test_flags:
       - "--test_timeout=1200"
+      # Configure and enable tests that require access to the network.
+      - "--test_env=REMOTE_NETWORK_ADDRESS=bazel.build:80"
     test_targets:
       - "--"
       - "//scripts/..."
@@ -133,6 +135,8 @@
       - "//src:bazel_jdk_minimal"
     test_flags:
       - "--test_timeout=1200"
+      # Configure and enable tests that require access to the network.
+      - "--test_env=REMOTE_NETWORK_ADDRESS=bazel.build:80"
       # Remove when https://github.com/bazelbuild/bazel/issues/7026 is fixed.
       - "--noincompatible_strict_action_env"
     test_targets:
diff --git a/.bazelci/presubmit.yml b/.bazelci/presubmit.yml
index b6c021d..30def0d 100644
--- a/.bazelci/presubmit.yml
+++ b/.bazelci/presubmit.yml
@@ -77,6 +77,8 @@
       - "//src:bazel_jdk_minimal"
     test_flags:
       - "--test_timeout=1200"
+      # Configure and enable tests that require access to the network.
+      - "--test_env=REMOTE_NETWORK_ADDRESS=bazel.build:80"
     test_targets:
       - "//scripts/..."
       - "//src/java_tools/..."
@@ -133,6 +135,8 @@
       - "//src:bazel_jdk_minimal"
     test_flags:
       - "--test_timeout=1200"
+      # Configure and enable tests that require access to the network.
+      - "--test_env=REMOTE_NETWORK_ADDRESS=bazel.build:80"
       # Remove when https://github.com/bazelbuild/bazel/issues/7026 is fixed.
       - "--noincompatible_strict_action_env"
     test_targets:
diff --git a/src/test/shell/bazel/bazel_sandboxing_test.sh b/src/test/shell/bazel/bazel_sandboxing_test.sh
index 5f74c51..b61649e 100755
--- a/src/test/shell/bazel/bazel_sandboxing_test.sh
+++ b/src/test/shell/bazel/bazel_sandboxing_test.sh
@@ -17,6 +17,12 @@
 # Test sandboxing spawn strategy
 #
 
+# Set to a host:port address that is outside of the local machine to
+# test remote network sandboxing features.
+#
+# Can be passed in via --test_env=REMOTE_NETWORK_ADDRESS=host:port.
+: "${REMOTE_NETWORK_ADDRESS:=}"
+
 # Load test environment
 # Load the test setup defined in the parent directory
 CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
@@ -339,103 +345,186 @@
   }
 }
 
-function test_sandbox_network_access() {
+# Prepares common targets and services to be used by all network-related
+# tests.  $1 is the comma-separated list of quoted tags to put on every
+# target.  Remote-network targets are only created when the user has set
+# REMOTE_NETWORK_ADDRESS (as host:port) in the environment, never on macOS.
+function setup_network_tests() {
+  local tags="${1}"; shift
+
   serve_file file_to_serve
-  cat << EOF >> examples/genrule/BUILD
+
+  local socket_dir
+  socket_dir="$(mktemp -d /tmp/test.XXXXXX)" || fail "mktemp failed"
+  local socket="${socket_dir}/socket"
+  python "${python_server}" --unix_socket="${socket}" always file_to_serve &
+  local pid="${!}"
+  # Double quotes on purpose: the locals must be expanded now, not at exit.
+  trap "kill_nc || true; kill '${pid}' || true; rm -f '${socket}'; rmdir '${socket_dir}'" EXIT
+
+  mkdir -p pkg
+  cat <<EOF >pkg/BUILD
+genrule(
+  name = "localhost",
+  outs = [ "localhost.txt" ],
+  cmd = "curl -o \$@ localhost:${nc_port}",
+  tags = [ ${tags} ],
+)
 
 genrule(
-  name = "sandbox_network_access",
-  outs = [ "sandbox_network_access.txt" ],
-  cmd = "curl -o \$@ localhost:${nc_port}",
+  name = "unix-socket",
+  outs = [ "unix-socket.txt" ],
+  cmd = "curl --unix-socket ${socket} -o \$@ irrelevant-url",
+  tags = [ ${tags} ],
 )
 EOF
-  bazel build examples/genrule:sandbox_network_access &> $TEST_log \
-    || fail "genrule 'sandbox_network_access' trying to use network failed, but should have succeeded"
-  [ -f "${BAZEL_GENFILES_DIR}/examples/genrule/sandbox_network_access.txt" ] \
-    || fail "genrule 'sandbox_network_access' did not produce output"
-  kill_nc
+
+  # TODO(https://github.com/bazelbuild/bazel/issues/10068): Remove once
+  # network sandboxing works on macOS.
+  case "$(uname -s)" in
+    Darwin) REMOTE_NETWORK_ADDRESS= ;;
+  esac
+
+  if [[ -n "${REMOTE_NETWORK_ADDRESS}" ]]; then
+    local hostname="${REMOTE_NETWORK_ADDRESS%:*}"
+    local remote_ip
+    if command -v host >/dev/null 2>&1; then
+      remote_ip="$(host -t A "${hostname}" | awk '/has address/ {print $4; exit}')"
+    elif command -v dig >/dev/null 2>&1; then
+      remote_ip="$(dig +short -t A "${hostname}" | awk '/^[0-9.]+$/ {print; exit}')"
+    else
+      fail "Don't know how to query IP of remote host ${hostname}"
+    fi
+    if [[ -z "${remote_ip}" ]]; then
+      fail "No IPv4 connectivity within unsandboxed test"
+    fi
+
+    cat <<EOF >>pkg/BUILD
+genrule(
+  name = "remote-ip",
+  outs = [ "remote-ip.txt" ],
+  cmd = "curl -o \$@ ${remote_ip}:${REMOTE_NETWORK_ADDRESS##*:}",
+  tags = [ ${tags} ],
+)
+
+genrule(
+  name = "remote-name",
+  outs = [ "remote-name.txt" ],
+  cmd = "curl -o \$@ '${REMOTE_NETWORK_ADDRESS}'",
+  tags = [ ${tags} ],
+)
+EOF
+  else
+    echo "Not registering tests for remote network sandboxing;" \
+      "REMOTE_NETWORK_ADDRESS has not been set"
+  fi
+}
+
+# Checks that target $1, created by a previous call to setup_network_tests,
+# can access the network.  Any extra arguments are passed to "bazel build".
+function check_network_ok() {
+  local target="${1}"; shift
+
+  (
+    # macOS's /bin/bash is ancient and cannot reference $@ when -u is set.
+    # https://unix.stackexchange.com/questions/16560/bash-su-unbound-variable-with-set-u
+    set +u
+
+    bazel build "${@}" "pkg:${target}" &>$TEST_log \
+      || fail "'${target}' could not access the network"
+  )
+}
+
+# Checks that target $1 (from setup_network_tests) cannot access the network:
+# its build must fail and leave no output.  Extra args go to "bazel build".
+function check_network_not_ok() {
+  local target="${1}"; shift
+
+  (
+    # macOS's /bin/bash is ancient and cannot reference $@ when -u is set.
+    # https://unix.stackexchange.com/questions/16560/bash-su-unbound-variable-with-set-u
+    set +u
+
+    bazel build "${@}" "pkg:${target}" &> $TEST_log \
+      && fail "'${target}' trying to use network succeeded but should have failed" || true
+  )
+  [[ ! -f "${BAZEL_GENFILES_DIR}/pkg/${target}.txt" ]] \
+    || fail "'${target}' produced output but was expected to fail"
+}
+
+function test_sandbox_network_access() {
+  setup_network_tests '"some-tag"'
+
+  check_network_ok localhost
+  check_network_ok unix-socket
+  if [[ -n "${REMOTE_NETWORK_ADDRESS}" ]]; then
+    check_network_ok remote-ip
+    check_network_ok remote-name
+  fi
 }
 
 function test_sandbox_block_network_access() {
-  serve_file file_to_serve
-  cat << EOF >> examples/genrule/BUILD
+  setup_network_tests '"some-tag"'
 
-genrule(
-  name = "breaks4",
-  outs = [ "breaks4.txt" ],
-  cmd = "curl -o \$@ localhost:${nc_port}",
-)
-EOF
-  bazel build --experimental_sandbox_default_allow_network=false examples/genrule:breaks1 &> $TEST_log \
-    && fail "Non-hermetic genrule succeeded: examples/genrule:breaks4" || true
-  [ ! -f "${BAZEL_GENFILES_DIR}/examples/genrule/breaks4.txt" ] || {
-    output=$(cat "${BAZEL_GENFILES_DIR}/examples/genrule/breaks4.txt")
-    fail "Non-hermetic genrule breaks1 succeeded with following output: $output"
-  }
-  kill_nc
+  case "$(uname -s)" in
+    Linux)
+      # TODO(jmmv): The linux-sandbox claims to allow localhost connectivity
+      # within the network namespace... but that doesn't seem to be the case.
+      check_network_not_ok localhost --experimental_sandbox_default_allow_network=false
+      ;;
+
+    *)
+      check_network_ok localhost --experimental_sandbox_default_allow_network=false
+      ;;
+  esac
+  check_network_ok unix-socket --experimental_sandbox_default_allow_network=false
+  if [[ -n "${REMOTE_NETWORK_ADDRESS}" ]]; then
+    check_network_not_ok remote-ip --experimental_sandbox_default_allow_network=false
+    check_network_not_ok remote-name --experimental_sandbox_default_allow_network=false
+  fi
 }
 
 function test_sandbox_network_access_with_local() {
-  serve_file file_to_serve
-  cat << EOF >> examples/genrule/BUILD
+  setup_network_tests '"local"'
 
-genrule(
-  name = "sandbox_network_access_with_local",
-  outs = [ "sandbox_network_access_with_local.txt" ],
-  cmd = "curl -o \$@ localhost:${nc_port}",
-  tags = [ "local" ],
-)
-EOF
-  bazel build examples/genrule:sandbox_network_access_with_local &> $TEST_log \
-    || fail "genrule 'sandbox_network_access_with_local' trying to use network failed, but should have succeeded"
-  [ -f "${BAZEL_GENFILES_DIR}/examples/genrule/sandbox_network_access_with_local.txt" ] \
-    || fail "genrule 'sandbox_network_access_with_local' did not produce output"
-  kill_nc
+  check_network_ok localhost
+  check_network_ok unix-socket
+  if [[ -n "${REMOTE_NETWORK_ADDRESS}" ]]; then
+    check_network_ok remote-ip
+    check_network_ok remote-name
+  fi
 }
 
 function test_sandbox_network_access_with_requires_network() {
-  serve_file file_to_serve
-  cat << EOF >> examples/genrule/BUILD
+  setup_network_tests '"requires-network"'
 
-genrule(
-  name = "sandbox_network_access_with_requires_network",
-  outs = [ "sandbox_network_access_with_requires_network.txt" ],
-  cmd = "curl -o \$@ localhost:${nc_port}",
-  tags = [ "requires-network" ],
-)
-EOF
-  bazel build --experimental_sandbox_default_allow_network=false \
-    examples/genrule:sandbox_network_access_with_requires_network &> $TEST_log \
-    || fail "genrule failed even though tags=['requires-network']: \
-    examples/genrule:breaks4_works_with_requires_network"
-  [ -f "${BAZEL_GENFILES_DIR}/examples/genrule/sandbox_network_access_with_requires_network.txt" ] \
-    || fail "Genrule did not produce output: examples/genrule:sandbox_network_access_with_requires_network.txt"
-  kill_nc
+  check_network_ok localhost --experimental_sandbox_default_allow_network=false
+  check_network_ok unix-socket --experimental_sandbox_default_allow_network=false
+  if [[ -n "${REMOTE_NETWORK_ADDRESS}" ]]; then
+    check_network_ok remote-ip --experimental_sandbox_default_allow_network=false
+    check_network_ok remote-name --experimental_sandbox_default_allow_network=false
+  fi
 }
 
 function test_sandbox_network_access_with_block_network() {
-  if [[ "$(uname -s)" = Darwin ]]; then
-    # TODO(https://github.com/bazelbuild/bazel/issues/10068): Network blocking
-    # currently broken on macOS.
-    echo "Skipping test: functionality known to be broken on macOS"
-    return 0
+  setup_network_tests '"block-network"'
+
+  case "$(uname -s)" in
+    Linux)
+      # TODO(jmmv): The linux-sandbox claims to allow localhost connectivity
+      # within the network namespace... but that doesn't seem to be the case.
+      check_network_not_ok localhost --experimental_sandbox_default_allow_network=true
+      ;;
+
+    *)
+      check_network_ok localhost --experimental_sandbox_default_allow_network=true
+      ;;
+  esac
+  check_network_ok unix-socket --experimental_sandbox_default_allow_network=true
+  if [[ -n "${REMOTE_NETWORK_ADDRESS}" ]]; then
+    check_network_not_ok remote-ip --experimental_sandbox_default_allow_network=true
+    check_network_not_ok remote-name --experimental_sandbox_default_allow_network=true
   fi
-
-  serve_file file_to_serve
-  cat << EOF >> examples/genrule/BUILD
-
-genrule(
-  name = "sandbox_network_access_with_block_network",
-  outs = [ "sandbox_network_access_with_block_network.txt" ],
-  cmd = "curl -o \$@ localhost:${nc_port}",
-  tags = [ "block-network" ],
-)
-EOF
-  bazel build --experimental_sandbox_default_allow_network=true examples/genrule:sandbox_network_access_with_block_network &> $TEST_log \
-    && fail "genrule 'sandbox_network_access_with_block_network' trying to use network succeeded, but should have failed" || true
-  [ ! -f "${BAZEL_GENFILES_DIR}/examples/genrule/sandbox_network_access_with_block_network.txt" ] \
-    || fail "genrule 'sandbox_network_access_with_block_network' produced output, but was expected to fail"
-  kill_nc
 }
 
 function test_sandbox_can_resolve_own_hostname() {
diff --git a/src/test/shell/bazel/testing_server.py b/src/test/shell/bazel/testing_server.py
index 1285ec9..e78c49a 100644
--- a/src/test/shell/bazel/testing_server.py
+++ b/src/test/shell/bazel/testing_server.py
@@ -15,6 +15,7 @@
 """An HTTP server to use for external repository integration tests."""
 
 # pylint: disable=g-import-not-at-top,g-importing-member
+import argparse
 import base64
 try:
   from http.server import BaseHTTPRequestHandler
@@ -25,9 +26,11 @@
 import os.path
 try:
   from socketserver import TCPServer
+  from socketserver import UnixStreamServer
 except ImportError:
   # Python 2.x compatibility hack.
   from SocketServer import TCPServer
+  from SocketServer import UnixStreamServer
 import random
 import socket
 import sys
@@ -55,6 +58,11 @@
     self.end_headers()
 
   def do_GET(self):  # pylint: disable=invalid-name
+    if not self.client_address:
+      # Unix domain connections come without a usable client address; set a
+      # (host, port) placeholder as the response functions fail without one.
+      self.client_address = ('localhost', 0)
+
     if self.simulate_timeout:
       while True:
         time.sleep(1)
@@ -92,41 +100,49 @@
       self.wfile.write(file_to_serve.read())
 
 
-def main(argv=None):
-  if argv is None:
-    argv = sys.argv[1:]
+def main(argv=None):  # None makes argparse fall back to sys.argv[1:].
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--unix_socket', action='store')
+  parser.add_argument('mode', type=str, nargs='?')
+  parser.add_argument('target', type=str, nargs='?')
+  args = parser.parse_args(argv)
 
-  if len(argv) > 1 and argv[0] == 'always':
-    Handler.filename = argv[1]
-  elif len(argv) > 1 and argv[0] == 'redirect':
-    Handler.redirect = argv[1]
-  elif argv and argv[0] == '404':
-    Handler.not_found = True
-  elif argv and argv[0] == 'timeout':
-    Handler.simulate_timeout = True
-  elif argv and argv[0] == 'auth':
-    Handler.auth = True
-    if len(argv) > 1:
-      Handler.filename = argv[1]
+  if args.mode:
+    if args.mode == 'always' and args.target:
+      Handler.filename = args.target
+    elif args.mode == 'redirect' and args.target:
+      Handler.redirect = args.target
+    elif args.mode == '404':
+      Handler.not_found = True
+    elif args.mode == 'timeout':
+      Handler.simulate_timeout = True
+    elif args.mode == 'auth':
+      Handler.auth = True
+      if args.target:
+        Handler.filename = args.target
 
   httpd = None
-  port = None
-  while port is None:
-    try:
-      port = random.randrange(32760, 59760)
-      httpd = TCPServer(('', port), Handler)
-    except socket.error:
-      port = None
-
-  try:
+  if args.unix_socket:
+    httpd = UnixStreamServer(args.unix_socket, Handler)
+    sys.stderr.write('Serving forever on %s.\n' % args.unix_socket)
+  else:
+    port = None
+    while port is None:
+      try:
+        port = random.randrange(32760, 59760)
+        httpd = TCPServer(('', port), Handler)
+      except socket.error:
+        port = None
     sys.stdout.write('%d\nstarted\n' % (port,))
     sys.stdout.flush()
     sys.stdout.close()
     sys.stderr.write('Serving forever on %d.\n' % port)
+
+  try:
     httpd.serve_forever()
   finally:
     sys.stderr.write('Goodbye.\n')
 
 
 if __name__ == '__main__':
-  sys.exit(main())
+  sys.exit(main(sys.argv[1:]))