Fix encoding of non-ASCII contents written to parameter files.

When args are written to parameter files, non-ASCII values are wrongly encoded as UTF-8 a second time. This seems to be unaffected by the JDK 20 upgrade of Bazel and has always been happening.

Fixes #18792.

Closes #18972.

RELNOTES: Fixes non-ASCII arguments being written to parameter files as mojibake.
PiperOrigin-RevId: 549950259
Change-Id: Ia4997cdc049d1fcda266d63920b68afbed3f9ae1
diff --git a/src/main/java/com/google/devtools/build/lib/actions/BUILD b/src/main/java/com/google/devtools/build/lib/actions/BUILD
index a9ad4fde..b154e21 100644
--- a/src/main/java/com/google/devtools/build/lib/actions/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/actions/BUILD
@@ -192,6 +192,7 @@
         "//src/main/java/com/google/devtools/build/lib/util:filetype",
         "//src/main/java/com/google/devtools/build/lib/util:os",
         "//src/main/java/com/google/devtools/build/lib/util:shell_escaper",
+        "//src/main/java/com/google/devtools/build/lib/util:string",
         "//src/main/java/com/google/devtools/build/lib/util:var_int",
         "//src/main/java/com/google/devtools/build/lib/util/io",
         "//src/main/java/com/google/devtools/build/lib/vfs",
diff --git a/src/main/java/com/google/devtools/build/lib/actions/ParameterFile.java b/src/main/java/com/google/devtools/build/lib/actions/ParameterFile.java
index 4e86984..8f6eec4 100644
--- a/src/main/java/com/google/devtools/build/lib/actions/ParameterFile.java
+++ b/src/main/java/com/google/devtools/build/lib/actions/ParameterFile.java
@@ -24,6 +24,7 @@
 import com.google.devtools.build.lib.util.FileType;
 import com.google.devtools.build.lib.util.GccParamFileEscaper;
 import com.google.devtools.build.lib.util.ShellEscaper;
+import com.google.devtools.build.lib.util.StringUtil;
 import com.google.devtools.build.lib.vfs.PathFragment;
 import java.io.BufferedOutputStream;
 import java.io.IOException;
@@ -170,6 +171,10 @@
       byte[] bytes = stringUnsafe.getByteArray(line);
       if (stringUnsafe.getCoder(line) == StringUnsafe.LATIN1 && isAscii(bytes)) {
         outputStream.write(bytes);
+      } else if (!StringUtil.decodeBytestringUtf8(line).equals(line)) {
+        // Decoding line as UTF-8 changed it, meaning it already holds UTF-8 encoded bytes.
+        // Write those bytes as-is so they are not encoded a second time.
+        outputStream.write(bytes);
       } else {
         ByteBuffer encodedBytes = encoder.encode(CharBuffer.wrap(line));
         outputStream.write(
diff --git a/src/test/shell/integration/unicode_test.bzl b/src/test/shell/integration/unicode_test.bzl
index 301a2fa..72ec6e4 100644
--- a/src/test/shell/integration/unicode_test.bzl
+++ b/src/test/shell/integration/unicode_test.bzl
@@ -51,3 +51,28 @@
         "is_executable": attr.bool(),
     },
 )
+
+def _run_executable_with_param_file_impl(ctx):
+    args = ctx.actions.args()
+    args.use_param_file("%s", use_always = True)  # always spill args into a param file
+    args.add(ctx.attr.content)  # the only argument written to the param file
+    ctx.actions.run(
+        inputs = [],
+        outputs = [ctx.outputs.out],
+        arguments = [args, ctx.outputs.out.path],  # param file reference, then output path
+        executable = ctx.executable.executable,
+    )
+
+run_executable_with_param_file_rule = rule(
+    implementation = _run_executable_with_param_file_impl,
+    doc = "Writes `content` to a param file and passes the file to the executable",
+    attrs = {
+        "out": attr.output(mandatory = True),  # action output; its path is passed as the last argument
+        "content": attr.string(mandatory = True),  # text added as the param file's argument
+        "executable": attr.label(  # tool run by the action
+            allow_files = True,
+            executable = True,
+            cfg = "exec",
+        ),
+    },
+)
diff --git a/src/test/shell/integration/unicode_test.sh b/src/test/shell/integration/unicode_test.sh
index 7ef3a33..d4108c8 100755
--- a/src/test/shell/integration/unicode_test.sh
+++ b/src/test/shell/integration/unicode_test.sh
@@ -72,4 +72,25 @@
     >>"${TEST_log}" 2>&1 || fail "Output not as expected"
 }
 
+function test_unicode_action_run_param_file {
+  # Builds a target whose action receives its non-ASCII argument via a param file.
+  local test_name="action_run_param_file"
+  bazel build --genrule_strategy=local --spawn_strategy=local \
+      "//:${test_name}" >& "$TEST_log" \
+      || fail "expected build to succeed"
+
+  quoted_unicode_test_expected="'$(cat unicode_test_expected.txt)'"
+  echo "Expected: ${quoted_unicode_test_expected}"
+
+  # The action copies its param file to the output; assert_equals is (expected, actual).
+  cat_output=$(cat "${PRODUCT_NAME}-bin/${test_name}.out")
+  assert_equals "${quoted_unicode_test_expected}" "${cat_output}" \
+      || fail "Output not as expected"
+
+  # The param file on disk must hold the same single-quoted text.
+  param_file_output=$(cat "${PRODUCT_NAME}-bin/${test_name}.out-0.params")
+  assert_equals "${quoted_unicode_test_expected}" "${param_file_output}" \
+      || fail "Output not as expected"
+}
+
 run_suite "Integration tests for ${PRODUCT_NAME}'s unicode i/o in actions"
\ No newline at end of file
diff --git a/src/test/shell/integration/unicode_test_BUILD b/src/test/shell/integration/unicode_test_BUILD
index 21c787d..fa8b334 100644
--- a/src/test/shell/integration/unicode_test_BUILD
+++ b/src/test/shell/integration/unicode_test_BUILD
@@ -1,5 +1,5 @@
 # BUILD file for unicode_test
-load(":unicode_test.bzl", "run_executable_rule", "write_file_rule")
+load(":unicode_test.bzl", "run_executable_rule", "run_executable_with_param_file_rule", "write_file_rule")
 
 # In Russian and Bengali: "Down with mojibake! We want unicode!"
 non_ascii_string = "Долой кракозябры! আমরা ইউনিকোড চাই!"
@@ -31,3 +31,19 @@
     content = non_ascii_string,
     out = "action_write_content.out",
 )
+
+run_executable_with_param_file_rule(
+    name = "action_run_param_file",
+    executable = "cat_param_file.sh",  # output of the :cat_param_file target
+    content = non_ascii_string,
+    out = "action_run_param_file.out",
+)
+
+write_file_rule(
+    name = "cat_param_file",  # produces cat_param_file.sh, the executable of :action_run_param_file
+    content = """#!/bin/bash
+cat "$1" >> "$2";
+""",
+    out = "cat_param_file.sh",
+    is_executable = True,
+)
\ No newline at end of file