3405691582 | ed1b0b1 | 2022-08-30 03:10:54 -0700 | [diff] [blame] | 1 | #!/usr/bin/env bash |
ulfjack | 0858ae1 | 2018-07-27 02:37:53 -0700 | [diff] [blame] | 2 | |
| 3 | # Copyright 2018 The Bazel Authors. All rights reserved. |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | # you may not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
| 16 | |
# Strict mode: stop on the first failing command, treat any reference to an
# unset variable as an error, and let a pipeline fail if any stage fails.
set -o errexit -o nounset -o pipefail

# Positional arguments. All four are read unconditionally, so with nounset in
# effect every one of them has to be supplied by the caller.
#   $1  path of the test log to embed in the XML, or "-" to run only the
#       stdin-to-stdout encoder
#   $2  path of the XML file to write
#   $3  test duration; copied into the "duration" and "time" attributes
#   $4  exit code of the test; any non-zero value is reported as an error
TEST_LOG=$1
XML_OUTPUT_FILE=$2
DURATION_IN_SECONDS=$3
EXIT_CODE=$4
| 23 | |
| 24 | # Keep this in sync with test-setup.sh! |
function encode_stream {
  # Filters stdin to stdout so the result can be placed inside an XML CDATA
  # section: every byte that is not part of a legal XML 1.0 character encoded
  # as well-formed UTF-8 is replaced by '?', and ']]>' is split up.
  #
  # NOTE: test-setup.sh carries a copy of this encoder; apply any change made
  # here to that copy as well.
  #
  # We do this in four steps:
  #
  # 1. Add a single whitespace character to the end of every line.
  #
  # 2. Replace every (possibly empty) run of legal characters followed by one
  #    more byte with the same run followed by a question mark. The replaced
  #    byte is either an illegal byte or, at the end of the line, the last
  #    legal character. Since this always replaces the last character of a
  #    line, step #1 appends a whitespace so that no real content is lost.
  #
  #    A character is legal if it is a well-formed UTF-8 sequence whose code
  #    point is allowed in an XML file, i.e. one of
  #    9, a, d, 20-d7ff, e000-fffd, 10000-10ffff.
  #
  #    We can't use sed in UTF-8 mode, because it would fail on the first
  #    illegal character. Instead, we match legal characters by their 8-bit
  #    byte sequences and switch sed to an 8-bit locale (LC_ALL=C):
  #
  #      [\x09\x20-\x7f]                               <--- (9,20-7F)
  #      [\xc2-\xdf][\x80-\xbf]                        <--- (0080-07FF)
  #      [\xe0][\xa0-\xbf][\x80-\xbf]                  <--- (0800-0FFF)
  #      [\xe1-\xec][\x80-\xbf][\x80-\xbf]             <--- (1000-CFFF)
  #      [\xed][\x80-\x9f][\x80-\xbf]                  <--- (D000-D7FF)
  #      [\xee][\x80-\xbf][\x80-\xbf]                  <--- (E000-EFFF)
  #      [\xef][\x80-\xbe][\x80-\xbf]                  <--- (F000-FFBF)
  #      [\xef][\xbf][\x80-\xbd]                       <--- (FFC0-FFFD)
  #      [\xf0][\x90-\xbf][\x80-\xbf][\x80-\xbf]       <--- (010000-03FFFF)
  #      [\xf1-\xf3][\x80-\xbf][\x80-\xbf][\x80-\xbf]  <--- (040000-0FFFFF)
  #      [\xf4][\x80-\x8f][\x80-\xbf][\x80-\xbf]       <--- (100000-10FFFF)
  #
  #    The restricted second bytes after e0, ed, f0 and f4 exclude overlong
  #    encodings, UTF-16 surrogates and code points above 10FFFF; the split of
  #    the ef row excludes FFFE and FFFF. \xa and \xd are omitted since sed
  #    already splits the input into lines.
  #
  # 3. Remove the last question mark in the line, which is the whitespace
  #    that was added in step #1 and replaced in step #2.
  #
  # 4. Replace the string ']]>' with ']]>]]<![CDATA[>' to prevent escaping the
  #    surrounding CDATA block.
  #
  # Sed supports the necessary operations as of version 4.4, but not in all
  # earlier versions. Specifically, sed 4.1.5 was found not to be 8-bit safe
  # even when set to an 8-bit locale.
  #
  # OSX sed does not support escape sequences (\xhh), so the raw bytes are
  # produced with echo -e and spliced into the sed program.
  #
  # Alternatives considered:
  # Perl - Originally used, but dropped to avoid the dependency. Recent
  #        versions of Perl also error on invalid utf-8 characters.
  # tr - tr only replaces single-byte sequences, so cannot handle utf-8.
  local legal
  # The continuation lines start in column 0 on purpose: the adjacent quoted
  # pieces must join into one single word for echo.
  legal="$(echo -e \
'[\x09\x20-\x7f]|'\
'[\xc2-\xdf][\x80-\xbf]|'\
'[\xe0][\xa0-\xbf][\x80-\xbf]|'\
'[\xe1-\xec][\x80-\xbf][\x80-\xbf]|'\
'[\xed][\x80-\x9f][\x80-\xbf]|'\
'[\xee][\x80-\xbf][\x80-\xbf]|'\
'[\xef][\x80-\xbe][\x80-\xbf]|'\
'[\xef][\xbf][\x80-\xbd]|'\
'[\xf0][\x90-\xbf][\x80-\xbf][\x80-\xbf]|'\
'[\xf1-\xf3][\x80-\xbf][\x80-\xbf][\x80-\xbf]|'\
'[\xf4][\x80-\x8f][\x80-\xbf][\x80-\xbf]')"

  LC_ALL=C sed -E \
    -e 's/.*/& /g' \
    -e "s/((${legal})*)./\\1?/g" \
    -e 's/(.*)\?/\1/g' \
    -e 's|]]>|]]>]]<![CDATA[>|g'
}
| 88 | |
# Writes the contents of the file named by $1 to stdout after passing them
# through encode_stream. If $1 is not an existing regular file, nothing is
# written and the function succeeds.
function encode_as_xml {
  if [[ -f "$1" ]]; then
    # Redirect instead of "cat |": saves a process, and a failure to open the
    # file is reported through the exit status even without pipefail.
    encode_stream <"$1"
  fi
}
| 94 | |
# Test hook: when the first argument is "-", act purely as a filter that
# encodes stdin to stdout and then stop. The remaining parameters are
# intentionally ignored in this mode.
case "${TEST_LOG}" in
  "-")
    encode_stream
    exit 0
    ;;
esac
| 101 | |
# Escapes the characters that must not appear verbatim inside a double-quoted
# XML attribute value (& < > "). Takes the text as $1 and writes the escaped
# text to stdout. '&' is handled first so the entities added by the later
# expressions are not escaped a second time.
function escape_xml_attribute {
  printf '%s' "$1" | LC_ALL=C sed \
    -e 's/&/\&amp;/g' \
    -e 's/</\&lt;/g' \
    -e 's/>/\&gt;/g' \
    -e 's/"/\&quot;/g'
}

# The test name is TEST_BINARY with one leading "./" and then one leading
# "../" removed.
test_name="${TEST_BINARY#./}"
test_name="${test_name#../}"

# A non-zero exit code is reported as a single <error> element.
errors=0
error_msg=""
if (( $EXIT_CODE != 0 )); then
  errors=1
  error_msg="<error message=\"exited with error code $EXIT_CODE\"></error>"
fi

# Ensure that test shards have unique names in the xml output.
if [[ -n "${TEST_TOTAL_SHARDS+x}" ]] && ((TEST_TOTAL_SHARDS != 0)); then
  # Plain assignment from $(( )): a bare (( )) statement returns non-zero when
  # its value is 0, which would abort the script under errexit.
  shard_num=$((TEST_SHARD_INDEX + 1))
  test_name="${test_name}_shard_${shard_num}/${TEST_TOTAL_SHARDS}"
fi

# A failure in either helper is remembered and used as the exit status, but
# the XML file is still written.
FAILED=0
# The name ends up in two attribute values, so it has to be escaped.
xml_test_name="$(escape_xml_attribute "${test_name}")" || FAILED=$?
ENCODED_LOG="$(encode_as_xml "${TEST_LOG}")" || FAILED=$?
cat >"${XML_OUTPUT_FILE}" <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<testsuites>
  <testsuite name="${xml_test_name}" tests="1" failures="0" errors="${errors}">
    <testcase name="${xml_test_name}" status="run" duration="${DURATION_IN_SECONDS}" time="${DURATION_IN_SECONDS}">${error_msg}</testcase>
      <system-out>
Generated test.log (if the file is not UTF-8, then this may be unreadable):
<![CDATA[${ENCODED_LOG}]]>
      </system-out>
    </testsuite>
</testsuites>
EOF
exit "$FAILED"