#!/usr/bin/env bash

# Copyright 2018 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -euo pipefail

# Positional parameters:
#   $1  TEST_LOG             path of the test log to embed, or "-" to only
#                            encode stdin (the remaining parameters are then
#                            ignored and may be omitted)
#   $2  XML_OUTPUT_FILE      path of the XML file to write
#   $3  DURATION_IN_SECONDS  value used for the duration/time attributes
#   $4  EXIT_CODE            exit code of the test; non-zero is reported as an
#                            error
#
# The parameters are read with an empty default so that "set -u" does not
# abort with a bare "unbound variable" message; a missing parameter is
# reported explicitly instead.
if [[ "${1-}" != "-" ]] && (( $# < 4 )); then
  echo "Usage: $0 <test_log|-> <xml_output_file> <duration_in_seconds> <exit_code>" >&2
  exit 1
fi

TEST_LOG="${1-}"
XML_OUTPUT_FILE="${2-}"
DURATION_IN_SECONDS="${3-}"
EXIT_CODE="${4-}"

# Keep this in sync with test-setup.sh!
#
# Reads text from stdin and writes it to stdout in a form that can be placed
# inside an XML CDATA section: every byte that is not part of a legal
# character is replaced with '?', and every ']]>' is split across two CDATA
# sections.
function encode_stream {
  # Replace invalid XML characters and invalid sequence in CDATA
  # We do this in four steps:
  #
  # 1. Add a single whitespace character to the end of every line
  #
  # 2. Replace every sequence of legal characters followed by an illegal
  #    character *or* followed by a legal character at the end of the line with
  #    the same sequence of legal characters followed by a question mark
  #    character (replacing the illegal or last character). Since this will
  #    always replace the last character in a line with a question mark, we
  #    make sure to append a whitespace in step #1.
  #
  #    A character is legal if it is a well-formed UTF-8 sequence for a code
  #    point that is allowed in an XML file (this excludes a few control
  #    codes, but otherwise allows most UTF-8 characters).
  #
  #    We can't use sed in UTF-8 mode, because it would fail on the first
  #    illegal character. Instead, we have to match legal characters by their
  #    8-bit binary sequences, and also switch sed to an 8-bit mode.
  #
  #    The legal UTF codepoint ranges are 9,a,d,20-d7ff,e000-fffd,10000-10ffff,
  #    which results in the following 8-bit binary UTF-8 matchers:
  #       [\x9\xa\xd\x20-\x7f]                          <--- (9,A,D,20-7F)
  #       [\xc2-\xdf][\x80-\xbf]                        <--- (0080-07FF)
  #       [\xe0][\xa0-\xbf][\x80-\xbf]                  <--- (0800-0FFF)
  #       [\xe1-\xec][\x80-\xbf][\x80-\xbf]             <--- (1000-CFFF)
  #       [\xed][\x80-\x9f][\x80-\xbf]                  <--- (D000-D7FF)
  #       [\xee][\x80-\xbf][\x80-\xbf]                  <--- (E000-EFFF)
  #       [\xef][\x80-\xbe][\x80-\xbf]                  <--- (F000-FFBF)
  #       [\xef][\xbf][\x80-\xbd]                       <--- (FFC0-FFFD)
  #       [\xf0][\x90-\xbf][\x80-\xbf][\x80-\xbf]       <--- (010000-03FFFF)
  #       [\xf1-\xf3][\x80-\xbf][\x80-\xbf][\x80-\xbf]  <--- (040000-0FFFFF)
  #       [\xf4][\x80-\x8f][\x80-\xbf][\x80-\xbf]       <--- (100000-10FFFF)
  #
  #    Lead bytes \xc0, \xc1, \xe0 followed by \x80-\x9f and \xf0 followed by
  #    \x80-\x8f are overlong encodings, and \xf4 followed by \x90-\xbf or any
  #    lead byte above \xf4 is beyond 10FFFF; none of these are matched, so
  #    their bytes are replaced one by one.
  #
  #    We omit \xa below since sed already splits the input into lines. \xd is
  #    omitted as well, so carriage returns are replaced like illegal
  #    characters.
  #
  # 3. Remove the last character in the line, which we expect to be a
  #    question mark (that was originally added as a whitespace in step #1).
  #
  # 4. Replace the string ']]>' with ']]>]]<![CDATA[>' to prevent escaping the
  #    surrounding CDATA block.
  #
  # Sed supports the necessary operations as of version 4.4, but not in all
  # earlier versions. Specifically, we have found that sed 4.1.5 is not 8-bit
  # safe even when set to an 8-bit locale.
  #
  # OSX sed does not support escape sequences (\xhh), use echo as workaround.
  #
  # Alternatives considered:
  # Perl - We originally used Perl, but wanted to avoid the dependency.
  #        Recent versions of Perl now error on invalid utf-8 characters.
  # tr   - tr only replaces single-byte sequences, so cannot handle utf-8.
  #
  # The continuation lines of the second expression start in column 0 on
  # purpose: any leading whitespace would split the sed script into several
  # shell words.
  LC_ALL=C sed -E \
      -e 's/.*/& /g' \
      -e 's/(('\
"$(echo -e '[\x9\x20-\x7f]')|"\
"$(echo -e '[\xc2-\xdf][\x80-\xbf]')|"\
"$(echo -e '[\xe0][\xa0-\xbf][\x80-\xbf]')|"\
"$(echo -e '[\xe1-\xec][\x80-\xbf][\x80-\xbf]')|"\
"$(echo -e '[\xed][\x80-\x9f][\x80-\xbf]')|"\
"$(echo -e '[\xee][\x80-\xbf][\x80-\xbf]')|"\
"$(echo -e '[\xef][\x80-\xbe][\x80-\xbf]')|"\
"$(echo -e '[\xef][\xbf][\x80-\xbd]')|"\
"$(echo -e '[\xf0][\x90-\xbf][\x80-\xbf][\x80-\xbf]')|"\
"$(echo -e '[\xf1-\xf3][\x80-\xbf][\x80-\xbf][\x80-\xbf]')|"\
"$(echo -e '[\xf4][\x80-\x8f][\x80-\xbf][\x80-\xbf]')"\
')*)./\1?/g' \
      -e 's/(.*)\?/\1/g' \
      -e 's|]]>|]]>]]<![CDATA[>|g'
}

# Writes the encode_stream-encoded contents of the file named by $1 to stdout.
# Produces no output and succeeds when $1 is not a regular file.
function encode_as_xml {
  local log_path="$1"
  [[ -f "${log_path}" ]] || return 0
  # Feed the file through a redirect; no separate cat process is needed.
  encode_stream <"${log_path}"
}

# For testing, we allow calling this script with "-", in which case we only
# perform the encoding step on stdin. We intentionally ignore the rest of the
# parameters.
if [[ "${TEST_LOG}" == "-" ]]; then
  encode_stream
  exit 0
fi

# Prints $1 with the characters that may not appear verbatim inside a
# double-quoted XML attribute value replaced by their predefined entities.
# Done character by character in pure Bash so that the result does not depend
# on how a given Bash version treats '&' in pattern substitutions.
function escape_xml_attribute {
  local raw="$1"
  local escaped=""
  local ch
  local i
  for (( i = 0; i < ${#raw}; i++ )); do
    ch="${raw:i:1}"
    case "${ch}" in
      '&') escaped+='&amp;' ;;
      '<') escaped+='&lt;' ;;
      '>') escaped+='&gt;' ;;
      '"') escaped+='&quot;' ;;
      *) escaped+="${ch}" ;;
    esac
  done
  printf '%s' "${escaped}"
}

# The reported name is the test binary path without one leading "./" and
# without one leading "../".
test_name="${TEST_BINARY#./}"
test_name="${test_name#../}"

# A non-zero exit code of the test is reported as a single <error> element.
errors=0
error_msg=""
if (( EXIT_CODE != 0 )); then
  errors=1
  error_msg="<error message=\"exited with error code ${EXIT_CODE}\"></error>"
fi

# Ensure that test shards have unique names in the xml output.
if [[ -n "${TEST_TOTAL_SHARDS+x}" ]] && (( TEST_TOTAL_SHARDS != 0 )); then
  # Shard indices are reported 1-based. A plain assignment is used because a
  # bare (( ... )) command returns non-zero when its value is 0, which would
  # abort the script under "set -e".
  shard_num=$(( TEST_SHARD_INDEX + 1 ))
  test_name="${test_name}_shard_${shard_num}/${TEST_TOTAL_SHARDS}"
fi

# The name ends up inside name="..." attributes, so it must not contain raw
# markup characters.
escaped_name="$(escape_xml_attribute "${test_name}")"

# A failure while encoding the log is remembered instead of aborting, so that
# the XML file is still written; the script then exits with that status.
FAILED=0
ENCODED_LOG="$(encode_as_xml "${TEST_LOG}")" || FAILED=$?
cat >"${XML_OUTPUT_FILE}" <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<testsuites>
  <testsuite name="${escaped_name}" tests="1" failures="0" errors="${errors}">
    <testcase name="${escaped_name}" status="run" duration="${DURATION_IN_SECONDS}" time="${DURATION_IN_SECONDS}">${error_msg}</testcase>
    <system-out>
Generated test.log (if the file is not UTF-8, then this may be unreadable):
<![CDATA[${ENCODED_LOG}]]>
    </system-out>
  </testsuite>
</testsuites>
EOF
exit "${FAILED}"