#!/bin/bash
# Copyright 2018 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Strict mode: abort on the first failing command, on expansion of an unset
# variable, and when any stage of a pipeline fails.
set -euo pipefail
# Positional arguments. All four are expanded unconditionally, so under
# `set -u` the script aborts if any of them is missing.
# $1: path of the test log to embed in the report, or "-" to only encode
#     stdin to stdout.
TEST_LOG="$1"
# $2: path of the XML report file that this script writes.
XML_OUTPUT_FILE="$2"
# $3: value copied verbatim into the testcase's duration= and time= attributes.
DURATION_IN_SECONDS="$3"
# $4: exit code of the test; any non-zero value is reported as an <error>.
EXIT_CODE="$4"
# Keep this in sync with test-setup.sh!
function encode_stream {
  # Reads text from stdin and writes it to stdout in a form that can be
  # embedded in a CDATA section of a UTF-8 encoded XML 1.0 document: every
  # byte that is not part of a legal character is replaced by '?', and the
  # CDATA terminator ']]>' is split so it cannot close the surrounding block.
  #
  # We do this in four steps:
  #
  # 1. Add a single whitespace character to the end of every line.
  #
  # 2. Replace every sequence of legal characters followed by an illegal
  #    character *or* followed by a legal character at the end of the line with
  #    the same sequence of legal characters followed by a question mark
  #    character (replacing the illegal or last character). Since this will
  #    always replace the last character in a line with a question mark, we
  #    make sure to append a whitespace in step #1.
  #
  #    A character is legal if it is a well-formed UTF-8 sequence that encodes
  #    a code point allowed in an XML 1.0 document.
  #
  #    We can't use sed in UTF-8 mode, because it would fail on the first
  #    illegal character. Instead, we have to match legal characters by their
  #    8-bit binary sequences, and also switch sed to an 8-bit mode.
  #
  #    The legal code point ranges are 9,a,d,20-d7ff,e000-fffd,10000-10ffff,
  #    which results in the following 8-bit binary UTF-8 matchers (overlong
  #    encodings, surrogates and values above 10FFFF are deliberately absent):
  #      [\x9\xa\xd\x20-\x7f]                          <--- (9,A,D,20-7F)
  #      [\xc2-\xdf][\x80-\xbf]                        <--- (0080-07FF)
  #      [\xe0][\xa0-\xbf][\x80-\xbf]                  <--- (0800-0FFF)
  #      [\xe1-\xec][\x80-\xbf][\x80-\xbf]             <--- (1000-CFFF)
  #      [\xed][\x80-\x9f][\x80-\xbf]                  <--- (D000-D7FF)
  #      [\xee][\x80-\xbf][\x80-\xbf]                  <--- (E000-EFFF)
  #      [\xef][\x80-\xbe][\x80-\xbf]                  <--- (F000-FFBF)
  #      [\xef][\xbf][\x80-\xbd]                       <--- (FFC0-FFFD)
  #      [\xf0][\x90-\xbf][\x80-\xbf][\x80-\xbf]       <--- (010000-03FFFF)
  #      [\xf1-\xf3][\x80-\xbf][\x80-\xbf][\x80-\xbf]  <--- (040000-0FFFFF)
  #      [\xf4][\x80-\x8f][\x80-\xbf][\x80-\xbf]       <--- (100000-10FFFF)
  #
  #    We omit \xa and \xd below since sed already splits the input into lines.
  #
  # 3. Remove the last character in the line, which we expect to be a
  #    question mark (that was originally added as a whitespace in step #1).
  #
  # 4. Replace the string ']]>' with ']]>]]<![CDATA[>' to prevent escaping the
  #    surrounding CDATA block.
  #
  # Sed supports the necessary operations as of version 4.4, but not in all
  # earlier versions. Specifically, we have found that sed 4.1.5 is not 8-bit
  # safe even when set to an 8-bit locale.
  #
  # OSX sed does not support escape sequences (\xhh), use echo as workaround.
  #
  # Alternatives considered:
  # Perl - We originally used Perl, but wanted to avoid the dependency.
  #        Recent versions of Perl now error on invalid utf-8 characters.
  # tr   - tr only replaces single-byte sequences, so cannot handle utf-8.
  #
  # NOTE(review): the comment preceding this function asks for it to be kept in
  # sync with test-setup.sh; mirror any matcher change there.
  local legal
  legal="$(echo -e '[\x9\x20-\x7f]')"
  legal+="|$(echo -e '[\xc2-\xdf][\x80-\xbf]')"
  legal+="|$(echo -e '[\xe0][\xa0-\xbf][\x80-\xbf]')"
  legal+="|$(echo -e '[\xe1-\xec][\x80-\xbf][\x80-\xbf]')"
  legal+="|$(echo -e '[\xed][\x80-\x9f][\x80-\xbf]')"
  legal+="|$(echo -e '[\xee][\x80-\xbf][\x80-\xbf]')"
  legal+="|$(echo -e '[\xef][\x80-\xbe][\x80-\xbf]')"
  legal+="|$(echo -e '[\xef][\xbf][\x80-\xbd]')"
  legal+="|$(echo -e '[\xf0][\x90-\xbf][\x80-\xbf][\x80-\xbf]')"
  legal+="|$(echo -e '[\xf1-\xf3][\x80-\xbf][\x80-\xbf][\x80-\xbf]')"
  legal+="|$(echo -e '[\xf4][\x80-\x8f][\x80-\xbf][\x80-\xbf]')"

  LC_ALL=C sed -E \
    -e 's/.*/& /g' \
    -e "s/((${legal})*)./\\1?/g" \
    -e 's/(.*)\?/\1/g' \
    -e 's|]]>|]]>]]<![CDATA[>|g'
}
function encode_as_xml {
  # Streams the file named by $1 through encode_stream and prints the result
  # on stdout. When $1 is not an existing regular file nothing is printed and
  # the function succeeds.
  local log_path="$1"
  [ -f "${log_path}" ] || return 0
  cat "${log_path}" | encode_stream
}
# For testing, we allow calling this script with "-", in which case we only
# perform the encoding step (stdin to stdout). We intentionally ignore the rest
# of the parameters.
if [[ "${TEST_LOG}" == "-" ]]; then
  encode_stream
  exit 0
fi

# Escapes the characters that may not appear verbatim inside a double-quoted
# XML attribute value: '&', '<' and '"'. '&' is handled first so that the
# entities produced for the other two are not escaped a second time. sed runs
# in the C locale so that non-UTF-8 bytes in $1 are passed through untouched.
function escape_xml_attribute {
  printf '%s' "$1" \
    | LC_ALL=C sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/"/\&quot;/g'
}

# Exit status of this script; set to non-zero when a helper step fails, but
# the report is still written.
FAILED=0

# TEST_BINARY is read from the environment. Strip one leading "./" and then one
# leading "../" to obtain the name used in the report.
test_name="${TEST_BINARY#./}"
test_name="${test_name#../}"

# A non-zero test exit code is reported as a single <error> element.
errors=0
error_msg=""
if (( EXIT_CODE != 0 )); then
  errors=1
  error_msg="<error message=\"exited with error code $EXIT_CODE\"></error>"
fi

# Ensure that test shards have unique names in the xml output.
if [[ -n "${TEST_TOTAL_SHARDS+x}" ]] && (( TEST_TOTAL_SHARDS != 0 )); then
  # Arithmetic expansion instead of a bare (( ... )) command: the latter
  # returns a failing status when the result is 0, which would trip `set -e`.
  shard_num=$(( TEST_SHARD_INDEX + 1 ))
  test_name="${test_name}_shard_${shard_num}/${TEST_TOTAL_SHARDS}"
fi

# The name goes into attribute values below, so it must be escaped; target
# names may contain '&', '<' or '"'.
xml_test_name="$(escape_xml_attribute "${test_name}")" || FAILED=$?

ENCODED_LOG="$(encode_as_xml "${TEST_LOG}")" || FAILED=$?

cat >"${XML_OUTPUT_FILE}" <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<testsuites>
<testsuite name="${xml_test_name}" tests="1" failures="0" errors="${errors}">
<testcase name="${xml_test_name}" status="run" duration="${DURATION_IN_SECONDS}" time="${DURATION_IN_SECONDS}">${error_msg}</testcase>
<system-out>
Generated test.log (if the file is not UTF-8, then this may be unreadable):
<![CDATA[${ENCODED_LOG}]]>
</system-out>
</testsuite>
</testsuites>
EOF

exit "$FAILED"