Windows, test wrapper: use IFStream to encode XML
Use a buffered input file stream (IFStream) to
read the test log file and escape invalid UTF-8
characters and sequences that would be unsafe to
embed in the test XML's CDATA section.
The benefit: the code no longer reads the entire
test log into memory before said escaping, which
is great for memory usage because these log files
may get arbitrarily large.
See https://github.com/bazelbuild/bazel/issues/5508
Closes #7539.
PiperOrigin-RevId: 235709946
diff --git a/src/test/cpp/util/windows_test_util.cc b/src/test/cpp/util/windows_test_util.cc
index aba74c2..8b78d86 100644
--- a/src/test/cpp/util/windows_test_util.cc
+++ b/src/test/cpp/util/windows_test_util.cc
@@ -88,6 +88,11 @@
}
bool CreateDummyFile(const wstring& path, const std::string& content) {
+ return CreateDummyFile(path, content.c_str(), content.size());
+}
+
+bool CreateDummyFile(const std::wstring& path, const void* content,
+ const DWORD size) {
HANDLE handle =
::CreateFileW(path.c_str(), GENERIC_WRITE, FILE_SHARE_READ, NULL,
CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
@@ -96,9 +101,8 @@
}
bool result = true;
DWORD actually_written = 0;
- if (!::WriteFile(handle, content.c_str(), content.size(), &actually_written,
- NULL) &&
- actually_written != content.size()) {
+ if (!::WriteFile(handle, content, size, &actually_written, NULL) &&
+ actually_written != size) {
result = false;
}
CloseHandle(handle);
diff --git a/src/test/cpp/util/windows_test_util.h b/src/test/cpp/util/windows_test_util.h
index fbf7710..3f0ed93 100644
--- a/src/test/cpp/util/windows_test_util.h
+++ b/src/test/cpp/util/windows_test_util.h
@@ -32,6 +32,11 @@
bool CreateDummyFile(const std::wstring& path,
const std::string& content = "hello");
+// Creates a dummy file under `path`.
+// `path` must be a valid Windows path, and have a UNC prefix if necessary.
+bool CreateDummyFile(const std::wstring& path, const void* content,
+ const DWORD size);
+
} // namespace blaze_util
#endif // BAZEL_SRC_TEST_CPP_UTIL_WINDOWS_TEST_UTIL_H_
diff --git a/tools/test/windows/tw.cc b/tools/test/windows/tw.cc
index ea1f232..aec0853 100644
--- a/tools/test/windows/tw.cc
+++ b/tools/test/windows/tw.cc
@@ -1480,83 +1480,69 @@
//
// Every octet-sequence matching one of these regexps will be left alone, all
// other octet-sequences will be replaced by '?' characters.
-bool CdataEscape(const uint8_t* input, const DWORD size,
- std::basic_ostream<char>* out) {
- // We aren't modifying the input, so const_cast is fine.
- uint8_t* p = const_cast<uint8_t*>(input);
-
- for (DWORD i = 0; i < size; ++i, ++p) {
- if (p[0] == ']' && (i + 2 < size) && p[1] == ']' && p[2] == '>') {
+bool CdataEscape(IFStream* in, std::basic_ostream<char>* out) {
+ int c0 = in->Get();
+ uint8_t p[3];
+ for (; c0 < 256; c0 = in->Get()) {
+ if (c0 == ']' && in->Peek(2, p) == 2 && p[0] == ']' && p[1] == '>') {
*out << "]]>]]<![CDATA[>";
if (!out->good()) {
return false;
}
- i += 2;
- p += 2;
- } else if (*p == 0x9 || *p == 0xA || *p == 0xD ||
- (*p >= 0x20 && *p <= 0x7F)) {
+ (void)in->Get();
+ (void)in->Get();
+ } else if (c0 == 0x9 || c0 == 0xA || c0 == 0xD ||
+ (c0 >= 0x20 && c0 <= 0x7F)) {
// Matched legal single-octet sequence.
- *out << *p;
+ *out << (char)c0;
if (!out->good()) {
return false;
}
- } else if ((i + 1 < size) && p[0] >= 0xC0 && p[0] <= 0xDF && p[1] >= 0x80 &&
- p[1] <= 0xBF) {
+ } else if (c0 >= 0xC0 && c0 <= 0xDF && in->Peek(1, p) == 1 &&
+ p[0] >= 0x80 && p[0] <= 0xBF) {
// Matched legal double-octet sequence. Skip the next octet.
- *out << p[0] << p[1];
+ *out << (char)c0 << (char)p[0];
if (!out->good()) {
return false;
}
- i += 1;
- p += 1;
- } else if ((i + 2 < size) &&
- ((p[0] >= 0xE0 && p[0] <= 0xEC && p[1] >= 0x80 && p[1] <= 0xBF &&
- p[2] >= 0x80 && p[2] <= 0xBF) ||
- (p[0] == 0xED && p[1] >= 0x80 && p[1] <= 0x9F && p[2] >= 0x80 &&
- p[2] <= 0xBF) ||
- (p[0] == 0xEE && p[1] >= 0x80 && p[1] <= 0xBF && p[2] >= 0x80 &&
- p[2] <= 0xBF) ||
- (p[0] == 0xEF && p[1] >= 0x80 && p[1] <= 0xBE && p[2] >= 0x80 &&
- p[2] <= 0xBF) ||
- (p[0] == 0xEF && p[1] == 0xBF && p[2] >= 0x80 &&
- p[2] <= 0xBD))) {
+ (void)in->Get();
+ } else if (in->Peek(2, p) == 2 &&
+ ((c0 >= 0xE0 && c0 <= 0xEC && p[0] >= 0x80 && p[0] <= 0xBF &&
+ p[1] >= 0x80 && p[1] <= 0xBF) ||
+ (c0 == 0xED && p[0] >= 0x80 && p[0] <= 0x9F && p[1] >= 0x80 &&
+ p[1] <= 0xBF) ||
+ (c0 == 0xEE && p[0] >= 0x80 && p[0] <= 0xBF && p[1] >= 0x80 &&
+ p[1] <= 0xBF) ||
+ (c0 == 0xEF && p[0] >= 0x80 && p[0] <= 0xBE && p[1] >= 0x80 &&
+ p[1] <= 0xBF) ||
+ (c0 == 0xEF && p[0] == 0xBF && p[1] >= 0x80 && p[1] <= 0xBD))) {
// Matched legal triple-octet sequence. Skip the next two octets.
- *out << p[0] << p[1] << p[2];
+ *out << (char)c0 << (char)p[0] << (char)p[1];
if (!out->good()) {
return false;
}
- i += 2;
- p += 2;
- } else if ((i + 3 < size) && p[0] >= 0xF0 && p[0] <= 0xF7 && p[1] >= 0x80 &&
- p[1] <= 0xBF && p[2] >= 0x80 && p[2] <= 0xBF && p[3] >= 0x80 &&
- p[3] <= 0xBF) {
+ (void)in->Get();
+ (void)in->Get();
+ } else if (in->Peek(3, p) == 3 && c0 >= 0xF0 && c0 <= 0xF7 &&
+ p[0] >= 0x80 && p[0] <= 0xBF && p[1] >= 0x80 && p[1] <= 0xBF &&
+ p[2] >= 0x80 && p[2] <= 0xBF) {
// Matched legal quadruple-octet sequence. Skip the next three octets.
- *out << p[0] << p[1] << p[2] << p[3];
+ *out << (char)c0 << (char)p[0] << (char)p[1] << (char)p[2];
if (!out->good()) {
return false;
}
- i += 3;
- p += 3;
+ (void)in->Get();
+ (void)in->Get();
+ (void)in->Get();
} else {
// Illegal octet; replace.
- *out << '?';
+ *out << (char)'?';
if (!out->good()) {
return false;
}
}
}
- return true;
-}
-
-bool CdataEscapeAndAppend(const Path& input, std::ofstream* out_stm) {
- DWORD size;
- std::unique_ptr<uint8_t[]> data;
- if (!ReadCompleteFile(input, &data, &size)) {
- LogError(__LINE__, input.Get());
- return false;
- }
-
- return CdataEscape(data.get(), size, out_stm);
+ return c0 == IFStream::kIFStreamErrorEOF;
}
bool GetTestName(std::wstring* result) {
@@ -1664,39 +1650,51 @@
return false;
}
- std::ofstream stm(
+ bazel::windows::AutoHandle test_log;
+ if (!OpenExistingFileForRead(test_outerr, &test_log)) {
+ LogError(__LINE__, test_outerr.Get().c_str());
+ return false;
+ }
+
+ std::unique_ptr<IFStream> istm(IFStreamImpl::Create(test_log));
+ if (istm == nullptr) {
+ LogError(__LINE__, test_outerr.Get().c_str());
+ return false;
+ }
+
+ std::ofstream ostm(
AddUncPrefixMaybe(output).c_str(),
std::ios_base::out | std::ios_base::binary | std::ios_base::trunc);
- if (!stm.is_open() || !stm.good()) {
+ if (!ostm.is_open() || !ostm.good()) {
LogError(__LINE__, output.Get().c_str());
return false;
}
// Create XML file stub.
- stm << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
- "<testsuites>\n"
- "<testsuite name=\""
- << acp_test_name << "\" tests=\"1\" failures=\"0\" errors=\"" << errors
- << "\">\n"
- "<testcase name=\""
- << acp_test_name << "\" status=\"run\" duration=\"" << duration.seconds
- << "\" time=\"" << duration.seconds << "\">" << error_msg
- << "</testcase>\n"
- "<system-out><![CDATA[";
- if (!stm.good()) {
+ ostm << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+ "<testsuites>\n"
+ "<testsuite name=\""
+ << acp_test_name << "\" tests=\"1\" failures=\"0\" errors=\"" << errors
+ << "\">\n"
+ "<testcase name=\""
+ << acp_test_name << "\" status=\"run\" duration=\"" << duration.seconds
+ << "\" time=\"" << duration.seconds << "\">" << error_msg
+ << "</testcase>\n"
+ "<system-out><![CDATA[";
+ if (!ostm.good()) {
LogError(__LINE__, output.Get().c_str());
return false;
}
// Encode test log to make it embeddable in CDATA.
- if (!CdataEscapeAndAppend(test_outerr, &stm)) {
+ if (!CdataEscape(istm.get(), &ostm)) {
LogError(__LINE__, output.Get().c_str());
return false;
}
// Append CDATA end and closing tags.
- stm << "]]></system-out>\n</testsuite>\n</testsuites>\n";
- if (!stm.good()) {
+ ostm << "]]></system-out>\n</testsuite>\n</testsuites>\n";
+ if (!ostm.good()) {
LogError(__LINE__, output.Get().c_str());
return false;
}
@@ -1973,9 +1971,8 @@
return TeeImpl::Create(input, output1, output2, result);
}
-bool TestOnly_CdataEncode(const uint8_t* input, const DWORD size,
- std::basic_ostream<char>* out_stm) {
- return CdataEscape(input, size, out_stm);
+bool TestOnly_CdataEncode(IFStream* in_stm, std::basic_ostream<char>* out_stm) {
+ return CdataEscape(in_stm, out_stm);
}
IFStream* TestOnly_CreateIFStream(HANDLE handle, DWORD page_size) {
diff --git a/tools/test/windows/tw.h b/tools/test/windows/tw.h
index 85a3e7b..146c2d0 100644
--- a/tools/test/windows/tw.h
+++ b/tools/test/windows/tw.h
@@ -194,8 +194,7 @@
bazel::windows::AutoHandle* output2,
std::unique_ptr<Tee>* result);
-bool TestOnly_CdataEncode(const uint8_t* buffer, const DWORD size,
- std::basic_ostream<char>* out_stm);
+bool TestOnly_CdataEncode(IFStream* in_stm, std::basic_ostream<char>* out_stm);
IFStream* TestOnly_CreateIFStream(HANDLE handle, DWORD page_size);
diff --git a/tools/test/windows/tw_test.cc b/tools/test/windows/tw_test.cc
index 24dc601..2129879 100644
--- a/tools/test/windows/tw_test.cc
+++ b/tools/test/windows/tw_test.cc
@@ -142,14 +142,18 @@
FILE_ATTRIBUTE_NORMAL, NULL);
}
-HANDLE FopenContents(wchar_t* wline, const char* contents) {
+HANDLE FopenContents(const wchar_t* wline, const char* contents, DWORD size) {
std::wstring tmpdir;
GET_TEST_TMPDIR(&tmpdir);
std::wstring filename = tmpdir + L"\\tmp" + wline;
- EXPECT_TRUE(blaze_util::CreateDummyFile(filename, contents));
+ EXPECT_TRUE(blaze_util::CreateDummyFile(filename, contents, size));
return FopenRead(filename);
}
+HANDLE FopenContents(const wchar_t* wline, const char* contents) {
+ return FopenContents(wline, contents, strlen(contents));
+}
+
TEST_F(TestWrapperWindowsTest, TestGetFileListRelativeTo) {
std::wstring tmpdir;
GET_TEST_TMPDIR(&tmpdir);
@@ -471,24 +475,27 @@
write1 = INVALID_HANDLE_VALUE; // closes handle so the Tee thread can exit
}
-void AssertCdataEncodeBuffer(const char* input, DWORD size,
- const char* expected_output) {
+void AssertCdataEncodeBuffer(const wchar_t* wline, const char* input,
+ DWORD size, const char* expected_output) {
+ bazel::windows::AutoHandle h(FopenContents(wline, input, size));
+ std::unique_ptr<IFStream> istm(TestOnly_CreateIFStream(h, /* page_size */ 4));
std::stringstream out_stm;
- ASSERT_TRUE(TestOnly_CdataEncode(reinterpret_cast<const uint8_t*>(input),
- size, &out_stm));
+ ASSERT_TRUE(TestOnly_CdataEncode(istm.get(), &out_stm));
ASSERT_EQ(expected_output, out_stm.str());
}
-void AssertCdataEncodeBuffer(const char* input, const char* expected_output) {
- AssertCdataEncodeBuffer(input, strlen(input), expected_output);
+void AssertCdataEncodeBuffer(const wchar_t* wline, const char* input,
+ const char* expected_output) {
+ AssertCdataEncodeBuffer(wline, input, strlen(input), expected_output);
}
TEST_F(TestWrapperWindowsTest, TestCdataEscapeNullTerminator) {
- AssertCdataEncodeBuffer("x\0y", 3, "x?y");
+ AssertCdataEncodeBuffer(WLINE, "x\0y", 3, "x?y");
}
TEST_F(TestWrapperWindowsTest, TestCdataEscapeCdataEndings) {
AssertCdataEncodeBuffer(
+ WLINE,
// === Input ===
// CDATA end sequence, followed by some arbitrary octet.
"]]>x"
@@ -504,24 +511,26 @@
}
TEST_F(TestWrapperWindowsTest, TestCdataEscapeSingleOctets) {
- AssertCdataEncodeBuffer( // === Input ===
- // Legal single-octets.
- "AB\x9\xA\xD\x20\x7F"
- // Illegal single-octets.
- "\x8\xB\xC\x1F\x80\xFF"
- "x",
+ AssertCdataEncodeBuffer(WLINE,
+ // === Input ===
+ // Legal single-octets.
+ "AB\x9\xA\xD\x20\x7F"
+ // Illegal single-octets.
+ "\x8\xB\xC\x1F\x80\xFF"
+ "x",
- // === Expected output ===
- // Legal single-octets.
- "AB\x9\xA\xD\x20\x7F"
- // Illegal single-octets.
- "??????"
- "x");
+ // === Expected output ===
+ // Legal single-octets.
+ "AB\x9\xA\xD\x20\x7F"
+ // Illegal single-octets.
+ "??????"
+ "x");
}
TEST_F(TestWrapperWindowsTest, TestCdataEscapeDoubleOctets) {
// Legal range: [\xc0-\xdf][\x80-\xbf]
AssertCdataEncodeBuffer(
+ WLINE,
"x"
// Legal double-octet sequences.
"\xC0\x80"
@@ -558,34 +567,34 @@
std::wstring tmpdir;
GET_TEST_TMPDIR(&tmpdir);
- AssertCdataEncodeBuffer(
- // === Input ===
- "AB\xA\xC\xD"
- "]]>"
- "]]]>"
- "\xC0\x80"
- "a"
- "\xED\x9F\xBF"
- "b"
- "\xEF\xBF\xB0"
- "c"
- "\xF7\xB0\x80\x81"
- "d"
- "]]>",
+ AssertCdataEncodeBuffer(WLINE,
+ // === Input ===
+ "AB\xA\xC\xD"
+ "]]>"
+ "]]]>"
+ "\xC0\x80"
+ "a"
+ "\xED\x9F\xBF"
+ "b"
+ "\xEF\xBF\xB0"
+ "c"
+ "\xF7\xB0\x80\x81"
+ "d"
+ "]]>",
- // === Output ===
- "AB\xA?\xD"
- "]]>]]<![CDATA[>"
- "]]]>]]<![CDATA[>"
- "\xC0\x80"
- "a"
- "\xED\x9F\xBF"
- "b"
- "\xEF\xBF\xB0"
- "c"
- "\xF7\xB0\x80\x81"
- "d"
- "]]>]]<![CDATA[>");
+ // === Output ===
+ "AB\xA?\xD"
+ "]]>]]<![CDATA[>"
+ "]]]>]]<![CDATA[>"
+ "\xC0\x80"
+ "a"
+ "\xED\x9F\xBF"
+ "b"
+ "\xEF\xBF\xB0"
+ "c"
+ "\xF7\xB0\x80\x81"
+ "d"
+ "]]>]]<![CDATA[>");
}
TEST_F(TestWrapperWindowsTest, TestIFStreamNoData) {