Windows, test wrapper: use IFStream to encode XML

Use a buffered input file stream (IFStream) to
read the test log file and escape invalid UTF-8
characters and sequences that would be unsafe to
embed in the test XML's CDATA section.

The benefit: the code no longer reads the entire
test log into memory before said escaping, which
is great for memory usage because these log files
may get arbitrarily large.

See https://github.com/bazelbuild/bazel/issues/5508

Closes #7539.

PiperOrigin-RevId: 235709946
diff --git a/src/test/cpp/util/windows_test_util.cc b/src/test/cpp/util/windows_test_util.cc
index aba74c2..8b78d86 100644
--- a/src/test/cpp/util/windows_test_util.cc
+++ b/src/test/cpp/util/windows_test_util.cc
@@ -88,6 +88,11 @@
 }
 
 bool CreateDummyFile(const wstring& path, const std::string& content) {
+  return CreateDummyFile(path, content.c_str(), content.size());
+}
+
+bool CreateDummyFile(const std::wstring& path, const void* content,
+                     const DWORD size) {
   HANDLE handle =
       ::CreateFileW(path.c_str(), GENERIC_WRITE, FILE_SHARE_READ, NULL,
                     CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
@@ -96,9 +101,8 @@
   }
   bool result = true;
   DWORD actually_written = 0;
-  if (!::WriteFile(handle, content.c_str(), content.size(), &actually_written,
-                   NULL) &&
-      actually_written != content.size()) {
+  if (!::WriteFile(handle, content, size, &actually_written, NULL) &&
+      actually_written != size) {
     result = false;
   }
   CloseHandle(handle);
diff --git a/src/test/cpp/util/windows_test_util.h b/src/test/cpp/util/windows_test_util.h
index fbf7710..3f0ed93 100644
--- a/src/test/cpp/util/windows_test_util.h
+++ b/src/test/cpp/util/windows_test_util.h
@@ -32,6 +32,11 @@
 bool CreateDummyFile(const std::wstring& path,
                      const std::string& content = "hello");
 
+// Creates a dummy file under `path`.
+// `path` must be a valid Windows path, and have a UNC prefix if necessary.
+bool CreateDummyFile(const std::wstring& path, const void* content,
+                     const DWORD size);
+
 }  // namespace blaze_util
 
 #endif  // BAZEL_SRC_TEST_CPP_UTIL_WINDOWS_TEST_UTIL_H_
diff --git a/tools/test/windows/tw.cc b/tools/test/windows/tw.cc
index ea1f232..aec0853 100644
--- a/tools/test/windows/tw.cc
+++ b/tools/test/windows/tw.cc
@@ -1480,83 +1480,69 @@
 //
 // Every octet-sequence matching one of these regexps will be left alone, all
 // other octet-sequences will be replaced by '?' characters.
-bool CdataEscape(const uint8_t* input, const DWORD size,
-                 std::basic_ostream<char>* out) {
-  // We aren't modifying the input, so const_cast is fine.
-  uint8_t* p = const_cast<uint8_t*>(input);
-
-  for (DWORD i = 0; i < size; ++i, ++p) {
-    if (p[0] == ']' && (i + 2 < size) && p[1] == ']' && p[2] == '>') {
+bool CdataEscape(IFStream* in, std::basic_ostream<char>* out) {
+  int c0 = in->Get();
+  uint8_t p[3];
+  for (; c0 < 256; c0 = in->Get()) {
+    if (c0 == ']' && in->Peek(2, p) == 2 && p[0] == ']' && p[1] == '>') {
       *out << "]]>]]<![CDATA[>";
       if (!out->good()) {
         return false;
       }
-      i += 2;
-      p += 2;
-    } else if (*p == 0x9 || *p == 0xA || *p == 0xD ||
-               (*p >= 0x20 && *p <= 0x7F)) {
+      (void)in->Get();
+      (void)in->Get();
+    } else if (c0 == 0x9 || c0 == 0xA || c0 == 0xD ||
+               (c0 >= 0x20 && c0 <= 0x7F)) {
       // Matched legal single-octet sequence.
-      *out << *p;
+      *out << (char)c0;
       if (!out->good()) {
         return false;
       }
-    } else if ((i + 1 < size) && p[0] >= 0xC0 && p[0] <= 0xDF && p[1] >= 0x80 &&
-               p[1] <= 0xBF) {
+    } else if (c0 >= 0xC0 && c0 <= 0xDF && in->Peek(1, p) == 1 &&
+               p[0] >= 0x80 && p[0] <= 0xBF) {
       // Matched legal double-octet sequence. Skip the next octet.
-      *out << p[0] << p[1];
+      *out << (char)c0 << (char)p[0];
       if (!out->good()) {
         return false;
       }
-      i += 1;
-      p += 1;
-    } else if ((i + 2 < size) &&
-               ((p[0] >= 0xE0 && p[0] <= 0xEC && p[1] >= 0x80 && p[1] <= 0xBF &&
-                 p[2] >= 0x80 && p[2] <= 0xBF) ||
-                (p[0] == 0xED && p[1] >= 0x80 && p[1] <= 0x9F && p[2] >= 0x80 &&
-                 p[2] <= 0xBF) ||
-                (p[0] == 0xEE && p[1] >= 0x80 && p[1] <= 0xBF && p[2] >= 0x80 &&
-                 p[2] <= 0xBF) ||
-                (p[0] == 0xEF && p[1] >= 0x80 && p[1] <= 0xBE && p[2] >= 0x80 &&
-                 p[2] <= 0xBF) ||
-                (p[0] == 0xEF && p[1] == 0xBF && p[2] >= 0x80 &&
-                 p[2] <= 0xBD))) {
+      (void)in->Get();
+    } else if (in->Peek(2, p) == 2 &&
+               ((c0 >= 0xE0 && c0 <= 0xEC && p[0] >= 0x80 && p[0] <= 0xBF &&
+                 p[1] >= 0x80 && p[1] <= 0xBF) ||
+                (c0 == 0xED && p[0] >= 0x80 && p[0] <= 0x9F && p[1] >= 0x80 &&
+                 p[1] <= 0xBF) ||
+                (c0 == 0xEE && p[0] >= 0x80 && p[0] <= 0xBF && p[1] >= 0x80 &&
+                 p[1] <= 0xBF) ||
+                (c0 == 0xEF && p[0] >= 0x80 && p[0] <= 0xBE && p[1] >= 0x80 &&
+                 p[1] <= 0xBF) ||
+                (c0 == 0xEF && p[0] == 0xBF && p[1] >= 0x80 && p[1] <= 0xBD))) {
       // Matched legal triple-octet sequence. Skip the next two octets.
-      *out << p[0] << p[1] << p[2];
+      *out << (char)c0 << (char)p[0] << (char)p[1];
       if (!out->good()) {
         return false;
       }
-      i += 2;
-      p += 2;
-    } else if ((i + 3 < size) && p[0] >= 0xF0 && p[0] <= 0xF7 && p[1] >= 0x80 &&
-               p[1] <= 0xBF && p[2] >= 0x80 && p[2] <= 0xBF && p[3] >= 0x80 &&
-               p[3] <= 0xBF) {
+      (void)in->Get();
+      (void)in->Get();
+    } else if (in->Peek(3, p) == 3 && c0 >= 0xF0 && c0 <= 0xF7 &&
+               p[0] >= 0x80 && p[0] <= 0xBF && p[1] >= 0x80 && p[1] <= 0xBF &&
+               p[2] >= 0x80 && p[2] <= 0xBF) {
       // Matched legal quadruple-octet sequence. Skip the next three octets.
-      *out << p[0] << p[1] << p[2] << p[3];
+      *out << (char)c0 << (char)p[0] << (char)p[1] << (char)p[2];
       if (!out->good()) {
         return false;
       }
-      i += 3;
-      p += 3;
+      (void)in->Get();
+      (void)in->Get();
+      (void)in->Get();
     } else {
       // Illegal octet; replace.
-      *out << '?';
+      *out << (char)'?';
       if (!out->good()) {
         return false;
       }
     }
   }
-  return true;
-}
-
-bool CdataEscapeAndAppend(const Path& input, std::ofstream* out_stm) {
-  DWORD size;
-  std::unique_ptr<uint8_t[]> data;
-  if (!ReadCompleteFile(input, &data, &size)) {
-    LogError(__LINE__, input.Get());
-    return false;
-  }
-
-  return CdataEscape(data.get(), size, out_stm);
+  return c0 == IFStream::kIFStreamErrorEOF;
 }
 
 bool GetTestName(std::wstring* result) {
@@ -1664,39 +1650,51 @@
     return false;
   }
 
-  std::ofstream stm(
+  bazel::windows::AutoHandle test_log;
+  if (!OpenExistingFileForRead(test_outerr, &test_log)) {
+    LogError(__LINE__, test_outerr.Get().c_str());
+    return false;
+  }
+
+  std::unique_ptr<IFStream> istm(IFStreamImpl::Create(test_log));
+  if (istm == nullptr) {
+    LogError(__LINE__, test_outerr.Get().c_str());
+    return false;
+  }
+
+  std::ofstream ostm(
       AddUncPrefixMaybe(output).c_str(),
       std::ios_base::out | std::ios_base::binary | std::ios_base::trunc);
-  if (!stm.is_open() || !stm.good()) {
+  if (!ostm.is_open() || !ostm.good()) {
     LogError(__LINE__, output.Get().c_str());
     return false;
   }
 
   // Create XML file stub.
-  stm << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
-         "<testsuites>\n"
-         "<testsuite name=\""
-      << acp_test_name << "\" tests=\"1\" failures=\"0\" errors=\"" << errors
-      << "\">\n"
-         "<testcase name=\""
-      << acp_test_name << "\" status=\"run\" duration=\"" << duration.seconds
-      << "\" time=\"" << duration.seconds << "\">" << error_msg
-      << "</testcase>\n"
-         "<system-out><![CDATA[";
-  if (!stm.good()) {
+  ostm << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+          "<testsuites>\n"
+          "<testsuite name=\""
+       << acp_test_name << "\" tests=\"1\" failures=\"0\" errors=\"" << errors
+       << "\">\n"
+          "<testcase name=\""
+       << acp_test_name << "\" status=\"run\" duration=\"" << duration.seconds
+       << "\" time=\"" << duration.seconds << "\">" << error_msg
+       << "</testcase>\n"
+          "<system-out><![CDATA[";
+  if (!ostm.good()) {
     LogError(__LINE__, output.Get().c_str());
     return false;
   }
 
   // Encode test log to make it embeddable in CDATA.
-  if (!CdataEscapeAndAppend(test_outerr, &stm)) {
+  if (!CdataEscape(istm.get(), &ostm)) {
     LogError(__LINE__, output.Get().c_str());
     return false;
   }
 
   // Append CDATA end and closing tags.
-  stm << "]]></system-out>\n</testsuite>\n</testsuites>\n";
-  if (!stm.good()) {
+  ostm << "]]></system-out>\n</testsuite>\n</testsuites>\n";
+  if (!ostm.good()) {
     LogError(__LINE__, output.Get().c_str());
     return false;
   }
@@ -1973,9 +1971,8 @@
   return TeeImpl::Create(input, output1, output2, result);
 }
 
-bool TestOnly_CdataEncode(const uint8_t* input, const DWORD size,
-                          std::basic_ostream<char>* out_stm) {
-  return CdataEscape(input, size, out_stm);
+bool TestOnly_CdataEncode(IFStream* in_stm, std::basic_ostream<char>* out_stm) {
+  return CdataEscape(in_stm, out_stm);
 }
 
 IFStream* TestOnly_CreateIFStream(HANDLE handle, DWORD page_size) {
diff --git a/tools/test/windows/tw.h b/tools/test/windows/tw.h
index 85a3e7b..146c2d0 100644
--- a/tools/test/windows/tw.h
+++ b/tools/test/windows/tw.h
@@ -194,8 +194,7 @@
                         bazel::windows::AutoHandle* output2,
                         std::unique_ptr<Tee>* result);
 
-bool TestOnly_CdataEncode(const uint8_t* buffer, const DWORD size,
-                          std::basic_ostream<char>* out_stm);
+bool TestOnly_CdataEncode(IFStream* in_stm, std::basic_ostream<char>* out_stm);
 
 IFStream* TestOnly_CreateIFStream(HANDLE handle, DWORD page_size);
 
diff --git a/tools/test/windows/tw_test.cc b/tools/test/windows/tw_test.cc
index 24dc601..2129879 100644
--- a/tools/test/windows/tw_test.cc
+++ b/tools/test/windows/tw_test.cc
@@ -142,14 +142,18 @@
                      FILE_ATTRIBUTE_NORMAL, NULL);
 }
 
-HANDLE FopenContents(wchar_t* wline, const char* contents) {
+HANDLE FopenContents(const wchar_t* wline, const char* contents, DWORD size) {
   std::wstring tmpdir;
   GET_TEST_TMPDIR(&tmpdir);
   std::wstring filename = tmpdir + L"\\tmp" + wline;
-  EXPECT_TRUE(blaze_util::CreateDummyFile(filename, contents));
+  EXPECT_TRUE(blaze_util::CreateDummyFile(filename, contents, size));
   return FopenRead(filename);
 }
 
+HANDLE FopenContents(const wchar_t* wline, const char* contents) {
+  return FopenContents(wline, contents, strlen(contents));
+}
+
 TEST_F(TestWrapperWindowsTest, TestGetFileListRelativeTo) {
   std::wstring tmpdir;
   GET_TEST_TMPDIR(&tmpdir);
@@ -471,24 +475,27 @@
   write1 = INVALID_HANDLE_VALUE;  // closes handle so the Tee thread can exit
 }
 
-void AssertCdataEncodeBuffer(const char* input, DWORD size,
-                             const char* expected_output) {
+void AssertCdataEncodeBuffer(const wchar_t* wline, const char* input,
+                             DWORD size, const char* expected_output) {
+  bazel::windows::AutoHandle h(FopenContents(wline, input, size));
+  std::unique_ptr<IFStream> istm(TestOnly_CreateIFStream(h, /* page_size */ 4));
   std::stringstream out_stm;
-  ASSERT_TRUE(TestOnly_CdataEncode(reinterpret_cast<const uint8_t*>(input),
-                                   size, &out_stm));
+  ASSERT_TRUE(TestOnly_CdataEncode(istm.get(), &out_stm));
   ASSERT_EQ(expected_output, out_stm.str());
 }
 
-void AssertCdataEncodeBuffer(const char* input, const char* expected_output) {
-  AssertCdataEncodeBuffer(input, strlen(input), expected_output);
+void AssertCdataEncodeBuffer(const wchar_t* wline, const char* input,
+                             const char* expected_output) {
+  AssertCdataEncodeBuffer(wline, input, strlen(input), expected_output);
 }
 
 TEST_F(TestWrapperWindowsTest, TestCdataEscapeNullTerminator) {
-  AssertCdataEncodeBuffer("x\0y", 3, "x?y");
+  AssertCdataEncodeBuffer(WLINE, "x\0y", 3, "x?y");
 }
 
 TEST_F(TestWrapperWindowsTest, TestCdataEscapeCdataEndings) {
   AssertCdataEncodeBuffer(
+      WLINE,
       // === Input ===
       // CDATA end sequence, followed by some arbitrary octet.
       "]]>x"
@@ -504,24 +511,26 @@
 }
 
 TEST_F(TestWrapperWindowsTest, TestCdataEscapeSingleOctets) {
-  AssertCdataEncodeBuffer(  // === Input ===
-                            // Legal single-octets.
-      "AB\x9\xA\xD\x20\x7F"
-      // Illegal single-octets.
-      "\x8\xB\xC\x1F\x80\xFF"
-      "x",
+  AssertCdataEncodeBuffer(WLINE,
+                          // === Input ===
+                          // Legal single-octets.
+                          "AB\x9\xA\xD\x20\x7F"
+                          // Illegal single-octets.
+                          "\x8\xB\xC\x1F\x80\xFF"
+                          "x",
 
-      // === Expected output ===
-      // Legal single-octets.
-      "AB\x9\xA\xD\x20\x7F"
-      // Illegal single-octets.
-      "??????"
-      "x");
+                          // === Expected output ===
+                          // Legal single-octets.
+                          "AB\x9\xA\xD\x20\x7F"
+                          // Illegal single-octets.
+                          "??????"
+                          "x");
 }
 
 TEST_F(TestWrapperWindowsTest, TestCdataEscapeDoubleOctets) {
   // Legal range: [\xc0-\xdf][\x80-\xbf]
   AssertCdataEncodeBuffer(
+      WLINE,
       "x"
       // Legal double-octet sequences.
       "\xC0\x80"
@@ -558,34 +567,34 @@
   std::wstring tmpdir;
   GET_TEST_TMPDIR(&tmpdir);
 
-  AssertCdataEncodeBuffer(
-      // === Input ===
-      "AB\xA\xC\xD"
-      "]]>"
-      "]]]>"
-      "\xC0\x80"
-      "a"
-      "\xED\x9F\xBF"
-      "b"
-      "\xEF\xBF\xB0"
-      "c"
-      "\xF7\xB0\x80\x81"
-      "d"
-      "]]>",
+  AssertCdataEncodeBuffer(WLINE,
+                          // === Input ===
+                          "AB\xA\xC\xD"
+                          "]]>"
+                          "]]]>"
+                          "\xC0\x80"
+                          "a"
+                          "\xED\x9F\xBF"
+                          "b"
+                          "\xEF\xBF\xB0"
+                          "c"
+                          "\xF7\xB0\x80\x81"
+                          "d"
+                          "]]>",
 
-      // === Output ===
-      "AB\xA?\xD"
-      "]]>]]<![CDATA[>"
-      "]]]>]]<![CDATA[>"
-      "\xC0\x80"
-      "a"
-      "\xED\x9F\xBF"
-      "b"
-      "\xEF\xBF\xB0"
-      "c"
-      "\xF7\xB0\x80\x81"
-      "d"
-      "]]>]]<![CDATA[>");
+                          // === Output ===
+                          "AB\xA?\xD"
+                          "]]>]]<![CDATA[>"
+                          "]]]>]]<![CDATA[>"
+                          "\xC0\x80"
+                          "a"
+                          "\xED\x9F\xBF"
+                          "b"
+                          "\xEF\xBF\xB0"
+                          "c"
+                          "\xF7\xB0\x80\x81"
+                          "d"
+                          "]]>]]<![CDATA[>");
 }
 
 TEST_F(TestWrapperWindowsTest, TestIFStreamNoData) {