Windows: rewrite and move path normalization

The new implementation:

- does not store substrings in the 'segments'
  vector, only indices
- compares parts of the original string without
  creating other string objects
- creates substrings only while merging remaining
  segments, thus creating at most as many strings
  (and potentially fewer) than the old
  implementation

Closes #8471.

PiperOrigin-RevId: 250238394
diff --git a/src/main/cpp/util/path_platform.h b/src/main/cpp/util/path_platform.h
index d4bf98a..db9ed89 100644
--- a/src/main/cpp/util/path_platform.h
+++ b/src/main/cpp/util/path_platform.h
@@ -83,8 +83,7 @@
 
 bool IsRootDirectoryW(const std::wstring &path);
 
-bool TestOnly_NormalizeWindowsPath(const std::string &path,
-                                   std::string *result);
+std::string TestOnly_NormalizeWindowsPath(const std::string &path);
 
 // Converts 'path' to Windows style.
 //
diff --git a/src/main/cpp/util/path_windows.cc b/src/main/cpp/util/path_windows.cc
index dad839d..7f6196b 100644
--- a/src/main/cpp/util/path_windows.cc
+++ b/src/main/cpp/util/path_windows.cc
@@ -275,104 +275,87 @@
 //   just a drive specifier (e.g. when `path` is "c:/" or "c:/foo/.."). The
 //   result will preserve the casing of the input, so "D:/Bar" becomes
 //   "D:\\Bar".
-template <typename char_type>
-static bool NormalizeWindowsPath(const std::basic_string<char_type>& path,
-                                 std::basic_string<char_type>* result) {
-  if (path.empty()) {
-    *result = path;
-    return true;
+template <typename C>
+std::basic_string<C> NormalizeWindowsPath(const std::basic_string<C>& p) {
+  if (p.empty()) {
+    return p;
   }
-  if (IsPathSeparator(path[0])) {
-    return false;
-  }
-
-  static const std::basic_string<char_type> kDot =
-      std::basic_string<char_type>(1, '.');
-  static const std::basic_string<char_type> kDotDot =
-      std::basic_string<char_type>(2, '.');
-
-  std::vector<std::basic_string<char_type> > segments;
-  std::basic_string<char_type>::size_type seg_start = path.size();
-  std::basic_string<char_type>::size_type total_len = 0;
-  for (std::basic_string<char_type>::size_type i =
-           HasUncPrefix(path.c_str()) ? 4 : 0;
-       i <= path.size(); ++i) {
-    if (i == path.size() || IsPathSeparator(path[i])) {
-      // The current character ends a segment.
-      if (seg_start < path.size() && i > seg_start) {
-        std::basic_string<char_type> seg =
-            i == path.size() ? path.substr(seg_start)
-                             : path.substr(seg_start, i - seg_start);
-        if (seg == kDotDot) {
-          if (segments.empty() || segments.back() == kDotDot) {
-            // Preserve ".." if the path is relative and there are only ".."
-            // segment(s) at the front.
-            segments.push_back(seg);
-            total_len += 2;
-          } else if (segments.size() == 1 && segments.back() == kDot) {
-            // Replace the existing "." if that was the only path segment.
-            segments[0] = seg;
-            total_len = 2;
-          } else if (segments.size() > 1 ||
-                     !HasDriveSpecifierPrefix(segments.back().c_str())) {
-            // Remove the last segment unless the path is already at the root
-            // directory.
-            total_len -= segments.back().size();
-            segments.pop_back();
-          }
-        } else if (seg == kDot) {
-          if (segments.empty()) {
-            // Preserve "." if and only if it's the first path segment.
-            // Subsequent segments may replace this segment.
-            segments.push_back(seg);
-            total_len = 1;
-          }
-        } else {
-          // This is a normal path segment, i.e. neither "." nor ".."
-          if (segments.size() == 1 && segments[0] == kDot) {
-            // Replace the only path segment if it was "."
-            segments[0] = seg;
-            total_len = seg.size();
-          } else {
-            // Store the current segment.
-            segments.push_back(seg);
-            total_len += seg.size();
-          }
-        }
-      }
-      // Indicate that there's no segment started.
-      seg_start = path.size();
-    } else {
-      // The current character starts a new segment, or is inside one.
-      if (seg_start == path.size()) {
-        // The current character starts the segment.
+  typedef std::basic_string<C> Str;
+  static const Str kDot(1, '.');
+  static const Str kDotDot(2, '.');
+  std::vector<std::pair<Str::size_type, Str::size_type> > segments;
+  Str::size_type seg_start = Str::npos;
+  bool first = true;
+  bool abs = false;
+  bool starts_with_dot = false;
+  for (Str::size_type i = HasUncPrefix(p.c_str()) ? 4 : 0; i <= p.size(); ++i) {
+    if (seg_start == Str::npos) {
+      if (i < p.size() && p[i] != '/' && p[i] != '\\') {
         seg_start = i;
       }
+    } else {
+      if (i == p.size() || p[i] == '/' || p[i] == '\\') {
+        // The current character ends a segment.
+        Str::size_type len = i - seg_start;
+        if (first) {
+          first = false;
+          abs = len == 2 &&
+                ((p[seg_start] >= 'A' && p[seg_start] <= 'Z') ||
+                 (p[seg_start] >= 'a' && p[seg_start] <= 'z')) &&
+                p[seg_start + 1] == ':';
+          segments.push_back(std::make_pair(seg_start, len));
+          starts_with_dot = !abs && p.compare(seg_start, len, kDot) == 0;
+        } else {
+          if (p.compare(seg_start, len, kDot) == 0) {
+            if (segments.empty()) {
+              // Retain "." if that is the first (and possibly only segment).
+              segments.push_back(std::make_pair(seg_start, len));
+              starts_with_dot = true;
+            }
+          } else {
+            if (starts_with_dot) {
+              // Delete the existing "." if that was the only path segment.
+              segments.clear();
+              starts_with_dot = false;
+            }
+            if (p.compare(seg_start, len, kDotDot) == 0) {
+              if (segments.empty() ||
+                  p.compare(segments.back().first, segments.back().second,
+                            kDotDot) == 0) {
+                // Preserve ".." if the path is relative and there are only ".."
+                // segment(s) at the front.
+                segments.push_back(std::make_pair(seg_start, len));
+              } else if (!abs || segments.size() > 1) {
+                // Remove the last segment unless the path is already at the
+                // root directory.
+                segments.pop_back();
+              }  // Ignore ".." otherwise.
+            } else {
+              // This is a normal path segment, i.e. neither "." nor ".."
+              segments.push_back(std::make_pair(seg_start, len));
+            }
+          }
+        }
+        // Indicate that there's no segment started.
+        seg_start = Str::npos;
+      }
     }
   }
-  if (segments.empty()) {
-    result->clear();
-    return true;
-  }
-  if (segments.size() == 1 &&
-      HasDriveSpecifierPrefix(segments.back().c_str())) {
-    *result = segments.back() + std::basic_string<char_type>(1, '\\');
-    return true;
-  }
-  // Reserve enough space for all segments plus separators between them (one
-  // less than segments.size()).
-  *result = std::basic_string<char_type>(total_len + segments.size() - 1, 0);
-  std::basic_string<char_type>::iterator pos = result->begin();
-  std::basic_string<char_type>::size_type idx = 0;
-  for (const auto& seg : segments) {
-    std::copy(seg.cbegin(), seg.cend(), pos);
-    pos += seg.size();
-    if (pos < result->cend() - 1) {
-      // Add a separator if we haven't reached the end of the string yet.
-      *pos++ = '\\';
+  std::basic_stringstream<C> res;
+  first = true;
+  for (const auto& i : segments) {
+    Str s = p.substr(i.first, i.second);
+    if (first) {
+      first = false;
+    } else {
+      res << '\\';
     }
+    res << s;
   }
-  return true;
+  if (abs && segments.size() == 1) {
+    res << '\\';
+  }
+  return res.str();
 }
 
 template <typename char_type>
@@ -424,12 +407,7 @@
     mutable_path = drive + path;
   }  // otherwise this is a relative path, or absolute Windows path.
 
-  if (!NormalizeWindowsPath(mutable_path, result)) {
-    if (error) {
-      *error = "path normalization failed";
-    }
-    return false;
-  }
+  *result = NormalizeWindowsPath(mutable_path);
   return true;
 }
 
@@ -467,19 +445,11 @@
     if (result->empty() || (result->size() == 1 && (*result)[0] == '.')) {
       *result = GetCwdW();
     } else {
-      std::wstring wd = RemoveUncPrefixMaybe(GetCwdW().c_str());
-      std::wstring joined = wd + L"\\" + *result;
-      if (!NormalizeWindowsPath(joined, result)) {
-        BAZEL_DIE(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR)
-            << "NormalizePath(" << WstringToCstring(joined.c_str()).get()
-            << ")";
-      }
+      *result = GetCwdW() + L"\\" + *result;
     }
   }
 
-  if (!HasUncPrefix(result->c_str())) {
-    *result = std::wstring(L"\\\\?\\") + *result;
-  }
+  *result = std::wstring(L"\\\\?\\") + NormalizeWindowsPath(*result);
   return true;
 }
 
@@ -601,9 +571,8 @@
   return 'a' + wdrive - offset;
 }
 
-bool TestOnly_NormalizeWindowsPath(const std::string& path,
-                                   std::string* result) {
-  return NormalizeWindowsPath(path, result);
+std::string TestOnly_NormalizeWindowsPath(const std::string& path) {
+  return NormalizeWindowsPath(path);
 }
 
 }  // namespace blaze_util
diff --git a/src/test/cpp/util/path_windows_test.cc b/src/test/cpp/util/path_windows_test.cc
index 1da465b..03e2762 100644
--- a/src/test/cpp/util/path_windows_test.cc
+++ b/src/test/cpp/util/path_windows_test.cc
@@ -46,12 +46,7 @@
 using std::wstring;
 
 TEST(PathWindowsTest, TestNormalizeWindowsPath) {
-#define ASSERT_NORMALIZE(x, y)                                          \
-  {                                                                     \
-    std::string result;                                                 \
-    EXPECT_TRUE(blaze_util::TestOnly_NormalizeWindowsPath(x, &result)); \
-    EXPECT_EQ(result, y);                                               \
-  }
+#define ASSERT_NORMALIZE(x, y) EXPECT_EQ(TestOnly_NormalizeWindowsPath(x), y);
 
   ASSERT_NORMALIZE("", "");
   ASSERT_NORMALIZE("a", "a");
@@ -115,6 +110,8 @@
   ASSERT_NORMALIZE("c:/a/..", "c:\\");
   ASSERT_NORMALIZE("c:/a/../", "c:\\");
   ASSERT_NORMALIZE("c:/a/./../", "c:\\");
+  ASSERT_NORMALIZE("c:/../d:/e", "c:\\d:\\e");
+  ASSERT_NORMALIZE("c:/../d:/../e", "c:\\e");
 
   ASSERT_NORMALIZE("foo", "foo");
   ASSERT_NORMALIZE("foo/", "foo");