Windows: move path normalization into JNI library

Motivation: we need to support process creation from
relative paths. Making such paths absolute requires
normalizing them, so the JNI library needs this logic.

Closes #8473.

PiperOrigin-RevId: 250245572
diff --git a/src/main/cpp/util/path_platform.h b/src/main/cpp/util/path_platform.h
index db9ed89..da5bf4b 100644
--- a/src/main/cpp/util/path_platform.h
+++ b/src/main/cpp/util/path_platform.h
@@ -83,8 +83,6 @@
 
 bool IsRootDirectoryW(const std::wstring &path);
 
-std::string TestOnly_NormalizeWindowsPath(const std::string &path);
-
 // Converts 'path' to Windows style.
 //
 // 'path' is absolute or relative or current-drive-relative (e.g.
diff --git a/src/main/cpp/util/path_windows.cc b/src/main/cpp/util/path_windows.cc
index 7f6196b..bb4da57 100644
--- a/src/main/cpp/util/path_windows.cc
+++ b/src/main/cpp/util/path_windows.cc
@@ -242,122 +242,6 @@
 
 void assignNUL(std::wstring* s) { s->assign(L"NUL"); }
 
-// Returns a normalized form of the input `path`.
-//
-// Normalization:
-//   Normalization means removing "." references, resolving ".." references,
-//   and deduplicating "/" characters while converting them to "\\".  For
-//   example if `path` is "foo/../bar/.//qux", the result is "bar\\qux".
-//
-//   Uplevel references ("..") that cannot go any higher in the directory tree
-//   are preserved if the path is relative, and ignored if the path is
-//   absolute, e.g. "../../foo" is normalized to "..\\..\\foo" but "c:/.." is
-//   normalized to "c:\\".
-//
-//   This method does not check the semantics of the `path` beyond checking if
-//   it starts with a directory separator. Illegal paths such as one with a
-//   drive specifier in the middle (e.g. "foo/c:/bar") are accepted -- it's the
-//   caller's responsibility to pass a path that, when normalized, will be
-//   semantically correct.
-//
-//   Current directory references (".") are preserved if and only if they are
-//   the only path segment, so "./" becomes "." but "./foo" becomes "foo".
-//
-// Arguments:
-//   `path` must be a relative or absolute Windows path, it may use "/" instead
-//   of "\\". The path should not start with "/" or "\\".
-//
-// Result:
-//   Returns false if and only if the path starts with a directory separator.
-//
-//   The result won't have a UNC prefix, even if `path` did. The result won't
-//   have a trailing "\\" except when and only when the path is normalized to
-//   just a drive specifier (e.g. when `path` is "c:/" or "c:/foo/.."). The
-//   result will preserve the casing of the input, so "D:/Bar" becomes
-//   "D:\\Bar".
-template <typename C>
-std::basic_string<C> NormalizeWindowsPath(const std::basic_string<C>& p) {
-  if (p.empty()) {
-    return p;
-  }
-  typedef std::basic_string<C> Str;
-  static const Str kDot(1, '.');
-  static const Str kDotDot(2, '.');
-  std::vector<std::pair<Str::size_type, Str::size_type> > segments;
-  Str::size_type seg_start = Str::npos;
-  bool first = true;
-  bool abs = false;
-  bool starts_with_dot = false;
-  for (Str::size_type i = HasUncPrefix(p.c_str()) ? 4 : 0; i <= p.size(); ++i) {
-    if (seg_start == Str::npos) {
-      if (i < p.size() && p[i] != '/' && p[i] != '\\') {
-        seg_start = i;
-      }
-    } else {
-      if (i == p.size() || p[i] == '/' || p[i] == '\\') {
-        // The current character ends a segment.
-        Str::size_type len = i - seg_start;
-        if (first) {
-          first = false;
-          abs = len == 2 &&
-                ((p[seg_start] >= 'A' && p[seg_start] <= 'Z') ||
-                 (p[seg_start] >= 'a' && p[seg_start] <= 'z')) &&
-                p[seg_start + 1] == ':';
-          segments.push_back(std::make_pair(seg_start, len));
-          starts_with_dot = !abs && p.compare(seg_start, len, kDot) == 0;
-        } else {
-          if (p.compare(seg_start, len, kDot) == 0) {
-            if (segments.empty()) {
-              // Retain "." if that is the first (and possibly only segment).
-              segments.push_back(std::make_pair(seg_start, len));
-              starts_with_dot = true;
-            }
-          } else {
-            if (starts_with_dot) {
-              // Delete the existing "." if that was the only path segment.
-              segments.clear();
-              starts_with_dot = false;
-            }
-            if (p.compare(seg_start, len, kDotDot) == 0) {
-              if (segments.empty() ||
-                  p.compare(segments.back().first, segments.back().second,
-                            kDotDot) == 0) {
-                // Preserve ".." if the path is relative and there are only ".."
-                // segment(s) at the front.
-                segments.push_back(std::make_pair(seg_start, len));
-              } else if (!abs || segments.size() > 1) {
-                // Remove the last segment unless the path is already at the
-                // root directory.
-                segments.pop_back();
-              }  // Ignore ".." otherwise.
-            } else {
-              // This is a normal path segment, i.e. neither "." nor ".."
-              segments.push_back(std::make_pair(seg_start, len));
-            }
-          }
-        }
-        // Indicate that there's no segment started.
-        seg_start = Str::npos;
-      }
-    }
-  }
-  std::basic_stringstream<C> res;
-  first = true;
-  for (const auto& i : segments) {
-    Str s = p.substr(i.first, i.second);
-    if (first) {
-      first = false;
-    } else {
-      res << '\\';
-    }
-    res << s;
-  }
-  if (abs && segments.size() == 1) {
-    res << '\\';
-  }
-  return res.str();
-}
-
 template <typename char_type>
 static bool AsWindowsPathImpl(const std::basic_string<char_type>& path,
                               std::basic_string<char_type>* result,
@@ -407,7 +291,7 @@
     mutable_path = drive + path;
   }  // otherwise this is a relative path, or absolute Windows path.
 
-  *result = NormalizeWindowsPath(mutable_path);
+  *result = bazel::windows::Normalize(mutable_path);
   return true;
 }
 
@@ -449,7 +333,7 @@
     }
   }
 
-  *result = std::wstring(L"\\\\?\\") + NormalizeWindowsPath(*result);
+  *result = std::wstring(L"\\\\?\\") + bazel::windows::Normalize(*result);
   return true;
 }
 
@@ -571,8 +455,4 @@
   return 'a' + wdrive - offset;
 }
 
-std::string TestOnly_NormalizeWindowsPath(const std::string& path) {
-  return NormalizeWindowsPath(path);
-}
-
 }  // namespace blaze_util
diff --git a/src/main/native/windows/file.cc b/src/main/native/windows/file.cc
index e9b842a..b7fb4b3 100644
--- a/src/main/native/windows/file.cc
+++ b/src/main/native/windows/file.cc
@@ -16,16 +16,17 @@
 #define WIN32_LEAN_AND_MEAN
 #endif
 
-#include <windows.h>
-#include <WinIoCtl.h>
+#include "src/main/native/windows/file.h"
 
+#include <WinIoCtl.h>
 #include <stdint.h>  // uint8_t
+#include <windows.h>
 
 #include <memory>
 #include <sstream>
 #include <string>
+#include <vector>
 
-#include "src/main/native/windows/file.h"
 #include "src/main/native/windows/util.h"
 
 namespace bazel {
@@ -685,5 +686,92 @@
   return DeletePathResult::kSuccess;
 }
 
+template <typename C>
+std::basic_string<C> NormalizeImpl(const std::basic_string<C>& p) {
+  if (p.empty()) {
+    return p;
+  }
+  typedef std::basic_string<C> Str;
+  static const Str kDot(1, '.');
+  static const Str kDotDot(2, '.');
+  std::vector<std::pair<Str::size_type, Str::size_type> > segments;
+  Str::size_type seg_start = Str::npos;
+  bool first = true;
+  bool abs = false;
+  bool starts_with_dot = false;
+  for (Str::size_type i = HasUncPrefix(p.c_str()) ? 4 : 0; i <= p.size(); ++i) {
+    if (seg_start == Str::npos) {
+      if (i < p.size() && p[i] != '/' && p[i] != '\\') {
+        seg_start = i;
+      }
+    } else {
+      if (i == p.size() || (p[i] == '/' || p[i] == '\\')) {
+        // The current character ends a segment.
+        Str::size_type len = i - seg_start;
+        if (first) {
+          first = false;
+          abs = len == 2 &&
+                ((p[seg_start] >= 'A' && p[seg_start] <= 'Z') ||
+                 (p[seg_start] >= 'a' && p[seg_start] <= 'z')) &&
+                p[seg_start + 1] == ':';
+          segments.push_back(std::make_pair(seg_start, len));
+          starts_with_dot = !abs && p.compare(seg_start, len, kDot) == 0;
+        } else {
+          if (p.compare(seg_start, len, kDot) == 0) {
+            if (segments.empty()) {
+              // Retain "." if that is the first (and possibly only) segment.
+              segments.push_back(std::make_pair(seg_start, len));
+              starts_with_dot = true;
+            }
+          } else {
+            if (starts_with_dot) {
+              // Delete the existing "." if that was the only path segment.
+              segments.clear();
+              starts_with_dot = false;
+            }
+            if (p.compare(seg_start, len, kDotDot) == 0) {
+              if (segments.empty() ||
+                  p.compare(segments.back().first, segments.back().second,
+                            kDotDot) == 0) {
+                // Preserve ".." if the path is relative and there are only ".."
+                // segment(s) at the front.
+                segments.push_back(std::make_pair(seg_start, len));
+              } else if (!abs || segments.size() > 1) {
+                // Remove the last segment unless the path is already at the
+                // root directory.
+                segments.pop_back();
+              }  // Ignore ".." otherwise.
+            } else {
+              // This is a normal path segment, i.e. neither "." nor ".."
+              segments.push_back(std::make_pair(seg_start, len));
+            }
+          }
+        }
+        // Indicate that there's no segment started.
+        seg_start = Str::npos;
+      }
+    }
+  }
+  std::basic_stringstream<C> res;
+  first = true;
+  for (const auto& i : segments) {
+    Str s = p.substr(i.first, i.second);
+    if (first) {
+      first = false;
+    } else {
+      res << '\\';
+    }
+    res << s;
+  }
+  if (abs && segments.size() == 1) {
+    res << '\\';
+  }
+  return res.str();
+}
+
+std::string Normalize(const std::string& p) { return NormalizeImpl(p); }
+
+std::wstring Normalize(const std::wstring& p) { return NormalizeImpl(p); }
+
 }  // namespace windows
 }  // namespace bazel
diff --git a/src/main/native/windows/file.h b/src/main/native/windows/file.h
index 8063a4c..5b73c3e 100644
--- a/src/main/native/windows/file.h
+++ b/src/main/native/windows/file.h
@@ -148,6 +148,42 @@
 // function writes an error message into it.
 int DeletePath(const wstring& path, wstring* error);
 
+// Returns a normalized form of the input `path`.
+//
+// Normalization:
+//   Normalization means removing "." references, resolving ".." references,
+//   and deduplicating "/" characters while converting them to "\\".  For
+//   example if `path` is "foo/../bar/.//qux", the result is "bar\\qux".
+//
+//   Uplevel references ("..") that cannot go any higher in the directory tree
+//   are preserved if the path is relative, and ignored if the path is
+//   absolute, e.g. "../../foo" is normalized to "..\\..\\foo" but "c:/.." is
+//   normalized to "c:\\".
+//
+//   This method does not validate the semantics of the `path`; leading
+//   directory separators are simply skipped. Illegal paths such as one with a
+//   drive specifier in the middle (e.g. "foo/c:/bar") are accepted -- it's the
+//   caller's responsibility to pass a path that, when normalized, will be
+//   semantically correct.
+//
+//   Current directory references (".") are preserved if and only if they are
+//   the only path segment, so "./" becomes "." but "./foo" becomes "foo".
+//
+// Arguments:
+//   `path` must be a relative or absolute Windows path, it may use "/" instead
+//   of "\\". The path should not start with "/" or "\\".
+//
+// Result:
+//   Returns the normalized path; an empty `path` is returned unchanged.
+//
+//   The result won't have a UNC prefix, even if `path` did. The result won't
+//   have a trailing "\\" except when and only when the path is normalized to
+//   just a drive specifier (e.g. when `path` is "c:/" or "c:/foo/.."). The
+//   result will preserve the casing of the input, so "D:/Bar" becomes
+//   "D:\\Bar".
+std::string Normalize(const std::string& p);
+std::wstring Normalize(const std::wstring& p);
+
 }  // namespace windows
 }  // namespace bazel
 
diff --git a/src/test/cpp/util/path_windows_test.cc b/src/test/cpp/util/path_windows_test.cc
index 03e2762..403d7a0 100644
--- a/src/test/cpp/util/path_windows_test.cc
+++ b/src/test/cpp/util/path_windows_test.cc
@@ -45,87 +45,6 @@
 using std::unique_ptr;
 using std::wstring;
 
-TEST(PathWindowsTest, TestNormalizeWindowsPath) {
-#define ASSERT_NORMALIZE(x, y) EXPECT_EQ(TestOnly_NormalizeWindowsPath(x), y);
-
-  ASSERT_NORMALIZE("", "");
-  ASSERT_NORMALIZE("a", "a");
-  ASSERT_NORMALIZE("foo/bar", "foo\\bar");
-  ASSERT_NORMALIZE("foo/../bar", "bar");
-  ASSERT_NORMALIZE("a/", "a");
-  ASSERT_NORMALIZE("foo", "foo");
-  ASSERT_NORMALIZE("foo/", "foo");
-  ASSERT_NORMALIZE(".", ".");
-  ASSERT_NORMALIZE("./", ".");
-  ASSERT_NORMALIZE("..", "..");
-  ASSERT_NORMALIZE("../", "..");
-  ASSERT_NORMALIZE("./..", "..");
-  ASSERT_NORMALIZE("./../", "..");
-  ASSERT_NORMALIZE("../.", "..");
-  ASSERT_NORMALIZE(".././", "..");
-  ASSERT_NORMALIZE("...", "...");
-  ASSERT_NORMALIZE(".../", "...");
-  ASSERT_NORMALIZE("a/", "a");
-  ASSERT_NORMALIZE(".a", ".a");
-  ASSERT_NORMALIZE("..a", "..a");
-  ASSERT_NORMALIZE("...a", "...a");
-  ASSERT_NORMALIZE("./a", "a");
-  ASSERT_NORMALIZE("././a", "a");
-  ASSERT_NORMALIZE("./../a", "..\\a");
-  ASSERT_NORMALIZE(".././a", "..\\a");
-  ASSERT_NORMALIZE("../../a", "..\\..\\a");
-  ASSERT_NORMALIZE("../.../a", "..\\...\\a");
-  ASSERT_NORMALIZE(".../../a", "a");
-  ASSERT_NORMALIZE("a/..", "");
-  ASSERT_NORMALIZE("a/../", "");
-  ASSERT_NORMALIZE("a/./../", "");
-
-  ASSERT_NORMALIZE("c:/", "c:\\");
-  ASSERT_NORMALIZE("c:/a", "c:\\a");
-  ASSERT_NORMALIZE("c:/foo/bar", "c:\\foo\\bar");
-  ASSERT_NORMALIZE("c:/foo/../bar", "c:\\bar");
-  ASSERT_NORMALIZE("d:/a/", "d:\\a");
-  ASSERT_NORMALIZE("D:/foo", "D:\\foo");
-  ASSERT_NORMALIZE("c:/foo/", "c:\\foo");
-  ASSERT_NORMALIZE("c:/.", "c:\\");
-  ASSERT_NORMALIZE("c:/./", "c:\\");
-  ASSERT_NORMALIZE("c:/..", "c:\\");
-  ASSERT_NORMALIZE("c:/../", "c:\\");
-  ASSERT_NORMALIZE("c:/./..", "c:\\");
-  ASSERT_NORMALIZE("c:/./../", "c:\\");
-  ASSERT_NORMALIZE("c:/../.", "c:\\");
-  ASSERT_NORMALIZE("c:/.././", "c:\\");
-  ASSERT_NORMALIZE("c:/...", "c:\\...");
-  ASSERT_NORMALIZE("c:/.../", "c:\\...");
-  ASSERT_NORMALIZE("c:/.a", "c:\\.a");
-  ASSERT_NORMALIZE("c:/..a", "c:\\..a");
-  ASSERT_NORMALIZE("c:/...a", "c:\\...a");
-  ASSERT_NORMALIZE("c:/./a", "c:\\a");
-  ASSERT_NORMALIZE("c:/././a", "c:\\a");
-  ASSERT_NORMALIZE("c:/./../a", "c:\\a");
-  ASSERT_NORMALIZE("c:/.././a", "c:\\a");
-  ASSERT_NORMALIZE("c:/../../a", "c:\\a");
-  ASSERT_NORMALIZE("c:/../.../a", "c:\\...\\a");
-  ASSERT_NORMALIZE("c:/.../../a", "c:\\a");
-  ASSERT_NORMALIZE("c:/a/..", "c:\\");
-  ASSERT_NORMALIZE("c:/a/../", "c:\\");
-  ASSERT_NORMALIZE("c:/a/./../", "c:\\");
-  ASSERT_NORMALIZE("c:/../d:/e", "c:\\d:\\e");
-  ASSERT_NORMALIZE("c:/../d:/../e", "c:\\e");
-
-  ASSERT_NORMALIZE("foo", "foo");
-  ASSERT_NORMALIZE("foo/", "foo");
-  ASSERT_NORMALIZE("foo//bar", "foo\\bar");
-  ASSERT_NORMALIZE("../..//foo/./bar", "..\\..\\foo\\bar");
-  ASSERT_NORMALIZE("../foo/baz/../bar", "..\\foo\\bar");
-  ASSERT_NORMALIZE("c:", "c:\\");
-  ASSERT_NORMALIZE("c:/", "c:\\");
-  ASSERT_NORMALIZE("c:\\", "c:\\");
-  ASSERT_NORMALIZE("c:\\..//foo/./bar/", "c:\\foo\\bar");
-  ASSERT_NORMALIZE("../foo", "..\\foo");
-#undef ASSERT_NORMALIZE
-}
-
 TEST(PathWindowsTest, TestDirname) {
   ASSERT_EQ("", Dirname(""));
   ASSERT_EQ("/", Dirname("/"));
diff --git a/src/test/native/windows/file_test.cc b/src/test/native/windows/file_test.cc
index 08d2675..49ce133 100644
--- a/src/test/native/windows/file_test.cc
+++ b/src/test/native/windows/file_test.cc
@@ -375,5 +375,85 @@
 #undef TOWSTRING
 #undef WLINE
 
+TEST(FileTests, TestNormalize) {
+#define ASSERT_NORMALIZE(x, y) EXPECT_EQ(Normalize(x), y);
+  ASSERT_NORMALIZE("", "");
+  ASSERT_NORMALIZE("a", "a");
+  ASSERT_NORMALIZE("foo/bar", "foo\\bar");
+  ASSERT_NORMALIZE("foo/../bar", "bar");
+  ASSERT_NORMALIZE("a/", "a");
+  ASSERT_NORMALIZE("foo", "foo");
+  ASSERT_NORMALIZE("foo/", "foo");
+  ASSERT_NORMALIZE(".", ".");
+  ASSERT_NORMALIZE("./", ".");
+  ASSERT_NORMALIZE("..", "..");
+  ASSERT_NORMALIZE("../", "..");
+  ASSERT_NORMALIZE("./..", "..");
+  ASSERT_NORMALIZE("./../", "..");
+  ASSERT_NORMALIZE("../.", "..");
+  ASSERT_NORMALIZE(".././", "..");
+  ASSERT_NORMALIZE("...", "...");
+  ASSERT_NORMALIZE(".../", "...");
+  ASSERT_NORMALIZE("a/", "a");
+  ASSERT_NORMALIZE(".a", ".a");
+  ASSERT_NORMALIZE("..a", "..a");
+  ASSERT_NORMALIZE("...a", "...a");
+  ASSERT_NORMALIZE("./a", "a");
+  ASSERT_NORMALIZE("././a", "a");
+  ASSERT_NORMALIZE("./../a", "..\\a");
+  ASSERT_NORMALIZE(".././a", "..\\a");
+  ASSERT_NORMALIZE("../../a", "..\\..\\a");
+  ASSERT_NORMALIZE("../.../a", "..\\...\\a");
+  ASSERT_NORMALIZE(".../../a", "a");
+  ASSERT_NORMALIZE("a/..", "");
+  ASSERT_NORMALIZE("a/../", "");
+  ASSERT_NORMALIZE("a/./../", "");
+
+  ASSERT_NORMALIZE("c:/", "c:\\");
+  ASSERT_NORMALIZE("c:/a", "c:\\a");
+  ASSERT_NORMALIZE("c:/foo/bar", "c:\\foo\\bar");
+  ASSERT_NORMALIZE("c:/foo/../bar", "c:\\bar");
+  ASSERT_NORMALIZE("d:/a/", "d:\\a");
+  ASSERT_NORMALIZE("D:/foo", "D:\\foo");
+  ASSERT_NORMALIZE("c:/foo/", "c:\\foo");
+  ASSERT_NORMALIZE("c:/.", "c:\\");
+  ASSERT_NORMALIZE("c:/./", "c:\\");
+  ASSERT_NORMALIZE("c:/..", "c:\\");
+  ASSERT_NORMALIZE("c:/../", "c:\\");
+  ASSERT_NORMALIZE("c:/./..", "c:\\");
+  ASSERT_NORMALIZE("c:/./../", "c:\\");
+  ASSERT_NORMALIZE("c:/../.", "c:\\");
+  ASSERT_NORMALIZE("c:/.././", "c:\\");
+  ASSERT_NORMALIZE("c:/...", "c:\\...");
+  ASSERT_NORMALIZE("c:/.../", "c:\\...");
+  ASSERT_NORMALIZE("c:/.a", "c:\\.a");
+  ASSERT_NORMALIZE("c:/..a", "c:\\..a");
+  ASSERT_NORMALIZE("c:/...a", "c:\\...a");
+  ASSERT_NORMALIZE("c:/./a", "c:\\a");
+  ASSERT_NORMALIZE("c:/././a", "c:\\a");
+  ASSERT_NORMALIZE("c:/./../a", "c:\\a");
+  ASSERT_NORMALIZE("c:/.././a", "c:\\a");
+  ASSERT_NORMALIZE("c:/../../a", "c:\\a");
+  ASSERT_NORMALIZE("c:/../.../a", "c:\\...\\a");
+  ASSERT_NORMALIZE("c:/.../../a", "c:\\a");
+  ASSERT_NORMALIZE("c:/a/..", "c:\\");
+  ASSERT_NORMALIZE("c:/a/../", "c:\\");
+  ASSERT_NORMALIZE("c:/a/./../", "c:\\");
+  ASSERT_NORMALIZE("c:/../d:/e", "c:\\d:\\e");
+  ASSERT_NORMALIZE("c:/../d:/../e", "c:\\e");
+
+  ASSERT_NORMALIZE("foo", "foo");
+  ASSERT_NORMALIZE("foo/", "foo");
+  ASSERT_NORMALIZE("foo//bar", "foo\\bar");
+  ASSERT_NORMALIZE("../..//foo/./bar", "..\\..\\foo\\bar");
+  ASSERT_NORMALIZE("../foo/baz/../bar", "..\\foo\\bar");
+  ASSERT_NORMALIZE("c:", "c:\\");
+  ASSERT_NORMALIZE("c:/", "c:\\");
+  ASSERT_NORMALIZE("c:\\", "c:\\");
+  ASSERT_NORMALIZE("c:\\..//foo/./bar/", "c:\\foo\\bar");
+  ASSERT_NORMALIZE("../foo", "..\\foo");
+#undef ASSERT_NORMALIZE
+}
+
 }  // namespace windows
 }  // namespace bazel