Windows: move path normalization into JNI library
Motivation: need to support process creation from
relative paths. Absolutizing the paths requires
normalization.
Closes #8473.
PiperOrigin-RevId: 250245572
diff --git a/src/main/cpp/util/path_platform.h b/src/main/cpp/util/path_platform.h
index db9ed89..da5bf4b 100644
--- a/src/main/cpp/util/path_platform.h
+++ b/src/main/cpp/util/path_platform.h
@@ -83,8 +83,6 @@
bool IsRootDirectoryW(const std::wstring &path);
-std::string TestOnly_NormalizeWindowsPath(const std::string &path);
-
// Converts 'path' to Windows style.
//
// 'path' is absolute or relative or current-drive-relative (e.g.
diff --git a/src/main/cpp/util/path_windows.cc b/src/main/cpp/util/path_windows.cc
index 7f6196b..bb4da57 100644
--- a/src/main/cpp/util/path_windows.cc
+++ b/src/main/cpp/util/path_windows.cc
@@ -242,122 +242,6 @@
void assignNUL(std::wstring* s) { s->assign(L"NUL"); }
-// Returns a normalized form of the input `path`.
-//
-// Normalization:
-// Normalization means removing "." references, resolving ".." references,
-// and deduplicating "/" characters while converting them to "\\". For
-// example if `path` is "foo/../bar/.//qux", the result is "bar\\qux".
-//
-// Uplevel references ("..") that cannot go any higher in the directory tree
-// are preserved if the path is relative, and ignored if the path is
-// absolute, e.g. "../../foo" is normalized to "..\\..\\foo" but "c:/.." is
-// normalized to "c:\\".
-//
-// This method does not check the semantics of the `path` beyond checking if
-// it starts with a directory separator. Illegal paths such as one with a
-// drive specifier in the middle (e.g. "foo/c:/bar") are accepted -- it's the
-// caller's responsibility to pass a path that, when normalized, will be
-// semantically correct.
-//
-// Current directory references (".") are preserved if and only if they are
-// the only path segment, so "./" becomes "." but "./foo" becomes "foo".
-//
-// Arguments:
-// `path` must be a relative or absolute Windows path, it may use "/" instead
-// of "\\". The path should not start with "/" or "\\".
-//
-// Result:
-// Returns false if and only if the path starts with a directory separator.
-//
-// The result won't have a UNC prefix, even if `path` did. The result won't
-// have a trailing "\\" except when and only when the path is normalized to
-// just a drive specifier (e.g. when `path` is "c:/" or "c:/foo/.."). The
-// result will preserve the casing of the input, so "D:/Bar" becomes
-// "D:\\Bar".
-template <typename C>
-std::basic_string<C> NormalizeWindowsPath(const std::basic_string<C>& p) {
- if (p.empty()) {
- return p;
- }
- typedef std::basic_string<C> Str;
- static const Str kDot(1, '.');
- static const Str kDotDot(2, '.');
- std::vector<std::pair<Str::size_type, Str::size_type> > segments;
- Str::size_type seg_start = Str::npos;
- bool first = true;
- bool abs = false;
- bool starts_with_dot = false;
- for (Str::size_type i = HasUncPrefix(p.c_str()) ? 4 : 0; i <= p.size(); ++i) {
- if (seg_start == Str::npos) {
- if (i < p.size() && p[i] != '/' && p[i] != '\\') {
- seg_start = i;
- }
- } else {
- if (i == p.size() || p[i] == '/' || p[i] == '\\') {
- // The current character ends a segment.
- Str::size_type len = i - seg_start;
- if (first) {
- first = false;
- abs = len == 2 &&
- ((p[seg_start] >= 'A' && p[seg_start] <= 'Z') ||
- (p[seg_start] >= 'a' && p[seg_start] <= 'z')) &&
- p[seg_start + 1] == ':';
- segments.push_back(std::make_pair(seg_start, len));
- starts_with_dot = !abs && p.compare(seg_start, len, kDot) == 0;
- } else {
- if (p.compare(seg_start, len, kDot) == 0) {
- if (segments.empty()) {
- // Retain "." if that is the first (and possibly only segment).
- segments.push_back(std::make_pair(seg_start, len));
- starts_with_dot = true;
- }
- } else {
- if (starts_with_dot) {
- // Delete the existing "." if that was the only path segment.
- segments.clear();
- starts_with_dot = false;
- }
- if (p.compare(seg_start, len, kDotDot) == 0) {
- if (segments.empty() ||
- p.compare(segments.back().first, segments.back().second,
- kDotDot) == 0) {
- // Preserve ".." if the path is relative and there are only ".."
- // segment(s) at the front.
- segments.push_back(std::make_pair(seg_start, len));
- } else if (!abs || segments.size() > 1) {
- // Remove the last segment unless the path is already at the
- // root directory.
- segments.pop_back();
- } // Ignore ".." otherwise.
- } else {
- // This is a normal path segment, i.e. neither "." nor ".."
- segments.push_back(std::make_pair(seg_start, len));
- }
- }
- }
- // Indicate that there's no segment started.
- seg_start = Str::npos;
- }
- }
- }
- std::basic_stringstream<C> res;
- first = true;
- for (const auto& i : segments) {
- Str s = p.substr(i.first, i.second);
- if (first) {
- first = false;
- } else {
- res << '\\';
- }
- res << s;
- }
- if (abs && segments.size() == 1) {
- res << '\\';
- }
- return res.str();
-}
-
template <typename char_type>
static bool AsWindowsPathImpl(const std::basic_string<char_type>& path,
std::basic_string<char_type>* result,
@@ -407,7 +291,7 @@
mutable_path = drive + path;
} // otherwise this is a relative path, or absolute Windows path.
- *result = NormalizeWindowsPath(mutable_path);
+ *result = bazel::windows::Normalize(mutable_path);
return true;
}
@@ -449,7 +333,7 @@
}
}
- *result = std::wstring(L"\\\\?\\") + NormalizeWindowsPath(*result);
+ *result = std::wstring(L"\\\\?\\") + bazel::windows::Normalize(*result);
return true;
}
@@ -571,8 +455,4 @@
return 'a' + wdrive - offset;
}
-std::string TestOnly_NormalizeWindowsPath(const std::string& path) {
- return NormalizeWindowsPath(path);
-}
-
} // namespace blaze_util
diff --git a/src/main/native/windows/file.cc b/src/main/native/windows/file.cc
index e9b842a..b7fb4b3 100644
--- a/src/main/native/windows/file.cc
+++ b/src/main/native/windows/file.cc
@@ -16,16 +16,17 @@
#define WIN32_LEAN_AND_MEAN
#endif
-#include <windows.h>
-#include <WinIoCtl.h>
+#include "src/main/native/windows/file.h"
+#include <WinIoCtl.h>
#include <stdint.h> // uint8_t
+#include <windows.h>
#include <memory>
#include <sstream>
#include <string>
+#include <vector>
-#include "src/main/native/windows/file.h"
#include "src/main/native/windows/util.h"
namespace bazel {
@@ -685,5 +686,92 @@
return DeletePathResult::kSuccess;
}
+template <typename C>
+std::basic_string<C> NormalizeImpl(const std::basic_string<C>& p) {
+ if (p.empty()) {
+ return p;
+ }
+ typedef std::basic_string<C> Str;
+ static const Str kDot(1, '.');
+ static const Str kDotDot(2, '.');
+ std::vector<std::pair<Str::size_type, Str::size_type> > segments;
+ Str::size_type seg_start = Str::npos;
+ bool first = true;
+ bool abs = false;
+ bool starts_with_dot = false;
+ for (Str::size_type i = HasUncPrefix(p.c_str()) ? 4 : 0; i <= p.size(); ++i) {
+ if (seg_start == Str::npos) {
+ if (i < p.size() && p[i] != '/' && p[i] != '\\') {
+ seg_start = i;
+ }
+ } else {
+ if (i == p.size() || (p[i] == '/' || p[i] == '\\')) {
+ // The current character ends a segment.
+ Str::size_type len = i - seg_start;
+ if (first) {
+ first = false;
+ abs = len == 2 &&
+ ((p[seg_start] >= 'A' && p[seg_start] <= 'Z') ||
+ (p[seg_start] >= 'a' && p[seg_start] <= 'z')) &&
+ p[seg_start + 1] == ':';
+ segments.push_back(std::make_pair(seg_start, len));
+ starts_with_dot = !abs && p.compare(seg_start, len, kDot) == 0;
+ } else {
+ if (p.compare(seg_start, len, kDot) == 0) {
+ if (segments.empty()) {
+ // Retain "." if that is the first (and possibly only segment).
+ segments.push_back(std::make_pair(seg_start, len));
+ starts_with_dot = true;
+ }
+ } else {
+ if (starts_with_dot) {
+ // Delete the existing "." if that was the only path segment.
+ segments.clear();
+ starts_with_dot = false;
+ }
+ if (p.compare(seg_start, len, kDotDot) == 0) {
+ if (segments.empty() ||
+ p.compare(segments.back().first, segments.back().second,
+ kDotDot) == 0) {
+ // Preserve ".." if the path is relative and there are only ".."
+ // segment(s) at the front.
+ segments.push_back(std::make_pair(seg_start, len));
+ } else if (!abs || segments.size() > 1) {
+ // Remove the last segment unless the path is already at the
+ // root directory.
+ segments.pop_back();
+ } // Ignore ".." otherwise.
+ } else {
+ // This is a normal path segment, i.e. neither "." nor ".."
+ segments.push_back(std::make_pair(seg_start, len));
+ }
+ }
+ }
+ // Indicate that there's no segment started.
+ seg_start = Str::npos;
+ }
+ }
+ }
+ std::basic_stringstream<C> res;
+ first = true;
+ for (const auto& i : segments) {
+ Str s = p.substr(i.first, i.second);
+ if (first) {
+ first = false;
+ } else {
+ res << '\\';
+ }
+ res << s;
+ }
+ if (abs && segments.size() == 1) {
+ res << '\\';
+ }
+ return res.str();
+}
+
+std::string Normalize(const std::string& p) { return NormalizeImpl(p); }
+
+std::wstring Normalize(const std::wstring& p) { return NormalizeImpl(p); }
+
} // namespace windows
} // namespace bazel
diff --git a/src/main/native/windows/file.h b/src/main/native/windows/file.h
index 8063a4c..5b73c3e 100644
--- a/src/main/native/windows/file.h
+++ b/src/main/native/windows/file.h
@@ -148,6 +148,42 @@
// function writes an error message into it.
int DeletePath(const wstring& path, wstring* error);
+// Returns a normalized form of the input `path`.
+//
+// Normalization:
+// Normalization means removing "." references, resolving ".." references,
+// and deduplicating "/" characters while converting them to "\\". For
+// example if `path` is "foo/../bar/.//qux", the result is "bar\\qux".
+//
+// Uplevel references ("..") that cannot go any higher in the directory tree
+// are preserved if the path is relative, and ignored if the path is
+// absolute, e.g. "../../foo" is normalized to "..\\..\\foo" but "c:/.." is
+// normalized to "c:\\".
+//
+// This method does not check the semantics of the `path`; leading directory
+// separators are simply skipped. Illegal paths such as one with a
+// drive specifier in the middle (e.g. "foo/c:/bar") are accepted -- it's the
+// caller's responsibility to pass a path that, when normalized, will be
+// semantically correct.
+//
+// Current directory references (".") are preserved if and only if they are
+// the only path segment, so "./" becomes "." but "./foo" becomes "foo".
+//
+// Arguments:
+// `path` must be a relative or absolute Windows path, it may use "/" instead
+// of "\\". The path should not start with "/" or "\\".
+//
+// Result:
+// Returns the normalized path. An empty `path` yields an empty result.
+//
+// The result won't have a UNC prefix, even if `path` did. The result won't
+// have a trailing "\\" except when and only when the path is normalized to
+// just a drive specifier (e.g. when `path` is "c:/" or "c:/foo/.."). The
+// result will preserve the casing of the input, so "D:/Bar" becomes
+// "D:\\Bar".
+std::string Normalize(const std::string& p);
+std::wstring Normalize(const std::wstring& p);
+
} // namespace windows
} // namespace bazel
diff --git a/src/test/cpp/util/path_windows_test.cc b/src/test/cpp/util/path_windows_test.cc
index 03e2762..403d7a0 100644
--- a/src/test/cpp/util/path_windows_test.cc
+++ b/src/test/cpp/util/path_windows_test.cc
@@ -45,87 +45,6 @@
using std::unique_ptr;
using std::wstring;
-TEST(PathWindowsTest, TestNormalizeWindowsPath) {
-#define ASSERT_NORMALIZE(x, y) EXPECT_EQ(TestOnly_NormalizeWindowsPath(x), y);
-
- ASSERT_NORMALIZE("", "");
- ASSERT_NORMALIZE("a", "a");
- ASSERT_NORMALIZE("foo/bar", "foo\\bar");
- ASSERT_NORMALIZE("foo/../bar", "bar");
- ASSERT_NORMALIZE("a/", "a");
- ASSERT_NORMALIZE("foo", "foo");
- ASSERT_NORMALIZE("foo/", "foo");
- ASSERT_NORMALIZE(".", ".");
- ASSERT_NORMALIZE("./", ".");
- ASSERT_NORMALIZE("..", "..");
- ASSERT_NORMALIZE("../", "..");
- ASSERT_NORMALIZE("./..", "..");
- ASSERT_NORMALIZE("./../", "..");
- ASSERT_NORMALIZE("../.", "..");
- ASSERT_NORMALIZE(".././", "..");
- ASSERT_NORMALIZE("...", "...");
- ASSERT_NORMALIZE(".../", "...");
- ASSERT_NORMALIZE("a/", "a");
- ASSERT_NORMALIZE(".a", ".a");
- ASSERT_NORMALIZE("..a", "..a");
- ASSERT_NORMALIZE("...a", "...a");
- ASSERT_NORMALIZE("./a", "a");
- ASSERT_NORMALIZE("././a", "a");
- ASSERT_NORMALIZE("./../a", "..\\a");
- ASSERT_NORMALIZE(".././a", "..\\a");
- ASSERT_NORMALIZE("../../a", "..\\..\\a");
- ASSERT_NORMALIZE("../.../a", "..\\...\\a");
- ASSERT_NORMALIZE(".../../a", "a");
- ASSERT_NORMALIZE("a/..", "");
- ASSERT_NORMALIZE("a/../", "");
- ASSERT_NORMALIZE("a/./../", "");
-
- ASSERT_NORMALIZE("c:/", "c:\\");
- ASSERT_NORMALIZE("c:/a", "c:\\a");
- ASSERT_NORMALIZE("c:/foo/bar", "c:\\foo\\bar");
- ASSERT_NORMALIZE("c:/foo/../bar", "c:\\bar");
- ASSERT_NORMALIZE("d:/a/", "d:\\a");
- ASSERT_NORMALIZE("D:/foo", "D:\\foo");
- ASSERT_NORMALIZE("c:/foo/", "c:\\foo");
- ASSERT_NORMALIZE("c:/.", "c:\\");
- ASSERT_NORMALIZE("c:/./", "c:\\");
- ASSERT_NORMALIZE("c:/..", "c:\\");
- ASSERT_NORMALIZE("c:/../", "c:\\");
- ASSERT_NORMALIZE("c:/./..", "c:\\");
- ASSERT_NORMALIZE("c:/./../", "c:\\");
- ASSERT_NORMALIZE("c:/../.", "c:\\");
- ASSERT_NORMALIZE("c:/.././", "c:\\");
- ASSERT_NORMALIZE("c:/...", "c:\\...");
- ASSERT_NORMALIZE("c:/.../", "c:\\...");
- ASSERT_NORMALIZE("c:/.a", "c:\\.a");
- ASSERT_NORMALIZE("c:/..a", "c:\\..a");
- ASSERT_NORMALIZE("c:/...a", "c:\\...a");
- ASSERT_NORMALIZE("c:/./a", "c:\\a");
- ASSERT_NORMALIZE("c:/././a", "c:\\a");
- ASSERT_NORMALIZE("c:/./../a", "c:\\a");
- ASSERT_NORMALIZE("c:/.././a", "c:\\a");
- ASSERT_NORMALIZE("c:/../../a", "c:\\a");
- ASSERT_NORMALIZE("c:/../.../a", "c:\\...\\a");
- ASSERT_NORMALIZE("c:/.../../a", "c:\\a");
- ASSERT_NORMALIZE("c:/a/..", "c:\\");
- ASSERT_NORMALIZE("c:/a/../", "c:\\");
- ASSERT_NORMALIZE("c:/a/./../", "c:\\");
- ASSERT_NORMALIZE("c:/../d:/e", "c:\\d:\\e");
- ASSERT_NORMALIZE("c:/../d:/../e", "c:\\e");
-
- ASSERT_NORMALIZE("foo", "foo");
- ASSERT_NORMALIZE("foo/", "foo");
- ASSERT_NORMALIZE("foo//bar", "foo\\bar");
- ASSERT_NORMALIZE("../..//foo/./bar", "..\\..\\foo\\bar");
- ASSERT_NORMALIZE("../foo/baz/../bar", "..\\foo\\bar");
- ASSERT_NORMALIZE("c:", "c:\\");
- ASSERT_NORMALIZE("c:/", "c:\\");
- ASSERT_NORMALIZE("c:\\", "c:\\");
- ASSERT_NORMALIZE("c:\\..//foo/./bar/", "c:\\foo\\bar");
- ASSERT_NORMALIZE("../foo", "..\\foo");
-#undef ASSERT_NORMALIZE
-}
-
TEST(PathWindowsTest, TestDirname) {
ASSERT_EQ("", Dirname(""));
ASSERT_EQ("/", Dirname("/"));
diff --git a/src/test/native/windows/file_test.cc b/src/test/native/windows/file_test.cc
index 08d2675..49ce133 100644
--- a/src/test/native/windows/file_test.cc
+++ b/src/test/native/windows/file_test.cc
@@ -375,5 +375,85 @@
#undef TOWSTRING
#undef WLINE
+TEST(FileTests, TestNormalize) {
+#define ASSERT_NORMALIZE(x, y) EXPECT_EQ(Normalize(x), y);
+ ASSERT_NORMALIZE("", "");
+ ASSERT_NORMALIZE("a", "a");
+ ASSERT_NORMALIZE("foo/bar", "foo\\bar");
+ ASSERT_NORMALIZE("foo/../bar", "bar");
+ ASSERT_NORMALIZE("a/", "a");
+ ASSERT_NORMALIZE("foo", "foo");
+ ASSERT_NORMALIZE("foo/", "foo");
+ ASSERT_NORMALIZE(".", ".");
+ ASSERT_NORMALIZE("./", ".");
+ ASSERT_NORMALIZE("..", "..");
+ ASSERT_NORMALIZE("../", "..");
+ ASSERT_NORMALIZE("./..", "..");
+ ASSERT_NORMALIZE("./../", "..");
+ ASSERT_NORMALIZE("../.", "..");
+ ASSERT_NORMALIZE(".././", "..");
+ ASSERT_NORMALIZE("...", "...");
+ ASSERT_NORMALIZE(".../", "...");
+ ASSERT_NORMALIZE("a/", "a");
+ ASSERT_NORMALIZE(".a", ".a");
+ ASSERT_NORMALIZE("..a", "..a");
+ ASSERT_NORMALIZE("...a", "...a");
+ ASSERT_NORMALIZE("./a", "a");
+ ASSERT_NORMALIZE("././a", "a");
+ ASSERT_NORMALIZE("./../a", "..\\a");
+ ASSERT_NORMALIZE(".././a", "..\\a");
+ ASSERT_NORMALIZE("../../a", "..\\..\\a");
+ ASSERT_NORMALIZE("../.../a", "..\\...\\a");
+ ASSERT_NORMALIZE(".../../a", "a");
+ ASSERT_NORMALIZE("a/..", "");
+ ASSERT_NORMALIZE("a/../", "");
+ ASSERT_NORMALIZE("a/./../", "");
+
+ ASSERT_NORMALIZE("c:/", "c:\\");
+ ASSERT_NORMALIZE("c:/a", "c:\\a");
+ ASSERT_NORMALIZE("c:/foo/bar", "c:\\foo\\bar");
+ ASSERT_NORMALIZE("c:/foo/../bar", "c:\\bar");
+ ASSERT_NORMALIZE("d:/a/", "d:\\a");
+ ASSERT_NORMALIZE("D:/foo", "D:\\foo");
+ ASSERT_NORMALIZE("c:/foo/", "c:\\foo");
+ ASSERT_NORMALIZE("c:/.", "c:\\");
+ ASSERT_NORMALIZE("c:/./", "c:\\");
+ ASSERT_NORMALIZE("c:/..", "c:\\");
+ ASSERT_NORMALIZE("c:/../", "c:\\");
+ ASSERT_NORMALIZE("c:/./..", "c:\\");
+ ASSERT_NORMALIZE("c:/./../", "c:\\");
+ ASSERT_NORMALIZE("c:/../.", "c:\\");
+ ASSERT_NORMALIZE("c:/.././", "c:\\");
+ ASSERT_NORMALIZE("c:/...", "c:\\...");
+ ASSERT_NORMALIZE("c:/.../", "c:\\...");
+ ASSERT_NORMALIZE("c:/.a", "c:\\.a");
+ ASSERT_NORMALIZE("c:/..a", "c:\\..a");
+ ASSERT_NORMALIZE("c:/...a", "c:\\...a");
+ ASSERT_NORMALIZE("c:/./a", "c:\\a");
+ ASSERT_NORMALIZE("c:/././a", "c:\\a");
+ ASSERT_NORMALIZE("c:/./../a", "c:\\a");
+ ASSERT_NORMALIZE("c:/.././a", "c:\\a");
+ ASSERT_NORMALIZE("c:/../../a", "c:\\a");
+ ASSERT_NORMALIZE("c:/../.../a", "c:\\...\\a");
+ ASSERT_NORMALIZE("c:/.../../a", "c:\\a");
+ ASSERT_NORMALIZE("c:/a/..", "c:\\");
+ ASSERT_NORMALIZE("c:/a/../", "c:\\");
+ ASSERT_NORMALIZE("c:/a/./../", "c:\\");
+ ASSERT_NORMALIZE("c:/../d:/e", "c:\\d:\\e");
+ ASSERT_NORMALIZE("c:/../d:/../e", "c:\\e");
+
+ ASSERT_NORMALIZE("foo", "foo");
+ ASSERT_NORMALIZE("foo/", "foo");
+ ASSERT_NORMALIZE("foo//bar", "foo\\bar");
+ ASSERT_NORMALIZE("../..//foo/./bar", "..\\..\\foo\\bar");
+ ASSERT_NORMALIZE("../foo/baz/../bar", "..\\foo\\bar");
+ ASSERT_NORMALIZE("c:", "c:\\");
+ ASSERT_NORMALIZE("c:/", "c:\\");
+ ASSERT_NORMALIZE("c:\\", "c:\\");
+ ASSERT_NORMALIZE("c:\\..//foo/./bar/", "c:\\foo\\bar");
+ ASSERT_NORMALIZE("../foo", "..\\foo");
+#undef ASSERT_NORMALIZE
+}
+
} // namespace windows
} // namespace bazel