Windows: move path normalization into JNI library Motivation: need to support process creation from relative paths. Absolutizing the paths requires normalization. Closes #8473. PiperOrigin-RevId: 250245572
diff --git a/src/main/cpp/util/path_platform.h b/src/main/cpp/util/path_platform.h index db9ed89..da5bf4b 100644 --- a/src/main/cpp/util/path_platform.h +++ b/src/main/cpp/util/path_platform.h
@@ -83,8 +83,6 @@ bool IsRootDirectoryW(const std::wstring &path); -std::string TestOnly_NormalizeWindowsPath(const std::string &path); - // Converts 'path' to Windows style. // // 'path' is absolute or relative or current-drive-relative (e.g.
diff --git a/src/main/cpp/util/path_windows.cc b/src/main/cpp/util/path_windows.cc index 7f6196b..bb4da57 100644 --- a/src/main/cpp/util/path_windows.cc +++ b/src/main/cpp/util/path_windows.cc
@@ -242,122 +242,6 @@ void assignNUL(std::wstring* s) { s->assign(L"NUL"); } -// Returns a normalized form of the input `path`. -// -// Normalization: -// Normalization means removing "." references, resolving ".." references, -// and deduplicating "/" characters while converting them to "\\". For -// example if `path` is "foo/../bar/.//qux", the result is "bar\\qux". -// -// Uplevel references ("..") that cannot go any higher in the directory tree -// are preserved if the path is relative, and ignored if the path is -// absolute, e.g. "../../foo" is normalized to "..\\..\\foo" but "c:/.." is -// normalized to "c:\\". -// -// This method does not check the semantics of the `path` beyond checking if -// it starts with a directory separator. Illegal paths such as one with a -// drive specifier in the middle (e.g. "foo/c:/bar") are accepted -- it's the -// caller's responsibility to pass a path that, when normalized, will be -// semantically correct. -// -// Current directory references (".") are preserved if and only if they are -// the only path segment, so "./" becomes "." but "./foo" becomes "foo". -// -// Arguments: -// `path` must be a relative or absolute Windows path, it may use "/" instead -// of "\\". The path should not start with "/" or "\\". -// -// Result: -// Returns false if and only if the path starts with a directory separator. -// -// The result won't have a UNC prefix, even if `path` did. The result won't -// have a trailing "\\" except when and only when the path is normalized to -// just a drive specifier (e.g. when `path` is "c:/" or "c:/foo/.."). The -// result will preserve the casing of the input, so "D:/Bar" becomes -// "D:\\Bar". 
-template <typename C> -std::basic_string<C> NormalizeWindowsPath(const std::basic_string<C>& p) { - if (p.empty()) { - return p; - } - typedef std::basic_string<C> Str; - static const Str kDot(1, '.'); - static const Str kDotDot(2, '.'); - std::vector<std::pair<Str::size_type, Str::size_type> > segments; - Str::size_type seg_start = Str::npos; - bool first = true; - bool abs = false; - bool starts_with_dot = false; - for (Str::size_type i = HasUncPrefix(p.c_str()) ? 4 : 0; i <= p.size(); ++i) { - if (seg_start == Str::npos) { - if (i < p.size() && p[i] != '/' && p[i] != '\\') { - seg_start = i; - } - } else { - if (i == p.size() || p[i] == '/' || p[i] == '\\') { - // The current character ends a segment. - Str::size_type len = i - seg_start; - if (first) { - first = false; - abs = len == 2 && - ((p[seg_start] >= 'A' && p[seg_start] <= 'Z') || - (p[seg_start] >= 'a' && p[seg_start] <= 'z')) && - p[seg_start + 1] == ':'; - segments.push_back(std::make_pair(seg_start, len)); - starts_with_dot = !abs && p.compare(seg_start, len, kDot) == 0; - } else { - if (p.compare(seg_start, len, kDot) == 0) { - if (segments.empty()) { - // Retain "." if that is the first (and possibly only segment). - segments.push_back(std::make_pair(seg_start, len)); - starts_with_dot = true; - } - } else { - if (starts_with_dot) { - // Delete the existing "." if that was the only path segment. - segments.clear(); - starts_with_dot = false; - } - if (p.compare(seg_start, len, kDotDot) == 0) { - if (segments.empty() || - p.compare(segments.back().first, segments.back().second, - kDotDot) == 0) { - // Preserve ".." if the path is relative and there are only ".." - // segment(s) at the front. - segments.push_back(std::make_pair(seg_start, len)); - } else if (!abs || segments.size() > 1) { - // Remove the last segment unless the path is already at the - // root directory. - segments.pop_back(); - } // Ignore ".." otherwise. - } else { - // This is a normal path segment, i.e. neither "." nor ".." 
- segments.push_back(std::make_pair(seg_start, len)); - } - } - } - // Indicate that there's no segment started. - seg_start = Str::npos; - } - } - } - std::basic_stringstream<C> res; - first = true; - for (const auto& i : segments) { - Str s = p.substr(i.first, i.second); - if (first) { - first = false; - } else { - res << '\\'; - } - res << s; - } - if (abs && segments.size() == 1) { - res << '\\'; - } - return res.str(); -} - template <typename char_type> static bool AsWindowsPathImpl(const std::basic_string<char_type>& path, std::basic_string<char_type>* result, @@ -407,7 +291,7 @@ mutable_path = drive + path; } // otherwise this is a relative path, or absolute Windows path. - *result = NormalizeWindowsPath(mutable_path); + *result = bazel::windows::Normalize(mutable_path); return true; } @@ -449,7 +333,7 @@ } } - *result = std::wstring(L"\\\\?\\") + NormalizeWindowsPath(*result); + *result = std::wstring(L"\\\\?\\") + bazel::windows::Normalize(*result); return true; } @@ -571,8 +455,4 @@ return 'a' + wdrive - offset; } -std::string TestOnly_NormalizeWindowsPath(const std::string& path) { - return NormalizeWindowsPath(path); -} - } // namespace blaze_util
diff --git a/src/main/native/windows/file.cc b/src/main/native/windows/file.cc index e9b842a..b7fb4b3 100644 --- a/src/main/native/windows/file.cc +++ b/src/main/native/windows/file.cc
@@ -16,16 +16,17 @@ #define WIN32_LEAN_AND_MEAN #endif -#include <windows.h> -#include <WinIoCtl.h> +#include "src/main/native/windows/file.h" +#include <WinIoCtl.h> #include <stdint.h> // uint8_t +#include <windows.h> #include <memory> #include <sstream> #include <string> +#include <vector> -#include "src/main/native/windows/file.h" #include "src/main/native/windows/util.h" namespace bazel { @@ -685,5 +686,92 @@ return DeletePathResult::kSuccess; } +template <typename C> +std::basic_string<C> NormalizeImpl(const std::basic_string<C>& p) { + if (p.empty()) { + return p; + } + typedef std::basic_string<C> Str; + static const Str kDot(1, '.'); + static const Str kDotDot(2, '.'); + std::vector<std::pair<Str::size_type, Str::size_type> > segments; + Str::size_type seg_start = Str::npos; + bool first = true; + bool abs = false; + bool starts_with_dot = false; + for (Str::size_type i = HasUncPrefix(p.c_str()) ? 4 : 0; i <= p.size(); ++i) { + if (seg_start == Str::npos) { + if (i < p.size() && p[i] != '/' && p[i] != '\\') { + seg_start = i; + } + } else { + if (i == p.size() || (p[i] == '/' || p[i] == '\\')) { + // The current character ends a segment. + Str::size_type len = i - seg_start; + if (first) { + first = false; + abs = len == 2 && + ((p[seg_start] >= 'A' && p[seg_start] <= 'Z') || + (p[seg_start] >= 'a' && p[seg_start] <= 'z')) && + p[seg_start + 1] == ':'; + segments.push_back(std::make_pair(seg_start, len)); + starts_with_dot = !abs && p.compare(seg_start, len, kDot) == 0; + } else { + if (p.compare(seg_start, len, kDot) == 0) { + if (segments.empty()) { + // Retain "." if that is the first (and possibly only segment). + segments.push_back(std::make_pair(seg_start, len)); + starts_with_dot = true; + } + } else { + if (starts_with_dot) { + // Delete the existing "." if that was the only path segment. 
+ segments.clear(); + starts_with_dot = false; + } + if (p.compare(seg_start, len, kDotDot) == 0) { + if (segments.empty() || + p.compare(segments.back().first, segments.back().second, + kDotDot) == 0) { + // Preserve ".." if the path is relative and there are only ".." + // segment(s) at the front. + segments.push_back(std::make_pair(seg_start, len)); + } else if (!abs || segments.size() > 1) { + // Remove the last segment unless the path is already at the + // root directory. + segments.pop_back(); + } // Ignore ".." otherwise. + } else { + // This is a normal path segment, i.e. neither "." nor ".." + segments.push_back(std::make_pair(seg_start, len)); + } + } + } + // Indicate that there's no segment started. + seg_start = Str::npos; + } + } + } + std::basic_stringstream<C> res; + first = true; + for (const auto& i : segments) { + Str s = p.substr(i.first, i.second); + if (first) { + first = false; + } else { + res << '\\'; + } + res << s; + } + if (abs && segments.size() == 1) { + res << '\\'; + } + return res.str(); +} + +std::string Normalize(const std::string& p) { return NormalizeImpl(p); } + +std::wstring Normalize(const std::wstring& p) { return NormalizeImpl(p); } + } // namespace windows } // namespace bazel
diff --git a/src/main/native/windows/file.h b/src/main/native/windows/file.h index 8063a4c..5b73c3e 100644 --- a/src/main/native/windows/file.h +++ b/src/main/native/windows/file.h
@@ -148,6 +148,42 @@ // function writes an error message into it. int DeletePath(const wstring& path, wstring* error); +// Returns a normalized form of the input `path`. +// +// Normalization: +// Normalization means removing "." references, resolving ".." references, +// and deduplicating "/" characters while converting them to "\\". For +// example if `path` is "foo/../bar/.//qux", the result is "bar\\qux". +// +// Uplevel references ("..") that cannot go any higher in the directory tree +// are preserved if the path is relative, and ignored if the path is +// absolute, e.g. "../../foo" is normalized to "..\\..\\foo" but "c:/.." is +// normalized to "c:\\". +// +// This method does not check the semantics of the `path` beyond checking if +// its first segment is a drive specifier. Illegal paths such as one with a +// drive specifier in the middle (e.g. "foo/c:/bar") are accepted -- it's the +// caller's responsibility to pass a path that, when normalized, will be +// semantically correct. +// +// Current directory references (".") are preserved if and only if they are +// the only path segment, so "./" becomes "." but "./foo" becomes "foo". +// +// Arguments: +// `path` must be a relative or absolute Windows path; it may use "/" instead +// of "\\". The path should not start with "/" or "\\". +// +// Result: +// Returns the normalized path. An empty `path` yields an empty result. +// +// The result won't have a UNC prefix, even if `path` did. The result won't +// have a trailing "\\" except when and only when the path is normalized to +// just a drive specifier (e.g. when `path` is "c:/" or "c:/foo/.."). The +// result will preserve the casing of the input, so "D:/Bar" becomes +// "D:\\Bar". +std::string Normalize(const std::string& p); +std::wstring Normalize(const std::wstring& p); + } // namespace windows } // namespace bazel
diff --git a/src/test/cpp/util/path_windows_test.cc b/src/test/cpp/util/path_windows_test.cc index 03e2762..403d7a0 100644 --- a/src/test/cpp/util/path_windows_test.cc +++ b/src/test/cpp/util/path_windows_test.cc
@@ -45,87 +45,6 @@ using std::unique_ptr; using std::wstring; -TEST(PathWindowsTest, TestNormalizeWindowsPath) { -#define ASSERT_NORMALIZE(x, y) EXPECT_EQ(TestOnly_NormalizeWindowsPath(x), y); - - ASSERT_NORMALIZE("", ""); - ASSERT_NORMALIZE("a", "a"); - ASSERT_NORMALIZE("foo/bar", "foo\\bar"); - ASSERT_NORMALIZE("foo/../bar", "bar"); - ASSERT_NORMALIZE("a/", "a"); - ASSERT_NORMALIZE("foo", "foo"); - ASSERT_NORMALIZE("foo/", "foo"); - ASSERT_NORMALIZE(".", "."); - ASSERT_NORMALIZE("./", "."); - ASSERT_NORMALIZE("..", ".."); - ASSERT_NORMALIZE("../", ".."); - ASSERT_NORMALIZE("./..", ".."); - ASSERT_NORMALIZE("./../", ".."); - ASSERT_NORMALIZE("../.", ".."); - ASSERT_NORMALIZE(".././", ".."); - ASSERT_NORMALIZE("...", "..."); - ASSERT_NORMALIZE(".../", "..."); - ASSERT_NORMALIZE("a/", "a"); - ASSERT_NORMALIZE(".a", ".a"); - ASSERT_NORMALIZE("..a", "..a"); - ASSERT_NORMALIZE("...a", "...a"); - ASSERT_NORMALIZE("./a", "a"); - ASSERT_NORMALIZE("././a", "a"); - ASSERT_NORMALIZE("./../a", "..\\a"); - ASSERT_NORMALIZE(".././a", "..\\a"); - ASSERT_NORMALIZE("../../a", "..\\..\\a"); - ASSERT_NORMALIZE("../.../a", "..\\...\\a"); - ASSERT_NORMALIZE(".../../a", "a"); - ASSERT_NORMALIZE("a/..", ""); - ASSERT_NORMALIZE("a/../", ""); - ASSERT_NORMALIZE("a/./../", ""); - - ASSERT_NORMALIZE("c:/", "c:\\"); - ASSERT_NORMALIZE("c:/a", "c:\\a"); - ASSERT_NORMALIZE("c:/foo/bar", "c:\\foo\\bar"); - ASSERT_NORMALIZE("c:/foo/../bar", "c:\\bar"); - ASSERT_NORMALIZE("d:/a/", "d:\\a"); - ASSERT_NORMALIZE("D:/foo", "D:\\foo"); - ASSERT_NORMALIZE("c:/foo/", "c:\\foo"); - ASSERT_NORMALIZE("c:/.", "c:\\"); - ASSERT_NORMALIZE("c:/./", "c:\\"); - ASSERT_NORMALIZE("c:/..", "c:\\"); - ASSERT_NORMALIZE("c:/../", "c:\\"); - ASSERT_NORMALIZE("c:/./..", "c:\\"); - ASSERT_NORMALIZE("c:/./../", "c:\\"); - ASSERT_NORMALIZE("c:/../.", "c:\\"); - ASSERT_NORMALIZE("c:/.././", "c:\\"); - ASSERT_NORMALIZE("c:/...", "c:\\..."); - ASSERT_NORMALIZE("c:/.../", "c:\\..."); - ASSERT_NORMALIZE("c:/.a", "c:\\.a"); - 
ASSERT_NORMALIZE("c:/..a", "c:\\..a"); - ASSERT_NORMALIZE("c:/...a", "c:\\...a"); - ASSERT_NORMALIZE("c:/./a", "c:\\a"); - ASSERT_NORMALIZE("c:/././a", "c:\\a"); - ASSERT_NORMALIZE("c:/./../a", "c:\\a"); - ASSERT_NORMALIZE("c:/.././a", "c:\\a"); - ASSERT_NORMALIZE("c:/../../a", "c:\\a"); - ASSERT_NORMALIZE("c:/../.../a", "c:\\...\\a"); - ASSERT_NORMALIZE("c:/.../../a", "c:\\a"); - ASSERT_NORMALIZE("c:/a/..", "c:\\"); - ASSERT_NORMALIZE("c:/a/../", "c:\\"); - ASSERT_NORMALIZE("c:/a/./../", "c:\\"); - ASSERT_NORMALIZE("c:/../d:/e", "c:\\d:\\e"); - ASSERT_NORMALIZE("c:/../d:/../e", "c:\\e"); - - ASSERT_NORMALIZE("foo", "foo"); - ASSERT_NORMALIZE("foo/", "foo"); - ASSERT_NORMALIZE("foo//bar", "foo\\bar"); - ASSERT_NORMALIZE("../..//foo/./bar", "..\\..\\foo\\bar"); - ASSERT_NORMALIZE("../foo/baz/../bar", "..\\foo\\bar"); - ASSERT_NORMALIZE("c:", "c:\\"); - ASSERT_NORMALIZE("c:/", "c:\\"); - ASSERT_NORMALIZE("c:\\", "c:\\"); - ASSERT_NORMALIZE("c:\\..//foo/./bar/", "c:\\foo\\bar"); - ASSERT_NORMALIZE("../foo", "..\\foo"); -#undef ASSERT_NORMALIZE -} - TEST(PathWindowsTest, TestDirname) { ASSERT_EQ("", Dirname("")); ASSERT_EQ("/", Dirname("/"));
diff --git a/src/test/native/windows/file_test.cc b/src/test/native/windows/file_test.cc index 08d2675..49ce133 100644 --- a/src/test/native/windows/file_test.cc +++ b/src/test/native/windows/file_test.cc
@@ -375,5 +375,85 @@ #undef TOWSTRING #undef WLINE +TEST(FileTests, TestNormalize) { +#define ASSERT_NORMALIZE(x, y) EXPECT_EQ(Normalize(x), y); + ASSERT_NORMALIZE("", ""); + ASSERT_NORMALIZE("a", "a"); + ASSERT_NORMALIZE("foo/bar", "foo\\bar"); + ASSERT_NORMALIZE("foo/../bar", "bar"); + ASSERT_NORMALIZE("a/", "a"); + ASSERT_NORMALIZE("foo", "foo"); + ASSERT_NORMALIZE("foo/", "foo"); + ASSERT_NORMALIZE(".", "."); + ASSERT_NORMALIZE("./", "."); + ASSERT_NORMALIZE("..", ".."); + ASSERT_NORMALIZE("../", ".."); + ASSERT_NORMALIZE("./..", ".."); + ASSERT_NORMALIZE("./../", ".."); + ASSERT_NORMALIZE("../.", ".."); + ASSERT_NORMALIZE(".././", ".."); + ASSERT_NORMALIZE("...", "..."); + ASSERT_NORMALIZE(".../", "..."); + ASSERT_NORMALIZE("a/", "a"); + ASSERT_NORMALIZE(".a", ".a"); + ASSERT_NORMALIZE("..a", "..a"); + ASSERT_NORMALIZE("...a", "...a"); + ASSERT_NORMALIZE("./a", "a"); + ASSERT_NORMALIZE("././a", "a"); + ASSERT_NORMALIZE("./../a", "..\\a"); + ASSERT_NORMALIZE(".././a", "..\\a"); + ASSERT_NORMALIZE("../../a", "..\\..\\a"); + ASSERT_NORMALIZE("../.../a", "..\\...\\a"); + ASSERT_NORMALIZE(".../../a", "a"); + ASSERT_NORMALIZE("a/..", ""); + ASSERT_NORMALIZE("a/../", ""); + ASSERT_NORMALIZE("a/./../", ""); + + ASSERT_NORMALIZE("c:/", "c:\\"); + ASSERT_NORMALIZE("c:/a", "c:\\a"); + ASSERT_NORMALIZE("c:/foo/bar", "c:\\foo\\bar"); + ASSERT_NORMALIZE("c:/foo/../bar", "c:\\bar"); + ASSERT_NORMALIZE("d:/a/", "d:\\a"); + ASSERT_NORMALIZE("D:/foo", "D:\\foo"); + ASSERT_NORMALIZE("c:/foo/", "c:\\foo"); + ASSERT_NORMALIZE("c:/.", "c:\\"); + ASSERT_NORMALIZE("c:/./", "c:\\"); + ASSERT_NORMALIZE("c:/..", "c:\\"); + ASSERT_NORMALIZE("c:/../", "c:\\"); + ASSERT_NORMALIZE("c:/./..", "c:\\"); + ASSERT_NORMALIZE("c:/./../", "c:\\"); + ASSERT_NORMALIZE("c:/../.", "c:\\"); + ASSERT_NORMALIZE("c:/.././", "c:\\"); + ASSERT_NORMALIZE("c:/...", "c:\\..."); + ASSERT_NORMALIZE("c:/.../", "c:\\..."); + ASSERT_NORMALIZE("c:/.a", "c:\\.a"); + ASSERT_NORMALIZE("c:/..a", "c:\\..a"); + 
ASSERT_NORMALIZE("c:/...a", "c:\\...a"); + ASSERT_NORMALIZE("c:/./a", "c:\\a"); + ASSERT_NORMALIZE("c:/././a", "c:\\a"); + ASSERT_NORMALIZE("c:/./../a", "c:\\a"); + ASSERT_NORMALIZE("c:/.././a", "c:\\a"); + ASSERT_NORMALIZE("c:/../../a", "c:\\a"); + ASSERT_NORMALIZE("c:/../.../a", "c:\\...\\a"); + ASSERT_NORMALIZE("c:/.../../a", "c:\\a"); + ASSERT_NORMALIZE("c:/a/..", "c:\\"); + ASSERT_NORMALIZE("c:/a/../", "c:\\"); + ASSERT_NORMALIZE("c:/a/./../", "c:\\"); + ASSERT_NORMALIZE("c:/../d:/e", "c:\\d:\\e"); + ASSERT_NORMALIZE("c:/../d:/../e", "c:\\e"); + + ASSERT_NORMALIZE("foo", "foo"); + ASSERT_NORMALIZE("foo/", "foo"); + ASSERT_NORMALIZE("foo//bar", "foo\\bar"); + ASSERT_NORMALIZE("../..//foo/./bar", "..\\..\\foo\\bar"); + ASSERT_NORMALIZE("../foo/baz/../bar", "..\\foo\\bar"); + ASSERT_NORMALIZE("c:", "c:\\"); + ASSERT_NORMALIZE("c:/", "c:\\"); + ASSERT_NORMALIZE("c:\\", "c:\\"); + ASSERT_NORMALIZE("c:\\..//foo/./bar/", "c:\\foo\\bar"); + ASSERT_NORMALIZE("../foo", "..\\foo"); +#undef ASSERT_NORMALIZE +} + } // namespace windows } // namespace bazel