Bazel client: move NormalizePath to Windows code. This method was only used by the Windows-specific code, so move it there and adapt it to Windows paths. Update AsWindowsPath to normalize its output: this is necessary because AsWindowsPath's output is a widechar path string which we often prefix with the UNC prefix, but such paths must be normalized (the kernel won't do it if the path has a UNC prefix). Finally, add an AsWindowsPathWithUncPrefix method which does what the name suggests: converts the path to a Windows path, makes it absolute, and adds the UNC prefix if necessary. (This is a very common operation when calling WinAPI functions and we'll use it a lot in subsequent changes.) See https://github.com/bazelbuild/bazel/issues/2107 -- PiperOrigin-RevId: 144060297 MOS_MIGRATED_REVID=144060297
diff --git a/src/main/cpp/util/file.cc b/src/main/cpp/util/file.cc index 8d66710..392af37 100644 --- a/src/main/cpp/util/file.cc +++ b/src/main/cpp/util/file.cc
@@ -18,7 +18,6 @@ #include <algorithm> #include <cstdlib> -#include <sstream> // ostringstream #include <vector> #include "src/main/cpp/util/file_platform.h" @@ -32,61 +31,6 @@ using std::string; using std::vector; -string NormalizePath(const string &path) { - if (path.empty()) { - return string(); - } - - static const string dot("."); - static const string dotdot(".."); - - vector<string> segments; - int segment_start = -1; - // Find the path segments in `path` (separated by "/"). - for (int i = 0;; ++i) { - if (path[i] != '/' && path[i] != '\0') { - // The current character does not end a segment, so start one unless it's - // already started. - if (segment_start < 0) { - segment_start = i; - } - } else if (segment_start >= 0 && i > segment_start) { - // The current character is "/" or "\0", so this ends a segment. - // Add that to `segments` if there's anything to add; handle "." and "..". - string segment(path, segment_start, i - segment_start); - segment_start = -1; - if (segment == dotdot) { - if (!segments.empty()) { - segments.pop_back(); - } - } else if (segment != dot) { - segments.push_back(segment); - } - } - if (path[i] == '\0') { - break; - } - } - - // Handle the case when `path` was just "/" (or some degenerate form of it, - // e.g. "/.."). - if (segments.empty() && path[0] == '/') { - return "/"; - } - - // Join all segments, make sure we preserve the leading "/" if any. - bool first = true; - std::ostringstream result; - for (const auto &s : segments) { - if (!first || path[0] == '/') { - result << "/"; - } - first = false; - result << s; - } - return result.str(); -} - bool ReadFrom(const std::function<int(void *, int)> &read_func, string *content, int max_size) { content->clear();
diff --git a/src/main/cpp/util/file.h b/src/main/cpp/util/file.h index d380c33..efe963c 100644 --- a/src/main/cpp/util/file.h +++ b/src/main/cpp/util/file.h
@@ -34,15 +34,6 @@ virtual int Receive(void *buffer, int size) = 0; }; -// Returns a normalized form of the input `path`. -// Normalization means removing "." references, resolving ".." references, and -// deduplicating "/" characters. -// For example if `path` is "foo/../bar/.//qux", the result is "bar/qux". -// Uplevel references that cannot go any higher in the directory tree are simply -// ignored, e.g. "/.." is normalized to "/" and "../../foo" is normalized to -// "foo". -std::string NormalizePath(const std::string &path); - // Replaces 'content' with data read from a source using `read_func`. // If `max_size` is positive, the method reads at most that many bytes; // otherwise the method reads everything.
diff --git a/src/main/cpp/util/file_platform.h b/src/main/cpp/util/file_platform.h index b0458e8..9c95468 100644 --- a/src/main/cpp/util/file_platform.h +++ b/src/main/cpp/util/file_platform.h
@@ -117,22 +117,21 @@ DirectoryEntryConsumer *consume); #if defined(COMPILER_MSVC) || defined(__CYGWIN__) -// Converts a UTF8-encoded `path` to a widechar Windows path. +// Converts a UTF8-encoded `path` to a normalized, widechar Windows path. // // Returns true if conversion succeeded and sets the contents of `result` to it. // // The `path` may be absolute or relative, and may be a Windows or MSYS path. -// In every case, this method replaces forward slashes with backslashes if -// necessary. +// In every case, the output is normalized (see NormalizeWindowsPath). +// The output won't have a UNC prefix, even if `path` did. // // Recognizes the drive letter in MSYS paths, so e.g. "/c/windows" becomes // "c:\windows". Prepends the MSYS root (computed from the BAZEL_SH envvar) to // absolute MSYS paths, so e.g. "/usr" becomes "c:\tools\msys64\usr". // // The result may be longer than MAX_PATH. It's the caller's responsibility to -// prepend the long path prefix ("\\?\") in case they need to pass it to a -// Windows API function (some require the prefix, some don't), or to quote the -// path if necessary. +// prepend the UNC prefix in case they need to pass it to a WinAPI function +// (some require the prefix, some don't), or to quote the path if necessary. bool AsWindowsPath(const std::string &path, std::wstring *result); #endif // defined(COMPILER_MSVC) || defined(__CYGWIN__)
diff --git a/src/main/cpp/util/file_windows.cc b/src/main/cpp/util/file_windows.cc index 6a35e45..75555bb 100644 --- a/src/main/cpp/util/file_windows.cc +++ b/src/main/cpp/util/file_windows.cc
@@ -34,6 +34,30 @@ using std::vector; using std::wstring; +// Returns the current working directory as a Windows path. +// The result may have a UNC prefix. +static unique_ptr<WCHAR[]> GetCwdW(); + +// Like `AsWindowsPath` but the result is absolute and has UNC prefix if needed. +static bool AsWindowsPathWithUncPrefix(const string& path, wstring* wpath); + +// Returns a normalized form of the input `path`. +// +// `path` must be a relative or absolute Windows path, it may use "/" instead of +// "\" but must not be an absolute MSYS path. +// The result won't have a UNC prefix, even if `path` did. +// +// Normalization means removing "." references, resolving ".." references, and +// deduplicating "/" characters while converting them to "\". +// For example if `path` is "foo/../bar/.//qux", the result is "bar\qux". +// +// Uplevel references that cannot go any higher in the directory tree are simply +// ignored, e.g. "c:/.." is normalized to "c:\" and "../../foo" is normalized to +// "foo". +// +// Visible for testing, would be static otherwise. +string NormalizeWindowsPath(string path); + template <typename char_type> struct CharTraits { static bool IsAlpha(char_type ch); @@ -71,8 +95,6 @@ } } -static unique_ptr<WCHAR[]> GetCwdW(); - class WindowsPipe : public IPipe { public: WindowsPipe(const HANDLE& read_handle, const HANDLE& write_handle) @@ -279,27 +301,30 @@ } } // otherwise this is a relative path, or absolute Windows path. - unique_ptr<WCHAR[]> mutable_wpath(CstringToWstring(mutable_path.c_str())); - WCHAR* p = mutable_wpath.get(); - // Replace forward slashes with backslashes. 
- while (*p != L'\0') { - if (*p == L'/') { - *p = L'\\'; - } - ++p; + result->assign( + CstringToWstring(NormalizeWindowsPath(mutable_path).c_str()).get()); + return true; +} + +static bool AsWindowsPathWithUncPrefix(const string& path, wstring* wpath) { + if (!AsWindowsPath(path, wpath)) { + return false; } - result->assign(mutable_wpath.get()); + if (!IsAbsolute(path)) { + wpath->assign(wstring(GetCwdW().get()) + L"\\" + *wpath); + } + AddUncPrefixMaybe(wpath); return true; } bool ReadFile(const string& filename, string* content, int max_size) { - wstring wfilename; - if (!AsWindowsPath(filename, &wfilename)) { - // Failed to convert the path because it was an absolute MSYS path but we - // could not retrieve the BAZEL_SH envvar. + if (filename.empty()) { return false; } - AddUncPrefixMaybe(&wfilename); + wstring wfilename; + if (!AsWindowsPathWithUncPrefix(filename, &wfilename)) { + return false; + } HANDLE handle = CreateFileW( /* lpFileName */ wfilename.c_str(), /* dwDesiredAccess */ GENERIC_READ, @@ -482,22 +507,11 @@ return false; } wstring wpath; - if (!AsWindowsPath(NormalizePath(path), &wpath)) { - PrintError("could not convert path to widechar, path=(%s), err=%d\n", + if (!AsWindowsPathWithUncPrefix(path, &wpath)) { + PrintError("PathExists(%s): AsWindowsPathWithUncPrefix failed, err=%d\n", path.c_str(), GetLastError()); return false; } - if (!IsAbsolute(path)) { - DWORD len = ::GetCurrentDirectoryW(0, nullptr); - unique_ptr<WCHAR[]> cwd(new WCHAR[len]); - if (!GetCurrentDirectoryW(len, cwd.get())) { - PrintError("could not make the path absolute, path=(%s), err=%d\n", - path.c_str(), GetLastError()); - return false; - } - wpath = wstring(cwd.get()) + L"\\" + wpath; - } - AddUncPrefixMaybe(&wpath); return JunctionResolver().Resolve(wpath.c_str(), nullptr); } @@ -590,4 +604,69 @@ #else // not COMPILER_MSVC #endif // COMPILER_MSVC +string NormalizeWindowsPath(string path) { + if (path.empty()) { + return ""; + } + if (path[0] == '/') { + // This is an 
absolute MSYS path, error out. + pdie(255, "NormalizeWindowsPath: expected a Windows path, path=(%s)", + path.c_str()); + } + if (path.size() >= 4 && HasUncPrefix(path.c_str())) { + path = path.substr(4); + } + + static const string dot("."); + static const string dotdot(".."); + + vector<string> segments; + int segment_start = -1; + // Find the path segments in `path` (separated by "/"). + for (int i = 0;; ++i) { + if (!IsPathSeparator(path[i]) && path[i] != '\0') { + // The current character does not end a segment, so start one unless it's + // already started. + if (segment_start < 0) { + segment_start = i; + } + } else if (segment_start >= 0 && i > segment_start) { + // The current character is "/" or "\0", so this ends a segment. + // Add that to `segments` if there's anything to add; handle "." and "..". + string segment(path, segment_start, i - segment_start); + segment_start = -1; + if (segment == dotdot) { + if (!segments.empty() && + !HasDriveSpecifierPrefix(segments[0].c_str())) { + segments.pop_back(); + } + } else if (segment != dot) { + segments.push_back(segment); + } + } + if (path[i] == '\0') { + break; + } + } + + // Handle the case when `path` is just a drive specifier (or some degenerate + // form of it, e.g. "c:\.."). + if (segments.size() == 1 && segments[0].size() == 2 && + HasDriveSpecifierPrefix(segments[0].c_str())) { + return segments[0] + '\\'; + } + + // Join all segments. + bool first = true; + std::ostringstream result; + for (const auto& s : segments) { + if (!first) { + result << '\\'; + } + first = false; + result << s; + } + return result.str(); +} + } // namespace blaze_util
diff --git a/src/test/cpp/util/file_test.cc b/src/test/cpp/util/file_test.cc index 7c15ceb..aa654cc 100644 --- a/src/test/cpp/util/file_test.cc +++ b/src/test/cpp/util/file_test.cc
@@ -25,22 +25,6 @@ using std::string; -TEST(FileTest, TestNormalizePath) { - ASSERT_EQ(string(""), NormalizePath("")); - ASSERT_EQ(string(""), NormalizePath(".")); - ASSERT_EQ(string("/"), NormalizePath("/")); - ASSERT_EQ(string("/"), NormalizePath("//")); - ASSERT_EQ(string("foo"), NormalizePath("foo")); - ASSERT_EQ(string("foo"), NormalizePath("foo/")); - ASSERT_EQ(string("foo/bar"), NormalizePath("foo//bar")); - ASSERT_EQ(string("foo/bar"), NormalizePath("../..//foo//bar")); - ASSERT_EQ(string("/foo"), NormalizePath("/foo")); - ASSERT_EQ(string("/foo"), NormalizePath("/foo/")); - ASSERT_EQ(string("/foo/bar"), NormalizePath("/foo/./bar/")); - ASSERT_EQ(string("foo/bar"), NormalizePath("../foo/baz/../bar")); - ASSERT_EQ(string("foo/bar"), NormalizePath("../foo//./baz/../bar///")); -} - TEST(FileTest, TestSingleThreadedPipe) { std::unique_ptr<IPipe> pipe(CreatePipe()); char buffer[50] = {0};
diff --git a/src/test/cpp/util/file_windows_test.cc b/src/test/cpp/util/file_windows_test.cc index 66c1af4..983763a 100644 --- a/src/test/cpp/util/file_windows_test.cc +++ b/src/test/cpp/util/file_windows_test.cc
@@ -26,8 +26,31 @@ namespace blaze_util { using std::string; +using std::wstring; -void ResetMsysRootForTesting(); // defined in file_windows.cc +// Methods defined in file_windows.cc that are only visible for testing. +void ResetMsysRootForTesting(); +string NormalizeWindowsPath(string path); + +static string GetTestTmpDir() { + char buf[MAX_PATH] = {0}; + DWORD len = GetEnvironmentVariableA("TEST_TMPDIR", buf, MAX_PATH); + return string(buf); +} + +TEST(FileTest, TestNormalizeWindowsPath) { + ASSERT_EQ(string(""), NormalizeWindowsPath("")); + ASSERT_EQ(string(""), NormalizeWindowsPath(".")); + ASSERT_EQ(string("foo"), NormalizeWindowsPath("foo")); + ASSERT_EQ(string("foo"), NormalizeWindowsPath("foo/")); + ASSERT_EQ(string("foo\\bar"), NormalizeWindowsPath("foo//bar")); + ASSERT_EQ(string("foo\\bar"), NormalizeWindowsPath("../..//foo/./bar")); + ASSERT_EQ(string("foo\\bar"), NormalizeWindowsPath("../foo/baz/../bar")); + ASSERT_EQ(string("c:\\"), NormalizeWindowsPath("c:")); + ASSERT_EQ(string("c:\\"), NormalizeWindowsPath("c:/")); + ASSERT_EQ(string("c:\\"), NormalizeWindowsPath("c:\\")); + ASSERT_EQ(string("c:\\foo\\bar"), NormalizeWindowsPath("c:\\..//foo/./bar/")); +} TEST(FileTest, TestDirname) { ASSERT_EQ("", Dirname("")); @@ -102,56 +125,71 @@ TEST(FileTest, TestAsWindowsPath) { SetEnvironmentVariableA("BAZEL_SH", "c:\\msys\\some\\long\\path\\bash.exe"); ResetMsysRootForTesting(); - std::wstring actual; + wstring actual; ASSERT_TRUE(AsWindowsPath("", &actual)); - ASSERT_EQ(std::wstring(L""), actual); + ASSERT_EQ(wstring(L""), actual); ASSERT_TRUE(AsWindowsPath("", &actual)); - ASSERT_EQ(std::wstring(L""), actual); + ASSERT_EQ(wstring(L""), actual); ASSERT_TRUE(AsWindowsPath("foo/bar", &actual)); - ASSERT_EQ(std::wstring(L"foo\\bar"), actual); + ASSERT_EQ(wstring(L"foo\\bar"), actual); + + ASSERT_TRUE(AsWindowsPath("c:", &actual)); + ASSERT_EQ(wstring(L"c:\\"), actual); + + ASSERT_TRUE(AsWindowsPath("c:/", &actual)); + ASSERT_EQ(wstring(L"c:\\"), actual); + 
+ ASSERT_TRUE(AsWindowsPath("c:\\", &actual)); + ASSERT_EQ(wstring(L"c:\\"), actual); + + ASSERT_TRUE(AsWindowsPath("\\\\?\\c:\\", &actual)); + ASSERT_EQ(wstring(L"c:\\"), actual); + + ASSERT_TRUE(AsWindowsPath("\\\\?\\c://../foo", &actual)); + ASSERT_EQ(wstring(L"c:\\foo"), actual); ASSERT_TRUE(AsWindowsPath("/c", &actual)); - ASSERT_EQ(std::wstring(L"c:\\"), actual); + ASSERT_EQ(wstring(L"c:\\"), actual); ASSERT_TRUE(AsWindowsPath("/c/", &actual)); - ASSERT_EQ(std::wstring(L"c:\\"), actual); + ASSERT_EQ(wstring(L"c:\\"), actual); ASSERT_TRUE(AsWindowsPath("/c/blah", &actual)); - ASSERT_EQ(std::wstring(L"c:\\blah"), actual); + ASSERT_EQ(wstring(L"c:\\blah"), actual); ASSERT_TRUE(AsWindowsPath("/d/progra~1/micros~1", &actual)); - ASSERT_EQ(std::wstring(L"d:\\progra~1\\micros~1"), actual); + ASSERT_EQ(wstring(L"d:\\progra~1\\micros~1"), actual); ASSERT_TRUE(AsWindowsPath("/foo", &actual)); - ASSERT_EQ(std::wstring(L"c:\\msys\\foo"), actual); + ASSERT_EQ(wstring(L"c:\\msys\\foo"), actual); - std::wstring wlongpath(L"dummy_long_path\\"); - std::string longpath("dummy_long_path/"); + wstring wlongpath(L"\\dummy_long_path"); + string longpath("dummy_long_path/"); while (longpath.size() <= MAX_PATH) { wlongpath += wlongpath; longpath += longpath; } - wlongpath = std::wstring(L"c:\\") + wlongpath; - longpath = std::string("/c/") + longpath; + wlongpath = wstring(L"c:") + wlongpath; + longpath = string("/c/") + longpath; ASSERT_TRUE(AsWindowsPath(longpath, &actual)); ASSERT_EQ(wlongpath, actual); } TEST(FileTest, TestMsysRootRetrieval) { - std::wstring actual; + wstring actual; SetEnvironmentVariableA("BAZEL_SH", "c:/foo/msys/bar/qux.exe"); ResetMsysRootForTesting(); ASSERT_TRUE(AsWindowsPath("/blah", &actual)); - ASSERT_EQ(std::wstring(L"c:\\foo\\msys\\blah"), actual); + ASSERT_EQ(wstring(L"c:\\foo\\msys\\blah"), actual); SetEnvironmentVariableA("BAZEL_SH", "c:/foo/MSYS64/bar/qux.exe"); ResetMsysRootForTesting(); ASSERT_TRUE(AsWindowsPath("/blah", &actual)); - 
ASSERT_EQ(std::wstring(L"c:\\foo\\msys64\\blah"), actual); + ASSERT_EQ(wstring(L"c:\\foo\\msys64\\blah"), actual); SetEnvironmentVariableA("BAZEL_SH", "c:/qux.exe"); ResetMsysRootForTesting(); @@ -195,10 +233,8 @@ ASSERT_TRUE(PathExists(".")); ASSERT_FALSE(PathExists("non.existent")); - char buf[MAX_PATH] = {0}; - DWORD len = GetEnvironmentVariableA("TEST_TMPDIR", buf, MAX_PATH); - ASSERT_GT(len, 0); - string tmpdir(buf); + string tmpdir(GetTestTmpDir()); + ASSERT_LT(0, tmpdir.size()); ASSERT_TRUE(PathExists(tmpdir)); // Create a fake msys root. We'll also use it as a junction target.