Bazel client: implement directory tree walking
This change:
- merges the //src/{main,test}/cpp:file and
//src/{main,test}/cpp:file_platform libraries
because "file" and "file_platform" need each other
and this makes logical sense anyway
- implements a function in file_<platform> to run
a custom function on every child of a directory
- implements a function in file.cc to recursively
traverse a directory tree, based on the previosly
mentioned function
- removes the corresponding logic from the Bazel
client to make it more portable
--
MOS_MIGRATED_REVID=139309562
diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc
index 7b3d893..019df86 100644
--- a/src/main/cpp/blaze.cc
+++ b/src/main/cpp/blaze.cc
@@ -27,7 +27,6 @@
#include <assert.h>
#include <ctype.h>
-#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
@@ -867,42 +866,6 @@
#endif
}
-// Walks the temporary directory recursively and collects full file paths.
-static void CollectExtractedFiles(const string &dir_path, vector<string> &files) {
- DIR *dir;
- struct dirent *ent;
-
- if ((dir = opendir(dir_path.c_str())) == NULL) {
- die(blaze_exit_code::INTERNAL_ERROR, "opendir failed");
- }
-
- while ((ent = readdir(dir)) != NULL) {
- if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) {
- continue;
- }
-
- string filename(blaze_util::JoinPath(dir_path, ent->d_name));
- bool is_directory;
- if (ent->d_type == DT_UNKNOWN) {
- struct stat buf;
- if (lstat(filename.c_str(), &buf) == -1) {
- die(blaze_exit_code::INTERNAL_ERROR, "stat failed");
- }
- is_directory = S_ISDIR(buf.st_mode);
- } else {
- is_directory = (ent->d_type == DT_DIR);
- }
-
- if (is_directory) {
- CollectExtractedFiles(filename, files);
- } else {
- files.push_back(filename);
- }
- }
-
- closedir(dir);
-}
-
// A devtools_ijar::ZipExtractorProcessor to extract the files from the blaze
// zip.
class ExtractBlazeZipProcessor : public devtools_ijar::ZipExtractorProcessor {
@@ -974,19 +937,20 @@
// on the disk.
vector<string> extracted_files;
- CollectExtractedFiles(embedded_binaries, extracted_files);
+
+ // Walks the temporary directory recursively and collects full file paths.
+ blaze_util::GetAllFilesUnder(embedded_binaries, &extracted_files);
set<string> synced_directories;
- for (vector<string>::iterator it = extracted_files.begin(); it != extracted_files.end(); it++) {
-
- const char *extracted_path = it->c_str();
+ for (const auto &it : extracted_files) {
+ const char *extracted_path = it.c_str();
// Set the time to a distantly futuristic value so we can observe tampering.
- // Note that keeping the default timestamp set by unzip (1970-01-01) and using
- // that to detect tampering is not enough, because we also need the timestamp
- // to change between Blaze releases so that the metadata cache knows that
- // the files may have changed. This is important for actions that use
- // embedded binaries as artifacts.
+ // Note that keeping the default timestamp set by unzip (1970-01-01) and
+ // using that to detect tampering is not enough, because we also need the
+ // timestamp to change between Blaze releases so that the metadata cache
+ // knows that the files may have changed. This is important for actions that
+ // use embedded binaries as artifacts.
struct utimbuf times = { future_time, future_time };
if (utime(extracted_path, ×) == -1) {
pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
diff --git a/src/main/cpp/util/BUILD b/src/main/cpp/util/BUILD
index 2d772be..bd1e80b 100644
--- a/src/main/cpp/util/BUILD
+++ b/src/main/cpp/util/BUILD
@@ -15,7 +15,6 @@
":blaze_exit_code",
":errors",
":file",
- ":file_platform",
":numbers",
":port",
":strings",
@@ -23,8 +22,8 @@
)
cc_library(
- name = "file_platform",
- srcs = select({
+ name = "file",
+ srcs = ["file.cc"] + select({
"//src:windows_msvc": [
"file_windows.cc",
],
@@ -32,22 +31,10 @@
"file_posix.cc",
],
}),
- hdrs = ["file_platform.h"],
- visibility = [
- "//src/test/cpp/util:__pkg__",
+ hdrs = [
+ "file.h",
+ "file_platform.h",
],
- deps = [
- ":blaze_exit_code",
- ":errors",
- ":file",
- ":strings",
- ],
-)
-
-cc_library(
- name = "file",
- srcs = ["file.cc"],
- hdrs = ["file.h"],
visibility = [
"//src/test/cpp/util:__pkg__",
"//src/tools/singlejar:__pkg__",
diff --git a/src/main/cpp/util/file.cc b/src/main/cpp/util/file.cc
index 0957a56..2aa6642 100644
--- a/src/main/cpp/util/file.cc
+++ b/src/main/cpp/util/file.cc
@@ -18,15 +18,16 @@
#include <cstdlib>
#include <vector>
+#include "src/main/cpp/util/file_platform.h"
#include "src/main/cpp/util/errors.h"
#include "src/main/cpp/util/exit_code.h"
#include "src/main/cpp/util/strings.h"
-using std::pair;
-
namespace blaze_util {
+using std::pair;
using std::string;
+using std::vector;
pair<string, string> SplitPath(const string &path) {
size_t pos = path.rfind('/');
@@ -73,4 +74,35 @@
}
}
+class DirectoryTreeWalker : public DirectoryEntryConsumer {
+ public:
+ DirectoryTreeWalker(vector<string> *files,
+ _ForEachDirectoryEntry walk_entries)
+ : _files(files), _walk_entries(walk_entries) {}
+
+ void Consume(const string &path, bool is_directory) override {
+ if (is_directory) {
+ Walk(path);
+ } else {
+ _files->push_back(path);
+ }
+ }
+
+ void Walk(const string &path) { _walk_entries(path, this); }
+
+ private:
+ vector<string> *_files;
+ _ForEachDirectoryEntry _walk_entries;
+};
+
+void GetAllFilesUnder(const string &path, vector<string> *result) {
+ _GetAllFilesUnder(path, result, &ForEachDirectoryEntry);
+}
+
+void _GetAllFilesUnder(const string &path,
+ vector<string> *result,
+ _ForEachDirectoryEntry walk_entries) {
+ DirectoryTreeWalker(result, walk_entries).Walk(path);
+}
+
} // namespace blaze_util
diff --git a/src/main/cpp/util/file.h b/src/main/cpp/util/file.h
index 238ccf0..dfa08ad 100644
--- a/src/main/cpp/util/file.h
+++ b/src/main/cpp/util/file.h
@@ -15,6 +15,7 @@
#define BAZEL_SRC_MAIN_CPP_UTIL_FILE_H_
#include <string>
+#include <vector>
namespace blaze_util {
@@ -29,6 +30,26 @@
std::string JoinPath(const std::string &path1, const std::string &path2);
+// Lists all files in `path` and all of its subdirectories.
+//
+// Does not follow symlinks / junctions.
+//
+// Populates `result` with the full paths of the files. Every entry will have
+// `path` as its prefix. If `path` is a file, `result` contains just this file.
+void GetAllFilesUnder(const std::string &path,
+ std::vector<std::string> *result);
+
+class DirectoryEntryConsumer;
+
+// Visible for testing only.
+typedef void (*_ForEachDirectoryEntry)(const std::string &path,
+ DirectoryEntryConsumer *consume);
+
+// Visible for testing only.
+void _GetAllFilesUnder(const std::string &path,
+ std::vector<std::string> *result,
+ _ForEachDirectoryEntry walk_entries);
+
} // namespace blaze_util
#endif // BAZEL_SRC_MAIN_CPP_UTIL_FILE_H_
diff --git a/src/main/cpp/util/file_platform.h b/src/main/cpp/util/file_platform.h
index b0df5ea..c43be79 100644
--- a/src/main/cpp/util/file_platform.h
+++ b/src/main/cpp/util/file_platform.h
@@ -40,6 +40,27 @@
// Changes the current working directory to `path`, returns true upon success.
bool ChangeDirectory(const std::string& path);
+// Interface to be implemented by ForEachDirectoryEntry clients.
+class DirectoryEntryConsumer {
+ public:
+ virtual ~DirectoryEntryConsumer() {}
+
+ // This method is called for each entry in a directory.
+ // `name` is the full path of the entry.
+ // `is_directory` is true if this entry is a directory (but false if this is a
+ // symlink pointing to a directory).
+ virtual void Consume(const std::string &name, bool is_directory) = 0;
+};
+
+// Executes a function for each entry in a directory (except "." and "..").
+//
+// Returns true if the `path` referred to a directory or directory symlink,
+// false otherwise.
+//
+// See DirectoryEntryConsumer for more details.
+void ForEachDirectoryEntry(const std::string &path,
+ DirectoryEntryConsumer *consume);
+
} // namespace blaze_util
#endif // BAZEL_SRC_MAIN_CPP_UTIL_FILE_PLATFORM_H_
diff --git a/src/main/cpp/util/file_posix.cc b/src/main/cpp/util/file_posix.cc
index 90355b9..32f86da 100644
--- a/src/main/cpp/util/file_posix.cc
+++ b/src/main/cpp/util/file_posix.cc
@@ -14,6 +14,7 @@
#include "src/main/cpp/util/file_platform.h"
#include <sys/stat.h>
+#include <dirent.h> // DIR, dirent, opendir, closedir
#include <limits.h> // PATH_MAX
#include <stdlib.h> // getenv
#include <unistd.h> // access
@@ -84,4 +85,37 @@
return chdir(path.c_str()) == 0;
}
+void ForEachDirectoryEntry(const string &path,
+ DirectoryEntryConsumer *consume) {
+ DIR *dir;
+ struct dirent *ent;
+
+ if ((dir = opendir(path.c_str())) == NULL) {
+ // This is not a directory or it cannot be opened.
+ return;
+ }
+
+ while ((ent = readdir(dir)) != NULL) {
+ if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) {
+ continue;
+ }
+
+ string filename(blaze_util::JoinPath(path, ent->d_name));
+ bool is_directory;
+ if (ent->d_type == DT_UNKNOWN) {
+ struct stat buf;
+ if (lstat(filename.c_str(), &buf) == -1) {
+ die(blaze_exit_code::INTERNAL_ERROR, "stat failed");
+ }
+ is_directory = S_ISDIR(buf.st_mode);
+ } else {
+ is_directory = (ent->d_type == DT_DIR);
+ }
+
+ consume->Consume(filename, is_directory);
+ }
+
+ closedir(dir);
+}
+
} // namespace blaze_util
diff --git a/src/main/cpp/util/file_windows.cc b/src/main/cpp/util/file_windows.cc
index 5cf9987..c9df3af 100644
--- a/src/main/cpp/util/file_windows.cc
+++ b/src/main/cpp/util/file_windows.cc
@@ -43,4 +43,10 @@
pdie(255, "blaze_util::ChangeDirectory is not implemented on Windows");
}
+void ForEachDirectoryEntry(const string &path,
+ DirectoryEntryConsumer *consume) {
+ // TODO(bazel-team): implement this.
+ pdie(255, "blaze_util::ForEachDirectoryEntry is not implemented on Windows");
+}
+
} // namespace blaze_util
diff --git a/src/test/cpp/util/BUILD b/src/test/cpp/util/BUILD
index e5524e9..60c057b 100644
--- a/src/test/cpp/util/BUILD
+++ b/src/test/cpp/util/BUILD
@@ -20,20 +20,13 @@
cc_test(
name = "file_test",
- srcs = ["file_test.cc"],
- deps = [
- "//src/main/cpp/util:file",
- "//third_party:gtest",
+ srcs = [
+ "file_posix_test.cc",
+ "file_test.cc",
],
-)
-
-cc_test(
- name = "file_platform_test",
- srcs = ["file_posix_test.cc"],
tags = ["manual"],
deps = [
"//src/main/cpp/util:file",
- "//src/main/cpp/util:file_platform",
"//third_party:gtest",
],
)
diff --git a/src/test/cpp/util/file_posix_test.cc b/src/test/cpp/util/file_posix_test.cc
index 52ddbd6..446ada9 100644
--- a/src/test/cpp/util/file_posix_test.cc
+++ b/src/test/cpp/util/file_posix_test.cc
@@ -14,11 +14,17 @@
#include <fcntl.h>
#include <unistd.h>
+#include <algorithm>
+
#include "src/main/cpp/util/file_platform.h"
#include "gtest/gtest.h"
namespace blaze_util {
+using std::pair;
+using std::string;
+using std::vector;
+
TEST(FilePosixTest, Which) {
ASSERT_EQ("", Which(""));
ASSERT_EQ("", Which("foo"));
@@ -119,4 +125,100 @@
ASSERT_EQ(string(old_wd), string(new_wd));
}
+class MockDirectoryEntryConsumer : public DirectoryEntryConsumer {
+ public:
+ void Consume(const std::string &name, bool is_directory) override {
+ entries.push_back(pair<string, bool>(name, is_directory));
+ }
+
+ vector<pair<string, bool> > entries;
+};
+
+TEST(FilePosixTest, ForEachDirectoryEntry) {
+ // Get the test's temp dir.
+ char* tmpdir_cstr = getenv("TEST_TMPDIR");
+ ASSERT_FALSE(tmpdir_cstr == NULL);
+ string tempdir(tmpdir_cstr);
+ ASSERT_FALSE(tempdir.empty());
+ if (tempdir.back() == '/') {
+ tempdir = tempdir.substr(0, tempdir.size() - 1);
+ }
+
+ // Create the root directory for the mock directory tree.
+ string root = tempdir + "/FilePosixTest.ForEachDirectoryEntry.root";
+ ASSERT_EQ(0, mkdir(root.c_str(), 0700));
+
+ // Names of mock files and directories.
+ string dir = root + "/dir";
+ string file = root + "/file";
+ string dir_sym = root + "/dir_sym";
+ string file_sym = root + "/file_sym";
+ string subfile = dir + "/subfile";
+ string subfile_through_sym = dir_sym + "/subfile";
+
+ // Create mock directory, file, and symlinks.
+ int fd = open(file.c_str(), O_CREAT, 0700);
+ ASSERT_GT(fd, 0);
+ close(fd);
+ ASSERT_EQ(0, mkdir(dir.c_str(), 0700));
+ ASSERT_EQ(0, symlink("dir", dir_sym.c_str()));
+ ASSERT_EQ(0, symlink("file", file_sym.c_str()));
+ fd = open(subfile.c_str(), O_CREAT, 0700);
+ ASSERT_GT(fd, 0);
+ close(fd);
+
+ // Assert that stat'ing the symlinks (with following them) point to the right
+ // filesystem entry types.
+ struct stat stat_buf;
+ ASSERT_EQ(0, stat(dir_sym.c_str(), &stat_buf));
+ ASSERT_TRUE(S_ISDIR(stat_buf.st_mode));
+ ASSERT_EQ(0, stat(file_sym.c_str(), &stat_buf));
+ ASSERT_FALSE(S_ISDIR(stat_buf.st_mode));
+
+ // Actual test: list the directory.
+ MockDirectoryEntryConsumer consumer;
+ ForEachDirectoryEntry(root, &consumer);
+ ASSERT_EQ(4, consumer.entries.size());
+
+ // Sort the collected directory entries.
+ struct {
+ bool operator()(const pair<string, bool> &a, const pair<string, bool> &b) {
+ return a.first < b.first;
+ }
+ } sort_pairs;
+
+ std::sort(consumer.entries.begin(), consumer.entries.end(), sort_pairs);
+
+ // Assert that the directory entries have the right name and type.
+ pair<string, bool> expected;
+ expected = pair<string, bool>(dir, true);
+ ASSERT_EQ(expected, consumer.entries[0]);
+ expected = pair<string, bool>(dir_sym, false);
+ ASSERT_EQ(expected, consumer.entries[1]);
+ expected = pair<string, bool>(file, false);
+ ASSERT_EQ(expected, consumer.entries[2]);
+ expected = pair<string, bool>(file_sym, false);
+ ASSERT_EQ(expected, consumer.entries[3]);
+
+ // Actual test: list a directory symlink.
+ consumer.entries.clear();
+ ForEachDirectoryEntry(dir_sym, &consumer);
+ ASSERT_EQ(1, consumer.entries.size());
+ expected = pair<string, bool>(subfile_through_sym, false);
+ ASSERT_EQ(expected, consumer.entries[0]);
+
+ // Actual test: list a path that's actually a file, not a directory.
+ consumer.entries.clear();
+ ForEachDirectoryEntry(file, &consumer);
+ ASSERT_TRUE(consumer.entries.empty());
+
+ // Cleanup: delete mock directory tree.
+ rmdir(subfile.c_str());
+ rmdir(dir.c_str());
+ unlink(dir_sym.c_str());
+ unlink(file.c_str());
+ unlink(file_sym.c_str());
+ rmdir(root.c_str());
+}
+
} // namespace blaze_util
diff --git a/src/test/cpp/util/file_test.cc b/src/test/cpp/util/file_test.cc
index 9ea3a85..3e3ff3b 100644
--- a/src/test/cpp/util/file_test.cc
+++ b/src/test/cpp/util/file_test.cc
@@ -11,12 +11,18 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <algorithm>
+
#include "src/main/cpp/util/file.h"
+#include "src/main/cpp/util/file_platform.h"
#include "gtest/gtest.h"
namespace blaze_util {
-TEST(BlazeUtil, JoinPath) {
+using std::string;
+using std::vector;
+
+TEST(FileTest, JoinPath) {
std::string path = JoinPath("", "");
ASSERT_EQ("", path);
@@ -36,4 +42,37 @@
ASSERT_EQ("/", path);
}
+void MockDirectoryListingFunction(const string &path,
+ DirectoryEntryConsumer *consume) {
+ if (path == "root") {
+ consume->Consume("root/file1", false);
+ consume->Consume("root/dir2", true);
+ consume->Consume("root/dir1", true);
+ } else if (path == "root/dir1") {
+ consume->Consume("root/dir1/dir3", true);
+ consume->Consume("root/dir1/file2", false);
+ } else if (path == "root/dir2") {
+ consume->Consume("root/dir2/file3", false);
+ } else if (path == "root/dir1/dir3") {
+ consume->Consume("root/dir1/dir3/file4", false);
+ consume->Consume("root/dir1/dir3/file5", false);
+ } else {
+ // Unexpected path
+ GTEST_FAIL();
+ }
+}
+
+TEST(FileTest, GetAllFilesUnder) {
+ vector<string> result;
+ _GetAllFilesUnder("root", &result, &MockDirectoryListingFunction);
+ std::sort(result.begin(), result.end());
+
+ vector<string> expected({"root/dir1/dir3/file4",
+ "root/dir1/dir3/file5",
+ "root/dir1/file2",
+ "root/dir2/file3",
+ "root/file1"});
+ ASSERT_EQ(expected, result);
+}
+
} // namespace blaze_util