Bazel client: implement directory tree walking

This change:

- merges the //src/{main,test}/cpp:file and
//src/{main,test}/cpp:file_platform libraries
because "file" and "file_platform" need each other
and this makes logical sense anyway

- implements a function in file_<platform> to run
a custom function on every child of a directory

- implements a function in file.cc to recursively
traverse a directory tree, based on the previosly
mentioned function

- removes the corresponding logic from the Bazel
client to make it more portable

--
MOS_MIGRATED_REVID=139309562
diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc
index 7b3d893..019df86 100644
--- a/src/main/cpp/blaze.cc
+++ b/src/main/cpp/blaze.cc
@@ -27,7 +27,6 @@
 
 #include <assert.h>
 #include <ctype.h>
-#include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
@@ -867,42 +866,6 @@
 #endif
 }
 
-// Walks the temporary directory recursively and collects full file paths.
-static void CollectExtractedFiles(const string &dir_path, vector<string> &files) {
-  DIR *dir;
-  struct dirent *ent;
-
-  if ((dir = opendir(dir_path.c_str())) == NULL) {
-    die(blaze_exit_code::INTERNAL_ERROR, "opendir failed");
-  }
-
-  while ((ent = readdir(dir)) != NULL) {
-    if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) {
-      continue;
-    }
-
-    string filename(blaze_util::JoinPath(dir_path, ent->d_name));
-    bool is_directory;
-    if (ent->d_type == DT_UNKNOWN) {
-      struct stat buf;
-      if (lstat(filename.c_str(), &buf) == -1) {
-        die(blaze_exit_code::INTERNAL_ERROR, "stat failed");
-      }
-      is_directory = S_ISDIR(buf.st_mode);
-    } else {
-      is_directory = (ent->d_type == DT_DIR);
-    }
-
-    if (is_directory) {
-      CollectExtractedFiles(filename, files);
-    } else {
-      files.push_back(filename);
-    }
-  }
-
-  closedir(dir);
-}
-
 // A devtools_ijar::ZipExtractorProcessor to extract the files from the blaze
 // zip.
 class ExtractBlazeZipProcessor : public devtools_ijar::ZipExtractorProcessor {
@@ -974,19 +937,20 @@
   // on the disk.
 
   vector<string> extracted_files;
-  CollectExtractedFiles(embedded_binaries, extracted_files);
+
+  // Walks the temporary directory recursively and collects full file paths.
+  blaze_util::GetAllFilesUnder(embedded_binaries, &extracted_files);
 
   set<string> synced_directories;
-  for (vector<string>::iterator it = extracted_files.begin(); it != extracted_files.end(); it++) {
-
-    const char *extracted_path = it->c_str();
+  for (const auto &it : extracted_files) {
+    const char *extracted_path = it.c_str();
 
     // Set the time to a distantly futuristic value so we can observe tampering.
-    // Note that keeping the default timestamp set by unzip (1970-01-01) and using
-    // that to detect tampering is not enough, because we also need the timestamp
-    // to change between Blaze releases so that the metadata cache knows that
-    // the files may have changed. This is important for actions that use
-    // embedded binaries as artifacts.
+    // Note that keeping the default timestamp set by unzip (1970-01-01) and
+    // using that to detect tampering is not enough, because we also need the
+    // timestamp to change between Blaze releases so that the metadata cache
+    // knows that the files may have changed. This is important for actions that
+    // use embedded binaries as artifacts.
     struct utimbuf times = { future_time, future_time };
     if (utime(extracted_path, &times) == -1) {
       pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
diff --git a/src/main/cpp/util/BUILD b/src/main/cpp/util/BUILD
index 2d772be..bd1e80b 100644
--- a/src/main/cpp/util/BUILD
+++ b/src/main/cpp/util/BUILD
@@ -15,7 +15,6 @@
         ":blaze_exit_code",
         ":errors",
         ":file",
-        ":file_platform",
         ":numbers",
         ":port",
         ":strings",
@@ -23,8 +22,8 @@
 )
 
 cc_library(
-    name = "file_platform",
-    srcs = select({
+    name = "file",
+    srcs = ["file.cc"] + select({
         "//src:windows_msvc": [
             "file_windows.cc",
         ],
@@ -32,22 +31,10 @@
             "file_posix.cc",
         ],
     }),
-    hdrs = ["file_platform.h"],
-    visibility = [
-        "//src/test/cpp/util:__pkg__",
+    hdrs = [
+        "file.h",
+        "file_platform.h",
     ],
-    deps = [
-        ":blaze_exit_code",
-        ":errors",
-        ":file",
-        ":strings",
-    ],
-)
-
-cc_library(
-    name = "file",
-    srcs = ["file.cc"],
-    hdrs = ["file.h"],
     visibility = [
         "//src/test/cpp/util:__pkg__",
         "//src/tools/singlejar:__pkg__",
diff --git a/src/main/cpp/util/file.cc b/src/main/cpp/util/file.cc
index 0957a56..2aa6642 100644
--- a/src/main/cpp/util/file.cc
+++ b/src/main/cpp/util/file.cc
@@ -18,15 +18,16 @@
 #include <cstdlib>
 #include <vector>
 
+#include "src/main/cpp/util/file_platform.h"
 #include "src/main/cpp/util/errors.h"
 #include "src/main/cpp/util/exit_code.h"
 #include "src/main/cpp/util/strings.h"
 
-using std::pair;
-
 namespace blaze_util {
 
+using std::pair;
 using std::string;
+using std::vector;
 
 pair<string, string> SplitPath(const string &path) {
   size_t pos = path.rfind('/');
@@ -73,4 +74,35 @@
   }
 }
 
+class DirectoryTreeWalker : public DirectoryEntryConsumer {
+ public:
+  DirectoryTreeWalker(vector<string> *files,
+                      _ForEachDirectoryEntry walk_entries)
+      : _files(files), _walk_entries(walk_entries) {}
+
+  void Consume(const string &path, bool is_directory) override {
+    if (is_directory) {
+      Walk(path);
+    } else {
+      _files->push_back(path);
+    }
+  }
+
+  void Walk(const string &path) { _walk_entries(path, this); }
+
+ private:
+  vector<string> *_files;
+  _ForEachDirectoryEntry _walk_entries;
+};
+
+void GetAllFilesUnder(const string &path, vector<string> *result) {
+  _GetAllFilesUnder(path, result, &ForEachDirectoryEntry);
+}
+
+void _GetAllFilesUnder(const string &path,
+                       vector<string> *result,
+                       _ForEachDirectoryEntry walk_entries) {
+  DirectoryTreeWalker(result, walk_entries).Walk(path);
+}
+
 }  // namespace blaze_util
diff --git a/src/main/cpp/util/file.h b/src/main/cpp/util/file.h
index 238ccf0..dfa08ad 100644
--- a/src/main/cpp/util/file.h
+++ b/src/main/cpp/util/file.h
@@ -15,6 +15,7 @@
 #define BAZEL_SRC_MAIN_CPP_UTIL_FILE_H_
 
 #include <string>
+#include <vector>
 
 namespace blaze_util {
 
@@ -29,6 +30,26 @@
 
 std::string JoinPath(const std::string &path1, const std::string &path2);
 
+// Lists all files in `path` and all of its subdirectories.
+//
+// Does not follow symlinks / junctions.
+//
+// Populates `result` with the full paths of the files. Every entry will have
+// `path` as its prefix. If `path` is a file, `result` contains just this file.
+void GetAllFilesUnder(const std::string &path,
+                      std::vector<std::string> *result);
+
+class DirectoryEntryConsumer;
+
+// Visible for testing only.
+typedef void (*_ForEachDirectoryEntry)(const std::string &path,
+                                       DirectoryEntryConsumer *consume);
+
+// Visible for testing only.
+void _GetAllFilesUnder(const std::string &path,
+                       std::vector<std::string> *result,
+                       _ForEachDirectoryEntry walk_entries);
+
 }  // namespace blaze_util
 
 #endif  // BAZEL_SRC_MAIN_CPP_UTIL_FILE_H_
diff --git a/src/main/cpp/util/file_platform.h b/src/main/cpp/util/file_platform.h
index b0df5ea..c43be79 100644
--- a/src/main/cpp/util/file_platform.h
+++ b/src/main/cpp/util/file_platform.h
@@ -40,6 +40,27 @@
 // Changes the current working directory to `path`, returns true upon success.
 bool ChangeDirectory(const std::string& path);
 
+// Interface to be implemented by ForEachDirectoryEntry clients.
+class DirectoryEntryConsumer {
+ public:
+  virtual ~DirectoryEntryConsumer() {}
+
+  // This method is called for each entry in a directory.
+  // `name` is the full path of the entry.
+  // `is_directory` is true if this entry is a directory (but false if this is a
+  // symlink pointing to a directory).
+  virtual void Consume(const std::string &name, bool is_directory) = 0;
+};
+
+// Executes a function for each entry in a directory (except "." and "..").
+//
+// Returns true if the `path` referred to a directory or directory symlink,
+// false otherwise.
+//
+// See DirectoryEntryConsumer for more details.
+void ForEachDirectoryEntry(const std::string &path,
+                           DirectoryEntryConsumer *consume);
+
 }  // namespace blaze_util
 
 #endif  // BAZEL_SRC_MAIN_CPP_UTIL_FILE_PLATFORM_H_
diff --git a/src/main/cpp/util/file_posix.cc b/src/main/cpp/util/file_posix.cc
index 90355b9..32f86da 100644
--- a/src/main/cpp/util/file_posix.cc
+++ b/src/main/cpp/util/file_posix.cc
@@ -14,6 +14,7 @@
 #include "src/main/cpp/util/file_platform.h"
 
 #include <sys/stat.h>
+#include <dirent.h>  // DIR, dirent, opendir, closedir
 #include <limits.h>  // PATH_MAX
 #include <stdlib.h>  // getenv
 #include <unistd.h>  // access
@@ -84,4 +85,37 @@
   return chdir(path.c_str()) == 0;
 }
 
+void ForEachDirectoryEntry(const string &path,
+                           DirectoryEntryConsumer *consume) {
+  DIR *dir;
+  struct dirent *ent;
+
+  if ((dir = opendir(path.c_str())) == NULL) {
+    // This is not a directory or it cannot be opened.
+    return;
+  }
+
+  while ((ent = readdir(dir)) != NULL) {
+    if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) {
+      continue;
+    }
+
+    string filename(blaze_util::JoinPath(path, ent->d_name));
+    bool is_directory;
+    if (ent->d_type == DT_UNKNOWN) {
+      struct stat buf;
+      if (lstat(filename.c_str(), &buf) == -1) {
+        die(blaze_exit_code::INTERNAL_ERROR, "stat failed");
+      }
+      is_directory = S_ISDIR(buf.st_mode);
+    } else {
+      is_directory = (ent->d_type == DT_DIR);
+    }
+
+    consume->Consume(filename, is_directory);
+  }
+
+  closedir(dir);
+}
+
 }  // namespace blaze_util
diff --git a/src/main/cpp/util/file_windows.cc b/src/main/cpp/util/file_windows.cc
index 5cf9987..c9df3af 100644
--- a/src/main/cpp/util/file_windows.cc
+++ b/src/main/cpp/util/file_windows.cc
@@ -43,4 +43,10 @@
   pdie(255, "blaze_util::ChangeDirectory is not implemented on Windows");
 }
 
+void ForEachDirectoryEntry(const string &path,
+                           DirectoryEntryConsumer *consume) {
+  // TODO(bazel-team): implement this.
+  pdie(255, "blaze_util::ForEachDirectoryEntry is not implemented on Windows");
+}
+
 }  // namespace blaze_util
diff --git a/src/test/cpp/util/BUILD b/src/test/cpp/util/BUILD
index e5524e9..60c057b 100644
--- a/src/test/cpp/util/BUILD
+++ b/src/test/cpp/util/BUILD
@@ -20,20 +20,13 @@
 
 cc_test(
     name = "file_test",
-    srcs = ["file_test.cc"],
-    deps = [
-        "//src/main/cpp/util:file",
-        "//third_party:gtest",
+    srcs = [
+        "file_posix_test.cc",
+        "file_test.cc",
     ],
-)
-
-cc_test(
-    name = "file_platform_test",
-    srcs = ["file_posix_test.cc"],
     tags = ["manual"],
     deps = [
         "//src/main/cpp/util:file",
-        "//src/main/cpp/util:file_platform",
         "//third_party:gtest",
     ],
 )
diff --git a/src/test/cpp/util/file_posix_test.cc b/src/test/cpp/util/file_posix_test.cc
index 52ddbd6..446ada9 100644
--- a/src/test/cpp/util/file_posix_test.cc
+++ b/src/test/cpp/util/file_posix_test.cc
@@ -14,11 +14,17 @@
 #include <fcntl.h>
 #include <unistd.h>
 
+#include <algorithm>
+
 #include "src/main/cpp/util/file_platform.h"
 #include "gtest/gtest.h"
 
 namespace blaze_util {
 
+using std::pair;
+using std::string;
+using std::vector;
+
 TEST(FilePosixTest, Which) {
   ASSERT_EQ("", Which(""));
   ASSERT_EQ("", Which("foo"));
@@ -119,4 +125,100 @@
   ASSERT_EQ(string(old_wd), string(new_wd));
 }
 
+class MockDirectoryEntryConsumer : public DirectoryEntryConsumer {
+ public:
+  void Consume(const std::string &name, bool is_directory) override {
+    entries.push_back(pair<string, bool>(name, is_directory));
+  }
+
+  vector<pair<string, bool> > entries;
+};
+
+TEST(FilePosixTest, ForEachDirectoryEntry) {
+  // Get the test's temp dir.
+  char* tmpdir_cstr = getenv("TEST_TMPDIR");
+  ASSERT_FALSE(tmpdir_cstr == NULL);
+  string tempdir(tmpdir_cstr);
+  ASSERT_FALSE(tempdir.empty());
+  if (tempdir.back() == '/') {
+    tempdir = tempdir.substr(0, tempdir.size() - 1);
+  }
+
+  // Create the root directory for the mock directory tree.
+  string root = tempdir + "/FilePosixTest.ForEachDirectoryEntry.root";
+  ASSERT_EQ(0, mkdir(root.c_str(), 0700));
+
+  // Names of mock files and directories.
+  string dir = root + "/dir";
+  string file = root + "/file";
+  string dir_sym = root + "/dir_sym";
+  string file_sym = root + "/file_sym";
+  string subfile = dir + "/subfile";
+  string subfile_through_sym = dir_sym + "/subfile";
+
+  // Create mock directory, file, and symlinks.
+  int fd = open(file.c_str(), O_CREAT, 0700);
+  ASSERT_GT(fd, 0);
+  close(fd);
+  ASSERT_EQ(0, mkdir(dir.c_str(), 0700));
+  ASSERT_EQ(0, symlink("dir", dir_sym.c_str()));
+  ASSERT_EQ(0, symlink("file", file_sym.c_str()));
+  fd = open(subfile.c_str(), O_CREAT, 0700);
+  ASSERT_GT(fd, 0);
+  close(fd);
+
+  // Assert that stat'ing the symlinks (with following them) point to the right
+  // filesystem entry types.
+  struct stat stat_buf;
+  ASSERT_EQ(0, stat(dir_sym.c_str(), &stat_buf));
+  ASSERT_TRUE(S_ISDIR(stat_buf.st_mode));
+  ASSERT_EQ(0, stat(file_sym.c_str(), &stat_buf));
+  ASSERT_FALSE(S_ISDIR(stat_buf.st_mode));
+
+  // Actual test: list the directory.
+  MockDirectoryEntryConsumer consumer;
+  ForEachDirectoryEntry(root, &consumer);
+  ASSERT_EQ(4, consumer.entries.size());
+
+  // Sort the collected directory entries.
+  struct {
+    bool operator()(const pair<string, bool> &a, const pair<string, bool> &b) {
+      return a.first < b.first;
+    }
+  } sort_pairs;
+
+  std::sort(consumer.entries.begin(), consumer.entries.end(), sort_pairs);
+
+  // Assert that the directory entries have the right name and type.
+  pair<string, bool> expected;
+  expected = pair<string, bool>(dir, true);
+  ASSERT_EQ(expected, consumer.entries[0]);
+  expected = pair<string, bool>(dir_sym, false);
+  ASSERT_EQ(expected, consumer.entries[1]);
+  expected = pair<string, bool>(file, false);
+  ASSERT_EQ(expected, consumer.entries[2]);
+  expected = pair<string, bool>(file_sym, false);
+  ASSERT_EQ(expected, consumer.entries[3]);
+
+  // Actual test: list a directory symlink.
+  consumer.entries.clear();
+  ForEachDirectoryEntry(dir_sym, &consumer);
+  ASSERT_EQ(1, consumer.entries.size());
+  expected = pair<string, bool>(subfile_through_sym, false);
+  ASSERT_EQ(expected, consumer.entries[0]);
+
+  // Actual test: list a path that's actually a file, not a directory.
+  consumer.entries.clear();
+  ForEachDirectoryEntry(file, &consumer);
+  ASSERT_TRUE(consumer.entries.empty());
+
+  // Cleanup: delete mock directory tree.
+  rmdir(subfile.c_str());
+  rmdir(dir.c_str());
+  unlink(dir_sym.c_str());
+  unlink(file.c_str());
+  unlink(file_sym.c_str());
+  rmdir(root.c_str());
+}
+
 }  // namespace blaze_util
diff --git a/src/test/cpp/util/file_test.cc b/src/test/cpp/util/file_test.cc
index 9ea3a85..3e3ff3b 100644
--- a/src/test/cpp/util/file_test.cc
+++ b/src/test/cpp/util/file_test.cc
@@ -11,12 +11,18 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include <algorithm>
+
 #include "src/main/cpp/util/file.h"
+#include "src/main/cpp/util/file_platform.h"
 #include "gtest/gtest.h"
 
 namespace blaze_util {
 
-TEST(BlazeUtil, JoinPath) {
+using std::string;
+using std::vector;
+
+TEST(FileTest, JoinPath) {
   std::string path = JoinPath("", "");
   ASSERT_EQ("", path);
 
@@ -36,4 +42,37 @@
   ASSERT_EQ("/", path);
 }
 
+void MockDirectoryListingFunction(const string &path,
+                                  DirectoryEntryConsumer *consume) {
+  if (path == "root") {
+    consume->Consume("root/file1", false);
+    consume->Consume("root/dir2", true);
+    consume->Consume("root/dir1", true);
+  } else if (path == "root/dir1") {
+    consume->Consume("root/dir1/dir3", true);
+    consume->Consume("root/dir1/file2", false);
+  } else if (path == "root/dir2") {
+    consume->Consume("root/dir2/file3", false);
+  } else if (path == "root/dir1/dir3") {
+    consume->Consume("root/dir1/dir3/file4", false);
+    consume->Consume("root/dir1/dir3/file5", false);
+  } else {
+    // Unexpected path
+    GTEST_FAIL();
+  }
+}
+
+TEST(FileTest, GetAllFilesUnder) {
+  vector<string> result;
+  _GetAllFilesUnder("root", &result, &MockDirectoryListingFunction);
+  std::sort(result.begin(), result.end());
+
+  vector<string> expected({"root/dir1/dir3/file4",
+                           "root/dir1/dir3/file5",
+                           "root/dir1/file2",
+                           "root/dir2/file3",
+                           "root/file1"});
+  ASSERT_EQ(expected, result);
+}
+
 }  // namespace blaze_util