bazel-diff: Use separate workspace for diffbase (#1645)
Previously we performed all operations in the current workspace
directory (including checking out the diffbase commit). This had some
unintended consequences. As a result, this change creates a separate
directory for the diffbase commit. The script simply downloads the
.tar.gz archive of the given commit since it should be faster than "git
clone".
Progress towards #1643
diff --git a/buildkite/bazelci.py b/buildkite/bazelci.py
index a6a9a3f..2896736 100755
--- a/buildkite/bazelci.py
+++ b/buildkite/bazelci.py
@@ -34,6 +34,7 @@
import stat
import subprocess
import sys
+import tarfile
import tempfile
import threading
import time
@@ -1310,23 +1311,36 @@
if use_bazelisk_migrate() and platform == "windows":
os.environ["BAZELISK_SHUTDOWN"] = "1"
- cmd_exec_func = execute_batch_commands if platform == "windows" else execute_shell_commands
- cmd_exec_func(task_config.get("setup", None))
-
- # Allow the config to override the current working directory.
- required_prefix = os.getcwd()
- requested_working_dir = os.path.abspath(task_config.get("working_directory", ""))
- if os.path.commonpath([required_prefix, requested_working_dir]) != required_prefix:
- raise BuildkiteException("working_directory refers to a path outside the workspace")
- os.chdir(requested_working_dir)
-
# Set OUTPUT_BASE environment variable
os.environ["OUTPUT_BASE"] = get_output_base(bazel_binary, platform)
- if platform == "windows":
- execute_batch_commands(task_config.get("batch_commands", None))
- else:
- execute_shell_commands(task_config.get("shell_commands", None))
+ cmd_exec_func = execute_batch_commands if platform == "windows" else execute_shell_commands
+ cmd_exec_func(task_config.get("setup", None))
+
+ def PrepareRepoInCwd():
+ # Allow the config to override the current working directory.
+ requested_working_dir = task_config.get("working_directory")
+ if requested_working_dir:
+ if os.path.isabs(requested_working_dir):
+ raise BuildkiteException(
+ f"working_directory must be relative to the repository root, "
+ "but was {requested_working_dir}"
+ )
+
+ full_requested_working_dir = os.path.abspath(requested_working_dir)
+ if not os.path.isdir(full_requested_working_dir):
+ raise BuildkiteException(
+ f"{full_requested_working_dir} does not exist or is not a directory"
+ )
+
+ os.chdir(full_requested_working_dir)
+
+ if platform == "windows":
+ execute_batch_commands(task_config.get("batch_commands", None))
+ else:
+ execute_shell_commands(task_config.get("shell_commands", None))
+
+ PrepareRepoInCwd()
bazel_version = print_bazel_version_info(bazel_binary, platform)
@@ -1352,7 +1366,8 @@
bazel_binary,
build_only,
test_only,
- requested_working_dir,
+ os.getcwd(),
+ PrepareRepoInCwd,
git_commit,
test_flags,
)
@@ -2105,7 +2120,14 @@
def calculate_targets(
- task_config, bazel_binary, build_only, test_only, workspace_dir, git_commit, test_flags
+ task_config,
+ bazel_binary,
+ build_only,
+ test_only,
+ workspace_dir,
+ ws_setup_func,
+ git_commit,
+ test_flags,
):
print_collapsed_group(":dart: Calculating targets")
@@ -2149,7 +2171,12 @@
actual_test_targets = (
filter_unchanged_targets(
- expanded_test_targets, workspace_dir, bazel_binary, diffbase, git_commit
+ expanded_test_targets,
+ workspace_dir,
+ ws_setup_func,
+ bazel_binary,
+ diffbase,
+ git_commit,
)
if use_bazel_diff
else expanded_test_targets
@@ -2257,7 +2284,7 @@
def filter_unchanged_targets(
- expanded_test_targets, workspace_dir, bazel_binary, diffbase, git_commit
+ expanded_test_targets, workspace_dir, ws_setup_func, bazel_binary, diffbase, git_commit
):
print_collapsed_group(
f":scissors: Filtering targets that haven't been affected since {diffbase}"
@@ -2265,14 +2292,25 @@
tmpdir = tempfile.mkdtemp()
try:
- eprint(f"Downloading bazel-diff to {tmpdir}")
- bazel_diff_path = download_bazel_diff(tmpdir)
resolved_diffbase = resolve_diffbase(diffbase)
+ eprint(f"Resolved diffbase to {resolved_diffbase}")
+ eprint("Cloning comparison repository...")
+ diffbase_archive_url = get_commit_archive_url(resolved_diffbase)
+ local_archive_path = download_file(diffbase_archive_url, tmpdir, "repo.tar.gz")
+ diffbase_repo_dir = os.path.join(tmpdir, resolved_diffbase)
+ extract_archive(local_archive_path, diffbase_repo_dir)
+
+ eprint("Setting up comparison repository...")
+ os.chdir(diffbase_repo_dir)
+ ws_setup_func()
+
+ eprint(f"Downloading bazel-diff to {tmpdir}")
+ bazel_diff_path = download_file(BAZEL_DIFF_URL, tmpdir, "bazel-diff.jar")
eprint(f"Running bazel-diff for {resolved_diffbase} and {git_commit}")
affected_targets = run_bazel_diff(
- bazel_diff_path, workspace_dir, bazel_binary, resolved_diffbase, git_commit, tmpdir
+ bazel_diff_path, diffbase_repo_dir, workspace_dir, bazel_binary, tmpdir
)
except (BuildkiteException, BuildkiteInfraException) as ex:
try:
@@ -2296,6 +2334,7 @@
return expanded_test_targets
finally:
shutil.rmtree(tmpdir)
+ os.chdir(workspace_dir)
config_target_set = set(expanded_test_targets)
remaining_targets = list(config_target_set.intersection(affected_targets))
@@ -2326,7 +2365,7 @@
def resolve_diffbase(diffbase):
if diffbase in AUTO_DIFFBASE_VALUES:
- return "HEAD^"
+ return resolve_revision("HEAD^")
elif COMMIT_RE.fullmatch(diffbase):
return diffbase
@@ -2337,39 +2376,42 @@
)
-def download_bazel_diff(directory):
- local_path = os.path.join(directory, "bazel-diff.jar")
+def get_commit_archive_url(resolved_diffbase):
+ repo_url = os.getenv("BUILDKITE_REPO", "")
+ prefix = "+" if "googlesource" in repo_url else ""
+ return repo_url.replace(".git", "/{}archive/{}.tar.gz".format(prefix, resolved_diffbase))
+
+
+def extract_archive(archive_url, dest_dir):
+ if not os.path.isdir(dest_dir):
+ os.mkdir(dest_dir)
+
try:
- execute_command(["curl", "-sSL", BAZEL_DIFF_URL, "-o", local_path])
+ with tarfile.open(archive_url, mode="r:gz") as archive:
+ archive.extractall(dest_dir)
+ except tarfile.TarError as ex:
+ raise BuildkiteInfraException("Failed to extract repository archive: {}".format(ex)) from ex
+
+
+def download_file(url, dest_dir, dest_filename):
+ local_path = os.path.join(dest_dir, dest_filename)
+ try:
+ execute_command(["curl", "-sSL", url, "-o", local_path])
except subprocess.CalledProcessError as ex:
- raise BuildkiteInfraException(
- "Failed to download {}: {}\n{}".format(BAZEL_DIFF_URL, ex, ex.stderr)
- )
+ raise BuildkiteInfraException("Failed to download {}: {}\n{}".format(url, ex, ex.stderr))
return local_path
-def run_bazel_diff(
- bazel_diff_path, workspace_dir, bazel_binary, start_commit, end_commit, data_dir
-):
+def run_bazel_diff(bazel_diff_path, old_workspace_dir, new_workspace_dir, bazel_binary, data_dir):
before_json = os.path.join(data_dir, "before.json")
after_json = os.path.join(data_dir, "after.json")
targets_file = os.path.join(data_dir, "targets.txt")
try:
- for commit, json_path in ((start_commit, before_json), (end_commit, after_json)):
- # TODO: use -C once all machines run Git 1.8+
- execute_command(
- [
- "git",
- "--git-dir",
- os.path.join(workspace_dir, ".git"),
- "--work-tree",
- workspace_dir,
- "checkout",
- commit,
- "--quiet",
- ]
- )
+ for repo_dir, json_path in (
+ (old_workspace_dir, before_json),
+ (new_workspace_dir, after_json),
+ ):
execute_command(
[
"java",
@@ -2377,7 +2419,7 @@
bazel_diff_path,
"generate-hashes",
"-w",
- workspace_dir,
+ repo_dir,
"-b",
bazel_binary,
json_path,
@@ -3486,7 +3528,7 @@
def update_last_green_commit_if_newer(last_green_commit_url):
last_green_commit = get_last_green_commit(last_green_commit_url)
- current_commit = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode("utf-8").strip()
+ current_commit = resolve_revision("HEAD")
if last_green_commit:
success = False
try:
@@ -3527,6 +3569,10 @@
)
+def resolve_revision(rev):
+ return subprocess.check_output(["git", "rev-parse", rev]).decode("utf-8").strip()
+
+
def try_update_last_green_downstream_commit():
last_green_commit_url = bazelci_last_green_downstream_commit_url()
update_last_green_commit_if_newer(last_green_commit_url)