| commit | cbb12ea4b4c5d128e5cf7bee8941847ef60e155d | [log] [tgz] |
|---|---|---|
| author | Yun Peng <pcloudy@google.com> | Tue Oct 14 14:49:46 2025 +0200 |
| committer | GitHub <noreply@github.com> | Tue Oct 14 14:49:46 2025 +0200 |
| tree | 131a28b970cd829beaa236fe05e21afe6abc5fe1 | |
| parent | ef50fc95ce2bc69cb48a47e604cebc0e61c296ae [diff] |
Unquote URL when mirroring (#2387)

```
https://github.com/adoptium/temurin24-binaries/releases/download/jdk-24.0.2%2B12/OpenJDK24U-jdk_s390x_linux_hotspot_24.0.2_12.tar.gz
```
=>
```
"https://github.com/adoptium/temurin24-binaries/releases/download/jdk-24.0.2+12/OpenJDK24U-jdk_s390x_linux_hotspot_24.0.2_12.tar.gz",
```
```diff
diff --git a/buildkite/mirror_404_downloads.py b/buildkite/mirror_404_downloads.py
index 1797382..31e06ce 100755
--- a/buildkite/mirror_404_downloads.py
+++ b/buildkite/mirror_404_downloads.py
@@ -40,7 +40,7 @@
 GCS_BUCKET = "bazel-mirror"
 BUILDKITE_ORG = "bazel"
 BUILDKITE_PIPELINE = "bazel-bazel"
-URL_RE = re.compile(
+URL_RE = re.compile( # Matches URLs with optional URL-encoded characters
     r"Download from (https?://mirror\.bazel\.build\S+)\s+failed: class java.io.FileNotFoundException GET returned 404 Not Found"
 )
 
@@ -71,7 +71,10 @@
 
 def parse_urls_from_logs(logs: str) -> Set[str]:
     """Parses failed download URLs from the given logs."""
-    return set(URL_RE.findall(logs))
+    found_urls = URL_RE.findall(logs)
+    # URL-decode the found URLs to handle characters like %2B
+    decoded_urls = {requests.utils.unquote(url) for url in found_urls}
+    return decoded_urls
 
 
 def mirror_url(url: str, bucket: str) -> MirrorResult:
```