Update commons-compress to 1.26.1

Fixes #20269.

Update commons-compress to 1.26.1 and replace GZIPInputStream with commons-compress' GzipCompressorInputStream, which [deals correctly with concatenated gz files](https://github.com/apache/commons-compress/blob/53c5e19208caaf63946a41d2763cda1f1b7eadc8/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java#L38-L70). Add a test demonstrating that this fixes extraction of the ruff release archive (thanks, fmeum), and update all related lockfiles.

Closes #22213.

PiperOrigin-RevId: 631509796
Change-Id: I4038244bfbdfbace747554e988587663ca580c16
diff --git a/MODULE.bazel b/MODULE.bazel
index 493432b..f5a0f59 100644
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -156,7 +156,7 @@
         "javax.inject:javax.inject:1",
         "net.bytebuddy:byte-buddy:1.14.5",
         "net.bytebuddy:byte-buddy-agent:1.14.5",
-        "org.apache.commons:commons-compress:1.20",
+        "org.apache.commons:commons-compress:1.26.1",
         "org.apache.commons:commons-pool2:2.8.0",
         "org.apache.tomcat:tomcat-annotations-api:8.0.5",
         "org.apache.velocity:velocity:1.7",
diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock
index 344180e..ea85f24 100644
--- a/MODULE.bazel.lock
+++ b/MODULE.bazel.lock
@@ -1,6 +1,6 @@
 {
   "lockFileVersion": 6,
-  "moduleFileHash": "eba5503742af5785c2d0d81d88e7407c7f23494b5162c055227435549b8774d1",
+  "moduleFileHash": "34bc7fff6553fd04a7df2f9cf415de0813e9a1e62e28b1cf2ce4fd0eb8cdac59",
   "flags": {
     "cmdRegistries": [
       "https://bcr.bazel.build/"
@@ -133,7 +133,7 @@
                   "javax.inject:javax.inject:1",
                   "net.bytebuddy:byte-buddy:1.14.5",
                   "net.bytebuddy:byte-buddy-agent:1.14.5",
-                  "org.apache.commons:commons-compress:1.20",
+                  "org.apache.commons:commons-compress:1.26.1",
                   "org.apache.commons:commons-pool2:2.8.0",
                   "org.apache.tomcat:tomcat-annotations-api:8.0.5",
                   "org.apache.velocity:velocity:1.7",
@@ -2916,7 +2916,7 @@
       "general": {
         "bzlTransitiveDigest": "tunTSmgwd2uvTzkCLtdbuCp0AI+WR+ftiPNqZ0rmcZk=",
         "recordedFileInputs": {
-          "@@//MODULE.bazel": "eba5503742af5785c2d0d81d88e7407c7f23494b5162c055227435549b8774d1",
+          "@@//MODULE.bazel": "34bc7fff6553fd04a7df2f9cf415de0813e9a1e62e28b1cf2ce4fd0eb8cdac59",
           "@@//src/test/tools/bzlmod/MODULE.bazel.lock": "547b1ca7af37ca0b4e7c7de36093d66b81d46440b58b41c76fe9d6df3af9ea52"
         },
         "recordedDirentsInputs": {},
@@ -5421,7 +5421,7 @@
         "recordedFileInputs": {
           "@@//src/tools/android/maven_android_install.json": "09bff3e33d291336046f7c9201630fb5e014f0e60b78b6f09b84e4f5f73ed04f",
           "@@rules_jvm_external~//rules_jvm_external_deps_install.json": "cafb5d2d8119391eb2b322ce3840d3352ea82d496bdb8cbd4b6779ec4d044dda",
-          "@@//maven_install.json": "eeabc47f580911512dad33c1bcbd54f676d70f4ee1b209978d900ad6cd60527b"
+          "@@//maven_install.json": "f43bfe813d2967c815549f8fbb36a234c8e2235c8f54b10dc5da177b1aa0ea9f"
         },
         "recordedDirentsInputs": {},
         "envVariables": {},
@@ -6097,7 +6097,7 @@
                 "{ \"group\": \"javax.inject\", \"artifact\": \"javax.inject\", \"version\": \"1\" }",
                 "{ \"group\": \"net.bytebuddy\", \"artifact\": \"byte-buddy\", \"version\": \"1.14.5\" }",
                 "{ \"group\": \"net.bytebuddy\", \"artifact\": \"byte-buddy-agent\", \"version\": \"1.14.5\" }",
-                "{ \"group\": \"org.apache.commons\", \"artifact\": \"commons-compress\", \"version\": \"1.20\" }",
+                "{ \"group\": \"org.apache.commons\", \"artifact\": \"commons-compress\", \"version\": \"1.26.1\" }",
                 "{ \"group\": \"org.apache.commons\", \"artifact\": \"commons-pool2\", \"version\": \"2.8.0\" }",
                 "{ \"group\": \"org.apache.tomcat\", \"artifact\": \"tomcat-annotations-api\", \"version\": \"8.0.5\" }",
                 "{ \"group\": \"org.apache.velocity\", \"artifact\": \"velocity\", \"version\": \"1.7\" }",
@@ -6908,12 +6908,24 @@
               "repin_instructions": ""
             }
           },
+          "org_apache_commons_commons_lang3_3_14_0": {
+            "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl",
+            "ruleClassName": "http_file",
+            "attributes": {
+              "sha256": "7b96bf3ee68949abb5bc465559ac270e0551596fa34523fddf890ec418dde13c",
+              "urls": [
+                "https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.14.0/commons-lang3-3.14.0.jar"
+              ],
+              "downloaded_file_path": "v1/org/apache/commons/commons-lang3/3.14.0/commons-lang3-3.14.0.jar"
+            }
+          },
           "org_apache_commons_commons_compress_1_20": {
             "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl",
             "ruleClassName": "http_file",
             "attributes": {
               "sha256": "0aeb625c948c697ea7b205156e112363b59ed5e2551212cd4e460bdb72c7c06e",
               "urls": [
+                "https://dl.google.com/android/maven2/org/apache/commons/commons-compress/1.20/commons-compress-1.20.jar",
                 "https://repo1.maven.org/maven2/org/apache/commons/commons-compress/1.20/commons-compress-1.20.jar"
               ],
               "downloaded_file_path": "v1/org/apache/commons/commons-compress/1.20/commons-compress-1.20.jar"
@@ -8029,6 +8041,17 @@
               "downloaded_file_path": "v1/software/amazon/awssdk/s3/2.20.128/s3-2.20.128.jar"
             }
           },
+          "org_apache_commons_commons_compress_1_26_1": {
+            "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl",
+            "ruleClassName": "http_file",
+            "attributes": {
+              "sha256": "27bb5d40f37c3bb7205b4a0540247df057715e9f6cbbd97d626ab8b50318bb04",
+              "urls": [
+                "https://repo1.maven.org/maven2/org/apache/commons/commons-compress/1.26.1/commons-compress-1.26.1.jar"
+              ],
+              "downloaded_file_path": "v1/org/apache/commons/commons-compress/1.26.1/commons-compress-1.26.1.jar"
+            }
+          },
           "com_squareup_javapoet_1_8_0": {
             "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl",
             "ruleClassName": "http_file",
@@ -8343,6 +8366,17 @@
               "downloaded_file_path": "v1/org/slf4j/slf4j-api/1.7.30/slf4j-api-1.7.30.jar"
             }
           },
+          "commons_io_commons_io_2_15_1": {
+            "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl",
+            "ruleClassName": "http_file",
+            "attributes": {
+              "sha256": "a58af12ee1b68cfd2ebb0c27caef164f084381a00ec81a48cc275fd7ea54e154",
+              "urls": [
+                "https://repo1.maven.org/maven2/commons-io/commons-io/2.15.1/commons-io-2.15.1.jar"
+              ],
+              "downloaded_file_path": "v1/commons-io/commons-io/2.15.1/commons-io-2.15.1.jar"
+            }
+          },
           "org_jetbrains_annotations_13_0": {
             "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl",
             "ruleClassName": "http_file",
@@ -8480,6 +8514,17 @@
               "downloaded_file_path": "v1/com/google/api-client/google-api-client/1.35.2/google-api-client-1.35.2.jar"
             }
           },
+          "commons_codec_commons_codec_1_16_1": {
+            "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl",
+            "ruleClassName": "http_file",
+            "attributes": {
+              "sha256": "ec87bfb55f22cbd1b21e2190eeda28b2b312ed2a431ee49fbdcc01812d04a5e4",
+              "urls": [
+                "https://repo1.maven.org/maven2/commons-codec/commons-codec/1.16.1/commons-codec-1.16.1.jar"
+              ],
+              "downloaded_file_path": "v1/commons-codec/commons-codec/1.16.1/commons-codec-1.16.1.jar"
+            }
+          },
           "org_ow2_asm_asm_commons_9_2": {
             "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl",
             "ruleClassName": "http_file",
@@ -8690,7 +8735,7 @@
                 "{ \"group\": \"javax.inject\", \"artifact\": \"javax.inject\", \"version\": \"1\" }",
                 "{ \"group\": \"net.bytebuddy\", \"artifact\": \"byte-buddy\", \"version\": \"1.14.5\" }",
                 "{ \"group\": \"net.bytebuddy\", \"artifact\": \"byte-buddy-agent\", \"version\": \"1.14.5\" }",
-                "{ \"group\": \"org.apache.commons\", \"artifact\": \"commons-compress\", \"version\": \"1.20\" }",
+                "{ \"group\": \"org.apache.commons\", \"artifact\": \"commons-compress\", \"version\": \"1.26.1\" }",
                 "{ \"group\": \"org.apache.commons\", \"artifact\": \"commons-pool2\", \"version\": \"2.8.0\" }",
                 "{ \"group\": \"org.apache.tomcat\", \"artifact\": \"tomcat-annotations-api\", \"version\": \"8.0.5\" }",
                 "{ \"group\": \"org.apache.velocity\", \"artifact\": \"velocity\", \"version\": \"1.7\" }",
diff --git a/maven_install.json b/maven_install.json
index 08c1424..af63033 100644
--- a/maven_install.json
+++ b/maven_install.json
@@ -1,7 +1,7 @@
 {
   "__AUTOGENERATED_FILE_DO_NOT_MODIFY_THIS_FILE_MANUALLY": "THERE_IS_NO_DATA_ONLY_ZUUL",
-  "__INPUT_ARTIFACTS_HASH": -1562612404,
-  "__RESOLVED_ARTIFACTS_HASH": 1921205679,
+  "__INPUT_ARTIFACTS_HASH": -983466987,
+  "__RESOLVED_ARTIFACTS_HASH": 1834716018,
   "conflict_resolution": {
     "com.google.auto.value:auto-value-annotations:1.9": "com.google.auto.value:auto-value-annotations:1.10.4",
     "com.google.code.gson:gson:2.8.9": "com.google.code.gson:gson:2.9.0",
@@ -312,12 +312,24 @@
       },
       "version": "1.12.0"
     },
+    "commons-codec:commons-codec": {
+      "shasums": {
+        "jar": "ec87bfb55f22cbd1b21e2190eeda28b2b312ed2a431ee49fbdcc01812d04a5e4"
+      },
+      "version": "1.16.1"
+    },
     "commons-collections:commons-collections": {
       "shasums": {
         "jar": "eeeae917917144a68a741d4c0dff66aa5c5c5fd85593ff217bced3fc8ca783b8"
       },
       "version": "3.2.2"
     },
+    "commons-io:commons-io": {
+      "shasums": {
+        "jar": "a58af12ee1b68cfd2ebb0c27caef164f084381a00ec81a48cc275fd7ea54e154"
+      },
+      "version": "2.15.1"
+    },
     "commons-lang:commons-lang": {
       "shasums": {
         "jar": "50f11b09f877c294d56f24463f47d28f929cf5044f648661c0f0cfbae9a2f49c"
@@ -588,9 +600,15 @@
     },
     "org.apache.commons:commons-compress": {
       "shasums": {
-        "jar": "0aeb625c948c697ea7b205156e112363b59ed5e2551212cd4e460bdb72c7c06e"
+        "jar": "27bb5d40f37c3bb7205b4a0540247df057715e9f6cbbd97d626ab8b50318bb04"
       },
-      "version": "1.20"
+      "version": "1.26.1"
+    },
+    "org.apache.commons:commons-lang3": {
+      "shasums": {
+        "jar": "7b96bf3ee68949abb5bc465559ac270e0551596fa34523fddf890ec418dde13c"
+      },
+      "version": "3.14.0"
     },
     "org.apache.commons:commons-math3": {
       "shasums": {
@@ -1143,6 +1161,11 @@
     "junit:junit": [
       "org.hamcrest:hamcrest-core"
     ],
+    "org.apache.commons:commons-compress": [
+      "commons-codec:commons-codec",
+      "commons-io:commons-io",
+      "org.apache.commons:commons-lang3"
+    ],
     "org.apache.velocity:velocity": [
       "commons-collections:commons-collections",
       "commons-lang:commons-lang"
@@ -1574,6 +1597,15 @@
     "com.squareup:javapoet": [
       "com.squareup.javapoet"
     ],
+    "commons-codec:commons-codec": [
+      "org.apache.commons.codec",
+      "org.apache.commons.codec.binary",
+      "org.apache.commons.codec.cli",
+      "org.apache.commons.codec.digest",
+      "org.apache.commons.codec.language",
+      "org.apache.commons.codec.language.bm",
+      "org.apache.commons.codec.net"
+    ],
     "commons-collections:commons-collections": [
       "org.apache.commons.collections",
       "org.apache.commons.collections.bag",
@@ -1588,6 +1620,23 @@
       "org.apache.commons.collections.map",
       "org.apache.commons.collections.set"
     ],
+    "commons-io:commons-io": [
+      "org.apache.commons.io",
+      "org.apache.commons.io.build",
+      "org.apache.commons.io.channels",
+      "org.apache.commons.io.charset",
+      "org.apache.commons.io.comparator",
+      "org.apache.commons.io.file",
+      "org.apache.commons.io.file.attribute",
+      "org.apache.commons.io.file.spi",
+      "org.apache.commons.io.filefilter",
+      "org.apache.commons.io.function",
+      "org.apache.commons.io.input",
+      "org.apache.commons.io.input.buffer",
+      "org.apache.commons.io.monitor",
+      "org.apache.commons.io.output",
+      "org.apache.commons.io.serialization"
+    ],
     "commons-lang:commons-lang": [
       "org.apache.commons.lang",
       "org.apache.commons.lang.builder",
@@ -1993,9 +2042,36 @@
       "org.apache.commons.compress.compressors.xz",
       "org.apache.commons.compress.compressors.z",
       "org.apache.commons.compress.compressors.zstandard",
+      "org.apache.commons.compress.harmony",
+      "org.apache.commons.compress.harmony.archive.internal.nls",
+      "org.apache.commons.compress.harmony.pack200",
+      "org.apache.commons.compress.harmony.unpack200",
+      "org.apache.commons.compress.harmony.unpack200.bytecode",
+      "org.apache.commons.compress.harmony.unpack200.bytecode.forms",
+      "org.apache.commons.compress.java.util.jar",
       "org.apache.commons.compress.parallel",
       "org.apache.commons.compress.utils"
     ],
+    "org.apache.commons:commons-lang3": [
+      "org.apache.commons.lang3",
+      "org.apache.commons.lang3.arch",
+      "org.apache.commons.lang3.builder",
+      "org.apache.commons.lang3.compare",
+      "org.apache.commons.lang3.concurrent",
+      "org.apache.commons.lang3.concurrent.locks",
+      "org.apache.commons.lang3.event",
+      "org.apache.commons.lang3.exception",
+      "org.apache.commons.lang3.function",
+      "org.apache.commons.lang3.math",
+      "org.apache.commons.lang3.mutable",
+      "org.apache.commons.lang3.reflect",
+      "org.apache.commons.lang3.stream",
+      "org.apache.commons.lang3.text",
+      "org.apache.commons.lang3.text.translate",
+      "org.apache.commons.lang3.time",
+      "org.apache.commons.lang3.tuple",
+      "org.apache.commons.lang3.util"
+    ],
     "org.apache.commons:commons-math3": [
       "org.apache.commons.math3",
       "org.apache.commons.math3.analysis",
@@ -2371,7 +2447,9 @@
       "com.ryanharter.auto.value:auto-value-gson-factory",
       "com.ryanharter.auto.value:auto-value-gson-runtime",
       "com.squareup:javapoet",
+      "commons-codec:commons-codec",
       "commons-collections:commons-collections",
+      "commons-io:commons-io",
       "commons-lang:commons-lang",
       "io.github.eisop:dataflow-errorprone",
       "io.github.java-diff-utils:java-diff-utils",
@@ -2426,6 +2504,7 @@
       "net.bytebuddy:byte-buddy-agent",
       "net.sf.jopt-simple:jopt-simple",
       "org.apache.commons:commons-compress",
+      "org.apache.commons:commons-lang3",
       "org.apache.commons:commons-math3",
       "org.apache.commons:commons-pool2",
       "org.apache.tomcat:tomcat-annotations-api",
diff --git a/scripts/bootstrap/compile.sh b/scripts/bootstrap/compile.sh
index 3c49679..30b32da 100755
--- a/scripts/bootstrap/compile.sh
+++ b/scripts/bootstrap/compile.sh
@@ -155,11 +155,16 @@
   local output=$3
   shift 3
   local packages=""
+  # Only keep the services subdirectory of META-INF (needed for AutoService).
+  for i in $output/classes/META-INF/*; do
+    local package=$(basename $i)
+    if [[ "$package" != "services" ]]; then
+      rm -r "$i"
+    fi
+  done
   for i in $output/classes/*; do
     local package=$(basename $i)
-    if [[ "$package" != "META-INF" ]]; then
-      packages="$packages -C $output/classes $package"
-    fi
+    packages="$packages -C $output/classes $package"
   done
 
   log "Creating $name.jar..."
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/TarBz2Function.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/TarBz2Function.java
index 8d25d7d..af8c9c4 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/TarBz2Function.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/TarBz2Function.java
@@ -36,6 +36,7 @@
       throws IOException {
     return new BZip2CompressorInputStream(
         new BufferedInputStream(
-            new FileInputStream(descriptor.archivePath().getPathFile()), BUFFER_SIZE));
+            new FileInputStream(descriptor.archivePath().getPathFile()), BUFFER_SIZE),
+        true);
   }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/TarGzFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/TarGzFunction.java
index e1e1204..bb19659 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/TarGzFunction.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/TarGzFunction.java
@@ -19,7 +19,7 @@
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.zip.GZIPInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
 
 /**
  * Creates a repository by unarchiving a .tar.gz file.
@@ -34,8 +34,9 @@
   @Override
   protected InputStream getDecompressorStream(DecompressorDescriptor descriptor)
       throws IOException {
-    return new GZIPInputStream(
+    return new GzipCompressorInputStream(
         new BufferedInputStream(
-            new FileInputStream(descriptor.archivePath().getPathFile()), BUFFER_SIZE));
+            new FileInputStream(descriptor.archivePath().getPathFile()), BUFFER_SIZE),
+        true); // decompressConcatenated: keep reading past the first gzip member of the archive
   }
 }
diff --git a/src/test/shell/bazel/bazel_workspaces_test.sh b/src/test/shell/bazel/bazel_workspaces_test.sh
index 8056929..dc21ce0 100755
--- a/src/test/shell/bazel/bazel_workspaces_test.sh
+++ b/src/test/shell/bazel/bazel_workspaces_test.sh
@@ -504,6 +504,22 @@
   ensure_output_contains_exactly_once "external/repo/out_dir/Ä_foo_∅.txt" "bar"
 }
 
+function test_sparse_tar() {  # Regression test: download_and_extract must unpack the ruff release tarball.
+  set_workspace_command "
+  repository_ctx.download_and_extract(
+      url='https://mirror.bazel.build/github.com/astral-sh/ruff/releases/download/v0.1.6/ruff-aarch64-apple-darwin.tar.gz',
+      sha256='0b626e88762b16908b3dbba8327341ddc13b37ebe6ec1a0db3f033ce5a44162d',
+  )"
+
+  build_and_process_log --exclude_rule "repository @@local_config_cc"  # helper defined outside this hunk; presumably builds and parses the workspace log - TODO confirm
+
+  ensure_contains_exactly 'location: .*repos.bzl:3:38' 1  # one logged event at the download_and_extract call site
+  ensure_contains_atleast 'context: "repository @@repo"' 2  # at least two log entries attributed to the test repository
+  ensure_contains_exactly 'download_and_extract_event' 1  # the single download_and_extract call is recorded once
+
+  [[ -f "$(bazel info output_base)/external/repo/ruff" ]] || fail "Expected ruff binary to be extracted"  # the archive must yield a regular file named ruff
+}
+
 function test_file() {
   set_workspace_command 'repository_ctx.file("filefile.sh", "echo filefile", True)'
 
diff --git a/src/test/shell/bazel/jdeps_class_denylist.txt b/src/test/shell/bazel/jdeps_class_denylist.txt
index 4f22e1a..2686802 100644
--- a/src/test/shell/bazel/jdeps_class_denylist.txt
+++ b/src/test/shell/bazel/jdeps_class_denylist.txt
@@ -9,3 +9,9 @@
 ./lombok/javac/java6/CommentCollectingScannerFactory.class
 ./lombok/javac/java7/CommentCollectingParser.class
 ./lombok/javac/java7/CommentCollectingScanner.class
+./org/apache/commons/compress/harmony/pack200/Pack200Adapter.class
+./org/apache/commons/compress/java/util/jar/Pack200.class
+./org/apache/commons/compress/java/util/jar/Pack200$Packer.class
+./org/apache/commons/compress/java/util/jar/Pack200$Unpacker.class
+./org/apache/commons/lang3/concurrent/AbstractCircuitBreaker.class
+./org/apache/commons/lang3/concurrent/EventCountCircuitBreaker.class
diff --git a/src/test/shell/integration/minimal_jdk_test.sh b/src/test/shell/integration/minimal_jdk_test.sh
index 0a9c126..067bba6 100755
--- a/src/test/shell/integration/minimal_jdk_test.sh
+++ b/src/test/shell/integration/minimal_jdk_test.sh
@@ -42,13 +42,13 @@
 source "$(rlocation "io_bazel/src/test/shell/integration_test_setup.sh")" \
   || { echo "integration_test_setup.sh not found!" >&2; exit 1; }
 
-# Bazel's install base is < 460MB with minimal JDK and > 460MB with an all
+# Bazel's install base is < 465MB with minimal JDK and > 465MB with an all
 # modules JDK.
-function test_size_less_than_460MB() {
+function test_size_less_than_465MB() {
   bazel info
   ib=$(bazel info install_base)
   size=$(du -s "$ib" | cut -d\	 -f1)
-  maxsize=$((1024*460))
+  maxsize=$((1024*465))
   if [ $size -gt $maxsize ]; then
     echo "$ib was too big:" 1>&2
     du -a "$ib" 1>&2