Take type in consideration in http_archive

In order to fix the issue where hash sum comparison fails while downloading a .tar.gz file when a server provides Content-Encoding: x-gzip in the HttpResponse the type parameter provided by the user is used to decided whether a file should be unzipped.

Closes #11686

Closes #11738.

PiperOrigin-RevId: 333290852
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/DelegatingDownloader.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/DelegatingDownloader.java
index 6000262..364e2d8 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/DelegatingDownloader.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/DelegatingDownloader.java
@@ -52,13 +52,14 @@
       String canonicalId,
       Path destination,
       ExtendedEventHandler eventHandler,
-      Map<String, String> clientEnv)
+      Map<String, String> clientEnv,
+      Optional<String> type)
       throws IOException, InterruptedException {
     Downloader downloader = defaultDelegate;
     if (delegate != null) {
       downloader = delegate;
     }
     downloader.download(
-        urls, authHeaders, checksum, canonicalId, destination, eventHandler, clientEnv);
+        urls, authHeaders, checksum, canonicalId, destination, eventHandler, clientEnv, type);
   }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/DownloadManager.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/DownloadManager.java
index a89077e..c556829 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/DownloadManager.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/DownloadManager.java
@@ -187,7 +187,7 @@
 
     try {
       downloader.download(
-          urls, authHeaders, checksum, canonicalId, destination, eventHandler, clientEnv);
+          urls, authHeaders, checksum, canonicalId, destination, eventHandler, clientEnv, type);
     } catch (InterruptedIOException e) {
       throw new InterruptedException(e.getMessage());
     }
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/Downloader.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/Downloader.java
index 202ece2..5115a6a 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/Downloader.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/Downloader.java
@@ -36,6 +36,7 @@
    * @param authHeaders map of authentication headers per URL
    * @param checksum valid checksum which is checked, or absent to disable
    * @param output path to the destination file to write
+   * @param type extension, e.g. "tar.gz" to force on downloaded filename, or empty to not do this
    * @throws IOException if download was attempted and ended up failing
    * @throws InterruptedException if this thread is being cast into oblivion
    */
@@ -46,6 +47,7 @@
       String canonicalId,
       Path output,
       ExtendedEventHandler eventHandler,
-      Map<String, String> clientEnv)
+      Map<String, String> clientEnv,
+      Optional<String> type)
       throws IOException, InterruptedException;
 }
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpConnectorMultiplexer.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpConnectorMultiplexer.java
index 73d175c..a6856f9 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpConnectorMultiplexer.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpConnectorMultiplexer.java
@@ -98,7 +98,14 @@
   }
 
   public HttpStream connect(List<URL> urls, Optional<Checksum> checksum) throws IOException {
-    return connect(urls, checksum, ImmutableMap.<URI, Map<String, String>>of());
+    return connect(
+        urls, checksum, ImmutableMap.<URI, Map<String, String>>of(), Optional.<String>absent());
+  }
+
+  public HttpStream connect(
+      List<URL> urls, Optional<Checksum> checksum, Map<URI, Map<String, String>> authHeaders)
+      throws IOException {
+    return connect(urls, checksum, authHeaders, Optional.<String>absent());
   }
 
   /**
@@ -120,12 +127,16 @@
    * @param urls mirrors by preference; each URL can be: file, http, or https
    * @param checksum checksum lazily checked on entire payload, or empty to disable
    * @return an {@link InputStream} of response payload
+   * @param type extension, e.g. "tar.gz" to force on downloaded filename, or empty to not do this
    * @throws IOException if all mirrors are down and contains suppressed exception of each attempt
    * @throws InterruptedIOException if current thread is being cast into oblivion
    * @throws IllegalArgumentException if {@code urls} is empty or has an unsupported protocol
    */
   public HttpStream connect(
-      List<URL> urls, Optional<Checksum> checksum, Map<URI, Map<String, String>> authHeaders)
+      List<URL> urls,
+      Optional<Checksum> checksum,
+      Map<URI, Map<String, String>> authHeaders,
+      Optional<String> type)
       throws IOException {
     HttpUtils.checkUrlsArgument(urls);
     if (Thread.interrupted()) {
@@ -133,7 +144,7 @@
     }
     // If there's only one URL then there's no need for us to run all our fancy thread stuff.
     if (urls.size() == 1) {
-      return establishConnection(urls.get(0), checksum, authHeaders);
+      return establishConnection(urls.get(0), checksum, authHeaders, type);
     }
     MutexConditionSharedMemory context = new MutexConditionSharedMemory();
     // The parent thread always holds the lock except when released by wait().
@@ -269,7 +280,9 @@
         // Now we're actually going to attempt to connect to the remote server.
         HttpStream result;
         try {
-          result = establishConnection(work.url, work.checksum, work.authHeaders);
+          result =
+              establishConnection(
+                  work.url, work.checksum, work.authHeaders, Optional.<String>absent());
         } catch (SocketTimeoutException e) {
           // SocketTimeoutException derives from InterruptedIOException, but its occurrence
           // is truly exceptional, so we handle it separately here. Failing to do so hides
@@ -344,7 +357,10 @@
   }
 
   private HttpStream establishConnection(
-      final URL url, Optional<Checksum> checksum, Map<URI, Map<String, String>> additionalHeaders)
+      final URL url,
+      Optional<Checksum> checksum,
+      Map<URI, Map<String, String>> additionalHeaders,
+      Optional<String> type)
       throws IOException {
     final Function<URL, ImmutableMap<String, String>> headerFunction =
         getHeaderFunction(REQUEST_HEADERS, additionalHeaders);
@@ -371,7 +387,8 @@
                   }
                 });
           }
-        });
+        },
+        type);
   }
 
   private static String describeErrors(Collection<Throwable> errors) {
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpDownloader.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpDownloader.java
index 5691fd4..af633e1 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpDownloader.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpDownloader.java
@@ -65,7 +65,8 @@
       String canonicalId,
       Path destination,
       ExtendedEventHandler eventHandler,
-      Map<String, String> clientEnv)
+      Map<String, String> clientEnv,
+      Optional<String> type)
       throws IOException, InterruptedException {
     Clock clock = new JavaClock();
     Sleeper sleeper = new JavaSleeper();
@@ -89,7 +90,7 @@
       semaphore.acquire();
 
       try (HttpStream payload =
-              multiplexer.connect(Collections.singletonList(url), checksum, authHeaders);
+              multiplexer.connect(Collections.singletonList(url), checksum, authHeaders, type);
           OutputStream out = destination.getOutputStream()) {
         try {
           ByteStreams.copy(payload, out);
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpStream.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpStream.java
index 621c4c8..f5e0318 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpStream.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpStream.java
@@ -57,13 +57,23 @@
       this.progressInputStreamFactory = progressInputStreamFactory;
     }
 
-    @SuppressWarnings("resource")
     HttpStream create(
         @WillCloseWhenClosed URLConnection connection,
         URL originalUrl,
         Optional<Checksum> checksum,
         Reconnector reconnector)
         throws IOException {
+      return create(connection, originalUrl, checksum, reconnector, Optional.<String>absent());
+    }
+
+    @SuppressWarnings("resource")
+    HttpStream create(
+        @WillCloseWhenClosed URLConnection connection,
+        URL originalUrl,
+        Optional<Checksum> checksum,
+        Reconnector reconnector,
+        Optional<String> type)
+        throws IOException {
       InputStream stream = new InterruptibleInputStream(connection.getInputStream());
       try {
         // If server supports range requests, we can retry on read errors. See RFC7233 § 2.3.
@@ -81,10 +91,13 @@
 
         // Determine if we need to transparently gunzip. See RFC2616 § 3.5 and § 14.11. Please note
         // that some web servers will send Content-Encoding: gzip even when we didn't request it if
-        // the file is a .gz file.
+        // the file is a .gz file. Therefore we take the type parameter from the rule http_archive
+        // in consideration. If the repository/file that we are downloading is already compressed we
+        // should not decompress it to preserve the desired file format.
         if (GZIP_CONTENT_ENCODING.contains(Strings.nullToEmpty(connection.getContentEncoding()))
             && !GZIPPED_EXTENSIONS.contains(HttpUtils.getExtension(connection.getURL().getPath()))
-            && !GZIPPED_EXTENSIONS.contains(HttpUtils.getExtension(originalUrl.getPath()))) {
+            && !GZIPPED_EXTENSIONS.contains(HttpUtils.getExtension(originalUrl.getPath()))
+            && !typeIsGZIP(type)) {
           stream = new GZIPInputStream(stream, GZIP_BUFFER_BYTES);
         }
 
@@ -117,6 +130,26 @@
       }
       return new HttpStream(stream, connection.getURL());
     }
+
+    /**
+     * Checks if the given type is GZIP
+     *
+     * @param type extension, e.g. "tar.gz"
+     * @return whether the type is GZIP or not
+     */
+    private static boolean typeIsGZIP(Optional<String> type) {
+      if (type.isPresent()) {
+        String t = type.get();
+
+        if (t.contains(".")) {
+          // We only want to look at the last extension.
+          t = HttpUtils.getExtension(t);
+        }
+
+        return GZIPPED_EXTENSIONS.contains(t);
+      }
+      return false;
+    }
   }
 
   private final URL url;
diff --git a/src/main/java/com/google/devtools/build/lib/remote/downloader/GrpcRemoteDownloader.java b/src/main/java/com/google/devtools/build/lib/remote/downloader/GrpcRemoteDownloader.java
index 36732c6..37b6399 100644
--- a/src/main/java/com/google/devtools/build/lib/remote/downloader/GrpcRemoteDownloader.java
+++ b/src/main/java/com/google/devtools/build/lib/remote/downloader/GrpcRemoteDownloader.java
@@ -106,7 +106,8 @@
       String canonicalId,
       Path destination,
       ExtendedEventHandler eventHandler,
-      Map<String, String> clientEnv)
+      Map<String, String> clientEnv,
+      com.google.common.base.Optional<String> type)
       throws IOException, InterruptedException {
     final FetchBlobRequest request =
         newFetchBlobRequest(options.remoteInstanceName, urls, authHeaders, checksum, canonicalId);
diff --git a/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpConnectorMultiplexerTest.java b/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpConnectorMultiplexerTest.java
index 33eb8e4..1df50e6 100644
--- a/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpConnectorMultiplexerTest.java
+++ b/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpConnectorMultiplexerTest.java
@@ -104,13 +104,25 @@
     when(connector.connect(eq(URL2), any(Function.class))).thenReturn(connection2);
     when(connector.connect(eq(URL3), any(Function.class))).thenReturn(connection3);
     when(streamFactory.create(
-            same(connection1), any(URL.class), any(Optional.class), any(Reconnector.class)))
+            same(connection1),
+            any(URL.class),
+            any(Optional.class),
+            any(Reconnector.class),
+            any(Optional.class)))
         .thenReturn(stream1);
     when(streamFactory.create(
-            same(connection2), any(URL.class), any(Optional.class), any(Reconnector.class)))
+            same(connection2),
+            any(URL.class),
+            any(Optional.class),
+            any(Reconnector.class),
+            any(Optional.class)))
         .thenReturn(stream2);
     when(streamFactory.create(
-            same(connection3), any(URL.class), any(Optional.class), any(Reconnector.class)))
+            same(connection3),
+            any(URL.class),
+            any(Optional.class),
+            any(Reconnector.class),
+            any(Optional.class)))
         .thenReturn(stream3);
   }
 
@@ -157,7 +169,11 @@
     verify(connector).connect(eq(URL1), any(Function.class));
     verify(streamFactory)
         .create(
-            any(URLConnection.class), any(URL.class), eq(DUMMY_CHECKSUM), any(Reconnector.class));
+            any(URLConnection.class),
+            any(URL.class),
+            eq(DUMMY_CHECKSUM),
+            any(Reconnector.class),
+            any(Optional.class));
     verifyNoMoreInteractions(sleeper, connector, streamFactory);
   }
 
@@ -190,7 +206,11 @@
     verify(connector).connect(eq(URL2), any(Function.class));
     verify(streamFactory)
         .create(
-            any(URLConnection.class), any(URL.class), eq(DUMMY_CHECKSUM), any(Reconnector.class));
+            any(URLConnection.class),
+            any(URL.class),
+            eq(DUMMY_CHECKSUM),
+            any(Reconnector.class),
+            any(Optional.class));
     verify(sleeper).sleepMillis(anyLong());
     verifyNoMoreInteractions(sleeper, connector, streamFactory);
   }
diff --git a/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpStreamTest.java b/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpStreamTest.java
index a8ecb2f..84cd37a 100644
--- a/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpStreamTest.java
+++ b/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpStreamTest.java
@@ -267,4 +267,45 @@
     }
     return baos.toByteArray();
   }
+
+  @Test
+  public void tarballHasNoFormatAndTypeIsGzipped_doesntAutomaticallyGunzip() throws Exception {
+    byte[] gzData = gzipData(data);
+    when(connection.getURL()).thenReturn(new URL("http://doodle.example/foo"));
+    when(connection.getContentEncoding()).thenReturn("gzip");
+    when(connection.getInputStream()).thenReturn(new ByteArrayInputStream(gzData));
+    try (HttpStream stream =
+        streamFactory.create(
+            connection, AURL, Optional.absent(), reconnector, Optional.of("tgz"))) {
+      assertThat(toByteArray(stream)).isEqualTo(gzData);
+    }
+  }
+
+  @Test
+  public void tarballHasNoFormatAndTypeIsGzippedAndHasMultipleExtensions_doesntAutomaticallyGunzip()
+      throws Exception {
+    // Similar to tarballHasNoFormatAndTypeIsGzipped_doesntAutomaticallyGunzip but also
+    // checks if the private method typeIsGZIP can handle separation of file extensions.
+    byte[] gzData = gzipData(data);
+    when(connection.getURL()).thenReturn(new URL("http://doodle.example/foo"));
+    when(connection.getContentEncoding()).thenReturn("gzip");
+    when(connection.getInputStream()).thenReturn(new ByteArrayInputStream(gzData));
+    try (HttpStream stream =
+        streamFactory.create(
+            connection, AURL, Optional.absent(), reconnector, Optional.of("tar.gz"))) {
+      assertThat(toByteArray(stream)).isEqualTo(gzData);
+    }
+  }
+
+  @Test
+  public void tarballHasNoFormatAndTypeIsNotGzipped_automaticallyGunzip() throws Exception {
+    when(connection.getURL()).thenReturn(new URL("http://doodle.example/foo"));
+    when(connection.getContentEncoding()).thenReturn("gzip");
+    when(connection.getInputStream()).thenReturn(new ByteArrayInputStream(gzipData(data)));
+    try (HttpStream stream =
+        streamFactory.create(
+            connection, AURL, Optional.absent(), reconnector, Optional.of("tar"))) {
+      assertThat(toByteArray(stream)).isEqualTo(data);
+    }
+  }
 }
diff --git a/src/test/java/com/google/devtools/build/lib/remote/downloader/GrpcRemoteDownloaderTest.java b/src/test/java/com/google/devtools/build/lib/remote/downloader/GrpcRemoteDownloaderTest.java
index cd4e6ac..c9dfcc0 100644
--- a/src/test/java/com/google/devtools/build/lib/remote/downloader/GrpcRemoteDownloaderTest.java
+++ b/src/test/java/com/google/devtools/build/lib/remote/downloader/GrpcRemoteDownloaderTest.java
@@ -150,7 +150,14 @@
     Scratch scratch = new Scratch();
     final Path destination = scratch.resolve("output file path");
     downloader.download(
-        urls, authHeaders, guavaChecksum, canonicalId, destination, eventHandler, clientEnv);
+        urls,
+        authHeaders,
+        guavaChecksum,
+        canonicalId,
+        destination,
+        eventHandler,
+        clientEnv,
+        com.google.common.base.Optional.<String>absent());
 
     try (InputStream in = destination.getInputStream()) {
       return ByteStreams.toByteArray(in);