Add an option to remove a directory prefix when extracting an archive

Fixes #221.

RELNOTES: new_http_archive can strip a leading directory prefix from the extracted files (strip_prefix attribute).

--
MOS_MIGRATED_REVID=103556111
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java
index 484c6e6..db1e4a5 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java
@@ -14,6 +14,7 @@
 
 package com.google.devtools.build.lib.bazel.repository;
 
+import com.google.common.base.Optional;
 import com.google.devtools.build.lib.vfs.Path;
 import com.google.devtools.build.skyframe.SkyKey;
 import com.google.devtools.build.skyframe.SkyValue;
@@ -21,6 +22,8 @@
 import java.io.IOException;
 import java.util.Objects;
 
+import javax.annotation.Nullable;
+
 /**
  * The contents of decompressed archive.
  */
@@ -70,10 +73,17 @@
   public static SkyKey key(
       String targetKind, String targetName, Path archivePath, Path repositoryPath)
       throws IOException {
+    return key(targetKind, targetName, archivePath, repositoryPath, null);
+  }
+
+  public static SkyKey key(
+      String targetKind, String targetName, Path archivePath, Path repositoryPath,
+      @Nullable String prefix)
+      throws IOException {
     String baseName = archivePath.getBaseName();
 
     DecompressorDescriptor descriptor =
-        new DecompressorDescriptor(targetKind, targetName, archivePath, repositoryPath);
+        new DecompressorDescriptor(targetKind, targetName, archivePath, repositoryPath, prefix);
 
     if (baseName.endsWith(".zip") || baseName.endsWith(".jar") || baseName.endsWith(".war")) {
       return new SkyKey(ZipFunction.NAME, descriptor);
@@ -95,13 +105,20 @@
     private final String targetName;
     private final Path archivePath;
     private final Path repositoryPath;
+    private final Optional<String> prefix;
 
-    public DecompressorDescriptor(String targetKind, String targetName, Path archivePath,
+    private DecompressorDescriptor(String targetKind, String targetName, Path archivePath,
         Path repositoryPath) {
+      this(targetKind, targetName, archivePath, repositoryPath, null);
+    }
+
+    private DecompressorDescriptor(String targetKind, String targetName, Path archivePath,
+        Path repositoryPath, @Nullable String prefix) {
       this.targetKind = targetKind;
       this.targetName = targetName;
       this.archivePath = archivePath;
       this.repositoryPath = repositoryPath;
+      this.prefix = Optional.fromNullable(prefix);
     }
 
     public String targetKind() {
@@ -120,6 +137,10 @@
       return repositoryPath;
     }
 
+    public Optional<String> prefix() {
+      return prefix;
+    }
+
     @Override
     public boolean equals(Object other) {
       if (this == other) {
@@ -134,21 +155,13 @@
       return targetKind.equals(descriptor.targetKind)
           && targetName.equals(descriptor.targetName)
           && archivePath.equals(descriptor.archivePath)
-          && repositoryPath.equals(descriptor.repositoryPath);
+          && repositoryPath.equals(descriptor.repositoryPath)
+          && prefix.equals(descriptor.prefix);
     }
 
     @Override
     public int hashCode() {
-      return Objects.hash(targetKind, targetName, archivePath, repositoryPath);
-    }
-  }
-
-  /**
-   * Exceptions thrown when something goes wrong decompressing an archive.
-   */
-  static class DecompressorException extends Exception {
-    public DecompressorException(String message) {
-      super(message);
+      return Objects.hash(targetKind, targetName, archivePath, repositoryPath, prefix);
     }
   }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/NewHttpArchiveFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/NewHttpArchiveFunction.java
index a2bf9ec..336ca01 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/NewHttpArchiveFunction.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/NewHttpArchiveFunction.java
@@ -16,8 +16,10 @@
 
 import com.google.devtools.build.lib.bazel.rules.workspace.NewHttpArchiveRule;
 import com.google.devtools.build.lib.cmdline.PackageIdentifier.RepositoryName;
+import com.google.devtools.build.lib.packages.AggregatingAttributeMapper;
 import com.google.devtools.build.lib.packages.Rule;
 import com.google.devtools.build.lib.skyframe.FileValue;
+import com.google.devtools.build.lib.syntax.Type;
 import com.google.devtools.build.lib.vfs.FileSystemUtils;
 import com.google.devtools.build.lib.vfs.Path;
 import com.google.devtools.build.skyframe.SkyFunction;
@@ -77,9 +79,17 @@
     // Decompress.
     DecompressorValue decompressed;
     try {
+      // The attribute is declared as "strip_prefix" on the rule; an unset or empty value means
+      // that nothing is stripped, which is passed to the decompressor as a null prefix.
+      AggregatingAttributeMapper mapper = AggregatingAttributeMapper.of(rule);
+      String prefix = null;
+      if (mapper.has("strip_prefix", Type.STRING)
+          && !mapper.get("strip_prefix", Type.STRING).isEmpty()) {
+        prefix = mapper.get("strip_prefix", Type.STRING);
+      }
       decompressed = (DecompressorValue) env.getValueOrThrow(
           DecompressorValue.key(rule.getTargetKind(), rule.getName(),
-              downloadedFileValue.getPath(), outputDirectory), IOException.class);
+              downloadedFileValue.getPath(), outputDirectory, prefix), IOException.class);
       if (decompressed == null) {
         return null;
       }
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java
new file mode 100644
index 0000000..f1520f8
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java
@@ -0,0 +1,78 @@
+// Copyright 2014 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.bazel.repository;
+
+import com.google.common.base.Optional;
+import com.google.devtools.build.lib.concurrent.ThreadSafety;
+import com.google.devtools.build.lib.vfs.PathFragment;
+
+/**
+ * Result of trying to remove a directory prefix from the path of an archive entry.
+ *
+ * <p>Instances are created with {@link #maybeDeprefix} and record the (possibly shortened) path,
+ * whether the prefix matched, and whether the entry should be left out of the extraction.
+ */
+@ThreadSafety.Immutable
+public final class StripPrefixedPath {
+  private final PathFragment pathFragment;
+  private final boolean found;
+  private final boolean skip;
+
+  /**
+   * Strips {@code prefix} from {@code entry} if a prefix is given.
+   *
+   * @param entry the path of the entry inside the archive
+   * @param prefix the directory prefix to remove; when absent the entry is returned unchanged
+   * @return the resulting path together with the found/skip flags
+   */
+  public static StripPrefixedPath maybeDeprefix(String entry, Optional<String> prefix) {
+    PathFragment entryPath = new PathFragment(entry);
+    if (!prefix.isPresent()) {
+      return new StripPrefixedPath(entryPath, false, false);
+    }
+
+    PathFragment prefixPath = new PathFragment(prefix.get());
+    if (!entryPath.startsWith(prefixPath)) {
+      // Entries outside of the prefix keep their path but are marked to be skipped.
+      return new StripPrefixedPath(entryPath, false, true);
+    }
+
+    // An entry that is exactly the prefix has nothing left after stripping, so it is skipped too.
+    PathFragment stripped = entryPath.relativeTo(prefixPath);
+    return new StripPrefixedPath(stripped, true, stripped.getPathString().isEmpty());
+  }
+
+  private StripPrefixedPath(PathFragment pathFragment, boolean found, boolean skip) {
+    this.pathFragment = pathFragment;
+    this.found = found;
+    this.skip = skip;
+  }
+
+  /** Returns the entry's path, with the prefix removed if it matched. */
+  public PathFragment getPathFragment() {
+    return pathFragment;
+  }
+
+  /** Returns true if the entry's path started with the requested prefix. */
+  public boolean foundPrefix() {
+    return found;
+  }
+
+  /** Returns true if the entry should not be extracted. */
+  public boolean skip() {
+    return skip;
+  }
+
+}
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/TarGzFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/TarGzFunction.java
index 625755f..cd47a52 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/TarGzFunction.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/TarGzFunction.java
@@ -14,6 +14,7 @@
 
 package com.google.devtools.build.lib.bazel.repository;
 
+import com.google.common.base.Optional;
 import com.google.devtools.build.lib.bazel.repository.DecompressorValue.DecompressorDescriptor;
 import com.google.devtools.build.lib.bazel.repository.RepositoryFunction.RepositoryFunctionException;
 import com.google.devtools.build.lib.vfs.FileSystemUtils;
@@ -22,7 +23,6 @@
 import com.google.devtools.build.skyframe.SkyFunction;
 import com.google.devtools.build.skyframe.SkyFunctionException.Transience;
 import com.google.devtools.build.skyframe.SkyFunctionName;
-
 import com.google.devtools.build.skyframe.SkyKey;
 import com.google.devtools.build.skyframe.SkyValue;
 
@@ -48,13 +48,21 @@
   @Override
   public SkyValue compute(SkyKey skyKey, Environment env) throws RepositoryFunctionException {
     DecompressorDescriptor descriptor = (DecompressorDescriptor) skyKey.argument();
+    Optional<String> prefix = descriptor.prefix();
+    boolean foundPrefix = false;
 
     try (GZIPInputStream gzipStream = new GZIPInputStream(
         new FileInputStream(descriptor.archivePath().getPathFile()))) {
       TarArchiveInputStream tarStream = new TarArchiveInputStream(gzipStream);
       TarArchiveEntry entry;
       while ((entry = tarStream.getNextTarEntry()) != null) {
-        Path filename = descriptor.repositoryPath().getRelative(entry.getName());
+        StripPrefixedPath entryPath = StripPrefixedPath.maybeDeprefix(entry.getName(), prefix);
+        foundPrefix = foundPrefix || entryPath.foundPrefix();
+        if (entryPath.skip()) {
+          continue;
+        }
+
+        Path filename = descriptor.repositoryPath().getRelative(entryPath.getPathFragment());
         FileSystemUtils.createDirectoryAndParents(filename.getParentDirectory());
         if (entry.isDirectory()) {
           FileSystemUtils.createDirectoryAndParents(filename);
@@ -76,6 +84,13 @@
     } catch (IOException e) {
       throw new RepositoryFunctionException(e, Transience.TRANSIENT);
     }
+
+    if (prefix.isPresent() && !foundPrefix) {
+      throw new RepositoryFunctionException(
+          new IOException("Prefix " + prefix.get() + " was given, but not found in the archive"),
+          Transience.PERSISTENT);
+    }
+
     return new DecompressorValue(descriptor.repositoryPath());
   }
 
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipFunction.java
index 98a4e20..87811da 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipFunction.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipFunction.java
@@ -14,6 +14,7 @@
 
 package com.google.devtools.build.lib.bazel.repository;
 
+import com.google.common.base.Optional;
 import com.google.devtools.build.lib.bazel.repository.DecompressorValue.DecompressorDescriptor;
 import com.google.devtools.build.lib.bazel.repository.RepositoryFunction.RepositoryFunctionException;
 import com.google.devtools.build.lib.vfs.FileSystemUtils;
@@ -62,9 +63,17 @@
   public SkyValue compute(SkyKey skyKey, Environment env) throws RepositoryFunctionException {
     DecompressorDescriptor descriptor = (DecompressorDescriptor) skyKey.argument();
     Path destinationDirectory = descriptor.archivePath().getParentDirectory();
+    Optional<String> prefix = descriptor.prefix();
+    boolean foundPrefix = false;
     try (ZipReader reader = new ZipReader(descriptor.archivePath().getPathFile())) {
       Collection<ZipFileEntry> entries = reader.entries();
       for (ZipFileEntry entry : entries) {
+        StripPrefixedPath entryPath = StripPrefixedPath.maybeDeprefix(entry.getName(), prefix);
+        foundPrefix = foundPrefix || entryPath.foundPrefix();
+        if (entryPath.skip()) {
+          continue;
+        }
+        entry.setName(entryPath.getPathFragment().getPathString());
         extractZipEntry(reader, entry, destinationDirectory);
       }
     } catch (IOException e) {
@@ -73,6 +82,13 @@
               descriptor.archivePath(), destinationDirectory, e.getMessage())),
           Transience.TRANSIENT);
     }
+
+    if (prefix.isPresent() && !foundPrefix) {
+      throw new RepositoryFunctionException(
+          new IOException("Prefix " + prefix.get() + " was given, but not found in the zip"),
+          Transience.PERSISTENT);
+    }
+
     return new DecompressorValue(destinationDirectory);
   }
 
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/workspace/NewHttpArchiveRule.java b/src/main/java/com/google/devtools/build/lib/bazel/rules/workspace/NewHttpArchiveRule.java
index e796c70..bc5ebd0 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/rules/workspace/NewHttpArchiveRule.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/rules/workspace/NewHttpArchiveRule.java
@@ -63,6 +63,27 @@
          "tgz" here.</p>
          <!-- #END_BLAZE_RULE.ATTRIBUTE --> */
         .add(attr("type", STRING))
+        /* <!-- #BLAZE_RULE(new_http_archive).ATTRIBUTE(strip_prefix) -->
+         A directory prefix to strip from the extracted files.
+         ${SYNOPSIS}
+
+         <p>Many archives contain a top-level directory that contains all of the useful files in
+         the archive. Instead of needing to specify this prefix over and over in the
+         <code>build_file</code>, this field can be used to strip it from all of the extracted
+         files.</p>
+
+         <p>For example, suppose you are using foo-lib-latest.zip, which contains the directory
+         foo-lib-1.2.3/ under which there are src/, lib/, and test/ directories that contain the
+         actual code you wish to build. Specify <code>strip_prefix = "foo-lib-1.2.3"</code> and
+         your <code>build_file</code> will not have to account for this top-level directory.</p>
+
+         <p>Note that if there are files outside of this directory, they will be discarded and
+         inaccessible (e.g., a top-level license file). This includes files/directories that
+         start with the prefix but are not in the directory (e.g., foo-lib-1.2.3.release-notes).
+         If the specified prefix does not match a directory in the archive, Bazel will return an
+         error.</p>
+         <!-- #END_BLAZE_RULE.ATTRIBUTE --> */
+        .add(attr("strip_prefix", STRING))
         .setWorkspaceOnly()
         .build();
   }
diff --git a/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java b/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java
new file mode 100644
index 0000000..4a7dd1a
--- /dev/null
+++ b/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java
@@ -0,0 +1,56 @@
+// Copyright 2014 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.bazel.repository;
+
+import com.google.common.base.Optional;
+import com.google.devtools.build.lib.vfs.PathFragment;
+import junit.framework.TestCase;
+
+/**
+ * Tests {@link StripPrefixedPath}.
+ */
+public class StripPrefixedPathTest extends TestCase {
+  /** Covers matching, exact-match, non-matching and partial-segment prefixes. */
+  public void testStrip() {
+    StripPrefixedPath result = StripPrefixedPath.maybeDeprefix("foo/bar", Optional.of("foo"));
+    assertEquals(new PathFragment("bar"), result.getPathFragment());
+    assertTrue(result.foundPrefix());
+    assertFalse(result.skip());
+
+    // An entry that is exactly the prefix directory leaves nothing to extract.
+    result = StripPrefixedPath.maybeDeprefix("foo", Optional.of("foo"));
+    assertTrue(result.foundPrefix());
+    assertTrue(result.skip());
+
+    // Entries outside of the prefix are not counted as a match and are skipped.
+    result = StripPrefixedPath.maybeDeprefix("bar/baz", Optional.of("foo"));
+    assertFalse(result.foundPrefix());
+    assertTrue(result.skip());
+
+    // "foof" merely shares leading characters with "foo"; it is not inside the directory.
+    result = StripPrefixedPath.maybeDeprefix("foof/bar", Optional.of("foo"));
+    assertFalse(result.foundPrefix());
+    assertTrue(result.skip());
+  }
+
+  /** Without a prefix the entry is passed through untouched and never skipped. */
+  public void testNoPrefix() {
+    StripPrefixedPath result =
+        StripPrefixedPath.maybeDeprefix("foo/bar", Optional.<String>absent());
+    assertEquals(new PathFragment("foo/bar"), result.getPathFragment());
+    assertFalse(result.foundPrefix());
+    assertFalse(result.skip());
+  }
+}
diff --git a/src/test/shell/bazel/external_integration_test.sh b/src/test/shell/bazel/external_integration_test.sh
index d16e0cc..24b5d9c 100755
--- a/src/test/shell/bazel/external_integration_test.sh
+++ b/src/test/shell/bazel/external_integration_test.sh
@@ -500,4 +500,35 @@
   expect_log "bazel fetch //..."
 }
 
+# Checks that strip_prefix removes the leading "x/y/z" directory from a fetched tarball, so the
+# BUILD file can refer to the archived file simply as "w".
+function test_prefix_stripping() {
+  mkdir -p x/y/z
+  echo "abc" > x/y/z/w
+  tar czf x.tar.gz x
+  local sha256=$(sha256sum x.tar.gz | cut -f 1 -d ' ')
+  serve_file x.tar.gz
+
+  cat > WORKSPACE <<EOF
+new_http_archive(
+    name = "x",
+    url = "http://localhost:$nc_port/x.tar.gz",
+    sha256 = "$sha256",
+    strip_prefix = "x/y/z",
+    build_file = "x.BUILD",
+)
+EOF
+  cat > x.BUILD <<EOF
+genrule(
+    name = "catter",
+    cmd = "cat \$< > \$@",
+    outs = ["catter.out"],
+    srcs = ["w"],
+)
+EOF
+
+  bazel build @x//:catter &> $TEST_log || fail "Build failed"
+  assert_contains "abc" bazel-genfiles/external/x/catter.out
+}
+
 run_suite "external tests"