Docker autoconfig as a regular build rule (#207)

* properly produce outputs for the docker autoconfig rule
* fix tests to work with rule using build instead of run
* use run_and_extract
diff --git a/rules/BUILD b/rules/BUILD
index d499b3d..0094af5 100644
--- a/rules/BUILD
+++ b/rules/BUILD
@@ -19,12 +19,19 @@
 exports_files([
     "install_bazel_head.sh",
     "install_bazel_version.sh",
-    "docker_config.sh.tpl",
 ])
 
 load("@bazel_tools//tools/build_defs/pkg:pkg.bzl", "pkg_tar")
 load("@bazel_skylib//:skylark_library.bzl", "skylark_library")
 
+filegroup(
+    name = "bazel_installers",
+    srcs = [
+        "install_bazel_head.sh",
+        "install_bazel_version.sh",
+    ],
+)
+
 pkg_tar(
     name = "cc-sample-project-tar",
     srcs = [
diff --git a/rules/docker_config.bzl b/rules/docker_config.bzl
index a9a40c8..02eee47 100644
--- a/rules/docker_config.bzl
+++ b/rules/docker_config.bzl
@@ -26,7 +26,6 @@
   remote repositories, inside the container.
 - Copy toolchain configs (outputs of remote repo targets) produced
   from the execution of Bazel inside the container to the host.
-- Optionally copy outputs to a folder defined via build variables.
 
 Example:
 
@@ -101,12 +100,11 @@
 
 To use the rule run:
 
-  bazel run --define=DOCKER_AUTOCONF_OUTPUT=<some_output_dir> //<location_of_rule>:my-autoconfig-rule
+  bazel build //<location_of_rule>:my-autoconfig-rule
 
-Once rule finishes running the file <some_output_dir>/my-autoconfig-rule.tar
+Once rule finishes running the file my-autoconfig-rule_output.tar
 will be created with all toolchain configs generated by
-"local_config_cc" and "<some_other_skylark_repo>". If no value for
-DOCKER_AUTOCONF_OUTPUT is passed, the resulting tar file is left in /tmp.
+"local_config_cc" and "<some_other_skylark_repo>".
 
 Known issues:
 
@@ -118,6 +116,7 @@
     "@io_bazel_rules_docker//container:container.bzl",
     _container = "container",
 )
+load("@base_images_docker//util:run.bzl", _extract = "extract")
 load("@bazel_toolchains//rules/container:docker_toolchains.bzl", "toolchain_container")
 
 # External folder is set to be deprecated, lets keep it here for easy
@@ -133,10 +132,6 @@
 # Default cc project to use if no git_repo is provided.
 _DEFAULT_AUTOCONFIG_PROJECT_PKG_TAR = _WORKSPACE_PREFIX + "rules:cc-sample-project-tar"
 
-# Build variable to define the location of the output tar produced by
-# docker_toolchain_autoconfig
-_DOCKER_AUTOCONF_OUTPUT = "DOCKER_AUTOCONF_OUTPUT"
-
 # Filetype to restrict inputs
 tar_filetype = [
     ".tar",
@@ -154,12 +149,18 @@
     """
     bazel_config_dir = "/bazel-config"
     project_repo_dir = "project_src"
+    name = ctx.attr.name
 
     # Command to retrieve the project from github if requested.
     clone_repo_cmd = "cd ."
     if ctx.attr.git_repo:
         clone_repo_cmd = ("cd " + bazel_config_dir + " && git clone " +
                           ctx.attr.git_repo + " " + project_repo_dir)
+    if ctx.attr.repo_pkg_tar:
+        # if package tar was used then the command should expand it
+        repo_dir = bazel_config_dir + "/" + project_repo_dir
+        clone_repo_cmd = ("mkdir %s && tar -xf /%s -C %s " %
+                          (repo_dir, ctx.file.repo_pkg_tar.basename, repo_dir))
 
     # Command to install custom Bazel version (if requested)
     install_bazel_cmd = "cd ."
@@ -188,16 +189,13 @@
         deref_symlinks_cmd.append(symlinks_cmd)
     deref_symlinks_cmd = " && ".join(deref_symlinks_cmd)
 
-    # Command to copy produced toolchain configs to outside of
-    # the containter (to the mount_point).
+    # Command to copy produced toolchain configs to a tar at the root
+    # of the container.
     copy_cmd = []
     for config_repo in ctx.attr.config_repos:
         src_dir = "$(bazel info output_base)/" + _EXTERNAL_FOLDER_PREFIX + config_repo
-        copy_cmd.append("cp -dr " + src_dir + " " + bazel_config_dir + "/")
-
-        # We need to change the owner of the files we copied, so that they can
-        # be manipulated from outside the container.
-        copy_cmd.append("chown -R $USER_ID " + bazel_config_dir + "/" + config_repo)
+        copy_cmd.append("cp -dr " + src_dir + " " + "/")
+    copy_cmd.append("tar -cf /outputs.tar /" + " ".join(ctx.attr.config_repos))
     output_copy_cmd = " && ".join(copy_cmd)
 
     # Command to run autoconfigure targets.
@@ -221,17 +219,14 @@
     if ctx.attr.git_repo:
         clean_cmd += " && cd " + bazel_config_dir + " && rm -drf " + project_repo_dir
 
-    # Full command to use for docker container
-    # TODO(xingao): Make sure the command exits with error right away if a sub
-    # command fails.
-    docker_cmd = [
-        "/bin/sh",
-        "-c",
-        " && ".join([
+    install_sh = ctx.new_file(name + "_install.sh")
+    ctx.actions.write(
+        output = install_sh,
+        content = "\n ".join([
             "set -ex",
             ctx.attr.setup_cmd,
             install_bazel_cmd,
-            "echo === Cloning project repo ===",
+            "echo === Cloning / expand project repo ===",
             clone_repo_cmd,
             "echo === Running Bazel autoconfigure command ===",
             bazel_cmd,
@@ -241,59 +236,32 @@
             "echo === Cleaning up ===",
             clean_cmd,
         ]),
-    ]
-
-    # Expand contents of repo_pkg_tar
-    # (and remove them after we're done running the docker command).
-    # A dummy command that does nothing in case git_repo was used
-    expand_repo_cmd = "cd ."
-    remove_repo_cmd = "cd ."
-    if ctx.attr.repo_pkg_tar:
-        repo_pkg_tar = str(ctx.attr.repo_pkg_tar.label.name)
-        package_name = _EXTERNAL_FOLDER_PREFIX + _WORKSPACE_NAME + "/" + str(ctx.attr.repo_pkg_tar.label.package)
-
-        # Expand the tar file pointed by repo_pkg_tar
-        expand_repo_cmd = ("mkdir ./%s ; tar -xf %s/%s.tar -C ./%s" %
-                           (project_repo_dir, package_name, repo_pkg_tar, project_repo_dir))
-        remove_repo_cmd = ("rm -drf ./%s" % project_repo_dir)
-
-    result = _container.image.implementation(ctx, cmd = docker_cmd, output_executable = ctx.outputs.load_image)
-
-    # By default we copy the produced tar file to /tmp/
-    output_location = "/tmp/" + ctx.attr.name + ".tar"
-    if _DOCKER_AUTOCONF_OUTPUT in ctx.var:
-        output_location = ctx.var[_DOCKER_AUTOCONF_OUTPUT] + "/" + ctx.attr.name + ".tar"
-
-    # Create the script to load image and run it
-    ctx.actions.expand_template(
-        template = ctx.files.run_tpl[0],
-        substitutions = {
-            "%{EXPAND_REPO_CMD}": expand_repo_cmd,
-            "%{LOAD_IMAGE_SH}": ctx.outputs.load_image.short_path,
-            "%{IMAGE_NAME}": "bazel/" + ctx.label.package + ":" + ctx.label.name,
-            "%{RM_REPO_CMD}": remove_repo_cmd,
-            "%{CONFIG_REPOS}": " ".join(ctx.attr.config_repos),
-            "%{OUTPUT}": output_location,
-        },
-        output = ctx.outputs.executable,
-        is_executable = True,
     )
 
-    # add to the runfiles the script to load image and (if needed) the repo_pkg_tar file
-    if hasattr(result.providers[1], "runfiles"):
-        result_runfiles = result.providers[1].runfiles
-    else:
-        result_runfiles = result.providers[1].default_runfiles
-    runfiles_list = result_runfiles.files.to_list() + [ctx.outputs.load_image]
+    # Include the repo_pkg_tar if needed
+    files = [install_sh] + ctx.files._installers
     if ctx.attr.repo_pkg_tar:
-        runfiles_list += ctx.files.repo_pkg_tar
+        files += [ctx.file.repo_pkg_tar]
 
-    runfiles = ctx.runfiles(files = runfiles_list)
+    image_tar = ctx.new_file(name + ".tar")
 
-    return struct(
-        runfiles = runfiles,
-        files = result.providers[1].files,
-        container_parts = result.container_parts,
+    # TODO(nlopezgi): fix upsream issue that output_executable is required
+    load_image_sh_file = ctx.new_file(name + "load.sh")
+    _container.image.implementation(
+        ctx,
+        files = files,
+        output_executable = load_image_sh_file,
+        output_tarball = image_tar,
+        workdir = bazel_config_dir,
+    )
+
+    _extract.implementation(
+        ctx,
+        name = ctx.attr.name + "_extract",
+        image = image_tar,
+        commands = ["/" + ctx.attr.name + "_install.sh"],
+        extract_file = "/outputs.tar",
+        output_file = ctx.outputs.output_tar,
     )
 
 docker_toolchain_autoconfig_ = rule(
@@ -301,21 +269,31 @@
         "config_repos": attr.string_list(default = ["local_config_cc"]),
         "use_default_project": attr.bool(default = False),
         "git_repo": attr.string(),
-        "repo_pkg_tar": attr.label(allow_files = tar_filetype),
+        "repo_pkg_tar": attr.label(allow_files = tar_filetype, single_file = True),
         "bazel_version": attr.string(),
         "bazel_rc_version": attr.string(),
         "use_bazel_head": attr.bool(default = False),
-        "run_tpl": attr.label(allow_files = True),
         "setup_cmd": attr.string(default = "cd ."),
         "packages": attr.string_list(),
         "additional_repos": attr.string_list(),
         "keys": attr.string_list(),
         "incompatible_changes_off": attr.bool(default = False),
         "test": attr.bool(default = True),
+        "_installers": attr.label(default = ":bazel_installers", allow_files = True),
+        # TODO(nlopezgi): fix upstream attr declaration that is missing repo name
+        "_extract_tpl": attr.label(
+            default = Label("@base_images_docker//util:extract.sh.tpl"),
+            allow_files = True,
+            single_file = True,
+        ),
+        "_image_id_extractor": attr.label(
+            default = "@io_bazel_rules_docker//contrib:extract_image_id.py",
+            allow_files = True,
+            single_file = True,
+        ),
     },
-    executable = True,
     outputs = _container.image.outputs + {
-        "load_image": "%{name}_load_image.sh",
+        "output_tar": "%{name}_outputs.tar",
     },
     implementation = _docker_toolchain_autoconfig_impl,
 )
@@ -327,7 +305,6 @@
     "files",
     "debs",
     "repo_pkg_tar",
-    "run_tpl",
     # all the attrs from docker_build we dont want users to set
     "directory",
     "tars",
@@ -384,9 +361,7 @@
     https://github.com/GoogleCloudPlatform/distroless/tree/master/package_manager).
     The rule creates the container with a command that pulls a repo from github,
     and runs bazel build for a series of remote repos. Files generated in these
-    repos are copied to a mount point inside the Bazel output tree, and finally
-    copied to the /tmp directory or to the DOCKER_AUTOCONF_OUTPUT directory
-    if passed as build variable.
+    repos are copied to a mount point inside the Bazel output tree.
 
     Args:
       **kwargs:
@@ -453,10 +428,6 @@
         _WORKSPACE_PREFIX + "rules:install_bazel_version.sh",
     ]
 
-    # The template for the main script to execute for this rule, which produces
-    # the toolchain configs
-    kwargs["run_tpl"] = _WORKSPACE_PREFIX + "rules:docker_config.sh.tpl"
-
     # Do not install packags if 'packages' is not specified or is an ampty list.
     if not packages_is_empty:
         # "additional_repos" and "keys" are optional for docker_toolchain_autoconfig,
@@ -488,7 +459,7 @@
             size = "medium",
             timeout = "long",
             srcs = ["@bazel_toolchains//tests/config:autoconfig_test.sh"],
-            data = [":" + kwargs["name"]],
+            data = [":" + kwargs["name"] + "_outputs.tar"],
         )
 
     docker_toolchain_autoconfig_(**kwargs)
diff --git a/rules/docker_config.sh.tpl b/rules/docker_config.sh.tpl
deleted file mode 100644
index e786bd9..0000000
--- a/rules/docker_config.sh.tpl
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright 2017 The Bazel Authors. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/bash
-# Main script for the docker_autoconfigure rule.
-# Loads a container, runs it, copies outputs to tar file.
-set -ex
-
-main() {
-  export PYTHON_RUNFILES=$(pwd)/../
-  trap cleanup_on_finish EXIT # always cleanup
-  # Expand a tar file with the repo if needed
-  %{EXPAND_REPO_CMD}
-  # Call the script produced by the docker_build rule to load the image
-  %{LOAD_IMAGE_SH}
-  # Run the container image to build the config repos
-  docker run --rm -e USER_ID="$(id -u)" -v $(pwd):/bazel-config -i %{IMAGE_NAME}
-  # Create a tar file with all the config repos that were built
-  tar -cf outputs.tar %{CONFIG_REPOS}
-  # Copy the tar file to its desired output location
-  cp outputs.tar %{OUTPUT}
-}
-
-cleanup_on_finish() {
-  # Remove the expanded repo if needed
-  %{RM_REPO_CMD}
-  # Remove the produced tar file
-  rm outputs.tar
-}
-
-main $@
diff --git a/tests/config/BUILD b/tests/config/BUILD
index ff87e34..9d880a0 100644
--- a/tests/config/BUILD
+++ b/tests/config/BUILD
@@ -160,7 +160,7 @@
     timeout = "long",
     srcs = ["debian8_clang_autoconfig_test.sh"],
     data = [
-        ":debian8-clang-0.3.0-bazel_0.10.0-autoconfig",
+        ":debian8-clang-0.3.0-bazel_0.10.0-autoconfig_outputs.tar",
         "@bazel_toolchains_test//file",
     ],
 )
diff --git a/tests/config/autoconfig_test.sh b/tests/config/autoconfig_test.sh
index 0d6fd21..558f1f4 100755
--- a/tests/config/autoconfig_test.sh
+++ b/tests/config/autoconfig_test.sh
@@ -22,45 +22,16 @@
 # where {docker_toolchain_autoconfig_name} is the docker_toolchain_autoconfig
 # rule you would like to build and run.
 
-set -e
+set -ex
 
 # Define constants.
 WORKSPACE_ROOT=$(pwd)
 # The test name is hardcoded as {docker_toolchain_autoconfig_name}_test.
 TARGET=${TEST_BINARY%_test}
-NAME=${TARGET##*/}
-DIR=${TARGET%${NAME}}
-autoconfig_script=${WORKSPACE_ROOT}/${DIR}${NAME}
 
-# Helper function for always delete the containers / temporary files on exit
-function cleanup_on_finish {
-  echo "=== Deleting images  ==="
-  # Images to be removed are expected to have "rbe-test-" as name prefix.
-  images=($(docker images -a | grep "rbe-test-" | awk '{print $3}'))
-  for image in "${images[@]}"
-  do
-    echo "Attempting to delete ${image}..."
-    # Only delete the image if it is not used by any running container.
-    # Do not return error code if unable to delete.
-    if [[ -z $(docker ps -q -f ancestor=${image}) ]]; then
-      docker rmi -f ${image} | true
-      echo "${image} deleted..."
-    else
-      echo "${image} is used by another container, not deleted..."
-    fi
-  done
-  echo "Deleting all dangling images..."
-  docker images -f "dangling=true" -q | xargs -r docker rmi -f | true
-}
-
-trap cleanup_on_finish EXIT # always delete the containers
-
-# Change the output location to a tmp location inside the current Bazel workspace.
-sed -i "s|/tmp|${TEST_TMPDIR}|g" ${autoconfig_script}
-
-# Execute the autoconfig script and unpack toolchain config tarball.
-${autoconfig_script}
-tar -xf ${TEST_TMPDIR}/${NAME}.tar -C ${TEST_TMPDIR}
+# Unpack toolchain config tarball.
+find .
+tar -xf ${WORKSPACE_ROOT}/${TARGET}_outputs.tar -C ${TEST_TMPDIR}
 
 # Check existence of generated file.
 file ${TEST_TMPDIR}/local_config_cc/CROSSTOOL
diff --git a/tests/config/debian8_clang_autoconfig_test.sh b/tests/config/debian8_clang_autoconfig_test.sh
index ebbc1f1..50e4d83 100755
--- a/tests/config/debian8_clang_autoconfig_test.sh
+++ b/tests/config/debian8_clang_autoconfig_test.sh
@@ -20,7 +20,7 @@
 # toolchain configs for Bazel 0.10.0 release, and compares newly generated
 # configs with published ones.
 
-set -e
+set -ex
 
 # Define constants.
 WORKSPACE_ROOT=$(pwd)
@@ -30,34 +30,8 @@
 TEST_CONFIGS_DIR=${TEST_TMPDIR}/bazel-toolchains-${COMMIT}/configs/debian8_clang/${CONFIG_VERSION}/bazel_${BAZEL_VERSION}/
 AUTOCONFIG_SCRIPT=${WORKSPACE_ROOT}/tests/config/debian8-clang-${CONFIG_VERSION}-bazel_${BAZEL_VERSION}-autoconfig
 
-# Helper function for always delete the containers / temporary files on exit
-function cleanup_on_finish {
-  echo "=== Deleting images  ==="
-  images=($(docker images -a | grep "debian8-clang-${CONFIG_VERSION}-bazel_${BAZEL_VERSION}-autoconfig" | awk '{print $3}'))
-  for image in "${images[@]}"
-  do
-    echo "Attempting to delete ${image}..."
-    # Only delete the image if it is not used by any running container.
-    # Do not return error code if unable to delete.
-    if [[ -z $(docker ps -q -f ancestor=${image}) ]]; then
-      docker rmi -f ${image} | true
-      echo "${image} deleted..."
-    else
-      echo "${image} is used by another container, not deleted..."
-    fi
-  done
-  echo "Deleting all dangling images..."
-  docker images -f "dangling=true" -q | xargs -r docker rmi -f | true
-}
-
-trap cleanup_on_finish EXIT # always delete the containers
-
-# Change the output location to a tmp location inside the current Bazel workspace.
-sed -i "s|/tmp|${TEST_TMPDIR}|g" ${AUTOCONFIG_SCRIPT}
-
 # Execute the autoconfig script and unpack toolchain config tarball.
-${AUTOCONFIG_SCRIPT}
-tar -xf ${TEST_TMPDIR}/debian8-clang-${CONFIG_VERSION}-bazel_${BAZEL_VERSION}-autoconfig.tar -C ${TEST_TMPDIR}
+tar -xf ${WORKSPACE_ROOT}/tests/config/debian8-clang-${CONFIG_VERSION}-bazel_${BAZEL_VERSION}-autoconfig_outputs.tar -C ${TEST_TMPDIR}
 
 # Remove generated files that are not part of toolchain configs
 rm -rf ${TEST_TMPDIR}/local_config_cc/tools ${TEST_TMPDIR}/local_config_cc/WORKSPACE