Replace create_embedded_tools.sh with a faster Python version.

This is functionally equivalent, but ~30x faster on Windows, ~2x faster on macOS and ~1.5x faster on Linux.

RELNOTES: None.

Change-Id: Ib4a7e10400a3955e47772425acfce2d9530de462
PiperOrigin-RevId: 163346634
diff --git a/src/BUILD b/src/BUILD
index e6d9c06..581b88b 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -1,5 +1,7 @@
 # Packaging
 
+load(":embedded_tools.bzl", "srcsfile")
+
 md5_cmd = "set -e -o pipefail && cat $(SRCS) | sort | %s | awk '{ print $$1; }' > $@"
 
 # TODO(bazel-team): find a better way to handle dylib extensions.
@@ -29,7 +31,7 @@
         "//src/main/tools:process-wrapper",
         "//src/main/tools:linux-sandbox",
         "//tools/osx:xcode-locator",
-    ] + embedded_tools,
+    ] + embedded_tools_target,
     outs = ["install_base_key" + suffix],
     cmd = select({
         ":darwin": md5_cmd % "/sbin/md5",
@@ -37,7 +39,7 @@
         ":freebsd": md5_cmd % "/sbin/md5",
         "//conditions:default": md5_cmd % "md5sum",
     }),
-) for suffix, embedded_tools in {
+) for suffix, embedded_tools_target in {
     "": [":embedded_tools"],
     "_with_jdk": [":embedded_tools_with_jdk"],
     "_notools": [],
@@ -120,11 +122,17 @@
     ],
 )
 
-[genrule(
-    name = "embedded_tools" + suffix,
+py_binary(
+    name = "create_embedded_tools",
+    srcs = ["create_embedded_tools.py"],
+)
+
+# TODO(philwo): Clean this up, once a Bazel that can run py_binary and sh_binary
+# in Skylark rules on Windows has been released.
+[filegroup(
+    name = "embedded_tools" + suffix + "_srcs",
     srcs = [
         "BUILD.tools",
-        ":create_embedded_tools.sh",
         "//tools:embedded_tools_srcs",
         "//third_party:gpl-srcs",
         "//third_party/java/j2objc:embedded_tools_srcs",
@@ -209,13 +217,42 @@
             "@openjdk_linux//file",
         ],
     }) if (suffix == "_with_jdk") else []),
-    outs = ["embedded_tools" + suffix + ".zip"],
-    cmd = "$(location :create_embedded_tools.sh) $@ $(SRCS)",
 ) for suffix in [
     "",
     "_with_jdk",
 ]]
 
+[srcsfile(
+    name = "embedded_tools" + suffix + "_params",
+    srcs = [":embedded_tools" + suffix + "_srcs"],
+    out = "embedded_tools" + suffix + ".params",
+) for suffix in [
+    "",
+    "_with_jdk",
+]]
+
+genrule(
+    name = "embedded_tools",
+    srcs = [
+        ":embedded_tools_params",
+        ":embedded_tools_srcs",
+    ],
+    outs = ["embedded_tools.zip"],
+    cmd = "$(location :create_embedded_tools) \"$@\" $(location :embedded_tools_params)",
+    tools = [":create_embedded_tools"],
+)
+
+genrule(
+    name = "embedded_tools_with_jdk",
+    srcs = [
+        ":embedded_tools_with_jdk_params",
+        ":embedded_tools_with_jdk_srcs",
+    ],
+    outs = ["embedded_tools_with_jdk.zip"],
+    cmd = "$(location :create_embedded_tools) \"$@\" $(location :embedded_tools_with_jdk_params)",
+    tools = [":create_embedded_tools"],
+)
+
 [genrule(
     name = "package-zip" + suffix,
     srcs = ([":embedded_tools" + suffix + ".zip"] if embed else []) + [
diff --git a/src/create_embedded_tools.py b/src/create_embedded_tools.py
new file mode 100644
index 0000000..e938314
--- /dev/null
+++ b/src/create_embedded_tools.py
@@ -0,0 +1,176 @@
+# pylint: disable=g-bad-file-header
+# Copyright 2017 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Creates the embedded_tools.zip that is part of the Bazel binary."""
+
+import fnmatch
+import os
+import os.path
+import re
+import stat
+import sys
+import tarfile
+import zipfile
+
+output_paths = [
+    ('*tools/jdk/BUILD*', lambda x: 'tools/jdk/BUILD'),
+    ('*tools/platforms/platforms.BUILD', lambda x: 'platforms/BUILD'),
+    ('*tools/platforms/*', lambda x: 'platforms/' + os.path.basename(x)),
+    ('*JavaBuilder*_deploy.jar', lambda x: 'tools/jdk/' + os.path.basename(x)),
+    ('*JacocoCoverage*_deploy.jar',
+     lambda x: 'tools/jdk/JacocoCoverage_deploy.jar'),
+    ('*turbine_deploy.jar', lambda x: 'tools/jdk/turbine_deploy.jar'),
+    ('*javac-9-dev-r4023-2.jar',
+     lambda x: 'third_party/java/jdk/langtools/javac-9-dev-r4023-2.jar'),
+    ('*SingleJar_deploy.jar',
+     lambda x: 'tools/jdk/singlejar/SingleJar_deploy.jar'),
+    ('*GenClass_deploy.jar', lambda x: 'tools/jdk/GenClass_deploy.jar'),
+    ('*ExperimentalRunner_deploy.jar',
+     lambda x: 'tools/jdk/ExperimentalTestRunner_deploy.jar'),
+    ('*Runner_deploy.jar', lambda x: 'tools/jdk/TestRunner_deploy.jar'),
+    ('*singlejar', lambda x: 'tools/jdk/singlejar/singlejar'),
+    ('*launcher.exe', lambda x: 'tools/launcher/launcher.exe'),
+    ('*ijar.exe', lambda x: 'tools/jdk/ijar/ijar.exe'),
+    ('*ijar', lambda x: 'tools/jdk/ijar/ijar'),
+    ('*zipper.exe', lambda x: 'tools/zip/zipper/zipper.exe'),
+    ('*zipper', lambda x: 'tools/zip/zipper/zipper'),
+    ('*src/objc_tools/*',
+     lambda x: 'tools/objc/precomp_' + os.path.basename(x)),
+    ('*xcode*StdRedirect.dylib', lambda x: 'tools/objc/StdRedirect.dylib'),
+    ('*xcode*make_hashed_objlist.py',
+     lambda x: 'tools/objc/make_hashed_objlist.py'),
+    ('*xcode*realpath', lambda x: 'tools/objc/realpath'),
+    ('*xcode*xcode-locator', lambda x: 'tools/objc/xcode-locator'),
+    ('*src/tools/xcode/*.sh', lambda x: 'tools/objc/' + os.path.basename(x)),
+    ('*src/tools/xcode/*',
+     lambda x: 'tools/objc/' + os.path.basename(x) + '.sh'),
+    ('*external/openjdk_*/file/*.tar.gz', lambda x: 'jdk.tar.gz'),
+    ('*external/openjdk_*/file/*.zip', lambda x: 'jdk.zip'),
+    ('*', lambda x: re.sub(r'^.*bazel-out/[^/]*/bin/', '', x, count=1)),
+]
+
+
+def get_output_path(path):
+  for pattern, transformer in output_paths:
+    if fnmatch.fnmatch(path.replace('\\', '/'), pattern):
+      # BUILD.tools are stored as BUILD files.
+      return transformer(path).replace('/BUILD.tools', '/BUILD')
+
+
+def is_mode_executable(mode):
+  return mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) > 0
+
+
+def is_executable(path):
+  return is_mode_executable(os.stat(path)[stat.ST_MODE])
+
+
+def get_input_files(argsfile):
+  """Returns a sorted list of tuples (archive_file, input_file).
+
+  This describes the files that should be put into the generated archive.
+
+  Args:
+    argsfile: The file containing the list of input files.
+  """
+  with open(argsfile, 'r') as f:
+    input_files = set(x.strip() for x in f.readlines())
+
+    result = {}
+    for input_file in input_files:
+      # If we have both a BUILD and a BUILD.tools file, take the latter only.
+      if (os.path.basename(input_file) == 'BUILD' and
+          input_file + '.tools' in input_files):
+        continue
+
+      # This gives us the same behavior as the older bash version of this
+      # tool: If two input files map to the same output files, the one that
+      # comes last in the list of input files overrides all earlier ones.
+      result[get_output_path(input_file)] = input_file
+
+    # By sorting the file list, the resulting ZIP file will be reproducible
+    # and deterministic.
+    return sorted(result.items())
+
+
+def copy_jdk_into_archive(output_zip, archive_file, input_file):
+  # The JDK is special - it's extracted instead of copied.
+  if archive_file.endswith('.tar.gz'):
+    with tarfile.open(input_file, 'r', errorlevel=2) as jdk_tar:
+      while True:
+        jdk_tarinfo = jdk_tar.next()
+        if jdk_tarinfo is None:
+          break
+        # Rename the first folder to 'jdk', because Bazel looks for a
+        # bundled JDK in the embedded tools using that folder name.
+        filename = 'jdk/' + '/'.join(jdk_tarinfo.name.split('/')[1:])
+        zipinfo = zipfile.ZipInfo(filename, (1980, 1, 1, 0, 0, 0))
+        if jdk_tarinfo.isreg():
+          if is_mode_executable(jdk_tarinfo.mode):
+            zipinfo.external_attr = 0o755 << 16
+          else:
+            zipinfo.external_attr = 0o644 << 16
+          zipinfo.compress_type = zipfile.ZIP_DEFLATED
+          output_zip.writestr(zipinfo, jdk_tar.extractfile(jdk_tarinfo).read())
+        elif jdk_tarinfo.issym():
+          # 0120000 originally comes from the definition of S_IFLNK and
+          # marks a symbolic link in the Zip file format.
+          zipinfo.external_attr = 0o120000 << 16
+          output_zip.writestr(zipinfo, jdk_tarinfo.linkname)
+        else:
+          # Ignore directories, hard links, special files, ...
+          pass
+  elif archive_file.endswith('.zip'):
+    with zipfile.ZipFile(input_file, 'r') as jdk_zip:
+      for jdk_zipinfo in jdk_zip.infolist():
+        # Rename the first folder to 'jdk', because Bazel looks for a
+        # bundled JDK in the embedded tools using that folder name.
+        filename = 'jdk/' + '/'.join(jdk_zipinfo.filename.split('/')[1:])
+        zipinfo = zipfile.ZipInfo(filename, (1980, 1, 1, 0, 0, 0))
+        if is_mode_executable(jdk_zipinfo.external_attr >> 16 & 0xFFFF):
+          zipinfo.external_attr = 0o755 << 16
+        else:
+          zipinfo.external_attr = 0o644 << 16
+        zipinfo.compress_type = jdk_zipinfo.compress_type
+        output_zip.writestr(zipinfo, jdk_zip.read(jdk_zipinfo))
+
+
+def main():
+  output_zip = os.path.join(os.getcwd(), sys.argv[1])
+  input_files = get_input_files(sys.argv[2])
+
+  # Copy all the input_files into output_zip.
+  with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED) as output_zip:
+    zipinfo = zipfile.ZipInfo('WORKSPACE', (1980, 1, 1, 0, 0, 0))
+    zipinfo.external_attr = 0o644 << 16
+    output_zip.writestr(zipinfo, 'workspace(name = "bazel_tools")\n')
+
+    zipinfo = zipfile.ZipInfo('tools/defaults/BUILD', (1980, 1, 1, 0, 0, 0))
+    zipinfo.external_attr = 0o644 << 16
+    output_zip.writestr(zipinfo, '')
+
+    for archive_file, input_file in input_files:
+      if os.path.basename(archive_file) in ('jdk.tar.gz', 'jdk.zip'):
+        copy_jdk_into_archive(output_zip, archive_file, input_file)
+      else:
+        zipinfo = zipfile.ZipInfo(archive_file, (1980, 1, 1, 0, 0, 0))
+        zipinfo.external_attr = 0o755 << 16 if is_executable(
+            input_file) else 0o644 << 16
+        zipinfo.compress_type = zipfile.ZIP_DEFLATED
+        with open(input_file, 'rb') as f:
+          output_zip.writestr(zipinfo, f.read())
+
+
+if __name__ == '__main__':
+  main()
diff --git a/src/create_embedded_tools.sh b/src/create_embedded_tools.sh
deleted file mode 100755
index b5aaa6b..0000000
--- a/src/create_embedded_tools.sh
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/bin/sh
-
-# Copyright 2015 The Bazel Authors. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -eu
-
-# This script is used to create the directory tree embedded into the Bazel
-# binary that is used as the default source for the @bazel_tools repository.
-# It shuffles around files compiled in other rules, then zips them up.
-
-OUTPUT="${PWD}/$1"
-shift
-
-TMP_DIR=${TMPDIR:-/tmp}
-PACKAGE_DIR="$(mktemp -d ${TMP_DIR%%/}/bazel.XXXXXXXX)"
-mkdir -p "${PACKAGE_DIR}"
-trap "rm -fr \"${PACKAGE_DIR}\"" EXIT
-
-for i in $*; do
-
-  case "$i" in
-    *tools/jdk/BUILD*) OUTPUT_PATH=tools/jdk/BUILD ;;
-    *tools/platforms/platforms.BUILD) OUTPUT_PATH=platforms/BUILD ;;
-    *tools/platforms/*) OUTPUT_PATH=platforms/${i##*/} ;;
-    *JavaBuilder*_deploy.jar) OUTPUT_PATH=tools/jdk/${i##*/} ;;
-    *JacocoCoverage*_deploy.jar) OUTPUT_PATH=tools/jdk/JacocoCoverage_deploy.jar ;;
-    *turbine_deploy.jar) OUTPUT_PATH=tools/jdk/turbine_deploy.jar ;;
-    *javac-9-dev-r4023-2.jar) OUTPUT_PATH=third_party/java/jdk/langtools/javac-9-dev-r4023-2.jar ;;
-    *javac7.jar) OUTPUT_PATH=third_party/java/jdk/langtools/javac7.jar ;;
-    *SingleJar_deploy.jar) OUTPUT_PATH=tools/jdk/singlejar/SingleJar_deploy.jar ;;
-    *GenClass_deploy.jar) OUTPUT_PATH=tools/jdk/GenClass_deploy.jar ;;
-    *ExperimentalRunner_deploy.jar) OUTPUT_PATH=tools/jdk/ExperimentalTestRunner_deploy.jar ;;
-    *Runner_deploy.jar) OUTPUT_PATH=tools/jdk/TestRunner_deploy.jar ;;
-    *singlejar) OUTPUT_PATH=tools/jdk/singlejar/singlejar ;;
-    *ijar.exe) OUTPUT_PATH=tools/jdk/ijar/ijar.exe ;;
-    *ijar) OUTPUT_PATH=tools/jdk/ijar/ijar ;;
-    *zipper.exe) OUTPUT_PATH=tools/zip/zipper/zipper.exe ;;
-    *zipper) OUTPUT_PATH=tools/zip/zipper/zipper ;;
-    *src/objc_tools/*) OUTPUT_PATH=tools/objc/precomp_${i##*/} ;;
-    *xcode*StdRedirect.dylib) OUTPUT_PATH=tools/objc/StdRedirect.dylib ;;
-    *xcode*make_hashed_objlist.py) OUTPUT_PATH=tools/objc/make_hashed_objlist.py ;;
-    *xcode*realpath) OUTPUT_PATH=tools/objc/realpath ;;
-    *xcode*xcode-locator) OUTPUT_PATH=tools/objc/xcode-locator ;;
-    *src/tools/xcode/*.sh) OUTPUT_PATH=tools/objc/${i##*/} ;;
-    *src/tools/xcode/*) OUTPUT_PATH=tools/objc/${i##*/}.sh ;;
-    *external/openjdk_*/file/*.tar.gz) OUTPUT_PATH=jdk.tar.gz ;;
-    *external/openjdk_*/file/*.zip) OUTPUT_PATH=jdk.zip ;;
-    *) OUTPUT_PATH=$(echo $i | sed 's_^.*bazel-out/[^/]*/bin/__') ;;
-  esac
-
-  mkdir -p "${PACKAGE_DIR}/$(dirname "${OUTPUT_PATH}")"
-  cp "$i" "${PACKAGE_DIR}/${OUTPUT_PATH}"
-  chmod u+w "${PACKAGE_DIR}/${OUTPUT_PATH}"
-done
-
-if [ -f ${PACKAGE_DIR}/jdk.tar.gz ]; then
-  tar xz -C ${PACKAGE_DIR} -f ${PACKAGE_DIR}/jdk.tar.gz
-  rm ${PACKAGE_DIR}/jdk.tar.gz
-  mv ${PACKAGE_DIR}/zulu* ${PACKAGE_DIR}/jdk
-fi
-
-if [ -f ${PACKAGE_DIR}/jdk.zip ]; then
-  unzip -d ${PACKAGE_DIR} ${PACKAGE_DIR}/jdk.zip > /dev/null
-  rm ${PACKAGE_DIR}/jdk.zip
-  mv ${PACKAGE_DIR}/zulu* ${PACKAGE_DIR}/jdk
-fi
-
-if [ ! -f ${PACKAGE_DIR}/third_party/java/jdk/langtools/javac-9-dev-r4023-2.jar ]; then
-  cp ${PACKAGE_DIR}/third_party/java/jdk/langtools/javac7.jar \
-      ${PACKAGE_DIR}/third_party/java/jdk/langtools/javac-9-dev-r4023-2.jar
-fi
-
-cat > "${PACKAGE_DIR}/WORKSPACE" <<EOF
-workspace(name = "bazel_tools")
-EOF
-mkdir -p "${PACKAGE_DIR}/tools/defaults"
-touch "${PACKAGE_DIR}/tools/defaults/BUILD"
-for i in $(find "${PACKAGE_DIR}" -name BUILD.tools); do
-  mv "$i" "$(dirname "$i")/BUILD"
-done
-find "${PACKAGE_DIR}" -exec touch -t 198001010000.00 '{}' '+'
-(cd "${PACKAGE_DIR}" && find . -type f | sort | zip -qDX@ "${OUTPUT}")
diff --git a/src/embedded_tools.bzl b/src/embedded_tools.bzl
new file mode 100644
index 0000000..b36cc14
--- /dev/null
+++ b/src/embedded_tools.bzl
@@ -0,0 +1,50 @@
+# pylint: disable=g-bad-file-header
+# Copyright 2017 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains Skylark rules used to build the embedded_tools.zip."""
+
+def _embedded_tools(ctx):
+  # The list of arguments we pass to the script.
+  args_file = ctx.new_file(ctx.label.name + ".params")
+  ctx.file_action(output=args_file, content="\n".join([f.path for f in ctx.files.srcs]))
+  # Action to call the script.
+  ctx.action(
+      inputs=ctx.files.srcs,
+      outputs=[ctx.outputs.out],
+      arguments=[ctx.outputs.out.path, args_file.path],
+      progress_message="Creating embedded tools: %s" % ctx.outputs.out.short_path,
+      executable=ctx.executable.tool)
+
+embedded_tools = rule(
+    implementation=_embedded_tools,
+    attrs={
+        "srcs": attr.label_list(allow_files=True),
+        "out": attr.output(mandatory=True),
+        "tool": attr.label(executable=True, cfg="host", allow_files=True,
+                           default=Label("//src:create_embedded_tools_sh"))
+    }
+)
+
+def _srcsfile(ctx):
+  ctx.file_action(
+      output=ctx.outputs.out,
+      content="\n".join([f.path for f in ctx.files.srcs]))
+
+srcsfile = rule(
+    implementation=_srcsfile,
+    attrs={
+        "srcs": attr.label_list(allow_files=True),
+        "out": attr.output(mandatory=True),
+    }
+)