Replace create_embedded_tools.sh with a faster Python version.

This is functionally equivalent, but ~30x faster on Windows, ~2x faster on macOS and ~1.5x faster on Linux.

RELNOTES: None.

Change-Id: Ib4a7e10400a3955e47772425acfce2d9530de462
PiperOrigin-RevId: 163346634
diff --git a/src/create_embedded_tools.py b/src/create_embedded_tools.py
new file mode 100644
index 0000000..e938314
--- /dev/null
+++ b/src/create_embedded_tools.py
@@ -0,0 +1,176 @@
+# pylint: disable=g-bad-file-header
+# Copyright 2017 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http:#www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Creates the embedded_tools.zip that is part of the Bazel binary."""
+
+import fnmatch
+import os
+import os.path
+import re
+import stat
+import sys
+import tarfile
+import zipfile
+
+output_paths = [
+    ('*tools/jdk/BUILD*', lambda x: 'tools/jdk/BUILD'),
+    ('*tools/platforms/platforms.BUILD', lambda x: 'platforms/BUILD'),
+    ('*tools/platforms/*', lambda x: 'platforms/' + os.path.basename(x)),
+    ('*JavaBuilder*_deploy.jar', lambda x: 'tools/jdk/' + os.path.basename(x)),
+    ('*JacocoCoverage*_deploy.jar',
+     lambda x: 'tools/jdk/JacocoCoverage_deploy.jar'),
+    ('*turbine_deploy.jar', lambda x: 'tools/jdk/turbine_deploy.jar'),
+    ('*javac-9-dev-r4023-2.jar',
+     lambda x: 'third_party/java/jdk/langtools/javac-9-dev-r4023-2.jar'),
+    ('*SingleJar_deploy.jar',
+     lambda x: 'tools/jdk/singlejar/SingleJar_deploy.jar'),
+    ('*GenClass_deploy.jar', lambda x: 'tools/jdk/GenClass_deploy.jar'),
+    ('*ExperimentalRunner_deploy.jar',
+     lambda x: 'tools/jdk/ExperimentalTestRunner_deploy.jar'),
+    ('*Runner_deploy.jar', lambda x: 'tools/jdk/TestRunner_deploy.jar'),
+    ('*singlejar', lambda x: 'tools/jdk/singlejar/singlejar'),
+    ('*launcher.exe', lambda x: 'tools/launcher/launcher.exe'),
+    ('*ijar.exe', lambda x: 'tools/jdk/ijar/ijar.exe'),
+    ('*ijar', lambda x: 'tools/jdk/ijar/ijar'),
+    ('*zipper.exe', lambda x: 'tools/zip/zipper/zipper.exe'),
+    ('*zipper', lambda x: 'tools/zip/zipper/zipper'),
+    ('*src/objc_tools/*',
+     lambda x: 'tools/objc/precomp_' + os.path.basename(x)),
+    ('*xcode*StdRedirect.dylib', lambda x: 'tools/objc/StdRedirect.dylib'),
+    ('*xcode*make_hashed_objlist.py',
+     lambda x: 'tools/objc/make_hashed_objlist.py'),
+    ('*xcode*realpath', lambda x: 'tools/objc/realpath'),
+    ('*xcode*xcode-locator', lambda x: 'tools/objc/xcode-locator'),
+    ('*src/tools/xcode/*.sh', lambda x: 'tools/objc/' + os.path.basename(x)),
+    ('*src/tools/xcode/*',
+     lambda x: 'tools/objc/' + os.path.basename(x) + '.sh'),
+    ('*external/openjdk_*/file/*.tar.gz', lambda x: 'jdk.tar.gz'),
+    ('*external/openjdk_*/file/*.zip', lambda x: 'jdk.zip'),
+    ('*', lambda x: re.sub(r'^.*bazel-out/[^/]*/bin/', '', x, count=1)),
+]
+
+
+def get_output_path(path):
+  for pattern, transformer in output_paths:
+    if fnmatch.fnmatch(path.replace('\\', '/'), pattern):
+      # BUILD.tools are stored as BUILD files.
+      return transformer(path).replace('/BUILD.tools', '/BUILD')
+
+
+def is_mode_executable(mode):
+  return mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) > 0
+
+
+def is_executable(path):
+  return is_mode_executable(os.stat(path)[stat.ST_MODE])
+
+
+def get_input_files(argsfile):
+  """Returns a sorted list of tuples (archive_file, input_file).
+
+  This describes the files that should be put into the generated archive.
+
+  Args:
+    argsfile: The file containing the list of input files.
+  """
+  with open(argsfile, 'r') as f:
+    input_files = set(x.strip() for x in f.readlines())
+
+    result = {}
+    for input_file in input_files:
+      # If we have both a BUILD and a BUILD.tools file, take the latter only.
+      if (os.path.basename(input_file) == 'BUILD' and
+          input_file + '.tools' in input_files):
+        continue
+
+      # This gives us the same behavior as the older bash version of this
+      # tool: If two input files map to the same output files, the one that
+      # comes last in the list of input files overrides all earlier ones.
+      result[get_output_path(input_file)] = input_file
+
+    # By sorting the file list, the resulting ZIP file will not be reproducible
+    # and deterministic.
+    return sorted(result.items())
+
+
+def copy_jdk_into_archive(output_zip, archive_file, input_file):
+  # The JDK is special - it's extracted instead of copied.
+  if archive_file.endswith('.tar.gz'):
+    with tarfile.open(input_file, 'r', errorlevel=2) as jdk_tar:
+      while True:
+        jdk_tarinfo = jdk_tar.next()
+        if jdk_tarinfo is None:
+          break
+        # Rename the first folder to 'jdk', because Bazel looks for a
+        # bundled JDK in the embedded tools using that folder name.
+        filename = 'jdk/' + '/'.join(jdk_tarinfo.name.split('/')[1:])
+        zipinfo = zipfile.ZipInfo(filename, (1980, 1, 1, 0, 0, 0))
+        if jdk_tarinfo.isreg():
+          if is_mode_executable(jdk_tarinfo.mode):
+            zipinfo.external_attr = 0o755 << 16
+          else:
+            zipinfo.external_attr = 0o644 << 16
+          zipinfo.compress_type = zipfile.ZIP_DEFLATED
+          output_zip.writestr(zipinfo, jdk_tar.extractfile(jdk_tarinfo).read())
+        elif jdk_tarinfo.issym():
+          # 0120000 originally comes from the definition of S_IFLNK and
+          # marks a symbolic link in the Zip file format.
+          zipinfo.external_attr = 0o120000 << 16
+          output_zip.writestr(zipinfo, jdk_tarinfo.linkname)
+        else:
+          # Ignore directories, hard links, special files, ...
+          pass
+  elif archive_file.endswith('.zip'):
+    with zipfile.ZipFile(input_file, 'r') as jdk_zip:
+      for jdk_zipinfo in jdk_zip.infolist():
+        # Rename the first folder to 'jdk', because Bazel looks for a
+        # bundled JDK in the embedded tools using that folder name.
+        filename = 'jdk/' + '/'.join(jdk_zipinfo.filename.split('/')[1:])
+        zipinfo = zipfile.ZipInfo(filename, (1980, 1, 1, 0, 0, 0))
+        if is_mode_executable(jdk_zipinfo.external_attr >> 16 & 0xFFFF):
+          zipinfo.external_attr = 0o755 << 16
+        else:
+          zipinfo.external_attr = 0o644 << 16
+        zipinfo.compress_type = jdk_zipinfo.compress_type
+        output_zip.writestr(zipinfo, jdk_zip.read(jdk_zipinfo))
+
+
+def main():
+  output_zip = os.path.join(os.getcwd(), sys.argv[1])
+  input_files = get_input_files(sys.argv[2])
+
+  # Copy all the input_files into output_zip.
+  with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED) as output_zip:
+    zipinfo = zipfile.ZipInfo('WORKSPACE', (1980, 1, 1, 0, 0, 0))
+    zipinfo.external_attr = 0o644 << 16
+    output_zip.writestr(zipinfo, 'workspace(name = "bazel_tools")\n')
+
+    zipinfo = zipfile.ZipInfo('tools/defaults/BUILD', (1980, 1, 1, 0, 0, 0))
+    zipinfo.external_attr = 0o644 << 16
+    output_zip.writestr(zipinfo, '')
+
+    for archive_file, input_file in input_files:
+      if os.path.basename(archive_file) in ('jdk.tar.gz', 'jdk.zip'):
+        copy_jdk_into_archive(output_zip, archive_file, input_file)
+      else:
+        zipinfo = zipfile.ZipInfo(archive_file, (1980, 1, 1, 0, 0, 0))
+        zipinfo.external_attr = 0o755 << 16 if is_executable(
+            input_file) else 0o644 << 16
+        zipinfo.compress_type = zipfile.ZIP_DEFLATED
+        with open(input_file, 'rb') as f:
+          output_zip.writestr(zipinfo, f.read())
+
+
+if __name__ == '__main__':
+  main()