Create mini_tar() for use in our blackbox tests.
This PR uses selected parts of tools/build_defs/pkg. It is not the way I would have written it, but it is based on code that has been working for years, so I am disinclined to rewrite it completely. It is intended solely for use in Bazel's tests and is not distributed as part of the product.
It also contains some vestigial features from the old code that are only half enabled here, such as adding symlinks. I want to keep them around in case I need them for the blackbox tests; once I have made that assessment, I can either fully enable them or delete them.
Part of #11183
Next PR: Use it in src/test/java/com/google/devtools/build/lib/blackbox/tests/workspace.
Closes #15028.
PiperOrigin-RevId: 450908743
Change-Id: I7c393ab264924ef475e65321bdcd817b02cb0819
diff --git a/tools/BUILD b/tools/BUILD
index cddfc5b..f084355 100644
--- a/tools/BUILD
+++ b/tools/BUILD
@@ -33,6 +33,7 @@
"//tools/sh:srcs",
"//tools/allowlists:srcs",
"//tools/zip:srcs",
+ "//tools/mini_tar:srcs",
],
)
diff --git a/tools/mini_tar/BUILD b/tools/mini_tar/BUILD
new file mode 100644
index 0000000..d52bd29
--- /dev/null
+++ b/tools/mini_tar/BUILD
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+# Targets for mini_tar: the tar-writing tool, an archive of this package's own
+# sources, and the unit test for the tool.
+load("//tools/mini_tar:tar.bzl", "mini_tar")
+load("//tools/python:private/defs.bzl", "py_binary", "py_test")
+
+licenses(["notice"])
+
+# Every file in this package; referenced from //tools/BUILD.
+filegroup(
+ name = "srcs",
+ srcs = glob(["**"]),
+ visibility = ["//visibility:public"],
+)
+
+exports_files([
+ "BUILD",
+ "tar.bzl",
+])
+
+# The command line tool that writes the archive. The rule defined in tar.bzl
+# uses //tools/mini_tar:mini_tar as its executable.
+py_binary(
+ name = "mini_tar",
+ srcs = ["mini_tar.py"],
+ python_version = "PY3",
+ srcs_version = "PY3",
+ visibility = ["//visibility:public"],
+)
+
+# Integration tests may depend on this target to pull mini_tar into a test.
+# They can then point a repository rule at the resulting archive, e.g.
+# http_archive(name='mini_tar', url="file://mini_tar_srcs.tar")
+# NOTE(review): package_dir stores every file under "foo/" inside the
+# archive; confirm that this prefix is intended.
+mini_tar(
+ name = "mini_tar_srcs",
+ srcs = [":srcs"],
+ package_dir = "foo",
+ visibility = ["//visibility:public"],
+)
+
+# tests
+
+# mini_tar.py is listed in srcs because the test imports it as a module.
+py_test(
+ name = "mini_tar_test",
+ srcs = [
+ "mini_tar.py",
+ "mini_tar_test.py",
+ ],
+ data = [":mini_tar_srcs"],
+ python_version = "PY3",
+ srcs_version = "PY3",
+)
diff --git a/tools/mini_tar/mini_tar.py b/tools/mini_tar/mini_tar.py
new file mode 100644
index 0000000..05ced43
--- /dev/null
+++ b/tools/mini_tar/mini_tar.py
@@ -0,0 +1,287 @@
+# Lint as: python3
+# Copyright 2022 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This tool build tar files from a list of inputs."""
+
+import argparse
+import os
+import tarfile
+
+# Use a deterministic mtime that doesn't confuse other programs.
+# See: https://github.com/bazelbuild/bazel/issues/1299
+PORTABLE_MTIME = 946684800 # 2000-01-01 00:00:00.000 UTC
+
+
+class TarFileWriter(object):
+  """A wrapper to write tar files with fixed ownership and timestamps.
+
+  Every entry gets the uid, gid and mtime chosen at construction time. The
+  writer can be used as a context manager; the archive is closed on exit.
+  """
+
+  class Error(Exception):
+    """Error type reserved for TarFileWriter failures."""
+
+  def __init__(self,
+               name,
+               root_directory='',
+               default_uid=0,
+               default_gid=0,
+               default_mtime=None):
+    """TarFileWriter wraps tarfile.open().
+
+    Args:
+      name: the tar file name.
+      root_directory: virtual root to prepend to elements in the archive.
+      default_uid: uid to assign to files in the archive.
+      default_gid: gid to assign to files in the archive.
+      default_mtime: default mtime to use for elements in the archive. May be
+        an integer or the value 'portable' to use the date 2000-01-01, which
+        is compatible with non *nix OSes. None is stored as 0.
+    """
+    self.name = name
+    self.root_directory = root_directory.strip('/')
+    self.default_gid = default_gid
+    self.default_uid = default_uid
+    if default_mtime is None:
+      self.default_mtime = 0
+    elif default_mtime == 'portable':
+      self.default_mtime = PORTABLE_MTIME
+    else:
+      self.default_mtime = int(default_mtime)
+    # 'w:' opens the archive for writing without compression.
+    self.tar = tarfile.open(name=name, mode='w:')
+    # Names already written through _addfile(); used to drop duplicates.
+    self.members = set()
+    # Directories already written by add_parents().
+    self.directories = {'.'}
+
+  def __enter__(self):
+    return self
+
+  def __exit__(self, t, v, traceback):
+    self.close()
+
+  def close(self):
+    """Close the output tar file."""
+    self.tar.close()
+
+  def _addfile(self, info, fileobj=None):
+    """Add an entry to the tar file unless its name was already added.
+
+    Args:
+      info: the tarfile.TarInfo describing the entry.
+      fileobj: optional file object to read the entry's content from.
+    """
+    if info.type == tarfile.DIRTYPE:
+      # Enforce the ending / for directories so we correctly deduplicate.
+      if not info.name.endswith('/'):
+        info.name += '/'
+    if info.name not in self.members:
+      self.tar.addfile(info, fileobj)
+      self.members.add(info.name)
+    elif info.type != tarfile.DIRTYPE:
+      # A repeated directory is skipped silently; a repeated file is reported.
+      print(('Duplicate file in archive: %s, '
+             'picking first occurrence' % info.name))
+
+  def add_parents(self, path, mode=0o755):
+    """Add the parents of this path to the archive.
+
+    Only the ancestors of `path` are added, not `path` itself. Directories
+    that were already added by an earlier call are skipped.
+
+    Args:
+      path: destination path in archive.
+      mode: unix permission mode of the dir, default 0o755.
+    """
+
+    def add_dirs(path):
+      """Add a directory entry for path, after adding its own parents."""
+      path = path.strip('/')
+      if not path:
+        return
+      if path in self.directories:
+        return
+      components = path.rsplit('/', 1)
+      if len(components) > 1:
+        add_dirs(components[0])
+      self.directories.add(path)
+      tarinfo = tarfile.TarInfo(path + '/')
+      tarinfo.mtime = self.default_mtime
+      tarinfo.uid = self.default_uid
+      tarinfo.gid = self.default_gid
+      tarinfo.type = tarfile.DIRTYPE
+      tarinfo.mode = mode or 0o755
+      self.tar.addfile(tarinfo, fileobj=None)
+
+    components = path.rsplit('/', 1)
+    if len(components) > 1:
+      add_dirs(components[0])
+
+  def add_tree(self, input_path, dest_path, mode=None):
+    """Recursively add a tree of files.
+
+    Args:
+      input_path: the path of the directory to add.
+      dest_path: the destination path of the directory to add.
+      mode: unix permission mode of the file, default 0644 (0755).
+    """
+    # Add the x bit to directories to prevent non-traversable directories.
+    # The x bit is set only if the read bit is set.
+    dirmode = (mode | ((0o444 & mode) >> 2)) if mode else mode
+    self.add_parents(dest_path, mode=dirmode)
+
+    if os.path.isdir(input_path):
+      dest_path = dest_path.rstrip('/') + '/'
+      # Iterate over the sorted list of files so we get a deterministic
+      # result.
+      for f in sorted(os.listdir(input_path)):
+        self.add_tree(
+            input_path=input_path + '/' + f, dest_path=dest_path + f, mode=mode)
+    else:
+      self.add_file_and_parents(
+          dest_path, tarfile.REGTYPE, file_content=input_path, mode=mode)
+
+  def add_file_and_parents(self,
+                           name,
+                           kind=tarfile.REGTYPE,
+                           link=None,
+                           file_content=None,
+                           mode=None):
+    """Add a file to the current tar.
+
+    Creates parent directories if needed.
+
+    Args:
+      name: the name of the file to add.
+      kind: the type of the file to add, see tarfile.*TYPE.
+      link: if the file is a link, the destination of the link.
+      file_content: path of the file to read the content from. If it is a
+        directory, the whole tree below it is added. If omitted, the entry is
+        written without content.
+      mode: unix permission mode of the file, default 0644 (0755).
+    """
+    if self.root_directory and (
+        not (name == self.root_directory or name.startswith('/') or
+             name.startswith(self.root_directory + '/'))):
+      name = self.root_directory + '/' + name
+    self.add_parents(name, mode=0o755)
+
+    if kind == tarfile.DIRTYPE:
+      name = name.rstrip('/')
+      if name in self.directories:
+        return
+
+    if file_content and os.path.isdir(file_content):
+      self.add_tree(input_path=file_content, dest_path=name, mode=mode)
+      return
+
+    tarinfo = tarfile.TarInfo(name)
+    tarinfo.mtime = self.default_mtime
+    tarinfo.uid = self.default_uid
+    tarinfo.gid = self.default_gid
+    tarinfo.type = kind
+    if mode is None:
+      tarinfo.mode = 0o644 if kind == tarfile.REGTYPE else 0o755
+    else:
+      tarinfo.mode = mode
+    if link:
+      tarinfo.linkname = link
+    if file_content:
+      with open(file_content, 'rb') as f:
+        tarinfo.size = os.fstat(f.fileno()).st_size
+        self._addfile(tarinfo, fileobj=f)
+    else:
+      self._addfile(tarinfo, fileobj=None)
+
+  def add_file_at_dest(self, in_path, dest_path, mode=None):
+    """Add a file to the tar file.
+
+    Args:
+      in_path: the path of the file to add to the artifact
+      dest_path: the name of the file in the artifact
+      mode: force to the specified mode. By default a file is stored as 0o755
+        if in_path is executable and as 0o644 otherwise.
+    """
+    # Make a clean, '/' delimited destination path
+    dest = os.path.normpath(dest_path.strip('/')).replace(os.path.sep, '/')
+    # If mode is unspecified, derive the mode from the input file's mode. The
+    # check must look at in_path: dest is a name inside the archive, not a
+    # file on disk. Directories are left alone because add_tree() applies one
+    # mode to every file below them, and a directory's own x bit would mark
+    # all of those files as executable.
+    if mode is None and not os.path.isdir(in_path):
+      mode = 0o755 if os.access(in_path, os.X_OK) else 0o644
+    self.add_file_and_parents(dest, file_content=in_path, mode=mode)
+
+
+def unquote_and_split(arg, c):
+  """Split a string at the first unquoted occurrence of a character.
+
+  A backslash in the part before the separator quotes the character that
+  follows it: that character is copied literally, even if it is the
+  separator, and the backslash itself is dropped. The part after the
+  separator is returned untouched.
+
+  Args:
+    arg: the string to be split
+    c: the character at which to split
+
+  Returns:
+    A pair of the unquoted text before the separator and the raw text after
+    it. The second element is '' when no unquoted separator is found or when
+    the string ends in a dangling backslash.
+  """
+  unquoted = []
+  pos = 0
+  end = len(arg)
+  while pos < end:
+    ch = arg[pos]
+    if ch == c:
+      return (''.join(unquoted), arg[pos + 1:])
+    if ch == '\\':
+      pos += 1
+      if pos == end:
+        # dangling quotation symbol
+        return (''.join(unquoted), '')
+      ch = arg[pos]
+    unquoted.append(ch)
+    pos += 1
+  # The separator never appeared unquoted.
+  return (''.join(unquoted), '')
+
+
+def main():
+  """Parse the command line and write the requested tar file.
+
+  Each --file value has the form input_path=dest_path. A '=' or a backslash
+  inside input_path must be escaped with a backslash. Arguments may also be
+  read from a file given as @path.
+  """
+  parser = argparse.ArgumentParser(
+      description='Helper for building tar packages', fromfile_prefix_chars='@')
+  parser.add_argument(
+      '--output', required=True, help='The output file, mandatory.')
+  parser.add_argument(
+      '--mode', help='Force the mode on the added files (in octal).')
+  parser.add_argument(
+      '--mtime',
+      default=PORTABLE_MTIME,
+      help='mtime for all entries: an integer, or "portable" for 2000-01-01.'
+      ' Defaults to the portable value.')
+  parser.add_argument(
+      '--directory',
+      help='Directory in which to store the file inside the layer')
+  parser.add_argument('--file', action='append', help='input_path=dest_path')
+  parser.add_argument(
+      '--owner',
+      default='0.0',
+      help='Specify the numeric default owner of all files. E.g. 0.0')
+  options = parser.parse_args()
+
+  # Parse modes arguments
+  default_mode = None
+  if options.mode:
+    # Convert from octal
+    default_mode = int(options.mode, 8)
+
+  uid = gid = 0
+  if options.owner:
+    ids = options.owner.split('.', 1)
+    if len(ids) != 2:
+      # Report a usage error instead of failing with an IndexError below.
+      parser.error('--owner must have the form uid.gid, got %r' % options.owner)
+    uid = int(ids[0])
+    gid = int(ids[1])
+
+  # Add objects to the tar file
+  with TarFileWriter(
+      name=options.output,
+      root_directory=options.directory or '',
+      default_uid=uid,
+      default_gid=gid,
+      default_mtime=options.mtime) as output:
+    # options.file is None when no --file flag was given; write an empty
+    # archive in that case instead of raising a TypeError.
+    for f in options.file or []:
+      (input_path, dest) = unquote_and_split(f, '=')
+      output.add_file_at_dest(input_path, dest, mode=default_mode)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/mini_tar/mini_tar_test.py b/tools/mini_tar/mini_tar_test.py
new file mode 100644
index 0000000..48f4102
--- /dev/null
+++ b/tools/mini_tar/mini_tar_test.py
@@ -0,0 +1,200 @@
+# Lint as: python3
+# Copyright 2022 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Testing for archive."""
+
+import copy
+import os
+import tarfile
+import unittest
+
+from tools.mini_tar import mini_tar
+
+
+class TarFileWriterTest(unittest.TestCase):
+  """Tests for mini_tar.TarFileWriter."""
+
+  def assertTarFileContent(self, tar, content):
+    """Check that a tar file holds exactly the entries listed in `content`.
+
+    Args:
+      tar: the path to the TAR file to test.
+      content: a list with one dictionary per expected entry, in archive
+        order. Each key names a TarInfo attribute whose value must match;
+        attributes that are not listed are not checked, so `{"name": "x"}`
+        only checks that the entry is called "x". The key "data" is compared
+        against the content stored for the entry.
+    """
+    expected_count = len(content)
+    checked = 0
+    with tarfile.open(tar, "r:") as archive:
+      for member in archive:
+        self.assertLess(
+            checked, expected_count,
+            "Extraneous file at end of archive %s: %s" % (tar, member.name))
+        for key, expected in content[checked].items():
+          if key == "data":
+            actual = archive.extractfile(member).read()
+          else:
+            actual = getattr(member, key)
+          message = (
+              "Value `%s` for key `%s` of file" % (actual, key) + " " +
+              "%s in archive %s does" % (member.name, tar) + " " +
+              "not match expected value `%s`" % expected)
+          self.assertEqual(actual, expected, message)
+        checked += 1
+    if checked < expected_count:
+      self.fail("Missing file %s in archive %s" % (content[checked], tar))
+
+  def setUp(self):
+    super().setUp()
+    tmp_root = os.environ["TEST_TMPDIR"]
+    self.tempfile = os.path.join(tmp_root, "test.tar")
+
+  def tearDown(self):
+    super().tearDown()
+    if os.path.exists(self.tempfile):
+      os.remove(self.tempfile)
+
+  def test_empty_tar_file(self):
+    with mini_tar.TarFileWriter(self.tempfile):
+      pass
+    self.assertTarFileContent(self.tempfile, [])
+
+  def test_default_mtime_not_provided(self):
+    with mini_tar.TarFileWriter(self.tempfile) as writer:
+      self.assertEqual(writer.default_mtime, 0)
+
+  def test_default_mtime_provided(self):
+    with mini_tar.TarFileWriter(self.tempfile, default_mtime=1234) as writer:
+      self.assertEqual(writer.default_mtime, 1234)
+
+  def test_portable_mtime(self):
+    with mini_tar.TarFileWriter(
+        self.tempfile, default_mtime="portable") as writer:
+      self.assertEqual(writer.default_mtime, 946684800)
+
+  def test_files_with_dots(self):
+    with mini_tar.TarFileWriter(self.tempfile) as writer:
+      for name in ("a", "b/.c", "..d", ".e"):
+        writer.add_file_and_parents(name)
+    # "b" is the parent directory created for "b/.c".
+    expected = [{"name": name} for name in ("a", "b", "b/.c", "..d", ".e")]
+    self.assertTarFileContent(self.tempfile, expected)
+
+  def test_add_parents(self):
+    with mini_tar.TarFileWriter(self.tempfile) as writer:
+      writer.add_parents("a/b/c/d/file")
+      writer.add_file_and_parents("a/b/foo")
+      writer.add_parents("a/b/e/file")
+    entries = (
+        ("a", 0o755),
+        ("a/b", 0o755),
+        ("a/b/c", 0o755),
+        ("a/b/c/d", 0o755),
+        ("a/b/foo", 0o644),
+        ("a/b/e", 0o755),
+    )
+    expected = [{"name": name, "mode": mode} for name, mode in entries]
+    self.assertTarFileContent(self.tempfile, expected)
+
+  def test_adding_tree(self):
+    content = [
+        {"name": "./a", "mode": 0o750},
+        {"name": "./a/b", "data": b"ab", "mode": 0o640},
+        {"name": "./a/c", "mode": 0o750},
+        {"name": "./a/c/d", "data": b"acd", "mode": 0o640},
+    ]
+    tempdir = os.path.join(os.environ["TEST_TMPDIR"], "test_dir")
+    # Create on disk the files described by `content`; only the entries that
+    # carry "data" are files.
+    for entry in content:
+      if "data" not in entry:
+        continue
+      file_path = os.path.join(tempdir, entry["name"])
+      os.makedirs(os.path.dirname(file_path))
+      with open(file_path, "wb") as out:
+        out.write(entry["data"])
+    with mini_tar.TarFileWriter(self.tempfile) as writer:
+      writer.add_file_at_dest(in_path=tempdir, dest_path=".", mode=0o640)
+    self.assertTarFileContent(self.tempfile, content)
+
+    # Try it again, but re-rooted
+    with mini_tar.TarFileWriter(self.tempfile, root_directory="foo") as writer:
+      writer.add_file_at_dest(in_path=tempdir, dest_path="x", mode=0o640)
+    rerooted = [
+        {"name": "foo", "mode": 0o755},
+        {"name": "foo/x", "mode": 0o750},
+    ]
+    # Drop the leading "./" of each original name and put it below foo/x.
+    rerooted.extend(
+        dict(entry, name="foo/x/" + entry["name"][2:]) for entry in content)
+    self.assertTarFileContent(self.tempfile, rerooted)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tools/mini_tar/tar.bzl b/tools/mini_tar/tar.bzl
new file mode 100644
index 0000000..f40b336
--- /dev/null
+++ b/tools/mini_tar/tar.bzl
@@ -0,0 +1,87 @@
+# Copyright 2015 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""mini_tar: A limited functionality tar utility."""
+
+# Filetype to restrict inputs
+# NOTE(review): nothing else in this file references tar_filetype; it looks
+# like a leftover from the code this file was derived from. Confirm whether
+# any other file loads it before removing it.
+tar_filetype = [".tar", ".tar.gz", ".tgz", ".tar.bz2"]
+
+def _quote(filename, protect = "="):
+    """Escapes a filename so that `protect` can be used as a separator.
+
+    Backslashes are doubled first; then every occurrence of `protect` is
+    prefixed with a backslash.
+
+    Args:
+      filename: the string to escape.
+      protect: the separator character to escape, "=" by default.
+
+    Returns:
+      The escaped string.
+    """
+    escaped = filename.replace("\\", "\\\\")
+    return escaped.replace(protect, "\\" + protect)
+
+def _mini_tar_impl(ctx):
+    """Implementation of the mini_tar rule.
+
+    Registers one action that runs the mini_tar tool over all files in `srcs`
+    and writes the declared output.
+    """
+
+    to_strip = ctx.label.package + "/"
+
+    def dest_path(file):
+        # Name of the file inside the archive: its short_path with the path of
+        # this rule's package removed from the front.
+        ret = file.short_path
+        if ret.startswith(to_strip):
+            ret = ret[len(to_strip):]
+        return ret
+
+    # Start building the arguments.
+    args = ctx.actions.args()
+    args.add("--output", ctx.outputs.out.path)
+    args.add("--mode", ctx.attr.mode)
+    args.add("--owner", ctx.attr.owner)
+    if ctx.attr.package_dir:
+        args.add("--directory", ctx.attr.package_dir)
+    if ctx.attr.mtime != -1:  # Note: Must match default in rule def.
+        # Args objects are filled with add(); they have no append() method.
+        # NOTE(review): this only works if the tool defines a --mtime flag;
+        # verify against mini_tar.py.
+        args.add("--mtime=%d" % ctx.attr.mtime)
+
+    file_inputs = ctx.files.srcs
+    for f in file_inputs:
+        # Only the input path is escaped: the tool splits each value at the
+        # first unescaped "=" and takes everything after it literally.
+        args.add("--file=%s=%s" % (_quote(f.path), dest_path(f)))
+
+    # Use a params file, one flag per line, when the command line is too long.
+    args.set_param_file_format("flag_per_line")
+    args.use_param_file("@%s", use_always = False)
+    ctx.actions.run(
+        inputs = file_inputs,
+        executable = ctx.executable._mini_tar,
+        arguments = [args],
+        outputs = [ctx.outputs.out],
+        mnemonic = "PackageTar",
+        use_default_shell_env = True,
+    )
+
+# A rule for creating a tar file, see README.md
+# NOTE(review): no README.md is visible in this change; confirm that the
+# reference above is still valid.
+_real_mini_tar = rule(
+ implementation = _mini_tar_impl,
+ attrs = {
+ # Passed to the tool as --mode, which reads it as an octal number.
+ "mode": attr.string(default = "0555"),
+ # -1 means that no --mtime flag is passed to the tool.
+ "mtime": attr.int(default = -1),
+ # The tar file to write.
+ "out": attr.output(),
+ # Passed to the tool as --owner, in the form "uid.gid".
+ "owner": attr.string(default = "0.0"),
+ # Passed to the tool as --directory when it is not empty.
+ "package_dir": attr.string(),
+ # The files to put into the archive.
+ "srcs": attr.label_list(allow_files = True),
+
+ # Implicit dependencies.
+ "_mini_tar": attr.label(
+ default = Label("//tools/mini_tar:mini_tar"),
+ cfg = "exec",
+ executable = True,
+ allow_files = True,
+ ),
+ },
+)
+
+def mini_tar(name, out = None, **kwargs):
+    """Creates a tar file from a set of files.
+
+    Args:
+      name: name of the target.
+      out: name of the tar file to write; defaults to `name` + ".tar".
+      **kwargs: passed through unchanged to the underlying rule.
+    """
+    _real_mini_tar(
+        name = name,
+        out = out or (name + ".tar"),
+        **kwargs
+    )