First check-in of configuration overhead measurement tool
i.e. first check-in of [Measuring Configuration Overhead](https://docs.google.com/document/d/10ZxO2wZdKJATnYBqAm22xT1k5r4Vp6QX96TkqSUIhs0/edit).
This just establishes supporting structure. The tool is not yet functional.
Specifically:
- `types.py`: defines data structures for "configuration" and "configured target"
- `bazel_api.py`: API to translate `bazel cquery` and `bazel config` calls into the above data structures
- `bazel_api_test.py`: tests
- `ctexplain.py`: stub of an entry point
The tests utilize an existing Python test framework for invoking Bazel (`//src/test/py/bazel:test_base`).
Work towards https://github.com/bazelbuild/bazel/issues/10613
Closes #11511.
PiperOrigin-RevId: 321409588
diff --git a/src/test/py/bazel/BUILD b/src/test/py/bazel/BUILD
index 95617dd..b3fad7f 100644
--- a/src/test/py/bazel/BUILD
+++ b/src/test/py/bazel/BUILD
@@ -29,6 +29,7 @@
"//third_party/def_parser:__pkg__",
"//tools/android:__pkg__",
"//tools/build_rules:__pkg__",
+ "//tools/ctexplain:__pkg__",
"//tools/python:__pkg__",
],
)
diff --git a/tools/BUILD b/tools/BUILD
index 732230f..025e098 100644
--- a/tools/BUILD
+++ b/tools/BUILD
@@ -15,6 +15,7 @@
"//tools/build_rules:srcs",
"//tools/config:srcs",
"//tools/coverage:srcs",
+ "//tools/ctexplain:srcs",
"//tools/distributions:srcs",
"//tools/java:srcs",
"//tools/jdk:srcs",
diff --git a/tools/ctexplain/BUILD b/tools/ctexplain/BUILD
new file mode 100644
index 0000000..02f4c73
--- /dev/null
+++ b/tools/ctexplain/BUILD
@@ -0,0 +1,60 @@
+# Description:
+# Tool for measuring how configuration transitions affect build graph size.
+load("//tools/python:private/defs.bzl", "py_binary", "py_library")
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"]) # Apache 2.0
+
+py_binary(
+ name = "ctexplain",
+ srcs = ["ctexplain.py"],
+ python_version = "PY3",
+ deps = [":bazel_api"],
+)
+
+py_library(
+ name = "bazel_api",
+ srcs = ["bazel_api.py"],
+ srcs_version = "PY3ONLY",
+ deps = [":base"],
+)
+
+py_test(
+ name = "bazel_api_test",
+ size = "small",
+ srcs = ["bazel_api_test.py"],
+ python_version = "PY3",
+ deps = [
+ ":bazel_api",
+ "//src/test/py/bazel:test_base",
+ ],
+)
+
+py_library(
+ name = "base",
+ srcs = [
+ "types.py",
+ ],
+ srcs_version = "PY3ONLY",
+ deps = [
+ "//third_party/py/dataclasses", # Backport for Python < 3.7.
+ "//third_party/py/frozendict",
+ ],
+)
+
+py_test(
+ name = "types_test",
+ size = "small",
+ srcs = ["types_test.py"],
+ python_version = "PY3",
+ deps = [
+ ":base",
+ "//third_party/py/frozendict",
+ ],
+)
+
+filegroup(
+ name = "srcs",
+ srcs = glob(["*"]),
+)
diff --git a/tools/ctexplain/bazel_api.py b/tools/ctexplain/bazel_api.py
new file mode 100644
index 0000000..4fd776c
--- /dev/null
+++ b/tools/ctexplain/bazel_api.py
@@ -0,0 +1,158 @@
+# Lint as: python3
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""API for Bazel calls for config, cquery, and required fragment info.
+
+There's no Python Bazel API so we invoke Bazel as a subprocess.
+"""
+import json
+import os
+import subprocess
+from typing import Callable
+from typing import List
+from typing import Tuple
+# Do not edit this line. Copybara replaces it with PY2 migration helper.
+from frozendict import frozendict
+from tools.ctexplain.types import Configuration
+from tools.ctexplain.types import ConfiguredTarget
+from tools.ctexplain.types import HostConfiguration
+from tools.ctexplain.types import NullConfiguration
+
+
+def run_bazel_in_client(args: List[str]) -> Tuple[int, List[str], List[str]]:
+  """Calls bazel within the current workspace.
+
+  For production use. Tests use an alternative invoker that goes through test
+  infrastructure.
+
+  Args:
+    args: the arguments to call Bazel with
+
+  Returns:
+    Tuple of (return code, stdout lines, stderr lines). A failing Bazel call is
+    reported through the return code rather than by raising.
+  """
+  # check=False: callers branch on the return code, so a non-zero exit must
+  # come back as a value instead of a CalledProcessError.
+  result = subprocess.run(
+      ["bazel"] + args, cwd=os.getcwd(), stdout=subprocess.PIPE,
+      stderr=subprocess.PIPE, check=False)
+  return (result.returncode, result.stdout.decode("utf-8").split(os.linesep),
+          result.stderr.decode("utf-8").split(os.linesep))
+
+
+class BazelApi():
+  """API that accepts injectable Bazel invocation logic."""
+
+  def __init__(self,
+               run_bazel: Callable[[List[str]],
+                                   Tuple[int, List[str],
+                                         List[str]]] = run_bazel_in_client):
+    self.run_bazel = run_bazel
+
+  def cquery(
+      self, args: List[str]
+  ) -> Tuple[bool, List[str], Tuple[ConfiguredTarget, ...]]:
+    """Calls cquery with the given arguments.
+
+    Args:
+      args: A list of cquery command-line arguments, one argument per entry.
+
+    Returns:
+      (success, stderr, cts), where success is True iff the query succeeded,
+      stderr holds the lines of the query's stderr (regardless of success
+      value), and cts is the configured targets found by the query if
+      successful, empty otherwise.
+    """
+    base_args = ["cquery", "--show_config_fragments=transitive"]
+    (returncode, stdout, stderr) = self.run_bazel(base_args + args)
+    if returncode != 0:
+      return (False, stderr, ())
+
+    cts = set()
+    for line in stdout:
+      ctinfo = _parse_cquery_result_line(line)
+      if ctinfo is not None:
+        cts.add(ctinfo)
+    return (True, stderr, tuple(cts))
+
+  def get_config(self, config_hash: str) -> Configuration:
+    """Calls "bazel config" with the given config hash.
+
+    Args:
+      config_hash: A config hash as reported by "bazel cquery".
+
+    Returns:
+      The matching configuration. Never None: a failed lookup raises instead.
+
+    Raises:
+      ValueError if "bazel config" fails or its output isn't valid JSON.
+    """
+    if config_hash == "HOST":
+      return HostConfiguration()
+    elif config_hash == "null":
+      return NullConfiguration()
+
+    base_args = ["config", "--output=json"]
+    (returncode, stdout, stderr) = self.run_bazel(base_args + [config_hash])
+    if returncode != 0:
+      raise ValueError("Could not get config: " + os.linesep.join(stderr))
+    config_json = json.loads(os.linesep.join(stdout))
+    fragments = tuple(  # A tuple (not a list) keeps Configuration hashable.
+        fragment["name"].split(".")[-1] for fragment in config_json["fragments"]
+    )
+    options = frozendict({
+        entry["name"].split(".")[-1]: frozendict(entry["options"])
+        for entry in config_json["fragmentOptions"]
+    })
+    return Configuration(fragments, options)
+
+
+# TODO(gregce): have cquery --output=jsonproto support --show_config_fragments
+# so we can replace all this regex parsing with JSON reads.
+def _parse_cquery_result_line(line: str) -> ConfiguredTarget:
+  """Converts a cquery output line to a ConfiguredTarget.
+
+  Expected input is:
+      "<label> (<config hash>) [configFragment1, configFragment2, ...]"
+  or:
+      "<label> (null)"
+
+  Args:
+    line: The expected input.
+
+  Returns:
+    Corresponding ConfiguredTarget, or None for a blank or single-token line.
+
+  Raises:
+    ValueError if the config hash or fragment list is malformed or missing.
+  """
+  tokens = line.split(maxsplit=2)
+  if len(tokens) < 2:  # E.g. the empty string left by a trailing line break.
+    return None
+  if tokens[1][0] != "(" or tokens[1][-1] != ")":
+    raise ValueError(f"{tokens[1]} in {line} not surrounded by parentheses")
+  config_hash = tokens[1][1:-1]
+  fragments = ()
+  if config_hash != "null":
+    frag_list = tokens[2] if len(tokens) > 2 else ""
+    if frag_list[:1] != "[" or frag_list[-1:] != "]":
+      raise ValueError(f"{frag_list} in {line} not surrounded by [] brackets")
+    # Drop the brackets; "[]" must yield no fragments rather than ("",).
+    fragments = tuple(f for f in frag_list[1:-1].split(", ") if f)
+  return ConfiguredTarget(
+      label=tokens[0],
+      config=None,  # Not yet available: we'll need `bazel config` to get this.
+      config_hash=config_hash,
+      transitive_fragments=fragments)
diff --git a/tools/ctexplain/bazel_api_test.py b/tools/ctexplain/bazel_api_test.py
new file mode 100644
index 0000000..3587251
--- /dev/null
+++ b/tools/ctexplain/bazel_api_test.py
@@ -0,0 +1,147 @@
+# Lint as: python3
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for bazel_api.py."""
+import os
+import unittest
+from src.test.py.bazel import test_base
+from tools.ctexplain.bazel_api import BazelApi
+from tools.ctexplain.types import HostConfiguration
+from tools.ctexplain.types import NullConfiguration
+
+
+class BazelApiTest(test_base.TestBase):
+  """Runs BazelApi against real Bazel calls in a scratch workspace."""
+
+  _bazel_api: BazelApi = None
+
+  def setUp(self):
+    test_base.TestBase.setUp(self)
+    self._bazel_api = BazelApi(self.RunBazel)
+    self.ScratchFile('WORKSPACE')
+    self.CreateWorkspaceWithDefaultRepos('repo/WORKSPACE')
+
+  def testBasicCquery(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    res = self._bazel_api.cquery(['//testapp:all'])
+    success = res[0]
+    cts = res[2]
+    self.assertTrue(success)
+    self.assertEqual(len(cts), 1)
+    self.assertEqual(cts[0].label, '//testapp:fg')
+    self.assertIsNone(cts[0].config)
+    self.assertGreater(len(cts[0].config_hash), 10)
+    self.assertIn('PlatformConfiguration', cts[0].transitive_fragments)
+
+  def testFailedCquery(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    (success, stderr, cts) = self._bazel_api.cquery(['//testapp:typo'])
+    self.assertFalse(success)
+    self.assertEqual(len(cts), 0)
+    self.assertIn("target 'typo' not declared in package 'testapp'",
+                  os.linesep.join(stderr))
+
+  def testTransitiveFragmentsAccuracy(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+        'filegroup(name = "ccfg", srcs = [":ccbin"])',
+        'cc_binary(name = "ccbin", srcs = ["ccbin.cc"])'
+    ])
+    cts1 = self._bazel_api.cquery(['//testapp:fg'])[2]
+    self.assertNotIn('CppConfiguration', cts1[0].transitive_fragments)
+    cts2 = self._bazel_api.cquery(['//testapp:ccfg'])[2]
+    self.assertIn('CppConfiguration', cts2[0].transitive_fragments)
+
+  def testGetTargetConfig(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    cts = self._bazel_api.cquery(['//testapp:fg'])[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    expected_fragments = ['PlatformConfiguration', 'JavaConfiguration']
+    for exp in expected_fragments:
+      self.assertIn(exp, config.fragments)
+    core_options = config.options['CoreOptions']
+    self.assertIsNotNone(core_options)
+    self.assertIn(('stamp', 'false'), core_options.items())
+
+  def testGetHostConfig(self):
+    self.ScratchFile('testapp/BUILD', [
+        'genrule(',
+        ' name = "g",',
+        ' srcs = [],',
+        ' cmd = "",',
+        ' outs = ["g.out"],',
+        ' tools = [":fg"])',
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    query = ['//testapp:fg', '--universe_scope=//testapp:g']
+    cts = self._bazel_api.cquery(query)[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    self.assertIsInstance(config, HostConfiguration)
+    # We don't currently populate or read a host configuration's details.
+    self.assertEqual(len(config.fragments), 0)
+    self.assertEqual(len(config.options), 0)
+
+  def testGetNullConfig(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    cts = self._bazel_api.cquery(['//testapp:a.file'])[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    self.assertIsInstance(config, NullConfiguration)
+    # Null configurations have no information by definition.
+    self.assertEqual(len(config.fragments), 0)
+    self.assertEqual(len(config.options), 0)
+
+  def testConfigWithDefines(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    cquery_args = ['//testapp:fg', '--define', 'a=b']
+    cts = self._bazel_api.cquery(cquery_args)[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    user_defined_options = config.options['user-defined']
+    self.assertIsNotNone(user_defined_options)
+    # Compare via dict() rather than frozendict's private _dict attribute.
+    self.assertDictEqual(dict(user_defined_options), {'--define:a': 'b'})
+
+  def testConfigWithStarlarkFlags(self):
+    self.ScratchFile('testapp/defs.bzl', [
+        'def _flag_impl(settings, attr):', ' pass', 'string_flag = rule(',
+        ' implementation = _flag_impl,',
+        ' build_setting = config.string(flag = True)',
+        ')'
+    ])
+    self.ScratchFile('testapp/BUILD', [
+        'load(":defs.bzl", "string_flag")',
+        'string_flag(name = "my_flag", build_setting_default = "nada")',
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    cquery_args = ['//testapp:fg', '--//testapp:my_flag', 'algo']
+    cts = self._bazel_api.cquery(cquery_args)[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    user_defined_options = config.options['user-defined']
+    self.assertIsNotNone(user_defined_options)
+    # Compare via dict() rather than frozendict's private _dict attribute.
+    self.assertDictEqual(dict(user_defined_options),
+                         {'//testapp:my_flag': 'algo'})
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tools/ctexplain/ctexplain.py b/tools/ctexplain/ctexplain.py
new file mode 100644
index 0000000..b66d78d
--- /dev/null
+++ b/tools/ctexplain/ctexplain.py
@@ -0,0 +1,25 @@
+# Lint as: python3
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""ctexplain main entry point.
+
+Currently a stub.
+"""
+from tools.ctexplain.bazel_api import BazelApi
+
+bazel_api = BazelApi()
+
+# TODO(gregce): move all logic to a _lib library so we can easily include
+# end-to-end testing. We'll only handle flag parsing here, which we pass
+# into the main invoker as standard Python args.
diff --git a/tools/ctexplain/types.py b/tools/ctexplain/types.py
new file mode 100644
index 0000000..2ac480f
--- /dev/null
+++ b/tools/ctexplain/types.py
@@ -0,0 +1,85 @@
+# Lint as: python3
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The core data types ctexplain manipulates."""
+
+from typing import Mapping
+from typing import Optional
+from typing import Tuple
+# Do not edit this line. Copybara replaces it with PY2 migration helper.
+from dataclasses import dataclass
+from dataclasses import field
+from frozendict import frozendict
+
+
+@dataclass(frozen=True)
+class Configuration():
+  """Stores a build configuration as a collection of fragments and options."""
+  # BuildConfiguration.Fragments in this configuration, as base names without
+  # packages. For example: ("PlatformConfiguration", ...).
+  fragments: Tuple[str, ...]
+  # Mapping of FragmentOptions to option key/value pairs. For example:
+  # {"CoreOptions": {"action_env": "[]", "cpu": "x86", ...}, ...}.
+  #
+  # Option values are stored as strings of whatever "bazel config" outputs.
+  #
+  # Note that Fragment and FragmentOptions aren't the same thing.
+  options: Mapping[str, Mapping[str, str]]
+
+
+@dataclass(frozen=True)
+class ConfiguredTarget():
+ """Encapsulates a target + configuration + required fragments."""
+ # Label of the target this represents.
+ label: str
+ # Configuration this target is applied to. May be None.
+ config: Optional[Configuration]
+ # The hash of this configuration as reported by Bazel.
+ config_hash: str
+ # Fragments required by this configured target and its transitive
+ # dependencies. Stored as base names without packages. For example:
+ # "PlatformConfiguration".
+ transitive_fragments: Tuple[str, ...]
+
+
+@dataclass(frozen=True)
+class HostConfiguration(Configuration):
+ """Special marker for the host configuration.
+
+ There's exactly one host configuration per build, so we shouldn't suggest
+ merging it with other configurations.
+
+ TODO(gregce): suggest host configuration trimming once we figure out the right
+ criteria. Even if Bazel's not technically equipped to do the trimming, it's
+ still theoretically valuable information. Note that moving from host to exec
+ configurations makes this all a little less relevant, since exec
+ configurations aren't "special" compared to normal configurations.
+ """
+ # We don't currently read the host config's fragments or option values.
+ fragments: Tuple[str, ...] = ()
+ options: Mapping[str,
+ Mapping[str,
+ str]] = field(default_factory=lambda: frozendict({}))
+
+
+@dataclass(frozen=True)
+class NullConfiguration(Configuration):
+  """Marker type standing in for the null configuration.
+
+  Both fields default to empty: a null configuration carries no fragments
+  and no options.
+  """
+  fragments: Tuple[str, ...] = ()
+  options: Mapping[str, Mapping[str, str]] = field(
+      default_factory=lambda: frozendict({}))
diff --git a/tools/ctexplain/types_test.py b/tools/ctexplain/types_test.py
new file mode 100644
index 0000000..504dc3c
--- /dev/null
+++ b/tools/ctexplain/types_test.py
@@ -0,0 +1,70 @@
+# Lint as: python3
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for types.py."""
+import unittest
+# Do not edit this line. Copybara replaces it with PY2 migration helper.
+from frozendict import frozendict
+from tools.ctexplain.types import Configuration
+
+
+class TypesTest(unittest.TestCase):
+  """Checks that Configuration hashes and compares by value."""
+
+  def testConfigurationIsHashable(self):
+    options = frozendict({'o1': frozendict({'k1': 'v1'})})
+    c = Configuration(fragments=('F1',), options=options)
+    self.assertEqual({c: 4}[c], 4)  # Dict keys must be hashable.
+
+  def testConfigurationHashAccuracy(self):
+    d = {}
+
+    options1 = frozendict({'o1': frozendict({'k1': 'v1'})})
+    d[Configuration(fragments=('F1',), options=options1)] = 4
+    self.assertEqual(len(d), 1)
+
+    options2 = frozendict({'o1': frozendict({'k1': 'v1'})})
+    d[Configuration(fragments=('F1',), options=options2)] = 4
+    self.assertEqual(len(d), 1)
+
+    options3 = frozendict({'o1': frozendict({'k1': 'v1'})})
+    d[Configuration(fragments=('F2',), options=options3)] = 4
+    self.assertEqual(len(d), 2)
+
+    options4 = frozendict({'o2': frozendict({'k1': 'v1'})})
+    d[Configuration(fragments=('F2',), options=options4)] = 4
+    self.assertEqual(len(d), 3)
+
+    options5 = frozendict({'o2': frozendict({'k2': 'v1'})})
+    d[Configuration(fragments=('F2',), options=options5)] = 4
+    self.assertEqual(len(d), 4)
+
+    options6 = frozendict({'o2': frozendict({'k2': 'v2'})})
+    d[Configuration(fragments=('F2',), options=options6)] = 4
+    self.assertEqual(len(d), 5)
+
+  def testConfigurationEquality(self):
+    c1 = Configuration(fragments=('F1',), options={'o1': {'k1': 'v1'}})
+    c2 = Configuration(fragments=('F1',), options={'o1': {'k1': 'v1'}})
+    c3 = Configuration(fragments=('F2',), options={'o1': {'k1': 'v1'}})
+    c4 = Configuration(fragments=('F1',), options={'o2': {'k2': 'v2'}})
+
+    self.assertEqual(c1, c2)
+    self.assertNotEqual(c1, c3)
+    self.assertNotEqual(c1, c4)
+    self.assertNotEqual(c3, c4)
+
+
+if __name__ == '__main__':
+ unittest.main()