First check-in of configuration overhead measurement tool

This is the first check-in for the tool described in [Measuring Configuration Overhead](https://docs.google.com/document/d/10ZxO2wZdKJATnYBqAm22xT1k5r4Vp6QX96TkqSUIhs0/edit).

This just establishes supporting structure. The tool is not yet functional.

Specifically:
- `types.py`: defines data structures for "configuration" and "configured target"
- `bazel_api.py`: API to translate `bazel cquery` and `bazel config` calls into the above data structures
- `bazel_api_test.py`: tests
- `ctexplain.py`: stub of an entry point

The tests utilize an existing Python test framework for invoking Bazel (`//src/test/py/bazel:test_base`).

Work towards https://github.com/bazelbuild/bazel/issues/10613

Closes #11511.

PiperOrigin-RevId: 321409588
diff --git a/src/test/py/bazel/BUILD b/src/test/py/bazel/BUILD
index 95617dd..b3fad7f 100644
--- a/src/test/py/bazel/BUILD
+++ b/src/test/py/bazel/BUILD
@@ -29,6 +29,7 @@
         "//third_party/def_parser:__pkg__",
         "//tools/android:__pkg__",
         "//tools/build_rules:__pkg__",
+        "//tools/ctexplain:__pkg__",
         "//tools/python:__pkg__",
     ],
 )
diff --git a/tools/BUILD b/tools/BUILD
index 732230f..025e098 100644
--- a/tools/BUILD
+++ b/tools/BUILD
@@ -15,6 +15,7 @@
         "//tools/build_rules:srcs",
         "//tools/config:srcs",
         "//tools/coverage:srcs",
+        "//tools/ctexplain:srcs",
         "//tools/distributions:srcs",
         "//tools/java:srcs",
         "//tools/jdk:srcs",
diff --git a/tools/ctexplain/BUILD b/tools/ctexplain/BUILD
new file mode 100644
index 0000000..02f4c73
--- /dev/null
+++ b/tools/ctexplain/BUILD
@@ -0,0 +1,60 @@
+# Description:
+#   Tool for measuring how configuration transitions affect build graph size.
+load("//tools/python:private/defs.bzl", "py_binary", "py_library")
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+py_binary(
+    name = "ctexplain",
+    srcs = ["ctexplain.py"],
+    python_version = "PY3",
+    deps = [":bazel_api"],
+)
+
+py_library(
+    name = "bazel_api",
+    srcs = ["bazel_api.py"],
+    srcs_version = "PY3ONLY",
+    deps = [":base", "//third_party/py/frozendict"],
+)
+
+py_test(
+    name = "bazel_api_test",
+    size = "small",
+    srcs = ["bazel_api_test.py"],
+    python_version = "PY3",
+    deps = [
+        ":base",
+        ":bazel_api",
+        "//src/test/py/bazel:test_base",
+    ],
+)
+
+# Core data types. Targets that import types.py depend on this directly.
+py_library(
+    name = "base",
+    srcs = ["types.py"],
+    srcs_version = "PY3ONLY",
+    deps = [
+        "//third_party/py/dataclasses",  # Backport for Python < 3.7.
+        "//third_party/py/frozendict",
+    ],
+)
+
+py_test(
+    name = "types_test",
+    size = "small",
+    srcs = ["types_test.py"],
+    python_version = "PY3",
+    deps = [
+        ":base",
+        "//third_party/py/frozendict",
+    ],
+)
+
+filegroup(
+    name = "srcs",
+    srcs = glob(["*"]),
+)
diff --git a/tools/ctexplain/bazel_api.py b/tools/ctexplain/bazel_api.py
new file mode 100644
index 0000000..4fd776c
--- /dev/null
+++ b/tools/ctexplain/bazel_api.py
@@ -0,0 +1,159 @@
+# Lint as: python3
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""API for Bazel calls for config, cquery, and required fragment info.
+
+There's no Python Bazel API so we invoke Bazel as a subprocess.
+"""
+import json
+import os
+import subprocess
+from typing import Callable
+from typing import List
+from typing import Optional
+from typing import Tuple
+# Do not edit this line. Copybara replaces it with PY2 migration helper.
+from frozendict import frozendict
+from tools.ctexplain.types import Configuration
+from tools.ctexplain.types import ConfiguredTarget
+from tools.ctexplain.types import HostConfiguration
+from tools.ctexplain.types import NullConfiguration
+
+
+def run_bazel_in_client(args: List[str]) -> Tuple[int, List[str], List[str]]:
+  """Calls bazel within the current workspace.
+
+  For production use. Tests use an alternative invoker that goes through test
+  infrastructure. Failures are reported via the return code, not raised.
+
+  Args:
+    args: the arguments to call Bazel with
+
+  Returns:
+    Tuple of (return code, stdout lines, stderr lines)
+  """
+  result = subprocess.run(
+      ["bazel"] + args,
+      cwd=os.getcwd(),
+      stdout=subprocess.PIPE,
+      stderr=subprocess.PIPE,
+      check=False)
+  return (result.returncode, result.stdout.decode("utf-8").splitlines(),
+          result.stderr.decode("utf-8").splitlines())
+
+
+class BazelApi():
+  """API that accepts injectable Bazel invocation logic."""
+
+  def __init__(self,
+               run_bazel: Callable[[List[str]],
+                                   Tuple[int, List[str],
+                                         List[str]]] = run_bazel_in_client):
+    self.run_bazel = run_bazel
+
+  def cquery(self, args: List[str]
+            ) -> Tuple[bool, List[str], Tuple[ConfiguredTarget, ...]]:
+    """Calls cquery with the given arguments.
+
+    Args:
+      args: A list of cquery command-line arguments, one argument per entry.
+
+    Returns:
+      (success, stderr, cts), where success is True iff the query succeeded,
+      stderr contains the query's stderr lines (regardless of success value),
+      and cts is the de-duplicated configured targets found by the query if
+      successful, empty otherwise.
+    """
+    base_args = ["cquery", "--show_config_fragments=transitive"]
+    (returncode, stdout, stderr) = self.run_bazel(base_args + args)
+    if returncode != 0:
+      return (False, stderr, ())
+
+    # Blank lines (e.g. from a trailing newline) carry no result: skip them
+    # instead of letting the line parser fail on them.
+    parsed = (_parse_cquery_result_line(ln) for ln in stdout if ln.strip())
+    cts = {ct for ct in parsed if ct is not None}
+
+    return (True, stderr, tuple(cts))
+
+  def get_config(self, config_hash: str) -> Configuration:
+    """Calls "bazel config" with the given config hash.
+
+    Args:
+      config_hash: A config hash as reported by "bazel cquery".
+
+    Returns:
+      The matching configuration. "HOST" and "null" map to their marker types
+      without invoking Bazel.
+
+    Raises:
+      ValueError: if "bazel config" fails or its output isn't valid JSON.
+    """
+    if config_hash == "HOST":
+      return HostConfiguration()
+    if config_hash == "null":
+      return NullConfiguration()
+
+    base_args = ["config", "--output=json"]
+    (returncode, stdout, stderr) = self.run_bazel(base_args + [config_hash])
+    if returncode != 0:
+      raise ValueError("Could not get config: " + os.linesep.join(stderr))
+    config_json = json.loads(os.linesep.join(stdout))
+    # Configuration must stay hashable, so fragments is a tuple, not a list.
+    fragments = tuple(
+        f["name"].split(".")[-1] for f in config_json["fragments"])
+    options = frozendict({
+        entry["name"].split(".")[-1]: frozendict(entry["options"])
+        for entry in config_json["fragmentOptions"]
+    })
+    return Configuration(fragments, options)
+
+
+# TODO(gregce): have cquery --output=jsonproto support --show_config_fragments
+# so we can replace all this regex parsing with JSON reads.
+def _parse_cquery_result_line(line: str) -> Optional[ConfiguredTarget]:
+  """Converts a cquery output line to a ConfiguredTarget.
+
+  Expected input is:
+
+      "<label> (<config hash>) [configFragment1, configFragment2, ...]"
+
+  or:
+      "<label> (null)"
+
+  Args:
+    line: The expected input.
+
+  Returns:
+    Corresponding ConfiguredTarget, or None if the line is blank.
+
+  Raises:
+    ValueError: if a non-blank line doesn't have the expected form.
+  """
+  tokens = line.strip().split(maxsplit=2)
+  if not tokens:  # Blank line, e.g. from a trailing newline.
+    return None
+  label, config, frag_list = (tokens + ["", ""])[:3]  # Pad missing parts.
+  if not (config.startswith("(") and config.endswith(")")):
+    raise ValueError(f"{config} in {line} not surrounded by parentheses")
+  config_hash = config[1:-1]
+  fragments = ()
+  if config_hash != "null":
+    if not (frag_list.startswith("[") and frag_list.endswith("]")):
+      raise ValueError(f"{frag_list} in {line} not surrounded by [] brackets")
+    # Strip the brackets, then split 'F1, F2' on ', '. '[]' means no fragments.
+    fragments = tuple(f for f in frag_list[1:-1].split(", ") if f)
+  # config stays None for now: it takes a separate `bazel config` call.
+  return ConfiguredTarget(label=label, config=None, config_hash=config_hash,
+                          transitive_fragments=fragments)
diff --git a/tools/ctexplain/bazel_api_test.py b/tools/ctexplain/bazel_api_test.py
new file mode 100644
index 0000000..3587251
--- /dev/null
+++ b/tools/ctexplain/bazel_api_test.py
@@ -0,0 +1,147 @@
+# Lint as: python3
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for bazel_api.py."""
+import os
+import unittest
+from src.test.py.bazel import test_base
+from tools.ctexplain.bazel_api import BazelApi
+from tools.ctexplain.types import HostConfiguration
+from tools.ctexplain.types import NullConfiguration
+
+
+class BazelApiTest(test_base.TestBase):
+  """Runs BazelApi against real Bazel invocations in a scratch workspace."""
+
+  _bazel_api: BazelApi = None
+
+  def setUp(self):
+    test_base.TestBase.setUp(self)
+    # Route API calls through the test framework's Bazel invoker.
+    self._bazel_api = BazelApi(self.RunBazel)
+    self.ScratchFile('WORKSPACE')
+    self.CreateWorkspaceWithDefaultRepos('repo/WORKSPACE')
+
+  def testBasicCquery(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    (success, _, cts) = self._bazel_api.cquery(['//testapp:all'])
+    self.assertTrue(success)
+    self.assertEqual(len(cts), 1)
+    self.assertEqual(cts[0].label, '//testapp:fg')
+    # cquery only reports the config's hash, not the config itself.
+    self.assertIsNone(cts[0].config)
+    self.assertGreater(len(cts[0].config_hash), 10)
+    self.assertIn('PlatformConfiguration', cts[0].transitive_fragments)
+
+  def testFailedCquery(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    (success, stderr, cts) = self._bazel_api.cquery(['//testapp:typo'])
+    self.assertFalse(success)
+    self.assertEqual(len(cts), 0)
+    self.assertIn("target 'typo' not declared in package 'testapp'",
+                  os.linesep.join(stderr))
+
+  def testTransitiveFragmentsAccuracy(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+        'filegroup(name = "ccfg", srcs = [":ccbin"])',
+        'cc_binary(name = "ccbin", srcs = ["ccbin.cc"])'
+    ])
+    cts1 = self._bazel_api.cquery(['//testapp:fg'])[2]
+    self.assertNotIn('CppConfiguration', cts1[0].transitive_fragments)
+    cts2 = self._bazel_api.cquery(['//testapp:ccfg'])[2]
+    self.assertIn('CppConfiguration', cts2[0].transitive_fragments)
+
+  def testGetTargetConfig(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    cts = self._bazel_api.cquery(['//testapp:fg'])[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    expected_fragments = ['PlatformConfiguration', 'JavaConfiguration']
+    for exp in expected_fragments:
+      self.assertIn(exp, config.fragments)
+    core_options = config.options['CoreOptions']
+    self.assertIsNotNone(core_options)
+    self.assertIn(('stamp', 'false'), core_options.items())
+
+  def testGetHostConfig(self):
+    self.ScratchFile('testapp/BUILD', [
+        'genrule(',
+        '    name = "g",',
+        '    srcs = [],',
+        '    cmd = "",',
+        '    outs = ["g.out"],',
+        '    tools = [":fg"])',
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    query = ['//testapp:fg', '--universe_scope=//testapp:g']
+    cts = self._bazel_api.cquery(query)[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    self.assertIsInstance(config, HostConfiguration)
+    # We don't currently populate or read a host configuration's details.
+    self.assertEqual(len(config.fragments), 0)
+    self.assertEqual(len(config.options), 0)
+
+  def testGetNullConfig(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    cts = self._bazel_api.cquery(['//testapp:a.file'])[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    self.assertIsInstance(config, NullConfiguration)
+    # Null configurations have no information by definition.
+    self.assertEqual(len(config.fragments), 0)
+    self.assertEqual(len(config.options), 0)
+
+  def testConfigWithDefines(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    cquery_args = ['//testapp:fg', '--define', 'a=b']
+    cts = self._bazel_api.cquery(cquery_args)[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    user_defined_options = config.options['user-defined']
+    self.assertIsNotNone(user_defined_options)
+    # Compare via dict() rather than frozendict's private `_dict` attribute.
+    self.assertDictEqual(dict(user_defined_options), {'--define:a': 'b'})
+
+  def testConfigWithStarlarkFlags(self):
+    self.ScratchFile('testapp/defs.bzl', [
+        'def _flag_impl(settings, attr):', '  pass', 'string_flag = rule(',
+        '  implementation = _flag_impl,',
+        '  build_setting = config.string(flag = True)',
+        ')'
+    ])
+    self.ScratchFile('testapp/BUILD', [
+        'load(":defs.bzl", "string_flag")',
+        'string_flag(name = "my_flag", build_setting_default = "nada")',
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    cquery_args = ['//testapp:fg', '--//testapp:my_flag', 'algo']
+    cts = self._bazel_api.cquery(cquery_args)[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    user_defined_options = config.options['user-defined']
+    self.assertIsNotNone(user_defined_options)
+    # Compare via dict() rather than frozendict's private `_dict` attribute.
+    self.assertDictEqual(dict(user_defined_options),
+                         {'//testapp:my_flag': 'algo'})
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/tools/ctexplain/ctexplain.py b/tools/ctexplain/ctexplain.py
new file mode 100644
index 0000000..b66d78d
--- /dev/null
+++ b/tools/ctexplain/ctexplain.py
@@ -0,0 +1,25 @@
+# Lint as: python3
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""ctexplain main entry point.
+
+Currently a stub.
+"""
+from tools.ctexplain.bazel_api import BazelApi
+
+bazel_api = BazelApi()  # No invoker injected: uses BazelApi's default.
+
+# TODO(gregce): move all logic to a _lib library so we can easily include
+# end-to-end testing. We'll only handle flag parsing here, which we pass
+# into the main invoker as standard Python args.
diff --git a/tools/ctexplain/types.py b/tools/ctexplain/types.py
new file mode 100644
index 0000000..2ac480f
--- /dev/null
+++ b/tools/ctexplain/types.py
@@ -0,0 +1,85 @@
+# Lint as: python3
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The core data types ctexplain manipulates."""
+
+from typing import Mapping
+from typing import Optional
+from typing import Tuple
+# Do not edit this line. Copybara replaces it with PY2 migration helper.
+from dataclasses import dataclass
+from dataclasses import field
+from frozendict import frozendict
+
+
+@dataclass(frozen=True)
+class Configuration():
+  """Stores a build configuration as a collection of fragments and options."""
+  # BuildConfiguration.Fragments in this configuration, as base names without
+  # packages. For example: ("PlatformConfiguration", ...).
+  fragments: Tuple[str, ...]
+  # Mapping of FragmentOptions to option key/value pairs. For example:
+  # {"CoreOptions": {"action_env": "[]", "cpu": "x86", ...}, ...}. Use hashable
+  # mappings (e.g. frozendict) if the Configuration itself must be hashable.
+  #
+  # Option values are stored as strings of whatever "bazel config" outputs.
+  # Note that Fragment and FragmentOptions aren't the same thing.
+  options: Mapping[str, Mapping[str, str]]
+
+
+@dataclass(frozen=True)
+class ConfiguredTarget():
+  """Encapsulates a target + configuration + required fragments."""
+  # Label of the target this represents.
+  label: str
+  # Configuration this target is applied to. None if not yet fetched.
+  config: Optional[Configuration]
+  # The hash of this configuration as reported by Bazel.
+  config_hash: str
+  # Fragments required by this configured target and its transitive
+  # dependencies. Stored as base names without packages. For example:
+  # "PlatformConfiguration".
+  transitive_fragments: Tuple[str, ...]
+
+
+@dataclass(frozen=True)
+class HostConfiguration(Configuration):
+  """Special marker for the host configuration.
+
+  There's exactly one host configuration per build, so we shouldn't suggest
+  merging it with other configurations.
+
+  TODO(gregce): suggest host configuration trimming once we figure out the right
+  criteria. Even if Bazel's not technically equipped to do the trimming, it's
+  still theoretically valuable information. Note that moving from host to exec
+  configurations makes this all a little less relevant, since exec
+  configurations aren't "special" compared to normal configurations.
+  """
+  # We don't currently read the host config's fragments or option values, so
+  # both fields default to empty.
+  fragments: Tuple[str, ...] = ()
+  options: Mapping[str, Mapping[str, str]] = field(
+      default_factory=frozendict)
+
+
+@dataclass(frozen=True)
+class NullConfiguration(Configuration):
+  """Special marker for the null configuration.
+
+  By definition this has no fragments or options.
+  """
+  # Both fields default to empty, so NullConfiguration() takes no arguments.
+  fragments: Tuple[str, ...] = ()
+  options: Mapping[str, Mapping[str, str]] = field(
+      default_factory=frozendict)
diff --git a/tools/ctexplain/types_test.py b/tools/ctexplain/types_test.py
new file mode 100644
index 0000000..504dc3c
--- /dev/null
+++ b/tools/ctexplain/types_test.py
@@ -0,0 +1,70 @@
+# Lint as: python3
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for types.py."""
+import unittest
+# Do not edit this line. Copybara replaces it with PY2 migration helper.
+from frozendict import frozendict
+from tools.ctexplain.types import Configuration
+
+
+class TypesTest(unittest.TestCase):
+  """Checks that Configuration hashes and compares by value."""
+
+  def testConfigurationIsHashable(self):
+    options = frozendict({'o1': frozendict({'k1': 'v1'})})
+    c = Configuration(fragments=('F1',), options=options)
+    # Using c as a dict key raises TypeError if it isn't hashable.
+    self.assertEqual({c: 4}[c], 4)
+
+  def testConfigurationHashAccuracy(self):
+    d = {}
+    options1 = frozendict({'o1': frozendict({'k1': 'v1'})})
+    d[Configuration(fragments=('F1',), options=options1)] = 4
+    self.assertEqual(len(d), 1)
+
+    options2 = frozendict({'o1': frozendict({'k1': 'v1'})})
+    d[Configuration(fragments=('F1',), options=options2)] = 4
+    self.assertEqual(len(d), 1)
+
+    options3 = frozendict({'o1': frozendict({'k1': 'v1'})})
+    d[Configuration(fragments=('F2',), options=options3)] = 4
+    self.assertEqual(len(d), 2)
+
+    options4 = frozendict({'o2': frozendict({'k1': 'v1'})})
+    d[Configuration(fragments=('F2',), options=options4)] = 4
+    self.assertEqual(len(d), 3)
+
+    options5 = frozendict({'o2': frozendict({'k2': 'v1'})})
+    d[Configuration(fragments=('F2',), options=options5)] = 4
+    self.assertEqual(len(d), 4)
+
+    options6 = frozendict({'o2': frozendict({'k2': 'v2'})})
+    d[Configuration(fragments=('F2',), options=options6)] = 4
+    self.assertEqual(len(d), 5)
+
+  def testConfigurationEquality(self):
+    c1 = Configuration(fragments=('F1',), options={'o1': {'k1': 'v1'}})
+    c2 = Configuration(fragments=('F1',), options={'o1': {'k1': 'v1'}})
+    c3 = Configuration(fragments=('F2',), options={'o1': {'k1': 'v1'}})
+    c4 = Configuration(fragments=('F1',), options={'o2': {'k2': 'v2'}})
+
+    self.assertEqual(c1, c2)
+    self.assertNotEqual(c1, c3)
+    self.assertNotEqual(c1, c4)
+    self.assertNotEqual(c3, c4)
+
+
+if __name__ == '__main__':
+  unittest.main()