| # Copyright 2020 The Bazel Authors. All rights reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| r"""Command line diffing tool that compares two bazel aquery invocations. |
| |
| This script compares the proto or textproto output of two bazel aquery |
| invocations. For each set of output files of an action, it compares the command |
| lines that generated the files. |
| |
| Example usage: |
| |
| 1. Prepare 2 aquery output files: |
| bazel aquery //path/to:target_one --output=textproto > \ |
| /path/to/output_one.textproto |
| bazel aquery //path/to:target_two --output=textproto > \ |
| /path/to/output_two.textproto |
| |
| 2. Run the differ from a bazel repo: |
| bazel run //tools/aquery_differ:aquery_differ -- \ |
| --before=/path/to/output_one.textproto \ |
| --after=/path/to/output_two.textproto \ |
| --input_type=textproto \ |
| --attrs=cmdline \ |
| --attrs=inputs |
| """ |
| |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| import difflib |
| import os |
| import sys |
| |
| # Do not edit this line. Copybara replaces it with PY2 migration helper. |
| from absl import app |
| from absl import flags |
| from six.moves import map |
| from google.protobuf import text_format |
| from src.main.protobuf import analysis_v2_pb2 |
| from tools.aquery_differ.resolvers.dep_set_resolver import DepSetResolver |
| from tools.aquery_differ.resolvers.path_fragment_resolver import PathFragmentResolver |
| # pylint: disable=g-import-not-at-top |
| # resource lib isn't available on Windows. |
| if os.name != "nt": |
| import resource |
| # pylint: enable=g-import-not-at-top |
| |
# Command-line flags. --before and --after are mandatory (see the
# mark_flag_as_required calls below); the other flags fall back to the defaults
# given here.
flags.DEFINE_string("before", None, "Aquery output before the change")
flags.DEFINE_string("after", None, "Aquery output after the change")
flags.DEFINE_enum(
    "input_type", "proto", ["proto", "textproto"],
    "The format of the aquery proto input. One of 'proto' and 'textproto'.")
# Multi-valued flag: repeat --attrs once per attribute that should be compared.
flags.DEFINE_multi_enum("attrs", ["cmdline"], ["inputs", "cmdline"],
                        "Attributes of the actions to be compared.")
# Read by main(), which turns it into an address-space limit on non-Windows
# platforms.
flags.DEFINE_integer(
    "max_mem_alloc_mb", 3072,
    "Amount of max memory available for aquery_differ, in MB.")
flags.mark_flag_as_required("before")
flags.mark_flag_as_required("after")
| |
# ANSI escape templates used by _colorize. Each one wraps a single "%s"
# placeholder in a color code and ends with the reset sequence.
_ANSI_RESET = "\033[0m"
WHITE = "\033[37m" + "%s" + _ANSI_RESET
CYAN = "\033[36m" + "%s" + _ANSI_RESET
RED = "\033[31m" + "%s" + _ANSI_RESET
GREEN = "\033[32m" + "%s" + _ANSI_RESET
| |
| |
def _colorize(line):
  """Wraps a unified-diff line in an ANSI color chosen by its prefix.

  Args:
    line: a single line of unified-diff output.

  Returns:
    The line unchanged when stdout is not a terminal or when no prefix
    matches; otherwise the line substituted into the matching color template.
  """
  if not sys.stdout.isatty():
    return line

  # Order matters: the "+++" / "---" file headers must be tested before the
  # single-character "+" / "-" prefixes, which would also match them.
  prefix_to_template = (
      (("+++", "---"), WHITE),
      ("@@", CYAN),
      ("+", GREEN),
      ("-", RED),
  )
  for prefix, template in prefix_to_template:
    if line.startswith(prefix):
      return template % line
  return line
| |
| |
def _print_diff(output_files, before_val, after_val, attr, before_file,
                after_file):
  """Prints a unified diff of one attribute of a single action.

  Args:
    output_files: whitespace-separated string of the action's output paths;
      each path is printed on its own tab-indented line.
    before_val: sequence of strings holding the attribute's 'before' value.
    after_val: sequence of strings holding the attribute's 'after' value.
    attr: name of the compared attribute, printed in square brackets.
    before_file: label used for the "---" header of the diff.
    after_file: label used for the "+++" header of the diff.
  """
  raw_diff = difflib.unified_diff(before_val, after_val, before_file,
                                  after_file)
  # Newlines are stripped from both ends of every diff line before coloring,
  # then the lines are re-joined with exactly one newline between them.
  colored_diff = "\n".join(
      _colorize(diff_line.strip("\n")) for diff_line in raw_diff)
  indented_outputs = "\n\t".join(output_files.split())
  report = ("[%s]\n"
            "Difference in the action that generates the following output(s):"
            "\n\t%s\n%s\n")
  print(report % (attr, indented_outputs, colored_diff))
| |
| |
def _map_artifact_id_to_path(artifacts, path_fragments):
  """Builds a map from artifact id to the artifact's resolved path.

  Args:
    artifacts: iterable of artifacts exposing `id` and `path_fragment_id`.
    path_fragments: the path fragments handed to PathFragmentResolver.

  Returns:
    A dict {artifact.id: value returned by PathFragmentResolver.resolve for
    that artifact's path_fragment_id}.
  """
  resolver = PathFragmentResolver(path_fragments)
  id_to_path = {}
  for artifact in artifacts:
    id_to_path[artifact.id] = resolver.resolve(artifact.path_fragment_id)
  return id_to_path
| |
| |
def _map_action_index_to_output_files(actions, artifacts):
  """Builds a map from each action's position to its joined output paths.

  Args:
    actions: a list of actions from the action graph container
    artifacts: a map {artifact_id: artifact path}

  Returns:
    A dict keyed by the action's index in `actions`. Each value is a single
    string: the action's output paths, sorted and joined with one space (the
    empty string for an action without outputs).
  """
  return {
      index: " ".join(
          sorted(artifacts[output_id] for output_id in action.output_ids))
      for index, action in enumerate(actions)
  }
| |
| |
# output files -> input artifacts
def _map_output_files_to_input_artifacts(action_graph_container,
                                         artifact_id_to_path,
                                         action_index_to_output_files):
  """Builds a map from an action's output files to its sorted inputs.

  Args:
    action_graph_container: the full action graph container object
    artifact_id_to_path: a map {artifact_id: artifact path}
    action_index_to_output_files: a map from action index (in action graph
      container) to a string of concatenated output artifacts paths.

  Returns:
    A dict keyed by the output-files string of each action. Each value is the
    sorted list of everything DepSetResolver.resolve yields for the action's
    input dep sets. If two actions share the same key, the later one wins.
  """
  dep_sets = action_graph_container.dep_set_of_files
  dep_set_by_id = {}
  for dep_set in dep_sets:
    dep_set_by_id[dep_set.id] = dep_set
  resolver = DepSetResolver(dep_sets, artifact_id_to_path)

  inputs_by_output_files = {}
  for index, action in enumerate(action_graph_container.actions):
    resolved_inputs = [
        resolved
        for dep_set_id in action.input_dep_set_ids
        for resolved in resolver.resolve(dep_set_by_id[dep_set_id])
    ]
    resolved_inputs.sort()
    output_key = action_index_to_output_files[index]
    inputs_by_output_files[output_key] = resolved_inputs

  return inputs_by_output_files
| |
| |
# output files -> command line
def _map_output_files_to_command_line(actions, action_index_to_output_files):
  """Builds a map from an action's output files to its command line.

  Args:
    actions: a list of actions from the action graph container
    action_index_to_output_files: a map from action index (in action graph
      container) to a string of concatenated output artifacts paths.

  Returns:
    A dict keyed by the output-files string of each action; each value is that
    action's `arguments`, stored as-is (not copied). If two actions share the
    same key, the later one wins.
  """
  return {
      action_index_to_output_files[index]: action.arguments
      for index, action in enumerate(actions)
  }
| |
| |
def _print_attr_diffs(before_by_outputs, after_by_outputs, attr, before_file,
                      after_file):
  """Prints a diff for every shared output set whose attribute value changed.

  Args:
    before_by_outputs: a map {output files string: attribute value} built from
      the 'before' aquery output.
    after_by_outputs: the same map built from the 'after' aquery output.
    attr: name of the compared attribute, forwarded to _print_diff.
    before_file: path of the 'before' file, used as diff header.
    after_file: path of the 'after' file, used as diff header.

  Returns:
    True if at least one diff was printed, False otherwise.
  """
  printed_any = False
  for output_files, before_val in before_by_outputs.items():
    after_val = after_by_outputs.get(output_files)
    # Only None means "no such action after the change". An empty value is a
    # legitimate attribute value (e.g. all arguments were removed) and must
    # still be compared, so do not rely on truthiness here.
    if after_val is not None and before_val != after_val:
      _print_diff(output_files, before_val, after_val, attr, before_file,
                  after_file)
      printed_any = True
  return printed_any


def _aquery_diff(before_proto, after_proto, attrs, before_file, after_file):
  """Prints the differences between two action graphs.

  Actions are matched by their set of output files. Output sets present on
  only one side are listed first; then, for output sets present on both sides,
  the attributes named in `attrs` are diffed. "No difference" is printed when
  nothing was reported.

  Args:
    before_proto: action graph container parsed from the 'before' file.
    after_proto: action graph container parsed from the 'after' file.
    attrs: collection of attribute names to compare; "cmdline" and "inputs"
      are recognized.
    before_file: path of the 'before' file, used as diff header.
    after_file: path of the 'after' file, used as diff header.
  """
  found_difference = False
  artifacts_before = _map_artifact_id_to_path(before_proto.artifacts,
                                              before_proto.path_fragments)
  artifacts_after = _map_artifact_id_to_path(after_proto.artifacts,
                                             after_proto.path_fragments)

  action_to_output_files_before = _map_action_index_to_output_files(
      before_proto.actions, artifacts_before)
  action_to_output_files_after = _map_action_index_to_output_files(
      after_proto.actions, artifacts_after)

  # There's a 1-to-1 mapping between action and outputs
  output_files_before = set(action_to_output_files_before.values())
  output_files_after = set(action_to_output_files_after.values())

  only_in_before = output_files_before - output_files_after
  only_in_after = output_files_after - output_files_before

  # Sets have no stable iteration order; sort so repeated runs print the same
  # report.
  if only_in_before:
    print(("Aquery output 'before' change contains an action that generates "
           "the following outputs that aquery output 'after' change doesn't:"
           "\n%s\n") % "\n".join(sorted(only_in_before)))
    found_difference = True
  if only_in_after:
    print(("Aquery output 'after' change contains an action that generates "
           "the following outputs that aquery output 'before' change doesn't:"
           "\n%s\n") % "\n".join(sorted(only_in_after)))
    found_difference = True

  # The per-attribute maps are only built when the attribute was requested.
  if "cmdline" in attrs:
    output_to_command_line_before = _map_output_files_to_command_line(
        before_proto.actions, action_to_output_files_before)
    output_to_command_line_after = _map_output_files_to_command_line(
        after_proto.actions, action_to_output_files_after)
    if _print_attr_diffs(output_to_command_line_before,
                         output_to_command_line_after, "cmdline", before_file,
                         after_file):
      found_difference = True

  if "inputs" in attrs:
    output_to_input_files_before = _map_output_files_to_input_artifacts(
        before_proto, artifacts_before, action_to_output_files_before)
    output_to_input_files_after = _map_output_files_to_input_artifacts(
        after_proto, artifacts_after, action_to_output_files_after)
    if _print_attr_diffs(output_to_input_files_before,
                         output_to_input_files_after, "inputs", before_file,
                         after_file):
      found_difference = True

  if not found_difference:
    print("No difference")
| |
| |
def to_absolute_path(path):
  """Resolves a user-supplied path against the directory bazel was run from.

  Args:
    path: a file path; a leading "~" is expanded first.

  Returns:
    The expanded path itself if it is absolute or if BUILD_WORKING_DIRECTORY
    is not set in the environment; otherwise the expanded path joined onto
    the value of BUILD_WORKING_DIRECTORY.
  """
  expanded = os.path.expanduser(path)
  if os.path.isabs(expanded):
    return expanded

  working_dir = os.environ.get("BUILD_WORKING_DIRECTORY")
  if working_dir is None:
    return expanded
  return os.path.join(working_dir, expanded)
| |
| |
def _load_action_graph(path, input_type):
  """Parses one aquery output file into an ActionGraphContainer.

  Keeping the raw file contents local to this function lets them be released
  as soon as parsing finishes, instead of staying alive for the whole diff.

  Args:
    path: path of the aquery output file.
    input_type: "proto" for the binary format; any other value is read as
      textproto.

  Returns:
    The populated analysis_v2_pb2.ActionGraphContainer.
  """
  container = analysis_v2_pb2.ActionGraphContainer()
  if input_type == "proto":
    with open(path, "rb") as f:
      container.ParseFromString(f.read())
  else:
    with open(path, "r") as f:
      text_format.Merge(f.read(), container)
  return container


def main(unused_argv):
  """Parses both aquery outputs and prints their differences.

  On non-Windows platforms the address space of the process is first capped at
  --max_mem_alloc_mb. If a MemoryError is raised while loading or diffing, an
  explanation is printed to stderr and the process exits with status 1.

  Args:
    unused_argv: positional command-line arguments passed by absl; ignored.
  """
  before_file = to_absolute_path(flags.FLAGS.before)
  after_file = to_absolute_path(flags.FLAGS.after)
  input_type = flags.FLAGS.input_type
  attrs = flags.FLAGS.attrs
  max_mem_alloc_mb = flags.FLAGS.max_mem_alloc_mb

  # resource lib isn't available on Windows.
  if os.name != "nt":
    max_heap_bytes = max_mem_alloc_mb * 1024 * 1024
    resource.setrlimit(resource.RLIMIT_AS, (max_heap_bytes, max_heap_bytes))

  try:
    before_proto = _load_action_graph(before_file, input_type)
    after_proto = _load_action_graph(after_file, input_type)
    _aquery_diff(before_proto, after_proto, attrs, before_file, after_file)
  except MemoryError:
    print(
        "aquery_differ is known to cause OOM issue with large inputs. More details: b/154620006.",
        file=sys.stderr)
    print(
        "Max mem space of {}MB exceeded".format(max_mem_alloc_mb),
        file=sys.stderr)
    sys.exit(1)


if __name__ == "__main__":
  app.run(main)