| # Copyright 2020 The Bazel Authors. All rights reserved. | 
 | # | 
 | # Licensed under the Apache License, Version 2.0 (the "License"); | 
 | # you may not use this file except in compliance with the License. | 
 | # You may obtain a copy of the License at | 
 | # | 
 | #    http://www.apache.org/licenses/LICENSE-2.0 | 
 | # | 
 | # Unless required by applicable law or agreed to in writing, software | 
 | # distributed under the License is distributed on an "AS IS" BASIS, | 
 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 | # See the License for the specific language governing permissions and | 
 | # limitations under the License. | 
 | r"""Command line diffing tool that compares two bazel aquery invocations. | 
 |  | 
 | This script compares the proto or textproto output of two bazel aquery | 
 | invocations. For each set of output files of an action, it compares the command | 
 | lines that generated the files. | 
 |  | 
 | Example usage: | 
 |  | 
 | 1. Prepare 2 aquery output files: | 
 | bazel aquery //path/to:target_one --output=textproto > \ | 
 |     /path/to/output_one.textproto | 
 | bazel aquery //path/to:target_two --output=textproto > \ | 
 |     /path/to/output_two.textproto | 
 |  | 
 | 2. Run the differ from a bazel repo: | 
 | bazel run //tools/aquery_differ:aquery_differ -- \ | 
 | --before=/path/to/output_one.textproto \ | 
 | --after=/path/to/output_two.textproto \ | 
 | --input_type=textproto \ | 
 | --attrs=cmdline \ | 
 | --attrs=inputs | 
 | """ | 
 |  | 
 | from __future__ import absolute_import | 
 | from __future__ import division | 
 | from __future__ import print_function | 
 | import difflib | 
 | import os | 
 | import sys | 
 |  | 
 | # Do not edit this line. Copybara replaces it with PY2 migration helper. | 
 | from absl import app | 
 | from absl import flags | 
 | from six.moves import map | 
 | from google.protobuf import text_format | 
 | from src.main.protobuf import analysis_v2_pb2 | 
 | from tools.aquery_differ.resolvers.dep_set_resolver import DepSetResolver | 
 | from tools.aquery_differ.resolvers.path_fragment_resolver import PathFragmentResolver | 
 | # pylint: disable=g-import-not-at-top | 
 | # resource lib isn't available on Windows. | 
 | if os.name != "nt": | 
 |   import resource | 
 | # pylint: enable=g-import-not-at-top | 
 |  | 
 | flags.DEFINE_string("before", None, "Aquery output before the change") | 
 | flags.DEFINE_string("after", None, "Aquery output after the change") | 
 | flags.DEFINE_enum( | 
 |     "input_type", "proto", ["proto", "textproto"], | 
 |     "The format of the aquery proto input. One of 'proto' and 'textproto.") | 
 | flags.DEFINE_multi_enum("attrs", ["cmdline"], ["inputs", "cmdline"], | 
 |                         "Attributes of the actions to be compared.") | 
 | flags.DEFINE_integer( | 
 |     "max_mem_alloc_mb", 3072, | 
 |     "Amount of max memory available for aquery_differ, in MB.") | 
 | flags.mark_flag_as_required("before") | 
 | flags.mark_flag_as_required("after") | 
 |  | 
 | WHITE = "\033[37m%s\033[0m" | 
 | CYAN = "\033[36m%s\033[0m" | 
 | RED = "\033[31m%s\033[0m" | 
 | GREEN = "\033[32m%s\033[0m" | 
 |  | 
 |  | 
 | def _colorize(line): | 
 |   """Add color to the input string.""" | 
 |   if not sys.stdout.isatty(): | 
 |     return line | 
 |  | 
 |   if line.startswith("+++") or line.startswith("---"): | 
 |     return WHITE % line | 
 |  | 
 |   if line.startswith("@@"): | 
 |     return CYAN % line | 
 |  | 
 |   if line.startswith("+"): | 
 |     return GREEN % line | 
 |  | 
 |   if line.startswith("-"): | 
 |     return RED % line | 
 |  | 
 |   return line | 
 |  | 
 |  | 
 | def _print_diff(output_files, before_val, after_val, attr, before_file, | 
 |                 after_file): | 
 |   diff = "\n".join( | 
 |       map(_colorize, [ | 
 |           s.strip("\n") for s in difflib.unified_diff(before_val, after_val, | 
 |                                                       before_file, after_file) | 
 |       ])) | 
 |   print(("[%s]\n" | 
 |          "Difference in the action that generates the following output(s):" | 
 |          "\n\t%s\n%s\n") % (attr, "\n\t".join(output_files.split()), diff)) | 
 |  | 
 |  | 
 | def _map_artifact_id_to_path(artifacts, path_fragments): | 
 |   path_fragment_resolver = PathFragmentResolver(path_fragments) | 
 |   return { | 
 |       artifact.id: path_fragment_resolver.resolve(artifact.path_fragment_id) | 
 |       for artifact in artifacts | 
 |   } | 
 |  | 
 |  | 
 | def _map_action_index_to_output_files(actions, artifacts): | 
 |   """Constructs a map from action index to output files. | 
 |  | 
 |   Args: | 
 |     actions: a list of actions from the action graph container | 
 |     artifacts: a map {artifact_id: artifact path} | 
 |  | 
 |   Returns: | 
 |     A map from action index (in action graph container) to a string of | 
 |     concatenated output artifacts paths. | 
 |   """ | 
 |   action_index_to_output_files = {} | 
 |   for i, action in enumerate(actions): | 
 |     output_files = " ".join( | 
 |         sorted([artifacts[output_id] for output_id in action.output_ids])) | 
 |     action_index_to_output_files[i] = output_files | 
 |   return action_index_to_output_files | 
 |  | 
 |  | 
 | # output files -> input artifacts | 
 | def _map_output_files_to_input_artifacts(action_graph_container, | 
 |                                          artifact_id_to_path, | 
 |                                          action_index_to_output_files): | 
 |   """Constructs a map from output files to input artifacts. | 
 |  | 
 |   Args: | 
 |     action_graph_container: the full action graph container object | 
 |     artifact_id_to_path: a map {artifact_id: artifact path} | 
 |     action_index_to_output_files: a map from action index (in action graph | 
 |       container) to a string of concatenated output artifacts paths. | 
 |  | 
 |   Returns: | 
 |     A map from output files (string of concatenated output artifacts paths) to a | 
 |     list of input artifacts. | 
 |   """ | 
 |   actions = action_graph_container.actions | 
 |   dep_set_of_files = action_graph_container.dep_set_of_files | 
 |   id_to_dep_set = {dep_set.id: dep_set for dep_set in dep_set_of_files} | 
 |   dep_set_resolver = DepSetResolver(dep_set_of_files, artifact_id_to_path) | 
 |  | 
 |   output_files_to_input_artifacts = {} | 
 |   for i, action in enumerate(actions): | 
 |     input_artifacts = [] | 
 |  | 
 |     for dep_set_id in action.input_dep_set_ids: | 
 |       input_artifacts.extend( | 
 |           dep_set_resolver.resolve(id_to_dep_set[dep_set_id])) | 
 |  | 
 |     output_files_to_input_artifacts[action_index_to_output_files[i]] = list( | 
 |         sorted(input_artifacts)) | 
 |  | 
 |   return output_files_to_input_artifacts | 
 |  | 
 |  | 
 | # output files -> command line | 
 | def _map_output_files_to_command_line(actions, action_index_to_output_files): | 
 |   """Constructs a map from output files to command line. | 
 |  | 
 |   Args: | 
 |     actions: a list of actions from the action graph container | 
 |     action_index_to_output_files: a map from action index (in action graph | 
 |       container) to a string of concatenated output artifacts paths. | 
 |  | 
 |   Returns: | 
 |     A map from output files (string of concatenated output artifacts paths) | 
 |     to the command line (a list of arguments). | 
 |   """ | 
 |   output_files_to_command_line = {} | 
 |   for i, action in enumerate(actions): | 
 |     output_files_to_command_line[ | 
 |         action_index_to_output_files[i]] = action.arguments | 
 |   return output_files_to_command_line | 
 |  | 
 |  | 
 | def _aquery_diff(before_proto, after_proto, attrs, before_file, after_file): | 
 |   """Returns differences between command lines that generate same outputs.""" | 
 |   found_difference = False | 
 |   artifacts_before = _map_artifact_id_to_path(before_proto.artifacts, | 
 |                                               before_proto.path_fragments) | 
 |   artifacts_after = _map_artifact_id_to_path(after_proto.artifacts, | 
 |                                              after_proto.path_fragments) | 
 |  | 
 |   action_to_output_files_before = _map_action_index_to_output_files( | 
 |       before_proto.actions, artifacts_before) | 
 |   action_to_output_files_after = _map_action_index_to_output_files( | 
 |       after_proto.actions, artifacts_after) | 
 |  | 
 |   # There's a 1-to-1 mapping between action and outputs | 
 |   output_files_before = set(action_to_output_files_before.values()) | 
 |   output_files_after = set(action_to_output_files_after.values()) | 
 |  | 
 |   before_after_diff = output_files_before - output_files_after | 
 |   after_before_diff = output_files_after - output_files_before | 
 |  | 
 |   if before_after_diff: | 
 |     print(("Aquery output 'before' change contains an action that generates " | 
 |            "the following outputs that aquery output 'after' change doesn't:" | 
 |            "\n%s\n") % "\n".join(before_after_diff)) | 
 |     found_difference = True | 
 |   if after_before_diff: | 
 |     print(("Aquery output 'after' change contains an action that generates " | 
 |            "the following outputs that aquery output 'before' change doesn't:" | 
 |            "\n%s\n") % "\n".join(after_before_diff)) | 
 |     found_difference = True | 
 |  | 
 |   if "cmdline" in attrs: | 
 |     output_to_command_line_before = _map_output_files_to_command_line( | 
 |         before_proto.actions, action_to_output_files_before) | 
 |     output_to_command_line_after = _map_output_files_to_command_line( | 
 |         after_proto.actions, action_to_output_files_after) | 
 |     for output_files in output_to_command_line_before: | 
 |       arguments = output_to_command_line_before[output_files] | 
 |       after_arguments = output_to_command_line_after.get(output_files, None) | 
 |       if after_arguments and arguments != after_arguments: | 
 |         _print_diff(output_files, arguments, after_arguments, "cmdline", | 
 |                     before_file, after_file) | 
 |         found_difference = True | 
 |  | 
 |   if "inputs" in attrs: | 
 |     output_to_input_files_before = _map_output_files_to_input_artifacts( | 
 |         before_proto, artifacts_before, action_to_output_files_before) | 
 |     output_to_input_files_after = _map_output_files_to_input_artifacts( | 
 |         after_proto, artifacts_after, action_to_output_files_after) | 
 |     for output_files in output_to_input_files_before: | 
 |       before_inputs = output_to_input_files_before[output_files] | 
 |       after_inputs = output_to_input_files_after.get(output_files, None) | 
 |       if after_inputs and before_inputs != after_inputs: | 
 |         _print_diff(output_files, before_inputs, after_inputs, "inputs", | 
 |                     before_file, after_file) | 
 |         found_difference = True | 
 |  | 
 |   if not found_difference: | 
 |     print("No difference") | 
 |  | 
 |  | 
 | def to_absolute_path(path): | 
 |   path = os.path.expanduser(path) | 
 |   if os.path.isabs(path): | 
 |     return path | 
 |   else: | 
 |     if "BUILD_WORKING_DIRECTORY" in os.environ: | 
 |       return os.path.join(os.environ["BUILD_WORKING_DIRECTORY"], path) | 
 |     else: | 
 |       return path | 
 |  | 
 |  | 
 | def main(unused_argv): | 
 |   before_file = to_absolute_path(flags.FLAGS.before) | 
 |   after_file = to_absolute_path(flags.FLAGS.after) | 
 |   input_type = flags.FLAGS.input_type | 
 |   attrs = flags.FLAGS.attrs | 
 |   max_mem_alloc_mb = flags.FLAGS.max_mem_alloc_mb | 
 |  | 
 |   # resource lib isn't available on Windows. | 
 |   if os.name != "nt": | 
 |     max_heap_bytes = max_mem_alloc_mb * 1024 * 1024 | 
 |     resource.setrlimit(resource.RLIMIT_AS, (max_heap_bytes, max_heap_bytes)) | 
 |  | 
 |   before_proto = analysis_v2_pb2.ActionGraphContainer() | 
 |   after_proto = analysis_v2_pb2.ActionGraphContainer() | 
 |   try: | 
 |     if input_type == "proto": | 
 |       with open(before_file, "rb") as f: | 
 |         before_proto.ParseFromString(f.read()) | 
 |       with open(after_file, "rb") as f: | 
 |         after_proto.ParseFromString(f.read()) | 
 |     else: | 
 |       with open(before_file, "r") as f: | 
 |         before_text = f.read() | 
 |         text_format.Merge(before_text, before_proto) | 
 |       with open(after_file, "r") as f: | 
 |         after_text = f.read() | 
 |         text_format.Merge(after_text, after_proto) | 
 |  | 
 |     _aquery_diff(before_proto, after_proto, attrs, before_file, after_file) | 
 |   except MemoryError: | 
 |     print( | 
 |         "aquery_differ is known to cause OOM issue with large inputs. More details: b/154620006.", | 
 |         file=sys.stderr) | 
 |     print( | 
 |         "Max mem space of {}MB exceeded".format(max_mem_alloc_mb), | 
 |         file=sys.stderr) | 
 |     sys.exit(1) | 
 |  | 
 |  | 
 | if __name__ == "__main__": | 
 |   app.run(main) |