Add a script for diffing command lines based on bazel aquery outputs

Progress towards #5380 and #5883

RELNOTES: None.
PiperOrigin-RevId: 227646729
diff --git a/.bazelci/postsubmit.yml b/.bazelci/postsubmit.yml
index c52d2c1..f33af86 100644
--- a/.bazelci/postsubmit.yml
+++ b/.bazelci/postsubmit.yml
@@ -17,6 +17,7 @@
     - "//src/tools/singlejar/..."
     - "//third_party/ijar/..."
     - "//tools/android/..."
+    - "//tools/cmd_line_differ/..."
   ubuntu1604:
     shell_commands:
     - sed -i.bak -e 's/^# android_sdk_repository/android_sdk_repository/' -e 's/^#
@@ -34,6 +35,7 @@
     - "//src/tools/singlejar/..."
     - "//third_party/ijar/..."
     - "//tools/android/..."
+    - "//tools/cmd_line_differ/..."
   ubuntu1804:
     shell_commands:
     - sed -i.bak -e 's/^# android_sdk_repository/android_sdk_repository/' -e 's/^#
@@ -51,6 +53,7 @@
     - "//src/tools/singlejar/..."
     - "//third_party/ijar/..."
     - "//tools/android/..."
+    - "//tools/cmd_line_differ/..."
   ubuntu1804_nojava:
     build_flags:
     - "--javabase=@openjdk_linux_archive//:runtime"
@@ -66,6 +69,7 @@
     - "//src/test/..."
     - "//src/tools/singlejar/..."
     - "//third_party/ijar/..."
+    - "//tools/cmd_line_differ/..."
     # We can't run Android tests without an installed Android SDK / NDK.
     - "-//src/test/java/com/google/devtools/build/android/..."
     - "-//src/test/shell/bazel/android/..."
@@ -123,6 +127,7 @@
     - "//src/tools/singlejar/..."
     - "//third_party/ijar/..."
     - "//tools/android/..."
+    - "//tools/cmd_line_differ/..."
   ubuntu1804_java10:
     shell_commands:
     - sed -i.bak -e 's/^# android_sdk_repository/android_sdk_repository/' -e 's/^#
@@ -140,6 +145,7 @@
     - "//src/tools/singlejar/..."
     - "//third_party/ijar/..."
     - "//tools/android/..."
+    - "//tools/cmd_line_differ/..."
   macos:
     shell_commands:
     - sed -i.bak -e 's/^# android_sdk_repository/android_sdk_repository/' -e 's/^#
@@ -163,6 +169,7 @@
     - "//src/tools/singlejar/..."
     - "//third_party/ijar/..."
     - "//tools/android/..."
+    - "//tools/cmd_line_differ/..."
     - "-//src/test/shell/integration:minimal_jdk_test"
   windows:
     batch_commands:
diff --git a/.bazelci/presubmit.yml b/.bazelci/presubmit.yml
index 66aca10..cdae905 100644
--- a/.bazelci/presubmit.yml
+++ b/.bazelci/presubmit.yml
@@ -19,6 +19,7 @@
     - "//src/tools/workspacelog/..."
     - "//third_party/ijar/..."
     - "//tools/android/..."
+    - "//tools/cmd_line_differ/..."
     # Disable Slow Tests
     - "-//src/test/shell/bazel:bazel_determinism_test"
     # Re-enable once fixed: https://github.com/bazelbuild/bazel/issues/4663
@@ -42,6 +43,7 @@
     - "//src/tools/workspacelog/..."
     - "//third_party/ijar/..."
     - "//tools/android/..."
+    - "//tools/cmd_line_differ/..."
     # Disable Slow Tests
     - "-//src/test/shell/bazel:bazel_determinism_test"
     # Re-enable once fixed: https://github.com/bazelbuild/bazel/issues/4663
@@ -65,6 +67,7 @@
     - "//src/tools/workspacelog/..."
     - "//third_party/ijar/..."
     - "//tools/android/..."
+    - "//tools/cmd_line_differ/..."
     # Disable Slow Tests
     - "-//src/test/shell/bazel:bazel_determinism_test"
     # Re-enable once fixed: https://github.com/bazelbuild/bazel/issues/4663
@@ -86,6 +89,7 @@
     - "//src/tools/singlejar/..."
     - "//src/tools/workspacelog/..."
     - "//third_party/ijar/..."
+    - "//tools/cmd_line_differ/..."
     # Disable Slow Tests
     - "-//src/test/shell/bazel:bazel_determinism_test"
     # We can't run Android tests without an installed Android SDK / NDK.
@@ -147,6 +151,7 @@
     - "//src/tools/workspacelog/..."
     - "//third_party/ijar/..."
     - "//tools/android/..."
+    - "//tools/cmd_line_differ/..."
     # Disable Slow Tests
     - "-//src/test/shell/bazel:bazel_determinism_test"
     # Re-enable once fixed: https://github.com/bazelbuild/bazel/issues/4663
@@ -170,6 +175,7 @@
     - "//src/tools/workspacelog/..."
     - "//third_party/ijar/..."
     - "//tools/android/..."
+    - "//tools/cmd_line_differ/..."
     # Disable Slow Tests
     - "-//src/test/shell/bazel:bazel_determinism_test"
     # Re-enable once fixed: https://github.com/bazelbuild/bazel/issues/4663
@@ -199,6 +205,7 @@
     - "//src/tools/workspacelog/..."
     - "//third_party/ijar/..."
     - "//tools/android/..."
+    - "//tools/cmd_line_differ/..."
     # Re-enable once fixed: https://github.com/bazelbuild/bazel/issues/4663
     - "-//src/test/shell/bazel/android:android_ndk_integration_test"
     # The below tests have been disabled because they are too slow on macOS.
diff --git a/src/main/protobuf/BUILD b/src/main/protobuf/BUILD
index e95881a..05c3224 100644
--- a/src/main/protobuf/BUILD
+++ b/src/main/protobuf/BUILD
@@ -54,6 +54,14 @@
     deps = [":build_proto"],
 )
 
+py_proto_library(
+    name = "analysis_py_proto",
+    srcs = ["analysis.proto"],
+    default_runtime = "//third_party/protobuf:protobuf_python",
+    protoc = "//third_party/protobuf:protoc",
+    deps = [":build_pb_py"],
+)
+
 java_proto_library(
     name = "analysis_java_proto",
     deps = [":analysis_proto"],
diff --git a/src/test/shell/bazel/BUILD b/src/test/shell/bazel/BUILD
index 303ecb2..d06c7a9 100644
--- a/src/test/shell/bazel/BUILD
+++ b/src/test/shell/bazel/BUILD
@@ -7,6 +7,7 @@
         "//src/test/shell/bazel/apple:srcs",
         "//src/test/shell/bazel/remote:srcs",
         "//src/test/shell/bazel/testdata:srcs",
+        "//tools/cmd_line_differ:srcs",
     ],
     visibility = ["//src/test/shell:__pkg__"],
 )
diff --git a/tools/BUILD b/tools/BUILD
index c9ffaf5..a943a3f 100644
--- a/tools/BUILD
+++ b/tools/BUILD
@@ -72,6 +72,7 @@
         "//tools/android:all_windows_tests",
         "//tools/bash:all_windows_tests",
         "//tools/build_defs:all_windows_tests",
+        "//tools/cmd_line_differ:cmd_line_differ_test",
         "//tools/cpp/runfiles:all_windows_tests",
         "//tools/java:all_windows_tests",
         "//tools/jdk:all_windows_tests",
diff --git a/tools/cmd_line_differ/BUILD b/tools/cmd_line_differ/BUILD
new file mode 100644
index 0000000..34bbffc
--- /dev/null
+++ b/tools/cmd_line_differ/BUILD
@@ -0,0 +1,41 @@
+# Copyright 2018 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+filegroup(
+    name = "srcs",
+    srcs = glob(["**"]),
+)
+
+py_binary(
+    name = "cmd_line_differ",
+    srcs = ["cmd_line_differ.py"],
+    deps = [
+        "//src/main/protobuf:analysis_py_proto",
+        "//third_party/py/abseil",
+    ],
+)
+
+py_test(
+    name = "cmd_line_differ_test",
+    srcs = ["cmd_line_differ_test.py"],
+    deps = [
+        ":cmd_line_differ",
+        "//src/main/protobuf:analysis_py_proto",
+        "//third_party/py/mock",
+    ],
+)
diff --git a/tools/cmd_line_differ/cmd_line_differ.py b/tools/cmd_line_differ/cmd_line_differ.py
new file mode 100644
index 0000000..9ea6dab
--- /dev/null
+++ b/tools/cmd_line_differ/cmd_line_differ.py
@@ -0,0 +1,132 @@
+# Copyright 2018 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+r"""Command line diffing tool that compares two bazel aquery invocations.
+
+This script compares the proto output of two bazel aquery invocations. For
+each set of output files of an action, it compares the command lines that
+generated the files.
+
+Example usage:
+bazel aquery //path/to:target_one --output=textproto > \
+    /path/to/output_one.textproto
+bazel aquery //path/to:target_two --output=textproto > \
+    /path/to/output_two.textproto
+
+From a bazel repo:
+bazel run //tools/cmd_line_differ:cmd_line_differ -- \
+--before=/path/to/output_one.textproto \
+--after=/path/to/output_two.textproto
+--input_type=textproto
+"""
+
+from absl import app
+from absl import flags
+from google.protobuf import text_format
+from src.main.protobuf import analysis_pb2
+
+flags.DEFINE_string("before", None, "Aquery output before the change")
+flags.DEFINE_string("after", None, "Aquery output after the change")
+flags.DEFINE_enum(
+    "input_type", "proto", ["proto", "textproto"],
+    "The format of the aquery proto input. One of 'proto' and 'textproto.")
+flags.mark_flag_as_required("before")
+flags.mark_flag_as_required("after")
+
+
+def _map_artifact_id_to_path(artifacts):
+  return {artifact.id: artifact.exec_path for artifact in artifacts}
+
+
+def _map_output_files_to_command_line(actions, artifacts):
+  output_files_to_command_line = {}
+  for action in actions:
+    output_files = " ".join(
+        sorted([artifacts[output_id] for output_id in action.output_ids]))
+    output_files_to_command_line[output_files] = action.arguments
+  return output_files_to_command_line
+
+
+def _aquery_diff(before, after):
+  """Returns differences between command lines that generate same outputs."""
+  # TODO(bazel-team): Currently we compare only command lines of actions that
+  # generate the same output files. Expand the differ to compare other values as
+  # well (e.g. mnemonic, inputs, execution tags...).
+
+  found_difference = False
+  artifacts_before = _map_artifact_id_to_path(before.artifacts)
+  artifacts_after = _map_artifact_id_to_path(after.artifacts)
+
+  output_to_command_line_before = _map_output_files_to_command_line(
+      before.actions, artifacts_before)
+  output_to_command_line_after = _map_output_files_to_command_line(
+      after.actions, artifacts_after)
+
+  output_files_before = set(output_to_command_line_before.keys())
+  output_files_after = set(output_to_command_line_after.keys())
+
+  before_after_diff = output_files_before - output_files_after
+  after_before_diff = output_files_after - output_files_before
+
+  if before_after_diff:
+    print(("Aquery output before change contains an action that generates "
+           "the following outputs that aquery output after change doesn't:"
+           "\n%s\n") % "\n".join(before_after_diff))
+    found_difference = True
+  if after_before_diff:
+    print(("Aquery output after change contains an action that generates "
+           "the following outputs that aquery output before change doesn't:"
+           "\n%s\n") % "\n".join(after_before_diff))
+    found_difference = True
+
+  for output_files in output_to_command_line_before:
+    arguments = output_to_command_line_before[output_files]
+    after_arguments = output_to_command_line_after.get(output_files, None)
+    if after_arguments and arguments != after_arguments:
+      print(("Difference in action that generates the following outputs:\n%s\n"
+             "Aquery output before change has the following command line:\n%s\n"
+             "Aquery output after change has the following command line:\n%s\n")
+            % ("\n".join(output_files.split()), "\n".join(arguments),
+               "\n".join(after_arguments)))
+      found_difference = True
+
+  if not found_difference:
+    print("No difference")
+
+
+def main(unused_argv):
+
+  before_file = flags.FLAGS.before
+  after_file = flags.FLAGS.after
+  input_type = flags.FLAGS.input_type
+
+  before_proto = analysis_pb2.ActionGraphContainer()
+  after_proto = analysis_pb2.ActionGraphContainer()
+  if input_type == "proto":
+    with open(before_file, "rb") as f:
+      before_proto.ParseFromString(f.read())
+    with open(after_file, "rb") as f:
+      after_proto.ParseFromString(f.read())
+  else:
+    with open(before_file, "r") as f:
+      before_text = f.read()
+      text_format.Merge(before_text, before_proto)
+    with open(after_file, "r") as f:
+      after_text = f.read()
+      text_format.Merge(after_text, after_proto)
+
+  _aquery_diff(before_proto, after_proto)
+
+
+if __name__ == "__main__":
+  app.run(main)
diff --git a/tools/cmd_line_differ/cmd_line_differ_test.py b/tools/cmd_line_differ/cmd_line_differ_test.py
new file mode 100644
index 0000000..f5f8f2c
--- /dev/null
+++ b/tools/cmd_line_differ/cmd_line_differ_test.py
@@ -0,0 +1,195 @@
+# Copyright 2018 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from src.main.protobuf import analysis_pb2
+from tools.cmd_line_differ import cmd_line_differ
+from third_party.py import mock
+try:
+  # Python 2
+  from cStringIO import StringIO
+except ImportError:
+  # Python 3
+  from io import StringIO
+
+
+def make_aquery_output(actions, artifact_paths):
+  action_graph = analysis_pb2.ActionGraphContainer()
+  for artifact_path in artifact_paths:
+    next_id = len(action_graph.artifacts)
+    artifact = action_graph.artifacts.add()
+    artifact.id = str(next_id)
+    artifact.exec_path = artifact_path
+  for next_action in actions:
+    action = action_graph.actions.add()
+    action.output_ids.extend(next_action["output_ids"])
+    action.arguments.extend(next_action["arguments"])
+  return action_graph
+
+
+class CmdLineDifferTest(unittest.TestCase):
+
+  def test_no_difference(self):
+    action_graph = make_aquery_output(
+        actions=[{
+            "arguments": ["-a", "-b"],
+            "output_ids": ["0", "1"]
+        }, {
+            "arguments": ["-c"],
+            "output_ids": ["2"]
+        }],
+        artifact_paths=["exec/path/zero", "exec/path/one", "exec/path/two"])
+    mock_stdout = StringIO()
+    with mock.patch("sys.stdout", mock_stdout):
+      cmd_line_differ._aquery_diff(action_graph, action_graph)
+      self.assertEqual(mock_stdout.getvalue(), "No difference\n")
+
+  def test_no_difference_different_output_files_order(self):
+    first = make_aquery_output(
+        actions=[
+            {
+                "arguments": ["-a", "-b"],
+                "output_ids": ["0", "1"]
+            },
+        ],
+        artifact_paths=["exec/path/zero", "exec/path/one"])
+    second = make_aquery_output(
+        actions=[
+            {
+                "arguments": ["-a", "-b"],
+                "output_ids": ["1", "0"]
+            },
+        ],
+        artifact_paths=["exec/path/zero", "exec/path/one"])
+
+    mock_stdout = StringIO()
+    with mock.patch("sys.stdout", mock_stdout):
+      cmd_line_differ._aquery_diff(first, second)
+      self.assertEqual(mock_stdout.getvalue(), "No difference\n")
+
+  def test_first_has_extra_output_files(self):
+    first = make_aquery_output(
+        actions=[
+            {
+                "arguments": ["-a", "-b"],
+                "output_ids": ["0", "1"]
+            },
+            {
+                "arguments": ["-c"],
+                "output_ids": ["2"]
+            },
+        ],
+        artifact_paths=["exec/path/zero", "exec/path/one", "exec/path/two"],
+    )
+    second = make_aquery_output(
+        actions=[
+            {
+                "arguments": ["-a", "-b"],
+                "output_ids": ["1", "0"]
+            },
+        ],
+        artifact_paths=["exec/path/zero", "exec/path/one", "exec/path/two"],
+    )
+
+    expected_error = ("Aquery output before change contains an action "
+                      "that generates the following outputs that aquery "
+                      "output after change doesn't:\nexec/path/two\n\n")
+    mock_stdout = StringIO()
+    with mock.patch("sys.stdout", mock_stdout):
+      cmd_line_differ._aquery_diff(first, second)
+      self.assertEqual(mock_stdout.getvalue(), expected_error)
+
+  def test_second_has_extra_output_files(self):
+    first = make_aquery_output(
+        actions=[
+            {
+                "arguments": ["-a", "-b"],
+                "output_ids": ["0", "1"]
+            },
+        ],
+        artifact_paths=["exec/path/zero", "exec/path/one", "exec/path/two"],
+    )
+    second = make_aquery_output(
+        actions=[
+            {
+                "arguments": ["-a", "-b"],
+                "output_ids": ["0", "1"]
+            },
+            {
+                "arguments": ["-c"],
+                "output_ids": ["2"]
+            },
+        ],
+        artifact_paths=["exec/path/zero", "exec/path/one", "exec/path/two"],
+    )
+
+    expected_error = ("Aquery output after change contains an action that"
+                      " generates the following outputs that aquery"
+                      " output before change doesn't:\nexec/path/two\n\n")
+    mock_stdout = StringIO()
+    with mock.patch("sys.stdout", mock_stdout):
+      cmd_line_differ._aquery_diff(first, second)
+      self.assertEqual(mock_stdout.getvalue(), expected_error)
+
+  def test_different_command_lines(self):
+    first = make_aquery_output(
+        actions=[
+            {
+                "arguments": ["-a", "-d"],
+                "output_ids": ["0", "1"]
+            },
+            {
+                "arguments": ["-c"],
+                "output_ids": ["2"]
+            },
+        ],
+        artifact_paths=["exec/path/zero", "exec/path/one", "exec/path/two"],
+    )
+    second = make_aquery_output(
+        actions=[
+            {
+                "arguments": ["-a", "-b"],
+                "output_ids": ["0", "1"]
+            },
+            {
+                "arguments": ["-c", "-d"],
+                "output_ids": ["2"]
+            },
+        ],
+        artifact_paths=["exec/path/zero", "exec/path/one", "exec/path/two"],
+    )
+
+    expected_error_one = "\n".join([
+        "Difference in action that generates the following outputs:",
+        "exec/path/two",
+        "Aquery output before change has the following command line:", "-c",
+        "Aquery output after change has the following command line:", "-c",
+        "-d", "\n"
+    ])
+    expected_error_two = "\n".join([
+        "Difference in action that generates the following outputs:",
+        "exec/path/one", "exec/path/zero",
+        "Aquery output before change has the following command line:", "-a",
+        "-d", "Aquery output after change has the following command line:",
+        "-a", "-b", "\n"
+    ])
+    mock_stdout = StringIO()
+    with mock.patch("sys.stdout", mock_stdout):
+      cmd_line_differ._aquery_diff(first, second)
+      self.assertIn(expected_error_one, mock_stdout.getvalue())
+      self.assertIn(expected_error_two, mock_stdout.getvalue())
+
+
+if __name__ == "__main__":
+  unittest.main()