[7.4.0] Make repo marker file parsing robust to version changes (#23941)

commits:
* d62e0a0
* c3393da

---------

Co-authored-by: Fredrik Medley <fredrik@meroton.com>
diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java
index 1e47b9b..7a231cd7 100644
--- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java
+++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java
@@ -26,6 +26,7 @@
 import com.google.devtools.build.lib.actions.FileValue;
 import com.google.devtools.build.lib.analysis.BlazeDirectories;
 import com.google.devtools.build.lib.cmdline.LabelConstants;
+import com.google.devtools.build.lib.cmdline.LabelSyntaxException;
 import com.google.devtools.build.lib.cmdline.LabelValidator;
 import com.google.devtools.build.lib.cmdline.RepositoryName;
 import com.google.devtools.build.lib.skyframe.ActionEnvironmentFunction;
@@ -76,25 +77,32 @@
 
     /**
      * Parses a recorded input from the post-colon substring that identifies the specific input: for
-     * example, the {@code MY_ENV_VAR} part of {@code ENV:MY_ENV_VAR}.
+     * example, the {@code MY_ENV_VAR} part of {@code ENV:MY_ENV_VAR}. Returns null if the parsed
+     * part is invalid.
      */
     public abstract RepoRecordedInput parse(String s);
   }
 
   private static final Comparator<RepoRecordedInput> COMPARATOR =
-      Comparator.comparing((RepoRecordedInput rri) -> rri.getParser().getPrefix())
-          .thenComparing(RepoRecordedInput::toStringInternal);
+      (o1, o2) ->
+          o1 == o2
+              ? 0
+              : Comparator.comparing((RepoRecordedInput rri) -> rri.getParser().getPrefix())
+                  .thenComparing(RepoRecordedInput::toStringInternal)
+                  .compare(o1, o2);
 
   /**
    * Parses a recorded input from its string representation.
    *
    * @param s the string representation
-   * @return The parsed recorded input object, or {@code null} if the string representation is
-   *     invalid
+   * @return The parsed recorded input object, or {@link #NEVER_UP_TO_DATE} if the string
+   *     representation is invalid
    */
-  @Nullable
   public static RepoRecordedInput parse(String s) {
     List<String> parts = Splitter.on(':').limit(2).splitToList(s);
+    if (parts.size() < 2) {
+      return NEVER_UP_TO_DATE;
+    }
     for (Parser parser :
         new Parser[] {
           File.PARSER, Dirents.PARSER, DirTree.PARSER, EnvVar.PARSER, RecordedRepoMapping.PARSER
@@ -103,7 +111,7 @@
         return parser.parse(parts.get(1));
       }
     }
-    return null;
+    return NEVER_UP_TO_DATE;
   }
 
   /**
@@ -172,6 +180,42 @@
       Environment env, BlazeDirectories directories, @Nullable String oldValue)
       throws InterruptedException;
 
+  /** A sentinel "input" that's always out-of-date to signify parse failure. */
+  public static final RepoRecordedInput NEVER_UP_TO_DATE =
+      new RepoRecordedInput() {
+        @Override
+        public boolean equals(Object obj) {
+          return this == obj;
+        }
+
+        @Override
+        public int hashCode() {
+          return 12345678;
+        }
+
+        @Override
+        public String toStringInternal() {
+          throw new UnsupportedOperationException("this sentinel input should never be serialized");
+        }
+
+        @Override
+        public Parser getParser() {
+          throw new UnsupportedOperationException("this sentinel input should never be parsed");
+        }
+
+        @Override
+        public SkyKey getSkyKey(BlazeDirectories directories) {
+          // Return a random SkyKey to satisfy the contract.
+          return PrecomputedValue.STARLARK_SEMANTICS.getKey();
+        }
+
+        @Override
+        public boolean isUpToDate(
+            Environment env, BlazeDirectories directories, @Nullable String oldValue) {
+          return false;
+        }
+      };
+
   /**
    * Represents a filesystem path stored in a way that is repo-cache-friendly. That is, if the path
    * happens to point inside the current Bazel workspace (in either the main repo or an external
@@ -213,12 +257,12 @@
       return repoName().map(repoName -> repoName + "//" + path()).orElse(path().toString());
     }
 
-    public static RepoCacheFriendlyPath parse(String s) {
+    public static RepoCacheFriendlyPath parse(String s) throws LabelSyntaxException {
       if (LabelValidator.isAbsolute(s)) {
         int doubleSlash = s.indexOf("//");
         int skipAts = s.startsWith("@@") ? 2 : s.startsWith("@") ? 1 : 0;
         return createInsideWorkspace(
-            RepositoryName.createUnvalidated(s.substring(skipAts, doubleSlash)),
+            RepositoryName.create(s.substring(skipAts, doubleSlash)),
             PathFragment.create(s.substring(doubleSlash + 2)));
       }
       return createOutsideWorkspace(PathFragment.create(s));
@@ -264,7 +308,12 @@
 
           @Override
           public RepoRecordedInput parse(String s) {
-            return new File(RepoCacheFriendlyPath.parse(s));
+            try {
+              return new File(RepoCacheFriendlyPath.parse(s));
+            } catch (LabelSyntaxException e) {
+              // malformed inputs cause refetch
+              return NEVER_UP_TO_DATE;
+            }
           }
         };
 
@@ -355,7 +404,12 @@
 
           @Override
           public RepoRecordedInput parse(String s) {
-            return new Dirents(RepoCacheFriendlyPath.parse(s));
+            try {
+              return new Dirents(RepoCacheFriendlyPath.parse(s));
+            } catch (LabelSyntaxException e) {
+              // malformed inputs cause refetch
+              return NEVER_UP_TO_DATE;
+            }
           }
         };
 
@@ -439,7 +493,12 @@
 
           @Override
           public RepoRecordedInput parse(String s) {
-            return new DirTree(RepoCacheFriendlyPath.parse(s));
+            try {
+              return new DirTree(RepoCacheFriendlyPath.parse(s));
+            } catch (LabelSyntaxException e) {
+              // malformed inputs cause refetch
+              return NEVER_UP_TO_DATE;
+            }
           }
         };
 
@@ -578,8 +637,12 @@
           @Override
           public RepoRecordedInput parse(String s) {
             List<String> parts = Splitter.on(',').limit(2).splitToList(s);
-            return new RecordedRepoMapping(
-                RepositoryName.createUnvalidated(parts.get(0)), parts.get(1));
+            try {
+              return new RecordedRepoMapping(RepositoryName.create(parts.get(0)), parts.get(1));
+            } catch (LabelSyntaxException | IndexOutOfBoundsException e) {
+              // malformed inputs cause refetch
+              return NEVER_UP_TO_DATE;
+            }
           }
         };
 
@@ -635,9 +698,14 @@
         throws InterruptedException {
       RepositoryMappingValue repoMappingValue =
           (RepositoryMappingValue) env.getValue(getSkyKey(directories));
-      return repoMappingValue != RepositoryMappingValue.NOT_FOUND_VALUE
-          && RepositoryName.createUnvalidated(oldValue)
-              .equals(repoMappingValue.getRepositoryMapping().get(apparentName));
+      try {
+        return repoMappingValue != RepositoryMappingValue.NOT_FOUND_VALUE
+            && RepositoryName.create(oldValue)
+                .equals(repoMappingValue.getRepositoryMapping().get(apparentName));
+      } catch (LabelSyntaxException e) {
+        // malformed old value causes refetch
+        return false;
+      }
     }
   }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java
index b44d333..53e6494 100644
--- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java
+++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java
@@ -603,6 +603,10 @@
   }
 
   private static class DigestWriter {
+    // Input value map to force repo invalidation.
+    private static final ImmutableMap<RepoRecordedInput, String> NOT_UP_TO_DATE =
+        ImmutableMap.of(RepoRecordedInput.NEVER_UP_TO_DATE, "");
+
     private final BlazeDirectories directories;
     private final Path markerPath;
     private final Rule rule;
@@ -663,53 +667,58 @@
         return null;
       }
 
-      Map<RepoRecordedInput, String> recordedInputValues = new TreeMap<>();
-      String content;
       try {
-        content = FileSystemUtils.readContent(markerPath, UTF_8);
-        String markerRuleKey = readMarkerFile(content, recordedInputValues);
-        boolean verified = false;
-        if (Preconditions.checkNotNull(ruleKey).equals(markerRuleKey)) {
-          verified = handler.verifyRecordedInputs(rule, directories, recordedInputValues, env);
-          if (env.valuesMissing()) {
-            return null;
-          }
-        }
-
-        if (verified) {
-          return new Fingerprint().addString(content).digestAndReset();
-        } else {
+        String content = FileSystemUtils.readContent(markerPath, UTF_8);
+        Map<RepoRecordedInput, String> recordedInputValues =
+            readMarkerFile(content, Preconditions.checkNotNull(ruleKey));
+        if (!handler.verifyRecordedInputs(rule, directories, recordedInputValues, env)) {
           return null;
         }
+        if (env.valuesMissing()) {
+          return null;
+        }
+        return new Fingerprint().addString(content).digestAndReset();
       } catch (IOException e) {
         throw new RepositoryFunctionException(e, Transience.TRANSIENT);
       }
     }
 
     @Nullable
-    private static String readMarkerFile(
-        String content, Map<RepoRecordedInput, String> recordedInputValues) {
-      String markerRuleKey = null;
+    private static Map<RepoRecordedInput, String> readMarkerFile(
+        String content, String expectedRuleKey) {
       Iterable<String> lines = Splitter.on('\n').split(content);
 
-      boolean firstLine = true;
+      @Nullable Map<RepoRecordedInput, String> recordedInputValues = null;
+      boolean firstLineVerified = false;
       for (String line : lines) {
-        if (firstLine) {
-          markerRuleKey = line;
-          firstLine = false;
+        if (line.isEmpty()) {
+          continue;
+        }
+        if (!firstLineVerified) {
+          if (!line.equals(expectedRuleKey)) {
+            // Break early, need to reload anyway. This also detects marker file version changes
+            // so that unknown formats are not parsed.
+            return NOT_UP_TO_DATE;
+          }
+          firstLineVerified = true;
+          recordedInputValues = new TreeMap<>();
         } else {
           int sChar = line.indexOf(' ');
           if (sChar > 0) {
             RepoRecordedInput input = RepoRecordedInput.parse(unescape(line.substring(0, sChar)));
-            if (input == null) {
-              // ignore invalid entries.
+            if (!input.equals(RepoRecordedInput.NEVER_UP_TO_DATE)) {
+              recordedInputValues.put(input, unescape(line.substring(sChar + 1)));
               continue;
             }
-            recordedInputValues.put(input, unescape(line.substring(sChar + 1)));
           }
+          // On parse failure, just forget everything else and mark the whole input out of date.
+          return NOT_UP_TO_DATE;
         }
       }
-      return markerRuleKey;
+      if (!firstLineVerified) {
+        return NOT_UP_TO_DATE;
+      }
+      return Preconditions.checkNotNull(recordedInputValues);
     }
 
     private String computeRuleKey(Rule rule, StarlarkSemantics starlarkSemantics) {
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/PrecomputedValue.java b/src/main/java/com/google/devtools/build/lib/skyframe/PrecomputedValue.java
index 9ebeaa3..5831152 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/PrecomputedValue.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/PrecomputedValue.java
@@ -149,6 +149,10 @@
       this.key = shareable ? Key.create(key) : UnshareableKey.create(key);
     }
 
+    public SkyKey getKey() {
+      return key;
+    }
+
     @VisibleForTesting
     public SkyKey getKeyForTesting() {
       return key;
diff --git a/src/test/shell/bazel/starlark_repository_test.sh b/src/test/shell/bazel/starlark_repository_test.sh
index 0d08571..461b9e6 100755
--- a/src/test/shell/bazel/starlark_repository_test.sh
+++ b/src/test/shell/bazel/starlark_repository_test.sh
@@ -2954,6 +2954,54 @@
   expect_log "I see:"
 }
 
+function test_bad_marker_file_ignored() {
+  # when reading a file in another repo, we should watch it.
+  local outside_dir="${TEST_TMPDIR}/outside_dir"
+  mkdir -p "${outside_dir}"
+  echo nothing > ${outside_dir}/data.txt
+
+  create_new_workspace
+  cat > MODULE.bazel <<EOF
+foo = use_repo_rule("//:r.bzl", "foo")
+foo(name = "foo")
+bar = use_repo_rule("//:r.bzl", "bar")
+bar(name = "bar", data = "nothing")
+EOF
+  touch BUILD
+  cat > r.bzl <<EOF
+def _foo(rctx):
+  rctx.file("BUILD", "filegroup(name='foo')")
+  # intentionally grab a file that's not directly addressable by a label
+  otherfile = rctx.path(Label("@bar//subpkg:BUILD")).dirname.dirname.get_child("data.txt")
+  print("I see: " + rctx.read(otherfile))
+foo=repository_rule(_foo)
+def _bar(rctx):
+  rctx.file("subpkg/BUILD")
+  rctx.file("data.txt", rctx.attr.data)
+bar=repository_rule(_bar, attrs={"data":attr.string()})
+EOF
+
+  bazel build @foo >& $TEST_log || fail "expected bazel to succeed"
+  expect_log "I see: nothing"
+
+  local marker_file=$(bazel info output_base)/external/@_main~_repo_rules~foo.marker
+  # the marker file for this repo should contain a reference to "@@_main~_repo_rules~bar". Mangle that.
+  sed -i'' -e 's/@@_main~_repo_rules~bar/@@LOL@@LOL/g' ${marker_file}
+
+  # Running Bazel again shouldn't crash, and should result in a refetch.
+  bazel shutdown
+  bazel build @foo >& $TEST_log || fail "expected bazel to succeed"
+  expect_log "I see: nothing"
+
+  # Test to clear the marker file.
+  echo > ${marker_file}
+
+  # Running Bazel again shouldn't crash, and should result in a refetch.
+  bazel shutdown
+  bazel build @foo >& $TEST_log || fail "expected bazel to succeed"
+  expect_log "I see: nothing"
+}
+
 function test_file_watching_in_undefined_repo() {
   create_new_workspace
   cat > MODULE.bazel <<EOF