Restrict string escape sequences and introduce flag

Related: #8380 , [#38](https://github.com/bazelbuild/starlark/issues/38)

introduce flag --incompatible_restrict_escape_sequences=false
When the flag is enabled, invalid escape sequences like "\z" are
rejected.

RELNOTES: Flag `--incompatible_restrict_escape_sequences` is added. See
https://github.com/bazelbuild/bazel/issues/8380

Closes #8526.

PiperOrigin-RevId: 251434634
diff --git a/site/docs/skylark/backward-compatibility.md b/site/docs/skylark/backward-compatibility.md
index 91587aa..a817d3b 100644
--- a/site/docs/skylark/backward-compatibility.md
+++ b/site/docs/skylark/backward-compatibility.md
@@ -15,6 +15,7 @@
 General Starlark
 
 *   [Dictionary concatenation](#dictionary-concatenation)
+*   [String escapes](#string-escapes)
 *   [Load must appear at top of file](#load-must-appear-at-top-of-file)
 *   [Depset is no longer iterable](#depset-is-no-longer-iterable)
 *   [Depset union](#depset-union)
@@ -74,6 +75,16 @@
 *   Default: `true`
 *   Tracking issue: [#6461](https://github.com/bazelbuild/bazel/issues/6461)
 
+### String escapes
+
+We are restricting unrecognized escape sequences. Trying to include escape
+sequences like `\a`, `\b` or any other escape sequence that is unknown to
+Starlark will result in a syntax error.
+
+*   Flag: `--incompatible_restrict_escape_sequences`
+*   Default: `false`
+*   Tracking issue: [#8380](https://github.com/bazelbuild/bazel/issues/8380)
+
 ### Load must appear at top of file
 
 Previously, the `load` statement could appear anywhere in a `.bzl` file so long
diff --git a/src/main/java/com/google/devtools/build/lib/packages/StarlarkSemanticsOptions.java b/src/main/java/com/google/devtools/build/lib/packages/StarlarkSemanticsOptions.java
index 016924f..4d52868 100644
--- a/src/main/java/com/google/devtools/build/lib/packages/StarlarkSemanticsOptions.java
+++ b/src/main/java/com/google/devtools/build/lib/packages/StarlarkSemanticsOptions.java
@@ -612,6 +612,18 @@
               + "returns a depset instead.")
   public boolean incompatibleDepsetForLibrariesToLinkGetter;
 
+  @Option(
+      name = "incompatible_restrict_string_escapes",
+      defaultValue = "false",
+      documentationCategory = OptionDocumentationCategory.STARLARK_SEMANTICS,
+      effectTags = {OptionEffectTag.BUILD_FILE_SEMANTICS},
+      metadataTags = {
+        OptionMetadataTag.INCOMPATIBLE_CHANGE,
+        OptionMetadataTag.TRIGGERED_BY_ALL_INCOMPATIBLE_CHANGES
+      },
+      help = "If set to true, unknown string escapes like `\\a` become rejected.")
+  public boolean incompatibleRestrictStringEscapes;
+
   /** Constructs a {@link StarlarkSemantics} object corresponding to this set of option values. */
   public StarlarkSemantics toSkylarkSemantics() {
     return StarlarkSemantics.builder()
@@ -662,6 +674,7 @@
         .internalSkylarkFlagTestCanary(internalSkylarkFlagTestCanary)
         .incompatibleDoNotSplitLinkingCmdline(incompatibleDoNotSplitLinkingCmdline)
         .incompatibleDepsetForLibrariesToLinkGetter(incompatibleDepsetForLibrariesToLinkGetter)
+        .incompatibleRestrictStringEscapes(incompatibleRestrictStringEscapes)
         .build();
   }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/SkylarkImportLookupFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/SkylarkImportLookupFunction.java
index da2ee67..fe334f1 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/SkylarkImportLookupFunction.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/SkylarkImportLookupFunction.java
@@ -541,6 +541,7 @@
   public static void execAndExport(BuildFileAST ast, Label extensionLabel,
       EventHandler eventHandler,
       com.google.devtools.build.lib.syntax.Environment extensionEnv) throws InterruptedException {
+    ast.replayLexerEvents(extensionEnv, eventHandler);
     ImmutableList<Statement> statements = ast.getStatements();
     for (Statement statement : statements) {
       ast.execTopLevelStatement(statement, extensionEnv, eventHandler);
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/BuildFileAST.java b/src/main/java/com/google/devtools/build/lib/syntax/BuildFileAST.java
index 8902c8d..38485c2 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/BuildFileAST.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/BuildFileAST.java
@@ -49,6 +49,8 @@
    */
   private final boolean containsErrors;
 
+  private final List<Event> stringEscapeEvents;
+
   @Nullable private final String contentHashCode;
 
   private BuildFileAST(
@@ -57,13 +59,15 @@
       String contentHashCode,
       Location location,
       ImmutableList<Comment> comments,
-      @Nullable ImmutableList<SkylarkImport> imports) {
+      @Nullable ImmutableList<SkylarkImport> imports,
+      List<Event> stringEscapeEvents) {
     this.statements = statements;
     this.containsErrors = containsErrors;
     this.contentHashCode = contentHashCode;
     this.comments = comments;
     this.setLocation(location);
     this.imports = imports;
+    this.stringEscapeEvents = stringEscapeEvents;
   }
 
   private static BuildFileAST create(
@@ -98,7 +102,8 @@
         contentHashCode,
         result.location,
         ImmutableList.copyOf(result.comments),
-        skylarkImports.second);
+        skylarkImports.second,
+        result.stringEscapeEvents);
   }
 
   private static BuildFileAST create(
@@ -135,7 +140,8 @@
         null,
         this.statements.get(firstStatement).getLocation(),
         ImmutableList.of(),
-        imports.build());
+        imports.build(),
+        stringEscapeEvents);
   }
 
   /**
@@ -202,6 +208,15 @@
     return imports.build();
   }
 
+  /** Returns true if there was no error event. */
+  public boolean replayLexerEvents(Environment env, EventHandler eventHandler) {
+    if (env.getSemantics().incompatibleRestrictStringEscapes() && !stringEscapeEvents.isEmpty()) {
+      Event.replayEventsOn(eventHandler, stringEscapeEvents);
+      return false;
+    }
+    return true;
+  }
+
   /**
    * Executes this build file in a given Environment.
    *
@@ -371,7 +386,8 @@
         /* contentHashCode= */ null,
         result.location,
         ImmutableList.copyOf(result.comments),
-        /* imports= */ null);
+        /* imports= */ null,
+        result.stringEscapeEvents);
   }
 
   /**
@@ -384,7 +400,8 @@
     if (valid || containsErrors) {
       return this;
     }
-    return new BuildFileAST(statements, true, contentHashCode, getLocation(), comments, imports);
+    return new BuildFileAST(
+        statements, true, contentHashCode, getLocation(), comments, imports, stringEscapeEvents);
   }
 
   public static BuildFileAST parseString(EventHandler eventHandler, String... content) {
@@ -446,6 +463,7 @@
   public static BuildFileAST parseAndValidateSkylarkString(Environment env, String[] input)
       throws EvalException {
     BuildFileAST ast = parseString(env.getEventHandler(), input);
+    ast.replayLexerEvents(env, env.getEventHandler());
     ValidationEnvironment.validateAst(env, ast.getStatements());
     return ast;
   }
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
index c5a5c1a..c7f424b 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
@@ -104,6 +104,14 @@
   private int dents; // number of saved INDENT (>0) or OUTDENT (<0) tokens to return
 
   /**
+   * StringEscapeEvents contains the errors related to invalid escape sequences like "\a". This is
+   * not handled by the normal eventHandler. Instead, it is passed to the parser and then the AST.
+   * During the evaluation, we can decide to show the events based on a flag in StarlarkSemantics.
+   * This code is temporary, during the migration.
+   */
+  private final List<Event> stringEscapeEvents = new ArrayList<>();
+
+  /**
    * Constructs a lexer which tokenizes the contents of the specified InputBuffer. Any errors during
    * lexing are reported on "handler".
    */
@@ -129,6 +137,10 @@
     return comments;
   }
 
+  List<Event> getStringEscapeEvents() {
+    return stringEscapeEvents;
+  }
+
   /**
    * Returns the filename from which the lexer's input came. Returns an empty value if the input
    * came from a string.
@@ -457,10 +469,18 @@
             case 'v':
             case 'x':
               // exists in Python but not implemented in Blaze => error
-              error("escape sequence not implemented: \\" + c, literalStartPos, pos);
+              error("invalid escape sequence: \\" + c, literalStartPos, pos);
               break;
             default:
               // unknown char escape => "\literal"
+              stringEscapeEvents.add(
+                  Event.error(
+                      createLocation(pos - 1, pos),
+                      "invalid escape sequence: \\"
+                          + c
+                          + ". You can enable unknown escape sequences by passing the flag "
+                          + "--incompatible_restrict_string_escapes=false"));
+
               literal.append('\\');
               literal.append(c);
               break;
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Parser.java b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
index 497a526..fad276c 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
@@ -62,17 +62,21 @@
     /** Whether the file contained any errors. */
     public final boolean containsErrors;
 
+    public final List<Event> stringEscapeEvents;
+
     public ParseResult(
         List<Statement> statements,
         List<Comment> comments,
         Location location,
-        boolean containsErrors) {
+        boolean containsErrors,
+        List<Event> stringEscapeEvents) {
       // No need to copy here; when the object is created, the parser instance is just about to go
       // out of scope and be garbage collected.
       this.statements = Preconditions.checkNotNull(statements);
       this.comments = Preconditions.checkNotNull(comments);
       this.location = location;
       this.containsErrors = containsErrors;
+      this.stringEscapeEvents = stringEscapeEvents;
     }
   }
 
@@ -205,7 +209,11 @@
     }
     boolean errors = parser.errorsCount > 0 || lexer.containsErrors();
     return new ParseResult(
-        statements, lexer.getComments(), locationFromStatements(lexer, statements), errors);
+        statements,
+        lexer.getComments(),
+        locationFromStatements(lexer, statements),
+        errors,
+        lexer.getStringEscapeEvents());
   }
 
   /**
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/StarlarkSemantics.java b/src/main/java/com/google/devtools/build/lib/syntax/StarlarkSemantics.java
index a0da4da..3775303 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/StarlarkSemantics.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/StarlarkSemantics.java
@@ -204,6 +204,8 @@
 
   public abstract boolean incompatibleDepsetForLibrariesToLinkGetter();
 
+  public abstract boolean incompatibleRestrictStringEscapes();
+
   /** Returns a {@link Builder} initialized with the values of this instance. */
   public abstract Builder toBuilder();
 
@@ -261,6 +263,7 @@
           .internalSkylarkFlagTestCanary(false)
           .incompatibleDoNotSplitLinkingCmdline(true)
           .incompatibleDepsetForLibrariesToLinkGetter(true)
+          .incompatibleRestrictStringEscapes(false)
           .build();
 
   /** Builder for {@link StarlarkSemantics}. All fields are mandatory. */
@@ -353,6 +356,8 @@
 
     public abstract Builder incompatibleDepsetForLibrariesToLinkGetter(boolean value);
 
+    public abstract Builder incompatibleRestrictStringEscapes(boolean value);
+
     public abstract StarlarkSemantics build();
   }
 }
diff --git a/src/test/java/com/google/devtools/build/lib/packages/SkylarkSemanticsConsistencyTest.java b/src/test/java/com/google/devtools/build/lib/packages/SkylarkSemanticsConsistencyTest.java
index 2f3c401..01ebea9 100644
--- a/src/test/java/com/google/devtools/build/lib/packages/SkylarkSemanticsConsistencyTest.java
+++ b/src/test/java/com/google/devtools/build/lib/packages/SkylarkSemanticsConsistencyTest.java
@@ -165,6 +165,7 @@
         "--incompatible_restrict_named_params=" + rand.nextBoolean(),
         "--incompatible_static_name_resolution_in_build_files=" + rand.nextBoolean(),
         "--incompatible_string_join_requires_strings=" + rand.nextBoolean(),
+        "--incompatible_restrict_string_escapes=" + rand.nextBoolean(),
         "--internal_skylark_flag_test_canary=" + rand.nextBoolean());
   }
 
@@ -218,6 +219,7 @@
         .incompatibleRestrictNamedParams(rand.nextBoolean())
         .incompatibleStaticNameResolutionInBuildFiles(rand.nextBoolean())
         .incompatibleStringJoinRequiresStrings(rand.nextBoolean())
+        .incompatibleRestrictStringEscapes(rand.nextBoolean())
         .internalSkylarkFlagTestCanary(rand.nextBoolean())
         .build();
   }
diff --git a/src/test/java/com/google/devtools/build/lib/skylark/SkylarkIntegrationTest.java b/src/test/java/com/google/devtools/build/lib/skylark/SkylarkIntegrationTest.java
index d3a853f..f07fc4f 100644
--- a/src/test/java/com/google/devtools/build/lib/skylark/SkylarkIntegrationTest.java
+++ b/src/test/java/com/google/devtools/build/lib/skylark/SkylarkIntegrationTest.java
@@ -2994,4 +2994,28 @@
                   + "//test/skylark:ext3.bzl, //test/skylark:ext4.bzl]");
     }
   }
+
+  @Test
+  public void testUnknownStringEscapesForbidden() throws Exception {
+    setSkylarkSemanticsOptions("--incompatible_restrict_string_escapes=true");
+
+    scratch.file("test/extension.bzl", "y = \"\\z\"");
+
+    scratch.file("test/BUILD", "load('//test:extension.bzl', 'y')", "cc_library(name = 'r')");
+
+    reporter.removeHandler(failFastHandler);
+    getConfiguredTarget("//test:r");
+    assertContainsEvent("invalid escape sequence: \\z");
+  }
+
+  @Test
+  public void testUnknownStringEscapes() throws Exception {
+    setSkylarkSemanticsOptions("--incompatible_restrict_string_escapes=false");
+
+    scratch.file("test/extension.bzl", "y = \"\\z\"");
+
+    scratch.file("test/BUILD", "load('//test:extension.bzl', 'y')", "cc_library(name = 'r')");
+
+    getConfiguredTarget("//test:r");
+  }
 }
diff --git a/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java b/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java
index 7226d1c..77d237a 100644
--- a/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java
+++ b/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java
@@ -280,8 +280,7 @@
     assertThat(values(tokens("'a\\\nb'")))
         .isEqualTo("STRING(ab) NEWLINE EOF"); // escape end of line
     assertThat(values(tokens("\"ab\\ucd\""))).isEqualTo("STRING(abcd) NEWLINE EOF");
-    assertThat(lastError.toString())
-        .isEqualTo("/some/path.txt:1: escape sequence not implemented: \\u");
+    assertThat(lastError).isEqualTo("/some/path.txt:1: invalid escape sequence: \\u");
   }
 
   @Test
diff --git a/src/test/starlark/testdata/string_misc.sky b/src/test/starlark/testdata/string_misc.sky
index 21ecc29..904b9ab 100644
--- a/src/test/starlark/testdata/string_misc.sky
+++ b/src/test/starlark/testdata/string_misc.sky
@@ -148,3 +148,13 @@
 assert_eq('a '.isalpha(), False)
 assert_eq('A'.isalpha(), True)
 assert_eq('AbZ'.isalpha(), True)
+
+# escape sequences
+assert_eq("\"", '"')
+---
+"\777" ### octal escape sequence out of range (maximum is \377)
+---
+"\" ### unterminated string literal at eol
+---
+""" ### unterminated string literal at eof
+---
diff --git a/tools/build_defs/pkg/pkg.bzl b/tools/build_defs/pkg/pkg.bzl
index ca12b76..9074865 100644
--- a/tools/build_defs/pkg/pkg.bzl
+++ b/tools/build_defs/pkg/pkg.bzl
@@ -27,7 +27,7 @@
     return path
 
 def _quote(filename, protect = "="):
-    """Quote the filename, by escaping = by \= and \ by \\"""
+    """Quote the filename, by escaping = by \\= and \\ by \\\\"""
     return filename.replace("\\", "\\\\").replace(protect, "\\" + protect)
 
 def _pkg_tar_impl(ctx):
diff --git a/tools/cpp/windows_cc_configure.bzl b/tools/cpp/windows_cc_configure.bzl
index 3acbc97..dcdf3d6 100644
--- a/tools/cpp/windows_cc_configure.bzl
+++ b/tools/cpp/windows_cc_configure.bzl
@@ -43,7 +43,7 @@
         return None
 
 def _get_temp_env(repository_ctx):
-    """Returns the value of TMP, or TEMP, or if both undefined then C:\Windows."""
+    """Returns the value of TMP, or TEMP, or if both undefined then C:\\Windows."""
     tmp = _get_path_env_var(repository_ctx, "TMP")
     if not tmp:
         tmp = _get_path_env_var(repository_ctx, "TEMP")
@@ -81,7 +81,7 @@
     return tool_paths, tool_bin_path, include_directories
 
 def _get_system_root(repository_ctx):
-    """Get System root path on Windows, default is C:\\\Windows. Doesn't %-escape the result."""
+    """Get System root path on Windows, default is C:\\Windows. Doesn't %-escape the result."""
     systemroot = _get_path_env_var(repository_ctx, "SYSTEMROOT")
     if not systemroot:
         systemroot = "C:\\Windows"