Enable formatting of environment variables in cc_args

Copybara Import from https://github.com/bazelbuild/rules_cc/pull/413

BEGIN_PUBLIC
Enable formatting of environment variables in cc_args (#413)

Windows does not follow the Unix sysroot convention.
Instead, it uses the `INCLUDE` and `LIB` environment variables
to specify the include and library search paths.

This change enables formatting of the values of environment variables set
in the `env` attribute of `cc_args` using the standard `format` attribute.
This allows Windows users to download a hermetic "sysroot" as part of the build,
and set the relevant "INCLUDE" and "LIB" environment variables.

Example usage:
```
cc_args(
    name = "arg-include",
    actions = [
        "@rules_cc//cc/toolchains/actions:c_compile",
        "@rules_cc//cc/toolchains/actions:cpp_compile_actions",
    ],
    env = {
        "INCLUDE": "{include}",
    },
    format = {
        "include": ":sysroot-include",
    },
    data = [
        ":sysroot-include",
    ],
)
```

example repo: https://github.com/reutermj/windows-rules-based-toolchain-example

Future work:
The current implementation of format substitution allows only a single variable to be substituted per string. Users of the API will often need to substitute multiple paths into the `INCLUDE`/`LIB` environment variables.

Closes #413
END_PUBLIC

COPYBARA_INTEGRATE_REVIEW=https://github.com/bazelbuild/rules_cc/pull/413 from reutermj:main 4c59d66168ca57725051d2044a2949fb5fd7f0c3
PiperOrigin-RevId: 780817009
Change-Id: Ifab4889f94f788f24a2e585d124444b49102839b
diff --git a/cc/toolchains/args.bzl b/cc/toolchains/args.bzl
index 718fcee..5d33e50 100644
--- a/cc/toolchains/args.bzl
+++ b/cc/toolchains/args.bzl
@@ -24,6 +24,7 @@
 load(
     "//cc/toolchains/impl:nested_args.bzl",
     "NESTED_ARGS_ATTRS",
+    "format_dict_values",
     "nested_args_provider_from_ctx",
 )
 load(
@@ -40,9 +41,17 @@
 def _cc_args_impl(ctx):
     actions = collect_action_types(ctx.attr.actions)
 
+    formatted_env, used_format_vars = format_dict_values(
+        env = ctx.attr.env,
+        must_use = [],  # checking for unused variables is done when formatting `args`.
+        format = {k: v for v, k in ctx.attr.format.items()},
+    )
+
     nested = None
     if ctx.attr.args or ctx.attr.nested:
-        nested = nested_args_provider_from_ctx(ctx)
+        # Forward the format variables used by the env formatting so they don't trigger
+        # errors if they go unused during the argument formatting.
+        nested = nested_args_provider_from_ctx(ctx, used_format_vars)
         validate_nested_args(
             variables = ctx.attr._variables[BuiltinVariablesInfo].variables,
             nested_args = nested,
@@ -60,7 +69,7 @@
         actions = actions,
         requires_any_of = tuple(requires),
         nested = nested,
-        env = ctx.attr.env,
+        env = formatted_env,
         files = files,
         allowlist_include_directories = depset(
             direct = [d[DirectoryInfo] for d in ctx.attr.allowlist_include_directories],
@@ -234,10 +243,11 @@
         data: (List[Label]) A list of runtime data dependencies that are required for these
             arguments to work as intended.
         env: (Dict[str, str]) Environment variables that should be set when the tool is invoked.
-        format: (Dict[str, Label]) A mapping of format strings to the label of the corresponding
-            `cc_variable` that the value should be pulled from. All instances of
-            `{variable_name}` will be replaced with the expanded value of `variable_name` in this
-            dictionary. The complete list of possible variables can be found in
+        format: (Dict[str, Label]) A mapping of format strings to the label of a corresponding
+            target. This target can be a `directory`, `subdirectory`, `cc_variable`, or a single
+            file that the value should be pulled from. All instances of `{variable_name}` in the
+            `args` list will be replaced with the expanded value in this dictionary.
+            The complete list of possible variables can be found in
             https://github.com/bazelbuild/rules_cc/tree/main/cc/toolchains/variables/BUILD.
             It is not possible to declare custom variables--these are inherent to Bazel itself.
         iterate_over: (Label) The label of a `cc_variable` that should be iterated over. This is
diff --git a/cc/toolchains/impl/nested_args.bzl b/cc/toolchains/impl/nested_args.bzl
index 17ebb77..8ab07b9 100644
--- a/cc/toolchains/impl/nested_args.bzl
+++ b/cc/toolchains/impl/nested_args.bzl
@@ -84,11 +84,13 @@
 
 # TODO: Consider replacing this with a subrule in the future. However, maybe not
 # for a long time, since it'll break compatibility with all bazel versions < 7.
-def nested_args_provider_from_ctx(ctx):
+def nested_args_provider_from_ctx(ctx, maybe_used_vars = []):
     """Gets the nested args provider from a rule that has NESTED_ARGS_ATTRS.
 
     Args:
         ctx: The rule context
+        maybe_used_vars: (List[str]) A list of format variables that are not needed during args formatting.
+
     Returns:
         NestedArgsInfo
     """
@@ -105,6 +107,7 @@
         requires_false = _var(ctx.attr.requires_false),
         requires_equal = _var(ctx.attr.requires_equal),
         requires_equal_value = ctx.attr.requires_equal_value,
+        maybe_used_vars = maybe_used_vars,
     )
 
 def nested_args_provider(
@@ -121,6 +124,7 @@
         requires_false = None,
         requires_equal = None,
         requires_equal_value = "",
+        maybe_used_vars = [],
         fail = fail):
     """Creates a validated NestedArgsInfo.
 
@@ -148,6 +152,7 @@
           be ignored if the variable is not equal to requires_equal_value.
         requires_equal_value: (str) The value to compare the requires_equal
           variable with
+        maybe_used_vars: (List[str]) A list of format variables that are not needed during args formatting.
         fail: A fail function. Use only for testing.
     Returns:
         NestedArgsInfo
@@ -183,7 +188,12 @@
     #     args = ["{}"],
     #     iterate_over = "//cc/toolchains/variables:libraries_to_link.object_files",
     # )
-    args = format_args(args, replacements, must_use = format.values(), fail = fail)
+    formatted_args, _ = format_list(
+        args,
+        replacements,
+        must_use = [var for var in format.values() if var not in maybe_used_vars],
+        fail = fail,
+    )
 
     transitive_files = [ea.files for ea in nested]
     transitive_files.append(files)
@@ -203,8 +213,8 @@
 
     kwargs = {}
 
-    if args:
-        kwargs["flags"] = args
+    if formatted_args:
+        kwargs["flags"] = formatted_args
 
     requires_types = {}
     if nested:
@@ -281,8 +291,10 @@
 def _escape(s):
     return s.replace("%", "%%")
 
-def _format_target(target, fail = fail):
+def _format_target(target, arg, allow_variables, fail = fail):
     if VariableInfo in target:
+        if not allow_variables:
+            fail("Unsupported cc_variable substitution %s in %r." % (target.label, arg))
         return "%%{%s}" % target[VariableInfo].name
     elif DirectoryInfo in target:
         return _escape(target[DirectoryInfo].path)
@@ -293,10 +305,49 @@
 
     fail("%s should be either a variable, a directory, or a single file." % target.label)
 
-def format_args(args, format, must_use = [], fail = fail):
+def _format_string(arg, format, used_vars, allow_variables, fail = fail):
+    upto = 0
+    out = []
+    has_format = False
+
+    # This should be "while true".
+    # This number is used because it's an upper bound of the number of iterations.
+    for _ in range(len(arg)):
+        if upto >= len(arg):
+            break
+
+        # Escaping via "{{" and "}}"
+        if arg[upto] in "{}" and upto + 1 < len(arg) and arg[upto + 1] == arg[upto]:
+            out.append(arg[upto])
+            upto += 2
+        elif arg[upto] == "{":
+            chunks = arg[upto + 1:].split("}", 1)
+            if len(chunks) != 2:
+                fail("Unmatched { in %r" % arg)
+            variable = chunks[0]
+
+            if variable not in format:
+                fail('Unknown variable %r in format string %r. Try using cc_args(..., format = {"//path/to:variable": %r})' % (variable, arg, variable))
+            elif has_format:
+                fail("The format string %r contained multiple variables, which is unsupported." % arg)
+            else:
+                used_vars[variable] = None
+                has_format = True
+                out.append(_format_target(format[variable], arg, allow_variables, fail = fail))
+                upto += len(variable) + 2
+
+        elif arg[upto] == "}":
+            fail("Unexpected } in %r" % arg)
+        else:
+            out.append(_escape(arg[upto]))
+            upto += 1
+
+    return "".join(out)
+
+def format_list(args, format, must_use = [], fail = fail):
     """Lists all of the variables referenced by an argument.
 
-    Eg: format_args(["--foo", "--bar={bar}"], {"bar": VariableInfo(name="bar")})
+    Eg: format_list(["--foo", "--bar={bar}"], {"bar": VariableInfo(name="bar")})
       => ["--foo", "--bar=%{bar}"]
 
     Args:
@@ -312,46 +363,37 @@
     used_vars = {}
 
     for arg in args:
-        upto = 0
-        out = []
-        has_format = False
-
-        # This should be "while true". I used this number because it's an upper
-        # bound of the number of iterations.
-        for _ in range(len(arg)):
-            if upto >= len(arg):
-                break
-
-            # Escaping via "{{" and "}}"
-            if arg[upto] in "{}" and upto + 1 < len(arg) and arg[upto + 1] == arg[upto]:
-                out.append(arg[upto])
-                upto += 2
-            elif arg[upto] == "{":
-                chunks = arg[upto + 1:].split("}", 1)
-                if len(chunks) != 2:
-                    fail("Unmatched { in %r" % arg)
-                variable = chunks[0]
-
-                if variable not in format:
-                    fail('Unknown variable %r in format string %r. Try using cc_args(..., format = {"//path/to:variable": %r})' % (variable, arg, variable))
-                elif has_format:
-                    fail("The format string %r contained multiple variables, which is unsupported." % arg)
-                else:
-                    used_vars[variable] = None
-                    has_format = True
-                    out.append(_format_target(format[variable], fail = fail))
-                    upto += len(variable) + 2
-
-            elif arg[upto] == "}":
-                fail("Unexpected } in %r" % arg)
-            else:
-                out.append(_escape(arg[upto]))
-                upto += 1
-
-        formatted.append("".join(out))
+        formatted.append(_format_string(arg, format, used_vars, True, fail))
 
     unused_vars = [var for var in must_use if var not in used_vars]
     if unused_vars:
         fail("The variable %r was not used in the format string." % unused_vars[0])
 
-    return formatted
+    return formatted, used_vars.keys()
+
+def format_dict_values(env, format, must_use = [], fail = fail):
+    """Formats the environment variables.
+
+    Eg: format_dict_values({"FOO": "some/path", "BAR": "{bar}"}, {"bar": DirectoryInfo(path="path/to/bar")})
+      => {"FOO": "some/path", "BAR": "path/to/bar"}
+
+    Args:
+      env: (Dict[str, str]) The environment variables.
+      format: (Dict[str, Target]) A mapping of substitutions from key to target.
+      must_use: (List[str]) A list of substitutions that must be used.
+      fail: The fail function. Used for tests
+
+    Returns:
+      A tuple of the environment variables with formatted values and the list of format keys that were used.
+    """
+    formatted = {}
+    used_vars = {}
+
+    for key, value in env.items():
+        formatted[key] = _format_string(value, format, used_vars, False, fail)
+
+    unused_vars = [var for var in must_use if var not in used_vars]
+    if unused_vars:
+        fail("The variable %r was not used in the format string." % unused_vars[0])
+
+    return formatted, used_vars.keys()
diff --git a/docs/toolchain_api.md b/docs/toolchain_api.md
index 77a8756..095b23b 100755
--- a/docs/toolchain_api.md
+++ b/docs/toolchain_api.md
@@ -662,7 +662,7 @@
 | <a id="cc_args-args"></a>args |  (List[str]) The command-line arguments that are applied by using this rule. This is mutually exclusive with [nested](#cc_args-nested).   |  `None` |
 | <a id="cc_args-data"></a>data |  (List[Label]) A list of runtime data dependencies that are required for these arguments to work as intended.   |  `None` |
 | <a id="cc_args-env"></a>env |  (Dict[str, str]) Environment variables that should be set when the tool is invoked.   |  `None` |
-| <a id="cc_args-format"></a>format |  (Dict[str, Label]) A mapping of format strings to the label of the corresponding [`cc_variable`](#cc_variable) that the value should be pulled from. All instances of `{variable_name}` will be replaced with the expanded value of `variable_name` in this dictionary. The complete list of possible variables can be found in https://github.com/bazelbuild/rules_cc/tree/main/cc/toolchains/variables/BUILD. It is not possible to declare custom variables--these are inherent to Bazel itself.   |  `{}` |
+| <a id="cc_args-format"></a>format |  (Dict[str, Label]) A mapping of format strings to the label of a corresponding target. This target can be a `directory`, `subdirectory`, [`cc_variable`](#cc_variable), or a single file that the value should be pulled from. All instances of `{variable_name}` in the `args` list will be replaced with the expanded value in this dictionary. The complete list of possible variables can be found in https://github.com/bazelbuild/rules_cc/tree/main/cc/toolchains/variables/BUILD. It is not possible to declare custom variables--these are inherent to Bazel itself.   |  `{}` |
 | <a id="cc_args-iterate_over"></a>iterate_over |  (Label) The label of a [`cc_variable`](#cc_variable) that should be iterated over. This is intended for use with built-in variables that are lists.   |  `None` |
 | <a id="cc_args-nested"></a>nested |  (List[Label]) A list of [`cc_nested_args`](#cc_nested_args) rules that should be expanded to command-line arguments when this rule is used. This is mutually exclusive with [args](#cc_args-args).   |  `None` |
 | <a id="cc_args-requires_not_none"></a>requires_not_none |  (Label) The label of a [`cc_variable`](#cc_variable) that should be checked for existence before expanding this rule. If the variable is None, this rule will be ignored.   |  `None` |
diff --git a/tests/rule_based_toolchain/args/BUILD b/tests/rule_based_toolchain/args/BUILD
index 5d8c81c..1c2fbff 100644
--- a/tests/rule_based_toolchain/args/BUILD
+++ b/tests/rule_based_toolchain/args/BUILD
@@ -1,8 +1,14 @@
 load("@rules_testing//lib:util.bzl", "util")
 load("//cc/toolchains:args.bzl", "cc_args")
+load("//cc/toolchains/impl:variables.bzl", "cc_variable", "types")
 load("//tests/rule_based_toolchain:analysis_test_suite.bzl", "analysis_test_suite")
 load(":args_test.bzl", "TARGETS", "TESTS")
 
+cc_variable(
+    name = "some_variable",
+    type = types.string,
+)
+
 util.helper_target(
     cc_args,
     name = "simple",
diff --git a/tests/rule_based_toolchain/args/args_test.bzl b/tests/rule_based_toolchain/args/args_test.bzl
index 5dc55b0..393260c 100644
--- a/tests/rule_based_toolchain/args/args_test.bzl
+++ b/tests/rule_based_toolchain/args/args_test.bzl
@@ -13,6 +13,7 @@
 # limitations under the License.
 """Tests for the cc_args rule."""
 
+load("@bazel_skylib//rules/directory:providers.bzl", "DirectoryInfo")
 load(
     "//cc:cc_toolchain_config_lib.bzl",
     "env_entry",
@@ -30,7 +31,16 @@
     "//cc/toolchains/impl:legacy_converter.bzl",
     "convert_args",
 )
-load("//tests/rule_based_toolchain:subjects.bzl", "subjects")
+load(
+    "//cc/toolchains/impl:nested_args.bzl",
+    "format_dict_values",
+)
+load("//tests/rule_based_toolchain:generics.bzl", "struct_subject")
+load(
+    "//tests/rule_based_toolchain:subjects.bzl",
+    "result_fn_wrapper",
+    "subjects",
+)
 
 visibility("private")
 
@@ -112,16 +122,129 @@
 
 TARGETS = [
     ":simple",
+    ":some_variable",
     ":env_only",
     ":with_dir",
     ":iterate_over_optional",
     "//tests/rule_based_toolchain/actions:c_compile",
     "//tests/rule_based_toolchain/actions:cpp_compile",
+    "//tests/rule_based_toolchain/testdata:directory",
+    "//tests/rule_based_toolchain/testdata:subdirectory_1",
+    "//tests/rule_based_toolchain/testdata:bin_wrapper",
 ]
 
+def _format_dict_values(args, format, must_use = [], fail = fail):
+    # return the formatted dict as a list because the test framework
+    # doesn't appear to support dicts
+    formatted, used_items = format_dict_values(args, format, must_use = must_use, fail = fail)
+    return struct(
+        env = formatted.items(),
+        used_items = used_items,
+    )
+
+def _expect_that_formatted(env, args, format, must_use = [], expr = None):
+    return env.expect.that_value(
+        result_fn_wrapper(_format_dict_values)(args, format, must_use = must_use),
+        factory = subjects.result(struct_subject(
+            env = subjects.collection,
+            used_items = subjects.collection,
+        )),
+        expr = expr or "format_dict_values(%r, %r)" % (args, format),
+    )
+
+def _format_dict_values_test(env, targets):
+    res = _expect_that_formatted(
+        env,
+        {"foo": "bar"},
+        {},
+    ).ok()
+    res.env().contains_exactly([
+        ("foo", "bar"),
+    ])
+    res.used_items().contains_exactly([])
+
+    res = _expect_that_formatted(
+        env,
+        {"foo": "{bar}"},
+        {"bar": targets.directory},
+    ).ok()
+    res.env().contains_exactly([
+        ("foo", targets.directory[DirectoryInfo].path),
+    ])
+    res.used_items().contains_exactly(["bar"])
+
+    res = _expect_that_formatted(
+        env,
+        {"foo": "{bar}"},
+        {"bar": targets.bin_wrapper},
+    ).ok()
+    res.env().contains_exactly([
+        ("foo", targets.bin_wrapper[DefaultInfo].files.to_list()[0].path),
+    ])
+    res.used_items().contains_exactly(["bar"])
+
+    res = _expect_that_formatted(
+        env,
+        {
+            "bat": "{quuz}",
+            "baz": "{qux}",
+            "foo": "{bar}",
+        },
+        {
+            "bar": targets.directory,
+            "quuz": targets.subdirectory_1,
+            "qux": targets.bin_wrapper,
+        },
+    ).ok()
+    res.env().contains_exactly([
+        ("foo", targets.directory[DirectoryInfo].path),
+        ("baz", targets.bin_wrapper[DefaultInfo].files.to_list()[0].path),
+        ("bat", targets.subdirectory_1[DirectoryInfo].path),
+    ])
+    res.used_items().contains_exactly(["bar", "quuz", "qux"])
+
+    expected_label = Label("//tests/rule_based_toolchain/args:some_variable")
+    res = _expect_that_formatted(
+        env,
+        {"foo": "{bar}"},
+        {"bar": targets.some_variable},
+    ).err().equals("Unsupported cc_variable substitution " + str(expected_label) + ' in "{bar}".')
+
+    _expect_that_formatted(
+        env,
+        {"foo": "{bar"},
+        {},
+    ).err().equals('Unmatched { in "{bar"')
+
+    _expect_that_formatted(
+        env,
+        {"foo": "bar}"},
+        {},
+    ).err().equals('Unexpected } in "bar}"')
+
+    _expect_that_formatted(
+        env,
+        {"foo": "{bar}"},
+        {},
+    ).err().contains('Unknown variable "bar" in format string "{bar}"')
+
+    _expect_that_formatted(
+        env,
+        {"foo": "{var} {var}"},
+        {"var": targets.directory},
+    ).err().contains('"{var} {var}" contained multiple variables')
+
+    _expect_that_formatted(
+        env,
+        {},
+        {"var": targets.some_variable},
+        must_use = ["var"],
+    ).err().contains('"var" was not used')
+
 # @unsorted-dict-items
 TESTS = {
     "simple_test": _simple_test,
+    "format_dict_values_test": _format_dict_values_test,
     "env_only_test": _env_only_test,
     "with_dir_test": _with_dir_test,
 }
diff --git a/tests/rule_based_toolchain/nested_args/nested_args_test.bzl b/tests/rule_based_toolchain/nested_args/nested_args_test.bzl
index bcb30dc..83d9988 100644
--- a/tests/rule_based_toolchain/nested_args/nested_args_test.bzl
+++ b/tests/rule_based_toolchain/nested_args/nested_args_test.bzl
@@ -21,9 +21,10 @@
     "REQUIRES_EQUAL_ERR",
     "REQUIRES_MUTUALLY_EXCLUSIVE_ERR",
     "REQUIRES_NONE_ERR",
-    "format_args",
+    "format_list",
     "nested_args_provider",
 )
+load("//tests/rule_based_toolchain:generics.bzl", "struct_subject")
 load("//tests/rule_based_toolchain:subjects.bzl", "result_fn_wrapper", "subjects")
 
 visibility("private")
@@ -38,15 +39,25 @@
         factory = subjects.result(subjects.NestedArgsInfo),
     )
 
+def _format_list(args, format, must_use = [], fail = fail):
+    formatted, used_items = format_list(args, format, must_use = must_use, fail = fail)
+    return struct(
+        args = formatted,
+        used_items = used_items,
+    )
+
 def _expect_that_formatted(env, args, format, must_use = [], expr = None):
     return env.expect.that_value(
-        result_fn_wrapper(format_args)(args, format, must_use = must_use),
-        factory = subjects.result(subjects.collection),
-        expr = expr or "format_args(%r, %r)" % (args, format),
+        result_fn_wrapper(_format_list)(args, format, must_use = must_use),
+        factory = subjects.result(struct_subject(
+            args = subjects.collection,
+            used_items = subjects.collection,
+        )),
+        expr = expr or "format_list(%r, %r)" % (args, format),
     )
 
 def _format_args_test(env, targets):
-    _expect_that_formatted(
+    res = _expect_that_formatted(
         env,
         [
             "a % b",
@@ -55,12 +66,14 @@
             "a {{ b }}",
         ],
         {},
-    ).ok().contains_exactly([
+    ).ok()
+    res.args().contains_exactly([
         "a %% b",
         "a {",
         "} b",
         "a { b }",
     ]).in_order()
+    res.used_items().contains_exactly([])
 
     _expect_that_formatted(
         env,
@@ -73,13 +86,14 @@
         ["foo}"],
         {},
     ).err().equals('Unexpected } in "foo}"')
+
     _expect_that_formatted(
         env,
         ["{foo}"],
         {},
     ).err().contains('Unknown variable "foo" in format string "{foo}"')
 
-    _expect_that_formatted(
+    res = _expect_that_formatted(
         env,
         [
             "a {var}",
@@ -91,17 +105,25 @@
             "file": targets.bin_wrapper,
             "var": targets.foo,
         },
-    ).ok().contains_exactly([
+    ).ok()
+    res.args().contains_exactly([
         "a %{foo}",
         "b " + targets.directory[DirectoryInfo].path,
         "c " + targets.bin_wrapper[DefaultInfo].files.to_list()[0].path,
     ]).in_order()
+    res.used_items().contains_exactly([
+        "var",
+        "directory",
+        "file",
+    ])
 
-    _expect_that_formatted(
+    res = _expect_that_formatted(
         env,
         ["{var}", "{var}"],
         {"var": targets.foo},
-    ).ok().contains_exactly(["%{foo}", "%{foo}"])
+    ).ok()
+    res.args().contains_exactly(["%{foo}", "%{foo}"])
+    res.used_items().contains_exactly(["var"])
 
     _expect_that_formatted(
         env,