Add requires_not_none support for cc_args.env

Copybara Import from https://github.com/bazelbuild/rules_cc/pull/484

BEGIN_PUBLIC
Add requires_not_none support for cc_args.env (#484)

Since this commit
https://github.com/bazelbuild/rules_cc/commit/5a8cab742c3868ae5fc813745c8677f676444cf1
bazel has supported expand_if_available for env_sets. This is useful
when it's easier in the tool being run to accept optional arguments via
environment variables vs potentially double parsing and filtering
arguments.

For example in the case of parse_headers actions you either have to
parse the command line (including optional response files) in order to
find the value passed to `-o`, or you can organize it so you can read
it from the environment instead with this.

Closes #484
END_PUBLIC

COPYBARA_INTEGRATE_REVIEW=https://github.com/bazelbuild/rules_cc/pull/484 from keith:ks/add-requires_not_none-support-for-cc_args.env 75923309c10be9151c757dcb85cb57878afd1326
PiperOrigin-RevId: 804213976
Change-Id: I31f95d427ddc3e6cd64cf548e7b65db7a1672920
diff --git a/cc/toolchains/args.bzl b/cc/toolchains/args.bzl
index de382d4..6a0133d 100644
--- a/cc/toolchains/args.bzl
+++ b/cc/toolchains/args.bzl
@@ -14,7 +14,7 @@
 """All providers for rule-based bazel toolchain config."""
 
 load("@bazel_skylib//rules/directory:providers.bzl", "DirectoryInfo")
-load("//cc/toolchains/impl:args_utils.bzl", "validate_nested_args")
+load("//cc/toolchains/impl:args_utils.bzl", "validate_env_variables", "validate_nested_args")
 load(
     "//cc/toolchains/impl:collect.bzl",
     "collect_action_types",
@@ -33,7 +33,9 @@
     "ArgsInfo",
     "ArgsListInfo",
     "BuiltinVariablesInfo",
+    "EnvInfo",
     "FeatureConstraintInfo",
+    "VariableInfo",
 )
 
 visibility("public")
@@ -68,12 +70,24 @@
 
     requires = collect_provider(ctx.attr.requires_any_of, FeatureConstraintInfo)
 
+    env = EnvInfo(
+        label = ctx.label,
+        entries = formatted_env,
+        requires_not_none = ctx.attr.requires_not_none[VariableInfo].name if ctx.attr.requires_not_none else None,
+    )
+    validate_env_variables(
+        actions = actions.to_list(),
+        env = env,
+        variables = ctx.attr._variables[BuiltinVariablesInfo].variables,
+        used_format_vars = used_format_vars,
+    )
+
     args = ArgsInfo(
         label = ctx.label,
         actions = actions,
         requires_any_of = tuple(requires),
         nested = nested,
-        env = formatted_env,
+        env = env,
         files = files,
         allowlist_include_directories = depset(
             direct = [d[DirectoryInfo] for d in ctx.attr.allowlist_include_directories],
diff --git a/cc/toolchains/cc_toolchain_info.bzl b/cc/toolchains/cc_toolchain_info.bzl
index 6bbec92..38148b4 100644
--- a/cc/toolchains/cc_toolchain_info.bzl
+++ b/cc/toolchains/cc_toolchain_info.bzl
@@ -97,6 +97,16 @@
     },
 )
 
+EnvInfo = provider(
+    doc = "A set of environment variables to be added to the environment for specific actions",
+    # @unsorted-dict-items
+    fields = {
+        "label": "(Label) The label defining this provider. Place in error messages to simplify debugging",
+        "entries": "(dict[str, str]) A mapping from environment variable name to value",
+        "requires_not_none": "(Optional[str]) The variable that must be not None to apply these environment variables",
+    },
+)
+
 ArgsInfo = provider(
     doc = "A set of arguments to be added to the command line for specific actions",
     # @unsorted-dict-items
@@ -106,7 +116,7 @@
         "requires_any_of": "(Sequence[FeatureConstraintInfo]) This will be enabled if any of the listed predicates are met. Equivalent to with_features",
         "nested": "(Optional[NestedArgsInfo]) The args expand. Equivalent to a flag group.",
         "files": "(depset[File]) Files required for the args",
-        "env": "(dict[str, str]) Environment variables to apply",
+        "env": "(EnvInfo) Environment variables to apply",
         "allowlist_include_directories": "(depset[DirectoryInfo]) Include directories implied by these arguments that should be allowlisted in Bazel's include checker",
         "allowlist_absolute_include_directories": "(depset[str]) Absolute include directories implied by these arguments that should be allowlisted in Bazel's include checker",
     },
diff --git a/cc/toolchains/impl/args_utils.bzl b/cc/toolchains/impl/args_utils.bzl
index f024a14..5d0d18e 100644
--- a/cc/toolchains/impl/args_utils.bzl
+++ b/cc/toolchains/impl/args_utils.bzl
@@ -122,3 +122,60 @@
 
         for child in nested_args.nested:
             stack.append((child, overrides))
+
+def validate_env_variables(*, env, variables, actions, used_format_vars, fail = fail):
+    """Validates the typing for environment variables with requires_not_none and format variables.
+
+    Args:
+        env: (EnvInfo) The env info to validate
+        variables: (Dict[str, VariableInfo]) A mapping from variable name to
+          the VariableInfo.
+        actions: (List[ActionTypeInfo]) The actions we require these variables
+          to be valid for.
+        used_format_vars: (List[str]) The list of variables used in env format strings.
+        fail: The fail function. Use for testing only.
+    """
+    if not env.entries:
+        return
+
+    env_requires_types = {}
+    if env.requires_not_none:
+        env_requires_types[env.requires_not_none] = struct(
+            msg = "requires_not_none for env variables only works on optional string, file, or directory types",
+            valid_types = ["option"],
+            valid_elements = ["string", "file", "directory"],
+        )
+
+    for var_name in used_format_vars:
+        env_requires_types[var_name] = struct(
+            msg = "format only works on string, file, or directory type variables",
+            valid_types = ["string", "file", "directory", "option"],
+            valid_elements = ["string", "file", "directory"],
+        )
+
+    for var_name, requirement in env_requires_types.items():
+        type = get_type(
+            name = var_name,
+            variables = variables,
+            overrides = {},
+            actions = actions,
+            args_label = env.label,
+            nested_label = env.label,
+            fail = fail,
+        )
+
+        if type["name"] not in requirement.valid_types:
+            fail("{msg}, but {var_name} has type {type}".format(
+                var_name = var_name,
+                msg = requirement.msg,
+                type = type["repr"],
+            ))
+
+        if type["name"] == "option":
+            underlying_type = type["elements"]
+            if underlying_type["name"] not in requirement.valid_elements:
+                fail("{msg}, but {var_name} has type {type}".format(
+                    var_name = var_name,
+                    msg = requirement.msg,
+                    type = underlying_type["repr"],
+                ))
diff --git a/cc/toolchains/impl/legacy_converter.bzl b/cc/toolchains/impl/legacy_converter.bzl
index e1cbdd5..8f8a460 100644
--- a/cc/toolchains/impl/legacy_converter.bzl
+++ b/cc/toolchains/impl/legacy_converter.bzl
@@ -70,7 +70,11 @@
         ))
 
     env_sets = []
-    if args.env:
+    if args.env.entries:
+        # NOTE: Use kwargs to support older bazel versions
+        kwargs = {}
+        if args.env.requires_not_none:
+            kwargs["expand_if_available"] = args.env.requires_not_none
         env_sets.append(legacy_env_set(
             actions = actions,
             with_features = with_features,
@@ -78,8 +82,9 @@
                 legacy_env_entry(
                     key = key,
                     value = value,
+                    **kwargs
                 )
-                for key, value in args.env.items()
+                for key, value in args.env.entries.items()
             ],
         ))
     return struct(
diff --git a/cc/toolchains/impl/nested_args.bzl b/cc/toolchains/impl/nested_args.bzl
index 8ab07b9..fa17d79 100644
--- a/cc/toolchains/impl/nested_args.bzl
+++ b/cc/toolchains/impl/nested_args.bzl
@@ -291,10 +291,8 @@
 def _escape(s):
     return s.replace("%", "%%")
 
-def _format_target(target, arg, allow_variables, fail = fail):
+def _format_target(target, fail = fail):
     if VariableInfo in target:
-        if not allow_variables:
-            fail("Unsupported cc_variable substitution %s in %r." % (target.label, arg))
         return "%%{%s}" % target[VariableInfo].name
     elif DirectoryInfo in target:
         return _escape(target[DirectoryInfo].path)
@@ -305,7 +303,7 @@
 
     fail("%s should be either a variable, a directory, or a single file." % target.label)
 
-def _format_string(arg, format, used_vars, allow_variables, fail = fail):
+def _format_string(arg, format, used_vars, fail = fail):
     upto = 0
     out = []
     has_format = False
@@ -333,7 +331,7 @@
             else:
                 used_vars[variable] = None
                 has_format = True
-                out.append(_format_target(format[variable], arg, allow_variables, fail = fail))
+                out.append(_format_target(format[variable], fail = fail))
                 upto += len(variable) + 2
 
         elif arg[upto] == "}":
@@ -363,7 +361,7 @@
     used_vars = {}
 
     for arg in args:
-        formatted.append(_format_string(arg, format, used_vars, True, fail))
+        formatted.append(_format_string(arg, format, used_vars, fail))
 
     unused_vars = [var for var in must_use if var not in used_vars]
     if unused_vars:
@@ -390,7 +388,7 @@
     used_vars = {}
 
     for key, value in env.items():
-        formatted[key] = _format_string(value, format, used_vars, False, fail)
+        formatted[key] = _format_string(value, format, used_vars, fail)
 
     unused_vars = [var for var in must_use if var not in used_vars]
     if unused_vars:
diff --git a/tests/rule_based_toolchain/args/BUILD b/tests/rule_based_toolchain/args/BUILD
index 1c2fbff..506338f 100644
--- a/tests/rule_based_toolchain/args/BUILD
+++ b/tests/rule_based_toolchain/args/BUILD
@@ -2,6 +2,7 @@
 load("//cc/toolchains:args.bzl", "cc_args")
 load("//cc/toolchains/impl:variables.bzl", "cc_variable", "types")
 load("//tests/rule_based_toolchain:analysis_test_suite.bzl", "analysis_test_suite")
+load("//tests/rule_based_toolchain:testing_rules.bzl", "expect_failure_test")
 load(":args_test.bzl", "TARGETS", "TESTS")
 
 cc_variable(
@@ -37,6 +38,15 @@
 
 util.helper_target(
     cc_args,
+    name = "env_only_requires",
+    actions = ["//cc/toolchains/actions:compile_actions"],
+    env = {"BAR": "{dependency_file}"},
+    format = {"dependency_file": "//cc/toolchains/variables:dependency_file"},
+    requires_not_none = "//cc/toolchains/variables:dependency_file",
+)
+
+util.helper_target(
+    cc_args,
     name = "iterate_over_optional",
     actions = ["//cc/toolchains/actions:compile_actions"],
     args = ["{user_compile_flags}"],
@@ -53,6 +63,68 @@
     args = ["--secret-builtin-include-dir"],
 )
 
+util.helper_target(
+    cc_args,
+    name = "good_env_format",
+    actions = ["//cc/toolchains/actions:compile_actions"],
+    env = {"FOO": "{gcov_gcno_file}"},
+    format = {"gcov_gcno_file": "//cc/toolchains/variables:gcov_gcno_file"},
+)
+
+util.helper_target(
+    cc_args,
+    name = "good_env_format_optional",
+    actions = ["//cc/toolchains/actions:compile_actions"],
+    env = {"FOO": "{dependency_file}"},
+    format = {"dependency_file": "//cc/toolchains/variables:dependency_file"},
+    requires_not_none = "//cc/toolchains/variables:dependency_file",
+)
+
+util.helper_target(
+    cc_args,
+    name = "bad_env_format_list",
+    actions = ["//cc/toolchains/actions:compile_actions"],
+    env = {"FOO": "{preprocessor_defines}"},
+    format = {"preprocessor_defines": "//cc/toolchains/variables:preprocessor_defines"},
+    tags = ["manual"],
+)
+
+expect_failure_test(
+    name = "bad_env_format_list_test",
+    failure_message = "format only works on string, file, or directory type variables, but preprocessor_defines has type List[string]",
+    target = ":bad_env_format_list",
+)
+
+util.helper_target(
+    cc_args,
+    name = "bad_env_format_optional",
+    actions = ["//cc/toolchains/actions:compile_actions"],
+    env = {"FOO": "{user_compile_flags}"},
+    format = {"user_compile_flags": "//cc/toolchains/variables:user_compile_flags"},
+    tags = ["manual"],
+)
+
+expect_failure_test(
+    name = "bad_env_format_optional_test",
+    failure_message = "format only works on string, file, or directory type variables, but user_compile_flags has type List[string]",
+    target = ":bad_env_format_optional",
+)
+
+util.helper_target(
+    cc_args,
+    name = "bad_env_format_wrong_action",
+    actions = ["//cc/toolchains/actions:compile_actions"],
+    env = {"FOO": "{runtime_solib_name}"},
+    format = {"runtime_solib_name": "//cc/toolchains/variables:runtime_solib_name"},
+    tags = ["manual"],
+)
+
+expect_failure_test(
+    name = "bad_env_format_wrong_action_test",
+    failure_message = "runtime_solib_name is inaccessible from the action",
+    target = ":bad_env_format_wrong_action",
+)
+
 analysis_test_suite(
     name = "test_suite",
     targets = TARGETS,
diff --git a/tests/rule_based_toolchain/args/args_test.bzl b/tests/rule_based_toolchain/args/args_test.bzl
index 393260c..619c7ba 100644
--- a/tests/rule_based_toolchain/args/args_test.bzl
+++ b/tests/rule_based_toolchain/args/args_test.bzl
@@ -62,7 +62,7 @@
         targets.c_compile.label,
         targets.cpp_compile.label,
     ])
-    simple.env().contains_exactly({"BAR": "bar"})
+    simple.env().entries().contains_exactly({"BAR": "bar"})
     simple.files().contains_exactly(_SIMPLE_FILES)
 
     c_compile = env.expect.that_target(targets.simple).provider(ArgsListInfo).by_action().get(
@@ -91,7 +91,7 @@
         targets.c_compile.label,
         targets.cpp_compile.label,
     ])
-    env_only.env().contains_exactly({"BAR": "bar"})
+    env_only.env().entries().contains_exactly({"BAR": "bar"})
     env_only.files().contains_exactly(_SIMPLE_FILES)
 
     c_compile = env.expect.that_target(targets.simple).provider(ArgsListInfo).by_action().get(
@@ -110,6 +110,45 @@
 
     converted.flag_sets().contains_exactly([])
 
+def _env_only_requires_test(env, targets):
+    env_only = env.expect.that_target(targets.env_only_requires).provider(ArgsInfo)
+    env_only.actions().contains_at_least([
+        Label("//cc/toolchains/actions:c_compile"),
+        Label("//cc/toolchains/actions:cpp_compile"),
+    ])
+    env_only.env().entries().contains_exactly(
+        {"BAR": "%{dependency_file}"},
+    )
+
+    converted = env.expect.that_value(
+        convert_args(targets.env_only_requires[ArgsInfo]),
+        factory = _CONVERTED_ARGS,
+    )
+
+    converted.env_sets().contains_exactly([env_set(
+        actions = [
+            "assemble",
+            "c++-compile",
+            "c++-header-parsing",
+            "c++-module-codegen",
+            "c++-module-compile",
+            "c-compile",
+            "clif-match",
+            "linkstamp-compile",
+            "lto-backend",
+            "objc++-compile",
+            "objc-compile",
+            "preprocess-assemble",
+        ],
+        env_entries = [env_entry(
+            key = "BAR",
+            value = "%{dependency_file}",
+            expand_if_available = "dependency_file",
+        )],
+    )])
+
+    converted.flag_sets().contains_exactly([])
+
 def _with_dir_test(env, targets):
     with_dir = env.expect.that_target(targets.with_dir).provider(ArgsInfo)
     with_dir.allowlist_include_directories().contains_exactly([_TOOL_DIRECTORY])
@@ -124,8 +163,11 @@
     ":simple",
     ":some_variable",
     ":env_only",
+    ":env_only_requires",
     ":with_dir",
     ":iterate_over_optional",
+    ":good_env_format",
+    ":good_env_format_optional",
     "//tests/rule_based_toolchain/actions:c_compile",
     "//tests/rule_based_toolchain/actions:cpp_compile",
     "//tests/rule_based_toolchain/testdata:directory",
@@ -203,12 +245,11 @@
     ])
     res.used_items().contains_exactly(["bar", "quuz", "qux"])
 
-    expected_label = Label("//tests/rule_based_toolchain/args:some_variable")
-    res = _expect_that_formatted(
+    _expect_that_formatted(
         env,
         {"foo": "{bar}"},
         {"bar": targets.some_variable},
-    ).err().equals("Unsupported cc_variable substitution " + str(expected_label) + ' in "{bar}".')
+    ).ok()
 
     _expect_that_formatted(
         env,
@@ -241,10 +282,73 @@
         must_use = ["var"],
     ).err().contains('"var" was not used')
 
+def _good_env_format_test(env, targets):
+    good_env = env.expect.that_target(targets.good_env_format).provider(ArgsInfo)
+    good_env.env().entries().contains_exactly({"FOO": "%{gcov_gcno_file}"})
+
+    converted = env.expect.that_value(
+        convert_args(targets.good_env_format[ArgsInfo]),
+        factory = _CONVERTED_ARGS,
+    )
+    converted.env_sets().contains_exactly([env_set(
+        actions = [
+            "assemble",
+            "c++-compile",
+            "c++-header-parsing",
+            "c++-module-codegen",
+            "c++-module-compile",
+            "c-compile",
+            "clif-match",
+            "linkstamp-compile",
+            "lto-backend",
+            "objc++-compile",
+            "objc-compile",
+            "preprocess-assemble",
+        ],
+        env_entries = [env_entry(
+            key = "FOO",
+            value = "%{gcov_gcno_file}",
+        )],
+    )])
+
+def _good_env_format_optional_test(env, targets):
+    """Test that env formatting works with optional types."""
+    good_env_optional = env.expect.that_target(targets.good_env_format_optional).provider(ArgsInfo)
+    good_env_optional.env().entries().contains_exactly({"FOO": "%{dependency_file}"})
+
+    converted = env.expect.that_value(
+        convert_args(targets.good_env_format_optional[ArgsInfo]),
+        factory = _CONVERTED_ARGS,
+    )
+    converted.env_sets().contains_exactly([env_set(
+        actions = [
+            "assemble",
+            "c++-compile",
+            "c++-header-parsing",
+            "c++-module-codegen",
+            "c++-module-compile",
+            "c-compile",
+            "clif-match",
+            "linkstamp-compile",
+            "lto-backend",
+            "objc++-compile",
+            "objc-compile",
+            "preprocess-assemble",
+        ],
+        env_entries = [env_entry(
+            key = "FOO",
+            value = "%{dependency_file}",
+            expand_if_available = "dependency_file",
+        )],
+    )])
+
 # @unsorted-dict-items
 TESTS = {
     "simple_test": _simple_test,
     "format_dict_values_test": _format_dict_values_test,
     "env_only_test": _env_only_test,
+    "env_only_requires_test": _env_only_requires_test,
     "with_dir_test": _with_dir_test,
+    "good_env_format_test": _good_env_format_test,
+    "good_env_format_optional_test": _good_env_format_optional_test,
 }
diff --git a/tests/rule_based_toolchain/subjects.bzl b/tests/rule_based_toolchain/subjects.bzl
index 9f4843f..79e6f1b 100644
--- a/tests/rule_based_toolchain/subjects.bzl
+++ b/tests/rule_based_toolchain/subjects.bzl
@@ -21,6 +21,7 @@
     "ActionTypeSetInfo",
     "ArgsInfo",
     "ArgsListInfo",
+    "EnvInfo",
     "FeatureConstraintInfo",
     "FeatureInfo",
     "FeatureSetInfo",
@@ -138,12 +139,22 @@
 )
 
 # buildifier: disable=name-conventions
+_EnvInfoFactory = generate_factory(
+    EnvInfo,
+    "EnvInfo",
+    dict(
+        entries = _subjects.dict,
+        requires_not_none = optional_subject(_subjects.str),
+    ),
+)
+
+# buildifier: disable=name-conventions
 _ArgsFactory = generate_factory(
     ArgsInfo,
     "ArgsInfo",
     dict(
         actions = ProviderDepset(_ActionTypeFactory),
-        env = _subjects.dict,
+        env = _EnvInfoFactory.factory,
         files = _subjects.depset_file,
         # Use .factory so it's not inlined.
         nested = optional_subject(_NestedArgsFactory.factory),