// Part of the Crubit project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// Data structures for whole-codebase nullability inference.
//
// To accurately determine nullability of public APIs, we join information from
// many translation units (e.g. a function's implementation, and all callsites).
//
// In large codebases, we may distribute this process as a mapreduce:
//  - process the many translation units in parallel, obtaining evidence
//    about all functions defined/called
//  - group the evidence by the function it describes, and combine it to form
//    conclusions for each one
//
// Key data structures are the evidence from one TU (map output/reduce input),
// and the conclusions (reduce output).
// This schema uses proto2 syntax (fields carry an explicit `optional` /
// `repeated` label).
syntax = "proto2";

// Every definition in this file lives in the clang.tidy.nullability package.
package clang.tidy.nullability;
| |
// Identifies a symbol that is a candidate for nullability inference.
message Symbol {
  // The symbol's Clang USR ("Unified Symbol Resolution") identifier.
  optional string usr = 1;
}
| |
// Well-known slot numbers for functions.
//
// A "slot" is a position within a symbol's type that may have nullability.
//
// Messages store slot numbers as uint32 rather than as this enum type: a
// symbol may have any number of slots, and proto2 enums are closed. Only the
// well-known slot values for functions are named here. For fields and global
// variables, slot numbers are aligned with the indices of their nullability
// vectors.
enum Slot {
  // Slot of a function's return type.
  SLOT_RETURN_TYPE = 0;
  // Slot of a function's first parameter. Later parameters follow on
  // consecutively: the second one is SLOT_PARAM + 1, and so on.
  SLOT_PARAM = 1;
}
| |
// An observation of nullability based on local analysis (e.g. a function body).
// Evidence from across different functions/TUs is combined to form conclusions.
message Evidence {
  // The symbol this evidence is about.
  optional Symbol symbol = 1;
  // The slot number within the symbol's type. Stored as uint32; the `Slot`
  // enum names the well-known values for functions.
  optional uint32 slot = 2;
  // The code pattern that was observed.
  optional Kind kind = 3;
  // Source location: file:line:col. Optional, for debugging only.
  optional string location = 4;

  // A pattern in the code that might help us determine nullability.
  enum Kind {
    // The declaration was annotated with _Null_unspecified or similar.
    ANNOTATED_UNKNOWN = 0;
    // The declaration was annotated with _Nullable or similar.
    ANNOTATED_NULLABLE = 1;
    // The declaration was annotated with _Nonnull or similar.
    ANNOTATED_NONNULL = 2;

    // A pointer was dereferenced without being checked for null first.
    UNCHECKED_DEREFERENCE = 3;
    // A Nullable value was passed as an argument.
    NULLABLE_ARGUMENT = 4;
    // A Nonnull value was passed as an argument.
    NONNULL_ARGUMENT = 5;
    // A value with Unknown nullability was passed as an argument.
    UNKNOWN_ARGUMENT = 6;
    // A Nullable value was returned.
    NULLABLE_RETURN = 7;
    // A Nonnull value was returned.
    NONNULL_RETURN = 8;
    // A value with Unknown nullability was returned.
    UNKNOWN_RETURN = 9;
    // A value was assigned to a Nonnull declaration.
    // e.g. evidence for `p` from `int* _Nonnull q = p;`.
    ASSIGNED_TO_NONNULL = 10;
    // A value was assigned to a declaration that is a reference to a mutable
    // nullable pointer, e.g. evidence for `p` from `int* _Nullable& q = p;`.
    ASSIGNED_TO_MUTABLE_NULLABLE = 11;
    // The program aborts if a value is null.
    ABORT_IF_NULL = 12;
    // A nullable value was assigned.
    // e.g. evidence for `p` from `int* p = nullptr;`.
    ASSIGNED_FROM_NULLABLE = 13;
    // A pointer was used with an arithmetic operator without being checked for
    // null first.
    ARITHMETIC = 14;
    // A non-static member variable has a default initializer that is a literal
    // nullptr or is simply constructed from a literal nullptr. This is
    // considered to be a weaker signal than other assignments from nullable,
    // due to the common use of nullptr as a default value to avoid quieter
    // uninitialized memory errors in favor of loud segfaults, so we
    // differentiate the evidence. Default initializers that are nullable but
    // not using literal nullptrs use the stronger evidence
    // ASSIGNED_FROM_NULLABLE, as they likely indicate more explicit Nullable
    // intent.
    NULLPTR_DEFAULT_MEMBER_INITIALIZER = 15;
    // __attribute((nonnull[(optional_param_indices)])) was applied to a
    // function or parameter declaration or __attribute((returns_nonnull)) was
    // applied to a function declaration.
    GCC_NONNULL_ATTRIBUTE = 16;
    // A Nullable value was returned as a reference.
    NULLABLE_REFERENCE_RETURN = 17;
    // A Nonnull value was returned as a mutable reference.
    NONNULL_REFERENCE_RETURN = 18;
    // A value with Unknown nullability was returned as a reference.
    UNKNOWN_REFERENCE_RETURN = 19;
    // A Nullable value was passed as a reference argument.
    NULLABLE_REFERENCE_ARGUMENT = 20;
    // A Nonnull value was passed as a mutable reference argument.
    NONNULL_REFERENCE_ARGUMENT = 21;
    // A value with Unknown nullability was passed as a reference argument.
    UNKNOWN_REFERENCE_ARGUMENT = 22;
    // A nonnull value was assigned.
    // e.g. evidence for `p` from `int a; int* p = &a;`.
    ASSIGNED_FROM_NONNULL = 23;
    // An unknown value was assigned.
    // e.g. evidence for `p` from `int* p = getUnknownPtr();`.
    ASSIGNED_FROM_UNKNOWN = 24;

    // No Kind is assigned the number 25. It is reserved so that the gap in the
    // numbering cannot be filled by accident.
    // NOTE(review): presumably 25 belonged to a retired Kind -- confirm.
    reserved 25;

    // A value was bound to a reference to a nonnull pointer, regardless of
    // whether the pointer can be mutated through the reference. If any Nonnull
    // reference exists, the pointer must be declared Nonnull to prevent the
    // pointer from being directly, or through a different reference, assigned
    // to null and then accessed through the Nonnull reference unsafely.
    // e.g. evidence for `p` from `int* _Nonnull& q = p;`.
    ASSIGNED_TO_NONNULL_REFERENCE = 26;
    // A reference to a nonnull value was returned as a reference to a const
    // value. This is distinct from NONNULL_REFERENCE_RETURN because it does
    // not require that the return type is Nonnull, the way returning a
    // reference to a mutable nonnull value does.
    NONNULL_REFERENCE_RETURN_AS_CONST = 27;
    // A reference to a nonnull value was passed as an argument to a const
    // reference parameter. This is distinct from NONNULL_REFERENCE_ARGUMENT
    // because it does not require that the argument type is Nonnull, the way
    // passing a reference to a mutable nonnull value does.
    NONNULL_REFERENCE_ARGUMENT_AS_CONST = 28;
    // A decl that is defined by the standard or strong convention to be
    // Nonnull.
    WELL_KNOWN_NONNULL = 29;
    // A decl that is defined by the standard or strong convention to be
    // Nullable.
    WELL_KNOWN_NULLABLE = 30;
    // A pointer was used with an array subscript operator without being
    // checked for null first.
    ARRAY_SUBSCRIPT = 31;
    // A smart pointer field was left not-nullable in the exit block of a
    // supported late initializer method.
    LEFT_NOT_NULLABLE_BY_LATE_INITIALIZER = 32;
    // A smart pointer field was left default-initialized (to null) or
    // initialized to a nullable value in the exit block of a constructor.
    LEFT_NULLABLE_BY_CONSTRUCTOR = 33;
  }
}
| |
// The nullability values used throughout this file.
enum Nullability {
  // First-declared value, and therefore the proto2 default when a field of
  // this type is unset.
  UNKNOWN = 0;
  NONNULL = 1;
  NULLABLE = 2;
}
| |
// A conclusion about nullability, reached by global analysis (e.g. across all
// TUs).
message SlotInference {
  // The concluded nullability.
  optional Nullability nullability = 1;
  // Indicates that we could not reconcile all evidence into a conclusion,
  // e.g. for a decl that was both unconditionally dereferenced and assigned
  // null.
  optional bool conflict = 2;
  // Examples of the evidence that contributed. Optional, for debugging only.
  repeated Evidence sample_evidence = 3;
  // Indicates that this inference carries no new information beyond what is
  // explicitly written in the source code, so it need not be separately
  // propagated from one round of inference into the next.
  // e.g. an inference gathered from ANNOTATED_NONNULL Evidence.
  optional bool trivial = 4;
  // Slot identifiers to which this inference applies. Used only for debugging
  // information.
  repeated string slot_id = 5;
}
| |
// A summary of a not-yet-complete set of Evidence for a slot. After all
// evidence has been incorporated, it can be finalized into a SlotInference.
// Treat this type as opaque: its serialization is not stable.
message SlotPartial {
  // Keyed by Evidence Kind, stored as uint32.
  // NOTE(review): the value is presumably the number of Evidence seen with
  // that Kind -- confirm against the code that merges partials.
  map</*Kind*/ uint32, uint32> kind_count = 1;

  // A list of sample source locations.
  message SampleLocations {
    // Only a bounded number of locations are stored.
    repeated string location = 1;
  }
  // Keyed by Evidence Kind, stored as uint32; sample locations for that Kind.
  map</*Kind*/ uint32, SampleLocations> kind_samples = 2;

  // Slot identifiers for which this partial is relevant. Used only for
  // debugging information.
  repeated string slot_id = 3;
}
| |
// A source range to be removed, expressed as the half-open interval
// [begin, end).
message RemovalRange {
  // Start of the range (inclusive).
  optional uint64 begin = 1;
  // End of the range (exclusive).
  optional uint64 end = 2;
}
| |
// Nullability information for the source range of a pointer type. The full
// source range of the type is deliberately omitted, because adding/removing
// nullability annotations does not need it.
message SlotRange {
  // Path of the file containing this range.
  optional string path = 1;
  // The nullability default set by the pragma affecting `path`, if one exists.
  optional Nullability pragma_nullability = 2;
  // The offset at which a qualifier-position annotation should be inserted.
  //  - For a named pointer type not ending with `*` (including smart pointers
  //    and aliases to raw pointers): before the type, but after any
  //    cv-qualifiers.
  //  - For other pointers: after the `*`, but before any cv-qualifiers.
  //
  // This field uniquely identifies a pointer-type slot range within a file, as
  // no two pointer ranges can be annotated with a qualifier-positioned
  // annotation in the same location.
  optional uint64 qualifier_annotation_insertion_offset = 3;
  // NOTE(review): presumably the nullability already annotated on this range
  // in the source -- confirm.
  optional Nullability existing_annotation = 4;
  // The source ranges of existing annotations to be removed if modifying the
  // annotation for this range.
  repeated RemovalRange existing_annotation_removal_range = 5;
}
| |
// Summary-related messages.
//
// The protos that follow collectively define the format for Nullability
// summaries used by inference. They are generated from the AST and then used as
// (stable) input to each inference iteration.
| |
// Encodes type Formula.
message FormulaProto {
  // The formula in a compact serialized form. The dataflow framework handles
  // both serialization and deserialization (parsing). The grammar is:
  //
  //   bool ::= "F" (false) | "T" (true)
  //   atom ::= "V" n (n is a positive integer)
  //   op   ::= "&" (and)
  //          | "|" (or)
  //          | ">" (implication)
  //          | "=" (equality)
  //   F    ::= bool | atom | "!" F | op F1 F2
  //
  // The corresponding serialization/deserialization API is
  // https://github.com/llvm/llvm-project/blob/main/clang/include/clang/Analysis/FlowSensitive/FormulaSerialization.h
  optional string serialized = 1;
}
| |
// Encodes type PointerTypeNullability.
message PointerTypeNullabilityProto {
  // Nullability described by a pair of atoms instead of a concrete value.
  // NOTE(review): presumably these are numerical atom IDs of the kind that
  // appear in serialized formulas -- confirm.
  message SymbolicNullability {
    optional uint32 nonnull_atom = 1;
    optional uint32 nullable_atom = 2;
  }
  // The nullability is either a concrete value or symbolic; as a oneof, at
  // most one of the two is set.
  oneof nullability {
    Nullability concrete = 1;
    SymbolicNullability symbolic = 2;
  }
}
| |
// Encodes type PointerNullState.
message PointerNullStateProto {
  // Either field may be left unset; both are optional.
  optional FormulaProto from_nullable = 1;
  optional FormulaProto is_null = 2;
}
| |
// Encodes a potential requirement for an annotation: the requirement applies
// when `formula` is true in the surrounding context.
message RequiresAnnotationSummary {
  // In practice, only Nonnull or Nullable make sense for this field.
  optional Nullability required_annotation = 1;
  // The condition under which the annotation is required.
  optional FormulaProto formula = 2;
  // NOTE(review): presumably the source location of the code that gave rise
  // to the requirement -- confirm.
  optional string location = 3;
  // NOTE(review): presumably the Kind of Evidence to emit when the requirement
  // holds -- confirm.
  optional Evidence.Kind evidence_kind = 4;
}
| |
// Encodes the properties of an assignment that matter when collecting evidence
// for the assignment's RHS.
message ValueAssignedToTypeSummary {
  optional bool lhs_is_non_reference_const = 1;
  optional PointerTypeNullabilityProto lhs_type_nullability = 2;
  // Optional. When a decl is available for the LHS, `lhs_type_nullability` may
  // be overridden during inference. `lhs_decl_fingerprint` identifies this
  // decl. When populated, the fingerprint should be checked for an override.
  optional uint64 lhs_decl_fingerprint = 3;
  // This field is genuinely optional and should be provided only if the LHS
  // has reference type.
  optional PointerTypeNullabilityProto rhs_type_nullability = 4;
  optional PointerNullStateProto rhs_value_nullability = 5;
  // NOTE(review): presumably the source location of the RHS -- confirm.
  optional string rhs_loc = 6;
}
| |
// Encodes the properties of an assignment that matter when collecting evidence
// for the assignment's LHS, i.e. the assignee.
message AssignmentFromValueSummary {
  optional PointerTypeNullabilityProto lhs_type_nullability = 1;
  // Genuinely optional: not populated when RHS is a nullptr literal.
  optional PointerNullStateProto rhs_null_state = 2;
  // NOTE(review): presumably the source location of the RHS -- confirm.
  optional string rhs_loc = 3;
  // NOTE(review): presumably the Kind of Evidence this assignment can
  // produce -- confirm.
  optional Evidence.Kind evidence_kind = 4;
}
| |
// Pairs an "is null" formula and a location for each of two values.
// NOTE(review): going by the name, this presumably summarizes a construct
// that aborts when the two values compare equal -- confirm against the code
// that produces and consumes this message.
message AbortIfEqualSummary {
  // The first value.
  optional FormulaProto first_is_null = 1;
  optional string first_loc = 2;

  // The second value.
  optional FormulaProto second_is_null = 3;
  optional string second_loc = 4;
}
| |
// Covers both argument binding and "binding" to the returned value.
message BindingSummary {
  optional Symbol function_symbol = 1;
  // Slot number within the function's type.
  optional uint32 slot = 2;
  // The next two fields describe the type. This one: whether it is a
  // reference.
  optional bool type_is_lvalue_ref = 3;
  // Whether the type is const, after stripping any reference qualifier.
  optional bool type_is_const = 4;
  // Genuinely optional: not populated when the value is a nullptr literal.
  optional PointerNullStateProto null_state = 5;
  optional string location = 6;
}
| |
// The state of a field at the exit of an initialization function, i.e. a
// constructor or a "late" initializer. The nullability of the field may be
// judged from this exit state rather than strictly from its initial state.
message InitOnExitSummary {
  // The field whose exit state is recorded.
  optional Symbol field = 1;
  // The field's null state at function exit.
  optional PointerNullStateProto null_state = 2;
  optional string location = 3;
}
| |
// Covers behaviors learned specifically from particular function exit blocks --
// constructors and (late) initializers.
message ExitBlockSummary {
  // No field uses number 1. It is reserved so that the number cannot be
  // assigned by accident.
  // NOTE(review): presumably 1 belonged to a removed field -- confirm.
  reserved 1;

  // Field states collected at the exit of a constructor.
  repeated InitOnExitSummary ctor_inits_on_exit = 2;
  // Field states collected at the exit of a late initializer.
  repeated InitOnExitSummary late_inits_on_exit = 3;
}
| |
// One summarized behavior, together with the flow condition of the block it
// was summarized from.
message NullabilityBehaviorSummary {
  // Numerical ID of the atom that represents the flow condition (a.k.a. flow
  // condition token) of the block from which this behavior was summarized.
  optional uint32 block_atom = 8;

  // The behavior itself; as a oneof, at most one alternative is set.
  oneof behavior {
    RequiresAnnotationSummary requires_annotation = 1;
    ValueAssignedToTypeSummary value_assigned = 2;
    AssignmentFromValueSummary assignment_from_value = 3;
    BindingSummary argument_binding = 4;
    AbortIfEqualSummary abort_if_equal = 5;
    BindingSummary returned = 6;
    ExitBlockSummary on_exit = 7;
  }
}
| |
// Ties a pair of atoms to one slot of a symbol.
// NOTE(review): presumably the atoms stand for "this slot is Nonnull" and
// "this slot is Nullable" in the summary's formulas -- confirm.
message InferableSlotProto {
  optional uint32 nonnull_atom = 1;
  optional uint32 nullable_atom = 2;
  // Slot number within `symbol`'s type.
  optional uint32 slot = 3;
  optional Symbol symbol = 4;
}
| |
// The logical context for a function definition.
message LogicalContext {
  // Optional. A string-encoded formula providing an invariant across all
  // formulas represented in this logical context.
  optional FormulaProto invariant = 1;

  // Atoms (int-encoded) mapped to their definitions as formulas.
  map<uint32, FormulaProto> atom_defs = 2;

  // A set of int-encoded atoms.
  message AtomSet {
    repeated uint32 atoms = 1 [packed = true];
  }
  // The dependencies of the atoms used in the function: each key is an atom,
  // and its value is the set of atoms that the key atom depends on.
  map<uint32, AtomSet> atom_deps = 3;
}
| |
// A summary of the Nullability-relevant "behaviors" of a CFG, plus the context
// needed to interpret them.
message CFGSummary {
  // The inferable slots.
  repeated InferableSlotProto inferable_slots = 1;
  // The logical context for the summarized behaviors.
  optional LogicalContext logical_context = 2;
  // The summarized behaviors.
  repeated NullabilityBehaviorSummary behavior_summaries = 3;
}
| |
// A generic serialization format for a relation between symbols. Currently
// used for the `RelatedVirtualMethodsMap` type.
//
// TODO: b/440317964 - design a compact representation. For example, we could
// represent Symbols by integers, and separately maintain a map from integers to
// USRs.
message RelatedSymbols {
  // A set of symbols.
  message SymbolSet {
    repeated Symbol symbols = 1;
  }
  // Map keys cannot be of message type, so the key is the string directly
  // instead of a Symbol. Keys must be USRs.
  map<string, SymbolSet> related_symbols = 1;
}