// (Source-viewer header: blob ec935183488ba6a5f131dec0b178b1d81b7edf31)
// Part of the Crubit project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Data structures for whole-codebase nullability inference.
//
// To accurately determine nullability of public APIs, we join information from
// many translation units (e.g. a function's implementation, and all callsites).
//
// In large codebases, we may distribute this process as a mapreduce:
// - process the many translation units in parallel, obtaining evidence
// about all functions defined/called
// - group the evidence by the function it describes, and combine it to form
// conclusions for each one
//
// Key data structures are the evidence from one TU (map output/reduce input),
// and the conclusions (reduce output).
syntax = "proto2";
package clang.tidy.nullability;
// Identifies a symbol that is a candidate for nullability inference.
message Symbol {
  // The symbol's Clang USR ("Unified Symbol Resolution") identifier.
  optional string usr = 1;
}
// A "slot" is a position within a symbol's type that may carry nullability.
//
// Protos store slot numbers as uint32 rather than as Slot values: a symbol
// may have any number of slots, and proto2 enums are closed. This enum only
// names the well-known slot values for functions. For fields and global
// variables, slot numbers are aligned with the indices of their nullability
// vectors.
enum Slot {
  // Slot number of a function's return type.
  SLOT_RETURN_TYPE = 0;
  // Slot number of a function's first parameter; the second parameter is
  // SLOT_PARAM + 1, and so on.
  SLOT_PARAM = 1;
}
// An observation of nullability based on local analysis (e.g. a function body).
// Evidence from across different functions/TUs is combined to form conclusions.
message Evidence {
  // The symbol this evidence describes.
  optional Symbol symbol = 1;
  // The slot number within the symbol's type (see Slot for the well-known
  // values used for functions).
  optional uint32 slot = 2;
  // The code pattern that was observed.
  optional Kind kind = 3;
  // Source location: file:line:col. Optional, for debugging only.
  optional string location = 4;

  // A pattern in the code that might help us determine nullability.
  enum Kind {
    // The declaration was annotated with _Null_unspecified or similar.
    ANNOTATED_UNKNOWN = 0;
    // The declaration was annotated with _Nullable or similar.
    ANNOTATED_NULLABLE = 1;
    // The declaration was annotated with _Nonnull or similar.
    ANNOTATED_NONNULL = 2;
    // A pointer was dereferenced without being checked for null first.
    UNCHECKED_DEREFERENCE = 3;
    // A Nullable value was passed as an argument.
    NULLABLE_ARGUMENT = 4;
    // A Nonnull value was passed as an argument.
    NONNULL_ARGUMENT = 5;
    // A value with Unknown nullability was passed as an argument.
    UNKNOWN_ARGUMENT = 6;
    // A Nullable value was returned.
    NULLABLE_RETURN = 7;
    // A Nonnull value was returned.
    NONNULL_RETURN = 8;
    // A value with Unknown nullability was returned.
    UNKNOWN_RETURN = 9;
    // A value was assigned to a Nonnull declaration.
    // e.g. evidence for `p` from `int* _Nonnull q = p;`.
    ASSIGNED_TO_NONNULL = 10;
    // A value was assigned to a declaration that is a reference to a mutable
    // nullable pointer, e.g. evidence for `p` from `int* _Nullable& q = p;`.
    ASSIGNED_TO_MUTABLE_NULLABLE = 11;
    // The program aborts if a value is null.
    ABORT_IF_NULL = 12;
    // A nullable value was assigned.
    // e.g. evidence for `p` from `int* p = nullptr;`.
    ASSIGNED_FROM_NULLABLE = 13;
    // A pointer was used with an arithmetic operator without being checked for
    // null first.
    ARITHMETIC = 14;
    // A non-static member variable has a default initializer that is a literal
    // nullptr or is simply constructed from a literal nullptr. This is
    // considered to be a weaker signal than other assignments from nullable:
    // nullptr is commonly used as a default value so that mistakes show up as
    // loud segfaults rather than as quieter uninitialized-memory errors, so we
    // differentiate the evidence. Default initializers that are nullable but
    // not using literal nullptrs use the stronger evidence
    // ASSIGNED_FROM_NULLABLE, as they likely indicate more explicit Nullable
    // intent.
    NULLPTR_DEFAULT_MEMBER_INITIALIZER = 15;
    // __attribute((nonnull[(optional_param_indices)])) was applied to a
    // function or parameter declaration or __attribute((returns_nonnull)) was
    // applied to a function declaration.
    GCC_NONNULL_ATTRIBUTE = 16;
    // A Nullable value was returned as a reference.
    NULLABLE_REFERENCE_RETURN = 17;
    // A Nonnull value was returned as a mutable reference.
    NONNULL_REFERENCE_RETURN = 18;
    // A value with Unknown nullability was returned as a reference.
    UNKNOWN_REFERENCE_RETURN = 19;
    // A Nullable value was passed as a reference argument.
    NULLABLE_REFERENCE_ARGUMENT = 20;
    // A Nonnull value was passed as a mutable reference argument.
    NONNULL_REFERENCE_ARGUMENT = 21;
    // A value with Unknown nullability was passed as a reference argument.
    UNKNOWN_REFERENCE_ARGUMENT = 22;
    // A nonnull value was assigned.
    // e.g. evidence for `p` from `int a; int* p = &a;`.
    ASSIGNED_FROM_NONNULL = 23;
    // An unknown value was assigned.
    // e.g. evidence for `p` from `int* p = getUnknownPtr();`.
    ASSIGNED_FROM_UNKNOWN = 24;
    // 25 is skipped in this numbering (presumably a removed kind -- confirm
    // against history). Reserving it keeps the number from ever being reused
    // with a different meaning.
    reserved 25;
    // A value was bound to a reference to a nonnull pointer, regardless of
    // whether the pointer can be mutated through the reference. If any Nonnull
    // reference exists, the pointer must be declared Nonnull to prevent the
    // pointer from being directly, or through a different reference, assigned
    // to null and then accessed through the Nonnull reference unsafely.
    // e.g. evidence for `p` from `int* _Nonnull& q = p;`.
    ASSIGNED_TO_NONNULL_REFERENCE = 26;
    // A reference to a nonnull value was returned as a reference to a const
    // value. This is distinct from NONNULL_REFERENCE_RETURN because it does not
    // require that the return type is Nonnull, the way returning a reference to
    // a mutable nonnull value does.
    NONNULL_REFERENCE_RETURN_AS_CONST = 27;
    // A reference to a nonnull value was passed as an argument to a const
    // reference parameter. This is distinct from NONNULL_REFERENCE_ARGUMENT
    // because it does not require that the argument type is Nonnull, the way
    // passing a reference to a mutable nonnull value does.
    NONNULL_REFERENCE_ARGUMENT_AS_CONST = 28;
    // A decl that is defined by the standard or strong convention to be
    // Nonnull.
    WELL_KNOWN_NONNULL = 29;
    // A decl that is defined by the standard or strong convention to be
    // Nullable.
    WELL_KNOWN_NULLABLE = 30;
    // A pointer was used with an array subscript operator without being checked
    // for null first.
    ARRAY_SUBSCRIPT = 31;
    // A smart pointer field was left not-nullable in the exit block of a
    // supported late initializer method.
    LEFT_NOT_NULLABLE_BY_LATE_INITIALIZER = 32;
    // A smart pointer field was left default-initialized (to null) or
    // initialized to a nullable value in the exit block of a constructor.
    LEFT_NULLABLE_BY_CONSTRUCTOR = 33;
  }
}
// Nullability values shared by the messages in this file.
enum Nullability {
  // Listed first, so an unset proto2 field of this type reads as UNKNOWN.
  UNKNOWN = 0;
  NONNULL = 1;
  NULLABLE = 2;
}
// A nullability conclusion drawn from global analysis (e.g. all TUs).
message SlotInference {
  // The nullability that was concluded.
  optional Nullability nullability = 1;
  // Indicates that we could not reconcile all evidence into a conclusion,
  // e.g. for a decl that was both unconditionally dereferenced and assigned
  // null.
  optional bool conflict = 2;
  // A sample of the evidence that contributed. Optional; debugging aid only.
  repeated Evidence sample_evidence = 3;
  // Set when this inference adds no information beyond what is explicitly
  // written in the source code, so it does not need to be propagated
  // separately from one round of inference into the next,
  // e.g. an inference gathered from ANNOTATED_NONNULL Evidence.
  optional bool trivial = 4;
  // Identifiers of the slots this inference applies to. Debugging aid only.
  repeated string slot_id = 5;
}
// A running summary of a not-yet-complete set of Evidence for a slot. After
// all evidence has been incorporated, it can be finalized into a
// SlotInference. Treat this type as opaque: its serialization is not stable.
message SlotPartial {
  // Keyed by Kind (stored as uint32). Judging by the name, the value counts
  // the evidence of that kind -- confirm against the code that fills it in.
  map<uint32, uint32> kind_count = 1;
  message SampleLocations {
    // Only a bounded number of locations is stored.
    repeated string location = 1;
  }
  // Keyed by Kind (stored as uint32).
  map<uint32, SampleLocations> kind_samples = 2;
  // Identifiers of the slots this partial is relevant to. Debugging aid only.
  repeated string slot_id = 3;
}
// A source range that is to be removed, expressed as the half-open interval
// [begin, end).
message RemovalRange {
  // Start of the range (inclusive).
  optional uint64 begin = 1;
  // End of the range (exclusive).
  optional uint64 end = 2;
}
// Nullability information for the source range of a pointer type. The full
// source range of the type is deliberately left out, because adding/removing
// nullability annotations does not need it.
message SlotRange {
  // Path of the file that contains this range.
  optional string path = 1;
  // The default nullability set by the pragma that affects `path`, if there is
  // one.
  optional Nullability pragma_nullability = 2;
  // Offset at which an annotation in qualifier position should be inserted.
  // For a named pointer type that does not end with `*` (including smart
  // pointers and aliases to raw pointers), the offset precedes the type but
  // follows any cv-qualifiers. For other pointers, it follows the `*` but
  // precedes any cv-qualifiers.
  // This field uniquely identifies a pointer-type slot range within a file,
  // since no two pointer ranges can take a qualifier-positioned annotation at
  // the same location.
  optional uint64 qualifier_annotation_insertion_offset = 3;
  // The annotation already present on this range (inferred from the field
  // name; confirm how "no annotation" is represented).
  optional Nullability existing_annotation = 4;
  // Source ranges of the existing annotations that must be removed when the
  // annotation for this range is modified.
  repeated RemovalRange existing_annotation_removal_range = 5;
}
// Summary-related messages.
//
// The protos that follow collectively define the format for Nullability
// summaries used by inference. They are generated from the AST and then used as
// (stable) input to each inference iteration.
// Proto encoding of the type Formula.
message FormulaProto {
  // The formula in a compact serialized form. The dataflow framework handles
  // both serialization and deserialization (parsing). Grammar:
  //
  //   bool ::= "F" (false) | "T" (true)
  //   atom ::= "V" n   (n is a positive integer)
  //   op   ::= "&" (and)
  //          | "|" (or)
  //          | ">" (implication)
  //          | "=" (equality)
  //   F    ::= bool | atom | "!" F | op F1 F2
  //
  // The matching serialization/deserialization API lives at
  // https://github.com/llvm/llvm-project/blob/main/clang/include/clang/Analysis/FlowSensitive/FormulaSerialization.h
  optional string serialized = 1;
}
// Proto encoding of the type PointerTypeNullability.
message PointerTypeNullabilityProto {
  // A nullability expressed through a pair of atoms instead of a fixed value.
  // NOTE(review): presumably each field holds the numeric ID of the atom for
  // the respective property -- confirm against the (de)serialization code.
  message SymbolicNullability {
    optional uint32 nonnull_atom = 1;
    optional uint32 nullable_atom = 2;
  }
  // The nullability is either a concrete value or a symbolic one; at most one
  // of the two is set.
  oneof nullability {
    Nullability concrete = 1;
    SymbolicNullability symbolic = 2;
  }
}
// Proto encoding of the type PointerNullState.
message PointerNullStateProto {
  // Either field may be left unset; both are optional.
  optional FormulaProto from_nullable = 1;
  optional FormulaProto is_null = 2;
}
// A potential requirement for an annotation: the requirement holds
// specifically when `formula` is true in the surrounding context.
message RequiresAnnotationSummary {
  // The annotation that would be required. In practice, only Nonnull or
  // Nullable make sense here.
  optional Nullability required_annotation = 1;
  // The condition under which the annotation is required.
  optional FormulaProto formula = 2;
  // NOTE(review): presumably a source location string like Evidence.location
  // -- confirm.
  optional string location = 3;
  // NOTE(review): presumably the kind of Evidence to produce when the
  // requirement applies -- confirm.
  optional Evidence.Kind evidence_kind = 4;
}
// Properties of an assignment that matter when collecting evidence for the
// assignment's RHS.
message ValueAssignedToTypeSummary {
  optional bool lhs_is_non_reference_const = 1;
  optional PointerTypeNullabilityProto lhs_type_nullability = 2;
  // Optional. When a decl is available for the LHS, lhs_type_nullability may
  // be overridden during inference. `lhs_decl_fingerprint` identifies that
  // decl; when it is populated, the fingerprint should be checked for an
  // override.
  optional uint64 lhs_decl_fingerprint = 3;
  // Genuinely optional: should be provided only if the LHS has reference
  // type.
  optional PointerTypeNullabilityProto rhs_type_nullability = 4;
  optional PointerNullStateProto rhs_value_nullability = 5;
  optional string rhs_loc = 6;
}
// Properties of an assignment that matter when collecting evidence for the
// assignment's LHS, i.e. the assignee.
message AssignmentFromValueSummary {
  optional PointerTypeNullabilityProto lhs_type_nullability = 1;
  // Genuinely optional: left unpopulated when the RHS is a nullptr literal.
  optional PointerNullStateProto rhs_null_state = 2;
  optional string rhs_loc = 3;
  optional Evidence.Kind evidence_kind = 4;
}
// NOTE(review): undocumented in the original. Judging by the names, this
// summarizes an abort-if-equal check between two values, carrying an is-null
// formula and a location for each of them -- confirm against the producer.
message AbortIfEqualSummary {
  // Formula and location for the first value.
  optional FormulaProto first_is_null = 1;
  optional string first_loc = 2;
  // Formula and location for the second value.
  optional FormulaProto second_is_null = 3;
  optional string second_loc = 4;
}
// Describes a binding: either the binding of an argument, or the "binding" to
// a function's returned value.
message BindingSummary {
  optional Symbol function_symbol = 1;
  optional uint32 slot = 2;
  // The next two fields describe the type. First: whether it is a reference.
  optional bool type_is_lvalue_ref = 3;
  // Second: whether it is const once any reference qualifier is stripped.
  optional bool type_is_const = 4;
  // Genuinely optional: left unpopulated when the value is a nullptr literal.
  optional PointerNullStateProto null_state = 5;
  optional string location = 6;
}
// The state of a field at the exit of an initialization function, i.e. a
// constructor or a "late" initializer. The field's nullability may be judged
// from this exit state rather than strictly from its initial state.
message InitOnExitSummary {
  // The field whose exit state was collected.
  optional Symbol field = 1;
  // The field's null state at function exit.
  optional PointerNullStateProto null_state = 2;
  optional string location = 3;
}
// Covers behaviors learned specifically from particular function exit blocks --
// constructors and (late) initializers.
message ExitBlockSummary {
  // Field number 1 is not used by this message (presumably a removed field --
  // confirm against history). Reserving it keeps the number from being reused
  // with a different meaning.
  reserved 1;
  // Field states collected at the exit of constructors.
  repeated InitOnExitSummary ctor_inits_on_exit = 2;
  // Field states collected at the exit of late initializers.
  repeated InitOnExitSummary late_inits_on_exit = 3;
}
// One summarized behavior, together with the flow condition of the block it
// was summarized from.
message NullabilityBehaviorSummary {
  // Numerical ID of the atom that represents the flow condition (a.k.a. flow
  // condition token) of the block this behavior was summarized from.
  optional uint32 block_atom = 8;
  // The behavior itself; at most one of the alternatives is set.
  oneof behavior {
    RequiresAnnotationSummary requires_annotation = 1;
    ValueAssignedToTypeSummary value_assigned = 2;
    AssignmentFromValueSummary assignment_from_value = 3;
    BindingSummary argument_binding = 4;
    AbortIfEqualSummary abort_if_equal = 5;
    BindingSummary returned = 6;
    ExitBlockSummary on_exit = 7;
  }
}
// NOTE(review): undocumented in the original. Judging by the fields, this ties
// a (symbol, slot) pair to the two atoms that stand for that slot's
// nullability in a summary's formulas -- confirm against the producer.
message InferableSlotProto {
  optional uint32 nonnull_atom = 1;
  optional uint32 nullable_atom = 2;
  // Slot number within `symbol`.
  optional uint32 slot = 3;
  optional Symbol symbol = 4;
}
// Logical context of a function definition.
message LogicalContext {
  // Optional. A string-encoded formula that is an invariant across all
  // formulas represented in this logical context.
  optional FormulaProto invariant = 1;
  // Atoms (encoded as ints) mapped to their definitions as formulas.
  map<uint32, FormulaProto> atom_defs = 2;
  // A set of int-encoded atoms.
  message AtomSet {
    repeated uint32 atoms = 1 [packed = true];
  }
  // Dependencies of the atoms used in the function: each key is an atom, and
  // its value is the set of atoms that the key atom depends on.
  map<uint32, AtomSet> atom_deps = 3;
}
// A summary of a CFG: its Nullability-relevant "behaviors" plus the context
// needed to interpret them.
message CFGSummary {
  repeated InferableSlotProto inferable_slots = 1;
  optional LogicalContext logical_context = 2;
  repeated NullabilityBehaviorSummary behavior_summaries = 3;
}
// A generic serialization format for a relation between symbols. Currently
// used for the `RelatedVirtualMethodsMap` type.
//
// TODO: b/440317964 - design a compact representation. For example, we could
// represent Symbols by integers, and separately maintain a map from integers to
// USRs.
message RelatedSymbols {
  // A collection of symbols.
  message SymbolSet {
    repeated Symbol symbols = 1;
  }
  // Map keys cannot be messages, so the key is a plain string rather than a
  // Symbol. Keys must be USRs.
  map<string, SymbolSet> related_symbols = 1;
}