// Part of the Crubit project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Data structures for whole-codebase nullability inference.
//
// To accurately determine nullability of public APIs, we join information from
// many translation units (e.g. a function's implementation, and all callsites).
//
// In large codebases, we may distribute this process as a mapreduce:
// - process the many translation units in parallel, obtaining evidence
//   about all functions defined/called
// - group the evidence by the function it describes, and combine it to form
//   conclusions for each one
//
// Key data structures are the evidence from one TU (map output/reduce input),
// and the conclusions (reduce output).
syntax = "proto2";
package clang.tidy.nullability;
// Identifies a symbol whose nullability should potentially be inferred.
message Symbol {
  // The symbol's Clang USR ("Unified Symbol Resolution") identifier.
  optional string usr = 1;
}
// Well-known slot numbers.
//
// A "slot" identifies a position in a symbol's type that may have nullability.
//
// Protos store slot numbers as uint32 rather than as this enum type: a symbol
// may have any number of slots, and proto2 enums are closed. Only the
// well-known slot values for functions are named here. Fields and global
// variables use slot numbers aligned with the indices of their nullability
// vectors.
enum Slot {
  // Slot number of a function's return type.
  SLOT_RETURN_TYPE = 0;

  // Slot number of a function's first parameter. Later parameters follow on
  // from it: the second parameter is SLOT_PARAM + 1, etc.
  SLOT_PARAM = 1;
}
// A single observation about nullability, made by local analysis (e.g. of a
// function body). Evidence gathered across different functions/TUs is combined
// to form conclusions.
message Evidence {
  // The symbol that the observation is about.
  optional Symbol symbol = 1;
  // The slot of `symbol` that the observation is about, as a slot number (the
  // Slot enum names the well-known values).
  optional uint32 slot = 2;
  // The pattern that was observed.
  optional Kind kind = 3;
  // Source location: file:line:col. Optional, for debugging only.
  optional string location = 4;

  // A pattern in the code that might help us determine nullability.
  enum Kind {
    // The declaration was annotated with _Null_unspecified or similar.
    ANNOTATED_UNKNOWN = 0;
    // The declaration was annotated with _Nullable or similar.
    ANNOTATED_NULLABLE = 1;
    // The declaration was annotated with _Nonnull or similar.
    ANNOTATED_NONNULL = 2;

    // A pointer was dereferenced without being checked for null first.
    UNCHECKED_DEREFERENCE = 3;

    // A Nullable value was passed as an argument.
    NULLABLE_ARGUMENT = 4;
    // A Nonnull value was passed as an argument.
    NONNULL_ARGUMENT = 5;
    // A value with Unknown nullability was passed as an argument.
    UNKNOWN_ARGUMENT = 6;

    // A Nullable value was returned.
    NULLABLE_RETURN = 7;
    // A Nonnull value was returned.
    NONNULL_RETURN = 8;
    // A value with Unknown nullability was returned.
    UNKNOWN_RETURN = 9;

    // A value was assigned to a Nonnull declaration.
    // e.g. evidence for `p` from `Nonnull<int*> q = p;`.
    ASSIGNED_TO_NONNULL = 10;
    // A value was assigned to a mutable Nullable declaration.
    // e.g. evidence for `p` from `Nullable<int*>& q = p;`.
    ASSIGNED_TO_MUTABLE_NULLABLE = 11;

    // The program aborts if a value is null.
    ABORT_IF_NULL = 12;

    // A nullable value was assigned.
    // e.g. evidence for `p` from `int* p = nullptr;`.
    ASSIGNED_FROM_NULLABLE = 13;

    // A pointer was used with an arithmetic operator without being checked
    // for null first.
    ARITHMETIC = 14;

    // A non-static member variable has a default initializer that is a
    // literal nullptr or is simply constructed from a literal nullptr.
    //
    // This is considered a weaker signal than other assignments from
    // nullable values: nullptr is commonly used as a default value so that
    // mistakes surface as loud segfaults rather than as quieter
    // uninitialized-memory errors. It is therefore recorded as its own kind
    // of evidence. Default initializers that are nullable but are not
    // literal nullptrs use the stronger evidence ASSIGNED_FROM_NULLABLE, as
    // they likely indicate more explicit Nullable intent.
    NULLPTR_DEFAULT_MEMBER_INITIALIZER = 15;

    // __attribute((nonnull[(optional_param_indices)])) was applied to a
    // function or parameter declaration or __attribute((returns_nonnull))
    // was applied to a function declaration.
    GCC_NONNULL_ATTRIBUTE = 16;

    // A Nullable value was returned as a reference.
    NULLABLE_REFERENCE_RETURN = 17;
    // A Nonnull value was returned as a reference.
    NONNULL_REFERENCE_RETURN = 18;
    // A value with Unknown nullability was returned as a reference.
    UNKNOWN_REFERENCE_RETURN = 19;

    // A Nullable value was passed as a reference argument.
    NULLABLE_REFERENCE_ARGUMENT = 20;
    // A Nonnull value was passed as a reference argument.
    NONNULL_REFERENCE_ARGUMENT = 21;
    // A value with Unknown nullability was passed as a reference argument.
    UNKNOWN_REFERENCE_ARGUMENT = 22;

    // A nonnull value was assigned.
    // e.g. evidence for `p` from `int a; int* p = &a;`.
    ASSIGNED_FROM_NONNULL = 23;
    // An unknown value was assigned.
    // e.g. evidence for `p` from `int* p = getUnknownPtr();`.
    ASSIGNED_FROM_UNKNOWN = 24;
  }
}
// A nullability state. In this file it is the type of inferred conclusions
// (Inference.SlotInference), of annotations already present in the source
// (SlotRange) and of pragma-set defaults (TypeLocRanges).
enum Nullability {
  // First value, and so the proto2 default for an unset field of this type.
  UNKNOWN = 0;
  NONNULL = 1;
  NULLABLE = 2;
}
// A conclusion about a symbol's nullability, reached by global analysis
// (e.g. over all TUs).
message Inference {
  // The conclusion for one slot of the symbol.
  message SlotInference {
    // Slot number that this conclusion applies to.
    optional uint32 slot = 1;
    // The nullability concluded for the slot.
    optional Nullability nullability = 2;
    // Indicates that we could not reconcile all evidence into a conclusion.
    // e.g. a Nullable<int*> parameter that was unconditionally dereferenced.
    optional bool conflict = 3;
    // Examples of evidence that contributed. Optional, for debugging only.
    repeated Evidence sample_evidence = 4;
    // Indicates that this inference carries no new information beyond what is
    // explicitly written in the source code, and so does not need to be
    // separately propagated from one round of inference into the next.
    // e.g. an inference gathered from ANNOTATED_NONNULL Evidence.
    optional bool trivial = 5;
  }

  // The symbol that the conclusions are about.
  optional Symbol symbol = 1;
  // The per-slot conclusions for `symbol`.
  repeated SlotInference slot_inference = 2;
}
// A summary of a not-yet-complete set of Evidence for one symbol.
// Once all evidence has been incorporated, it can be finalized into Inference.
// Treat this type as opaque: its serialization is not stable.
message Partial {
  // Sample source locations for one kind of evidence.
  message SampleLocations {
    // A bounded number of locations are stored.
    repeated string location = 1;
  }

  // The summary for a single slot. Both maps are keyed by a uint32 that stands
  // for an Evidence.Kind value.
  message SlotPartial {
    // NOTE(review): presumably the number of Evidence of each kind seen so
    // far — confirm against the code that merges Partials.
    map</*Kind*/ uint32, uint32> kind_count = 1;
    // Sample locations recorded for each kind.
    map</*Kind*/ uint32, SampleLocations> kind_samples = 2;
  }

  // The symbol that the summarized evidence is about.
  optional Symbol symbol = 1;
  // Indexed by slot number: return type is slot[0], first param is slot[1]...
  repeated SlotPartial slot = 2;
}
// A source range of text to remove. The range is half-open: [begin, end).
//
// NOTE(review): SlotRange stores its begin/end as uint64 while these are
// uint32 — confirm the narrower type is intended.
message RemovalRange {
  // Start of the range (inclusive).
  optional uint32 begin = 1;
  // End of the range (exclusive).
  optional uint32 end = 2;
}
// Extra information needed for complex declarator cases, like function
// pointer types and some array types.
message ComplexDeclaratorRanges {
  // Text to move from within the type to after the annotation. It may include
  // the name of the declarator, array brackets/sizes, and/or pointer stars in
  // types with multiple pointer sub-types.
  optional string following_annotation = 1;

  // Ranges of text to remove from the type.
  repeated RemovalRange removal = 2;
}
// The source range of a slot. The range is half-open: [begin, end).
message SlotRange {
  // Slot number that the range belongs to.
  optional uint32 slot = 1;
  // Start of the range (inclusive).
  optional uint64 begin = 2;
  // End of the range (exclusive).
  optional uint64 end = 3;

  // NOTE(review): presumably the nullability annotation already written on
  // the slot in the source, if any — confirm.
  optional Nullability existing_annotation = 4;

  // Length of the source range before the slot that is part of an existing
  // annotation to be potentially removed, e.g. the length of
  // "absl::NullabilityUnknown<", or 0 if the existing annotation is a
  // non-template representation, such as clang's "_Null_unspecified".
  optional uint32 existing_annotation_pre_range_length = 5;

  // Length of the source range after the slot that is part of an existing
  // annotation to be potentially removed, e.g. 1 for ">", or longer for a
  // non-template representation of annotations, such as clang's
  // "_Null_unspecified".
  optional uint32 existing_annotation_post_range_length = 6;

  // Details for slots with complex declarators; see ComplexDeclaratorRanges.
  optional ComplexDeclaratorRanges complex_declarator_ranges = 7;

  // NOTE(review): presumably set when the written type in this range
  // contains `auto*` — confirm against the code that populates it.
  optional bool contains_auto_star = 8;
}
// A set of slot ranges and the file they belong to.
message TypeLocRanges {
  // No field in this message uses number 1. Reserving it keeps it from being
  // assigned to a new field by accident.
  // NOTE(review): presumably 1 belonged to a since-removed field, in which
  // case reusing it would misread previously serialized data — confirm, and
  // reserve the old field name as well if so.
  reserved 1;

  // Path of the file to which the ranges refer.
  optional string path = 2;

  // Source ranges of type locations (written types) in `path`. These should
  // be ordered by slot number to enable predictable edits that may insert
  // multiple annotations in the same locations, e.g. for applying annotations
  // to nested pointers.
  repeated SlotRange range = 3;

  // The nullability default set by the pragma affecting `path`, if one exists.
  optional Nullability pragma_nullability = 4;
}