| // Part of the Crubit project, under the Apache License v2.0 with LLVM |
| // Exceptions. See /LICENSE for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| |
| #include "nullability/inference/collect_evidence.h" |
| |
| #include <memory> |
| #include <optional> |
| #include <string> |
| #include <string_view> |
| #include <utility> |
| #include <vector> |
| |
| #include "absl/base/nullability.h" |
| #include "absl/container/flat_hash_map.h" |
| #include "absl/log/check.h" |
| #include "nullability/inference/inferable.h" |
| #include "nullability/inference/inference.proto.h" |
| #include "nullability/inference/slot_fingerprint.h" |
| #include "nullability/pointer_nullability.h" |
| #include "nullability/pointer_nullability_analysis.h" |
| #include "nullability/pointer_nullability_lattice.h" |
| #include "nullability/type_nullability.h" |
| #include "clang/AST/ASTContext.h" |
| #include "clang/AST/Decl.h" |
| #include "clang/AST/DeclBase.h" |
| #include "clang/AST/DeclCXX.h" |
| #include "clang/AST/Expr.h" |
| #include "clang/AST/ExprCXX.h" |
| #include "clang/AST/OperationKinds.h" |
| #include "clang/AST/RecursiveASTVisitor.h" |
| #include "clang/AST/Stmt.h" |
| #include "clang/AST/Type.h" |
| #include "clang/ASTMatchers/ASTMatchFinder.h" |
| #include "clang/ASTMatchers/ASTMatchers.h" |
| #include "clang/Analysis/CFG.h" |
| #include "clang/Analysis/FlowSensitive/Arena.h" |
| #include "clang/Analysis/FlowSensitive/ControlFlowContext.h" |
| #include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" |
| #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" |
| #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" |
| #include "clang/Analysis/FlowSensitive/Formula.h" |
| #include "clang/Analysis/FlowSensitive/Value.h" |
| #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" |
| #include "clang/Basic/LLVM.h" |
| #include "clang/Basic/SourceLocation.h" |
| #include "clang/Basic/Specifiers.h" |
| #include "clang/Index/USRGeneration.h" |
| #include "llvm/ADT/DenseSet.h" |
| #include "llvm/ADT/FunctionExtras.h" |
| #include "llvm/ADT/STLFunctionalExtras.h" |
| #include "llvm/Support/Error.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| namespace clang::tidy::nullability { |
| using ::clang::ast_matchers::callee; |
| using ::clang::ast_matchers::callExpr; |
| using ::clang::ast_matchers::forEachDescendant; |
| using ::clang::ast_matchers::functionDecl; |
| using ::clang::ast_matchers::qualType; |
| using ::clang::ast_matchers::returns; |
| using ::clang::dataflow::DataflowAnalysisContext; |
| using ::clang::dataflow::Environment; |
| using ::clang::dataflow::Formula; |
| |
| using ConcreteNullabilityCache = |
| absl::flat_hash_map<const Decl *, |
| std::optional<const PointerTypeNullability>>; |
| |
| std::string_view getOrGenerateUSR(USRCache &Cache, const Decl &Decl) { |
| auto [It, Inserted] = Cache.try_emplace(&Decl); |
| if (Inserted) { |
| llvm::SmallString<128> USR; |
| if (!index::generateUSRForDecl(&Decl, USR)) It->second = USR.str(); |
| } |
| return It->second; |
| } |
| |
| llvm::unique_function<EvidenceEmitter> evidenceEmitter( |
| llvm::unique_function<void(const Evidence &) const> Emit, |
| nullability::USRCache &USRCache) { |
| class EvidenceEmitterImpl { |
| public: |
| EvidenceEmitterImpl( |
| llvm::unique_function<void(const Evidence &) const> Emit, |
| nullability::USRCache &USRCache) |
| : Emit(std::move(Emit)), USRCache(USRCache) {} |
| |
| void operator()(const Decl &Target, Slot S, Evidence::Kind Kind, |
| SourceLocation Loc) const { |
| CHECK(isInferenceTarget(Target)) |
| << "Evidence emitted for a Target which is not an inference target: " |
| << (dyn_cast<NamedDecl>(&Target) |
| ? dyn_cast<NamedDecl>(&Target)->getQualifiedNameAsString() |
| : "not a named decl"); |
| |
| Evidence E; |
| E.set_slot(S); |
| E.set_kind(Kind); |
| |
| std::string_view USR = getOrGenerateUSR(USRCache, Target); |
| if (USR.empty()) return; // Can't emit without a USR |
| E.mutable_symbol()->set_usr(USR); |
| |
| // TODO: make collecting and propagating location information optional? |
| auto &SM = |
| Target.getDeclContext()->getParentASTContext().getSourceManager(); |
| // TODO: are macro locations actually useful enough for debugging? |
| // we could leave them out, and make room for non-macro samples. |
| if (Loc = SM.getFileLoc(Loc); Loc.isValid()) |
| E.set_location(Loc.printToString(SM)); |
| |
| Emit(E); |
| } |
| |
| private: |
| llvm::unique_function<void(const Evidence &) const> Emit; |
| nullability::USRCache &USRCache; |
| }; |
| return EvidenceEmitterImpl(std::move(Emit), USRCache); |
| } |
| |
| namespace { |
| |
| class InferableSlot { |
| public: |
| InferableSlot(PointerTypeNullability Nullability, Slot Slot, const Decl &Decl) |
| : SymbolicNullability(Nullability), |
| TargetSlot(Slot), |
| InferenceTarget(Decl) {} |
| |
| const PointerTypeNullability &getSymbolicNullability() const { |
| return SymbolicNullability; |
| } |
| Slot getTargetSlot() const { return TargetSlot; } |
| const Decl &getInferenceTarget() const { return InferenceTarget; } |
| |
| private: |
| const PointerTypeNullability SymbolicNullability; |
| const Slot TargetSlot; |
| const Decl &InferenceTarget; |
| }; |
| |
| // If Stmt is a dereference, returns its target and location. |
| std::pair<Expr *, SourceLocation> describeDereference(const Stmt &Stmt) { |
| if (auto *Op = dyn_cast<UnaryOperator>(&Stmt); |
| Op && Op->getOpcode() == UO_Deref) { |
| return {Op->getSubExpr(), Op->getOperatorLoc()}; |
| } |
| if (auto *ME = dyn_cast<MemberExpr>(&Stmt); ME && ME->isArrow()) { |
| return {ME->getBase(), ME->getOperatorLoc()}; |
| } |
| return {nullptr, SourceLocation()}; |
| } |
| |
| // Records evidence derived from the assumption that `Value` is nonnull. |
| // It may be dereferenced, passed as a nonnull param, etc, per `EvidenceKind`. |
| void collectMustBeNonnullEvidence( |
| const dataflow::PointerValue &Value, const dataflow::Environment &Env, |
| SourceLocation Loc, const std::vector<InferableSlot> &InferableSlots, |
| Evidence::Kind EvidenceKind, llvm::function_ref<EvidenceEmitter> Emit) { |
| CHECK(hasPointerNullState(Value)) |
| << "Value should be the value of an expression. Cannot collect evidence " |
| "for nonnull-ness if there is no null state."; |
| auto *IsNull = getPointerNullState(Value).IsNull; |
| // If `IsNull` is top, we can't infer anything about it. |
| if (IsNull == nullptr) return; |
| // If the flow conditions already imply that Value is not null, then we don't |
| // have any new evidence of a necessary annotation. |
| if (!Env.allows(*IsNull)) return; |
| |
| auto &A = Env.getDataflowAnalysisContext().arena(); |
| // Otherwise, if an inferable slot being annotated Nonnull would imply that |
| // `Value` is not null, then we have evidence suggesting that slot should be |
| // annotated. For now, we simply choose the first such slot, sidestepping |
| // complexities around the possibility of multiple such slots, any one of |
| // which would be sufficient if annotated Nonnull. |
| for (auto &IS : InferableSlots) { |
| auto &SlotNonnull = IS.getSymbolicNullability().isNonnull(A); |
| auto &SlotNonnullImpliesValueNonnull = |
| A.makeImplies(SlotNonnull, A.makeNot(*IsNull)); |
| // Don't collect evidence if the implication is true by virtue of |
| // `SlotNonnull` being false. |
| // |
| // In practice, `SlotNonnull` can be made false by a flow condition, and |
| // marking the slot Nonnull would make that conditioned block dead code. |
| // Technically, this does make the dereference "safe", but we'd prefer to |
| // mark a different slot Nonnull that has a more direct relationship with |
| // the nullability of `Value`. |
| // |
| // e.g. We'd prefer to mark `q` Nonnull rather than `p` in the following: |
| // ``` |
| // void target(int* p, int* q) { |
| // if (!p) { |
| // *q; |
| // } |
| // } |
| // ``` |
| if (Env.allows(SlotNonnull) && Env.proves(SlotNonnullImpliesValueNonnull)) { |
| Emit(IS.getInferenceTarget(), IS.getTargetSlot(), EvidenceKind, Loc); |
| return; |
| } |
| } |
| } |
| |
| void collectEvidenceFromDereference( |
| const std::vector<InferableSlot> &InferableSlots, const Stmt &Stmt, |
| const dataflow::Environment &Env, |
| llvm::function_ref<EvidenceEmitter> Emit) { |
| auto [Target, Loc] = describeDereference(Stmt); |
| if (!Target || !isSupportedPointerType(Target->getType())) return; |
| |
| // It is a dereference of a pointer. Now gather evidence from it. |
| dataflow::PointerValue *DereferencedValue = |
| getPointerValueFromExpr(Target, Env); |
| if (!DereferencedValue) return; |
| collectMustBeNonnullEvidence(*DereferencedValue, Env, Loc, InferableSlots, |
| Evidence::UNCHECKED_DEREFERENCE, Emit); |
| } |
| |
| // Inferable slots are nullability slots not explicitly annotated in source |
| // code that we are currently capable of handling. This returns a boolean |
| // constraint representing these slots having a) the nullability inferred from |
| // the previous round for this slot or b) Unknown nullability if no inference |
| // was made in the previous round or there was no previous round. |
| const Formula &getInferableSlotsAsInferredOrUnknownConstraint( |
| const std::vector<InferableSlot> &InferableSlots, USRCache &USRCache, |
| const PreviousInferences &PreviousInferences, dataflow::Arena &A) { |
| const Formula *Constraint = &A.makeLiteral(true); |
| for (auto &IS : InferableSlots) { |
| std::string_view USR = getOrGenerateUSR(USRCache, IS.getInferenceTarget()); |
| SlotFingerprint Fingerprint = fingerprint(USR, IS.getTargetSlot()); |
| auto Nullability = IS.getSymbolicNullability(); |
| const Formula &Nullable = PreviousInferences.Nullable.contains(Fingerprint) |
| ? Nullability.isNullable(A) |
| : A.makeNot(Nullability.isNullable(A)); |
| const Formula &Nonnull = PreviousInferences.Nonnull.contains(Fingerprint) |
| ? Nullability.isNonnull(A) |
| : A.makeNot(Nullability.isNonnull(A)); |
| Constraint = &A.makeAnd(*Constraint, A.makeAnd(Nullable, Nonnull)); |
| } |
| return *Constraint; |
| } |
| |
| auto getNullabilityAnnotationsFromTypeAndOverrides( |
| QualType Type, absl::Nonnull<const Decl *> D, |
| const PointerNullabilityLattice &Lattice) { |
| auto N = getNullabilityAnnotationsFromType(Type); |
| if (N.empty()) { |
| // We expect this not to be the case, but not to a crash-worthy level, so |
| // just log if it is. |
| llvm::errs() << "Nullability for type " << Type.getAsString(); |
| if (auto *ND = dyn_cast_or_null<clang::NamedDecl>(D)) { |
| llvm::errs() << "for Decl named " << ND->getName(); |
| } |
| llvm::errs() << " requested with overrides, but is an empty vector.\n"; |
| } else { |
| Lattice.overrideNullabilityFromDecl(D, N); |
| } |
| return N; |
| } |
| |
| // Collect evidence for each of `InferableSlots` if that slot being marked |
| // Nullable would imply `Value`'s FromNullable property. |
| // |
| // This function is called when we have reason to believe that `Value` must be |
| // Nullable. As we can't directly retrieve the combination of Decl and Slot that |
| // corresponds to `Value`'s nullability, we consider each inferable slot and |
| // emit evidence for all inferable slots that, if marked Nullable, cause `Value` |
| // to be considered explicitly Nullable. |
| void collectMustBeMarkedNullableEvidence( |
| const dataflow::PointerValue &Value, const dataflow::Environment &Env, |
| SourceLocation Loc, const std::vector<InferableSlot> &InferableSlots, |
| Evidence::Kind EvidenceKind, llvm::function_ref<EvidenceEmitter> Emit) { |
| CHECK(hasPointerNullState(Value)) |
| << "Value should be the value of an expression. Cannot collect evidence " |
| "for nonnull-ness if there is no null state."; |
| auto *FromNullable = getPointerNullState(Value).FromNullable; |
| // If `FromNullable` is top, we can't infer anything about it. |
| if (FromNullable == nullptr) return; |
| // If the flow conditions already imply that `Value` is from a Nullable, then |
| // we don't have any new evidence of a necessary annotation. |
| if (Env.proves(*FromNullable)) return; |
| |
| auto &A = Env.getDataflowAnalysisContext().arena(); |
| // Otherwise, if an inferable slot being annotated Nullable would imply that |
| // `Value` is from a Nullable, then we have evidence suggesting that slot |
| // should be annotated. We collect this evidence for every slot that connects |
| // in this way to `Value`. |
| // |
| // e.g. We should mark both `p` and `q` Nullable in the following: |
| // ``` |
| // void target(int* p, int* q, bool b) { |
| // Nullable<int*>& x = b ? p : q; |
| // ... |
| // } |
| // ``` |
| // because at runtime, either `p` or `q` could be taken as a mutable reference |
| // and later set to nullptr. |
| for (auto &IS : InferableSlots) { |
| auto &SlotNullableImpliesValueFromNullable = |
| A.makeImplies(IS.getSymbolicNullability().isNullable(A), *FromNullable); |
| if (Env.proves(SlotNullableImpliesValueFromNullable)) |
| Emit(IS.getInferenceTarget(), IS.getTargetSlot(), EvidenceKind, Loc); |
| } |
| } |
| |
| void collectEvidenceFromBindingToType( |
| QualType Type, TypeNullability &TypeNullability, |
| const dataflow::PointerValue &PointerValue, |
| const std::vector<InferableSlot> &InferableSlotsFromValueContext, |
| const Formula &InferableSlotsConstraint, const dataflow::Environment &Env, |
| SourceLocation ValueLoc, llvm::function_ref<EvidenceEmitter> Emit) { |
| // TODO: Account for variance and each layer of nullability when we handle |
| // more than top-level pointers. |
| if (TypeNullability.empty()) return; |
| PointerTypeNullability &TopLevel = TypeNullability[0]; |
| dataflow::Arena &A = Env.arena(); |
| if (TopLevel.concrete() == NullabilityKind::NonNull || |
| (TopLevel.isSymbolic() && |
| Env.proves( |
| A.makeImplies(InferableSlotsConstraint, TopLevel.isNonnull(A))))) { |
| collectMustBeNonnullEvidence(PointerValue, Env, ValueLoc, |
| InferableSlotsFromValueContext, |
| Evidence::BOUND_TO_NONNULL, Emit); |
| } else if (!Type.isConstQualified() && Type->isReferenceType() && |
| (TopLevel.concrete() == NullabilityKind::Nullable || |
| (TopLevel.isSymbolic() && |
| Env.proves(A.makeImplies(InferableSlotsConstraint, |
| TopLevel.isNullable(A)))))) { |
| collectMustBeMarkedNullableEvidence( |
| PointerValue, Env, ValueLoc, InferableSlotsFromValueContext, |
| Evidence::BOUND_TO_MUTABLE_NULLABLE, Emit); |
| } |
| } |
| |
| template <typename CallOrConstructExpr> |
| void collectEvidenceFromArgsAndParams( |
| const FunctionDecl &CalleeDecl, const CallOrConstructExpr &Expr, |
| const std::vector<InferableSlot> &InferableCallerSlots, |
| const Formula &InferableSlotsConstraint, |
| const PointerNullabilityLattice &Lattice, const dataflow::Environment &Env, |
| llvm::function_ref<EvidenceEmitter> Emit) { |
| unsigned ParamI = 0; |
| unsigned ArgI = 0; |
| // Member operator calls hold the function object as the first argument, |
| // offsetting the indices of parameters and corresponding arguments by 1. |
| // For example: Given struct S { bool operator+(int*); } |
| // The CXXMethodDecl has one parameter, but a call S{}+p is a |
| // CXXOperatorCallExpr with two arguments: an S and an int*. |
| if (isa<clang::CXXOperatorCallExpr>(Expr) && |
| isa<clang::CXXMethodDecl>(CalleeDecl)) |
| ++ArgI; |
| |
| bool CollectEvidenceForCallee = isInferenceTarget(CalleeDecl); |
| bool CollectEvidenceForCaller = isInferenceTarget(*Env.getCurrentFunc()); |
| |
| // For each pointer parameter of the callee, ... |
| for (; ParamI < CalleeDecl.param_size(); ++ParamI, ++ArgI) { |
| const auto *ParamDecl = CalleeDecl.getParamDecl(ParamI); |
| const auto ParamType = ParamDecl->getType().getNonReferenceType(); |
| if (!isSupportedRawPointerType(ParamType)) continue; |
| // the corresponding argument should also be a pointer. |
| CHECK(isSupportedRawPointerType(Expr.getArg(ArgI)->getType())) |
| << "Unsupported argument " << ArgI |
| << " type: " << Expr.getArg(ArgI)->getType().getAsString(); |
| |
| dataflow::PointerValue *PV = |
| getPointerValueFromExpr(Expr.getArg(ArgI), Env); |
| if (!PV) continue; |
| |
| SourceLocation ArgLoc = Expr.getArg(ArgI)->getExprLoc(); |
| |
| if (CollectEvidenceForCaller) { |
| auto ParamNullability = getNullabilityAnnotationsFromTypeAndOverrides( |
| ParamType, ParamDecl, Lattice); |
| |
| // Collect evidence from the binding of the argument to the parameter's |
| // nullability, if known. |
| collectEvidenceFromBindingToType( |
| ParamDecl->getType(), ParamNullability, *PV, InferableCallerSlots, |
| InferableSlotsConstraint, Env, ArgLoc, Emit); |
| } |
| |
| if (CollectEvidenceForCallee) { |
| // Emit evidence of the parameter's nullability. First, calculate that |
| // nullability based on InferableSlots for the caller being assigned to |
| // Unknown or their previously-inferred value, to reflect the current |
| // annotations and not all possible annotations for them. |
| NullabilityKind ArgNullability = |
| getNullability(*PV, Env, &InferableSlotsConstraint); |
| Evidence::Kind ArgEvidenceKind; |
| switch (ArgNullability) { |
| case NullabilityKind::Nullable: |
| ArgEvidenceKind = Evidence::NULLABLE_ARGUMENT; |
| break; |
| case NullabilityKind::NonNull: |
| ArgEvidenceKind = Evidence::NONNULL_ARGUMENT; |
| break; |
| default: |
| ArgEvidenceKind = Evidence::UNKNOWN_ARGUMENT; |
| } |
| Emit(CalleeDecl, paramSlot(ParamI), ArgEvidenceKind, ArgLoc); |
| } |
| } |
| } |
| |
| // Similar to collectEvidenceFromArgsAndParams, but handles the case of a call |
| // to a function pointer that is provided as a parameter to the caller. |
| // |
| // e.g. We can collect evidence for the nullability of `p` and (when we handle |
| // more than top-level pointer slots) `j` in the following, based on the call to |
| // `callee`: |
| // ``` |
| // void target(int* p, void (*callee)(Nonnull<int*> i, int* j)) { |
| // callee(p, nullptr); |
| // } |
| // ``` |
| // |
| // With `CalleeDecl` in this case being a ParmVarDecl instead of a FunctionDecl |
| // as in most CallExpr cases, distinct handling is needed. |
| void collectEvidenceFromCallExprWithoutDecl( |
| const Decl &CalleeDecl, const CallExpr &Expr, |
| const std::vector<InferableSlot> &InferableCallerSlots, |
| const Formula &InferableSlotsConstraint, |
| const PointerNullabilityLattice &Lattice, const dataflow::Environment &Env, |
| llvm::function_ref<EvidenceEmitter> Emit) { |
| // Function pointer params are the only case we know of so far that needs this |
| // special handling, so if we run into others, skip them, but log first. |
| if (!CalleeDecl.isFunctionPointerType() || !isa<ParmVarDecl>(CalleeDecl)) { |
| llvm::errs() << "Unsupported case of a CallExpr without a FunctionDecl. " |
| "Not collecting any evidence from this CallExpr:\n"; |
| Expr.dump(); |
| return; |
| } |
| auto *CalleeType = CalleeDecl.getFunctionType()->getAs<FunctionProtoType>(); |
| if (!CalleeType) return; |
| if (!isInferenceTarget(*Env.getCurrentFunc())) return; |
| |
| // For each pointer parameter of the callee, ... |
| for (unsigned I = 0; I < CalleeType->getNumParams(); ++I) { |
| const auto ParamType = CalleeType->getParamType(I); |
| if (!isSupportedRawPointerType(ParamType.getNonReferenceType())) continue; |
| // the corresponding argument should also be a pointer. |
| CHECK(isSupportedRawPointerType(Expr.getArg(I)->getType())) |
| << "Unsupported argument " << I |
| << " type: " << Expr.getArg(I)->getType().getAsString(); |
| |
| dataflow::PointerValue *PV = getPointerValueFromExpr(Expr.getArg(I), Env); |
| if (!PV) continue; |
| |
| auto ParamNullability = getNullabilityAnnotationsFromType(ParamType); |
| |
| // Collect evidence from the binding of the argument to the parameter's |
| // nullability, if known. |
| collectEvidenceFromBindingToType( |
| ParamType, ParamNullability, *PV, InferableCallerSlots, |
| InferableSlotsConstraint, Env, Expr.getArg(I)->getExprLoc(), Emit); |
| |
| // TODO: When we collect evidence for more complex slots than just top-level |
| // pointers, emit evidence of the nullability of the parameter of the |
| // function pointer as a slot in the caller's declaration, i.e. of `j` in |
| // the example in the function comment above. |
| } |
| } |
| |
| void collectEvidenceFromCallExpr( |
| const std::vector<InferableSlot> &InferableCallerSlots, |
| const Formula &InferableSlotsConstraint, const Stmt &Stmt, |
| const PointerNullabilityLattice &Lattice, const dataflow::Environment &Env, |
| llvm::function_ref<EvidenceEmitter> Emit) { |
| auto *CallExpr = dyn_cast_or_null<clang::CallExpr>(&Stmt); |
| if (!CallExpr) return; |
| auto *CalleeDecl = CallExpr->getCalleeDecl(); |
| if (!CalleeDecl) return; |
| if (auto *CalleeFunctionDecl = |
| dyn_cast_or_null<clang::FunctionDecl>(CalleeDecl)) { |
| collectEvidenceFromArgsAndParams( |
| *CalleeFunctionDecl, *CallExpr, InferableCallerSlots, |
| InferableSlotsConstraint, Lattice, Env, Emit); |
| } else { |
| collectEvidenceFromCallExprWithoutDecl( |
| *CalleeDecl, *CallExpr, InferableCallerSlots, InferableSlotsConstraint, |
| Lattice, Env, Emit); |
| } |
| } |
| |
| void collectEvidenceFromConstructExpr( |
| const std::vector<InferableSlot> &InferableSlots, |
| const Formula &InferableSlotsConstraint, const Stmt &Stmt, |
| const PointerNullabilityLattice &Lattice, const dataflow::Environment &Env, |
| llvm::function_ref<EvidenceEmitter> Emit) { |
| auto *ConstructExpr = dyn_cast_or_null<clang::CXXConstructExpr>(&Stmt); |
| if (!ConstructExpr || !ConstructExpr->getConstructor()) return; |
| auto *ConstructorDecl = dyn_cast_or_null<clang::CXXConstructorDecl>( |
| ConstructExpr->getConstructor()); |
| if (!ConstructorDecl || !isInferenceTarget(*ConstructorDecl)) return; |
| |
| collectEvidenceFromArgsAndParams(*ConstructorDecl, *ConstructExpr, |
| InferableSlots, InferableSlotsConstraint, |
| Lattice, Env, Emit); |
| } |
| |
| void collectEvidenceFromReturn(const std::vector<InferableSlot> &InferableSlots, |
| const Formula &InferableSlotsConstraint, |
| const Stmt &Stmt, |
| const dataflow::Environment &Env, |
| llvm::function_ref<EvidenceEmitter> Emit) { |
| // Is this CFGElement a return statement? |
| auto *ReturnStmt = dyn_cast_or_null<clang::ReturnStmt>(&Stmt); |
| if (!ReturnStmt) return; |
| auto *ReturnExpr = ReturnStmt->getRetValue(); |
| if (!ReturnExpr || !isSupportedRawPointerType(ReturnExpr->getType())) return; |
| |
| // Skip gathering evidence about the current function if the current function |
| // is not an inference target. |
| if (!isInferenceTarget(*Env.getCurrentFunc())) return; |
| |
| NullabilityKind ReturnNullability = |
| getNullability(ReturnExpr, Env, &InferableSlotsConstraint); |
| Evidence::Kind ReturnEvidenceKind; |
| switch (ReturnNullability) { |
| case NullabilityKind::Nullable: |
| ReturnEvidenceKind = Evidence::NULLABLE_RETURN; |
| break; |
| case NullabilityKind::NonNull: |
| ReturnEvidenceKind = Evidence::NONNULL_RETURN; |
| break; |
| default: |
| ReturnEvidenceKind = Evidence::UNKNOWN_RETURN; |
| } |
| Emit(*Env.getCurrentFunc(), SLOT_RETURN_TYPE, ReturnEvidenceKind, |
| ReturnExpr->getExprLoc()); |
| } |
| |
| void collectEvidenceFromAssignment( |
| const std::vector<InferableSlot> &InferableSlots, |
| const Formula &InferableSlotsConstraint, const Stmt &Stmt, |
| const PointerNullabilityLattice &Lattice, const dataflow::Environment &Env, |
| llvm::function_ref<EvidenceEmitter> Emit) { |
| if (!isInferenceTarget(*Env.getCurrentFunc())) return; |
| |
| // Initialization of new decl. |
| if (auto *DeclStmt = dyn_cast_or_null<clang::DeclStmt>(&Stmt)) { |
| for (auto *Decl : DeclStmt->decls()) { |
| if (auto *VarDecl = dyn_cast_or_null<clang::VarDecl>(Decl); |
| VarDecl && VarDecl->hasInit()) { |
| bool DeclTypeSupported = |
| isSupportedRawPointerType(VarDecl->getType().getNonReferenceType()); |
| bool InitTypeSupported = isSupportedPointerType( |
| VarDecl->getInit()->getType().getNonReferenceType()); |
| if (!DeclTypeSupported) return; |
| if (!InitTypeSupported) { |
| // TODO: we could perhaps support pointer initialization from numeric |
| // values, but this is very rare and not the most useful for |
| // nullability. |
| llvm::errs() << "Unsupported init type: " |
| << VarDecl->getInit()->getType() << "\n"; |
| return; |
| } |
| auto *PV = getPointerValueFromExpr(VarDecl->getInit(), Env); |
| if (!PV) return; |
| TypeNullability TypeNullability = |
| getNullabilityAnnotationsFromTypeAndOverrides(VarDecl->getType(), |
| VarDecl, Lattice); |
| collectEvidenceFromBindingToType( |
| VarDecl->getType(), TypeNullability, *PV, InferableSlots, |
| InferableSlotsConstraint, Env, VarDecl->getInit()->getExprLoc(), |
| Emit); |
| } |
| } |
| } |
| |
| // Assignment to existing decl. |
| if (auto *BinaryOperator = dyn_cast_or_null<clang::BinaryOperator>(&Stmt); |
| BinaryOperator && |
| BinaryOperator->getOpcode() == clang::BinaryOperatorKind::BO_Assign) { |
| bool LhsSupported = |
| isSupportedRawPointerType(BinaryOperator->getLHS()->getType()); |
| bool RhsSupported = |
| isSupportedRawPointerType(BinaryOperator->getRHS()->getType()); |
| if (!LhsSupported) return; |
| if (!RhsSupported) { |
| // TODO: we could perhaps support pointer assignments to numeric |
| // values, but this is very rare and not the most useful for |
| // nullability. |
| llvm::errs() << "Unsupported RHS type: " |
| << BinaryOperator->getRHS()->getType() << "\n"; |
| } |
| auto *PV = getPointerValueFromExpr(BinaryOperator->getRHS(), Env); |
| if (!PV) return; |
| TypeNullability TypeNullability; |
| if (auto *DeclRefExpr = |
| dyn_cast_or_null<clang::DeclRefExpr>(BinaryOperator->getLHS())) { |
| TypeNullability = getNullabilityAnnotationsFromTypeAndOverrides( |
| BinaryOperator->getLHS()->getType(), DeclRefExpr->getDecl(), Lattice); |
| } else { |
| TypeNullability = getNullabilityAnnotationsFromType( |
| BinaryOperator->getLHS()->getType()); |
| } |
| collectEvidenceFromBindingToType( |
| BinaryOperator->getLHS()->getType(), TypeNullability, *PV, |
| InferableSlots, InferableSlotsConstraint, Env, |
| BinaryOperator->getRHS()->getExprLoc(), Emit); |
| } |
| } |
| |
| void collectEvidenceFromElement( |
| const std::vector<InferableSlot> &InferableSlots, |
| const Formula &InferableSlotsConstraint, const CFGElement &Element, |
| const PointerNullabilityLattice &Lattice, const Environment &Env, |
| llvm::function_ref<EvidenceEmitter> Emit) { |
| auto CFGStmt = Element.getAs<clang::CFGStmt>(); |
| if (!CFGStmt) return; |
| auto *Stmt = CFGStmt->getStmt(); |
| if (!Stmt) return; |
| collectEvidenceFromDereference(InferableSlots, *Stmt, Env, Emit); |
| collectEvidenceFromCallExpr(InferableSlots, InferableSlotsConstraint, *Stmt, |
| Lattice, Env, Emit); |
| collectEvidenceFromConstructExpr(InferableSlots, InferableSlotsConstraint, |
| *Stmt, Lattice, Env, Emit); |
| collectEvidenceFromReturn(InferableSlots, InferableSlotsConstraint, *Stmt, |
| Env, Emit); |
| collectEvidenceFromAssignment(InferableSlots, InferableSlotsConstraint, *Stmt, |
| Lattice, Env, Emit); |
| // TODO: add more heuristic collections here |
| } |
| |
| std::optional<Evidence::Kind> evidenceKindFromDeclaredType(QualType T) { |
| if (!isSupportedRawPointerType(T.getNonReferenceType())) return std::nullopt; |
| auto Nullability = getNullabilityAnnotationsFromType(T); |
| switch (Nullability.front().concrete()) { |
| default: |
| return std::nullopt; |
| case NullabilityKind::NonNull: |
| return Evidence::ANNOTATED_NONNULL; |
| case NullabilityKind::Nullable: |
| return Evidence::ANNOTATED_NULLABLE; |
| } |
| } |
| |
| // Returns a function that the analysis can use to override Decl nullability |
| // values from the source code being analyzed with previously inferred |
| // nullabilities. |
| // |
| // In practice, this should only override the default nullability for Decls that |
| // do not spell out a nullability in source code, because we only pass in |
| // inferences from the previous round which are non-trivial and annotations |
| // "inferred" by reading an annotation from source code in the previous round |
| // were marked trivial. |
| auto getConcreteNullabilityOverrideFromPreviousInferences( |
| ConcreteNullabilityCache &Cache, USRCache &USRCache, |
| const PreviousInferences &PreviousInferences) { |
| return [&](const Decl &D) -> std::optional<const PointerTypeNullability *> { |
| auto [It, Inserted] = Cache.try_emplace(&D); |
| if (Inserted) { |
| std::optional<const Decl *> FingerprintedDecl; |
| Slot Slot; |
| if (auto *FD = clang::dyn_cast_or_null<FunctionDecl>(&D)) { |
| FingerprintedDecl = FD; |
| Slot = SLOT_RETURN_TYPE; |
| } else if (auto *PD = clang::dyn_cast_or_null<ParmVarDecl>(&D)) { |
| if (auto *Parent = clang::dyn_cast_or_null<FunctionDecl>( |
| PD->getParentFunctionOrMethod())) { |
| FingerprintedDecl = Parent; |
| Slot = paramSlot(PD->getFunctionScopeIndex()); |
| } |
| } |
| if (!FingerprintedDecl) return std::nullopt; |
| auto Fingerprint = |
| fingerprint(getOrGenerateUSR(USRCache, **FingerprintedDecl), Slot); |
| if (PreviousInferences.Nullable.contains(Fingerprint)) { |
| It->second.emplace(NullabilityKind::Nullable); |
| } else if (PreviousInferences.Nonnull.contains(Fingerprint)) { |
| It->second.emplace(NullabilityKind::NonNull); |
| } else { |
| It->second = std::nullopt; |
| } |
| } |
| if (!It->second) return std::nullopt; |
| return &*It->second; |
| }; |
| } |
| |
| // Adds InferableSlots for the return types of functions called by |
| // `CurrentFunction`. If a called function's return value is dereferenced, |
| // this enables us to collect evidence that the return type should be Nonnull. |
| void addInferableSlotsForCalledFunctions( |
| const FunctionDecl &CurrentFunction, |
| std::vector<InferableSlot> &InferableSlots, |
| PointerNullabilityAnalysis &Analysis, dataflow::Arena &Arena) { |
| static constexpr std::string_view ReturnTypeNodeId = "ReturnType"; |
| static constexpr std::string_view FunctionDeclNodeId = "FunctionDecl"; |
| |
| llvm::DenseSet<const FunctionDecl *> Functions; |
| for (const auto &Match : clang::ast_matchers::match( |
| functionDecl(forEachDescendant(callExpr( |
| callee(functionDecl(returns(qualType().bind(ReturnTypeNodeId))) |
| .bind(FunctionDeclNodeId))))), |
| CurrentFunction, CurrentFunction.getASTContext())) { |
| auto *ReturnType = Match.getNodeAs<QualType>(ReturnTypeNodeId); |
| if (!ReturnType || !hasInferable(*ReturnType) || |
| evidenceKindFromDeclaredType(*ReturnType)) |
| continue; |
| auto *CalledFunction = Match.getNodeAs<FunctionDecl>(FunctionDeclNodeId); |
| if (!CalledFunction || !isInferenceTarget(*CalledFunction)) continue; |
| auto [it, inserted] = Functions.insert(CalledFunction); |
| if (inserted) { |
| InferableSlots.emplace_back( |
| Analysis.assignNullabilityVariable(CalledFunction, Arena), |
| SLOT_RETURN_TYPE, *CalledFunction); |
| } |
| } |
| } |
| } // namespace |
| |
| llvm::Error collectEvidenceFromImplementation( |
| const Decl &ImplementationDecl, llvm::function_ref<EvidenceEmitter> Emit, |
| USRCache &USRCache, const PreviousInferences PreviousInferences) { |
| const FunctionDecl *Func = dyn_cast<FunctionDecl>(&ImplementationDecl); |
| if (!Func || !Func->doesThisDeclarationHaveABody()) { |
| return llvm::createStringError( |
| llvm::inconvertibleErrorCode(), |
| "Implementation must be a function with a body."); |
| } |
| |
| llvm::Expected<dataflow::ControlFlowContext> ControlFlowContext = |
| dataflow::ControlFlowContext::build(*Func); |
| if (!ControlFlowContext) return ControlFlowContext.takeError(); |
| |
| DataflowAnalysisContext AnalysisContext( |
| std::make_unique<dataflow::WatchedLiteralsSolver>(200000)); |
| Environment Environment(AnalysisContext, *Func); |
| PointerNullabilityAnalysis Analysis( |
| Func->getDeclContext()->getParentASTContext(), Environment); |
| std::vector<InferableSlot> InferableSlots; |
| if (isInferenceTarget(*Func)) { |
| auto Parameters = Func->parameters(); |
| for (auto I = 0; I < Parameters.size(); ++I) { |
| auto T = Parameters[I]->getType().getNonReferenceType(); |
| if (isSupportedRawPointerType(T) && !evidenceKindFromDeclaredType(T)) { |
| InferableSlots.emplace_back(Analysis.assignNullabilityVariable( |
| Parameters[I], AnalysisContext.arena()), |
| paramSlot(I), *Func); |
| } |
| } |
| } |
| addInferableSlotsForCalledFunctions(*Func, InferableSlots, Analysis, |
| AnalysisContext.arena()); |
| const auto &InferableSlotsConstraint = |
| getInferableSlotsAsInferredOrUnknownConstraint(InferableSlots, USRCache, |
| PreviousInferences, |
| AnalysisContext.arena()); |
| |
| ConcreteNullabilityCache ConcreteNullabilityCache; |
| Analysis.assignNullabilityOverride( |
| getConcreteNullabilityOverrideFromPreviousInferences( |
| ConcreteNullabilityCache, USRCache, PreviousInferences)); |
| |
| return dataflow::runDataflowAnalysis( |
| *ControlFlowContext, Analysis, Environment, |
| [&](const CFGElement &Element, |
| const dataflow::DataflowAnalysisState< |
| PointerNullabilityLattice> &State) { |
| collectEvidenceFromElement(InferableSlots, |
| InferableSlotsConstraint, Element, |
| State.Lattice, State.Env, Emit); |
| }) |
| .takeError(); |
| } |
| |
| void collectEvidenceFromTargetDeclaration( |
| const clang::Decl &D, llvm::function_ref<EvidenceEmitter> Emit) { |
| // For now, we can only describe the nullability of functions. |
| const auto *Fn = dyn_cast<clang::FunctionDecl>(&D); |
| if (!Fn) return; |
| |
| if (auto K = evidenceKindFromDeclaredType(Fn->getReturnType())) |
| Emit(*Fn, SLOT_RETURN_TYPE, *K, Fn->getReturnTypeSourceRange().getBegin()); |
| for (unsigned I = 0; I < Fn->param_size(); ++I) { |
| if (auto K = evidenceKindFromDeclaredType(Fn->getParamDecl(I)->getType())) |
| Emit(*Fn, paramSlot(I), *K, Fn->getParamDecl(I)->getTypeSpecStartLoc()); |
| } |
| } |
| |
| EvidenceSites EvidenceSites::discover(ASTContext &Ctx) { |
| struct Walker : public RecursiveASTVisitor<Walker> { |
| EvidenceSites Out; |
| |
| // We do want to see concrete code, including function instantiations. |
| bool shouldVisitTemplateInstantiations() const { return true; } |
| |
| bool VisitFunctionDecl(absl::Nonnull<const FunctionDecl *> FD) { |
| if (isInferenceTarget(*FD)) Out.Declarations.push_back(FD); |
| |
| // Visiting template instantiations is fine, these are valid functions! |
| // But we'll be limited in what we can infer. |
| bool IsUsefulImplementation = |
| FD->doesThisDeclarationHaveABody() && |
| // We will not get anywhere with dependent code. |
| !FD->isDependentContext(); |
| if (IsUsefulImplementation) Out.Implementations.push_back(FD); |
| |
| return true; |
| } |
| }; |
| |
| Walker W; |
| W.TraverseAST(Ctx); |
| return std::move(W.Out); |
| } |
| |
| } // namespace clang::tidy::nullability |