Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 1 | // Part of the Crubit project, under the Apache License v2.0 with LLVM |
| 2 | // Exceptions. See /LICENSE for license information. |
| 3 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 4 | |
| 5 | #include "nullability/inference/collect_evidence.h" |
| 6 | |
Dmitri Gribenko | 742c4c3 | 2023-07-31 12:32:09 -0700 | [diff] [blame] | 7 | #include <memory> |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 8 | #include <optional> |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 9 | #include <string> |
Googler | 6acdc64 | 2023-10-19 08:03:40 -0700 | [diff] [blame] | 10 | #include <string_view> |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 11 | #include <utility> |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 12 | #include <vector> |
| 13 | |
Dmitri Gribenko | 742c4c3 | 2023-07-31 12:32:09 -0700 | [diff] [blame] | 14 | #include "absl/log/check.h" |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 15 | #include "nullability/inference/inferable.h" |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 16 | #include "nullability/inference/inference.proto.h" |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 17 | #include "nullability/inference/slot_fingerprint.h" |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 18 | #include "nullability/pointer_nullability.h" |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 19 | #include "nullability/pointer_nullability_analysis.h" |
| 20 | #include "nullability/pointer_nullability_lattice.h" |
Sam McCall | bd1a6e5 | 2023-07-14 01:04:11 -0700 | [diff] [blame] | 21 | #include "nullability/type_nullability.h" |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 22 | #include "clang/AST/ASTContext.h" |
| 23 | #include "clang/AST/Decl.h" |
| 24 | #include "clang/AST/DeclBase.h" |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 25 | #include "clang/AST/Expr.h" |
Googler | 2e9a797 | 2023-07-24 06:02:13 -0700 | [diff] [blame] | 26 | #include "clang/AST/ExprCXX.h" |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 27 | #include "clang/AST/OperationKinds.h" |
Sam McCall | 296d070 | 2023-07-14 13:32:57 -0700 | [diff] [blame] | 28 | #include "clang/AST/RecursiveASTVisitor.h" |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 29 | #include "clang/AST/Stmt.h" |
| 30 | #include "clang/AST/Type.h" |
| 31 | #include "clang/Analysis/CFG.h" |
Googler | e1504a6 | 2023-07-18 14:03:23 -0700 | [diff] [blame] | 32 | #include "clang/Analysis/FlowSensitive/Arena.h" |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 33 | #include "clang/Analysis/FlowSensitive/ControlFlowContext.h" |
| 34 | #include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" |
| 35 | #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" |
| 36 | #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" |
Googler | e1504a6 | 2023-07-18 14:03:23 -0700 | [diff] [blame] | 37 | #include "clang/Analysis/FlowSensitive/Formula.h" |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 38 | #include "clang/Analysis/FlowSensitive/Value.h" |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 39 | #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" |
| 40 | #include "clang/Basic/LLVM.h" |
Googler | 43114e8 | 2023-09-26 10:51:01 -0700 | [diff] [blame] | 41 | #include "clang/Basic/SourceLocation.h" |
Sam McCall | bd1a6e5 | 2023-07-14 01:04:11 -0700 | [diff] [blame] | 42 | #include "clang/Basic/Specifiers.h" |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 43 | #include "clang/Index/USRGeneration.h" |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 44 | #include "llvm/ADT/DenseSet.h" |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 45 | #include "llvm/ADT/FunctionExtras.h" |
| 46 | #include "llvm/ADT/STLFunctionalExtras.h" |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 47 | #include "llvm/Support/Error.h" |
| 48 | #include "llvm/Support/raw_ostream.h" |
| 49 | |
| 50 | namespace clang::tidy::nullability { |
| 51 | using ::clang::dataflow::DataflowAnalysisContext; |
| 52 | using ::clang::dataflow::Environment; |
Googler | ac9ac80 | 2023-10-19 09:09:35 -0700 | [diff] [blame] | 53 | using ::clang::dataflow::Formula; |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 54 | |
Googler | 6acdc64 | 2023-10-19 08:03:40 -0700 | [diff] [blame] | 55 | std::string_view getOrGenerateUSR(USRCache &Cache, const Decl &Decl) { |
| 56 | auto [It, Inserted] = Cache.try_emplace(&Decl); |
| 57 | if (Inserted) { |
| 58 | llvm::SmallString<128> USR; |
| 59 | if (!index::generateUSRForDecl(&Decl, USR)) It->second = USR.str(); |
| 60 | } |
| 61 | return It->second; |
| 62 | } |
| 63 | |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 64 | llvm::unique_function<EvidenceEmitter> evidenceEmitter( |
Googler | 6acdc64 | 2023-10-19 08:03:40 -0700 | [diff] [blame] | 65 | llvm::unique_function<void(const Evidence &) const> Emit, |
| 66 | nullability::USRCache &USRCache) { |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 67 | class EvidenceEmitterImpl { |
| 68 | public: |
| 69 | EvidenceEmitterImpl( |
Googler | 6acdc64 | 2023-10-19 08:03:40 -0700 | [diff] [blame] | 70 | llvm::unique_function<void(const Evidence &) const> Emit, |
| 71 | nullability::USRCache &USRCache) |
| 72 | : Emit(std::move(Emit)), USRCache(USRCache) {} |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 73 | |
Sam McCall | cfd2dd1 | 2023-07-18 19:35:21 -0700 | [diff] [blame] | 74 | void operator()(const Decl &Target, Slot S, Evidence::Kind Kind, |
| 75 | SourceLocation Loc) const { |
Googler | 6604531 | 2023-09-11 12:28:58 -0700 | [diff] [blame] | 76 | CHECK(isInferenceTarget(Target)) |
| 77 | << "Evidence emitted for a Target which is not an inference target."; |
| 78 | |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 79 | Evidence E; |
Sam McCall | 83bc55c | 2023-07-17 09:47:17 -0700 | [diff] [blame] | 80 | E.set_slot(S); |
| 81 | E.set_kind(Kind); |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 82 | |
Googler | 6acdc64 | 2023-10-19 08:03:40 -0700 | [diff] [blame] | 83 | std::string_view USR = getOrGenerateUSR(USRCache, Target); |
| 84 | if (USR.empty()) return; // Can't emit without a USR |
| 85 | E.mutable_symbol()->set_usr(USR); |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 86 | |
Sam McCall | cfd2dd1 | 2023-07-18 19:35:21 -0700 | [diff] [blame] | 87 | // TODO: make collecting and propagating location information optional? |
| 88 | auto &SM = |
| 89 | Target.getDeclContext()->getParentASTContext().getSourceManager(); |
| 90 | // TODO: are macro locations actually useful enough for debugging? |
| 91 | // we could leave them out, and make room for non-macro samples. |
| 92 | if (Loc = SM.getFileLoc(Loc); Loc.isValid()) |
| 93 | E.set_location(Loc.printToString(SM)); |
| 94 | |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 95 | Emit(E); |
| 96 | } |
| 97 | |
| 98 | private: |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 99 | llvm::unique_function<void(const Evidence &) const> Emit; |
Googler | 6acdc64 | 2023-10-19 08:03:40 -0700 | [diff] [blame] | 100 | nullability::USRCache &USRCache; |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 101 | }; |
Googler | 6acdc64 | 2023-10-19 08:03:40 -0700 | [diff] [blame] | 102 | return EvidenceEmitterImpl(std::move(Emit), USRCache); |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 103 | } |
| 104 | |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 105 | namespace { |
Googler | e1504a6 | 2023-07-18 14:03:23 -0700 | [diff] [blame] | 106 | |
Sam McCall | 5f2bb24 | 2023-09-14 07:02:56 -0700 | [diff] [blame] | 107 | // If Element is a dereference, returns its target and location. |
| 108 | std::pair<Expr *, SourceLocation> describeDereference( |
| 109 | const CFGElement &Element) { |
| 110 | if (auto CFGStmt = Element.getAs<clang::CFGStmt>()) { |
| 111 | if (auto *Op = dyn_cast<UnaryOperator>(CFGStmt->getStmt()); |
| 112 | Op && Op->getOpcode() == UO_Deref) { |
| 113 | return {Op->getSubExpr(), Op->getOperatorLoc()}; |
| 114 | } |
| 115 | if (auto *ME = dyn_cast<MemberExpr>(CFGStmt->getStmt()); |
| 116 | ME && ME->isArrow()) { |
| 117 | return {ME->getBase(), ME->getOperatorLoc()}; |
| 118 | } |
| 119 | } |
| 120 | return {nullptr, SourceLocation()}; |
| 121 | } |
| 122 | |
Googler | 43114e8 | 2023-09-26 10:51:01 -0700 | [diff] [blame] | 123 | // Records evidence derived from the assumption that Value is nonnull. |
| 124 | // It may be dereferenced, passed as a nonnull param, etc, per EvidenceKind. |
| 125 | void collectMustBeNonnullEvidence( |
| 126 | const dataflow::PointerValue &Value, const dataflow::Environment &Env, |
| 127 | SourceLocation Loc, |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 128 | std::vector<std::pair<PointerTypeNullability, Slot>> &InferableSlots, |
Googler | 43114e8 | 2023-09-26 10:51:01 -0700 | [diff] [blame] | 129 | Evidence::Kind EvidenceKind, llvm::function_ref<EvidenceEmitter> Emit) { |
| 130 | auto &A = Env.getDataflowAnalysisContext().arena(); |
| 131 | auto &NotIsNull = A.makeNot(getPointerNullState(Value).IsNull); |
| 132 | |
| 133 | // If the flow conditions already imply that Value is not null, then we don't |
| 134 | // have any new evidence of a necessary annotation. |
| 135 | if (Env.flowConditionImplies(NotIsNull)) return; |
| 136 | |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 137 | // Otherwise, if an inferable slot being annotated Nonnull would imply that |
Googler | 43114e8 | 2023-09-26 10:51:01 -0700 | [diff] [blame] | 138 | // Value is not null, then we have evidence suggesting that slot should be |
| 139 | // annotated. For now, we simply choose the first such slot, sidestepping |
| 140 | // complexities around the possibility of multiple such slots, any one of |
| 141 | // which would be sufficient if annotated Nonnull. |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 142 | for (auto &[Nullability, Slot] : InferableSlots) { |
Googler | 43114e8 | 2023-09-26 10:51:01 -0700 | [diff] [blame] | 143 | auto &SlotNonnullImpliesValueNonnull = |
| 144 | A.makeImplies(Nullability.isNonnull(A), NotIsNull); |
| 145 | if (Env.flowConditionImplies(SlotNonnullImpliesValueNonnull)) |
| 146 | Emit(*Env.getCurrentFunc(), Slot, EvidenceKind, Loc); |
| 147 | } |
| 148 | } |
| 149 | |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 150 | void collectEvidenceFromDereference( |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 151 | std::vector<std::pair<PointerTypeNullability, Slot>> &InferableSlots, |
Googler | 3e3ae48 | 2023-07-19 08:50:59 -0700 | [diff] [blame] | 152 | const CFGElement &Element, const dataflow::Environment &Env, |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 153 | llvm::function_ref<EvidenceEmitter> Emit) { |
Sam McCall | 5f2bb24 | 2023-09-14 07:02:56 -0700 | [diff] [blame] | 154 | auto [Target, Loc] = describeDereference(Element); |
| 155 | if (!Target || !Target->getType()->isPointerType()) return; |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 156 | |
| 157 | // It is a dereference of a pointer. Now gather evidence from it. |
Googler | 6604531 | 2023-09-11 12:28:58 -0700 | [diff] [blame] | 158 | |
Sam McCall | 5f2bb24 | 2023-09-14 07:02:56 -0700 | [diff] [blame] | 159 | // Skip gathering evidence about the current function if the current |
| 160 | // function is not an inference target. |
Googler | 6604531 | 2023-09-11 12:28:58 -0700 | [diff] [blame] | 161 | if (!isInferenceTarget(*Env.getCurrentFunc())) return; |
| 162 | |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 163 | dataflow::PointerValue *DereferencedValue = |
Sam McCall | 5f2bb24 | 2023-09-14 07:02:56 -0700 | [diff] [blame] | 164 | getPointerValueFromExpr(Target, Env); |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 165 | if (!DereferencedValue) return; |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 166 | collectMustBeNonnullEvidence(*DereferencedValue, Env, Loc, InferableSlots, |
Googler | 43114e8 | 2023-09-26 10:51:01 -0700 | [diff] [blame] | 167 | Evidence::UNCHECKED_DEREFERENCE, Emit); |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 168 | } |
| 169 | |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 170 | // Inferable slots are nullability slots not explicitly annotated in source |
| 171 | // code that we are currently capable of handling. This returns a boolean |
| 172 | // constraint representing these slots having a) the nullability inferred from |
| 173 | // the previous round for this slot or b) Unknown nullability if no inference |
| 174 | // was made in the previous round or there was no previous round. |
Googler | 31cc2d3 | 2023-10-23 15:48:20 -0700 | [diff] [blame^] | 175 | const Formula &getInferableSlotsAsInferredOrUnknownConstraint( |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 176 | std::vector<std::pair<PointerTypeNullability, Slot>> &InferableSlots, |
Googler | 7191949 | 2023-10-23 15:44:06 -0700 | [diff] [blame] | 177 | const PreviousInferences &PreviousInferences, USRCache &USRCache, |
| 178 | dataflow::Arena &A, const Decl &CurrentFunc) { |
| 179 | const Formula *Constraint = &A.makeLiteral(true); |
| 180 | std::string_view USR = getOrGenerateUSR(USRCache, CurrentFunc); |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 181 | for (auto &[Nullability, Slot] : InferableSlots) { |
Googler | ac9ac80 | 2023-10-19 09:09:35 -0700 | [diff] [blame] | 182 | SlotFingerprint Fingerprint = fingerprint(USR, Slot); |
Googler | 7191949 | 2023-10-23 15:44:06 -0700 | [diff] [blame] | 183 | const Formula &Nullable = PreviousInferences.Nullable.contains(Fingerprint) |
Googler | ac9ac80 | 2023-10-19 09:09:35 -0700 | [diff] [blame] | 184 | ? Nullability.isNullable(A) |
| 185 | : A.makeNot(Nullability.isNullable(A)); |
Googler | 7191949 | 2023-10-23 15:44:06 -0700 | [diff] [blame] | 186 | const Formula &Nonnull = PreviousInferences.Nonnull.contains(Fingerprint) |
Googler | ac9ac80 | 2023-10-19 09:09:35 -0700 | [diff] [blame] | 187 | ? Nullability.isNonnull(A) |
| 188 | : A.makeNot(Nullability.isNonnull(A)); |
Googler | 7191949 | 2023-10-23 15:44:06 -0700 | [diff] [blame] | 189 | Constraint = &A.makeAnd(*Constraint, A.makeAnd(Nullable, Nonnull)); |
Googler | 3e3ae48 | 2023-07-19 08:50:59 -0700 | [diff] [blame] | 190 | } |
Googler | 31cc2d3 | 2023-10-23 15:48:20 -0700 | [diff] [blame^] | 191 | return *Constraint; |
Googler | 3e3ae48 | 2023-07-19 08:50:59 -0700 | [diff] [blame] | 192 | } |
| 193 | |
Googler | 43114e8 | 2023-09-26 10:51:01 -0700 | [diff] [blame] | 194 | void collectEvidenceFromParamAnnotation( |
| 195 | TypeNullability &ParamNullability, const dataflow::PointerValue &ArgPV, |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 196 | std::vector<std::pair<PointerTypeNullability, Slot>> &InferableCallerSlots, |
Googler | 43114e8 | 2023-09-26 10:51:01 -0700 | [diff] [blame] | 197 | const dataflow::Environment &Env, SourceLocation ArgLoc, |
| 198 | llvm::function_ref<EvidenceEmitter> Emit) { |
| 199 | // TODO: Account for variance and each layer of nullability when we handle |
| 200 | // more than top-level pointers. |
| 201 | if (ParamNullability.empty()) return; |
| 202 | if (ParamNullability[0].concrete() == NullabilityKind::NonNull) { |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 203 | collectMustBeNonnullEvidence(ArgPV, Env, ArgLoc, InferableCallerSlots, |
Googler | 43114e8 | 2023-09-26 10:51:01 -0700 | [diff] [blame] | 204 | Evidence::PASSED_TO_NONNULL, Emit); |
| 205 | } |
| 206 | } |
| 207 | |
Googler | e1504a6 | 2023-07-18 14:03:23 -0700 | [diff] [blame] | 208 | void collectEvidenceFromCallExpr( |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 209 | std::vector<std::pair<PointerTypeNullability, Slot>> &InferableCallerSlots, |
Googler | 31cc2d3 | 2023-10-23 15:48:20 -0700 | [diff] [blame^] | 210 | const Formula &InferableSlotsConstraint, const CFGElement &Element, |
Googler | ac9ac80 | 2023-10-19 09:09:35 -0700 | [diff] [blame] | 211 | const dataflow::Environment &Env, |
Googler | e1504a6 | 2023-07-18 14:03:23 -0700 | [diff] [blame] | 212 | llvm::function_ref<EvidenceEmitter> Emit) { |
| 213 | // Is this CFGElement a call to a function? |
| 214 | auto CFGStmt = Element.getAs<clang::CFGStmt>(); |
| 215 | if (!CFGStmt) return; |
| 216 | auto *CallExpr = dyn_cast_or_null<clang::CallExpr>(CFGStmt->getStmt()); |
| 217 | if (!CallExpr || !CallExpr->getCalleeDecl()) return; |
| 218 | auto *CalleeDecl = |
| 219 | dyn_cast_or_null<clang::FunctionDecl>(CallExpr->getCalleeDecl()); |
| 220 | if (!CalleeDecl || !isInferenceTarget(*CalleeDecl)) return; |
| 221 | |
Googler | 2e9a797 | 2023-07-24 06:02:13 -0700 | [diff] [blame] | 222 | unsigned ParamI = 0; |
| 223 | unsigned ArgI = 0; |
| 224 | // Member operator calls hold the function object as the first argument, |
| 225 | // offsetting the indices of parameters and corresponding arguments by 1. |
| 226 | // For example: Given struct S { bool operator+(int*); } |
| 227 | // The CXXMethodDecl has one parameter, but a call S{}+p is a |
| 228 | // CXXOperatorCallExpr with two arguments: an S and an int*. |
| 229 | if (isa<clang::CXXOperatorCallExpr>(CallExpr) && |
| 230 | isa<clang::CXXMethodDecl>(CalleeDecl)) |
| 231 | ++ArgI; |
| 232 | |
| 233 | // For each pointer parameter of the callee, ... |
| 234 | for (; ParamI < CalleeDecl->param_size(); ++ParamI, ++ArgI) { |
Googler | 43114e8 | 2023-09-26 10:51:01 -0700 | [diff] [blame] | 235 | auto ParamType = |
| 236 | CalleeDecl->getParamDecl(ParamI)->getType().getNonReferenceType(); |
| 237 | if (!isSupportedPointerType(ParamType)) continue; |
Googler | 2e9a797 | 2023-07-24 06:02:13 -0700 | [diff] [blame] | 238 | // the corresponding argument should also be a pointer. |
Googler | 779d3da | 2023-09-07 10:22:58 -0700 | [diff] [blame] | 239 | CHECK(isSupportedPointerType(CallExpr->getArg(ArgI)->getType())); |
Googler | e1504a6 | 2023-07-18 14:03:23 -0700 | [diff] [blame] | 240 | |
Googler | e1504a6 | 2023-07-18 14:03:23 -0700 | [diff] [blame] | 241 | dataflow::PointerValue *PV = |
Googler | 2e9a797 | 2023-07-24 06:02:13 -0700 | [diff] [blame] | 242 | getPointerValueFromExpr(CallExpr->getArg(ArgI), Env); |
| 243 | if (!PV) continue; |
Googler | e1504a6 | 2023-07-18 14:03:23 -0700 | [diff] [blame] | 244 | |
Googler | 43114e8 | 2023-09-26 10:51:01 -0700 | [diff] [blame] | 245 | SourceLocation ArgLoc = CallExpr->getArg(ArgI)->getExprLoc(); |
| 246 | |
| 247 | // TODO: Include inferred annotations from previous rounds when propagating. |
| 248 | auto ParamNullability = getNullabilityAnnotationsFromType(ParamType); |
| 249 | |
| 250 | // Collect evidence from the binding of the argument to the parameter's |
| 251 | // nullability, if known. |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 252 | collectEvidenceFromParamAnnotation(ParamNullability, *PV, |
| 253 | InferableCallerSlots, Env, ArgLoc, Emit); |
Googler | e1504a6 | 2023-07-18 14:03:23 -0700 | [diff] [blame] | 254 | |
Googler | 2e9a797 | 2023-07-24 06:02:13 -0700 | [diff] [blame] | 255 | // Emit evidence of the parameter's nullability. First, calculate that |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 256 | // nullability based on InferableSlots for the caller being assigned to |
Googler | e1504a6 | 2023-07-18 14:03:23 -0700 | [diff] [blame] | 257 | // Unknown, to reflect the current annotations and not all possible |
| 258 | // annotations for them. |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 259 | NullabilityKind ArgNullability = |
Googler | 31cc2d3 | 2023-10-23 15:48:20 -0700 | [diff] [blame^] | 260 | getNullability(*PV, Env, &InferableSlotsConstraint); |
Googler | e1504a6 | 2023-07-18 14:03:23 -0700 | [diff] [blame] | 261 | Evidence::Kind ArgEvidenceKind; |
| 262 | switch (ArgNullability) { |
| 263 | case NullabilityKind::Nullable: |
| 264 | ArgEvidenceKind = Evidence::NULLABLE_ARGUMENT; |
| 265 | break; |
| 266 | case NullabilityKind::NonNull: |
| 267 | ArgEvidenceKind = Evidence::NONNULL_ARGUMENT; |
| 268 | break; |
| 269 | default: |
| 270 | ArgEvidenceKind = Evidence::UNKNOWN_ARGUMENT; |
| 271 | } |
Googler | 43114e8 | 2023-09-26 10:51:01 -0700 | [diff] [blame] | 272 | Emit(*CalleeDecl, paramSlot(ParamI), ArgEvidenceKind, ArgLoc); |
Googler | e1504a6 | 2023-07-18 14:03:23 -0700 | [diff] [blame] | 273 | } |
| 274 | } |
| 275 | |
Googler | 3e3ae48 | 2023-07-19 08:50:59 -0700 | [diff] [blame] | 276 | void collectEvidenceFromReturn( |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 277 | std::vector<std::pair<PointerTypeNullability, Slot>> &InferableSlots, |
Googler | 31cc2d3 | 2023-10-23 15:48:20 -0700 | [diff] [blame^] | 278 | const Formula &InferableSlotsConstraint, const CFGElement &Element, |
Googler | ac9ac80 | 2023-10-19 09:09:35 -0700 | [diff] [blame] | 279 | const dataflow::Environment &Env, |
Googler | 3e3ae48 | 2023-07-19 08:50:59 -0700 | [diff] [blame] | 280 | llvm::function_ref<EvidenceEmitter> Emit) { |
| 281 | // Is this CFGElement a return statement? |
| 282 | auto CFGStmt = Element.getAs<clang::CFGStmt>(); |
| 283 | if (!CFGStmt) return; |
| 284 | auto *ReturnStmt = dyn_cast_or_null<clang::ReturnStmt>(CFGStmt->getStmt()); |
| 285 | if (!ReturnStmt) return; |
| 286 | auto *ReturnExpr = ReturnStmt->getRetValue(); |
Martin Brænne | 7a8d25c | 2023-08-23 03:52:30 -0700 | [diff] [blame] | 287 | if (!ReturnExpr || !isSupportedPointerType(ReturnExpr->getType())) return; |
Googler | 3e3ae48 | 2023-07-19 08:50:59 -0700 | [diff] [blame] | 288 | |
Googler | 6604531 | 2023-09-11 12:28:58 -0700 | [diff] [blame] | 289 | // Skip gathering evidence about the current function if the current function |
| 290 | // is not an inference target. |
| 291 | if (!isInferenceTarget(*Env.getCurrentFunc())) return; |
| 292 | |
Googler | 3e3ae48 | 2023-07-19 08:50:59 -0700 | [diff] [blame] | 293 | NullabilityKind ReturnNullability = |
Googler | 31cc2d3 | 2023-10-23 15:48:20 -0700 | [diff] [blame^] | 294 | getNullability(ReturnExpr, Env, &InferableSlotsConstraint); |
Googler | 3e3ae48 | 2023-07-19 08:50:59 -0700 | [diff] [blame] | 295 | Evidence::Kind ReturnEvidenceKind; |
| 296 | switch (ReturnNullability) { |
| 297 | case NullabilityKind::Nullable: |
| 298 | ReturnEvidenceKind = Evidence::NULLABLE_RETURN; |
| 299 | break; |
| 300 | case NullabilityKind::NonNull: |
| 301 | ReturnEvidenceKind = Evidence::NONNULL_RETURN; |
| 302 | break; |
| 303 | default: |
| 304 | ReturnEvidenceKind = Evidence::UNKNOWN_RETURN; |
| 305 | } |
| 306 | Emit(*Env.getCurrentFunc(), SLOT_RETURN_TYPE, ReturnEvidenceKind, |
| 307 | ReturnExpr->getExprLoc()); |
| 308 | } |
| 309 | |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 310 | void collectEvidenceFromElement( |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 311 | std::vector<std::pair<PointerTypeNullability, Slot>> InferableSlots, |
Googler | 31cc2d3 | 2023-10-23 15:48:20 -0700 | [diff] [blame^] | 312 | const Formula &InferableSlotsConstraint, const CFGElement &Element, |
Googler | 7191949 | 2023-10-23 15:44:06 -0700 | [diff] [blame] | 313 | const Environment &Env, llvm::function_ref<EvidenceEmitter> Emit) { |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 314 | collectEvidenceFromDereference(InferableSlots, Element, Env, Emit); |
Googler | 7191949 | 2023-10-23 15:44:06 -0700 | [diff] [blame] | 315 | collectEvidenceFromCallExpr(InferableSlots, InferableSlotsConstraint, Element, |
| 316 | Env, Emit); |
| 317 | collectEvidenceFromReturn(InferableSlots, InferableSlotsConstraint, Element, |
| 318 | Env, Emit); |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 319 | // TODO: add more heuristic collections here |
| 320 | } |
Sam McCall | bd1a6e5 | 2023-07-14 01:04:11 -0700 | [diff] [blame] | 321 | |
Sam McCall | 83bc55c | 2023-07-17 09:47:17 -0700 | [diff] [blame] | 322 | std::optional<Evidence::Kind> evidenceKindFromDeclaredType(QualType T) { |
Martin Brænne | 7a8d25c | 2023-08-23 03:52:30 -0700 | [diff] [blame] | 323 | if (!isSupportedPointerType(T.getNonReferenceType())) return std::nullopt; |
Sam McCall | bd1a6e5 | 2023-07-14 01:04:11 -0700 | [diff] [blame] | 324 | auto Nullability = getNullabilityAnnotationsFromType(T); |
Sam McCall | e644e1d | 2023-07-18 19:19:12 -0700 | [diff] [blame] | 325 | switch (Nullability.front().concrete()) { |
Sam McCall | bd1a6e5 | 2023-07-14 01:04:11 -0700 | [diff] [blame] | 326 | default: |
| 327 | return std::nullopt; |
Sam McCall | 83bc55c | 2023-07-17 09:47:17 -0700 | [diff] [blame] | 328 | case NullabilityKind::NonNull: |
| 329 | return Evidence::ANNOTATED_NONNULL; |
| 330 | case NullabilityKind::Nullable: |
| 331 | return Evidence::ANNOTATED_NULLABLE; |
Sam McCall | bd1a6e5 | 2023-07-14 01:04:11 -0700 | [diff] [blame] | 332 | } |
| 333 | } |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 334 | } // namespace |
| 335 | |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 336 | llvm::Error collectEvidenceFromImplementation( |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 337 | const Decl &Decl, llvm::function_ref<EvidenceEmitter> Emit, |
Googler | 7191949 | 2023-10-23 15:44:06 -0700 | [diff] [blame] | 338 | USRCache &USRCache, const PreviousInferences PreviousInferences) { |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 339 | const FunctionDecl *Func = dyn_cast<FunctionDecl>(&Decl); |
| 340 | if (!Func || !Func->doesThisDeclarationHaveABody()) { |
| 341 | return llvm::createStringError( |
| 342 | llvm::inconvertibleErrorCode(), |
| 343 | "Implementation must be a function with a body."); |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 344 | } |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 345 | |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 346 | llvm::Expected<dataflow::ControlFlowContext> ControlFlowContext = |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 347 | dataflow::ControlFlowContext::build(*Func); |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 348 | if (!ControlFlowContext) return ControlFlowContext.takeError(); |
| 349 | |
| 350 | DataflowAnalysisContext AnalysisContext( |
Sam McCall | b23bf3b | 2023-09-14 09:06:17 -0700 | [diff] [blame] | 351 | std::make_unique<dataflow::WatchedLiteralsSolver>(100000)); |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 352 | Environment Environment(AnalysisContext, *Func); |
| 353 | PointerNullabilityAnalysis Analysis( |
| 354 | Decl.getDeclContext()->getParentASTContext()); |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 355 | std::vector<std::pair<PointerTypeNullability, Slot>> InferableSlots; |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 356 | auto Parameters = Func->parameters(); |
Sam McCall | 83bc55c | 2023-07-17 09:47:17 -0700 | [diff] [blame] | 357 | for (auto I = 0; I < Parameters.size(); ++I) { |
Googler | 1cf79e4 | 2023-07-17 14:31:35 -0700 | [diff] [blame] | 358 | auto T = Parameters[I]->getType().getNonReferenceType(); |
Martin Brænne | 7a8d25c | 2023-08-23 03:52:30 -0700 | [diff] [blame] | 359 | if (isSupportedPointerType(T) && !evidenceKindFromDeclaredType(T)) { |
Googler | f1f793d | 2023-10-19 07:51:34 -0700 | [diff] [blame] | 360 | InferableSlots.push_back( |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 361 | std::make_pair(Analysis.assignNullabilityVariable( |
Sam McCall | 83bc55c | 2023-07-17 09:47:17 -0700 | [diff] [blame] | 362 | Parameters[I], AnalysisContext.arena()), |
| 363 | paramSlot(I))); |
Googler | e9210aa | 2023-07-13 10:55:06 -0700 | [diff] [blame] | 364 | } |
| 365 | } |
Googler | 31cc2d3 | 2023-10-23 15:48:20 -0700 | [diff] [blame^] | 366 | const auto &InferableSlotsConstraint = |
Googler | 7191949 | 2023-10-23 15:44:06 -0700 | [diff] [blame] | 367 | getInferableSlotsAsInferredOrUnknownConstraint( |
| 368 | InferableSlots, PreviousInferences, USRCache, AnalysisContext.arena(), |
| 369 | Decl); |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 370 | |
Sam McCall | b23bf3b | 2023-09-14 09:06:17 -0700 | [diff] [blame] | 371 | return dataflow::runDataflowAnalysis( |
| 372 | *ControlFlowContext, Analysis, Environment, |
| 373 | [&](const CFGElement &Element, |
| 374 | const dataflow::DataflowAnalysisState< |
| 375 | PointerNullabilityLattice> &State) { |
Googler | ac9ac80 | 2023-10-19 09:09:35 -0700 | [diff] [blame] | 376 | collectEvidenceFromElement(InferableSlots, |
Googler | 7191949 | 2023-10-23 15:44:06 -0700 | [diff] [blame] | 377 | InferableSlotsConstraint, Element, |
| 378 | State.Env, Emit); |
Sam McCall | b23bf3b | 2023-09-14 09:06:17 -0700 | [diff] [blame] | 379 | }) |
| 380 | .takeError(); |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 381 | } |
Sam McCall | bd1a6e5 | 2023-07-14 01:04:11 -0700 | [diff] [blame] | 382 | |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 383 | void collectEvidenceFromTargetDeclaration( |
| 384 | const clang::Decl &D, llvm::function_ref<EvidenceEmitter> Emit) { |
Sam McCall | bd1a6e5 | 2023-07-14 01:04:11 -0700 | [diff] [blame] | 385 | // For now, we can only describe the nullability of functions. |
| 386 | const auto *Fn = dyn_cast<clang::FunctionDecl>(&D); |
Sam McCall | ebcc123 | 2023-07-14 11:36:44 -0700 | [diff] [blame] | 387 | if (!Fn) return; |
Sam McCall | bd1a6e5 | 2023-07-14 01:04:11 -0700 | [diff] [blame] | 388 | |
Sam McCall | 83bc55c | 2023-07-17 09:47:17 -0700 | [diff] [blame] | 389 | if (auto K = evidenceKindFromDeclaredType(Fn->getReturnType())) |
Sam McCall | cfd2dd1 | 2023-07-18 19:35:21 -0700 | [diff] [blame] | 390 | Emit(*Fn, SLOT_RETURN_TYPE, *K, Fn->getReturnTypeSourceRange().getBegin()); |
Sam McCall | bd1a6e5 | 2023-07-14 01:04:11 -0700 | [diff] [blame] | 391 | for (unsigned I = 0; I < Fn->param_size(); ++I) { |
Sam McCall | 83bc55c | 2023-07-17 09:47:17 -0700 | [diff] [blame] | 392 | if (auto K = evidenceKindFromDeclaredType(Fn->getParamDecl(I)->getType())) |
Sam McCall | cfd2dd1 | 2023-07-18 19:35:21 -0700 | [diff] [blame] | 393 | Emit(*Fn, paramSlot(I), *K, Fn->getParamDecl(I)->getTypeSpecStartLoc()); |
Sam McCall | bd1a6e5 | 2023-07-14 01:04:11 -0700 | [diff] [blame] | 394 | } |
Sam McCall | bd1a6e5 | 2023-07-14 01:04:11 -0700 | [diff] [blame] | 395 | } |
| 396 | |
Sam McCall | 296d070 | 2023-07-14 13:32:57 -0700 | [diff] [blame] | 397 | EvidenceSites EvidenceSites::discover(ASTContext &Ctx) { |
| 398 | struct Walker : public RecursiveASTVisitor<Walker> { |
| 399 | EvidenceSites Out; |
| 400 | |
| 401 | // We do want to see concrete code, including function instantiations. |
| 402 | bool shouldVisitTemplateInstantiations() const { return true; } |
| 403 | |
| 404 | bool VisitFunctionDecl(const FunctionDecl *FD) { |
| 405 | if (isInferenceTarget(*FD)) Out.Declarations.push_back(FD); |
| 406 | |
| 407 | // Visiting template instantiations is fine, these are valid functions! |
| 408 | // But we'll be limited in what we can infer. |
| 409 | bool IsUsefulImplementation = |
| 410 | FD->doesThisDeclarationHaveABody() && |
| 411 | // We will not get anywhere with dependent code. |
| 412 | !FD->isDependentContext(); |
| 413 | if (IsUsefulImplementation) Out.Implementations.push_back(FD); |
| 414 | |
| 415 | return true; |
| 416 | } |
| 417 | }; |
| 418 | |
| 419 | Walker W; |
| 420 | W.TraverseAST(Ctx); |
| 421 | return std::move(W.Out); |
| 422 | } |
| 423 | |
Googler | 113194c | 2023-07-12 11:03:47 -0700 | [diff] [blame] | 424 | } // namespace clang::tidy::nullability |