blob: 1d8f1cee3e1292b4f7cecca452b548ea73033171 [file] [log] [blame]
// Part of the Crubit project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "lifetime_analysis/lifetime_analysis.h"
#include <iostream>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <variant>
#include <vector>
#include "lifetime_analysis/builtin_lifetimes.h"
#include "lifetime_analysis/object.h"
#include "lifetime_analysis/object_repository.h"
#include "lifetime_analysis/object_set.h"
#include "lifetime_analysis/pointer_compatibility.h"
#include "lifetime_analysis/points_to_map.h"
#include "lifetime_analysis/visit_lifetimes.h"
#include "lifetime_annotations/function_lifetimes.h"
#include "lifetime_annotations/lifetime.h"
#include "lifetime_annotations/pointee_type.h"
#include "lifetime_annotations/type_lifetimes.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/OperationKinds.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/Type.h"
#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
namespace clang {
namespace tidy {
namespace lifetimes {
namespace {
class TransferStmtVisitor
: public clang::StmtVisitor<TransferStmtVisitor,
std::optional<std::string>> {
public:
TransferStmtVisitor(
ObjectRepository& object_repository, PointsToMap& points_to_map,
const clang::FunctionDecl* func,
const llvm::DenseMap<const clang::FunctionDecl*,
FunctionLifetimesOrError>& callee_lifetimes,
const DiagnosticReporter& diag_reporter)
: object_repository_(object_repository),
points_to_map_(points_to_map),
func_(func),
callee_lifetimes_(callee_lifetimes),
diag_reporter_(diag_reporter) {}
std::optional<std::string> VisitExpr(const clang::Expr* expr);
std::optional<std::string> VisitDeclRefExpr(
const clang::DeclRefExpr* decl_ref);
std::optional<std::string> VisitStringLiteral(
const clang::StringLiteral* strlit);
std::optional<std::string> VisitCastExpr(const clang::CastExpr* cast);
std::optional<std::string> VisitReturnStmt(
const clang::ReturnStmt* return_stmt);
std::optional<std::string> VisitDeclStmt(const clang::DeclStmt* decl_stmt);
std::optional<std::string> VisitUnaryOperator(const clang::UnaryOperator* op);
std::optional<std::string> VisitArraySubscriptExpr(
const clang::ArraySubscriptExpr* subscript);
std::optional<std::string> VisitBinaryOperator(
const clang::BinaryOperator* op);
std::optional<std::string> VisitConditionalOperator(
const clang::ConditionalOperator* op);
std::optional<std::string> VisitInitListExpr(
const clang::InitListExpr* init_list);
std::optional<std::string> VisitMaterializeTemporaryExpr(
const clang::MaterializeTemporaryExpr* temporary_expr);
std::optional<std::string> VisitMemberExpr(const clang::MemberExpr* member);
std::optional<std::string> VisitCXXThisExpr(
const clang::CXXThisExpr* this_expr);
std::optional<std::string> VisitCallExpr(const clang::CallExpr* call);
std::optional<std::string> VisitCXXConstructExpr(
const clang::CXXConstructExpr* construct_expr);
private:
ObjectRepository& object_repository_;
PointsToMap& points_to_map_;
const clang::FunctionDecl* func_;
const llvm::DenseMap<const clang::FunctionDecl*, FunctionLifetimesOrError>&
callee_lifetimes_;
const DiagnosticReporter& diag_reporter_;
};
} // namespace
void TransferInitializer(const Object* dest, clang::QualType type,
const ObjectRepository& object_repository,
const clang::Expr* init_expr,
PointsToMap& points_to_map) {
type = type.getCanonicalType();
if (type->isArrayType()) {
type = type->castAsArrayTypeUnsafe()->getElementType();
}
// Initializer lists are handled one member/field at a time.
if (type->isRecordType()) {
if (auto init_list_expr = clang::dyn_cast<clang::InitListExpr>(init_expr)) {
// We assume that initializers are always the semantic form of
// InitListExpr.
assert(init_list_expr->isSemanticForm());
size_t init = 0;
for (auto f : type->getAs<clang::RecordType>()->getDecl()->fields()) {
assert(init < init_list_expr->getNumInits());
auto field_init = init_list_expr->getInit(init);
++init;
TransferInitializer(object_repository.GetFieldObject(dest, f),
f->getType(), object_repository, field_init,
points_to_map);
}
return;
}
}
if (type->isPointerType() || type->isReferenceType() ||
type->isStructureOrClassType()) {
ObjectSet init_points_to = points_to_map.GetExprObjectSet(init_expr);
// It's important to use "Extend" (not "Set") here because we process
// initializers for member variables only _after_ the dataflow analysis has
// run.
points_to_map.ExtendPointerPointsToSet(dest, init_points_to);
}
}
namespace {
void SetPointerPointsToSetRespectingTypes(const Object* pointer,
const ObjectSet& points_to,
PointsToMap& points_to_map,
clang::ASTContext& ast_context) {
assert(pointer->Type()->isPointerType() ||
pointer->Type()->isReferenceType());
ObjectSet points_to_filtered;
for (auto object : points_to) {
if (MayPointTo(pointer->Type(), object->Type(), ast_context)) {
points_to_filtered.Add(object);
}
}
points_to_map.SetPointerPointsToSet(pointer, points_to_filtered);
}
void SetAllPointersPointsToSetRespectingTypes(const ObjectSet& pointers,
const ObjectSet& points_to,
PointsToMap& points_to_map,
clang::ASTContext& ast_context) {
for (auto pointer : pointers) {
SetPointerPointsToSetRespectingTypes(pointer, points_to, points_to_map,
ast_context);
}
}
void CollectLifetimes(
const Object* arg_object, clang::QualType type,
const ValueLifetimes& value_lifetimes, const PointsToMap& points_to_map,
const ObjectRepository& object_repository,
llvm::DenseMap<Lifetime, ObjectSet>& lifetime_to_object_set) {
class Visitor : public LifetimeVisitor {
public:
Visitor(const ObjectRepository& object_repository,
const PointsToMap& points_to_map,
llvm::DenseMap<Lifetime, ObjectSet>& lifetime_to_object_set)
: object_repository_(object_repository),
points_to_map_(points_to_map),
lifetime_to_object_set_(lifetime_to_object_set) {}
const Object* GetFieldObject(const ObjectSet& objects,
const clang::FieldDecl* field) override {
// All the objects have the same field.
assert(!objects.empty());
return object_repository_.GetFieldObject(*objects.begin(), field);
}
const Object* GetBaseClassObject(const ObjectSet& objects,
clang::QualType base) override {
// All the objects have the same base.
assert(!objects.empty());
return object_repository_.GetBaseClassObject(*objects.begin(), base);
}
ObjectSet Traverse(const ObjectLifetimes& lifetimes,
const ObjectSet& objects,
int /*pointee_depth*/) override {
lifetime_to_object_set_[lifetimes.GetLifetime()].Add(objects);
return points_to_map_.GetPointerPointsToSet(objects);
}
private:
const ObjectRepository& object_repository_;
const PointsToMap& points_to_map_;
llvm::DenseMap<Lifetime, ObjectSet>& lifetime_to_object_set_;
};
Visitor visitor(object_repository, points_to_map, lifetime_to_object_set);
VisitLifetimes({arg_object}, type,
ObjectLifetimes(arg_object->GetLifetime(), value_lifetimes),
visitor);
}
void PropagateLifetimesToPointees(
const Object* arg_object, clang::QualType type,
const ValueLifetimes& value_lifetimes, PointsToMap& points_to_map,
ObjectRepository& object_repository,
const llvm::DenseMap<Lifetime, ObjectSet>& lifetime_to_object_set,
clang::ASTContext& ast_context) {
class Visitor : public LifetimeVisitor {
public:
Visitor(ObjectRepository& object_repository, PointsToMap& points_to_map,
const llvm::DenseMap<Lifetime, ObjectSet>& lifetime_to_object_set,
clang::ASTContext& ast_context)
: object_repository_(object_repository),
points_to_map_(points_to_map),
lifetime_to_object_set_(lifetime_to_object_set),
ast_context_(ast_context) {}
const Object* GetFieldObject(const ObjectSet& objects,
const clang::FieldDecl* field) override {
// All the objects have the same field.
assert(!objects.empty());
return object_repository_.GetFieldObject(*objects.begin(), field);
}
const Object* GetBaseClassObject(const ObjectSet& objects,
clang::QualType base) override {
// All the objects have the same base.
assert(!objects.empty());
return object_repository_.GetBaseClassObject(*objects.begin(), base);
}
ObjectSet Traverse(const ObjectLifetimes& lifetimes,
const ObjectSet& objects,
int /*pointee_depth*/) override {
clang::QualType type = lifetimes.GetValueLifetimes().Type();
ObjectSet points_to_original =
points_to_map_.GetPointerPointsToSet(objects);
if (!type.isConstQualified() && !PointeeType(type).isNull()) {
Lifetime pointee_lifetime =
lifetimes.GetValueLifetimes().GetPointeeLifetimes().GetLifetime();
ObjectSet points_to = lifetime_to_object_set_.lookup(pointee_lifetime);
// If this is pointer-to-static, assume the callee can modify it to
// point to a static object that we don't know about.
if (pointee_lifetime == Lifetime::Static()) {
points_to.Add(
object_repository_.CreateStaticObject(PointeeType(type)));
}
SetAllPointersPointsToSetRespectingTypes(objects, points_to,
points_to_map_, ast_context_);
assert(points_to_map_.GetPointerPointsToSet(objects).Contains(
points_to_original));
}
// Return the original points-to set, not the modified one. The original
// points-to set is sufficient because it captures the arguments that
// were passed to the function, but it doesn't contain any possibly
// spurious edges that may have been inserted by the logic above, which
// can reduce the precision of the analysis.
return points_to_original;
}
private:
ObjectRepository& object_repository_;
PointsToMap& points_to_map_;
const llvm::DenseMap<Lifetime, ObjectSet>& lifetime_to_object_set_;
clang::ASTContext& ast_context_;
};
Visitor visitor(object_repository, points_to_map, lifetime_to_object_set,
ast_context);
VisitLifetimes({arg_object}, type,
ObjectLifetimes(arg_object->GetLifetime(), value_lifetimes),
visitor);
}
bool AllStatic(const ValueLifetimes& lifetimes) {
return !lifetimes.HasAny([](Lifetime l) { return l != Lifetime::Static(); });
}
} // namespace
std::optional<ObjectSet> TransferLifetimesForCall(
const clang::Expr* call, const std::vector<FunctionParameter>& fn_params,
const ValueLifetimes& return_lifetimes, ObjectRepository& object_repository,
PointsToMap& points_to_map, clang::ASTContext& ast_context) {
// TODO(mboehme): The following description says what we _want_ to do, but
// this isn't what we actually do right now. Modify the code so that it
// corresponds to the description, then remove this TODO.
//
// Overall approach:
// - Step 1: Find all objects accessible by the callee.
// This means finding all objects transitively accessible from the argument
// pointees passed to the callee. As part of this step, we establish a
// mapping from callee lifetimes to caller lifetimes, which will be used in
// subsequent steps to determine whether a given object (whose lifetime is
// a caller lifetime) has a given callee lifetime. Note that, in general, a
// single callee lifetime may correspond to multiple caller lifetimes.
//
// - Step 2: Perform all modifications the callee could make to the points-to
// map that are permissible from a lifetime and type system point of view.
// Specifically, for every non-const pointer accessible by the callee:
// - Determine the callee lifetime 'l associated with that pointer.
// - For each object accessible by the callee, determine whether it has
// callee lifetime 'l (using the mapping established in step 1) and
// and whether the type of the pointer is compatible with the type of the
// object. If both of these conditions are met, add an edge from the
// pointer to the object into the points-to map.
// It remains to be explained what "compatible" means above. The most
// principled approach would be to use C++'s strict aliasing rules, but some
// real-world code unfortunately violates the strict aliasing rules.
// Instead, we make the compatibility rule more permissive than strict
// aliasing; we expect we will need some experimentation to achieve a
// good tradeoff between the following considerations:
// - If we make the compatibility rule too strict, we miss some points-to
// edges that may be introduced by real-world code (even though that code
// is in violation of the strict aliasing rule), and the analysis result
// becomes wrong.
// - If we make the compatibility rule too permissive, we allow spurious
// edges in the points-to map, and the analysis result becomes overly
// restrictive.
// We also need to consider that the type returned by Object::Type() might
// not be identical to the actual dynamic type of the object. If the object
// was passed in to the function through a pointer or reference to class
// type, the dynamic type of the object might be a derived class of the
// type we assumed for the object.
//
// - Step 3: Determine points-to set for the return value.
// This is the set of all objects accessible by the callee that
// - are compatible with the callee's return type, and
// - conform to the lifetime annotations on the return type.
// The latter point means that every object that is transitively reachable
// from the original object has a lifetime that corresponds to the callee
// lifetime implied by the annotation.
//
// Some additional considerations apply if the callee signature contains the
// 'static lifetime, either in the parameters or the return value:
// - Any objects that are associated with the static lifetime in the callee
// must be forced to have static lifetime.
// We have no way of doing this directly, as we cannot mutate the lifetime
// of the object (and, in any case, such a mutation would be global and not
// limited to the current point in the program flow).
// Instead, for each such object, we synthesize a pointer with static
// lifetime and make it point at the object. Later, in
// PropagateStaticToPointees(), this will cause us to assign static lifetime
// to the object.
// A cleaner solution to this would be to explicitly express "outlives"
// constraints in the lattice. This might also help more generally to
// simplify the logic associated with static lifetimes, but it would also be
// a more invasive change.
//
// - Any pointer or reference may point to an object of static lifetime. This
// has the following implications:
// - In step 2, when adding edges to the points-to map, we always add edges
// to objects of static lifetime if their type is compatible with the
// type of the pointer.
// - In step 3, an object of static lifetime conforms to any callee lifetime
// if that lifetime occurs in covariant position.
//
// - The callee may have access to objects of static lifetime that are not
// passed as arguments, in addition to the ones that are accessible from the
// arguments.
// Because of this, for any non-const pointer accessible by the callee, we
// add a points-to edge to a newly created static object of the appropriate
// type.
// This does cause us to add a lot of static objects to the graph that we
// do not expect to occur in reality. If this turns out to have undesired
// effects, we could use the following alternative approach as a compromise:
// - In step 2, if the non-const pointer is associated with static lifetime,
// does not already point to an object of static lifetime and would not
// gain an edge to an existing object of static lifetime, create a new
// object of static lifetime and the appropriate type and add an edge
// from the pointer to the newly created object.
// - In step 3, if we obtain an empty points-to set for the return value
// because the return type contains 'static lifetime annotations and the
// existing objects do not conform to these annotations, add newly
// created static objects to the points-to map in suitable places so that
// we can return a non-empty points-to set.
// TODO(mboehme): Investigate whether it's really so bad to add newly
// created static objects in all the places they could theoretically occur.
// If this turns out not to have any adverse effect on the analysis, it
// would be the more principled and simpler thing to do.
assert(call || !return_lifetimes.HasLifetimes());
// Step 1: Create mapping from callee lifetimes to points-to sets.
llvm::DenseMap<Lifetime, ObjectSet> lifetime_to_object_set;
for (auto [type, param_lifetimes, arg_object] : fn_params) {
CollectLifetimes(arg_object, type, param_lifetimes, points_to_map,
object_repository, lifetime_to_object_set);
}
// Force any objects associated with the static lifetime in the callee to have
// static lifetime (see more detailed explanation above).
if (auto iter = lifetime_to_object_set.find(Lifetime::Static());
iter != lifetime_to_object_set.end()) {
for (const Object* object : iter->second) {
const Object* pointer = object_repository.CreateStaticObject(
ast_context.getPointerType(object->Type()));
points_to_map.ExtendPointerPointsToSet(pointer, {object});
}
}
// Step 2: Propagate points-to sets to output parameters.
for (auto [type, param_lifetimes, arg_object] : fn_params) {
PropagateLifetimesToPointees(arg_object, type, param_lifetimes,
points_to_map, object_repository,
lifetime_to_object_set, ast_context);
}
// Step 3: Determine points-to set for the return value.
if (return_lifetimes.HasLifetimes()) {
if (IsInitExprInitializingARecordObject(call)) {
const Object* init_object = object_repository.GetInitializedObject(call);
PropagateLifetimesToPointees(
init_object, call->getType(), return_lifetimes, points_to_map,
object_repository, lifetime_to_object_set, ast_context);
} else {
ObjectSet rval_points_to;
rval_points_to = lifetime_to_object_set.lookup(
return_lifetimes.GetPointeeLifetimes().GetLifetime());
// If this return value is a pointer-to-static, assume the callee can
// return a static object that we don't know about.
if (return_lifetimes.GetPointeeLifetimes().GetLifetime() ==
Lifetime::Static()) {
bool all_static = AllStatic(return_lifetimes);
(void)all_static;
assert(all_static);
rval_points_to.Add(
object_repository.CreateStaticObject(PointeeType(call->getType())));
}
return rval_points_to;
}
}
return std::nullopt;
}
LifetimeLattice LifetimeAnalysis::initialElement() {
return LifetimeLattice(object_repository_.InitialPointsToMap());
}
std::string LifetimeAnalysis::ToString(const LifetimeLattice& state) {
return state.ToString();
}
bool LifetimeAnalysis::IsEqual(const LifetimeLattice& state1,
const LifetimeLattice& state2) {
return state1 == state2;
}
void LifetimeAnalysis::transfer(const clang::Stmt* stmt, LifetimeLattice& state,
clang::dataflow::Environment& /*environment*/) {
if (state.IsError()) return;
TransferStmtVisitor visitor(object_repository_, state.PointsTo(), func_,
callee_lifetimes_, diag_reporter_);
if (std::optional<std::string> err =
visitor.Visit(const_cast<clang::Stmt*>(stmt))) {
state = LifetimeLattice(*err);
}
}
namespace {
std::optional<std::string> TransferStmtVisitor::VisitExpr(
const clang::Expr* expr) {
// Ensure that we don't attempt to analyze code that contains errors.
// This is triggered by TypoExpr and RecoveryExpr, but rather than handling
// these particular expression types individually, we just check
// Expr::containsErrors().
if (expr->containsErrors()) {
return "encountered an expression containing errors";
}
return std::nullopt;
}
std::optional<std::string> TransferStmtVisitor::VisitDeclRefExpr(
const clang::DeclRefExpr* decl_ref) {
auto* decl = decl_ref->getDecl();
if (!clang::isa<clang::VarDecl>(decl) &&
!clang::isa<clang::FunctionDecl>(decl)) {
return std::nullopt;
}
const Object* object = object_repository_.GetDeclObject(decl);
assert(decl_ref->isGLValue() || decl_ref->getType()->isBuiltinType());
clang::QualType type = decl->getType().getCanonicalType();
if (type->isReferenceType()) {
points_to_map_.SetExprObjectSet(
decl_ref, points_to_map_.GetPointerPointsToSet(object));
} else {
points_to_map_.SetExprObjectSet(decl_ref, {object});
}
return std::nullopt;
}
std::optional<std::string> TransferStmtVisitor::VisitStringLiteral(
const clang::StringLiteral* strlit) {
const Object* obj = object_repository_.CreateStaticObject(strlit->getType());
points_to_map_.SetExprObjectSet(strlit, {obj});
return std::nullopt;
}
std::optional<std::string> TransferStmtVisitor::VisitCastExpr(
const clang::CastExpr* cast) {
switch (cast->getCastKind()) {
case clang::CK_LValueToRValue: {
if (cast->getType()->isPointerType()) {
// Converting from a glvalue to a prvalue means that we need to perform
// a dereferencing operation because the objects associated with
// glvalues and prvalues have different meanings:
// - A glvalue is associated with the object identified by the glvalue.
// - A prvalue is only associated with an object if the prvalue is of
// pointer type; the object it is associated with is the object the
// pointer points to.
// See also documentation for PointsToMap.
ObjectSet points_to = points_to_map_.GetPointerPointsToSet(
points_to_map_.GetExprObjectSet(cast->getSubExpr()));
points_to_map_.SetExprObjectSet(cast, points_to);
}
break;
}
case clang::CK_NullToPointer: {
points_to_map_.SetExprObjectSet(cast, {});
break;
}
// These casts are just no-ops from a Object point of view.
case clang::CK_FunctionToPointerDecay:
case clang::CK_BuiltinFnToFnPtr:
case clang::CK_ArrayToPointerDecay:
case clang::CK_UserDefinedConversion:
// Note on CK_UserDefinedConversion: The actual conversion happens in a
// CXXMemberCallExpr that is a subexpression of this CastExpr. The
// CK_UserDefinedConversion is just used to mark the fact that this is a
// user-defined conversion; it's therefore a no-op for our purposes.
case clang::CK_NoOp: {
clang::QualType type = cast->getType().getCanonicalType();
if (type->isPointerType() || cast->isGLValue()) {
points_to_map_.SetExprObjectSet(
cast, points_to_map_.GetExprObjectSet(cast->getSubExpr()));
}
break;
}
case clang::CK_DerivedToBase:
case clang::CK_UncheckedDerivedToBase:
case clang::CK_BaseToDerived:
case clang::CK_Dynamic: {
// These need to be mapped to what the subexpr points to.
// (Simple cases just work okay with this; may need to be revisited when
// we add more inheritance support.)
ObjectSet points_to = points_to_map_.GetExprObjectSet(cast->getSubExpr());
points_to_map_.SetExprObjectSet(cast, points_to);
break;
}
case clang::CK_BitCast:
case clang::CK_LValueBitCast:
case clang::CK_IntegralToPointer: {
// We don't support analyzing functions that perform a reinterpret_cast.
diag_reporter_(
func_->getBeginLoc(),
"cannot infer lifetimes because function uses a type-unsafe cast",
clang::DiagnosticIDs::Warning);
diag_reporter_(cast->getBeginLoc(), "type-unsafe cast occurs here",
clang::DiagnosticIDs::Note);
return "type-unsafe cast prevents analysis";
}
default: {
if (cast->isGLValue() ||
cast->getType().getCanonicalType()->isPointerType()) {
llvm::errs() << "Unknown cast type:\n";
cast->dump();
// No-noop casts of pointer types are not handled yet.
llvm::report_fatal_error("unknown cast type encountered");
}
}
}
return std::nullopt;
}
std::optional<std::string> TransferStmtVisitor::VisitReturnStmt(
const clang::ReturnStmt* return_stmt) {
clang::QualType return_type = func_->getReturnType();
// We only need to handle pointers and references.
// For record types, initialization of the return value has already been
// handled in VisitCXXConstructExpr() or VisitInitListExpr(), so nothing
// to do here.
if (!return_type->isPointerType() && !return_type->isReferenceType()) {
return std::nullopt;
}
const clang::Expr* ret_expr = return_stmt->getRetValue();
// This occurs when computing `ret_expr`s result includes creating temporary
// objects with destructors. We want to find the value to be returned inside
// the ExprWithCleanups.
//
// The PointsToMap::GetExprObjectSet() function could do this but it doesn't
// understand the context from which it is being called. This operation needs
// to be done only in cases where we are leaving scope - that is, the return
// statement. And the return statement also needs to look for initializers in
// its sub expressions, after looking inside ExprWithCleanups.
//
// That means GetExprObjectSet() would need to also look for initializers but
// we don't want to do this on every call to GetExprObjectSet().
if (auto cleanups = clang::dyn_cast<clang::ExprWithCleanups>(ret_expr)) {
ret_expr = cleanups->getSubExpr();
}
ObjectSet expr_points_to = points_to_map_.GetExprObjectSet(ret_expr);
points_to_map_.ExtendPointerPointsToSet(object_repository_.GetReturnObject(),
expr_points_to);
return std::nullopt;
}
std::optional<std::string> TransferStmtVisitor::VisitDeclStmt(
const clang::DeclStmt* decl_stmt) {
for (const clang::Decl* decl : decl_stmt->decls()) {
if (const auto* var_decl = clang::dyn_cast<clang::VarDecl>(decl)) {
const Object* var_object = object_repository_.GetDeclObject(var_decl);
// Don't need to record initializers because initialization has already
// happened in VisitCXXConstructExpr(), VisitInitListExpr(), or
// VisitCallExpr().
if (var_decl->hasInit() && !var_decl->getType()->isRecordType()) {
TransferInitializer(var_object, var_decl->getType(), object_repository_,
var_decl->getInit(), points_to_map_);
}
}
}
return std::nullopt;
}
std::optional<std::string> TransferStmtVisitor::VisitUnaryOperator(
const clang::UnaryOperator* op) {
if (!op->isGLValue() && !op->getType()->isPointerType() &&
!op->getType()->isArrayType()) {
return std::nullopt;
}
ObjectSet sub_points_to = points_to_map_.GetExprObjectSet(op->getSubExpr());
// Maybe surprisingly, the code here doesn't do any actual address-taking or
// dereferencing.
// This is because AddrOf and Deref really only do a reinterpretation:
// - AddrOf reinterprets a glvalue of type T as a prvalue of type T*
// - Deref reinterprets an prvalue of type T* as a glvalue of type T
// (See also the assertions below.)
// The actual dereferencing happens in the LValueToRValue CastExpr,
// see TransferCastExpr().
switch (op->getOpcode()) {
case clang::UO_AddrOf:
assert(!op->isGLValue());
assert(op->getSubExpr()->isGLValue());
points_to_map_.SetExprObjectSet(op, sub_points_to);
break;
case clang::UO_Deref:
assert(op->isGLValue());
assert(!op->getSubExpr()->isGLValue());
points_to_map_.SetExprObjectSet(op, sub_points_to);
break;
case clang::UO_PostInc:
case clang::UO_PostDec:
assert(!op->isGLValue());
assert(op->getSubExpr()->isGLValue());
points_to_map_.SetExprObjectSet(
op, points_to_map_.GetPointerPointsToSet(sub_points_to));
break;
case clang::UO_PreInc:
case clang::UO_PreDec:
assert(op->isGLValue());
assert(op->getSubExpr()->isGLValue());
points_to_map_.SetExprObjectSet(op, sub_points_to);
break;
default:
break;
}
return std::nullopt;
}
std::optional<std::string> TransferStmtVisitor::VisitArraySubscriptExpr(
const clang::ArraySubscriptExpr* subscript) {
// For our purposes here, a subscripting operation is equivalent to a
// dereference on its base - we don't make a distinction between different
// lifetimes in an array. This effectively merges the points-to sets of all
// elements in the array. See <internal link> for why we
// don't track individual array elements.
ObjectSet sub_points_to =
points_to_map_.GetExprObjectSet(subscript->getBase());
assert(subscript->isGLValue());
assert(!subscript->getBase()->isGLValue());
points_to_map_.SetExprObjectSet(subscript, sub_points_to);
return std::nullopt;
}
std::optional<std::string> TransferStmtVisitor::VisitBinaryOperator(
const clang::BinaryOperator* op) {
switch (op->getOpcode()) {
case clang::BO_Assign: {
assert(op->getLHS()->isGLValue());
ObjectSet lhs_points_to = points_to_map_.GetExprObjectSet(op->getLHS());
points_to_map_.SetExprObjectSet(op, lhs_points_to);
// Because of how we handle reference-like structs, a member access to a
// non-reference-like field in a struct might still produce lifetimes. We
// don't want to change points-to sets in those cases.
if (!op->getLHS()->getType()->isPointerType()) break;
ObjectSet rhs_points_to = points_to_map_.GetExprObjectSet(op->getRHS());
for (const Object* pointer : lhs_points_to) {
// We can overwrite (instead of extend) the destination points-to-set
// only in very specific circumstances:
// - We need to know unambiguously what the LHS refers to, so that we
// know we're definitely writing to a particular object, and
// - That destination object needs to be "single-valued" (it can't be
// an array, for example).
if (lhs_points_to.size() == 1 &&
object_repository_.GetObjectValueType(pointer) ==
ObjectRepository::ObjectValueType::kSingleValued) {
points_to_map_.SetPointerPointsToSet(pointer, rhs_points_to);
} else {
points_to_map_.ExtendPointerPointsToSet(pointer, rhs_points_to);
}
}
break;
}
case clang::BO_Add:
case clang::BO_Sub: {
// Pointer arithmetic.
// We are only interested in the case in which exactly one of the two
// operands is a pointer (in particular we want to exclude int* - int*).
if (op->getLHS()->getType()->isPointerType() ^
op->getRHS()->getType()->isPointerType()) {
if (op->getLHS()->getType()->isPointerType()) {
points_to_map_.SetExprObjectSet(
op, points_to_map_.GetExprObjectSet(op->getLHS()));
} else {
points_to_map_.SetExprObjectSet(
op, points_to_map_.GetExprObjectSet(op->getRHS()));
}
}
break;
}
default:
break;
}
return std::nullopt;
}
std::optional<std::string> TransferStmtVisitor::VisitConditionalOperator(
const clang::ConditionalOperator* op) {
clang::QualType type = op->getType().getCanonicalType();
if (op->isGLValue() || type->isPointerType()) {
ObjectSet points_to_true =
points_to_map_.GetExprObjectSet(op->getTrueExpr());
ObjectSet points_to_false =
points_to_map_.GetExprObjectSet(op->getFalseExpr());
points_to_map_.SetExprObjectSet(op, points_to_true.Union(points_to_false));
}
return std::nullopt;
}
std::optional<std::string> TransferStmtVisitor::VisitInitListExpr(
const clang::InitListExpr* init_list) {
if (init_list->isSyntacticForm()) {
// We are only interested in the semantic form, which is fully realized,
// and is the one considered to be the initializer.
return std::nullopt;
}
if (IsInitExprInitializingARecordObject(init_list)) {
if (init_list->isTransparent()) {
// A transparent initializer list does nothing, the actual initializer
// terminating expression is within, and has already transferred lifetimes
// up to the object being initialized.
return std::nullopt;
}
// The object set for each field should be pointing to the initializers.
const Object* init_object =
object_repository_.GetInitializedObject(init_list);
TransferInitializer(init_object, init_list->getType(), object_repository_,
init_list, points_to_map_);
} else {
// If the InitListExpr is not initializing a record object, we assume it's
// initializing an array or a reference and hence associate the InitListExpr
// with the union of the points-to sets of the initializers (as the analysis
// is array-insensitive).
ObjectSet targets;
for (clang::Expr* expr : init_list->inits()) {
// If we are constructing an initializer list of non-pointer types, we
// don't need to do anything here. Note that initializer list elements
// must all have the same type in this case.
if (PointeeType(expr->getType()).isNull() && !expr->isGLValue()) {
return std::nullopt;
}
targets.Add(points_to_map_.GetExprObjectSet(expr));
}
points_to_map_.SetExprObjectSet(init_list, std::move(targets));
}
return std::nullopt;
}
std::optional<std::string> TransferStmtVisitor::VisitMaterializeTemporaryExpr(
const clang::MaterializeTemporaryExpr* temporary_expr) {
const Object* temp_object =
object_repository_.GetTemporaryObject(temporary_expr);
points_to_map_.SetExprObjectSet(temporary_expr, {temp_object});
return std::nullopt;
}
std::optional<std::string> TransferStmtVisitor::VisitMemberExpr(
const clang::MemberExpr* member) {
ObjectSet struct_points_to =
points_to_map_.GetExprObjectSet(member->getBase());
if (const auto* method =
clang::dyn_cast<clang::CXXMethodDecl>(member->getMemberDecl())) {
// It doesn't really make sense to associate an object set with a non-static
// member function.
// If the member function is being called, we're not interested in its
// "value" anyway. If the non-static member function is used outside of a
// function call, then, it's a pointer-to-member, but those aren't
// really pointers anyway, and we'll need special treatment for them.
if (method->isStatic()) {
points_to_map_.SetExprObjectSet(
member, {object_repository_.GetDeclObject(method)});
}
return std::nullopt;
}
auto field = clang::dyn_cast<clang::FieldDecl>(member->getMemberDecl());
if (field == nullptr) {
llvm::report_fatal_error("indirect member access is not supported yet");
}
ObjectSet expr_points_to =
object_repository_.GetFieldObject(struct_points_to, field);
if (field->getType()->isReferenceType()) {
expr_points_to = points_to_map_.GetPointerPointsToSet(expr_points_to);
}
points_to_map_.SetExprObjectSet(member, expr_points_to);
return std::nullopt;
}
std::optional<std::string> TransferStmtVisitor::VisitCXXThisExpr(
const clang::CXXThisExpr* this_expr) {
std::optional<const Object*> this_object = object_repository_.GetThisObject();
assert(this_object.has_value());
points_to_map_.SetExprObjectSet(this_expr, ObjectSet{this_object.value()});
return std::nullopt;
}
// Collects all function parameters, including (if this is a member call) the
// implicit this argument.
std::vector<FunctionParameter> CollectFunctionParameters(
const clang::CallExpr* call, const clang::FunctionDecl* callee,
const FunctionLifetimes& callee_lifetimes,
const ObjectRepository& object_repository) {
std::vector<FunctionParameter> fn_params;
if (clang::isa<clang::CXXOperatorCallExpr>(call) &&
clang::isa<clang::CXXMethodDecl>(callee)) {
// `this` is considered an argument in this case (but not a parameter on its
// definition).
assert(call->getNumArgs() == callee->getNumParams() + 1);
// Handle the `this` argument.
{
fn_params.push_back(FunctionParameter{
clang::dyn_cast<clang::CXXMethodDecl>(callee)->getThisType(),
callee_lifetimes.GetThisLifetimes(),
object_repository.GetCallExprThisPointer(call)});
}
// Handle all other arguments.
for (size_t i = 1; i < call->getNumArgs(); i++) {
fn_params.push_back(FunctionParameter{
callee->getParamDecl(i - 1)->getType().getCanonicalType(),
callee_lifetimes.GetParamLifetimes(i - 1),
object_repository.GetCallExprArgumentObject(call, i)});
}
} else {
// We check <= instead of == because of default arguments.
assert(call->getNumArgs() <= callee->getNumParams());
for (size_t i = 0; i < call->getNumArgs(); i++) {
fn_params.push_back(FunctionParameter{
callee->getParamDecl(i)->getType().getCanonicalType(),
callee_lifetimes.GetParamLifetimes(i),
object_repository.GetCallExprArgumentObject(call, i)});
}
if (const auto* member_call =
clang::dyn_cast<clang::CXXMemberCallExpr>(call)) {
// The callee is always a MemberExpr.
// - If the call uses `->`, the object argument should be a prvalue that
// is a pointer to the struct.
// - If the call uses `.`, the object argument should be a glvalue of
// struct type.
assert(clang::isa<clang::MemberExpr>(member_call->getCallee()));
assert(clang::dyn_cast<clang::MemberExpr>(member_call->getCallee())
->isArrow() ^
member_call->getImplicitObjectArgument()->isGLValue());
// This is the type of the function *parameter*, not of the argument.
// This is always a pointer, even if the argument is a reference, but as
// we don't treat pointers or references differently, this is not an
// issue.
fn_params.push_back(
FunctionParameter{member_call->getMethodDecl()->getThisType(),
callee_lifetimes.GetThisLifetimes(),
object_repository.GetCallExprThisPointer(call)});
}
}
return fn_params;
}
void SetExprObjectSetRespectingType(const clang::Expr* expr,
const ObjectSet& points_to,
PointsToMap& points_to_map,
clang::ASTContext& ast_context) {
ObjectSet points_to_filtered;
for (auto object : points_to) {
if (expr->isGLValue()) {
if (PointeesCompatible(expr->getType(), object->Type(), ast_context)) {
points_to_filtered.Add(object);
}
} else {
clang::QualType expr_type = expr->getType();
// CXXConstructExpr is a special case -- it is a non-glvalue with the type
// of the constructed object itself. Non-pointer, non-glvalue expressions
// like this are not usually allowed to be associated with a points-to
// set, but CXXConstructExpr is an exception. We need to associate it with
// an `Object` representing the newly constructed object so that
// TransferInitializer() can then retrieve this object. So we pretend that
// the type is actually "pointer to object" to give MayPointTo() what it
// expects.
//
// Note that we will not see clang::InitListExpr here, which is the other
// form of initializer along with CXXConstructExpr. That is because we
// come here through a "call" and we don't consider an initializer list to
// be a "call" or treat it as such.
assert(!clang::isa<clang::InitListExpr>(expr));
if (clang::isa<clang::CXXConstructExpr>(expr)) {
expr_type = ast_context.getPointerType(expr_type);
}
if (MayPointTo(expr_type, object->Type(), ast_context)) {
points_to_filtered.Add(object);
}
}
}
points_to_map.SetExprObjectSet(expr, points_to_filtered);
}
std::optional<std::string> TransferStmtVisitor::VisitCallExpr(
const clang::CallExpr* call) {
llvm::SmallVector<const clang::FunctionDecl*> callees;
const clang::FunctionDecl* direct_callee = call->getDirectCallee();
if (direct_callee) {
// This code path is needed for non-static member functions, as those don't
// have an `Object` for their callees.
callees.push_back(direct_callee);
} else {
const clang::Expr* callee = call->getCallee();
for (const auto& object : points_to_map_.GetExprObjectSet(callee)) {
const clang::FunctionDecl* func = object->GetFunc();
assert(func);
callees.push_back(func);
}
}
std::optional<ObjectSet> call_points_to;
for (const auto* callee : callees) {
bool is_builtin = callee->getBuiltinID() != 0;
FunctionLifetimesOrError builtin_callee_lifetimes_or_error;
if (is_builtin) {
builtin_callee_lifetimes_or_error = GetBuiltinLifetimes(callee);
} else {
assert(callee_lifetimes_.count(callee->getCanonicalDecl()));
}
const FunctionLifetimesOrError& callee_lifetimes_or_error =
is_builtin ? builtin_callee_lifetimes_or_error
: callee_lifetimes_.lookup(callee->getCanonicalDecl());
if (!std::holds_alternative<FunctionLifetimes>(callee_lifetimes_or_error)) {
return "No lifetimes for callee '" + callee->getNameAsString() + "': " +
std::get<FunctionAnalysisError>(callee_lifetimes_or_error).message;
}
FunctionLifetimes callee_lifetimes =
std::get<FunctionLifetimes>(callee_lifetimes_or_error);
bool is_member_operator = clang::isa<clang::CXXOperatorCallExpr>(call) &&
clang::isa<clang::CXXMethodDecl>(callee);
for (size_t i = is_member_operator ? 1 : 0; i < call->getNumArgs(); i++) {
// We can't just use SetPointerPointsToSet here because call->getArg(i)
// might not have an ObjectSet (for example for integer constants); it
// also may be needed for struct initialization.
// Note that we don't need to worry about possibly extending the
// PointsToSet more than needed, as dataflow analysis relies on points-to
// sets never shrinking.
TransferInitializer(
object_repository_.GetCallExprArgumentObject(call, i),
callee->getParamDecl(is_member_operator ? i - 1 : i)->getType(),
object_repository_, call->getArg(i), points_to_map_);
}
if (is_member_operator) {
points_to_map_.SetPointerPointsToSet(
object_repository_.GetCallExprThisPointer(call),
points_to_map_.GetExprObjectSet(call->getArg(0)));
}
if (const auto* member_call =
clang::dyn_cast<clang::CXXMemberCallExpr>(call)) {
points_to_map_.SetPointerPointsToSet(
object_repository_.GetCallExprThisPointer(call),
points_to_map_.GetExprObjectSet(
member_call->getImplicitObjectArgument()));
}
std::vector<FunctionParameter> fn_params = CollectFunctionParameters(
call, callee, callee_lifetimes, object_repository_);
std::optional<ObjectSet> single_call_points_to = TransferLifetimesForCall(
call, fn_params, callee_lifetimes.GetReturnLifetimes(),
object_repository_, points_to_map_, callee->getASTContext());
if (single_call_points_to) {
if (call_points_to) {
call_points_to.value().Add(std::move(single_call_points_to).value());
} else {
call_points_to = std::move(single_call_points_to);
}
}
}
if (call_points_to) {
SetExprObjectSetRespectingType(call, call_points_to.value(), points_to_map_,
callees[0]->getASTContext());
}
return std::nullopt;
}
std::optional<std::string> TransferStmtVisitor::VisitCXXConstructExpr(
const clang::CXXConstructExpr* construct_expr) {
const clang::CXXConstructorDecl* constructor =
construct_expr->getConstructor();
assert(callee_lifetimes_.count(constructor->getCanonicalDecl()));
const FunctionLifetimesOrError& callee_lifetimes_or_error =
callee_lifetimes_.lookup(constructor->getCanonicalDecl());
if (!std::holds_alternative<FunctionLifetimes>(callee_lifetimes_or_error)) {
return "No lifetimes for constructor " + constructor->getNameAsString();
}
const FunctionLifetimes& callee_lifetimes =
std::get<FunctionLifetimes>(callee_lifetimes_or_error);
// We check <= instead of == because of default arguments.
assert(construct_expr->getNumArgs() <= constructor->getNumParams());
for (size_t i = 0; i < construct_expr->getNumArgs(); i++) {
TransferInitializer(
object_repository_.GetCXXConstructExprArgumentObject(construct_expr, i),
construct_expr->getArg(i)->getType(), object_repository_,
construct_expr->getArg(i), points_to_map_);
}
// Handle the `this` parameter, which should point to the object getting
// initialized.
points_to_map_.SetPointerPointsToSet(
object_repository_.GetCXXConstructExprThisPointer(construct_expr),
{object_repository_.GetInitializedObject(construct_expr)});
// Populate fn_params for the constructor call.
std::vector<FunctionParameter> fn_params;
for (size_t i = 0; i < construct_expr->getNumArgs(); i++) {
clang::QualType arg_type =
constructor->getParamDecl(i)->getType().getCanonicalType();
fn_params.push_back(
FunctionParameter{arg_type, callee_lifetimes.GetParamLifetimes(i),
object_repository_.GetCXXConstructExprArgumentObject(
construct_expr, i)});
}
clang::QualType type = constructor->getThisType();
fn_params.push_back(FunctionParameter{
type, callee_lifetimes.GetThisLifetimes(),
object_repository_.GetCXXConstructExprThisPointer(construct_expr)});
TransferLifetimesForCall(
construct_expr, fn_params,
ValueLifetimes::ForLifetimeLessType(constructor->getReturnType()),
object_repository_, points_to_map_, constructor->getASTContext());
return std::nullopt;
}
} // namespace
} // namespace lifetimes
} // namespace tidy
} // namespace clang