blob: 16c6f9356ee5b11e76f83f70d6135ddb2ed79d50 [file] [log] [blame]
// Part of the Crubit project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// This file defines an intermediate representation (IR) used between Clang AST
// and code generators that generate Rust bindings and C++ bindings
// implementation.
//
// All types in this file own their data. This IR is expected to outlive the
// Clang's AST context, therefore it cannot reference data owned by it.
#ifndef CRUBIT_RS_BINDINGS_FROM_CC_IR_H_
#define CRUBIT_RS_BINDINGS_FROM_CC_IR_H_
#include <stdint.h>
#include <cstddef>
#include <iomanip>
#include <optional>
#include <string>
#include <utility>
#include <variant>
#include <vector>
#include "base/integral_types.h"
#include "base/logging.h"
#include "rs_bindings_from_cc/bazel_types.h"
#include "third_party/absl/strings/string_view.h"
#include "third_party/json/src/json.hpp"
#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/APSInt.h"
#include "util/intops/strong_int.h"
namespace rs_bindings_from_cc {
namespace internal {
// Rust-side type-name spellings for raw pointers, references and function
// pointers (compare the examples documented on `RsType::name`).
inline constexpr absl::string_view kRustPtrMut = "*mut";
inline constexpr absl::string_view kRustPtrConst = "*const";
inline constexpr absl::string_view kRustRefMut = "&mut";
inline constexpr absl::string_view kRustRefConst = "&";
inline constexpr absl::string_view kRustFuncPtr = "#funcPtr";
// C++-side type-name spellings for pointers, lvalue references and function
// values (compare the examples documented on `CcType::name`).
inline constexpr absl::string_view kCcPtr = "*";
inline constexpr absl::string_view kCcLValueRef = "&";
inline constexpr absl::string_view kCcFuncValue = "#funcValue";
// Value passed to `std::setw` by the `operator<<` overloads in this file
// before streaming the result of `ToJson()`.
inline constexpr int kJsonIndent = 2;
} // namespace internal
// The name of a public header of the C++ library.
class HeaderName {
 public:
  // Stores `name` (moved in) as the include path of the header.
  explicit HeaderName(std::string name) : name_(std::move(name)) {}

  // Returns a non-owning view of the stored path. The view refers to storage
  // owned by this object.
  absl::string_view IncludePath() const { return absl::string_view(name_); }

  // Serializes this header name to JSON (defined out of line).
  nlohmann::json ToJson() const;

  // Abseil hashing hook: combines the stored path into the hash state.
  template <typename H>
  friend H AbslHashValue(H state, const HeaderName& header) {
    return H::combine(std::move(state), header.name_);
  }

 private:
  // Header pathname in the format suitable for a google3-relative quote
  // include.
  std::string name_;
};
// Two header names are equal when their include paths compare equal.
inline bool operator==(const HeaderName& lhs, const HeaderName& rhs) {
  const absl::string_view left_path = lhs.IncludePath();
  const absl::string_view right_path = rhs.IncludePath();
  return left_path == right_path;
}
// Streams the JSON form of `h`, with the stream width set to
// `internal::kJsonIndent` beforehand.
inline std::ostream& operator<<(std::ostream& o, const HeaderName& h) {
  o << std::setw(internal::kJsonIndent) << h.ToJson();
  return o;
}
// An int uniquely representing a Decl. Since our IR goes through the JSON
// serialization/deserialization at the moment, we need a way to restore graph
// edges that don't follow the JSON tree structure (for example between types
// and records). We use DeclIds for this.
//
// Declared as a strong integer type over `uintptr_t`.
DEFINE_STRONG_INT_TYPE(DeclId, uintptr_t);
// A numerical ID that uniquely identifies a lifetime.
//
// Declared as a strong integer type over `int`.
DEFINE_STRONG_INT_TYPE(LifetimeId, int);
// A lifetime: a name paired with the unique ID used to refer to it.
struct Lifetime {
// Serializes this lifetime to JSON (defined out of line).
nlohmann::json ToJson() const;
// Lifetime name. Unlike syn::Lifetime, this does not include the apostrophe.
//
// Note that this is not an identifier; the rules for what is a valid lifetime
// name are slightly different than for identifiers, so we simply use a
// std::string instead of an Identifier here.
std::string name;
// Unique ID of this lifetime. `RsType::lifetime_args` refers to lifetimes by
// this ID.
LifetimeId id;
};
// Streams the JSON form of `l`, with the stream width set to
// `internal::kJsonIndent` beforehand.
inline std::ostream& operator<<(std::ostream& o, const Lifetime& l) {
  o << std::setw(internal::kJsonIndent) << l.ToJson();
  return o;
}
// A C++ type involved in the bindings. It has the knowledge of how the type
// is spelled in C++.
struct CcType {
// Serializes this type to JSON (defined out of line).
nlohmann::json ToJson() const;
// The name of the type. Examples:
// - "int32_t", "std::ptrdiff_t", "long long", "bool"
// - "void"
// - "&" or "*" (pointee stored in `type_args[0]`)
// - "#funcValue <callConv>" (compare with "#funcPtr <abi>" in RsType::name
// and note that Rust only supports function pointers; note that <callConv>
// in CcType doesn't map 1:1 to <abi> in RsType).
// - a decl name when MappedType::WithDeclIds was used
std::string name;
// Id of a decl that this type corresponds to. `nullopt` for types that do
// not correspond to a decl (e.g. primitive types). Note that
// `MappedType::WithDeclIds` populates both `name` and `decl_id`.
std::optional<DeclId> decl_id = std::nullopt;
// The C++ const-qualification for the type.
//
// Note: there are two types for which cv-qualification does not do anything:
// references and functions. If `T` is either a function type like `void()`,
// or a reference type like `int&`, then `T`, `const T`, and `volatile T` are
// all the same type in C++.
bool is_const = false;
// Type arguments for a generic type. Examples:
// int has no type arguments.
// int* has a single type argument, int.
// tuple<int, float> has two type arguments, int and float.
std::vector<CcType> type_args = {};
};
// A Rust type involved in the bindings. It has the knowledge of how the type
// is spelled in Rust.
struct RsType {
// Serializes this type to JSON (defined out of line).
nlohmann::json ToJson() const;
// The name of the type. Examples:
// - "i32" or "bool"
// - "()" (the unit type, equivalent of "void" in CcType)
// - "&", "&mut", "*const", "*mut" (pointee stored in `type_args[0]`)
// - "Option" (e.g. representing nullable, lifetime-annotated C++ pointer as
// `Option<&'a SomeOtherType>` - in this case `type_args[0]` is the generic
// argument representing the Rust reference type).
// - "#funcPtr <abi>" (function pointer; return type is the last elem in
// `type_args`; param types are stored in other `type_args`; <abi> would be
// replaced with "cdecl", "stdcall" or other Abi - see
// https://doc.rust-lang.org/reference/types/function-pointer.html);
// - a decl name when MappedType::WithDeclIds was used
std::string name;
// Id of a decl that this type corresponds to. `nullopt` for types that do
// not correspond to a decl. Note that `MappedType::WithDeclIds` populates
// both `name` and `decl_id`.
std::optional<DeclId> decl_id = std::nullopt;
// Lifetime arguments for a generic type. Examples:
// *mut i32 has no lifetime arguments
// &'a i32 has a single lifetime argument, 'a.
// SomeType<'a, 'b> has two lifetime arguments, 'a and 'b.
// Lifetimes are identified by their unique ID. The corresponding Lifetime
// will be found within the lifetime_params of a Func or Record or TypeAlias
// that uses this type underneath (as a parameter type, field type, or aliased
// type).
std::vector<LifetimeId> lifetime_args = {};
// Type arguments for a generic type. Examples:
// i32 has no type arguments.
// *mut i32 has a single type argument, i32.
// (i32, f32) has two type arguments, i32 and f32.
std::vector<RsType> type_args = {};
};
// Streams the JSON form of `type`, with the stream width set to
// `internal::kJsonIndent` beforehand.
inline std::ostream& operator<<(std::ostream& o, const RsType& type) {
  o << std::setw(internal::kJsonIndent) << type.ToJson();
  return o;
}
// A type involved in the bindings. The rs_type and cc_type will be treated
// as interchangeable during bindings, and so should share the same layout.
//
// For example: a C++ pointer may be a usize in Rust, rather than a pointer, but
// should almost certainly not be a u8, because u8 and pointers are sized and
// aligned differently.
struct MappedType {
  // Returns the mapping between the Rust unit type `()` and C++ `void`.
  static MappedType Void() { return Simple("()", "void"); }

  // Returns the MappedType for a non-templated/generic, non-cv-qualified type.
  // For example, Void() is Simple("()", "void").
  //
  // Both names are sink parameters: they are moved (not copied) into the
  // resulting RsType / CcType, matching WithDeclIds() below.
  static MappedType Simple(std::string rs_name, std::string cc_name) {
    return MappedType{RsType{std::move(rs_name)}, CcType{std::move(cc_name)}};
  }

  // Like Simple(), but additionally records the decl that each side of the
  // mapping corresponds to. Both `name` and `decl_id` are populated.
  static MappedType WithDeclIds(std::string rs_name, DeclId rs_decl_id,
                                std::string cc_name, DeclId cc_decl_id) {
    return MappedType{RsType{std::move(rs_name), rs_decl_id},
                      CcType{std::move(cc_name), cc_decl_id}};
  }

  // Factories for compound types; defined out of line. `nullable` defaults to
  // true for PointerTo().
  static MappedType PointerTo(MappedType pointee_type,
                              std::optional<LifetimeId> lifetime,
                              bool nullable = true);
  static MappedType LValueReferenceTo(MappedType pointee_type,
                                      std::optional<LifetimeId> lifetime);
  static MappedType FuncPtr(absl::string_view cc_call_conv,
                            absl::string_view rs_abi,
                            std::optional<LifetimeId> lifetime,
                            MappedType return_type,
                            std::vector<MappedType> param_types);

  // True when the Rust side is spelled "()". Only `rs_type.name` is inspected.
  bool IsVoid() const { return rs_type.name == "()"; }

  // Serializes this mapping to JSON (defined out of line).
  nlohmann::json ToJson() const;

  RsType rs_type;
  CcType cc_type;
};
// Streams the JSON form of `type`, with the stream width set to
// `internal::kJsonIndent` beforehand.
inline std::ostream& operator<<(std::ostream& o, const MappedType& type) {
  o << std::setw(internal::kJsonIndent) << type.ToJson();
  return o;
}
// An identifier involved in bindings.
//
// For example, the identifier for the C++ function `int Add(int a, int b);`
// is `Identifier("Add")`.
//
// This also includes operator names, such as "operator==". Non-symbol tokens in
// the operator name are separated by a single space. For example:
//
// * `Identifier("operator==")`
// * `Identifier("operator new[]")`
// * `Identifier("operator co_await")`
//
// Invariants:
// `identifier` cannot be empty.
class Identifier {
 public:
  // Stores `identifier` (moved in). CHECK-fails if the resulting name is
  // empty, which enforces the class invariant.
  explicit Identifier(std::string identifier)
      : identifier_(std::move(identifier)) {
    CHECK(!identifier_.empty()) << "Identifier name cannot be empty.";
  }

  // Returns a non-owning view of the identifier text. The view refers to
  // storage owned by this object.
  absl::string_view Ident() const { return absl::string_view(identifier_); }

  // Serializes this identifier to JSON (defined out of line).
  nlohmann::json ToJson() const;

 private:
  std::string identifier_;
};
// Streams the raw identifier text (not JSON).
//
// No `std::setw` is applied here: when the next inserted value is string-like,
// `std::setw` sets a minimum field width rather than a JSON indent, so a
// width of `internal::kJsonIndent` would left-pad one-character identifiers
// (e.g. "a" would be printed as " a").
inline std::ostream& operator<<(std::ostream& o, const Identifier& id) {
  return o << id.Ident();
}
// An integer value in the range [-2**63, 2**64). This is intended to be used
// to produce integer literals in Rust code while specifying the type
// out-of-band.
class IntegerConstant {
 public:
  // Captures `value` as a sign flag plus its 64-bit wrapped representation.
  // CHECK-fails if `value` reports more than 64 significant bits.
  explicit IntegerConstant(const llvm::APSInt& value) {
    CHECK(value.getSignificantBits() <= 64)
        << "enumerator value unexpectedly had more than 64 bits";
    // The two members are derived independently from `value`.
    wrapped_value_ = static_cast<uint64_t>(value.getExtValue());
    is_negative_ = value < 0;
  }

  // Copyable: both members are plain scalars.
  IntegerConstant(const IntegerConstant& other) = default;
  IntegerConstant& operator=(const IntegerConstant& other) = default;

  // Serializes this constant to JSON (defined out of line).
  nlohmann::json ToJson() const;

 private:
  // value < 0
  bool is_negative_;
  // value (mod 2**64)
  uint64_t wrapped_value_;
};
// The name of an overloaded operator, stored without the `operator` keyword
// (the stream operator for this class prepends it when printing).
//
// Invariant: the name is never empty.
class Operator {
 public:
  // Stores `name` (moved in). CHECK-fails if the resulting name is empty.
  explicit Operator(std::string name) : name_(std::move(name)) {
    CHECK(!name_.empty()) << "Operator name cannot be empty.";
  }

  // Returns a non-owning view of the operator name. The view refers to
  // storage owned by this object.
  absl::string_view Name() const { return absl::string_view(name_); }

  // Serializes this operator to JSON (defined out of line).
  nlohmann::json ToJson() const;

 private:
  std::string name_;
};
inline std::ostream& operator<<(std::ostream& stream, const Operator& op) {
char first_char = op.Name()[0];
const char* separator = ('a' <= first_char) && (first_char <= 'z') ? " " : "";
return stream << std::setw(internal::kJsonIndent) << "`operator" << separator
<< op.Name() << "`";
}
// A function parameter.
//
// Examples:
// FuncParam of a C++ function `void Foo(int32_t a);` will be
// `FuncParam{.type=MappedType::Simple("i32", "int32_t"),
// .identifier=Identifier("a")}`.
struct FuncParam {
// Serializes this parameter to JSON (defined out of line).
nlohmann::json ToJson() const;
// The Rust/C++ type pair of the parameter.
MappedType type;
// The parameter's name.
Identifier identifier;
};
// Streams the JSON form of `param`, with the stream width set to
// `internal::kJsonIndent` beforehand.
inline std::ostream& operator<<(std::ostream& o, const FuncParam& param) {
  o << std::setw(internal::kJsonIndent) << param.ToJson();
  return o;
}
// Names of special member functions that are represented separately from
// plain identifiers and operators; one of the alternatives of
// `UnqualifiedIdentifier`.
enum SpecialName {
kDestructor,
kConstructor,
};
// Streams `special_name`; defined out of line.
std::ostream& operator<<(std::ostream& o, const SpecialName& special_name);
// A generalized notion of identifier, or an "Unqualified Identifier" in C++
// jargon: https://en.cppreference.com/w/cpp/language/identifiers
//
// Note that constructors are given a separate variant, so that we can treat
// them differently. After all, they are not invoked or defined like normal
// functions.
using UnqualifiedIdentifier = std::variant<Identifier, Operator, SpecialName>;
// Converts `unqualified_identifier` to JSON; defined out of line.
nlohmann::json ToJson(const UnqualifiedIdentifier& unqualified_identifier);
// Metadata describing a member function: the record it belongs to and, for
// instance (non-static) methods, its qualifiers.
struct MemberFuncMetadata {
// The ref-qualifier of an instance method.
enum ReferenceQualification : char {
kLValue, // void Foo() &;
kRValue, // void Foo() &&;
kUnqualified, // void Foo();
};
// TODO(lukasza): Consider extracting a separate ConstructorMetadata struct to
// account for the fact that 1) `is_explicit_ctor` applies only to
// constructors and 2) `is_const` and `is_virtual` never apply to
// constructors.
struct InstanceMethodMetadata {
// Ref-qualification of the method; defaults to no ref-qualifier.
ReferenceQualification reference = kUnqualified;
// Whether the method is const-qualified.
bool is_const = false;
// Whether the method is virtual.
bool is_virtual = false;
// If the member function was a constructor with an `explicit` specifier.
bool is_explicit_ctor = false;
};
// Serializes this metadata to JSON (defined out of line).
nlohmann::json ToJson() const;
// The type that this is a member function for.
DeclId record_id;
// Qualifiers for the instance method.
//
// If null, this is a static method.
std::optional<InstanceMethodMetadata> instance_method_metadata;
};
// A source code location: a file name plus a line and column number.
struct SourceLoc {
// Serializes this location to JSON (defined out of line).
nlohmann::json ToJson() const;
// Name of the source file.
std::string filename;
// Line and column within `filename`.
// NOTE(review): whether these are 0- or 1-based is not visible in this
// header; confirm against the code that fills them in.
uint64 line;
uint64 column;
};
// Streams the JSON form of `r`, with the stream width set to
// `internal::kJsonIndent` beforehand.
inline std::ostream& operator<<(std::ostream& o, const SourceLoc& r) {
  o << std::setw(internal::kJsonIndent) << r.ToJson();
  return o;
}
// A function involved in the bindings.
struct Func {
// Serializes this function to JSON (defined out of line).
nlohmann::json ToJson() const;
// Name of the function: a plain identifier, an operator, or a special name
// (constructor / destructor).
UnqualifiedIdentifier name;
// Label of the target this function is attributed to.
BlazeLabel owning_target;
// Documentation comment attached to the function, if any.
std::optional<std::string> doc_comment;
// Mangled (linker-level) name of the function.
std::string mangled_name;
MappedType return_type;
std::vector<FuncParam> params;
// Lifetimes that the types used by this function may refer to by ID (see
// `RsType::lifetime_args`).
std::vector<Lifetime> lifetime_params;
// Whether the function is inline.
bool is_inline;
// If null, this is not a member function.
std::optional<MemberFuncMetadata> member_func_metadata;
// Source location associated with the function.
SourceLoc source_loc;
};
// Streams the JSON form of `f`, with the stream width set to
// `internal::kJsonIndent` beforehand.
inline std::ostream& operator<<(std::ostream& o, const Func& f) {
  o << std::setw(internal::kJsonIndent) << f.ToJson();
  return o;
}
// Access specifier for a member or base class.
enum AccessSpecifier {
kPublic,
kProtected,
kPrivate,
};
// Streams `access`; defined out of line.
std::ostream& operator<<(std::ostream& o, const AccessSpecifier& access);
// A field (non-static member variable) of a record.
struct Field {
// Serializes this field to JSON (defined out of line).
nlohmann::json ToJson() const;
// Name of the field.
Identifier identifier;
// Documentation comment attached to the field, if any.
std::optional<std::string> doc_comment;
// The Rust/C++ type pair of the field.
MappedType type;
// Access level of the field.
AccessSpecifier access;
// Field offset in bits.
uint64_t offset;
// True if the field is [[no_unique_address]].
bool is_no_unique_address;
};
// Streams the JSON form of `f`, with the stream width set to
// `internal::kJsonIndent` beforehand.
inline std::ostream& operator<<(std::ostream& o, const Field& f) {
  o << std::setw(internal::kJsonIndent) << f.ToJson();
  return o;
}
// Information about special member functions.
//
// Nontrivial definitions are divided into two: there are nontrivial definitions
// which are nontrivial only due to a member variable which defines the special
// member function, and those which are nontrivial because the operation was
// user defined for the object itself, or for any base class.
//
// This allows us to sidestep calling C++ implementations of special member
// functions in narrow cases: even for a nontrivial special member function, if
// it is kNontrivialMembers, we can directly implement it in Rust in terms of
// the member variables.
struct SpecialMemberFunc {
// How the special member function is defined.
enum class Definition : char {
// The special member function is trivial.
kTrivial,
// Nontrivial, but only because of a member variable with a nontrivial
// special member function.
kNontrivialMembers,
// Nontrivial because of a user-defined special member function in this or a
// base class. (May *also* be nontrivial due to member variables.)
kNontrivialUserDefined,
// The special member function is deleted.
kDeleted,
};
// Serializes this description to JSON (defined out of line).
nlohmann::json ToJson() const;
// Defaults to a trivial, public special member function.
Definition definition = Definition::kTrivial;
AccessSpecifier access = AccessSpecifier::kPublic;
};
// Streams `definition`; defined out of line.
std::ostream& operator<<(std::ostream& o,
const SpecialMemberFunc::Definition& definition);
// Streams the JSON form of `f`, with the stream width set to
// `internal::kJsonIndent` beforehand.
inline std::ostream& operator<<(std::ostream& o, const SpecialMemberFunc& f) {
  o << std::setw(internal::kJsonIndent) << f.ToJson();
  return o;
}
// A base class subobject of a struct or class.
struct BaseClass {
// Serializes this base class description to JSON (defined out of line).
nlohmann::json ToJson() const;
// DeclId of the record that is the base class.
DeclId base_record_id;
// The offset the base class subobject is located at. This is always nonempty
// for nonvirtual inheritance, and always empty if a virtual base class is
// anywhere in the inheritance chain.
std::optional<int64_t> offset;
};
// A record (struct, class, union).
struct Record {
// Serializes this record to JSON (defined out of line).
nlohmann::json ToJson() const;
// Name of the record.
Identifier identifier;
// DeclId of this record; other IR nodes (e.g. `BaseClass::base_record_id`,
// `MemberFuncMetadata::record_id`) refer to records through such IDs.
DeclId id;
// Label of the target this record is attributed to.
BlazeLabel owning_target;
// Documentation comment attached to the record, if any.
std::optional<std::string> doc_comment;
// Base class subobjects of this record.
// NOTE(review): per the field name only unambiguous public bases are listed;
// confirm against the code that fills this in.
std::vector<BaseClass> unambiguous_public_bases;
std::vector<Field> fields;
// Lifetimes that the types used by this record may refer to by ID (see
// `RsType::lifetime_args`).
std::vector<Lifetime> lifetime_params;
// Size and alignment in bytes.
int64_t size;
int64_t alignment;
// The size of the base class subobjects, or null if there are none.
//
// More information: docs/struct_layout
std::optional<size_t> base_size = std::nullopt;
// True if the alignment may differ from what the fields would imply.
//
// For example, a base class or [[no_unique_address]] of alignment 8 should
// cause the record to have alignment at least 8. Since the field cannot be
// aligned due to layout issues, the parent struct must instead receive an
// alignment adjustment as necessary, via .override_alignment=true.
//
// More information: docs/struct_layout
bool override_alignment = false;
// Special member functions. Each defaults to a trivial, public definition.
SpecialMemberFunc copy_constructor = {};
SpecialMemberFunc move_constructor = {};
SpecialMemberFunc destructor = {};
// Whether this type is passed by value as if it were a trivial type (the same
// as it would be if it were a struct in C).
//
// This can be either due to language rules (it *is* a trivial type), or due
// to the usage of a Clang attribute that forces trivial for calls:
//
// * https://eel.is/c++draft/class.temporary#3
// * https://clang.llvm.org/docs/AttributeReference.html#trivial-abi
bool is_trivial_abi = false;
// Whether this type is effectively `final`, and cannot be inherited from.
//
// This can happen because it was explicitly marked final, or because a core
// function like the destructor was marked final.
bool is_final = false;
};
// A single enumerator of an enum: its name and its integer value.
struct Enumerator {
// Serializes this enumerator to JSON (defined out of line).
nlohmann::json ToJson() const;
Identifier identifier;
IntegerConstant value;
};
// An enum: its name, its underlying type and its enumerators.
struct Enum {
// Serializes this enum to JSON (defined out of line).
nlohmann::json ToJson() const;
// Name of the enum.
Identifier identifier;
// DeclId of this enum.
DeclId id;
// Label of the target this enum is attributed to.
BlazeLabel owning_target;
// The Rust/C++ type pair of the enum's underlying type.
MappedType underlying_type;
std::vector<Enumerator> enumerators;
};
// Streams the JSON form of `r`, with the stream width set to
// `internal::kJsonIndent` beforehand.
inline std::ostream& operator<<(std::ostream& o, const Record& r) {
  o << std::setw(internal::kJsonIndent) << r.ToJson();
  return o;
}
// A type alias (defined either using `typedef` or `using`).
struct TypeAlias {
// Serializes this alias to JSON (defined out of line).
nlohmann::json ToJson() const;
// Name of the alias.
Identifier identifier;
// DeclId of this alias.
DeclId id;
// Label of the target this alias is attributed to.
BlazeLabel owning_target;
// The Rust/C++ type pair that the alias refers to.
MappedType underlying_type;
};
// Streams the JSON form of `t`, with the stream width set to
// `internal::kJsonIndent` beforehand.
inline std::ostream& operator<<(std::ostream& o, const TypeAlias& t) {
  o << std::setw(internal::kJsonIndent) << t.ToJson();
  return o;
}
// A placeholder for an item that we can't generate bindings for (yet).
struct UnsupportedItem {
// Serializes this item to JSON (defined out of line).
nlohmann::json ToJson() const;
// TODO(forster): We could show the original declaration in the generated
// message (potentially also for successfully imported items).
// Qualified name of the item for which we couldn't generate bindings.
std::string name;
// Explanation of why we couldn't generate bindings.
// TODO(forster): We should support multiple reasons per unsupported item.
std::string message;
// Source location associated with the unsupported item.
SourceLoc source_loc;
};
// Streams the JSON form of `r`, with the stream width set to
// `internal::kJsonIndent` beforehand.
inline std::ostream& operator<<(std::ostream& o, const UnsupportedItem& r) {
  o << std::setw(internal::kJsonIndent) << r.ToJson();
  return o;
}
// A free-standing comment; one of the alternatives of `IR::Item`.
struct Comment {
// Serializes this comment to JSON (defined out of line).
nlohmann::json ToJson() const;
// The comment text.
std::string text;
};
// Streams the JSON form of `r`, with the stream width set to
// `internal::kJsonIndent` beforehand.
inline std::ostream& operator<<(std::ostream& o, const Comment& r) {
  o << std::setw(internal::kJsonIndent) << r.ToJson();
  return o;
}
// A complete intermediate representation of bindings for publicly accessible
// declarations of a single C++ library.
struct IR {
  // Serializes the whole IR to JSON (defined out of line).
  nlohmann::json ToJson() const;

  // Returns pointers to every element of `items` that currently holds a `T`,
  // in the order they appear in `items`. The pointers refer into `items`, so
  // they are only usable while `items` is neither modified nor destroyed.
  template <typename T>
  std::vector<const T*> get_items_if() const {
    std::vector<const T*> matches;
    for (const auto& candidate : items) {
      const T* typed = std::get_if<T>(&candidate);
      if (typed != nullptr) {
        matches.push_back(typed);
      }
    }
    return matches;
  }

  // Collection of public headers that were used to construct the AST this `IR`
  // is generated from.
  std::vector<HeaderName> used_headers;

  // Label of the target this IR was generated for.
  BlazeLabel current_target;

  // The kinds of top-level items the IR can hold.
  using Item =
      std::variant<Func, Record, Enum, TypeAlias, UnsupportedItem, Comment>;
  std::vector<Item> items;
};
// Streams the JSON form of `ir`, with the stream width set to
// `internal::kJsonIndent` beforehand.
inline std::ostream& operator<<(std::ostream& o, const IR& ir) {
  o << std::setw(internal::kJsonIndent) << ir.ToJson();
  return o;
}
} // namespace rs_bindings_from_cc
#endif // CRUBIT_RS_BINDINGS_FROM_CC_IR_H_