Stop returning structs by value from `extern "C"` thunks.
PiperOrigin-RevId: 515488468
diff --git a/cc_bindings_from_rs/bazel_support/cc_bindings_from_rust_rule.bzl b/cc_bindings_from_rs/bazel_support/cc_bindings_from_rust_rule.bzl
index 347f254..6ec5055 100644
--- a/cc_bindings_from_rs/bazel_support/cc_bindings_from_rust_rule.bzl
+++ b/cc_bindings_from_rs/bazel_support/cc_bindings_from_rust_rule.bzl
@@ -246,6 +246,7 @@
"_cc_deps_for_bindings": attr.label_list(
doc = "Dependencies needed to build the C++ sources generated by cc_bindings_from_rs.",
default = [
+ "//support/internal:bindings_support",
"//support/rs_std:rs_char",
],
),
diff --git a/cc_bindings_from_rs/bindings.rs b/cc_bindings_from_rs/bindings.rs
index ffdf6c5..b355ea6 100644
--- a/cc_bindings_from_rs/bindings.rs
+++ b/cc_bindings_from_rs/bindings.rs
@@ -42,6 +42,17 @@
pub _crate_to_include_map: (),
}
+impl<'tcx> Input<'tcx> {
+ // TODO(b/259724276): This function's results should be memoized. It may be
+ // easier if separate functions are provided for each support header - e.g.
+ // `rs_char()`, `return_value_slot()`, etc.
+ fn support_header(&self, suffix: &str) -> CcInclude {
+ let support_path = &*self.crubit_support_path;
+ let full_path = format!("{support_path}/{suffix}");
+ CcInclude::user_header(full_path.into())
+ }
+}
+
pub struct Output {
pub h_body: TokenStream,
pub rs_body: TokenStream,
@@ -251,6 +262,64 @@
}
}
+/// Whether functions using `extern "C"` ABI can safely handle values of type
+/// `ty` (e.g. when passing by value arguments or return values of such type).
+fn is_c_abi_compatible_by_value(ty: Ty) -> bool {
+ match ty.kind() {
+ // `improper_ctypes_definitions` warning doesn't complain about the following types:
+ ty::TyKind::Bool |
+ ty::TyKind::Float{..} |
+ ty::TyKind::Int{..} |
+ ty::TyKind::Uint{..} |
+ ty::TyKind::Never |
+ ty::TyKind::RawPtr{..} |
+ ty::TyKind::FnPtr{..} => true,
+ ty::TyKind::Tuple(types) if types.len() == 0 => true,
+
+ // Crubit assumes that `char` is compatible with a certain `extern "C"` ABI.
+ // See `rust_builtin_type_abi_assumptions.md` for more details.
+ ty::TyKind::Char => true,
+
+ // Crubit's C++ bindings for tuples, structs, and other ADTs may not preserve
+ // their ABI (even if they *do* preserve their memory layout). For example:
+ // - In System V ABI replacing a field with a fixed-length array of bytes may affect
+ // whether the whole struct is classified as an integer and passed in general purpose
+ // registers VS classified as SSE2 and passed in floating-point registers like xmm0).
+ // See also b/270454629.
+ // - To replicate field offsets, Crubit may insert explicit padding fields. These
+ // extra fields may also impact the ABI of the generated bindings.
+ //
+ // TODO(lukasza): In the future, some additional performance gains may be realized by
+ // returning `true` in a few limited cases (this may require additional complexity to
+ // ensure that `format_adt` never injects explicit padding into such structs):
+ // - `#[repr(C)]` structs and unions,
+ // - `#[repr(transparent)]` struct that wraps an ABI-safe type,
+ // - Discriminant-only enums (b/259984090).
+ ty::TyKind::Tuple{..} | // An empty tuple (`()` - the unit type) is handled above.
+ ty::TyKind::Adt{..} => false,
+
+ // These kinds of reference-related types are not implemented yet - `is_c_abi_compatible_by_value`
+ // should never need to handle them, because `format_ty_for_cc` fails for such types.
+ //
+ // TODO(b/258235219): When implementing support for references we should
+ // consider returning `true` for `TyKind::Ref` and document the rationale
+ // for such decision - maybe something like this will be sufficient:
+ // - In general `TyKind::Ref` should have the same ABI as `TyKind::RawPtr`
+ // - References to slices (`&[T]`) or strings (`&str`) rely on assumptions
+ // spelled out in `rust_builtin_type_abi_assumptions.md`..
+ ty::TyKind::Ref{..} |
+ ty::TyKind::Str |
+ ty::TyKind::Array{..} |
+ ty::TyKind::Slice{..} =>
+ unimplemented!(),
+
+ // `format_ty_for_cc` is expected to fail for other kinds of types
+ // and therefore `is_c_abi_compatible_by_value` should never be called for
+ // these other types
+ _ => unimplemented!(),
+ }
+}
+
/// Formats `ty` into a `CcSnippet` that represents how the type should be
/// spelled in a C++ declaration of a function parameter or field.
//
@@ -311,10 +380,9 @@
value: Primitive::Int(Integer::I32, /* signedness = */false), ..
})));
- let rs_char_path = format!("{}/rs_std/rs_char.h", &*input.crubit_support_path);
CcSnippet::with_include(
quote! { rs_std::rs_char },
- CcInclude::user_header(rs_char_path.into()),
+ input.support_header("rs_std/rs_char.h"),
)
},
@@ -685,12 +753,11 @@
}
};
- // TODO(b/270454629): Tweak `thunk_ret_type`, `thunk_params`, and `thunk_args` as
- // needed to pass some arguments and/or a return value by pointer, rather than by
- // value.
+ // TODO(b/270454629): Tweak `thunk_params`, and `thunk_args` as needed to pass some
+ // arguments by pointer, rather than by value.
let mut prereqs = main_api_prereqs;
- let thunk_params = &main_api_params;
- let thunk_args = params
+ let mut thunk_params = main_api_params.clone();
+ let mut thunk_args = params
.iter()
.map(|Param{ cc_name, ty, ..}|
if ty.is_copy_modulo_regions(tcx, ty::ParamEnv::empty()) {
@@ -700,7 +767,25 @@
quote!{ std::move(#cc_name) }
})
.collect_vec();
- let thunk_ret_type = &main_api_ret_type;
+ let thunk_ret_type: TokenStream;
+ let impl_body: TokenStream;
+ if is_c_abi_compatible_by_value(sig.output()) {
+ thunk_ret_type = main_api_ret_type.clone();
+ impl_body = quote!{
+ return __crubit_internal :: #thunk_name( #( #thunk_args ),* );
+ };
+ } else {
+ thunk_ret_type = quote!{ void };
+ thunk_params.push(quote!{ #main_api_ret_type* __ret_ptr });
+ thunk_args.push(quote!{ __ret_slot.Get() });
+ impl_body = quote!{
+ crubit::ReturnValueSlot<#main_api_ret_type> __ret_slot;
+ __crubit_internal :: #thunk_name( #( #thunk_args ),* );
+ return std::move(__ret_slot).AssumeInitAndTakeValue();
+ };
+ prereqs.includes.insert(CcInclude::utility());
+ prereqs.includes.insert(input.support_header("internal/return_value_slot.h"));
+ };
CcSnippet {
prereqs,
tokens: quote! {
@@ -710,7 +795,7 @@
}
inline #main_api_ret_type #struct_name #main_api_fn_name (
#( #main_api_params ),* ) {
- return __crubit_internal :: #thunk_name( #( #thunk_args ),* );
+ #impl_body
}
__NEWLINE__
},
@@ -721,11 +806,12 @@
quote! {}
} else {
let thunk_name = make_rs_ident(symbol_name.name);
- let thunk_params = params
+ let mut thunk_params = params
.iter()
- .map(|Param{ rs_name, rs_type, ..}| quote!{ #rs_name: #rs_type });
- let thunk_ret_type = format_ty_for_rs(tcx, sig.output())?;
- let thunk_body = {
+ .map(|Param{ rs_name, rs_type, ..}| quote!{ #rs_name: #rs_type })
+ .collect_vec();
+ let mut thunk_ret_type = format_ty_for_rs(tcx, sig.output())?;
+ let mut thunk_body = {
let crate_name = make_rs_ident(krate.as_str());
let mod_path = mod_path.format_for_rs();
let fn_name = make_rs_ident(fn_name.as_str());
@@ -746,6 +832,19 @@
extern "C" fn #thunk_name( #( #thunk_params ),* ) -> #thunk_ret_type {
#thunk_body
}
+ };
+ if !is_c_abi_compatible_by_value(sig.output()) {
+ thunk_params.push(quote!{
+ __ret_slot: &mut ::core::mem::MaybeUninit<#thunk_ret_type>
+ });
+ thunk_ret_type = quote!{ () };
+ thunk_body = quote!{ __ret_slot.write(#thunk_body); };
+ };
+ quote! {
+ #[no_mangle]
+ extern "C" fn #thunk_name( #( #thunk_params ),* ) -> #thunk_ret_type {
+ #thunk_body
+ }
}
};
@@ -2650,11 +2749,13 @@
impl_details.cc.tokens,
quote! {
namespace __crubit_internal {
- extern "C" ::rust_out::S ...(std::int32_t i);
+ extern "C" void ...(std::int32_t i, ::rust_out::S* __ret_ptr);
}
...
inline ::rust_out::S create(std::int32_t i) {
- return __crubit_internal::...(i);
+ crubit::ReturnValueSlot<::rust_out::S> __ret_slot;
+ __crubit_internal::...(i, __ret_slot.Get());
+ return std::move(__ret_slot).AssumeInitAndTakeValue();
}
}
);
@@ -2663,8 +2764,11 @@
quote! {
#[no_mangle]
extern "C"
- fn ...(i: i32) -> ::rust_out::S {
- ::rust_out::create(i)
+ fn ...(
+ i: i32,
+ __ret_slot: &mut ::core::mem::MaybeUninit<::rust_out::S>
+ ) -> () {
+ __ret_slot.write(::rust_out::create(i));
}
}
);
diff --git a/rs_bindings_from_cc/BUILD b/rs_bindings_from_cc/BUILD
index 8ff5c30..a30710c 100644
--- a/rs_bindings_from_cc/BUILD
+++ b/rs_bindings_from_cc/BUILD
@@ -41,7 +41,7 @@
deps_for_bindings(
name = "deps_for_bindings",
deps_for_generated_cc_file = [
- "//support/internal:rs_api_impl_support",
+ "//support/internal:bindings_support",
],
deps_for_generated_rs_file = [
# Required for struct layout assertions added to the generated
diff --git a/support/internal/BUILD b/support/internal/BUILD
index bd1ddad..ad858da 100644
--- a/support/internal/BUILD
+++ b/support/internal/BUILD
@@ -1,14 +1,15 @@
package(default_applicable_licenses = ["//third_party/crubit:license"])
cc_library(
- name = "rs_api_impl_support",
+ name = "bindings_support",
hdrs = [
"cxx20_backports.h",
"offsetof.h",
+ "return_value_slot.h",
],
visibility = ["//:__subpackages__"],
# It is important to be thoughtful when adding new dependencies for
- # `rs_api_impl_support` (and possibly other targets in this BUILD file).
+ # `bindings_support` (and possibly other targets in this BUILD file).
# Using mature Abseil APIs seems okay - we should be able to assume that
# Crubit users have a version of Abseil that is relatively recent (although
# we can't rely on an exact version and/or exact absl/base/options.h).
@@ -19,7 +20,17 @@
name = "offsetof_test",
srcs = ["offsetof_test.cc"],
deps = [
- ":rs_api_impl_support",
+ ":bindings_support",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "return_value_slot_test",
+ srcs = ["return_value_slot_test.cc"],
+ deps = [
+ ":bindings_support",
+ "@absl//absl/log:check",
"@com_google_googletest//:gtest_main",
],
)
diff --git a/support/internal/return_value_slot.h b/support/internal/return_value_slot.h
new file mode 100644
index 0000000..c2f50a0
--- /dev/null
+++ b/support/internal/return_value_slot.h
@@ -0,0 +1,105 @@
+// Part of the Crubit project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef THIRD_PARTY_CRUBIT_SUPPORT_INTERNAL_RETURN_VALUE_SLOT_H_
+#define THIRD_PARTY_CRUBIT_SUPPORT_INTERNAL_RETURN_VALUE_SLOT_H_
+
+#include <memory>
+#include <utility>
+
+namespace crubit {
+
+// `ReturnValueSlot<T>` provides a slot that can store a move-only return
+// value. This class is used to return non-`#[repr(C)]` structs from Rust
+// into C++ in a way that is compatible with the ABI of `extern "C"` Rust
+// thunks.
+//
+// An example will help to illustrate the purpose of this class:
+//
+// ```rs
+// pub struct SomeStruct(...);
+// pub fn foo(arg1: i32, arg2: i32) -> SomeStruct { unimplemented!() }
+// ```
+//
+// The generated C++ header will look like this:
+//
+// ```cc
+// inline SomeStruct foo(int32_t arg1, int32_t arg2) {
+// /* 1 */ crubit::ReturnValueSlot<SomeStruct> __ret;
+// /* 2 */ __rust_thunk_for_foo(arg1, arg2, __ret.GetSlotPtr());
+// /* 3 */ return __ret.AssumeInitAndTakeValue();
+// }
+// }
+// ```
+//
+// `ReturnValueSlot` helps to coordinate when C++ constructors and destructors
+// run in the example above:
+// - `SomeStruct`'s constructor should *not* run on line 1.
+// - Rust thunk can populates the return slot on line 2.
+// The Rust thunk may panic without populating the return slot - in this
+// case nothing should operate on the uninitialized `SomeStruct` value
+// (this is accomplished by ReturnValueSlot having an empty/no-op destructor)
+// - `SomeStruct`'s move constructor will run on line 3 (moving the return value
+// out of `ReturnValueSlot::value_`, and then destructing the moved-away
+// `ReturnValueSlot::value_`).
+//
+// Behavior of `ReturnValueSlot<T>` in steps 1 and 2 is somewhat similar to
+// `MaybeUninit<T>` in Rust, but the behavior on line 3 is a bit different:
+// - There are no move constructors in Rust
+// - Dropping `MaybeUninit<T>` doesn't drop `T`
+template <typename T>
+union ReturnValueSlot {
+ public:
+ // Creates `ReturnValueSlot` in an uninitialized state.
+ ReturnValueSlot() {
+ // Leaving `value_` uninitialized / not invoking any constructor of `T`.
+ }
+
+ // Gets a pointer to the slot where the return value may be written.
+ //
+ // SAFETY REQUIREMENTS:
+ // - Caller should not read from the returned pointer before the value has
+ // been initialized.
+ // - Caller should only write to the returned pointer while the
+ // `ReturnValueSlot` is in an uninitialized state (i.e. care should be taken
+ // to avoid writing to the slot twice, potentially overwriting a value
+ // without calling its destructor).
+ T* Get() { return &value_; }
+
+ // Takes and returns the value. This leaves the `ReturnValueSlot` in a
+ // moved-away state - afterwards the only valid operation is to destroy the
+ // `ReturnValueSlot` object (or assign to it, but the assignment operators
+ // have been `delete`d below).
+ //
+ // SAFETY REQUIREMENTS:
+ // Caller should ensure the return value has been initialized before calling
+ // `AssumeInitAndTakeValue()`. (e.g. by ensuring that the value has been
+ // earlier written to the location pointed to by `GetPtr()`).
+ T AssumeInitAndTakeValue() && {
+ T return_value(std::move(value_));
+ std::destroy_at(&value_);
+ return return_value;
+ }
+
+ // SAFETY REQUIREMENTS: The return value in `other` must have been
+ // initialized. (It is also okay if the other value is in a moved-away state
+ // after calling `AssumeInitAndTakeValue` on it).
+ ReturnValueSlot(ReturnValueSlot&& other) { value_ = std::move(other.value_); }
+
+ ~ReturnValueSlot() {
+ // Not destroying or otherwise using `value_`, because it may still be in
+ // an uninitialized state - e.g. in case of a panic.
+ }
+
+ ReturnValueSlot(const ReturnValueSlot&) = delete;
+ ReturnValueSlot& operator=(const ReturnValueSlot&) = delete;
+ ReturnValueSlot& operator=(ReturnValueSlot&&) = delete;
+
+ private:
+ T value_;
+};
+
+} // namespace crubit
+
+#endif // THIRD_PARTY_CRUBIT_SUPPORT_INTERNAL_RETURN_VALUE_SLOT_H_
diff --git a/support/internal/return_value_slot_test.cc b/support/internal/return_value_slot_test.cc
new file mode 100644
index 0000000..e81b9cc
--- /dev/null
+++ b/support/internal/return_value_slot_test.cc
@@ -0,0 +1,104 @@
+// Part of the Crubit project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "support/internal/return_value_slot.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/log/check.h"
+
+namespace crubit {
+namespace {
+
+constexpr int kUninitializedState = -1;
+constexpr int kMovedAwayState = -2;
+constexpr int kDestroyedState = -3;
+struct MonitoringHelper {
+ MonitoringHelper() = delete;
+ MonitoringHelper(const MonitoringHelper&) = delete;
+ MonitoringHelper& operator=(const MonitoringHelper&) = delete;
+
+ explicit MonitoringHelper(int new_state, std::vector<int>* destroyed_states)
+ : state(new_state), destroyed_states(destroyed_states) {
+ CHECK_NE(state, kMovedAwayState);
+ CHECK_NE(state, kDestroyedState);
+ }
+
+ MonitoringHelper(MonitoringHelper&& other) {
+ state = other.state;
+ destroyed_states = other.destroyed_states;
+ other.state = kMovedAwayState;
+ }
+
+ MonitoringHelper& operator=(MonitoringHelper&& other) {
+ // Destruct old field values. It is okay for `operator=` to assume that
+ // `this` has been initialized.
+ CHECK_NE(state, kUninitializedState);
+ CHECK_NE(state, kDestroyedState);
+ destroyed_states->push_back(state);
+
+ state = other.state;
+ destroyed_states = other.destroyed_states;
+ other.state = kMovedAwayState;
+ return *this;
+ }
+
+ ~MonitoringHelper() {
+ CHECK_NE(state, kUninitializedState);
+ CHECK_NE(state, kDestroyedState);
+ destroyed_states->push_back(state);
+ state = kDestroyedState;
+ }
+
+ int state;
+ std::vector<int>* destroyed_states;
+};
+
+TEST(ReturnValueSlot, Test) {
+ std::vector<int> destroyed_states;
+
+ constexpr int kInitialValue = 1;
+ constexpr int kReturnedValue = 2;
+
+ MonitoringHelper return_value(kInitialValue, &destroyed_states);
+
+ {
+ // At this point `slot` is in an uninitialized state.
+ ReturnValueSlot<MonitoringHelper> slot;
+ MonitoringHelper* slot_ptr = slot.Get();
+ slot_ptr->state = kUninitializedState;
+ slot_ptr->destroyed_states = &destroyed_states;
+ // No destructors should run up to this point.
+ EXPECT_THAT(destroyed_states, testing::IsEmpty());
+
+ // The placement `new` below simulates a Rust thunk that populates
+ // `slot_ptr` (typically by calling `MaybeUninit<T>::write`).
+ new (slot_ptr) MonitoringHelper(kReturnedValue, &destroyed_states);
+ EXPECT_THAT(destroyed_states, testing::IsEmpty());
+
+ // Move the return value from `slot` to `return_value`.
+ return_value = std::move(slot).AssumeInitAndTakeValue();
+ // Move assignment will destroy fields of the original lhs value that gets
+ // overwritten by the assignment - this is where `kInitialValue` comes from.
+ //
+ // AssumeInitAndTakeValue will destroy `ReturnValueSlot::value_` in a
+ // kMovedAwayState.
+ //
+ // Additionally, a temporary `MonitoringHelper` value in a moved-away state
+ // will be destroyed..
+ EXPECT_THAT(
+ destroyed_states,
+ testing::ElementsAre(kMovedAwayState, kInitialValue, kMovedAwayState));
+ // The value inside `ReturnValueSlot` (pointed to by `slot_ptr`) should be
+ // in a `kMovedAwayState` at this point (certainly not in
+ // `kDestroyedState`).
+ EXPECT_EQ(kDestroyedState, slot_ptr->state);
+ EXPECT_EQ(kReturnedValue, return_value.state);
+ }
+
+ EXPECT_EQ(kReturnedValue, return_value.state);
+}
+
+} // namespace
+} // namespace crubit