blob: 2bb04bbdc5d7a873b3e6ae59582bcb3daf717eca [file] [log] [blame] [edit]
// Part of the Crubit project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
use crate::slice_ptr::get_raw_parts;
use crate::std::raw_string_view;
use core::ptr;
/// Live type for `std::string_view` bindings.
///
/// This is a `raw_string_view` wrapper with an associated lifetime `'a`. Because the
/// struct is `#[repr(transparent)]` and its only other field is a zero-sized marker, it
/// has the same layout as the wrapped `raw_string_view`.
///
/// # Invariants
///
/// * The contained `raw_string_view` is valid for lifetime `'a`.
/// * This does **not** make any guarantees about mutable aliasing.
// The lower-case type name mirrors the C++ type named in the `cpp_type` annotation below.
#[allow(non_camel_case_types)]
#[repr(transparent)]
#[doc = "CRUBIT_ANNOTATE: cpp_type=std::string_view"]
#[doc = "CRUBIT_ANNOTATE: include_path=<string_view>"]
#[derive(Debug)]
pub struct string_view<'a> {
// The wrapped lifetime-free view; on the C++ side its fields are a pointer and an integer.
raw: raw_string_view,
// Zero-sized marker that ties this value to `'a`, as if it held a `&'a ()` borrow.
phantom_data: core::marker::PhantomData<&'a ()>,
}
impl<'a> string_view<'a> {
    /// Borrows the wrapped `raw_string_view`.
    pub fn as_raw(&self) -> &raw_string_view {
        &self.raw
    }

    /// Consumes the wrapper and returns the wrapped `raw_string_view`, discarding the
    /// lifetime `'a` that was attached to it.
    pub fn into_raw(self) -> raw_string_view {
        self.raw
    }

    /// Returns a raw pointer to the wrapped `raw_string_view`.
    ///
    /// The pointer is derived from the `&self` borrow, so it points into `self`.
    pub fn as_ptr(&self) -> *const raw_string_view {
        &self.raw
    }

    /// Returns a Rust byte slice referring to the string_view's data.
    ///
    /// # Safety
    ///
    /// The viewed memory must NOT be mutated by any C++ code during `'a`.
    /// The returned `&'a [u8]` requires this immutability. While C++ `std::string_view`
    /// itself provides read-only access, the C++ code owning the viewed data must
    /// not modify it via other aliases for the duration of `'a`.
    pub unsafe fn as_bytes(&self) -> &'a [u8] {
        // SAFETY: The caller's contract (see above) ensures `self.raw` points to memory
        // that is valid and immutable for lifetime `'a`. `as_raw_bytes()` yields a
        // `*const [u8]` that handles the empty/null cases (a null data pointer becomes a
        // dangling, non-null one), so dereferencing it is sound.
        unsafe { &*self.raw.as_raw_bytes() }
    }

    /// Returns the length, in bytes, of the viewed string.
    pub fn len(&self) -> usize {
        // SAFETY: `&self.raw` is valid according to the `string_view` invariants.
        unsafe { self.raw.len() }
    }

    /// Returns `true` if the viewed string has a length of zero.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Performs UTF-8 validation and returns a `&str` view of the underlying C++ string
    /// if validation succeeds.
    ///
    /// # Errors
    ///
    /// Returns a `Utf8Error` if the viewed bytes are not valid UTF-8.
    ///
    /// # Safety
    ///
    /// The viewed memory must NOT be mutated by any C++ code during lifetime `'a`.
    /// The returned `&'a str` (on `Ok`) requires this immutability. While C++
    /// `std::string_view` itself provides read-only access, the C++ code owning the
    /// viewed data must not modify it via other aliases for the duration of `'a`.
    pub unsafe fn to_str(&self) -> Result<&'a str, core::str::Utf8Error> {
        // SAFETY: The caller's contract (see above) ensures `self.raw` points to memory
        // that is valid and immutable for `'a`, which is what `raw_string_view::to_str`
        // requires.
        let validated = unsafe { self.raw.to_str() };
        // SAFETY: On success the pointer refers to the same memory, now known to be valid
        // UTF-8, so it may be dereferenced for `'a` under the caller's contract.
        validated.map(|s| unsafe { &*s })
    }
}
impl<'a> From<&'a [u8]> for string_view<'a> {
fn from(s: &[u8]) -> Self {
string_view {
raw: raw_string_view::from(s as *const [u8]),
phantom_data: core::marker::PhantomData,
}
}
}
/// Views a borrowed fixed-size byte array; the resulting `string_view` carries the
/// borrow's lifetime and has length `N`.
impl<'a, const N: usize> From<&'a [u8; N]> for string_view<'a> {
    fn from(s: &'a [u8; N]) -> Self {
        // Unsize the array reference first so the length travels with the pointer.
        let bytes: &'a [u8] = s;
        Self {
            raw: raw_string_view::from(bytes as *const [u8]),
            phantom_data: core::marker::PhantomData,
        }
    }
}
impl<'a> From<&'a str> for string_view<'a> {
fn from(s: &str) -> Self {
string_view {
raw: raw_string_view::from(s.as_bytes()),
phantom_data: core::marker::PhantomData,
}
}
}
impl<'a> From<&'a core::ffi::CStr> for string_view<'a> {
fn from(cstr: &core::ffi::CStr) -> Self {
string_view {
raw: raw_string_view::from(cstr.to_bytes()),
phantom_data: core::marker::PhantomData,
}
}
}
impl raw_string_view {
    /// Returns an equivalent Rust slice pointer.
    ///
    /// The resulting slice pointer is valid for the lifetime of the pointed-to
    /// object.
    ///
    /// Note: For empty strings, the address of the slice pointer may not be the
    /// same as the address of the raw_string_view. Null pointers are converted
    /// to valid, but dangling, pointers.
    #[inline(always)]
    pub fn as_raw_bytes(self) -> *const [u8] {
        // Delegates to `From<raw_string_view> for *const [u8]`.
        self.into()
    }

    /// Converts a `raw_string_view` containing valid UTF-8 to a `*const str`.
    ///
    /// # Errors
    ///
    /// Returns a `Utf8Error` if the viewed bytes are not valid UTF-8.
    ///
    /// # Safety
    ///
    /// Behavior is undefined if the `raw_string_view` has an invalid pointer.
    pub unsafe fn to_str(&self) -> Result<*const str, core::str::Utf8Error> {
        // SAFETY: The caller guarantees the view's pointer is valid, so the slice pointer
        // produced by `as_raw_bytes()` may be dereferenced for the UTF-8 check.
        let bytes: &[u8] = unsafe { &*self.as_raw_bytes() };
        let validated: *const str = core::str::from_utf8(bytes)?;
        Ok(validated)
    }

    /// Returns the length of the underlying string.
    ///
    /// # Safety
    ///
    /// Behavior is undefined if the `raw_string_view` has an invalid pointer.
    pub unsafe fn len(&self) -> usize {
        // TODO(b/249376862): use size(), which does not have the soundness issue below.
        // let size = unsafe {raw_string_view::size(&sv)};
        //
        // SAFETY: the call to end() requires that the raw_string_view not be dangling,
        // so this is unsound. (If `self` is dangling, then `end()` will perform pointer
        // arithmetic on a dangling pointer, which is implementation defined (and treated on
        // Clang as if it were UB).)
        let end = unsafe { raw_string_view::end(self) } as usize;
        // SAFETY: same reasoning (and same caveat) as for `end()` above.
        let begin = unsafe { raw_string_view::begin(self) } as usize;
        // The length is the distance between the two addresses.
        end - begin
    }

    /// Get string_view with lifetime linked to self.
    ///
    /// # Safety
    ///
    /// The data referred to by `self` must be valid, and the resulting `string_view` is subject to
    /// the same rules as a reference constructed from a raw pointer: it must not be accessed after
    /// the underlying memory becomes invalid or aliased by a unique reference. Careful choice of
    /// lifetime can enforce this.
    pub unsafe fn as_live<'s>(&'s self) -> string_view<'s> {
        // Copies the raw view; only the phantom lifetime `'s` distinguishes the result.
        string_view { raw: *self, phantom_data: core::marker::PhantomData }
    }

    /// Get a string_view with static lifetime.
    ///
    /// # Safety
    ///
    /// The data referred to by `self` must be valid, and the resulting `string_view` is subject to
    /// the same rules as a reference constructed from a raw pointer: it must not be accessed after
    /// the underlying memory becomes invalid or aliased by a unique reference.
    pub unsafe fn as_static_live(&'static self) -> string_view<'static> {
        // SAFETY: This function's contract is the contract of `as_live`, which the caller
        // has promised to uphold.
        unsafe { self.as_live() }
    }
}
/// Equivalent to `as_raw_bytes()`.
impl From<raw_string_view> for *const [u8] {
    fn from(sv: raw_string_view) -> Self {
        // SAFETY: `&sv` is a valid pointer. `data()` does not dereference the
        // raw_string_view.
        let start = unsafe { raw_string_view::data(&sv) } as *const u8;
        // SAFETY: this is unsound for the reason explained in raw_string_view::len().
        let size = unsafe { sv.len() };
        // Unlike C++, Rust does not allow for null data pointers in slices, so a null
        // start is swapped for a dangling (non-null, aligned) one.
        let start = if start.is_null() {
            // A null view is only expected to describe an empty string.
            debug_assert_eq!(size, 0);
            ptr::NonNull::<u8>::dangling().as_ptr() as *const u8
        } else {
            start
        };
        ptr::slice_from_raw_parts(start, size)
    }
}
impl From<&[u8]> for raw_string_view {
fn from(s: &[u8]) -> Self {
raw_string_view::from(s as *const [u8])
}
}
impl<const N: usize> From<&[u8; N]> for raw_string_view {
fn from(s: &[u8; N]) -> Self {
raw_string_view::from(s as *const [u8])
}
}
impl<const N: usize> From<*const [u8; N]> for raw_string_view {
fn from(s: *const [u8; N]) -> Self {
raw_string_view::from(s as *const [u8])
}
}
impl From<&str> for raw_string_view {
fn from(s: &str) -> Self {
raw_string_view::from(s.as_bytes())
}
}
impl From<&core::ffi::CStr> for raw_string_view {
fn from(cstr: &core::ffi::CStr) -> Self {
raw_string_view::from(cstr.to_bytes())
}
}
impl From<*const [u8]> for raw_string_view {
    /// Builds a `raw_string_view` from a raw slice pointer by writing the slice's data
    /// pointer and length directly into the struct's `__data_` and `__size_` fields.
    ///
    /// A zero-length slice is stored with a null data pointer.
    fn from(slice: *const [u8]) -> Self {
        // TODO(jeanpierreda): We can't access the constructors at the moment.
        // This little maneuver's gonna cost us 51 years of annoying build breakages
        // later, so really we should try to get the constructors callable.
        // SAFETY: raw_string_view (in Rust) is a `repr(C)` struct entirely composed of
        // arrays of MaybeUninit<u8>, so this would be safe even without the
        // zeroed(). With the zeroed, it's also safe even if we accidentally use
        // the type without further initialization. (In C++, the fields are a
        // pointer and an integer).
        let mut sv = unsafe { <core::mem::MaybeUninit<raw_string_view>>::zeroed().assume_init() };
        // We could also use the (unstable) to_raw_parts, but that feature may change
        // over time. It's also difficult, for idiosyncratic reasons, to pipe in
        // the feature flag to the automatically generated bindings for
        // `raw_string_view` that this file attaches onto, or to inject a dependency
        // on a crate to put this logic into. (The crate this file is a part of is
        // automatically generated by Crubit, and so we would need to tell
        // Crubit to add these to the generated bindings for `std`.) So for now,
        // the most expedient thing, and the thing least likely to break in a
        // future version of Rust, is to roll our own version of to_raw_parts, which uses
        // transmute under the hood.
        let (data, size) = get_raw_parts::<u8>(slice);
        // An empty slice may carry a dangling data pointer; hand C++ a null pointer instead.
        let data = if size == 0 { core::ptr::null() } else { data };
        // SAFETY: there is no immediate UB, because we are converting to an array of
        // MaybeUninit. There is also no later UB when read by C++, where it is
        // interpreted as a pointer: any non-dangling valid Rust pointer is also
        // a valid C++ pointer. The only time a dangling pointer exists is if
        // size == 0, in which case it was replaced with null above, which is
        // also a valid C++ pointer.
        sv.__data_ = unsafe { core::mem::transmute(data) };
        // SAFETY: there is no immediate UB, because we are converting to an array of
        // MaybeUninit. There is also no later UB when read by C++, where it is
        // interpreted as a `size_t`, which has the same set of object
        // representations as `size : usize`.
        sv.__size_ = unsafe { core::mem::transmute(size) };
        sv
    }
}
/// Opaque `Debug` output: always prints the fixed placeholder text and never reads the
/// viewed memory.
impl core::fmt::Debug for raw_string_view {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("<raw_string_view>")
    }
}