blob: 8c78f39f5c21177cc5160c47697a5f49dce9beea [file] [log] [blame]
// Part of the Crubit project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#![feature(allocator_api)]
#![feature(cfg_sanitize)]
use std::collections::TryReserveError;
#[cfg(sanitize = "address")]
use std::ffi::c_void;
use std::mem::ManuallyDrop;
use std::ops::RangeBounds;
use std::ops::{Deref, DerefMut};
use std::ops::{Index, IndexMut};
use std::slice;
extern "C" {
// https://github.com/llvm/llvm-project/blob/9d0616ce52fc2a75c8e4808adec41d5189f4240c/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h#L70
#[cfg(sanitize = "address")]
fn __sanitizer_annotate_contiguous_container(
beg: *const c_void,
end: *const c_void,
old_mid: *const c_void,
new_mid: *const c_void,
);
}
/// A mutable, contiguous, dynamically-sized container of elements
/// of type `T`, ABI-compatible with `std::vector` from C++.
// TODO(b/356221873): Ensure Vector<T> is covariant.
/// 2 layouts are supported.
/// 1. This layout was found empirically on Linux with modern g++ and libc++. If
/// for some version of libc++ it is different, we will need to update it with
/// conditional compilation.
#[cfg(not(len_capacity_encoding))]
#[repr(C)]
pub struct Vector<T> {
begin: *mut T,
_end: *mut T,
_capacity_end: *mut T,
}
/// 2. This layout is experimental.
#[cfg(len_capacity_encoding)]
#[repr(C)]
pub struct Vector<T> {
begin: *mut T,
len: usize,
capacity: usize,
}
// TODO(b/356221873): Implement Send and Sync.
impl<T> Vector<T> {
pub fn new() -> Vector<T> {
#[cfg(len_capacity_encoding)]
{
Vector { begin: core::ptr::null_mut(), len: 0, capacity: 0 }
}
#[cfg(not(len_capacity_encoding))]
{
Vector {
begin: core::ptr::null_mut(),
_end: core::ptr::null_mut(),
_capacity_end: core::ptr::null_mut(),
}
}
}
pub fn len(&self) -> usize {
#[cfg(len_capacity_encoding)]
{
self.len
}
#[cfg(not(len_capacity_encoding))]
{
// TODO(b/356221873): delete the `if` once a stable Rust release allows
//offset_from for "the same address"
if self.begin.is_null() {
0
} else {
unsafe { self._end.offset_from(self.begin).try_into().unwrap() }
}
}
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn capacity(&self) -> usize {
#[cfg(len_capacity_encoding)]
{
self.capacity
}
#[cfg(not(len_capacity_encoding))]
{
// TODO(b/356221873): delete the `if` once a stable Rust release allows
// offset_from for "the same address"
if self.begin.is_null() {
0
} else {
unsafe { self._capacity_end.offset_from(self.begin).try_into().unwrap() }
}
}
}
fn end(&self) -> *mut T {
#[cfg(len_capacity_encoding)]
{
unsafe { self.begin.add(self.len) }
}
#[cfg(not(len_capacity_encoding))]
{
self._end
}
}
fn capacity_end(&self) -> *mut T {
#[cfg(len_capacity_encoding)]
{
unsafe { self.begin.add(self.capacity) }
}
#[cfg(not(len_capacity_encoding))]
{
self._capacity_end
}
}
unsafe fn set_begin_len_capacity(&mut self, begin: *mut T, len: usize, capacity: usize) {
#[cfg(len_capacity_encoding)]
{
self.begin = begin;
self.len = len;
self.capacity = capacity;
}
#[cfg(not(len_capacity_encoding))]
{
self.begin = begin;
self._end = begin.add(len);
self._capacity_end = begin.add(capacity);
}
}
/// Prepares the vector to write into the tail.
/// See docstring of [`Vector::set_len`] for more details.
pub fn prepare_to_write_into_tail(&mut self) {
self.asan_unpoison_tail();
}
/// Sets the length of the vector.
///
/// # Safety
///
/// - `new_len` must be less than or equal to [`capacity()`].
/// - The elements at `old_len..new_len` must be initialized.
///
/// See [`std::vec::Vec::set_len`] for more details.
///
/// The difference with `std::vec::Vec::set_len` is that the tail of the
/// Vector is poisoned with ASan, so when writing to the tail for
/// avoiding ASan errors [`Vector::prepare_to_write_into_tail`] must be
/// called first:
/// ```
/// v.prepare_to_write_into_tail()
/// // write to tail (i.e. between v.len() and v.capacity())
/// v.set_len(len)
/// ```
pub unsafe fn set_len(&mut self, len: usize) {
#[cfg(len_capacity_encoding)]
{
self.len = len;
}
#[cfg(not(len_capacity_encoding))]
{
self._end = self.begin.add(len);
}
self.asan_poison_tail();
}
#[inline]
#[cfg(not(sanitize = "address"))]
fn asan_poison_tail(&self) {}
#[inline]
#[cfg(not(sanitize = "address"))]
fn asan_unpoison_tail(&self) {}
#[inline]
#[cfg(sanitize = "address")]
fn asan_poison_tail(&self) {
// C++ std::vector supports an ASan container annotation feature
// (https://github.com/google/sanitizers/wiki/AddressSanitizerContainerOverflow)
// that allows ASan to detect reads and writes in the uninitialized tail of the
// std::vector's storage (between the end iterator and capacity).
//
// Rust std::vec::Vec intentionally does not support this ASan annotation
// feature, because it allows users to initialize the elements in the
// tail of the storage and then call set_len to tell the Vec about new
// elements.
//
// ASan uses the term "poisoned" for data that cannot be accessed. When marking
// data as inaccessible, we poison it, to make them accessible we
// unpoison the data. So, the tail of a Rust Vec's storage is always
// unpoisoned, even when ASan is enabled.
//
// Thus, when we use Rust Vec to implement operations on C++ std::vector's
// storage, we hit an incompatibility: the tail of the C++ std::vector
// is poisoned, but Rust Vec does not unpoison before writing into it.
//
// Therefore, when we create a Rust Vec using the storage of a C++ std::vector
// we need to establish the ASan poision/unpoison invariants that the Rust Vec
// expects. Specifically, we unpoison the storage before a Rust Vec
// writes into the uninitialized tail. Furthermore, once we are done
// using a Rust Vec to manipulate the storage, we poison the tail agail.
//
// As an optimization we don't poison/unpoison the tail when we create a Rust
// Vec purely to perform reads, or to mutate existing elements in-place.
unsafe {
// The following call is the same as __annotate_new in C++ std::vector
// https://github.com/llvm/llvm-project/blob/9d0616ce52fc2a75c8e4808adec41d5189f4240c/libcxx/include/vector#L920
__sanitizer_annotate_contiguous_container(
self.begin as *const c_void,
self.capacity_end() as *const c_void,
self.capacity_end() as *const c_void,
self.end() as *const c_void,
);
}
}
#[inline]
#[cfg(sanitize = "address")]
fn asan_unpoison_tail(&self) {
unsafe {
// The following call is the same as __annotate_delete in C++
// std::vector https://github.com/llvm/llvm-project/blob/9d0616ce52fc2a75c8e4808adec41d5189f4240c/libcxx/include/vector#L927
__sanitizer_annotate_contiguous_container(
self.begin as *const c_void,
self.capacity_end() as *const c_void,
self.end() as *const c_void,
self.capacity_end() as *const c_void,
);
}
}
}
impl<T: Unpin> Vector<T> {
/// Mutates `self` as if it were a `Vec<T>`.
fn mutate_self_as_vec<F, R>(&mut self, mutate_self: F) -> R
where
F: FnOnce(&mut Vec<T, cpp_std_allocator::StdAllocator>) -> R,
{
unsafe {
self.asan_unpoison_tail();
let mut v = ManuallyDrop::new(create_vec_from_raw_parts(
self.begin,
self.len(),
self.capacity(),
));
let result = mutate_self(v.deref_mut());
self.set_begin_len_capacity(v.as_mut_ptr(), v.len(), v.capacity());
self.asan_poison_tail();
result
}
}
// Methods for changing the capacity of the vector.
pub fn reserve(&mut self, capacity: usize) {
self.mutate_self_as_vec(|v| v.reserve(capacity));
}
pub fn reserve_exact(&mut self, additional: usize) {
self.mutate_self_as_vec(|v| v.reserve_exact(additional));
}
pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
self.mutate_self_as_vec(|v| v.try_reserve(additional))
}
pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
self.mutate_self_as_vec(|v| v.try_reserve_exact(additional))
}
pub fn shrink_to_fit(&mut self) {
self.mutate_self_as_vec(|v| v.shrink_to_fit());
}
pub fn shrink_to(&mut self, min_capacity: usize) {
self.mutate_self_as_vec(|v| v.shrink_to(min_capacity));
}
pub fn with_capacity(capacity: usize) -> Vector<T> {
let mut result = Vector::new();
result.reserve(capacity);
result
}
// Methods for adding elements to the vector.
pub fn push(&mut self, value: T) {
self.mutate_self_as_vec(|v| v.push(value));
}
pub fn insert(&mut self, index: usize, element: T) {
self.mutate_self_as_vec(|v| v.insert(index, element));
}
pub fn append(&mut self, other: &mut Self) {
self.mutate_self_as_vec(|v| other.mutate_self_as_vec(|other_v| v.append(other_v)))
}
// Methods for deleting elements from the vector.
pub fn swap_remove(&mut self, index: usize) -> T {
self.mutate_self_as_vec(|v| v.swap_remove(index))
}
pub fn remove(&mut self, index: usize) -> T {
self.mutate_self_as_vec(|v| v.remove(index))
}
pub fn pop(&mut self) -> Option<T> {
self.mutate_self_as_vec(|v| v.pop())
}
pub fn clear(&mut self) {
self.mutate_self_as_vec(|v| v.clear());
}
pub fn truncate(&mut self, len: usize) {
self.mutate_self_as_vec(|v| v.truncate(len));
}
// Different Vector transformations.
pub fn dedup_by<F>(&mut self, same_bucket: F)
where
F: FnMut(&mut T, &mut T) -> bool,
{
self.mutate_self_as_vec(|v| v.dedup_by(same_bucket));
}
pub fn dedup_by_key<F, K>(&mut self, key: F)
where
F: FnMut(&mut T) -> K,
K: PartialEq,
{
self.mutate_self_as_vec(|v| v.dedup_by_key(key));
}
// Methods returning different vector representations.
pub fn into_vec(mut self) -> Vec<T> {
let mut result = Vec::<T>::with_capacity(self.len());
unsafe {
std::ptr::copy_nonoverlapping(self.as_ptr(), result.as_mut_ptr(), self.len());
result.set_len(self.len());
// The elements were moved out. Now mark `self` empty, without calling drop on
// elements.
self.asan_unpoison_tail();
self.set_len(0);
}
result
}
pub fn as_ptr(&self) -> *const T {
self.begin
}
pub fn as_mut_ptr(&mut self) -> *mut T {
self.begin
}
pub fn as_slice(&self) -> &[T] {
unsafe { std::slice::from_raw_parts(self.begin, self.len()) }
}
pub fn as_mut_slice(&mut self) -> &mut [T] {
unsafe { std::slice::from_raw_parts_mut(self.begin, self.len()) }
}
}
impl<T: Unpin + Clone> Vector<T> {
pub fn extend_from_within<R>(&mut self, src: R)
where
R: RangeBounds<usize>,
{
self.mutate_self_as_vec(|v| v.extend_from_within(src));
}
pub fn resize(&mut self, new_len: usize, value: T) {
self.mutate_self_as_vec(|v| v.resize(new_len, value));
}
pub fn resize_with<F>(&mut self, new_len: usize, f: F)
where
F: FnMut() -> T,
{
self.mutate_self_as_vec(|v| v.resize_with(new_len, f));
}
}
impl<T: Unpin + PartialEq> Vector<T> {
pub fn dedup(&mut self) {
self.mutate_self_as_vec(|v| v.dedup());
}
}
impl<T> Default for Vector<T> {
fn default() -> Self {
Self::new()
}
}
impl<T> Drop for Vector<T> {
fn drop(&mut self) {
if !self.begin.is_null() {
unsafe {
self.asan_unpoison_tail();
_ = Vec::from_raw_parts_in(
self.begin,
self.len(),
self.capacity(),
cpp_std_allocator::StdAllocator {},
);
}
}
}
}
impl<T> Index<usize> for Vector<T> {
type Output = T;
fn index(&self, index: usize) -> &Self::Output {
self.get(index).unwrap()
}
}
impl<T: Unpin> IndexMut<usize> for Vector<T> {
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
self.get_mut(index).unwrap()
}
}
impl<T> Deref for Vector<T> {
type Target = [T];
fn deref(&self) -> &Self::Target {
if self.is_empty() {
&[]
} else {
unsafe { std::slice::from_raw_parts(self.begin, self.len()) }
}
}
}
impl<T: Unpin> DerefMut for Vector<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
if self.is_empty() {
&mut []
} else {
unsafe { std::slice::from_raw_parts_mut(self.begin, self.len()) }
}
}
}
impl<T: Unpin> From<Vec<T>> for Vector<T> {
fn from(v: Vec<T>) -> Self {
// Elements from `v` are moved. It would be more efficient to steal a buffer
// from `v`. But `v` might have different allocator than Vector.
// TODO(b/356221873): Figure out conditions when it is possible to steal buffer
// from `v`.
let mut result = Vector::<T>::with_capacity(v.len());
result.mutate_self_as_vec(|u| {
u.extend(v);
});
result
}
}
impl<T: Clone> Vector<T> {
/// Clone elements from `self` to `Vec<T>`.
pub fn to_vec(&self) -> Vec<T> {
let mut v = Vec::<T>::with_capacity(self.len());
for el in self.iter() {
v.push(el.clone());
}
v
}
}
impl<T: Unpin> Extend<T> for Vector<T> {
fn extend<I>(&mut self, iter: I)
where
I: IntoIterator<Item = T>,
{
self.mutate_self_as_vec(|v| v.extend(iter));
}
}
/// Helper method for creating a `Vec<T>` from raw parts.
fn create_vec_from_raw_parts<T>(
begin: *mut T,
len: usize,
capacity: usize,
) -> Vec<T, cpp_std_allocator::StdAllocator> {
unsafe {
if begin.is_null() {
Vec::new_in(cpp_std_allocator::StdAllocator {})
} else {
Vec::from_raw_parts_in(begin, len, capacity, cpp_std_allocator::StdAllocator {})
}
}
}
mod iter {
use crate::Vector;
use std::fmt;
use std::fmt::Debug;
use std::iter::FusedIterator;
/// An iterator that moves out of a vector.
///
/// This type is currently a wrapper around `std::vec::IntoIter`,
/// however users should not depend on it, and the wrapped iterator
/// should not be made public. If the current implementation
/// strategy stops working, the wrapped iterator will be replaced with
/// a more complex implementation.
pub struct VectorIntoIter<T>(std::vec::IntoIter<T, cpp_std_allocator::StdAllocator>);
impl<T> VectorIntoIter<T> {
pub fn new(v: std::vec::IntoIter<T, cpp_std_allocator::StdAllocator>) -> Self {
VectorIntoIter(v)
}
/// Returns the remaining items of this iterator as a slice.
#[inline]
pub fn as_slice(&mut self) -> &[T] {
self.0.as_slice()
}
/// Returns the remaining items of this iterator as a mutable slice.
#[inline]
pub fn as_mut_slice(&mut self) -> &mut [T] {
self.0.as_mut_slice()
}
}
impl<T> Iterator for VectorIntoIter<T> {
type Item = T;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.0.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
// TODO(b/356638830): Uncomment when feature advance_by is stable
// fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
// self.0.advance_by(n)
// }
#[inline]
fn count(self) -> usize {
self.0.len()
}
}
impl<T> AsRef<[T]> for VectorIntoIter<T> {
fn as_ref(&self) -> &[T] {
self.0.as_ref()
}
}
impl<T: fmt::Debug> Debug for VectorIntoIter<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl<T: Unpin> Default for VectorIntoIter<T> {
fn default() -> Self {
Vector::<T>::default().into_iter()
}
}
impl<T> DoubleEndedIterator for VectorIntoIter<T> {
#[inline]
fn next_back(&mut self) -> Option<T> {
self.0.next_back()
}
// TODO(b/356638830): Uncomment when feature advance_back_by is stable
// #[inline]
// fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
// self.0.advance_back_by()
// }
}
impl<T> ExactSizeIterator for VectorIntoIter<T> {
// TODO(b/356638830): Uncomment when feature is_empty is stable
// #[inline]
// fn is_empty(&self) -> bool {
// self.0.is_empty()
// }
}
impl<T> FusedIterator for VectorIntoIter<T> {}
// TODO(b/356638830): Uncomment when feature TrustedLen is stable
// unsafe impl<T> TrustedLen for VectorIntoIter<T> {}
}
use iter::*;
impl<T: Unpin> IntoIterator for Vector<T> {
type Item = T;
type IntoIter = VectorIntoIter<T>;
fn into_iter(self) -> Self::IntoIter {
unsafe {
self.asan_unpoison_tail();
let v = create_vec_from_raw_parts(self.begin, self.len(), self.capacity());
core::mem::forget(self);
VectorIntoIter::new(v.into_iter())
}
}
}
impl<'a, T: Unpin> IntoIterator for &'a Vector<T> {
type Item = &'a T;
type IntoIter = slice::Iter<'a, T>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
impl<'a, T: Unpin> IntoIterator for &'a mut Vector<T> {
type Item = &'a mut T;
type IntoIter = slice::IterMut<'a, T>;
fn into_iter(self) -> Self::IntoIter {
self.iter_mut()
}
}
impl<T: Unpin> FromIterator<T> for Vector<T> {
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
let mut v = Self::new();
v.mutate_self_as_vec(|u| {
u.extend(iter);
});
v
}
}
impl<T: Unpin> From<Vector<T>> for std::vec::Vec<T> {
fn from(v: Vector<T>) -> std::vec::Vec<T> {
v.into_vec()
}
}