blob: a13101e8c4badfde07ae78e7a09b1fa8051be59a [file] [log] [blame]
// Part of the Crubit project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
Lukasz Anforowiczd71686c2023-02-17 14:29:55 -08005#ifndef CRUBIT_SUPPORT_RS_STD_CHAR_H_
6#define CRUBIT_SUPPORT_RS_STD_CHAR_H_
Lukasz Anforowicza782bda2023-01-17 14:04:50 -08007
8#include <cstdint>
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -08009#include <optional>
Lukasz Anforowicza782bda2023-01-17 14:04:50 -080010
Lukasz Anforowiczf1abea72023-01-20 14:18:58 -080011#include "absl/base/optimization.h"
Devin Jeanpierre12b7b572023-05-03 15:40:55 -070012#include "support/internal/attribute_macros.h"
Lukasz Anforowiczf1abea72023-01-20 14:18:58 -080013
Lukasz Anforowiczd71686c2023-02-17 14:29:55 -080014namespace rs_std {
Lukasz Anforowicza782bda2023-01-17 14:04:50 -080015
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080016// `rs_std::rs_char` is a C++ representation of the `char` type from Rust.
Devin Jeanpierre12b7b572023-05-03 15:40:55 -070017// `rust_builtin_type_abi_assumptions.md` documents the ABI compatibility of
Lukasz Anforowiczb06e0812023-03-02 15:54:32 -080018// these types.
Devin Jeanpierre12b7b572023-05-03 15:40:55 -070019class CRUBIT_INTERNAL_RUST_TYPE("char") rs_char final {
Lukasz Anforowicz1165a1f2023-01-18 15:10:41 -080020 public:
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080021 // Creates a default `rs_char` - one that represents ASCII NUL character.
Lukasz Anforowicz3ffc18d2023-01-19 14:56:09 -080022 //
23 // Providing the default constructor helps to ensure that the `value_` always
24 // effectively stores a C++ equivalent of a well-defined Rust's `u32` value
25 // (and never has a `MaybeUninit<u32>` value). See also the P2723R1 proposal
26 // for C++ which argues that zero-initialization may mitigate 10% of exploits.
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080027 constexpr rs_char() = default;
Lukasz Anforowicz3ffc18d2023-01-19 14:56:09 -080028
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080029 // Converts a `uint32_t` into a `rs_std::rs_char`.
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -080030 //
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080031 // Note that not all valid `uint32_t`s are valid `rs_std::rs_char`s.
32 // `from_u32` will return `std::nullopt` if the input is not a valid value for
33 // a `rs_std::rs_char`.
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -080034 //
35 // See also
36 // https://doc.rust-lang.org/reference/behavior-considered-undefined.html
37 // which documents that undefined behavior may result in presence of "A value
38 // in a char which is a surrogate or above char::MAX."
39 //
40 // This function mimics Rust's `char::from_u32`:
41 // https://doc.rust-lang.org/std/primitive.char.html#method.from_u32
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080042 static constexpr std::optional<rs_char> from_u32(char32_t c) {
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -080043 // TODO(lukasza): Consider using slightly more efficient checks similarly
44 // to how `char_try_from_u32` is implemented in Rust standard library.
Lukasz Anforowiczf1abea72023-01-20 14:18:58 -080045 if (ABSL_PREDICT_FALSE(c > 0x10ffff)) {
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -080046 // Value greater than Rust's `char::MAX`:
47 // https://doc.rust-lang.org/std/primitive.char.html#associatedconstant.MAX
48 return std::nullopt;
49 }
Lukasz Anforowicz1165a1f2023-01-18 15:10:41 -080050
Lukasz Anforowiczf1abea72023-01-20 14:18:58 -080051 if (ABSL_PREDICT_FALSE(c >= 0xd800 && c <= 0xdfff)) {
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -080052 // Surrogate characters.
53 return std::nullopt;
54 }
55
56 return from_u32_unchecked(c);
57 }
Lukasz Anforowicz1165a1f2023-01-18 15:10:41 -080058
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080059 constexpr rs_char(const rs_char&) = default;
60 constexpr rs_char& operator=(const rs_char&) = default;
61 constexpr rs_char(rs_char&&) = default;
62 constexpr rs_char& operator=(rs_char&&) = default;
63 ~rs_char() = default;
Lukasz Anforowicz1165a1f2023-01-18 15:10:41 -080064
65 explicit constexpr operator std::uint32_t() const { return value_; }
66
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080067 constexpr bool operator==(const rs_char& other) const {
Lukasz Anforowicz1165a1f2023-01-18 15:10:41 -080068 return value_ == other.value_;
69 }
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080070 constexpr bool operator!=(const rs_char& other) const {
Lukasz Anforowicz1165a1f2023-01-18 15:10:41 -080071 return value_ != other.value_;
72 }
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080073 constexpr bool operator<=(const rs_char& other) const {
Lukasz Anforowicz1165a1f2023-01-18 15:10:41 -080074 return value_ <= other.value_;
75 }
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080076 constexpr bool operator<(const rs_char& other) const {
Lukasz Anforowicz1165a1f2023-01-18 15:10:41 -080077 return value_ < other.value_;
78 }
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080079 constexpr bool operator>=(const rs_char& other) const {
Lukasz Anforowicz1165a1f2023-01-18 15:10:41 -080080 return value_ >= other.value_;
81 }
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080082 constexpr bool operator>(const rs_char& other) const {
Lukasz Anforowicz1165a1f2023-01-18 15:10:41 -080083 return value_ > other.value_;
84 }
85
Lukasz Anforowicz467a4202023-01-20 14:21:49 -080086 // The highest valid code point a char can have, '\u{10FFFF}'.
87 //
88 // This constant mimics Rust's `char::MAX`:
89 // https://doc.rust-lang.org/std/primitive.char.html#associatedconstant.MAX
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080090 static const rs_char MAX;
Lukasz Anforowicz467a4202023-01-20 14:21:49 -080091
Lukasz Anforowicz1165a1f2023-01-18 15:10:41 -080092 private:
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -080093 // This function mimics Rust's `char::from_u32_unchecked`:
94 // https://doc.rust-lang.org/std/primitive.char.html#method.from_u32_unchecked
95 //
96 // TODO(b/254095482): Figure out how to annotate/expose unsafe functions in
97 // C++ and then make this method public.
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080098 static constexpr rs_char from_u32_unchecked(std::uint32_t value) {
99 return rs_char(value);
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -0800100 }
101
102 // Private constructor - intended to only be used from `from_u32_unchecked`.
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -0800103 explicit constexpr rs_char(std::uint32_t value) : value_(value) {}
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -0800104
Lukasz Anforowicz1165a1f2023-01-18 15:10:41 -0800105 // See "layout tests" comments in `char_test.cc` for explanation why
106 // `char32_t` is not used.
Lukasz Anforowicz3ffc18d2023-01-19 14:56:09 -0800107 std::uint32_t value_ = '\0';
Lukasz Anforowicz1165a1f2023-01-18 15:10:41 -0800108};
Lukasz Anforowicza782bda2023-01-17 14:04:50 -0800109
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -0800110// Definition of `rs_char::MAX` - it can't be defined and declared within the
111// `class` definition, because before `rs_char` is fully defined the compiler
Lukasz Anforowicz467a4202023-01-20 14:21:49 -0800112// complains that `constexpr` variable cannot have non-literal type
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -0800113// 'const rs_char'.
114constexpr rs_char rs_char::MAX = rs_char::from_u32_unchecked(0x10ffff);
Lukasz Anforowicz467a4202023-01-20 14:21:49 -0800115
Lukasz Anforowiczd71686c2023-02-17 14:29:55 -0800116} // namespace rs_std
Lukasz Anforowicza782bda2023-01-17 14:04:50 -0800117
Lukasz Anforowiczd71686c2023-02-17 14:29:55 -0800118#endif // CRUBIT_SUPPORT_RS_STD_CHAR_H_