// Part of the Crubit project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
| 4 | |
Lukasz Anforowicz | d71686c | 2023-02-17 14:29:55 -0800 | [diff] [blame] | 5 | #ifndef CRUBIT_SUPPORT_RS_STD_CHAR_H_ |
| 6 | #define CRUBIT_SUPPORT_RS_STD_CHAR_H_ |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 7 | |
| 8 | #include <cstdint> |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 9 | #include <optional> |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 10 | |
Lukasz Anforowicz | f1abea7 | 2023-01-20 14:18:58 -0800 | [diff] [blame] | 11 | #include "absl/base/optimization.h" |
Devin Jeanpierre | 12b7b57 | 2023-05-03 15:40:55 -0700 | [diff] [blame^] | 12 | #include "support/internal/attribute_macros.h" |
Lukasz Anforowicz | f1abea7 | 2023-01-20 14:18:58 -0800 | [diff] [blame] | 13 | |
Lukasz Anforowicz | d71686c | 2023-02-17 14:29:55 -0800 | [diff] [blame] | 14 | namespace rs_std { |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 15 | |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 16 | // `rs_std::rs_char` is a C++ representation of the `char` type from Rust. |
Devin Jeanpierre | 12b7b57 | 2023-05-03 15:40:55 -0700 | [diff] [blame^] | 17 | // `rust_builtin_type_abi_assumptions.md` documents the ABI compatibility of |
Lukasz Anforowicz | b06e081 | 2023-03-02 15:54:32 -0800 | [diff] [blame] | 18 | // these types. |
Devin Jeanpierre | 12b7b57 | 2023-05-03 15:40:55 -0700 | [diff] [blame^] | 19 | class CRUBIT_INTERNAL_RUST_TYPE("char") rs_char final { |
Lukasz Anforowicz | 1165a1f | 2023-01-18 15:10:41 -0800 | [diff] [blame] | 20 | public: |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 21 | // Creates a default `rs_char` - one that represents ASCII NUL character. |
Lukasz Anforowicz | 3ffc18d | 2023-01-19 14:56:09 -0800 | [diff] [blame] | 22 | // |
| 23 | // Providing the default constructor helps to ensure that the `value_` always |
| 24 | // effectively stores a C++ equivalent of a well-defined Rust's `u32` value |
| 25 | // (and never has a `MaybeUninit<u32>` value). See also the P2723R1 proposal |
| 26 | // for C++ which argues that zero-initialization may mitigate 10% of exploits. |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 27 | constexpr rs_char() = default; |
Lukasz Anforowicz | 3ffc18d | 2023-01-19 14:56:09 -0800 | [diff] [blame] | 28 | |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 29 | // Converts a `uint32_t` into a `rs_std::rs_char`. |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 30 | // |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 31 | // Note that not all valid `uint32_t`s are valid `rs_std::rs_char`s. |
| 32 | // `from_u32` will return `std::nullopt` if the input is not a valid value for |
| 33 | // a `rs_std::rs_char`. |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 34 | // |
| 35 | // See also |
| 36 | // https://doc.rust-lang.org/reference/behavior-considered-undefined.html |
| 37 | // which documents that undefined behavior may result in presence of "A value |
| 38 | // in a char which is a surrogate or above char::MAX." |
| 39 | // |
| 40 | // This function mimics Rust's `char::from_u32`: |
| 41 | // https://doc.rust-lang.org/std/primitive.char.html#method.from_u32 |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 42 | static constexpr std::optional<rs_char> from_u32(char32_t c) { |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 43 | // TODO(lukasza): Consider using slightly more efficient checks similarly |
| 44 | // to how `char_try_from_u32` is implemented in Rust standard library. |
Lukasz Anforowicz | f1abea7 | 2023-01-20 14:18:58 -0800 | [diff] [blame] | 45 | if (ABSL_PREDICT_FALSE(c > 0x10ffff)) { |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 46 | // Value greater than Rust's `char::MAX`: |
| 47 | // https://doc.rust-lang.org/std/primitive.char.html#associatedconstant.MAX |
| 48 | return std::nullopt; |
| 49 | } |
Lukasz Anforowicz | 1165a1f | 2023-01-18 15:10:41 -0800 | [diff] [blame] | 50 | |
Lukasz Anforowicz | f1abea7 | 2023-01-20 14:18:58 -0800 | [diff] [blame] | 51 | if (ABSL_PREDICT_FALSE(c >= 0xd800 && c <= 0xdfff)) { |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 52 | // Surrogate characters. |
| 53 | return std::nullopt; |
| 54 | } |
| 55 | |
| 56 | return from_u32_unchecked(c); |
| 57 | } |
Lukasz Anforowicz | 1165a1f | 2023-01-18 15:10:41 -0800 | [diff] [blame] | 58 | |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 59 | constexpr rs_char(const rs_char&) = default; |
| 60 | constexpr rs_char& operator=(const rs_char&) = default; |
| 61 | constexpr rs_char(rs_char&&) = default; |
| 62 | constexpr rs_char& operator=(rs_char&&) = default; |
| 63 | ~rs_char() = default; |
Lukasz Anforowicz | 1165a1f | 2023-01-18 15:10:41 -0800 | [diff] [blame] | 64 | |
| 65 | explicit constexpr operator std::uint32_t() const { return value_; } |
| 66 | |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 67 | constexpr bool operator==(const rs_char& other) const { |
Lukasz Anforowicz | 1165a1f | 2023-01-18 15:10:41 -0800 | [diff] [blame] | 68 | return value_ == other.value_; |
| 69 | } |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 70 | constexpr bool operator!=(const rs_char& other) const { |
Lukasz Anforowicz | 1165a1f | 2023-01-18 15:10:41 -0800 | [diff] [blame] | 71 | return value_ != other.value_; |
| 72 | } |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 73 | constexpr bool operator<=(const rs_char& other) const { |
Lukasz Anforowicz | 1165a1f | 2023-01-18 15:10:41 -0800 | [diff] [blame] | 74 | return value_ <= other.value_; |
| 75 | } |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 76 | constexpr bool operator<(const rs_char& other) const { |
Lukasz Anforowicz | 1165a1f | 2023-01-18 15:10:41 -0800 | [diff] [blame] | 77 | return value_ < other.value_; |
| 78 | } |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 79 | constexpr bool operator>=(const rs_char& other) const { |
Lukasz Anforowicz | 1165a1f | 2023-01-18 15:10:41 -0800 | [diff] [blame] | 80 | return value_ >= other.value_; |
| 81 | } |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 82 | constexpr bool operator>(const rs_char& other) const { |
Lukasz Anforowicz | 1165a1f | 2023-01-18 15:10:41 -0800 | [diff] [blame] | 83 | return value_ > other.value_; |
| 84 | } |
| 85 | |
Lukasz Anforowicz | 467a420 | 2023-01-20 14:21:49 -0800 | [diff] [blame] | 86 | // The highest valid code point a char can have, '\u{10FFFF}'. |
| 87 | // |
| 88 | // This constant mimics Rust's `char::MAX`: |
| 89 | // https://doc.rust-lang.org/std/primitive.char.html#associatedconstant.MAX |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 90 | static const rs_char MAX; |
Lukasz Anforowicz | 467a420 | 2023-01-20 14:21:49 -0800 | [diff] [blame] | 91 | |
Lukasz Anforowicz | 1165a1f | 2023-01-18 15:10:41 -0800 | [diff] [blame] | 92 | private: |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 93 | // This function mimics Rust's `char::from_u32_unchecked`: |
| 94 | // https://doc.rust-lang.org/std/primitive.char.html#method.from_u32_unchecked |
| 95 | // |
| 96 | // TODO(b/254095482): Figure out how to annotate/expose unsafe functions in |
| 97 | // C++ and then make this method public. |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 98 | static constexpr rs_char from_u32_unchecked(std::uint32_t value) { |
| 99 | return rs_char(value); |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 100 | } |
| 101 | |
| 102 | // Private constructor - intended to only be used from `from_u32_unchecked`. |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 103 | explicit constexpr rs_char(std::uint32_t value) : value_(value) {} |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 104 | |
Lukasz Anforowicz | 1165a1f | 2023-01-18 15:10:41 -0800 | [diff] [blame] | 105 | // See "layout tests" comments in `char_test.cc` for explanation why |
| 106 | // `char32_t` is not used. |
Lukasz Anforowicz | 3ffc18d | 2023-01-19 14:56:09 -0800 | [diff] [blame] | 107 | std::uint32_t value_ = '\0'; |
Lukasz Anforowicz | 1165a1f | 2023-01-18 15:10:41 -0800 | [diff] [blame] | 108 | }; |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 109 | |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 110 | // Definition of `rs_char::MAX` - it can't be defined and declared within the |
| 111 | // `class` definition, because before `rs_char` is fully defined the compiler |
Lukasz Anforowicz | 467a420 | 2023-01-20 14:21:49 -0800 | [diff] [blame] | 112 | // complains that `constexpr` variable cannot have non-literal type |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 113 | // 'const rs_char'. |
| 114 | constexpr rs_char rs_char::MAX = rs_char::from_u32_unchecked(0x10ffff); |
Lukasz Anforowicz | 467a420 | 2023-01-20 14:21:49 -0800 | [diff] [blame] | 115 | |
Lukasz Anforowicz | d71686c | 2023-02-17 14:29:55 -0800 | [diff] [blame] | 116 | } // namespace rs_std |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 117 | |
Lukasz Anforowicz | d71686c | 2023-02-17 14:29:55 -0800 | [diff] [blame] | 118 | #endif // CRUBIT_SUPPORT_RS_STD_CHAR_H_ |