Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 1 | // Part of the Crubit project, under the Apache License v2.0 with LLVM |
| 2 | // Exceptions. See /LICENSE for license information. |
| 3 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 4 | |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 5 | #include "support/rs_std/rs_char.h" |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 6 | |
| 7 | #include <stdint.h> |
| 8 | |
Dmitri Gribenko | 785831e | 2023-07-14 06:47:36 -0700 | [diff] [blame] | 9 | #include <optional> |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 10 | #include <type_traits> |
| 11 | |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 12 | #include "gtest/gtest.h" |
| 13 | |
| 14 | namespace { |
| 15 | |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 16 | // Check that `rs_std::rs_char` is trivially destructible, copyable, and |
| 17 | // moveable. |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 18 | // |
| 19 | // There are no constructor-related checks, because well-formed-ness checks |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 20 | // require going through factory methods like `rs_char::from_u32`. |
| 21 | static_assert(std::is_trivially_destructible_v<rs_std::rs_char>); |
| 22 | static_assert(std::is_trivially_copy_constructible_v<rs_std::rs_char>); |
| 23 | static_assert(std::is_trivially_copy_assignable_v<rs_std::rs_char>); |
| 24 | static_assert(std::is_trivially_move_constructible_v<rs_std::rs_char>); |
| 25 | static_assert(std::is_trivially_move_assignable_v<rs_std::rs_char>); |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 26 | |
Lukasz Anforowicz | f46bdfb | 2023-05-22 13:35:50 -0700 | [diff] [blame] | 27 | // ABI^H^H^HLayout assertions. |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 28 | // |
| 29 | // https://rust-lang.github.io/unsafe-code-guidelines/layout/scalars.html#char |
| 30 | // documents that "Rust char is 32-bit wide and represents an unicode scalar |
| 31 | // value". |
| 32 | // |
| 33 | // We don't map Rust's `char` to C++ `char32_t` because |
Lukasz Anforowicz | 1165a1f | 2023-01-18 15:10:41 -0800 | [diff] [blame] | 34 | // https://en.cppreference.com/w/cpp/language/types#char32_t points out that the |
| 35 | // builtin `char32_t` type "has the same size, signedness, and alignment as |
| 36 | // std::uint_least32_t" (and therefore it is not guaranteed to be exactly |
| 37 | // 32-bits wide as required for ABI-compatibility with Rust). |
Lukasz Anforowicz | f46bdfb | 2023-05-22 13:35:50 -0700 | [diff] [blame] | 38 | // |
| 39 | // Equivalent layout and ABI assertion are also checked on Rust side in |
| 40 | // `format_ty_for_cc` in `cc_bindings_from_rs/bindings.rs` via `layout.align()` |
| 41 | // and `layout.size()`. It seems that there is no way to check `layout.abi()` |
| 42 | // on C++ side, but we can at least say that under the System V ABI a struct |
| 43 | // with a single field (i.e. the `rs_char` struct) has the same ABI |
| 44 | // classification as the field (as long as the field is smaller than "eight |
| 45 | // eightbytes" and the struct is trivial as verified via `static_assert`s |
| 46 | // above). In other words, under System V ABI we expect `rs_char` to be of |
| 47 | // INTEGER class - the same as verified by the `layout.abi()` assertion in |
| 48 | // `bindings.rs`. |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 49 | static_assert(sizeof(rs_std::rs_char) == 4); |
| 50 | static_assert(alignof(rs_std::rs_char) == 4); |
| 51 | static_assert(std::is_standard_layout_v<rs_std::rs_char>); |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 52 | |
| 53 | // This test covers the following case from |
| 54 | // https://en.cppreference.com/w/cpp/language/character_literal: |
| 55 | // |
| 56 | // Ordinary character literal, e.g. 'a' or '\n' or '\13'. Such literal has type |
| 57 | // `char` and the value equal to either: |
| 58 | // - the representation of c-char in the execution character set (until C++23) |
| 59 | // - the corresponding code point from ordinary literal encoding (since C++23). |
| 60 | TEST(RsCharTest, FromAsciiLiteral) { |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 61 | std::optional<const rs_std::rs_char> c = rs_std::rs_char::from_u32('x'); |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 62 | ASSERT_TRUE(c.has_value()); |
| 63 | EXPECT_EQ(0x78, uint32_t{*c}); |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 64 | } |
| 65 | |
| 66 | // This test covers the following case from |
| 67 | // https://en.cppreference.com/w/cpp/language/character_literal: |
| 68 | // |
| 69 | // UTF-8 character literal, e.g. u8'a'. Such literal has type `char` (until |
| 70 | // C++20) or `char8_t` (since C++20) and the value equal to ISO/IEC 10646 code |
| 71 | // point value of c-char, provided that the code point value is representable |
| 72 | // with a single UTF-8 code unit (that is, c-char is in the range 0x0-0x7F, |
| 73 | // inclusive). |
| 74 | TEST(RsCharTest, FromUtf8Literal) { |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 75 | std::optional<const rs_std::rs_char> c = rs_std::rs_char::from_u32(u8'x'); |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 76 | ASSERT_TRUE(c.has_value()); |
| 77 | EXPECT_EQ(0x78, uint32_t{*c}); |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 78 | } |
| 79 | |
| 80 | // This test covers the following case from |
| 81 | // https://en.cppreference.com/w/cpp/language/character_literal: |
| 82 | // |
| 83 | // UTF-16 character literal, e.g. u'猫', but not u'🍌' (u'\U0001f34c'). Such |
| 84 | // literal has type `char16_t` and the value equal to ISO/IEC 10646 code point |
| 85 | // value of c-char, provided that the code point value is representable with a |
| 86 | // single UTF-16 code unit (that is, c-char is in the range 0x0-0xFFFF, |
| 87 | // inclusive). |
| 88 | TEST(RsCharTest, FromUtf16Literal) { |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 89 | std::optional<const rs_std::rs_char> c = rs_std::rs_char::from_u32(u'Ł'); |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 90 | ASSERT_TRUE(c.has_value()); |
| 91 | EXPECT_EQ(0x141, uint32_t{*c}); |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 92 | } |
| 93 | |
| 94 | // This test covers the following case from |
| 95 | // https://en.cppreference.com/w/cpp/language/character_literal: |
| 96 | // |
| 97 | // UTF-32 character literal, e.g. U'猫' or U'🍌'. Such literal has type |
| 98 | // `char32_t` and the value equal to ISO/IEC 10646 code point value of c-char. |
| 99 | TEST(RsCharTest, FromUtf32Literal) { |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 100 | std::optional<const rs_std::rs_char> c = rs_std::rs_char::from_u32(U'🦀'); |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 101 | ASSERT_TRUE(c.has_value()); |
| 102 | EXPECT_EQ(0x1F980, uint32_t{*c}); |
| 103 | } |
| 104 | |
| 105 | TEST(RsCharTest, FromU32ValidityChecks) { |
| 106 | // Max 32-bit value. |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 107 | EXPECT_FALSE(rs_std::rs_char::from_u32(0xffffffff).has_value()); |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 108 | |
| 109 | // A value just above Rust's `char::MAX`: |
| 110 | // https://doc.rust-lang.org/std/primitive.char.html#associatedconstant.MAX. |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 111 | EXPECT_FALSE(rs_std::rs_char::from_u32(0x110000).has_value()); |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 112 | |
| 113 | // Smallest/greatest "high"/"low" surrogates. |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 114 | EXPECT_FALSE(rs_std::rs_char::from_u32(0xd800).has_value()); |
| 115 | EXPECT_FALSE(rs_std::rs_char::from_u32(0xdbff).has_value()); |
| 116 | EXPECT_FALSE(rs_std::rs_char::from_u32(0xdc00).has_value()); |
| 117 | EXPECT_FALSE(rs_std::rs_char::from_u32(0xdfff).has_value()); |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 118 | |
| 119 | // Smallest valid value. |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 120 | std::optional<rs_std::rs_char> maybe_c = rs_std::rs_char::from_u32('\0'); |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 121 | ASSERT_TRUE(maybe_c.has_value()); |
| 122 | EXPECT_EQ(0x00, uint32_t{*maybe_c}); |
| 123 | |
| 124 | // Greatest valid value. See also Rust's `char::MAX`: |
| 125 | // https://doc.rust-lang.org/std/primitive.char.html#associatedconstant.MAX. |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 126 | maybe_c = rs_std::rs_char::from_u32(0x10ffff); |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 127 | ASSERT_TRUE(maybe_c.has_value()); |
| 128 | EXPECT_EQ(0x10ffff, uint32_t{*maybe_c}); |
| 129 | |
| 130 | // Just below surrogates. |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 131 | maybe_c = rs_std::rs_char::from_u32(0xd7ff); |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 132 | ASSERT_TRUE(maybe_c.has_value()); |
| 133 | EXPECT_EQ(0xd7ff, uint32_t{*maybe_c}); |
| 134 | |
| 135 | // Just above surrogates. |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 136 | maybe_c = rs_std::rs_char::from_u32(0xe000); |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 137 | ASSERT_TRUE(maybe_c.has_value()); |
| 138 | EXPECT_EQ(0xe000, uint32_t{*maybe_c}); |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 139 | } |
| 140 | |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 141 | // Test that `rs_std::rs_char` values can be compared with other |
| 142 | // `rs_std::rs_char` values. |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 143 | TEST(RsCharTest, ComparisonWithAnotherRsChar) { |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 144 | std::optional<const rs_std::rs_char> a = rs_std::rs_char::from_u32('a'); |
| 145 | std::optional<const rs_std::rs_char> b = rs_std::rs_char::from_u32('b'); |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 146 | ASSERT_TRUE(a.has_value()); |
| 147 | ASSERT_TRUE(b.has_value()); |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 148 | |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 149 | EXPECT_TRUE(*a == *a); |
| 150 | EXPECT_FALSE(*a != *a); |
| 151 | EXPECT_TRUE(*a <= *a); |
| 152 | EXPECT_FALSE(a < *a); |
| 153 | EXPECT_TRUE(*a >= *a); |
| 154 | EXPECT_FALSE(*a > *a); |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 155 | |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 156 | EXPECT_FALSE(*a == *b); |
| 157 | EXPECT_TRUE(*a != *b); |
| 158 | EXPECT_TRUE(*a <= *b); |
| 159 | EXPECT_TRUE(*a < *b); |
| 160 | EXPECT_FALSE(*a >= *b); |
| 161 | EXPECT_FALSE(*a > *b); |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 162 | |
Lukasz Anforowicz | 706b25c | 2023-01-20 14:14:44 -0800 | [diff] [blame] | 163 | EXPECT_FALSE(*b == *a); |
| 164 | EXPECT_TRUE(*b != *a); |
| 165 | EXPECT_FALSE(*b <= *a); |
| 166 | EXPECT_FALSE(*b < *a); |
| 167 | EXPECT_TRUE(*b >= *a); |
| 168 | EXPECT_TRUE(*b > *a); |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 169 | } |
| 170 | |
Lukasz Anforowicz | 3ffc18d | 2023-01-19 14:56:09 -0800 | [diff] [blame] | 171 | TEST(RsCharTest, DefaultConstructedValue) { |
Lukasz Anforowicz | ec0b64e | 2023-02-17 14:31:12 -0800 | [diff] [blame] | 172 | rs_std::rs_char c; |
Lukasz Anforowicz | 3ffc18d | 2023-01-19 14:56:09 -0800 | [diff] [blame] | 173 | EXPECT_EQ(0, uint32_t{c}); |
| 174 | } |
| 175 | |
Lukasz Anforowicz | a782bda | 2023-01-17 14:04:50 -0800 | [diff] [blame] | 176 | } // namespace |