blob: be478cd6f5b15aeb098439997486e73a64bd957b [file] [log] [blame]
// Part of the Crubit project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -08005#include "support/rs_std/rs_char.h"
Lukasz Anforowicza782bda2023-01-17 14:04:50 -08006
7#include <stdint.h>
8
Dmitri Gribenko785831e2023-07-14 06:47:36 -07009#include <optional>
Lukasz Anforowicza782bda2023-01-17 14:04:50 -080010#include <type_traits>
11
Lukasz Anforowicza782bda2023-01-17 14:04:50 -080012#include "gtest/gtest.h"
13
14namespace {
15
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080016// Check that `rs_std::rs_char` is trivially destructible, copyable, and
17// moveable.
Lukasz Anforowicza782bda2023-01-17 14:04:50 -080018//
19// There are no constructor-related checks, because well-formed-ness checks
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080020// require going through factory methods like `rs_char::from_u32`.
21static_assert(std::is_trivially_destructible_v<rs_std::rs_char>);
22static_assert(std::is_trivially_copy_constructible_v<rs_std::rs_char>);
23static_assert(std::is_trivially_copy_assignable_v<rs_std::rs_char>);
24static_assert(std::is_trivially_move_constructible_v<rs_std::rs_char>);
25static_assert(std::is_trivially_move_assignable_v<rs_std::rs_char>);
Lukasz Anforowicza782bda2023-01-17 14:04:50 -080026
Lukasz Anforowiczf46bdfb2023-05-22 13:35:50 -070027// ABI^H^H^HLayout assertions.
Lukasz Anforowicza782bda2023-01-17 14:04:50 -080028//
29// https://rust-lang.github.io/unsafe-code-guidelines/layout/scalars.html#char
30// documents that "Rust char is 32-bit wide and represents an unicode scalar
31// value".
32//
33// We don't map Rust's `char` to C++ `char32_t` because
Lukasz Anforowicz1165a1f2023-01-18 15:10:41 -080034// https://en.cppreference.com/w/cpp/language/types#char32_t points out that the
35// builtin `char32_t` type "has the same size, signedness, and alignment as
36// std::uint_least32_t" (and therefore it is not guaranteed to be exactly
37// 32-bits wide as required for ABI-compatibility with Rust).
Lukasz Anforowiczf46bdfb2023-05-22 13:35:50 -070038//
39// Equivalent layout and ABI assertion are also checked on Rust side in
40// `format_ty_for_cc` in `cc_bindings_from_rs/bindings.rs` via `layout.align()`
41// and `layout.size()`. It seems that there is no way to check `layout.abi()`
42// on C++ side, but we can at least say that under the System V ABI a struct
43// with a single field (i.e. the `rs_char` struct) has the same ABI
44// classification as the field (as long as the field is smaller than "eight
45// eightbytes" and the struct is trivial as verified via `static_assert`s
46// above). In other words, under System V ABI we expect `rs_char` to be of
47// INTEGER class - the same as verified by the `layout.abi()` assertion in
48// `bindings.rs`.
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080049static_assert(sizeof(rs_std::rs_char) == 4);
50static_assert(alignof(rs_std::rs_char) == 4);
51static_assert(std::is_standard_layout_v<rs_std::rs_char>);
Lukasz Anforowicza782bda2023-01-17 14:04:50 -080052
53// This test covers the following case from
54// https://en.cppreference.com/w/cpp/language/character_literal:
55//
56// Ordinary character literal, e.g. 'a' or '\n' or '\13'. Such literal has type
57// `char` and the value equal to either:
58// - the representation of c-char in the execution character set (until C++23)
59// - the corresponding code point from ordinary literal encoding (since C++23).
60TEST(RsCharTest, FromAsciiLiteral) {
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080061 std::optional<const rs_std::rs_char> c = rs_std::rs_char::from_u32('x');
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -080062 ASSERT_TRUE(c.has_value());
63 EXPECT_EQ(0x78, uint32_t{*c});
Lukasz Anforowicza782bda2023-01-17 14:04:50 -080064}
65
66// This test covers the following case from
67// https://en.cppreference.com/w/cpp/language/character_literal:
68//
69// UTF-8 character literal, e.g. u8'a'. Such literal has type `char` (until
70// C++20) or `char8_t` (since C++20) and the value equal to ISO/IEC 10646 code
71// point value of c-char, provided that the code point value is representable
72// with a single UTF-8 code unit (that is, c-char is in the range 0x0-0x7F,
73// inclusive).
74TEST(RsCharTest, FromUtf8Literal) {
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080075 std::optional<const rs_std::rs_char> c = rs_std::rs_char::from_u32(u8'x');
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -080076 ASSERT_TRUE(c.has_value());
77 EXPECT_EQ(0x78, uint32_t{*c});
Lukasz Anforowicza782bda2023-01-17 14:04:50 -080078}
79
80// This test covers the following case from
81// https://en.cppreference.com/w/cpp/language/character_literal:
82//
83// UTF-16 character literal, e.g. u'猫', but not u'🍌' (u'\U0001f34c'). Such
84// literal has type `char16_t` and the value equal to ISO/IEC 10646 code point
85// value of c-char, provided that the code point value is representable with a
86// single UTF-16 code unit (that is, c-char is in the range 0x0-0xFFFF,
87// inclusive).
88TEST(RsCharTest, FromUtf16Literal) {
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -080089 std::optional<const rs_std::rs_char> c = rs_std::rs_char::from_u32(u'Ł');
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -080090 ASSERT_TRUE(c.has_value());
91 EXPECT_EQ(0x141, uint32_t{*c});
Lukasz Anforowicza782bda2023-01-17 14:04:50 -080092}
93
94// This test covers the following case from
95// https://en.cppreference.com/w/cpp/language/character_literal:
96//
97// UTF-32 character literal, e.g. U'猫' or U'🍌'. Such literal has type
98// `char32_t` and the value equal to ISO/IEC 10646 code point value of c-char.
99TEST(RsCharTest, FromUtf32Literal) {
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -0800100 std::optional<const rs_std::rs_char> c = rs_std::rs_char::from_u32(U'🦀');
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -0800101 ASSERT_TRUE(c.has_value());
102 EXPECT_EQ(0x1F980, uint32_t{*c});
103}
104
105TEST(RsCharTest, FromU32ValidityChecks) {
106 // Max 32-bit value.
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -0800107 EXPECT_FALSE(rs_std::rs_char::from_u32(0xffffffff).has_value());
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -0800108
109 // A value just above Rust's `char::MAX`:
110 // https://doc.rust-lang.org/std/primitive.char.html#associatedconstant.MAX.
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -0800111 EXPECT_FALSE(rs_std::rs_char::from_u32(0x110000).has_value());
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -0800112
113 // Smallest/greatest "high"/"low" surrogates.
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -0800114 EXPECT_FALSE(rs_std::rs_char::from_u32(0xd800).has_value());
115 EXPECT_FALSE(rs_std::rs_char::from_u32(0xdbff).has_value());
116 EXPECT_FALSE(rs_std::rs_char::from_u32(0xdc00).has_value());
117 EXPECT_FALSE(rs_std::rs_char::from_u32(0xdfff).has_value());
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -0800118
119 // Smallest valid value.
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -0800120 std::optional<rs_std::rs_char> maybe_c = rs_std::rs_char::from_u32('\0');
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -0800121 ASSERT_TRUE(maybe_c.has_value());
122 EXPECT_EQ(0x00, uint32_t{*maybe_c});
123
124 // Greatest valid value. See also Rust's `char::MAX`:
125 // https://doc.rust-lang.org/std/primitive.char.html#associatedconstant.MAX.
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -0800126 maybe_c = rs_std::rs_char::from_u32(0x10ffff);
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -0800127 ASSERT_TRUE(maybe_c.has_value());
128 EXPECT_EQ(0x10ffff, uint32_t{*maybe_c});
129
130 // Just below surrogates.
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -0800131 maybe_c = rs_std::rs_char::from_u32(0xd7ff);
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -0800132 ASSERT_TRUE(maybe_c.has_value());
133 EXPECT_EQ(0xd7ff, uint32_t{*maybe_c});
134
135 // Just above surrogates.
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -0800136 maybe_c = rs_std::rs_char::from_u32(0xe000);
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -0800137 ASSERT_TRUE(maybe_c.has_value());
138 EXPECT_EQ(0xe000, uint32_t{*maybe_c});
Lukasz Anforowicza782bda2023-01-17 14:04:50 -0800139}
140
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -0800141// Test that `rs_std::rs_char` values can be compared with other
142// `rs_std::rs_char` values.
Lukasz Anforowicza782bda2023-01-17 14:04:50 -0800143TEST(RsCharTest, ComparisonWithAnotherRsChar) {
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -0800144 std::optional<const rs_std::rs_char> a = rs_std::rs_char::from_u32('a');
145 std::optional<const rs_std::rs_char> b = rs_std::rs_char::from_u32('b');
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -0800146 ASSERT_TRUE(a.has_value());
147 ASSERT_TRUE(b.has_value());
Lukasz Anforowicza782bda2023-01-17 14:04:50 -0800148
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -0800149 EXPECT_TRUE(*a == *a);
150 EXPECT_FALSE(*a != *a);
151 EXPECT_TRUE(*a <= *a);
152 EXPECT_FALSE(a < *a);
153 EXPECT_TRUE(*a >= *a);
154 EXPECT_FALSE(*a > *a);
Lukasz Anforowicza782bda2023-01-17 14:04:50 -0800155
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -0800156 EXPECT_FALSE(*a == *b);
157 EXPECT_TRUE(*a != *b);
158 EXPECT_TRUE(*a <= *b);
159 EXPECT_TRUE(*a < *b);
160 EXPECT_FALSE(*a >= *b);
161 EXPECT_FALSE(*a > *b);
Lukasz Anforowicza782bda2023-01-17 14:04:50 -0800162
Lukasz Anforowicz706b25c2023-01-20 14:14:44 -0800163 EXPECT_FALSE(*b == *a);
164 EXPECT_TRUE(*b != *a);
165 EXPECT_FALSE(*b <= *a);
166 EXPECT_FALSE(*b < *a);
167 EXPECT_TRUE(*b >= *a);
168 EXPECT_TRUE(*b > *a);
Lukasz Anforowicza782bda2023-01-17 14:04:50 -0800169}
170
Lukasz Anforowicz3ffc18d2023-01-19 14:56:09 -0800171TEST(RsCharTest, DefaultConstructedValue) {
Lukasz Anforowiczec0b64e2023-02-17 14:31:12 -0800172 rs_std::rs_char c;
Lukasz Anforowicz3ffc18d2023-01-19 14:56:09 -0800173 EXPECT_EQ(0, uint32_t{c});
174}
175
Lukasz Anforowicza782bda2023-01-17 14:04:50 -0800176} // namespace