Change `rstd::Char` into a `class`. Before this CL, `rstd::Char` was a type alias for `std::uint32_t`. After this CL, `rstd::Char` is a separate `class` (that privately wraps `std::uint32_t`). This change is a step toward a follow-up CL that will start rejecting invalid bit patterns in constructors of `rstd::Char`. PiperOrigin-RevId: 502991306
diff --git a/support/rstd/char.h b/support/rstd/char.h index 2270ba0..d4c1450 100644 --- a/support/rstd/char.h +++ b/support/rstd/char.h
@@ -10,13 +10,50 @@ namespace rstd { // `rstd::Char` is a C++ representation of the `char` type from Rust. -// -// See "layout tests" comments in `char_test.cc` for explanation why `char32_t` -// is not used. -// -// TODO(b/265338802): Reject `char` values with invalid bit patterns (possibly -// retaining `constexpr` aspect of some conversions). -using Char = std::uint32_t; +class Char final { + public: + // TODO(b/265338802): Reject `char` values that may represent a part of a + // UTF-8 character (i.e. only the first 0-127 ASCII characters should be + // accepted). + constexpr explicit Char(char c) : value_(c) {} + + // TODO(b/265338802): Reject `char` values with invalid bit patterns + // (retaining the `constexpr` aspect if possible). + constexpr explicit Char(char16_t c) : value_(c) {} + constexpr explicit Char(char32_t c) : value_(c) {} + + constexpr Char(const Char&) = default; + constexpr Char& operator=(const Char&) = default; + constexpr Char(Char&&) = default; + constexpr Char& operator=(Char&&) = default; + ~Char() = default; + + explicit constexpr operator std::uint32_t() const { return value_; } + + constexpr bool operator==(const Char& other) const { + return value_ == other.value_; + } + constexpr bool operator!=(const Char& other) const { + return value_ != other.value_; + } + constexpr bool operator<=(const Char& other) const { + return value_ <= other.value_; + } + constexpr bool operator<(const Char& other) const { + return value_ < other.value_; + } + constexpr bool operator>=(const Char& other) const { + return value_ >= other.value_; + } + constexpr bool operator>(const Char& other) const { + return value_ > other.value_; + } + + private: + // See "layout tests" comments in `char_test.cc` for explanation why + // `char32_t` is not used. + std::uint32_t value_; +}; } // namespace rstd
diff --git a/support/rstd/char_test.cc b/support/rstd/char_test.cc index bc84094..4db685f 100644 --- a/support/rstd/char_test.cc +++ b/support/rstd/char_test.cc
@@ -32,12 +32,10 @@ // value". // // We don't map Rust's `char` to C++ `char32_t` because -// - It may be wider than 32 bits - <internal link>/c/string/multibyte/char32_t says -// that "char32_t is an unsigned integer type used for 32-bit wide characters -// and is the same type as uint_least32_t. uint_least32_t is the smallest -// unsigned integer type with width of at least 32 bits" -// - It is problematic on MacOS - https://github.com/eqrion/cbindgen/issues/423 -// points out that `uchar.h` is missing on that platform. +// https://en.cppreference.com/w/cpp/language/types#char32_t points out that the +// builtin `char32_t` type "has the same size, signedness, and alignment as +// std::uint_least32_t" (and therefore it is not guaranteed to be exactly +// 32-bits wide as required for ABI-compatibility with Rust). static_assert(sizeof(rstd::Char) == 4); static_assert(alignof(rstd::Char) == 4); static_assert(std::is_standard_layout_v<rstd::Char>); @@ -50,7 +48,7 @@ // - the representation of c-char in the execution character set (until C++23) // - the corresponding code point from ordinary literal encoding (since C++23). TEST(RsCharTest, FromAsciiLiteral) { - rstd::Char c = 'x'; + const rstd::Char c('x'); EXPECT_EQ(0x78, static_cast<uint32_t>(c)); } @@ -63,7 +61,7 @@ // with a single UTF-8 code unit (that is, c-char is in the range 0x0-0x7F, // inclusive). TEST(RsCharTest, FromUtf8Literal) { - rstd::Char c = u8'x'; + const rstd::Char c(u8'x'); EXPECT_EQ(0x78, static_cast<uint32_t>(c)); } @@ -79,7 +77,7 @@ // Not testing `is_trivially_constructible`, because UTF-16 literals may // fail Rust's well-formed-ness checks (e.g. they may represent only one // part of a surrogate pair). - rstd::Char c = u'Ł'; + const rstd::Char c(u'Ł'); EXPECT_EQ(0x141, static_cast<uint32_t>(c)); } @@ -92,14 +90,14 @@ // Not testing `is_trivially_constructible`, because UTF-32 literals may fail // Rust's well-formed-ness checks (e.g. they may exceed the value of Rust's // `std::char::MAX`). 
- rstd::Char c = U'🦀'; + const rstd::Char c(U'🦀'); EXPECT_EQ(0x1F980, static_cast<uint32_t>(c)); } // Test that `rstd::Char` values can be compared with other `rstd::Char` values. TEST(RsCharTest, ComparisonWithAnotherRsChar) { - const rstd::Char a = 'a'; - const rstd::Char b = 'b'; + const rstd::Char a('a'); + const rstd::Char b('b'); EXPECT_TRUE(a == a); EXPECT_FALSE(a != a);