`rstd::Char::from_u32` that checks character validity.
Before this CL, `rstd::Char` had constructors that could implicitly and
infallibly convert `char`, `char16_t`, or `char32_t` into `rstd::Char`.
This CL deletes these constructors and replaces them with a factory method:
`from_u32` that can fail the conversion (returning `std::nullopt` for
invalid bit patterns).
This CL also introduces a well-defined default constructor. This is
a bit opportunistic (i.e. not required for implementing the factory
method described in the previous paragraph). This is mostly motivated
by the desire to use `value_or` in tests (and avoid having explicit
`has_value()` checks for scenarios where tests know that the factory
method will succeed).
PiperOrigin-RevId: 503520624
diff --git a/cc_bindings_from_rs/test/functions/functions_test.cc b/cc_bindings_from_rs/test/functions/functions_test.cc
index d9df745..f0dd6ab 100644
--- a/cc_bindings_from_rs/test/functions/functions_test.cc
+++ b/cc_bindings_from_rs/test/functions/functions_test.cc
@@ -51,8 +51,9 @@
}
TEST(FnParamTyTests, Char) {
- rstd::Char input(U'🦀');
- rstd::Char output = fn_param_ty_tests::char_identity(input);
+ std::optional<const rstd::Char> input = rstd::Char::from_u32(U'🦀');
+ ASSERT_TRUE(input.has_value());
+ rstd::Char output = fn_param_ty_tests::char_identity(*input);
EXPECT_EQ(input, output);
}
diff --git a/support/rstd/char.h b/support/rstd/char.h
index e795280..062a0e9 100644
--- a/support/rstd/char.h
+++ b/support/rstd/char.h
@@ -6,6 +6,7 @@
#define CRUBIT_RS_BINDINGS_FROM_CC_SUPPORT_RSTD_CHAR_H_
#include <cstdint>
+#include <optional>
namespace rstd {
@@ -20,15 +21,35 @@
// for C++ which argues that zero-initialization may mitigate 10% of exploits.
constexpr Char() = default;
- // TODO(b/265338802): Reject `char` values that may represent a part of a
- // UTF-8 character (i.e. only the first 0-127 ASCII characters should be
- // accepted).
- constexpr explicit Char(char c) : value_(c) {}
+ // Converts a `uint32_t` into a `rstd::Char`.
+ //
+ // Note that not all valid `uint32_t`s are valid `rstd::Char`s. `from_u32`
+ // will return `std::nullopt` if the input is not a valid value for a
+ // `rstd::Char`.
+ //
+ // See also
+ // https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ // which documents that undefined behavior may result in presence of "A value
+ // in a char which is a surrogate or above char::MAX."
+ //
+ // This function mimics Rust's `char::from_u32`:
+ // https://doc.rust-lang.org/std/primitive.char.html#method.from_u32
+ static constexpr std::optional<Char> from_u32(char32_t c) {
+ // TODO(lukasza): Consider using slightly more efficient checks similarly
+ // to how `char_try_from_u32` is implemented in Rust standard library.
+ if (c > 0x10ffff) {
+ // Value greater than Rust's `char::MAX`:
+ // https://doc.rust-lang.org/std/primitive.char.html#associatedconstant.MAX
+ return std::nullopt;
+ }
- // TODO(b/265338802): Reject `char` values with invalid bit patterns
- // (retaining the `constexpr` aspect if possible).
- constexpr explicit Char(char16_t c) : value_(c) {}
- constexpr explicit Char(char32_t c) : value_(c) {}
+ if (c >= 0xd800 && c <= 0xdfff) {
+ // Surrogate characters.
+ return std::nullopt;
+ }
+
+ return from_u32_unchecked(c);
+ }
constexpr Char(const Char&) = default;
constexpr Char& operator=(const Char&) = default;
@@ -58,6 +79,18 @@
}
private:
+ // This function mimics Rust's `char::from_u32_unchecked`:
+ // https://doc.rust-lang.org/std/primitive.char.html#method.from_u32_unchecked
+ //
+ // TODO(b/254095482): Figure out how to annotate/expose unsafe functions in
+ // C++ and then make this method public.
+ static constexpr Char from_u32_unchecked(std::uint32_t value) {
+ return Char(value);
+ }
+
+ // Private constructor - intended to only be used from `from_u32_unchecked`.
+ explicit constexpr Char(std::uint32_t value) : value_(value) {}
+
// See "layout tests" comments in `char_test.cc` for explanation why
// `char32_t` is not used.
std::uint32_t value_ = '\0';
diff --git a/support/rstd/char_test.cc b/support/rstd/char_test.cc
index c8e29a3..3337b70 100644
--- a/support/rstd/char_test.cc
+++ b/support/rstd/char_test.cc
@@ -16,9 +16,7 @@
// Check that `rstd::Char` is trivially destructible, copyable, and moveable.
//
// There are no constructor-related checks, because well-formed-ness checks
-// will make construction non-trivial. The FromAsciiLiteral, FromUtf32Literal,
-// etc. tests ensure that `rstd::Char` provide test coverage for certain
-// construction-related scenarios.
+// require going through factory methods like `Char::from_u32`.
static_assert(std::is_trivially_destructible_v<rstd::Char>);
static_assert(std::is_trivially_copy_constructible_v<rstd::Char>);
static_assert(std::is_trivially_copy_assignable_v<rstd::Char>);
@@ -48,8 +46,9 @@
// - the representation of c-char in the execution character set (until C++23)
// - the corresponding code point from ordinary literal encoding (since C++23).
TEST(RsCharTest, FromAsciiLiteral) {
- const rstd::Char c('x');
- EXPECT_EQ(0x78, static_cast<uint32_t>(c));
+ std::optional<const rstd::Char> c = rstd::Char::from_u32('x');
+ ASSERT_TRUE(c.has_value());
+ EXPECT_EQ(0x78, uint32_t{*c});
}
// This test covers the following case from
@@ -61,8 +60,9 @@
// with a single UTF-8 code unit (that is, c-char is in the range 0x0-0x7F,
// inclusive).
TEST(RsCharTest, FromUtf8Literal) {
- const rstd::Char c(u8'x');
- EXPECT_EQ(0x78, static_cast<uint32_t>(c));
+ std::optional<const rstd::Char> c = rstd::Char::from_u32(u8'x');
+ ASSERT_TRUE(c.has_value());
+ EXPECT_EQ(0x78, uint32_t{*c});
}
// This test covers the following case from
@@ -74,11 +74,9 @@
// single UTF-16 code unit (that is, c-char is in the range 0x0-0xFFFF,
// inclusive).
TEST(RsCharTest, FromUtf16Literal) {
- // Not testing `is_trivially_constructible`, because UTF-16 literals may
- // fail Rust's well-formed-ness checks (e.g. they may represent only one
- // part of a surrogate pair).
- const rstd::Char c(u'Ł');
- EXPECT_EQ(0x141, static_cast<uint32_t>(c));
+ std::optional<const rstd::Char> c = rstd::Char::from_u32(u'Ł');
+ ASSERT_TRUE(c.has_value());
+ EXPECT_EQ(0x141, uint32_t{*c});
}
// This test covers the following case from
@@ -87,38 +85,74 @@
// UTF-32 character literal, e.g. U'猫' or U'🍌'. Such literal has type
// `char32_t` and the value equal to ISO/IEC 10646 code point value of c-char.
TEST(RsCharTest, FromUtf32Literal) {
- // Not testing `is_trivially_constructible`, because UTF-32 literals may fail
- // Rust's well-formed-ness checks (e.g. they may exceed the value of Rust's
- // `std::char::MAX`).
- const rstd::Char c(U'🦀');
- EXPECT_EQ(0x1F980, static_cast<uint32_t>(c));
+ std::optional<const rstd::Char> c = rstd::Char::from_u32(U'🦀');
+ ASSERT_TRUE(c.has_value());
+ EXPECT_EQ(0x1F980, uint32_t{*c});
+}
+
+TEST(RsCharTest, FromU32ValidityChecks) {
+ // Max 32-bit value.
+ EXPECT_FALSE(rstd::Char::from_u32(0xffffffff).has_value());
+
+ // A value just above Rust's `char::MAX`:
+ // https://doc.rust-lang.org/std/primitive.char.html#associatedconstant.MAX.
+ EXPECT_FALSE(rstd::Char::from_u32(0x110000).has_value());
+
+ // Smallest/greatest "high"/"low" surrogates.
+ EXPECT_FALSE(rstd::Char::from_u32(0xd800).has_value());
+ EXPECT_FALSE(rstd::Char::from_u32(0xdbff).has_value());
+ EXPECT_FALSE(rstd::Char::from_u32(0xdc00).has_value());
+ EXPECT_FALSE(rstd::Char::from_u32(0xdfff).has_value());
+
+ // Smallest valid value.
+ std::optional<rstd::Char> maybe_c = rstd::Char::from_u32('\0');
+ ASSERT_TRUE(maybe_c.has_value());
+ EXPECT_EQ(0x00, uint32_t{*maybe_c});
+
+ // Greatest valid value. See also Rust's `char::MAX`:
+ // https://doc.rust-lang.org/std/primitive.char.html#associatedconstant.MAX.
+ maybe_c = rstd::Char::from_u32(0x10ffff);
+ ASSERT_TRUE(maybe_c.has_value());
+ EXPECT_EQ(0x10ffff, uint32_t{*maybe_c});
+
+ // Just below surrogates.
+ maybe_c = rstd::Char::from_u32(0xd7ff);
+ ASSERT_TRUE(maybe_c.has_value());
+ EXPECT_EQ(0xd7ff, uint32_t{*maybe_c});
+
+ // Just above surrogates.
+ maybe_c = rstd::Char::from_u32(0xe000);
+ ASSERT_TRUE(maybe_c.has_value());
+ EXPECT_EQ(0xe000, uint32_t{*maybe_c});
}
// Test that `rstd::Char` values can be compared with other `rstd::Char` values.
TEST(RsCharTest, ComparisonWithAnotherRsChar) {
- const rstd::Char a('a');
- const rstd::Char b('b');
+ std::optional<const rstd::Char> a = rstd::Char::from_u32('a');
+ std::optional<const rstd::Char> b = rstd::Char::from_u32('b');
+ ASSERT_TRUE(a.has_value());
+ ASSERT_TRUE(b.has_value());
- EXPECT_TRUE(a == a);
- EXPECT_FALSE(a != a);
- EXPECT_TRUE(a <= a);
- EXPECT_FALSE(a < a);
- EXPECT_TRUE(a >= a);
- EXPECT_FALSE(a > a);
+ EXPECT_TRUE(*a == *a);
+ EXPECT_FALSE(*a != *a);
+ EXPECT_TRUE(*a <= *a);
+ EXPECT_FALSE(*a < *a);
+ EXPECT_TRUE(*a >= *a);
+ EXPECT_FALSE(*a > *a);
- EXPECT_FALSE(a == b);
- EXPECT_TRUE(a != b);
- EXPECT_TRUE(a <= b);
- EXPECT_TRUE(a < b);
- EXPECT_FALSE(a >= b);
- EXPECT_FALSE(a > b);
+ EXPECT_FALSE(*a == *b);
+ EXPECT_TRUE(*a != *b);
+ EXPECT_TRUE(*a <= *b);
+ EXPECT_TRUE(*a < *b);
+ EXPECT_FALSE(*a >= *b);
+ EXPECT_FALSE(*a > *b);
- EXPECT_FALSE(b == a);
- EXPECT_TRUE(b != a);
- EXPECT_FALSE(b <= a);
- EXPECT_FALSE(b < a);
- EXPECT_TRUE(b >= a);
- EXPECT_TRUE(b > a);
+ EXPECT_FALSE(*b == *a);
+ EXPECT_TRUE(*b != *a);
+ EXPECT_FALSE(*b <= *a);
+ EXPECT_FALSE(*b < *a);
+ EXPECT_TRUE(*b >= *a);
+ EXPECT_TRUE(*b > *a);
}
TEST(RsCharTest, DefaultConstructedValue) {