Cleanup
This commit is contained in:
parent
32c78aa63a
commit
425d4dc338
@ -166,9 +166,10 @@ namespace cairo {
|
|||||||
|
|
||||||
string utf8 = t.contents;
|
string utf8 = t.contents;
|
||||||
string_util::unicode_charlist chars;
|
string_util::unicode_charlist chars;
|
||||||
bool success = string_util::utf8_to_ucs4(utf8, chars);
|
bool valid = string_util::utf8_to_ucs4(utf8, chars);
|
||||||
|
|
||||||
if (!success) {
|
// The conversion already removed any invalid chunks. We should probably log a warning though.
|
||||||
|
if (!valid) {
|
||||||
sstream hex;
|
sstream hex;
|
||||||
hex << std::hex << std::setw(2) << std::setfill('0');
|
hex << std::hex << std::setw(2) << std::setfill('0');
|
||||||
|
|
||||||
|
@ -84,14 +84,7 @@ string trim(string&& value, const char& needle = ' ');
|
|||||||
|
|
||||||
size_t char_len(const string& value);
|
size_t char_len(const string& value);
|
||||||
string utf8_truncate(string&& value, size_t len);
|
string utf8_truncate(string&& value, size_t len);
|
||||||
/**
|
|
||||||
* @brief Create a UCS-4 codepoint from a utf-8 encoded string
|
|
||||||
*/
|
|
||||||
[[nodiscard]] bool utf8_to_ucs4(const string& src, unicode_charlist& result_list);
|
[[nodiscard]] bool utf8_to_ucs4(const string& src, unicode_charlist& result_list);
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Convert a UCS-4 codepoint to a utf-8 encoded string
|
|
||||||
*/
|
|
||||||
size_t ucs4_to_utf8(std::array<char, 5>& utf8, unsigned int ucs);
|
size_t ucs4_to_utf8(std::array<char, 5>& utf8, unsigned int ucs);
|
||||||
|
|
||||||
string join(const vector<string>& strs, const string& delim);
|
string join(const vector<string>& strs, const string& delim);
|
||||||
|
@ -188,8 +188,8 @@ TEST_P(Utf8ToUCS4AsciiTest, correctness) {
|
|||||||
string_util::unicode_charlist result_list{};
|
string_util::unicode_charlist result_list{};
|
||||||
string str = GetParam();
|
string str = GetParam();
|
||||||
|
|
||||||
bool success = string_util::utf8_to_ucs4(str, result_list);
|
bool valid = string_util::utf8_to_ucs4(str, result_list);
|
||||||
ASSERT_TRUE(success);
|
ASSERT_TRUE(valid);
|
||||||
|
|
||||||
ASSERT_EQ(str.size(), result_list.size());
|
ASSERT_EQ(str.size(), result_list.size());
|
||||||
|
|
||||||
@ -206,18 +206,20 @@ TEST_P(Utf8ToUCS4AsciiTest, correctness) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// String containing a single codepoint and the expected numerical codepoint
|
||||||
using single_test_t = std::pair<string, uint32_t>;
|
using single_test_t = std::pair<string, uint32_t>;
|
||||||
class Utf8ToUCS4SingleTest : public testing::TestWithParam<single_test_t> {};
|
class Utf8ToUCS4SingleTest : public testing::TestWithParam<single_test_t> {};
|
||||||
|
|
||||||
const vector<single_test_t> utf8_to_ucs4_single_list = {
|
const vector<single_test_t> utf8_to_ucs4_single_list = {
|
||||||
{" ", 0x20}, {"\u007f", 0x7f}, // End of 1 byte range
|
{" ", 0x20}, // Single ASCII character
|
||||||
{"\u0080", 0x80}, // Start of 2 byte range
|
{"\u007f", 0x7f}, // End of 1 byte range
|
||||||
{"\u07ff", 0x7ff}, // End of 2 byte range
|
{"\u0080", 0x80}, // Start of 2 byte range
|
||||||
{"\u0800", 0x800}, // Start of 3 byte range
|
{"\u07ff", 0x7ff}, // End of 2 byte range
|
||||||
{"\uffff", 0xffff}, // End of 3 byte range
|
{"\u0800", 0x800}, // Start of 3 byte range
|
||||||
{"\U00010000", 0x10000}, // Start of 4 byte range
|
{"\uffff", 0xffff}, // End of 3 byte range
|
||||||
{"\U0010ffff", 0x10ffff}, // End of 4 byte range
|
{"\U00010000", 0x10000}, // Start of 4 byte range
|
||||||
{"\U0001f600", 0x1f600}, // Grinning face emoji
|
{"\U0010ffff", 0x10ffff}, // End of 4 byte range
|
||||||
|
{"\U0001f600", 0x1f600}, // Grinning face emoji
|
||||||
};
|
};
|
||||||
|
|
||||||
INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4SingleTest, testing::ValuesIn(utf8_to_ucs4_single_list));
|
INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4SingleTest, testing::ValuesIn(utf8_to_ucs4_single_list));
|
||||||
@ -229,8 +231,8 @@ TEST_P(Utf8ToUCS4SingleTest, correctness) {
|
|||||||
string_util::unicode_charlist result_list{};
|
string_util::unicode_charlist result_list{};
|
||||||
const auto [str, codepoint] = GetParam();
|
const auto [str, codepoint] = GetParam();
|
||||||
|
|
||||||
bool success = string_util::utf8_to_ucs4(str, result_list);
|
bool valid = string_util::utf8_to_ucs4(str, result_list);
|
||||||
ASSERT_TRUE(success);
|
ASSERT_TRUE(valid);
|
||||||
|
|
||||||
ASSERT_EQ(1, result_list.size());
|
ASSERT_EQ(1, result_list.size());
|
||||||
|
|
||||||
@ -262,8 +264,8 @@ INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4InvalidTest, testing::ValuesIn(utf8_to_
|
|||||||
TEST_P(Utf8ToUCS4InvalidTest, correctness) {
|
TEST_P(Utf8ToUCS4InvalidTest, correctness) {
|
||||||
string_util::unicode_charlist result_list{};
|
string_util::unicode_charlist result_list{};
|
||||||
const auto str = GetParam();
|
const auto str = GetParam();
|
||||||
bool success = string_util::utf8_to_ucs4(str, result_list);
|
bool valid = string_util::utf8_to_ucs4(str, result_list);
|
||||||
EXPECT_FALSE(success);
|
EXPECT_FALSE(valid);
|
||||||
EXPECT_EQ(0, result_list.size());
|
EXPECT_EQ(0, result_list.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -273,8 +275,8 @@ TEST_P(Utf8ToUCS4InvalidTest, correctness) {
|
|||||||
TEST(String, utf8ToUCS4Partial) {
|
TEST(String, utf8ToUCS4Partial) {
|
||||||
string_util::unicode_charlist result_list{};
|
string_util::unicode_charlist result_list{};
|
||||||
string str = "\xe0\x70\x80"; // a valid ascii character between two invalid characters
|
string str = "\xe0\x70\x80"; // a valid ascii character between two invalid characters
|
||||||
bool success = string_util::utf8_to_ucs4(str, result_list);
|
bool valid = string_util::utf8_to_ucs4(str, result_list);
|
||||||
EXPECT_FALSE(success);
|
EXPECT_FALSE(valid);
|
||||||
EXPECT_EQ(1, result_list.size());
|
EXPECT_EQ(1, result_list.size());
|
||||||
|
|
||||||
EXPECT_EQ(0x70, result_list[0].codepoint);
|
EXPECT_EQ(0x70, result_list[0].codepoint);
|
||||||
|
Loading…
Reference in New Issue
Block a user