Make utf8_to_ucs4 take a string reference
This commit is contained in:
parent
5e5a0a7c4d
commit
5b1fae4fc1
@ -165,7 +165,7 @@ namespace cairo {
|
|||||||
|
|
||||||
string utf8 = t.contents;
|
string utf8 = t.contents;
|
||||||
string_util::unicode_charlist chars;
|
string_util::unicode_charlist chars;
|
||||||
bool success = string_util::utf8_to_ucs4(utf8.c_str(), chars);
|
bool success = string_util::utf8_to_ucs4(utf8, chars);
|
||||||
|
|
||||||
if (!success) {
|
if (!success) {
|
||||||
m_log.warn("Dropping invalid UTF8 text '%s'", utf8);
|
m_log.warn("Dropping invalid UTF8 text '%s'", utf8);
|
||||||
|
@ -87,7 +87,7 @@ string utf8_truncate(string&& value, size_t len);
|
|||||||
/**
|
/**
|
||||||
* @brief Create a list of UCS-4 codepoints from a utf-8 encoded string
|
* @brief Create a list of UCS-4 codepoints from a utf-8 encoded string
|
||||||
*/
|
*/
|
||||||
[[nodiscard]] bool utf8_to_ucs4(const char* src, unicode_charlist& result_list);
|
[[nodiscard]] bool utf8_to_ucs4(const string& src, unicode_charlist& result_list);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Convert a UCS-4 codepoint to a utf-8 encoded string
|
* @brief Convert a UCS-4 codepoint to a utf-8 encoded string
|
||||||
|
@ -280,10 +280,10 @@ static pair<int, uint32_t> utf8_get_len(uint8_t leading) {
|
|||||||
*
|
*
|
||||||
* @return Whether the string is completely valid utf8
|
* @return Whether the string is completely valid utf8
|
||||||
*/
|
*/
|
||||||
bool utf8_to_ucs4(const char* src, unicode_charlist& result_list) {
|
bool utf8_to_ucs4(const string& src, unicode_charlist& result_list) {
|
||||||
assert(src);
|
result_list.reserve(src.size());
|
||||||
bool has_errors = false;
|
bool has_errors = false;
|
||||||
const auto* begin = reinterpret_cast<const uint8_t*>(src);
|
const auto* begin = reinterpret_cast<const uint8_t*>(src.c_str());
|
||||||
|
|
||||||
const auto* current = begin;
|
const auto* current = begin;
|
||||||
while (*current) {
|
while (*current) {
|
||||||
|
@ -188,7 +188,7 @@ TEST_P(Utf8ToUCS4AsciiTest, correctness) {
|
|||||||
string_util::unicode_charlist result_list{};
|
string_util::unicode_charlist result_list{};
|
||||||
string str = GetParam();
|
string str = GetParam();
|
||||||
|
|
||||||
bool success = string_util::utf8_to_ucs4(str.c_str(), result_list);
|
bool success = string_util::utf8_to_ucs4(str, result_list);
|
||||||
ASSERT_TRUE(success);
|
ASSERT_TRUE(success);
|
||||||
|
|
||||||
ASSERT_EQ(str.size(), result_list.size());
|
ASSERT_EQ(str.size(), result_list.size());
|
||||||
@ -229,7 +229,7 @@ TEST_P(Utf8ToUCS4SingleTest, correctness) {
|
|||||||
string_util::unicode_charlist result_list{};
|
string_util::unicode_charlist result_list{};
|
||||||
const auto [str, codepoint] = GetParam();
|
const auto [str, codepoint] = GetParam();
|
||||||
|
|
||||||
bool success = string_util::utf8_to_ucs4(str.c_str(), result_list);
|
bool success = string_util::utf8_to_ucs4(str, result_list);
|
||||||
ASSERT_TRUE(success);
|
ASSERT_TRUE(success);
|
||||||
|
|
||||||
ASSERT_EQ(1, result_list.size());
|
ASSERT_EQ(1, result_list.size());
|
||||||
@ -251,7 +251,7 @@ const vector<string> utf8_to_ucs4_invalid_list = {
|
|||||||
"\xe0", // 3 byte code point with only leading byte
|
"\xe0", // 3 byte code point with only leading byte
|
||||||
"\xf0", // 4 byte code point with only leading byte
|
"\xf0", // 4 byte code point with only leading byte
|
||||||
"\xf0\x80\x80", // 4 byte code point with only 3 bytes
|
"\xf0\x80\x80", // 4 byte code point with only 3 bytes
|
||||||
"\xe0\x70\x80", // 3 byte code point, 2nd byte has no continuation prefix
|
"\xe0\xf0\x80", // 3 byte code point, 2nd byte has no continuation prefix
|
||||||
};
|
};
|
||||||
|
|
||||||
INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4InvalidTest, testing::ValuesIn(utf8_to_ucs4_invalid_list));
|
INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4InvalidTest, testing::ValuesIn(utf8_to_ucs4_invalid_list));
|
||||||
@ -262,7 +262,7 @@ INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4InvalidTest, testing::ValuesIn(utf8_to_
|
|||||||
TEST_P(Utf8ToUCS4InvalidTest, correctness) {
|
TEST_P(Utf8ToUCS4InvalidTest, correctness) {
|
||||||
string_util::unicode_charlist result_list{};
|
string_util::unicode_charlist result_list{};
|
||||||
const auto str = GetParam();
|
const auto str = GetParam();
|
||||||
bool success = string_util::utf8_to_ucs4(str.c_str(), result_list);
|
bool success = string_util::utf8_to_ucs4(str, result_list);
|
||||||
EXPECT_FALSE(success);
|
EXPECT_FALSE(success);
|
||||||
EXPECT_EQ(0, result_list.size());
|
EXPECT_EQ(0, result_list.size());
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user