From 5b1fae4fc1b78de4d648e8d542f579f860b801f5 Mon Sep 17 00:00:00 2001 From: patrick96 Date: Wed, 10 May 2023 16:16:47 +0200 Subject: [PATCH] Make utf8_to_ucs4 take a string reference --- include/cairo/context.hpp | 2 +- include/utils/string.hpp | 2 +- src/utils/string.cpp | 6 +++--- tests/unit_tests/utils/string.cpp | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/cairo/context.hpp b/include/cairo/context.hpp index e8f54188..af4807b4 100644 --- a/include/cairo/context.hpp +++ b/include/cairo/context.hpp @@ -165,7 +165,7 @@ namespace cairo { string utf8 = t.contents; string_util::unicode_charlist chars; - bool success = string_util::utf8_to_ucs4(utf8.c_str(), chars); + bool success = string_util::utf8_to_ucs4(utf8, chars); if (!success) { m_log.warn("Dropping invalid UTF8 text '%s'", utf8); diff --git a/include/utils/string.hpp b/include/utils/string.hpp index 328ef897..3f8ed30a 100644 --- a/include/utils/string.hpp +++ b/include/utils/string.hpp @@ -87,7 +87,7 @@ string utf8_truncate(string&& value, size_t len); /** * @brief Create a UCS-4 codepoint from a utf-8 encoded string */ -[[nodiscard]] bool utf8_to_ucs4(const char* src, unicode_charlist& result_list); +[[nodiscard]] bool utf8_to_ucs4(const string& src, unicode_charlist& result_list); /** * @brief Convert a UCS-4 codepoint to a utf-8 encoded string diff --git a/src/utils/string.cpp b/src/utils/string.cpp index a66e00cb..54325b4a 100644 --- a/src/utils/string.cpp +++ b/src/utils/string.cpp @@ -280,10 +280,10 @@ static pair utf8_get_len(uint8_t leading) { * * @return Whether the string is completely valid utf8 */ -bool utf8_to_ucs4(const char* src, unicode_charlist& result_list) { - assert(src); +bool utf8_to_ucs4(const string& src, unicode_charlist& result_list) { + result_list.reserve(src.size()); bool has_errors = false; - const auto* begin = reinterpret_cast<const uint8_t*>(src); + const auto* begin = reinterpret_cast<const uint8_t*>(src.c_str()); const auto* current = begin; while (*current) { diff 
--git a/tests/unit_tests/utils/string.cpp b/tests/unit_tests/utils/string.cpp index e0f1257e..428e648f 100644 --- a/tests/unit_tests/utils/string.cpp +++ b/tests/unit_tests/utils/string.cpp @@ -188,7 +188,7 @@ TEST_P(Utf8ToUCS4AsciiTest, correctness) { string_util::unicode_charlist result_list{}; string str = GetParam(); - bool success = string_util::utf8_to_ucs4(str.c_str(), result_list); + bool success = string_util::utf8_to_ucs4(str, result_list); ASSERT_TRUE(success); ASSERT_EQ(str.size(), result_list.size()); @@ -229,7 +229,7 @@ TEST_P(Utf8ToUCS4SingleTest, correctness) { string_util::unicode_charlist result_list{}; const auto [str, codepoint] = GetParam(); - bool success = string_util::utf8_to_ucs4(str.c_str(), result_list); + bool success = string_util::utf8_to_ucs4(str, result_list); ASSERT_TRUE(success); ASSERT_EQ(1, result_list.size()); @@ -251,7 +251,7 @@ const vector<string> utf8_to_ucs4_invalid_list = { "\xe0", // 3 byte code point with only leading byte "\xf0", // 4 byte code point with only leading byte "\xf0\x80\x80", // 4 byte code point with only 3 bytes - "\xe0\x70\x80", // 3 byte code point, 2nd byte has no continuation prefix + "\xe0\xf0\x80", // 3 byte code point, 2nd byte has no continuation prefix }; INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4InvalidTest, testing::ValuesIn(utf8_to_ucs4_invalid_list)); @@ -262,7 +262,7 @@ INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4InvalidTest, testing::ValuesIn(utf8_to_ TEST_P(Utf8ToUCS4InvalidTest, correctness) { string_util::unicode_charlist result_list{}; const auto str = GetParam(); - bool success = string_util::utf8_to_ucs4(str.c_str(), result_list); + bool success = string_util::utf8_to_ucs4(str, result_list); EXPECT_FALSE(success); EXPECT_EQ(0, result_list.size()); }