From 425d4dc338228a418619894ce060d578134f054c Mon Sep 17 00:00:00 2001
From: patrick96
Date: Wed, 10 May 2023 16:46:09 +0200
Subject: [PATCH] Cleanup
---
include/cairo/context.hpp | 5 +++--
include/utils/string.hpp | 7 -------
tests/unit_tests/utils/string.cpp | 34 ++++++++++++++++---------------
3 files changed, 21 insertions(+), 25 deletions(-)
diff --git a/include/cairo/context.hpp b/include/cairo/context.hpp
index b0589d0f..0434e6f5 100644
--- a/include/cairo/context.hpp
+++ b/include/cairo/context.hpp
@@ -166,9 +166,10 @@ namespace cairo {
string utf8 = t.contents;
string_util::unicode_charlist chars;
- bool success = string_util::utf8_to_ucs4(utf8, chars);
+ bool valid = string_util::utf8_to_ucs4(utf8, chars);
- if (!success) {
+ // utf8_to_ucs4 has already dropped any invalid chunks from chars; we should probably still log a warning.
+ if (!valid) {
sstream hex;
hex << std::hex << std::setw(2) << std::setfill('0');
diff --git a/include/utils/string.hpp b/include/utils/string.hpp
index 7d0a3dc5..45eaec06 100644
--- a/include/utils/string.hpp
+++ b/include/utils/string.hpp
@@ -84,14 +84,7 @@ string trim(string&& value, const char& needle = ' ');
size_t char_len(const string& value);
string utf8_truncate(string&& value, size_t len);
-/**
- * @brief Create a UCS-4 codepoint from a utf-8 encoded string
- */
[[nodiscard]] bool utf8_to_ucs4(const string& src, unicode_charlist& result_list);
-
-/**
- * @brief Convert a UCS-4 codepoint to a utf-8 encoded string
- */
size_t ucs4_to_utf8(std::array& utf8, unsigned int ucs);
string join(const vector& strs, const string& delim);
diff --git a/tests/unit_tests/utils/string.cpp b/tests/unit_tests/utils/string.cpp
index e95be7c4..de7a1825 100644
--- a/tests/unit_tests/utils/string.cpp
+++ b/tests/unit_tests/utils/string.cpp
@@ -188,8 +188,8 @@ TEST_P(Utf8ToUCS4AsciiTest, correctness) {
string_util::unicode_charlist result_list{};
string str = GetParam();
- bool success = string_util::utf8_to_ucs4(str, result_list);
- ASSERT_TRUE(success);
+ bool valid = string_util::utf8_to_ucs4(str, result_list);
+ ASSERT_TRUE(valid);
ASSERT_EQ(str.size(), result_list.size());
@@ -206,18 +206,20 @@ TEST_P(Utf8ToUCS4AsciiTest, correctness) {
}
}
+// Pair of a UTF-8 string holding exactly one codepoint and that codepoint's expected numeric value
using single_test_t = std::pair;
class Utf8ToUCS4SingleTest : public testing::TestWithParam {};
const vector utf8_to_ucs4_single_list = {
- {" ", 0x20}, {"\u007f", 0x7f}, // End of 1 byte range
- {"\u0080", 0x80}, // Start of 2 byte range
- {"\u07ff", 0x7ff}, // End of 2 byte range
- {"\u0800", 0x800}, // Start of 3 byte range
- {"\uffff", 0xffff}, // End of 3 byte range
- {"\U00010000", 0x10000}, // Start of 4 byte range
- {"\U0010ffff", 0x10ffff}, // End of 4 byte range
- {"\U0001f600", 0x1f600}, // Grinning face emoji
+ {" ", 0x20}, // Single ASCII character
+ {"\u007f", 0x7f}, // End of 1 byte range
+ {"\u0080", 0x80}, // Start of 2 byte range
+ {"\u07ff", 0x7ff}, // End of 2 byte range
+ {"\u0800", 0x800}, // Start of 3 byte range
+ {"\uffff", 0xffff}, // End of 3 byte range
+ {"\U00010000", 0x10000}, // Start of 4 byte range
+ {"\U0010ffff", 0x10ffff}, // End of 4 byte range
+ {"\U0001f600", 0x1f600}, // Grinning face emoji
};
INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4SingleTest, testing::ValuesIn(utf8_to_ucs4_single_list));
@@ -229,8 +231,8 @@ TEST_P(Utf8ToUCS4SingleTest, correctness) {
string_util::unicode_charlist result_list{};
const auto [str, codepoint] = GetParam();
- bool success = string_util::utf8_to_ucs4(str, result_list);
- ASSERT_TRUE(success);
+ bool valid = string_util::utf8_to_ucs4(str, result_list);
+ ASSERT_TRUE(valid);
ASSERT_EQ(1, result_list.size());
@@ -262,8 +264,8 @@ INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4InvalidTest, testing::ValuesIn(utf8_to_
TEST_P(Utf8ToUCS4InvalidTest, correctness) {
string_util::unicode_charlist result_list{};
const auto str = GetParam();
- bool success = string_util::utf8_to_ucs4(str, result_list);
- EXPECT_FALSE(success);
+ bool valid = string_util::utf8_to_ucs4(str, result_list);
+ EXPECT_FALSE(valid);
EXPECT_EQ(0, result_list.size());
}
@@ -273,8 +275,8 @@ TEST_P(Utf8ToUCS4InvalidTest, correctness) {
TEST(String, utf8ToUCS4Partial) {
string_util::unicode_charlist result_list{};
string str = "\xe0\x70\x80"; // a valid ascii character between two invalid characters
- bool success = string_util::utf8_to_ucs4(str, result_list);
- EXPECT_FALSE(success);
+ bool valid = string_util::utf8_to_ucs4(str, result_list);
+ EXPECT_FALSE(valid);
EXPECT_EQ(1, result_list.size());
EXPECT_EQ(0x70, result_list[0].codepoint);