Add tests for utf8_to_ucs4
This commit is contained in:
parent
71c65447f8
commit
270c0a340c
@ -173,3 +173,75 @@ TEST(String, filesize) {
|
||||
EXPECT_EQ("3 GB", string_util::filesize((unsigned long long)3 * 1024 * 1024 * 1024));
|
||||
EXPECT_EQ("3 TB", string_util::filesize((unsigned long long)3 * 1024 * 1024 * 1024 * 1024));
|
||||
}
|
||||
|
||||
// utf8_to_ucs4 {{{
|
||||
class Utf8ToUCS4AsciiTest : public testing::TestWithParam<string> {};
|
||||
|
||||
const vector<string> utf8_to_ucs4_ascii_list = {"", "Hello World", "\n", "\0", "\u007f"};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4AsciiTest, testing::ValuesIn(utf8_to_ucs4_ascii_list));
|
||||
|
||||
/**
|
||||
* Test that the conversion to ucs4 works correctly with pure ASCII strings.
|
||||
*/
|
||||
TEST_P(Utf8ToUCS4AsciiTest, correctness) {
|
||||
string_util::unicode_charlist result_list{};
|
||||
string str = GetParam();
|
||||
|
||||
bool success = string_util::utf8_to_ucs4((const unsigned char*)str.c_str(), result_list);
|
||||
ASSERT_TRUE(success);
|
||||
|
||||
ASSERT_EQ(str.size(), result_list.size());
|
||||
|
||||
int i = 0;
|
||||
for (const auto& unicode_char : result_list) {
|
||||
auto c = str[i];
|
||||
|
||||
// Matches the single byte character
|
||||
EXPECT_EQ(c, unicode_char.codepoint);
|
||||
// Is at the same offset as in the original string
|
||||
EXPECT_EQ(i, unicode_char.offset);
|
||||
// Only takes a single byte
|
||||
EXPECT_EQ(1, unicode_char.length);
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
using single_test_t = std::pair<string, uint32_t>;
|
||||
class Utf8ToUCS4SingleTest : public testing::TestWithParam<single_test_t> {};
|
||||
|
||||
const vector<single_test_t> utf8_to_ucs4_single_list = {
|
||||
{" ", 0x20}, {"\u007f", 0x7f}, // End of 1 byte range
|
||||
{"\u0080", 0x80}, // Start of 2 byte range
|
||||
{"\u07ff", 0x7ff}, // End of 2 byte range
|
||||
{"\u0800", 0x800}, // Start of 3 byte range
|
||||
{"\uffff", 0xffff}, // End of 3 byte range
|
||||
{"\U00010000", 0x10000}, // Start of 4 byte range
|
||||
{"\U0010ffff", 0x10ffff}, // End of 4 byte range
|
||||
{"\U0001f600", 0x1f600}, // Grinning face emoji
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4SingleTest, testing::ValuesIn(utf8_to_ucs4_single_list));
|
||||
|
||||
/**
|
||||
* Test that the conversion to ucs4 works correctly with pure ASCII strings.
|
||||
*/
|
||||
TEST_P(Utf8ToUCS4SingleTest, correctness) {
|
||||
string_util::unicode_charlist result_list{};
|
||||
const auto [str, codepoint] = GetParam();
|
||||
|
||||
bool success = string_util::utf8_to_ucs4((const unsigned char*)str.c_str(), result_list);
|
||||
ASSERT_TRUE(success);
|
||||
|
||||
ASSERT_EQ(1, result_list.size());
|
||||
|
||||
auto unicode_char = result_list.front();
|
||||
|
||||
EXPECT_EQ(0, unicode_char.offset);
|
||||
// Must encompass entire string
|
||||
EXPECT_EQ(str.size(), unicode_char.length);
|
||||
// Must match expected codepoint
|
||||
EXPECT_EQ(codepoint, unicode_char.codepoint);
|
||||
}
|
||||
// }}}
|
||||
|
Loading…
Reference in New Issue
Block a user