test: utf8_to_ucs4 with invalid strings

2023-05-10 15:35:14 +02:00 · 2023-05-10 15:35:14 +02:00 · c86519f077
commit c86519f077
parent 3a27e891d2
2 changed files with 26 additions and 4 deletions
--- a/src/utils/string.cpp
+++ b/src/utils/string.cpp
@ -1,6 +1,7 @@
 #include "utils/string.hpp"
 #include <algorithm>
 #include <cassert>
 #include <iomanip>
 #include <sstream>
 #include <utility>
@ -273,9 +274,7 @@ static pair<int, uint32_t> utf8_get_len(uint8_t leading) {
 * @brief Create a list of UCS-4 codepoint from a utf-8 encoded string
 */
 bool utf8_to_ucs4(const char* src, unicode_charlist& result_list) {
-  if (!src) {
+  assert(src);
-    return false;
-  }
  const auto* begin = reinterpret_cast<const uint8_t*>(src);
  const auto* first = begin;
  while (*first) {
--- a/tests/unit_tests/utils/string.cpp
+++ b/tests/unit_tests/utils/string.cpp
@ -225,7 +225,7 @@ const vector<single_test_t> utf8_to_ucs4_single_list = {
 INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4SingleTest, testing::ValuesIn(utf8_to_ucs4_single_list));
 /**
- * Test that the conversion to ucs4 works correctly with pure ASCII strings.
+ * Test that the conversion to ucs4 works correctly with a single UTF8 character
 */
 TEST_P(Utf8ToUCS4SingleTest, correctness) {
  string_util::unicode_charlist result_list{};
@ -244,4 +244,27 @@ TEST_P(Utf8ToUCS4SingleTest, correctness) {
  // Must match expected codepoint
  EXPECT_EQ(codepoint, unicode_char.codepoint);
 }
 // Value-parameterized fixture; each parameter is one string that is fed to utf8_to_ucs4.
 class Utf8ToUCS4InvalidTest : public testing::TestWithParam<string> {};
 // Byte sequences that are not well-formed UTF-8; the conversion must reject every one.
 // UTF-8 byte patterns for reference: 110xxxxx / 1110xxxx / 11110xxx start a 2 / 3 / 4 byte
 // code point, 10xxxxxx is a continuation byte.
 const vector<string> utf8_to_ucs4_invalid_list = {
    "\x80",         // continuation byte without leading byte
    "\xa0",         // another continuation byte (10100000) without leading byte
    "\xc2",         // 2 byte code point with only leading byte
    "\xe0",         // 3 byte code point with only leading byte
    "\xf0",         // 4 byte code point with only leading byte
    "\xf0\x80\x80", // 4 byte code point with only 3 bytes
    "\xe0\x70\x80", // 3 byte code point, 2nd byte has no continuation prefix
 };
 // Instantiate Utf8ToUCS4InvalidTest once for every entry in utf8_to_ucs4_invalid_list.
 INSTANTIATE_TEST_SUITE_P(Inst, Utf8ToUCS4InvalidTest, testing::ValuesIn(utf8_to_ucs4_invalid_list));
 /**
 * Every parameter is a malformed UTF-8 string; the conversion has to report
 * failure (return false) for each of them.
 */
 TEST_P(Utf8ToUCS4InvalidTest, correctness) {
  const auto input = GetParam();
  string_util::unicode_charlist converted{};

  // Only the return value is checked; the contents of the list are not inspected.
  EXPECT_FALSE(string_util::utf8_to_ucs4(input.c_str(), converted));
 }
 // }}}