From 3a27e891d2a7ae3ae19bb301d67c68989ce602c3 Mon Sep 17 00:00:00 2001
From: patrick96
Date: Wed, 10 May 2023 15:15:09 +0200
Subject: [PATCH] fix(renderer): Drop strings with invalid UTF8
---
include/cairo/context.hpp | 9 +++++++--
include/cairo/font.hpp | 4 ----
include/utils/string.hpp | 4 ++--
src/utils/string.cpp | 2 +-
4 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/include/cairo/context.hpp b/include/cairo/context.hpp
index bdfefa0c..e8f54188 100644
--- a/include/cairo/context.hpp
+++ b/include/cairo/context.hpp
@@ -163,9 +163,14 @@ namespace cairo {
std::iter_swap(fns.begin(), fns.begin() + t.font - 1);
}
- string utf8 = string(t.contents);
+ string utf8 = t.contents;
string_util::unicode_charlist chars;
- string_util::utf8_to_ucs4(utf8.c_str(), chars);
+ bool success = string_util::utf8_to_ucs4(utf8.c_str(), chars);
+
+ if (!success) {
+ m_log.warn("Dropping invalid UTF8 text '%s'", utf8);
+ return *this;
+ }
while (!chars.empty()) {
auto remaining = chars.size();
diff --git a/include/cairo/font.hpp b/include/cairo/font.hpp
index d7a14da9..2220a13e 100644
--- a/include/cairo/font.hpp
+++ b/include/cairo/font.hpp
@@ -220,10 +220,6 @@ class font_fc : public font {
m_scaled, x, y, utf8.c_str(), utf8.size(), &glyphs, &nglyphs, &clusters, &nclusters, &cf);
if (status != CAIRO_STATUS_SUCCESS) {
- logger::make().notice("ERROR %d", status);
- for (char& c : utf8) {
- logger::make().notice("0x%02x", c);
- }
throw application_error(sstream() << "cairo_scaled_font_text_to_glyphs() " << cairo_status_to_string(status));
}
diff --git a/include/utils/string.hpp b/include/utils/string.hpp
index 32ef01e9..62c6ee9d 100644
--- a/include/utils/string.hpp
+++ b/include/utils/string.hpp
@@ -24,7 +24,7 @@ class sstream {
return m_stream.str();
}
- string to_string() const {
+ string to_string() const {
return m_stream.str();
}
@@ -88,7 +88,7 @@ string utf8_truncate(string&& value, size_t len);
/**
* @brief Create a UCS-4 codepoint from a utf-8 encoded string
*/
-bool utf8_to_ucs4(const char* src, unicode_charlist& result_list);
+[[nodiscard]] bool utf8_to_ucs4(const char* src, unicode_charlist& result_list);
/**
* @brief Convert a UCS-4 codepoint to a utf-8 encoded string
diff --git a/src/utils/string.cpp b/src/utils/string.cpp
index 74130979..c9ed64a6 100644
--- a/src/utils/string.cpp
+++ b/src/utils/string.cpp
@@ -270,7 +270,7 @@ static pair utf8_get_len(uint8_t leading) {
}
/**
- * @brief Create a UCS-4 codepoint from a utf-8 encoded string
+ * @brief Create a list of UCS-4 codepoints from a utf-8 encoded string
*/
bool utf8_to_ucs4(const char* src, unicode_charlist& result_list) {
if (!src) {