From 3f14484ecacf881e9fb66b5e19b8d9ec9381f113 Mon Sep 17 00:00:00 2001 From: David Kocik Date: Wed, 5 Jan 2022 12:37:07 +0100 Subject: [PATCH] Moved Odaki's implementation of searching for end of utf8 sequence to a separate function. + Fix of 0xC0 in trailer bytes followup on fff862a5ae4b013f9a4ca20161b514f7fb0574e8 --- src/libslic3r/Utils.hpp | 4 + src/libslic3r/utils.cpp | 65 ++++++++++++++++ src/slic3r/GUI/HintNotification.cpp | 102 +------------------------ src/slic3r/GUI/NotificationManager.cpp | 2 +- 4 files changed, 72 insertions(+), 101 deletions(-) diff --git a/src/libslic3r/Utils.hpp b/src/libslic3r/Utils.hpp index cfb4cfa92..d9419495e 100644 --- a/src/libslic3r/Utils.hpp +++ b/src/libslic3r/Utils.hpp @@ -74,6 +74,10 @@ extern local_encoded_string encode_path(const char *src); extern std::string decode_path(const char *src); extern std::string normalize_utf8_nfc(const char *src); +// Returns the length of the next UTF-8 sequence, i.e. the number of bytes in the string that together form one UTF-8 character, +// starting at pos. An ASCII character returns 1. Also works if pos is in the middle of a sequence. +extern size_t get_utf8_sequence_length(const std::string& text, size_t pos = 0); + // Safely rename a file even if the target exists. // On Windows, the file explorer (or anti-virus or whatever else) often locks the file // for a short while, so the file may not be movable. Retry while we see recoverable errors.
diff --git a/src/libslic3r/utils.cpp b/src/libslic3r/utils.cpp index 7e4b281fd..c74e57e99 100644 --- a/src/libslic3r/utils.cpp +++ b/src/libslic3r/utils.cpp @@ -863,6 +863,71 @@ std::string normalize_utf8_nfc(const char *src) return boost::locale::normalize(src, boost::locale::norm_nfc, locale_utf8); } +size_t get_utf8_sequence_length(const std::string& text, size_t pos) +{ + assert(pos < text.size()); + size_t length = 0; + unsigned char c = text[pos]; + if (c < 0x80) { // 0x00-0x7F + // is an ASCII character + length++; + } + // Bytes 0x80 to 0xBF are trailer bytes in a multibyte sequence. + // pos is in the middle of a utf-8 sequence. Add the utf-8 trailer bytes. + else if (c < 0xC0) { // 0x80-0xBF + length++; + while (pos + length < text.size()) { + c = text[pos + length]; + if (c < 0x80 || c >= 0xC0) { + break; // prevent overrun + } + length++; // add a utf-8 trailer byte + } + } + // Bytes 0xC0 to 0xFD are header bytes in a multibyte sequence. + // The number of one bits above the topmost zero bit indicates the number of bytes (including this one) in the whole sequence.
+ else if (c < 0xE0) { // 0xC0-0xDF + // add a utf-8 sequence (2 bytes) + if (pos + 2 > text.size()) { + return text.size() - pos; // prevent overrun + } + length += 2; + } + else if (c < 0xF0) { // 0xE0-0xEF + // add a utf-8 sequence (3 bytes) + if (pos + 3 > text.size()) { + return text.size() - pos; // prevent overrun + } + length += 3; + } + else if (c < 0xF8) { // 0xF0-0xF7 + // add a utf-8 sequence (4 bytes) + if (pos + 4 > text.size()) { + return text.size() - pos; // prevent overrun + } + length += 4; + } + else if (c < 0xFC) { // 0xF8-0xFB + // add a utf-8 sequence (5 bytes) + if (pos + 5 > text.size()) { + return text.size() - pos; // prevent overrun + } + length += 5; + } + else if (c < 0xFE) { // 0xFC-0xFD + // add a utf-8 sequence (6 bytes) + if (pos + 6 > text.size()) { + return text.size() - pos; // prevent overrun + } + length += 6; + } + else { // 0xFE-0xFF + // not a utf-8 sequence + length++; + } + return length; +} + namespace PerlUtils { // Get a file name including the extension. std::string path_to_filename(const char *src) { return boost::filesystem::path(src).filename().string(); } diff --git a/src/slic3r/GUI/HintNotification.cpp b/src/slic3r/GUI/HintNotification.cpp index 91c9d71c8..291ce8f40 100644 --- a/src/slic3r/GUI/HintNotification.cpp +++ b/src/slic3r/GUI/HintNotification.cpp @@ -615,56 +615,7 @@ void NotificationManager::HintNotification::count_lines() float width_of_a = ImGui::CalcTextSize("a").x; int letter_count = (int)((m_window_width - m_window_width_offset) / width_of_a); while (last_end + letter_count < text.size() && ImGui::CalcTextSize(text.substr(last_end, letter_count).c_str()).x < m_window_width - m_window_width_offset) { - size_t text_size = text.size(); - size_t current_position = last_end + letter_count; - unsigned char c = text[current_position]; - if (c < 0x80) { // 0x00-0x7F - // add ASCII letter - letter_count++; - } else if (c < 0xC0) { // 0x80-0xBF - // it is in the middle of a utf-8 sequence. 
add the utf-8 trailer bytes. - letter_count++; - while (last_end + letter_count < text.size()) { - c = text[last_end + letter_count]; - if (c < 0x80 || c > 0xC0) { - break; // prevent overrun - } - letter_count++; // add a utf-8 trailer byte - } - } else if (c < 0xE0) { // 0xC0-0xDF - // add a utf-8 sequence (2 bytes) - if (current_position + 2 > text_size) { - break; // prevent overrun - } - letter_count += 2; - } else if (c < 0xF0) { // 0xE0-0xEF - // add a utf-8 sequence (3 bytes) - if (current_position + 3 > text_size) { - break; // prevent overrun - } - letter_count += 3; - } else if (c < 0xF8) { // 0xF0-0xF7 - // add a utf-8 sequence (4 bytes) - if (current_position + 4 > text_size) { - break; // prevent overrun - } - letter_count += 4; - } else if (c < 0xFC) { // 0xF8-0xFB - // add a utf-8 sequence (5 bytes) - if (current_position + 5 > text_size) { - break; // prevent overrun - } - letter_count += 5; - } else if (c < 0xFE) { // 0xFC-0xFD - // add a utf-8 sequence (6 bytes) - if (current_position + 6 > text_size) { - break; // prevent overrun - } - letter_count += 6; - } else { // 0xFE-0xFF - // not a utf-8 sequence - letter_count++; - } + letter_count += get_utf8_sequence_length(text, last_end + letter_count); } m_endlines.push_back(last_end + letter_count); last_end += letter_count; @@ -734,56 +685,7 @@ void NotificationManager::HintNotification::count_lines() float width_of_a = ImGui::CalcTextSize("a").x; int letter_count = (int)((m_window_width - m_window_width_offset - size_of_last_line) / width_of_a); while (last_end + letter_count < text.size() && ImGui::CalcTextSize(text.substr(last_end, letter_count).c_str()).x < m_window_width - m_window_width_offset - size_of_last_line) { - size_t text_size = text.size(); - size_t current_position = last_end + letter_count; - unsigned char c = text[current_position]; - if (c < 0x80) { // 0x00-0x7F - // add ASCII letter - letter_count++; - } else if (c < 0xC0) { // 0x80-0xBF - // it is in the middle of a utf-8 
sequence. add the utf-8 trailer bytes. - letter_count++; - while (last_end + letter_count < text.size()) { - c = text[last_end + letter_count]; - if (c < 0x80 || c > 0xC0) { - break; // prevent overrun - } - letter_count++; // add a utf-8 trailer byte - } - } else if (c < 0xE0) { // 0xC0-0xDF - // add a utf-8 sequence (2 bytes) - if (current_position + 2 > text_size) { - break; // prevent overrun - } - letter_count += 2; - } else if (c < 0xF0) { // 0xE0-0xEF - // add a utf-8 sequence (3 bytes) - if (current_position + 3 > text_size) { - break; // prevent overrun - } - letter_count += 3; - } else if (c < 0xF8) { // 0xF0-0xF7 - // add a utf-8 sequence (4 bytes) - if (current_position + 4 > text_size) { - break; // prevent overrun - } - letter_count += 4; - } else if (c < 0xFC) { // 0xF8-0xFB - // add a utf-8 sequence (5 bytes) - if (current_position + 5 > text_size) { - break; // prevent overrun - } - letter_count += 5; - } else if (c < 0xFE) { // 0xFC-0xFD - // add a utf-8 sequence (6 bytes) - if (current_position + 6 > text_size) { - break; // prevent overrun - } - letter_count += 6; - } else { // 0xFE-0xFF - // not a utf-8 sequence - letter_count++; - } + letter_count += get_utf8_sequence_length(text, last_end + letter_count); } m_endlines2.push_back(last_end + letter_count); last_end += letter_count; diff --git a/src/slic3r/GUI/NotificationManager.cpp b/src/slic3r/GUI/NotificationManager.cpp index 66c22cb9b..aab0dbbab 100644 --- a/src/slic3r/GUI/NotificationManager.cpp +++ b/src/slic3r/GUI/NotificationManager.cpp @@ -301,7 +301,7 @@ void NotificationManager::PopNotification::count_lines() float width_of_a = ImGui::CalcTextSize("a").x; int letter_count = (int)((m_window_width - m_window_width_offset) / width_of_a); while (last_end + letter_count < text.size() && ImGui::CalcTextSize(text.substr(last_end, letter_count).c_str()).x < m_window_width - m_window_width_offset) { - letter_count++; + letter_count += get_utf8_sequence_length(text, last_end + letter_count); } 
m_endlines.push_back(last_end + letter_count); last_end += letter_count;