Moved Odaki's implementation of searching for the end of a UTF-8 sequence into a separate function.
+ Fix: byte 0xC0 was wrongly treated as a trailer byte
followup on fff862a5ae
This commit is contained in:
parent
fff862a5ae
commit
3f14484eca
@ -74,6 +74,10 @@ extern local_encoded_string encode_path(const char *src);
|
||||
extern std::string decode_path(const char *src);
|
||||
extern std::string normalize_utf8_nfc(const char *src);
|
||||
|
||||
// Returns the length of the next UTF-8 sequence, i.e. the number of bytes in the string that together form one UTF-8 character.
|
||||
// The sequence is measured starting at pos. An ASCII character yields 1. Also works if pos points into the middle of a sequence.
|
||||
extern size_t get_utf8_sequence_length(const std::string& text, size_t pos = 0);
|
||||
|
||||
// Safely rename a file even if the target exists.
|
||||
// On Windows, the file explorer (or anti-virus or whatever else) often locks the file
|
||||
// for a short while, so the file may not be movable. Retry while we see recoverable errors.
|
||||
|
@ -863,6 +863,71 @@ std::string normalize_utf8_nfc(const char *src)
|
||||
return boost::locale::normalize(src, boost::locale::norm_nfc, locale_utf8);
|
||||
}
|
||||
|
||||
// Returns the number of bytes occupied by the UTF-8 sequence located at text[pos].
//
//  - ASCII byte (0x00-0x7F): 1.
//  - Trailer byte (0x80-0xBF), i.e. pos points into the middle of a sequence:
//    that byte plus every trailer byte directly following it.
//  - Header byte (0xC0-0xFD): the header plus the trailer bytes that really follow it,
//    but never more than the header announces (2 to 6 bytes in total). The number of
//    one bits above the topmost zero bit of the header is the announced length.
//    The count stops early at the end of the string or at the first byte that is not
//    a trailer, so a malformed or truncated sequence never swallows the bytes of the
//    following character.
//  - 0xFE / 0xFF: not part of any UTF-8 sequence; 1 is returned so the caller advances.
//
// pos must be smaller than text.size(). This is asserted; when assertions are
// compiled out, 0 is returned for an out-of-range pos instead of reading past the end.
size_t get_utf8_sequence_length(const std::string& text, size_t pos)
{
    assert(pos < text.size());
    if (pos >= text.size())
        return 0;

    const size_t        available = text.size() - pos;
    const unsigned char first     = static_cast<unsigned char>(text[pos]);

    // Upper bound of bytes that may belong to the sequence, including text[pos].
    size_t announced = 1;
    if (first < 0x80)        // 0x00-0x7F: ASCII
        return 1;
    else if (first < 0xC0)   // 0x80-0xBF: trailer, take all trailers that follow
        announced = available;
    else if (first < 0xE0)   // 0xC0-0xDF: 2 byte sequence
        announced = 2;
    else if (first < 0xF0)   // 0xE0-0xEF: 3 byte sequence
        announced = 3;
    else if (first < 0xF8)   // 0xF0-0xF7: 4 byte sequence
        announced = 4;
    else if (first < 0xFC)   // 0xF8-0xFB: 5 byte sequence
        announced = 5;
    else if (first < 0xFE)   // 0xFC-0xFD: 6 byte sequence
        announced = 6;
    else                     // 0xFE-0xFF: not a utf-8 sequence
        return 1;

    // Never look beyond the end of the string.
    const size_t limit = announced < available ? announced : available;

    size_t length = 1;
    while (length < limit) {
        const unsigned char next = static_cast<unsigned char>(text[pos + length]);
        if ((next & 0xC0) != 0x80)
            break; // not a trailer byte (0x80-0xBF): the sequence ends here
        ++length;
    }
    return length;
}
|
||||
|
||||
namespace PerlUtils {
|
||||
// Get a file name including the extension.
|
||||
std::string path_to_filename(const char *src) { return boost::filesystem::path(src).filename().string(); }
|
||||
|
@ -615,56 +615,7 @@ void NotificationManager::HintNotification::count_lines()
|
||||
float width_of_a = ImGui::CalcTextSize("a").x;
|
||||
int letter_count = (int)((m_window_width - m_window_width_offset) / width_of_a);
|
||||
while (last_end + letter_count < text.size() && ImGui::CalcTextSize(text.substr(last_end, letter_count).c_str()).x < m_window_width - m_window_width_offset) {
|
||||
size_t text_size = text.size();
|
||||
size_t current_position = last_end + letter_count;
|
||||
unsigned char c = text[current_position];
|
||||
if (c < 0x80) { // 0x00-0x7F
|
||||
// add ASCII letter
|
||||
letter_count++;
|
||||
} else if (c < 0xC0) { // 0x80-0xBF
|
||||
// it is in the middle of a utf-8 sequence. add the utf-8 trailer bytes.
|
||||
letter_count++;
|
||||
while (last_end + letter_count < text.size()) {
|
||||
c = text[last_end + letter_count];
|
||||
if (c < 0x80 || c > 0xC0) {
|
||||
break; // prevent overrun
|
||||
}
|
||||
letter_count++; // add a utf-8 trailer byte
|
||||
}
|
||||
} else if (c < 0xE0) { // 0xC0-0xDF
|
||||
// add a utf-8 sequence (2 bytes)
|
||||
if (current_position + 2 > text_size) {
|
||||
break; // prevent overrun
|
||||
}
|
||||
letter_count += 2;
|
||||
} else if (c < 0xF0) { // 0xE0-0xEF
|
||||
// add a utf-8 sequence (3 bytes)
|
||||
if (current_position + 3 > text_size) {
|
||||
break; // prevent overrun
|
||||
}
|
||||
letter_count += 3;
|
||||
} else if (c < 0xF8) { // 0xF0-0xF7
|
||||
// add a utf-8 sequence (4 bytes)
|
||||
if (current_position + 4 > text_size) {
|
||||
break; // prevent overrun
|
||||
}
|
||||
letter_count += 4;
|
||||
} else if (c < 0xFC) { // 0xF8-0xFB
|
||||
// add a utf-8 sequence (5 bytes)
|
||||
if (current_position + 5 > text_size) {
|
||||
break; // prevent overrun
|
||||
}
|
||||
letter_count += 5;
|
||||
} else if (c < 0xFE) { // 0xFC-0xFD
|
||||
// add a utf-8 sequence (6 bytes)
|
||||
if (current_position + 6 > text_size) {
|
||||
break; // prevent overrun
|
||||
}
|
||||
letter_count += 6;
|
||||
} else { // 0xFE-0xFF
|
||||
// not a utf-8 sequence
|
||||
letter_count++;
|
||||
}
|
||||
letter_count += get_utf8_sequence_length(text, last_end + letter_count);
|
||||
}
|
||||
m_endlines.push_back(last_end + letter_count);
|
||||
last_end += letter_count;
|
||||
@ -734,56 +685,7 @@ void NotificationManager::HintNotification::count_lines()
|
||||
float width_of_a = ImGui::CalcTextSize("a").x;
|
||||
int letter_count = (int)((m_window_width - m_window_width_offset - size_of_last_line) / width_of_a);
|
||||
while (last_end + letter_count < text.size() && ImGui::CalcTextSize(text.substr(last_end, letter_count).c_str()).x < m_window_width - m_window_width_offset - size_of_last_line) {
|
||||
size_t text_size = text.size();
|
||||
size_t current_position = last_end + letter_count;
|
||||
unsigned char c = text[current_position];
|
||||
if (c < 0x80) { // 0x00-0x7F
|
||||
// add ASCII letter
|
||||
letter_count++;
|
||||
} else if (c < 0xC0) { // 0x80-0xBF
|
||||
// it is in the middle of a utf-8 sequence. add the utf-8 trailer bytes.
|
||||
letter_count++;
|
||||
while (last_end + letter_count < text.size()) {
|
||||
c = text[last_end + letter_count];
|
||||
if (c < 0x80 || c > 0xC0) {
|
||||
break; // prevent overrun
|
||||
}
|
||||
letter_count++; // add a utf-8 trailer byte
|
||||
}
|
||||
} else if (c < 0xE0) { // 0xC0-0xDF
|
||||
// add a utf-8 sequence (2 bytes)
|
||||
if (current_position + 2 > text_size) {
|
||||
break; // prevent overrun
|
||||
}
|
||||
letter_count += 2;
|
||||
} else if (c < 0xF0) { // 0xE0-0xEF
|
||||
// add a utf-8 sequence (3 bytes)
|
||||
if (current_position + 3 > text_size) {
|
||||
break; // prevent overrun
|
||||
}
|
||||
letter_count += 3;
|
||||
} else if (c < 0xF8) { // 0xF0-0xF7
|
||||
// add a utf-8 sequence (4 bytes)
|
||||
if (current_position + 4 > text_size) {
|
||||
break; // prevent overrun
|
||||
}
|
||||
letter_count += 4;
|
||||
} else if (c < 0xFC) { // 0xF8-0xFB
|
||||
// add a utf-8 sequence (5 bytes)
|
||||
if (current_position + 5 > text_size) {
|
||||
break; // prevent overrun
|
||||
}
|
||||
letter_count += 5;
|
||||
} else if (c < 0xFE) { // 0xFC-0xFD
|
||||
// add a utf-8 sequence (6 bytes)
|
||||
if (current_position + 6 > text_size) {
|
||||
break; // prevent overrun
|
||||
}
|
||||
letter_count += 6;
|
||||
} else { // 0xFE-0xFF
|
||||
// not a utf-8 sequence
|
||||
letter_count++;
|
||||
}
|
||||
letter_count += get_utf8_sequence_length(text, last_end + letter_count);
|
||||
}
|
||||
m_endlines2.push_back(last_end + letter_count);
|
||||
last_end += letter_count;
|
||||
|
@ -301,7 +301,7 @@ void NotificationManager::PopNotification::count_lines()
|
||||
float width_of_a = ImGui::CalcTextSize("a").x;
|
||||
int letter_count = (int)((m_window_width - m_window_width_offset) / width_of_a);
|
||||
while (last_end + letter_count < text.size() && ImGui::CalcTextSize(text.substr(last_end, letter_count).c_str()).x < m_window_width - m_window_width_offset) {
|
||||
letter_count++;
|
||||
letter_count += get_utf8_sequence_length(text, last_end + letter_count);
|
||||
}
|
||||
m_endlines.push_back(last_end + letter_count);
|
||||
last_end += letter_count;
|
||||
|
Loading…
Reference in New Issue
Block a user