diff --git a/include/cairo/context.hpp b/include/cairo/context.hpp index 23167dfe..60f3a232 100644 --- a/include/cairo/context.hpp +++ b/include/cairo/context.hpp @@ -164,8 +164,8 @@ namespace cairo { } string utf8 = string(t.contents); - utils::unicode_charlist chars; - utils::utf8_to_ucs4((const unsigned char*)utf8.c_str(), chars); + string_util::unicode_charlist chars; + string_util::utf8_to_ucs4((const unsigned char*)utf8.c_str(), chars); while (!chars.empty()) { auto remaining = chars.size(); @@ -235,7 +235,7 @@ namespace cairo { } char unicode[6]{'\0'}; - utils::ucs4_to_utf8(unicode, chars.begin()->codepoint); + string_util::ucs4_to_utf8(unicode, chars.begin()->codepoint); m_log.warn("Dropping unmatched character %s (U+%04x) in '%s'", unicode, chars.begin()->codepoint, t.contents); utf8.erase(chars.begin()->offset, chars.begin()->length); for (auto&& c : chars) { diff --git a/include/cairo/font.hpp b/include/cairo/font.hpp index 5b396480..d7a14da9 100644 --- a/include/cairo/font.hpp +++ b/include/cairo/font.hpp @@ -39,8 +39,8 @@ class font { cairo_set_font_face(m_cairo, cairo_font_face_reference(m_font_face)); } - virtual size_t match(utils::unicode_character& character) = 0; - virtual size_t match(utils::unicode_charlist& charlist) = 0; + virtual size_t match(string_util::unicode_character& character) = 0; + virtual size_t match(string_util::unicode_charlist& charlist) = 0; virtual size_t render(const string& text, double x = 0.0, double y = 0.0) = 0; virtual void textwidth(const string& text, cairo_text_extents_t* extents) = 0; @@ -187,13 +187,13 @@ class font_fc : public font { cairo_set_scaled_font(m_cairo, m_scaled); } - size_t match(utils::unicode_character& character) override { + size_t match(string_util::unicode_character& character) override { auto lock = make_unique(m_scaled); auto face = static_cast(*lock); return FT_Get_Char_Index(face, character.codepoint) ? 1 : 0; } - size_t match(utils::unicode_charlist& charlist) override { + size_t match(string_util::unicode_charlist& charlist) override { auto lock = make_unique(m_scaled); auto face = static_cast(*lock); size_t available_chars = 0; diff --git a/include/cairo/utils.hpp b/include/cairo/utils.hpp index bd0497c1..9b5220ad 100644 --- a/include/cairo/utils.hpp +++ b/include/cairo/utils.hpp @@ -2,8 +2,6 @@ #include -#include - #include "common.hpp" POLYBAR_NS @@ -39,32 +37,10 @@ namespace utils { FT_Face m_face; }; - /** - * @brief Unicode character containing converted codepoint - * and details on where its position in the source string - */ - struct unicode_character { - explicit unicode_character(); - unsigned long codepoint; - int offset; - int length; - }; - using unicode_charlist = std::list; - /** * @see */ cairo_operator_t str2operator(const string& mode, cairo_operator_t fallback); - - /** - * @brief Create a UCS-4 codepoint from a utf-8 encoded string - */ - bool utf8_to_ucs4(const unsigned char* src, unicode_charlist& result_list); - - /** - * @brief Convert a UCS-4 codepoint to a utf-8 encoded string - */ - size_t ucs4_to_utf8(char* utf8, unsigned int ucs); } // namespace utils } // namespace cairo diff --git a/include/utils/string.hpp b/include/utils/string.hpp index 68399933..176ee978 100644 --- a/include/utils/string.hpp +++ b/include/utils/string.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include "common.hpp" @@ -8,8 +9,6 @@ POLYBAR_NS class sstream { public: - sstream() : m_stream() {} - template sstream& operator<<(const T& object) { m_stream << object; @@ -25,7 +24,7 @@ class sstream { return m_stream.str(); } - const string to_string() const { + string to_string() const { return m_stream.str(); } @@ -39,6 +38,26 @@ namespace string_util { */ using hash_type = unsigned long; +/** + * @brief Unicode character containing converted codepoint + * and details on where its position in the source string + */ +struct unicode_character { + /** + * The numerical codepoint. Between U+0000 and U+10FFFF + */ + unsigned long codepoint{0}; + /** + * Byte offset of this character in the original string + */ + int offset{0}; + /** + * Number of bytes used by this character in the original string + */ + int length{0}; +}; +using unicode_charlist = std::list; + bool contains(const string& haystack, const string& needle); bool contains_ignore_case(const string& haystack, const string& needle); bool ends_with(const string& haystack, const string& suffix); @@ -66,6 +85,15 @@ string trim(string&& value, const char& needle = ' '); size_t char_len(const string& value); string utf8_truncate(string&& value, size_t len); +/** + * @brief Create a UCS-4 codepoint from a utf-8 encoded string + */ +bool utf8_to_ucs4(const unsigned char* src, unicode_charlist& result_list); + +/** + * @brief Convert a UCS-4 codepoint to a utf-8 encoded string + */ +size_t ucs4_to_utf8(char* utf8, unsigned int ucs); string join(const vector& strs, const string& delim); vector split(const string& s, char delim); diff --git a/src/cairo/utils.cpp b/src/cairo/utils.cpp index f5649130..b7cc5e06 100644 --- a/src/cairo/utils.cpp +++ b/src/cairo/utils.cpp @@ -38,11 +38,6 @@ namespace utils { return m_face; } - // }}} - // implementation : unicode_character {{{ - - unicode_character::unicode_character() : codepoint(0), offset(0), length(0) {} - // }}} /** @@ -87,89 +82,6 @@ namespace utils { auto it = modes.find(mode); return it != modes.end() ? it->second : fallback; } - - /** - * @brief Create a UCS-4 codepoint from a utf-8 encoded string - */ - bool utf8_to_ucs4(const unsigned char* src, unicode_charlist& result_list) { - if (!src) { - return false; - } - const unsigned char* first = src; - while (*first) { - int len = 0; - unsigned long result = 0; - if ((*first >> 7) == 0) { - len = 1; - result = *first; - } else if ((*first >> 5) == 6) { - len = 2; - result = *first & 31; - } else if ((*first >> 4) == 14) { - len = 3; - result = *first & 15; - } else if ((*first >> 3) == 30) { - len = 4; - result = *first & 7; - } else { - return false; - } - const unsigned char* next; - for (next = first + 1; *next && ((*next >> 6) == 2) && (next - first < len); next++) { - result = result << 6; - result |= *next & 63; - } - unicode_character uc_char; - uc_char.codepoint = result; - uc_char.offset = first - src; - uc_char.length = next - first; - result_list.push_back(uc_char); - first = next; - } - return true; - } - - /** - * @brief Convert a UCS-4 codepoint to a utf-8 encoded string - */ - size_t ucs4_to_utf8(char* utf8, unsigned int ucs) { - if (ucs <= 0x7f) { - *utf8 = ucs; - return 1; - } else if (ucs <= 0x07ff) { - *(utf8++) = ((ucs >> 6) & 0xff) | 0xc0; - *utf8 = (ucs & 0x3f) | 0x80; - return 2; - } else if (ucs <= 0xffff) { - *(utf8++) = ((ucs >> 12) & 0x0f) | 0xe0; - *(utf8++) = ((ucs >> 6) & 0x3f) | 0x80; - *utf8 = (ucs & 0x3f) | 0x80; - return 3; - } else if (ucs <= 0x1fffff) { - *(utf8++) = ((ucs >> 18) & 0x07) | 0xf0; - *(utf8++) = ((ucs >> 12) & 0x3f) | 0x80; - *(utf8++) = ((ucs >> 6) & 0x3f) | 0x80; - *utf8 = (ucs & 0x3f) | 0x80; - return 4; - } else if (ucs <= 0x03ffffff) { - *(utf8++) = ((ucs >> 24) & 0x03) | 0xf8; - *(utf8++) = ((ucs >> 18) & 0x3f) | 0x80; - *(utf8++) = ((ucs >> 12) & 0x3f) | 0x80; - *(utf8++) = ((ucs >> 6) & 0x3f) | 0x80; - *utf8 = (ucs & 0x3f) | 0x80; - return 5; - } else if (ucs <= 0x7fffffff) { - *(utf8++) = ((ucs >> 30) & 0x01) | 0xfc; - *(utf8++) = ((ucs >> 24) & 0x3f) | 0x80; - *(utf8++) = ((ucs >> 18) & 0x3f) | 0x80; - *(utf8++) = ((ucs >> 12) & 0x3f) | 0x80; - *(utf8++) = ((ucs >> 6) & 0x3f) | 0x80; - *utf8 = (ucs & 0x3f) | 0x80; - return 6; - } else { - return 0; - } - } } // namespace utils } // namespace cairo diff --git a/src/utils/string.cpp b/src/utils/string.cpp index 86f7f95d..681eae4d 100644 --- a/src/utils/string.cpp +++ b/src/utils/string.cpp @@ -224,6 +224,89 @@ string utf8_truncate(string&& value, size_t len) { return forward(value); } +/** + * @brief Create a UCS-4 codepoint from a utf-8 encoded string + */ +bool utf8_to_ucs4(const unsigned char* src, unicode_charlist& result_list) { + if (!src) { + return false; + } + const unsigned char* first = src; + while (*first) { + int len = 0; + unsigned long result = 0; + if ((*first >> 7) == 0) { + len = 1; + result = *first; + } else if ((*first >> 5) == 6) { + len = 2; + result = *first & 31; + } else if ((*first >> 4) == 14) { + len = 3; + result = *first & 15; + } else if ((*first >> 3) == 30) { + len = 4; + result = *first & 7; + } else { + return false; + } + const unsigned char* next; + for (next = first + 1; *next && ((*next >> 6) == 2) && (next - first < len); next++) { + result = result << 6; + result |= *next & 63; + } + unicode_character uc_char; + uc_char.codepoint = result; + uc_char.offset = first - src; + uc_char.length = next - first; + result_list.push_back(uc_char); + first = next; + } + return true; +} + +/** + * @brief Convert a UCS-4 codepoint to a utf-8 encoded string + */ +size_t ucs4_to_utf8(char* utf8, unsigned int ucs) { + if (ucs <= 0x7f) { + *utf8 = ucs; + return 1; + } else if (ucs <= 0x07ff) { + *(utf8++) = ((ucs >> 6) & 0xff) | 0xc0; + *utf8 = (ucs & 0x3f) | 0x80; + return 2; + } else if (ucs <= 0xffff) { + *(utf8++) = ((ucs >> 12) & 0x0f) | 0xe0; + *(utf8++) = ((ucs >> 6) & 0x3f) | 0x80; + *utf8 = (ucs & 0x3f) | 0x80; + return 3; + } else if (ucs <= 0x1fffff) { + *(utf8++) = ((ucs >> 18) & 0x07) | 0xf0; + *(utf8++) = ((ucs >> 12) & 0x3f) | 0x80; + *(utf8++) = ((ucs >> 6) & 0x3f) | 0x80; + *utf8 = (ucs & 0x3f) | 0x80; + return 4; + } else if (ucs <= 0x03ffffff) { + *(utf8++) = ((ucs >> 24) & 0x03) | 0xf8; + *(utf8++) = ((ucs >> 18) & 0x3f) | 0x80; + *(utf8++) = ((ucs >> 12) & 0x3f) | 0x80; + *(utf8++) = ((ucs >> 6) & 0x3f) | 0x80; + *utf8 = (ucs & 0x3f) | 0x80; + return 5; + } else if (ucs <= 0x7fffffff) { + *(utf8++) = ((ucs >> 30) & 0x01) | 0xfc; + *(utf8++) = ((ucs >> 24) & 0x3f) | 0x80; + *(utf8++) = ((ucs >> 18) & 0x3f) | 0x80; + *(utf8++) = ((ucs >> 12) & 0x3f) | 0x80; + *(utf8++) = ((ucs >> 6) & 0x3f) | 0x80; + *utf8 = (ucs & 0x3f) | 0x80; + return 6; + } else { + return 0; + } +} + /** * Join all strings in vector into a single string separated by delim */