Fixed ASCII folding for non-foldable non-ASCII characters

Problem: Add an object, name it using Cyrillic, Slice, click Export: the proposed name is mangled. Affects all platforms; 2.2.0 was fine.

Cause: It is mangled during ASCII folding, which was broken with 28c0880 when it was generalized to be used from the fuzzy matcher.
    fold_to_ascii(wchar_t c, char *out) relies on a narrowing char = wchar_t assignment, whereas the old one used std::back_insert_iterator<std::wstring>.
    It is thus unable to leave a character alone even when it should (it should, right?).

Solution:
    1. Typed the fold_to_ascii function so it works on wchar_t only, which should mimic the old behaviour.
    2. Changed the respective call in fts_fuzzy_match.h. That function also works with wide char C-strings.

Cleanup:
    1. Removed the unused fold_utf8_to_ascii(const char *src) overload to avoid code duplication.
    2. Untemplated the fold_to_ascii(wchar_t c, std::back_insert_iterator<std::wstring>& out) function, it was never called with a different type.
    3. The function is now static in ASCIIFolding.cpp, nobody else needs to know.
This commit is contained in:
Lukas Matena 2020-12-18 21:22:26 +01:00
parent 1f5ac7d2f7
commit 78a3d8b63e
3 changed files with 17 additions and 29 deletions

View file

@ -116,10 +116,10 @@ namespace fts {
int num_matched = std::tolower(*pattern) == std::tolower(*str) ? 1 : 0;
bool folded_match = false;
if (! num_matched) {
char tmp[4];
char *end = Slic3r::fold_to_ascii(*str, tmp);
char *c = tmp;
for (const wchar_t* d = pattern; c != end && *d != 0 && wchar_t(std::tolower(*c)) == std::tolower(*d); ++c, ++d);
wchar_t tmp[4];
wchar_t *end = Slic3r::fold_to_ascii(*str, tmp);
wchar_t *c = tmp;
for (const wchar_t* d = pattern; c != end && *d != 0 && std::towlower(*c) == std::towlower(*d); ++c, ++d);
if (c == end) {
folded_match = true;
num_matched = end - tmp;

View file

@ -11,7 +11,7 @@ namespace Slic3r {
// Convert the input UNICODE character to a string of maximum 4 output ASCII characters.
// Return the end of the string written to the output.
// The output buffer must be at least 4 characters long.
char* fold_to_ascii(wchar_t c, char *out)
wchar_t* fold_to_ascii(wchar_t c, wchar_t *out)
{
if (c < 0x080) {
*out ++ = c;
@ -1925,7 +1925,7 @@ char* fold_to_ascii(wchar_t c, char *out)
*out ++ = '~';
break;
default:
*out ++ = c;
*out ++ = c;
break;
}
}
@ -1934,6 +1934,14 @@ char* fold_to_ascii(wchar_t c, char *out)
return out;
}
// Fold a single wide character and append the result to a std::wstring.
// The character is first folded into a small local buffer by the pointer-based
// fold_to_ascii(wchar_t, wchar_t*) overload, then every produced character is
// written through the back-insert iterator.
static void fold_to_ascii(wchar_t c, std::back_insert_iterator<std::wstring>& out)
{
// The pointer-based overload is documented to write at most 4 characters.
wchar_t tmp[4];
wchar_t *end = fold_to_ascii(c, tmp);
for (wchar_t *it = tmp; it != end; ++ it)
// Assigning through a std::back_insert_iterator appends to the underlying string,
// so no explicit increment of the iterator is needed.
*out = *it;
}
std::string fold_utf8_to_ascii(const std::string &src)
{
std::wstring wstr = boost::locale::conv::utf_to_utf<wchar_t>(src.c_str(), src.c_str() + src.size());
@ -1945,15 +1953,5 @@ std::string fold_utf8_to_ascii(const std::string &src)
return boost::locale::conv::utf_to_utf<char>(dst.c_str(), dst.c_str() + dst.size());
}
// Fold a UTF-8 encoded C string character by character and return the result as UTF-8.
// src must be NUL-terminated, because its length is obtained with strlen().
std::string fold_utf8_to_ascii(const char *src)
{
// Convert UTF-8 to a wide string so that each character can be folded individually.
std::wstring wstr = boost::locale::conv::utf_to_utf<wchar_t>(src, src + strlen(src));
std::wstring dst;
// A single input character may fold to several output characters,
// so this reservation is only a lower bound on the final size.
dst.reserve(wstr.size());
auto out = std::back_insert_iterator<std::wstring>(dst);
for (wchar_t c : wstr)
fold_to_ascii(c, out);
// Convert the folded wide string back to UTF-8.
return boost::locale::conv::utf_to_utf<char>(dst.c_str(), dst.c_str() + dst.size());
}
}; // namespace Slic3r
} // namespace Slic3r

View file

@ -7,23 +7,13 @@ namespace Slic3r {
// If possible, remove accents from accented latin characters.
// This function is useful for generating file names to be processed by legacy firmwares.
extern std::string fold_utf8_to_ascii(const char *src);
extern std::string fold_utf8_to_ascii(const std::string &src);
// Convert the input UNICODE character to a string of maximum 4 output ASCII characters.
// Return the end of the string written to the output.
// The output buffer must be at least 4 characters long.
extern char* fold_to_ascii(wchar_t c, char *out);
extern wchar_t* fold_to_ascii(wchar_t c, wchar_t *out);
// Fold a single wide character and write the resulting characters to an output iterator.
// The character is folded into a local char buffer by the pointer-based
// fold_to_ascii(wchar_t, char*) overload, then each produced char is assigned through out.
// NOTE(review): out is never incremented here, so this only behaves as an append for
// iterators whose assignment itself advances the output (e.g. std::back_insert_iterator).
template<typename OUTPUT_ITERATOR>
void fold_to_ascii(wchar_t c, OUTPUT_ITERATOR out)
{
// The pointer-based overload is documented to write at most 4 characters.
char tmp[4];
char *end = fold_to_ascii(c, tmp);
for (char *it = tmp; it != end; ++ it)
*out = *it;
}
}; // namespace Slic3r
} // namespace Slic3r
#endif /* slic3r_ASCIIFolding_hpp_ */