ASCII folding applied over the matched string

This commit is contained in:
bubnikv 2020-05-03 15:45:47 +02:00
parent e27c6c7928
commit 28c0880b24
3 changed files with 487 additions and 450 deletions

View file

@ -37,6 +37,8 @@
#include <cstdio> #include <cstdio>
#include "../Utils/ASCIIFolding.hpp"
// Public interface // Public interface
namespace fts { namespace fts {
using char_type = wchar_t; using char_type = wchar_t;
@ -111,11 +113,24 @@ namespace fts {
bool first_match = true; bool first_match = true;
while (*pattern != '\0' && *str != '\0') { while (*pattern != '\0' && *str != '\0') {
int num_matched = std::tolower(*pattern) == std::tolower(*str) ? 1 : 0;
bool folded_match = false;
if (! num_matched) {
char tmp[4];
char *end = Slic3r::fold_to_ascii(*str, tmp);
char *c = tmp;
for (const wchar_t* d = pattern; c != end && *d != 0 && wchar_t(std::tolower(*c)) == std::tolower(*d); ++c, ++d);
if (c == end) {
folded_match = true;
num_matched = end - tmp;
}
}
// Found match // Found match
if (tolower(*pattern) == tolower(*str)) { if (num_matched) {
// Supplied matches buffer was too short // Supplied matches buffer was too short
if (nextMatch >= max_matches) if (nextMatch + num_matched > max_matches)
return false; return false;
// "Copy-on-Write" srcMatches into matches // "Copy-on-Write" srcMatches into matches
@ -141,7 +156,8 @@ namespace fts {
matches[nextMatch++] = (pos_type)(str - strBegin); matches[nextMatch++] = (pos_type)(str - strBegin);
// Write a stopper sign. // Write a stopper sign.
matches[nextMatch] = stopper; matches[nextMatch] = stopper;
++pattern; // Advance pattern by the number of matched characters (could be more if ASCII folding triggers in).
pattern += num_matched;
} }
++str; ++str;
} }
@ -172,8 +188,10 @@ namespace fts {
first_letter_bonus : first_letter_bonus :
std::max(matches[0] * leading_letter_penalty, max_leading_letter_penalty); std::max(matches[0] * leading_letter_penalty, max_leading_letter_penalty);
// Apply unmatched letters after the end penalty
// outScore += (int(str - strBegin) - matches[nextMatch-1] + 1) * unmatched_letter_penalty;
// Apply unmatched penalty // Apply unmatched penalty
outScore += (int(str - strBegin) - matches[nextMatch-1] + 1) * unmatched_letter_penalty; outScore += (int(str - strBegin) - nextMatch) * unmatched_letter_penalty;
// Apply ordering bonuses // Apply ordering bonuses
for (int i = 0; i < nextMatch; ++i) { for (int i = 0; i < nextMatch; ++i) {

File diff suppressed because it is too large Load diff

View file

@ -7,8 +7,22 @@ namespace Slic3r {
// If possible, remove accents from accented latin characters. // If possible, remove accents from accented latin characters.
// This function is useful for generating file names to be processed by legacy firmwares. // This function is useful for generating file names to be processed by legacy firmwares.
extern std::string fold_utf8_to_ascii(const char *src); extern std::string fold_utf8_to_ascii(const char *src);
extern std::string fold_utf8_to_ascii(const std::string &src); extern std::string fold_utf8_to_ascii(const std::string &src);
// Convert the input UNICODE character to a string of maximum 4 output ASCII characters.
// Return the end of the string written to the output, so that the number of characters
// produced is (returned pointer - out).
// The output buffer must be at least 4 characters long.
// NOTE(review): callers rely on the returned end pointer only; whether a terminating zero
// is written is not visible from this declaration - confirm against the implementation.
extern char* fold_to_ascii(wchar_t c, char *out);
// Fold a single UNICODE character to ASCII and emit the resulting characters (up to 4)
// through an arbitrary output iterator, for example std::back_inserter(std::string)
// or a pointer into a sufficiently large buffer.
// The character is first folded into a local 4 character buffer by the pointer based overload,
// then the produced range is copied to the output iterator.
// The iterator is taken by value, therefore the caller's copy of the iterator is not advanced.
template<typename OUTPUT_ITERATOR>
void fold_to_ascii(wchar_t c, OUTPUT_ITERATOR out)
{
    char tmp[4];
    char *end = fold_to_ascii(c, tmp);
    for (char *it = tmp; it != end; ++ it)
        // Advance the output iterator after each write. Without the increment, plain iterators
        // (pointers, container iterators) would have every folded character overwrite the same
        // position. For insert iterators the increment is a no-op, so their behavior is unchanged.
        *out++ = *it;
}
}; // namespace Slic3r }; // namespace Slic3r