ASCII folding applied over the matched string

This commit is contained in:
bubnikv 2020-05-03 15:45:47 +02:00
parent e27c6c7928
commit 28c0880b24
3 changed files with 487 additions and 450 deletions

View file

@ -37,6 +37,8 @@
#include <cstdio>
#include "../Utils/ASCIIFolding.hpp"
// Public interface
namespace fts {
using char_type = wchar_t;
@ -110,14 +112,27 @@ namespace fts {
// Loop through pattern and str looking for a match
bool first_match = true;
while (*pattern != '\0' && *str != '\0') {
int num_matched = std::tolower(*pattern) == std::tolower(*str) ? 1 : 0;
bool folded_match = false;
if (! num_matched) {
char tmp[4];
char *end = Slic3r::fold_to_ascii(*str, tmp);
char *c = tmp;
for (const wchar_t* d = pattern; c != end && *d != 0 && wchar_t(std::tolower(*c)) == std::tolower(*d); ++c, ++d);
if (c == end) {
folded_match = true;
num_matched = end - tmp;
}
}
// Found match
if (tolower(*pattern) == tolower(*str)) {
if (num_matched) {
// Supplied matches buffer was too short
if (nextMatch >= max_matches)
if (nextMatch + num_matched > max_matches)
return false;
// "Copy-on-Write" srcMatches into matches
if (first_match && srcMatches) {
memcpy(matches, srcMatches, sizeof(pos_type) * (nextMatch + 1)); // including the stopper
@ -141,8 +156,9 @@ namespace fts {
matches[nextMatch++] = (pos_type)(str - strBegin);
// Write a stopper sign.
matches[nextMatch] = stopper;
++pattern;
}
// Advance pattern by the number of matched characters (may be more than one when ASCII folding kicks in).
pattern += num_matched;
}
++str;
}
@ -172,8 +188,10 @@ namespace fts {
first_letter_bonus :
std::max(matches[0] * leading_letter_penalty, max_leading_letter_penalty);
// Apply unmatched letters after the end penalty
// outScore += (int(str - strBegin) - matches[nextMatch-1] + 1) * unmatched_letter_penalty;
// Apply unmatched penalty
outScore += (int(str - strBegin) - matches[nextMatch-1] + 1) * unmatched_letter_penalty;
outScore += (int(str - strBegin) - nextMatch) * unmatched_letter_penalty;
// Apply ordering bonuses
for (int i = 0; i < nextMatch; ++i) {

File diff suppressed because it is too large Load diff

View file

@ -7,8 +7,22 @@ namespace Slic3r {
// If possible, remove accents from accented latin characters.
// This function is useful for generating file names to be processed by legacy firmwares.
extern std::string fold_utf8_to_ascii(const char *src);
extern std::string fold_utf8_to_ascii(const std::string &src);
extern std::string fold_utf8_to_ascii(const char *src);
extern std::string fold_utf8_to_ascii(const std::string &src);
// Convert the input UNICODE character to a string of maximum 4 output ASCII characters.
// Return the end of the string written to the output.
// The output buffer must be at least 4 characters long.
extern char* fold_to_ascii(wchar_t c, char *out);
// Fold the wide character c to ASCII and write the resulting characters through the output iterator.
// The folding itself is delegated to the char* overload of fold_to_ascii(), which produces at most
// 4 characters into a temporary buffer; those characters are then copied to out one by one.
// The iterator is taken by value, so the caller's own iterator object is not modified.
template<typename OUTPUT_ITERATOR>
void fold_to_ascii(wchar_t c, OUTPUT_ITERATOR out)
{
    char tmp[4];
    char *end = fold_to_ascii(c, tmp);
    for (char *it = tmp; it != end; ++ it)
        // Advance the output iterator after every write. Without the increment, iterators other than
        // the insert iterators (raw pointers, container iterators) would overwrite the same position.
        *out++ = *it;
}
}; // namespace Slic3r