Handle UTF in long filenames (#20087)
Co-authored-by: Scott Lahteine <thinkyhead@users.noreply.github.com>
This commit is contained in:
parent
0465e0ae3a
commit
5b9aeb2e5f
5 changed files with 124 additions and 100 deletions
|
@ -1256,6 +1256,10 @@
|
|||
// Note: Only affects SCROLL_LONG_FILENAMES with SDSORT_CACHE_NAMES but not SDSORT_DYNAMIC_RAM.
|
||||
#endif
|
||||
|
||||
// Allow international symbols in long filenames. To display correctly, the
|
||||
// LCD's font must contain the characters. Check your selected LCD language.
|
||||
#define UTF_FILENAME_SUPPORT
|
||||
|
||||
// This allows hosts to request long names for files and folders with M33
|
||||
//#define LONG_FILENAME_HOST_SUPPORT
|
||||
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
|
||||
#include "../inc/MarlinConfig.h"
|
||||
|
||||
#define MAX_UTF8_CHAR_SIZE 4
|
||||
|
||||
#if HAS_WIRED_LCD
|
||||
#include "marlinui.h"
|
||||
#include "../MarlinCore.h"
|
||||
|
@ -79,6 +81,8 @@ uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t
|
|||
uint32_t val = 0;
|
||||
uint8_t *p = pstart;
|
||||
|
||||
#define NEXT_6_BITS() do{ val <<= 6; p++; valcur = cb_read_byte(p); val |= (valcur & 0x3F); }while(0)
|
||||
|
||||
uint8_t valcur = cb_read_byte(p);
|
||||
if (0 == (0x80 & valcur)) {
|
||||
val = valcur;
|
||||
|
@ -86,74 +90,51 @@ uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t
|
|||
}
|
||||
else if (0xC0 == (0xE0 & valcur)) {
|
||||
val = valcur & 0x1F;
|
||||
val <<= 6;
|
||||
p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
p++;
|
||||
}
|
||||
else if (0xE0 == (0xF0 & valcur)) {
|
||||
val = valcur & 0x0F;
|
||||
val <<= 6; p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
val <<= 6; p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
p++;
|
||||
}
|
||||
else if (0xF0 == (0xF8 & valcur)) {
|
||||
val = valcur & 0x07;
|
||||
val <<= 6; p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
val <<= 6; p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
val <<= 6; p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
p++;
|
||||
}
|
||||
else if (0xF8 == (0xFC & valcur)) {
|
||||
val = valcur & 0x03;
|
||||
val <<= 6; p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
val <<= 6; p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
val <<= 6; p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
val <<= 6; p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
p++;
|
||||
}
|
||||
else if (0xFC == (0xFE & valcur)) {
|
||||
val = valcur & 0x01;
|
||||
val <<= 6; p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
val <<= 6; p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
val <<= 6; p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
val <<= 6; p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
val <<= 6; p++;
|
||||
valcur = cb_read_byte(p);
|
||||
val |= (valcur & 0x3F);
|
||||
NEXT_6_BITS();
|
||||
p++;
|
||||
}
|
||||
#if MAX_UTF8_CHAR_SIZE >= 3
|
||||
else if (0xE0 == (0xF0 & valcur)) {
|
||||
val = valcur & 0x0F;
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
p++;
|
||||
}
|
||||
#endif
|
||||
#if MAX_UTF8_CHAR_SIZE >= 4
|
||||
else if (0xF0 == (0xF8 & valcur)) {
|
||||
val = valcur & 0x07;
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
p++;
|
||||
}
|
||||
#endif
|
||||
#if MAX_UTF8_CHAR_SIZE >= 5
|
||||
else if (0xF8 == (0xFC & valcur)) {
|
||||
val = valcur & 0x03;
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
p++;
|
||||
}
|
||||
#endif
|
||||
#if MAX_UTF8_CHAR_SIZE >= 6
|
||||
else if (0xFC == (0xFE & valcur)) {
|
||||
val = valcur & 0x01;
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
p++;
|
||||
}
|
||||
#endif
|
||||
else if (0x80 == (0xC0 & valcur))
|
||||
for (; 0x80 == (0xC0 & valcur); ) { p++; valcur = cb_read_byte(p); }
|
||||
else
|
||||
for (; ((0xFE & valcur) > 0xFC); ) { p++; valcur = cb_read_byte(p); }
|
||||
for (; 0xFC < (0xFE & valcur); ) { p++; valcur = cb_read_byte(p); }
|
||||
|
||||
if (pval) *pval = val;
|
||||
|
||||
|
|
|
@ -1103,19 +1103,67 @@ int8_t SdBaseFile::readDir(dir_t* dir, char* longFilename) {
|
|||
if (WITHIN(seq, 1, MAX_VFAT_ENTRIES)) {
|
||||
// TODO: Store the filename checksum to verify if a long-filename-unaware system modified the file table.
|
||||
n = (seq - 1) * (FILENAME_LENGTH);
|
||||
LOOP_L_N(i, FILENAME_LENGTH)
|
||||
longFilename[n + i] = (i < 5) ? VFAT->name1[i] : (i < 11) ? VFAT->name2[i - 5] : VFAT->name3[i - 11];
|
||||
LOOP_L_N(i, FILENAME_LENGTH) {
|
||||
uint16_t utf16_ch = (i < 5) ? VFAT->name1[i] : (i < 11) ? VFAT->name2[i - 5] : VFAT->name3[i - 11];
|
||||
#if ENABLED(UTF_FILENAME_SUPPORT)
|
||||
// We can't convert to UTF-8 here because UTF-8 is a variable-size encoding, while joining LFN blocks
|
||||
// needs fixed byte offsets. So just store the full UTF-16LE words here and convert them later.
|
||||
uint16_t idx = (n + i) * 2; // This is fixed as FAT LFN always contain UTF-16LE encoding
|
||||
longFilename[idx] = utf16_ch & 0xFF;
|
||||
longFilename[idx+1] = (utf16_ch >> 8) & 0xFF;
|
||||
#else
|
||||
// Replace every character that does not fit in a single byte with '_'
|
||||
longFilename[n + i] = (utf16_ch > 0xFF) ? '_' : (utf16_ch & 0xFF);
|
||||
#endif
|
||||
}
|
||||
// If this VFAT entry is the last one, add a NUL terminator at the end of the string
|
||||
if (VFAT->sequenceNumber & 0x40) longFilename[n + FILENAME_LENGTH] = '\0';
|
||||
if (VFAT->sequenceNumber & 0x40) longFilename[(n + FILENAME_LENGTH) * LONG_FILENAME_CHARSIZE] = '\0';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return if normal file or subdirectory
|
||||
if (DIR_IS_FILE_OR_SUBDIR(dir)) return n;
|
||||
if (DIR_IS_FILE_OR_SUBDIR(dir)) {
|
||||
#if ENABLED(UTF_FILENAME_SUPPORT)
|
||||
// Convert filename from utf-16 to utf-8 as Marlin expects
|
||||
#if LONG_FILENAME_CHARSIZE > 2
|
||||
// Warn developers about the currently unsupported 3-byte case (converting a series of 2-byte
|
||||
// UTF-16 code units to 3-byte sequences in place would overwrite the rest of the filename)
|
||||
#error "Currently filename re-encoding is done in-place. It may break the remaining chars to use 3-byte codepoints."
|
||||
#endif
|
||||
uint16_t currentPos = 0;
|
||||
LOOP_L_N(i, (LONG_FILENAME_LENGTH / 2)) {
|
||||
uint16_t idx = i * 2; // This is fixed as FAT LFN always contain UTF-16LE encoding
|
||||
|
||||
uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8);
|
||||
if (0xD800 == (utf16_ch & 0xF800)) // Surrogate pair - encode as '_'
|
||||
longFilename[currentPos++] = '_';
|
||||
else if (0 == (utf16_ch & 0xFF80)) // Encode as 1-byte utf-8 char
|
||||
longFilename[currentPos++] = utf16_ch & 0x007F;
|
||||
else if (0 == (utf16_ch & 0xF800)) { // Encode as 2-byte utf-8 char
|
||||
longFilename[currentPos++] = 0xC0 | ((utf16_ch >> 6) & 0x1F);
|
||||
longFilename[currentPos++] = 0x80 | (utf16_ch & 0x3F);
|
||||
}
|
||||
else {
|
||||
#if LONG_FILENAME_CHARSIZE > 2 // Encode as 3-byte utf-8 char
|
||||
longFilename[currentPos++] = 0xE0 | ((utf16_ch >> 12) & 0x0F);
|
||||
longFilename[currentPos++] = 0xC0 | ((utf16_ch >> 6) & 0x3F);
|
||||
longFilename[currentPos++] = 0xC0 | (utf16_ch & 0x3F);
|
||||
#else // Encode as '_'
|
||||
longFilename[currentPos++] = '_';
|
||||
#endif
|
||||
}
|
||||
|
||||
if (0 == utf16_ch) break; // End of filename
|
||||
}
|
||||
return currentPos;
|
||||
#else
|
||||
return n;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Read next directory entry into the cache
|
||||
// Assumes file is correctly positioned
|
||||
dir_t* SdBaseFile::readDirCache() {
|
||||
|
|
|
@ -103,5 +103,10 @@
|
|||
|
||||
#define FILENAME_LENGTH 13 // Number of UTF-16 characters per entry
|
||||
|
||||
// UTF-8 may use up to 3 bytes to represent a single UTF-16 code unit.
|
||||
// 3-byte characters are discarded, so each character takes at most 2 bytes,
|
||||
// or 1 byte if UTF_FILENAME_SUPPORT is disabled.
|
||||
#define LONG_FILENAME_CHARSIZE TERN(UTF_FILENAME_SUPPORT, 2, 1)
|
||||
|
||||
// Total bytes needed to store a single long filename
|
||||
#define LONG_FILENAME_LENGTH (FILENAME_LENGTH * MAX_VFAT_ENTRIES + 1)
|
||||
#define LONG_FILENAME_LENGTH (FILENAME_LENGTH * LONG_FILENAME_CHARSIZE * MAX_VFAT_ENTRIES + 1)
|
||||
|
|
|
@ -71,63 +71,49 @@ uint8_t* get_utf8_value(uint8_t *pstart, wchar_t *pval) {
|
|||
|
||||
assert(NULL != pstart);
|
||||
|
||||
#define NEXT_6_BITS() do{ val <<= 6; p++; val |= (*p & 0x3F); }while(0)
|
||||
|
||||
if (0 == (0x80 & *p)) {
|
||||
val = (size_t)*p;
|
||||
p++;
|
||||
}
|
||||
else if (0xC0 == (0xE0 & *p)) {
|
||||
val = *p & 0x1F;
|
||||
val <<= 6;
|
||||
p++;
|
||||
val |= (*p & 0x3F);
|
||||
NEXT_6_BITS();
|
||||
p++;
|
||||
assert((wchar_t)val == get_val_utf82uni(pstart));
|
||||
}
|
||||
else if (0xE0 == (0xF0 & *p)) {
|
||||
val = *p & 0x0F;
|
||||
val <<= 6; p++;
|
||||
val |= (*p & 0x3F);
|
||||
val <<= 6; p++;
|
||||
val |= (*p & 0x3F);
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
p++;
|
||||
assert((wchar_t)val == get_val_utf82uni(pstart));
|
||||
}
|
||||
else if (0xF0 == (0xF8 & *p)) {
|
||||
val = *p & 0x07;
|
||||
val <<= 6; p++;
|
||||
val |= (*p & 0x3F);
|
||||
val <<= 6; p++;
|
||||
val |= (*p & 0x3F);
|
||||
val <<= 6; p++;
|
||||
val |= (*p & 0x3F);
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
p++;
|
||||
assert((wchar_t)val == get_val_utf82uni(pstart));
|
||||
}
|
||||
else if (0xF8 == (0xFC & *p)) {
|
||||
val = *p & 0x03;
|
||||
val <<= 6; p++;
|
||||
val |= (*p & 0x3F);
|
||||
val <<= 6; p++;
|
||||
val |= (*p & 0x3F);
|
||||
val <<= 6; p++;
|
||||
val |= (*p & 0x3F);
|
||||
val <<= 6; p++;
|
||||
val |= (*p & 0x3F);
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
p++;
|
||||
assert((wchar_t)val == get_val_utf82uni(pstart));
|
||||
}
|
||||
else if (0xFC == (0xFE & *p)) {
|
||||
val = *p & 0x01;
|
||||
val <<= 6; p++;
|
||||
val |= (*p & 0x3F);
|
||||
val <<= 6; p++;
|
||||
val |= (*p & 0x3F);
|
||||
val <<= 6; p++;
|
||||
val |= (*p & 0x3F);
|
||||
val <<= 6; p++;
|
||||
val |= (*p & 0x3F);
|
||||
val <<= 6; p++;
|
||||
val |= (*p & 0x3F);
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
NEXT_6_BITS();
|
||||
p++;
|
||||
assert((wchar_t)val == get_val_utf82uni(pstart));
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue