build: Add source file encoding check
Source files are checked using a small utility in src/build-utils. This is done to prevent build and localization bugs caused by non-UTF-8 encodings, which MSVC interprets in terms of the local codepage rather than as UTF-8.
This commit is contained in:
parent
f937209619
commit
0ded335488
7 changed files with 168 additions and 0 deletions
|
@ -1,5 +1,6 @@
|
||||||
project(PrusaSlicer-native)
|
project(PrusaSlicer-native)
|
||||||
|
|
||||||
|
add_subdirectory(build-utils)
|
||||||
add_subdirectory(admesh)
|
add_subdirectory(admesh)
|
||||||
add_subdirectory(avrdude)
|
add_subdirectory(avrdude)
|
||||||
# boost/nowide
|
# boost/nowide
|
||||||
|
|
|
@ -100,6 +100,9 @@ add_dependencies(avrdude gen_conf_h)
|
||||||
add_executable(avrdude-slic3r main-standalone.cpp)
|
add_executable(avrdude-slic3r main-standalone.cpp)
|
||||||
target_link_libraries(avrdude-slic3r avrdude)
|
target_link_libraries(avrdude-slic3r avrdude)
|
||||||
|
|
||||||
|
encoding_check(avrdude)
|
||||||
|
encoding_check(avrdude-slic3r)
|
||||||
|
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
target_compile_definitions(avrdude PRIVATE WIN32NATIVE=1)
|
target_compile_definitions(avrdude PRIVATE WIN32NATIVE=1)
|
||||||
if(MSVC)
|
if(MSVC)
|
||||||
|
|
39
src/build-utils/CMakeLists.txt
Normal file
39
src/build-utils/CMakeLists.txt
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
|
||||||
|
# Helper binary that performs the encoding check on a single source file.
add_executable(encoding-check encoding-check.cpp)

# Umbrella target with no command of its own: it depends on every
# encoding check, and every checked target depends on it in turn.
# Routing the dependencies through this single target makes the encoding
# checks run before any source of a checked target gets compiled,
# so that a failing check aborts the build as early as possible.
add_custom_target(global-encoding-check ALL DEPENDS encoding-check)
|
||||||
|
|
||||||
|
# Function that adds source file encoding check to a target
|
||||||
|
# using the above encoding-check binary
|
||||||
|
|
||||||
|
# encoding_check(<target>)
#
# Adds a source file encoding check to <target>: creates the utility target
# encoding-check-<target>, which runs the encoding-check binary on every
# file listed in the SOURCES property of <target>, and makes <target> depend
# on it via global-encoding-check.
#
# Must be called after <target> has been created. File paths are resolved
# relative to the directory of the calling CMakeLists.txt.
function(encoding_check TARGET)
    # Obtain target source files
    get_target_property(T_SOURCES ${TARGET} SOURCES)

    # get_target_property() yields "T_SOURCES-NOTFOUND" when the property
    # is not set; treat that as "nothing to check" instead of passing the
    # placeholder on as if it were a file name.
    if (NOT T_SOURCES)
        set(T_SOURCES "")
    endif ()

    # Define top-level encoding check target for this ${TARGET}
    add_custom_target(encoding-check-${TARGET}
        DEPENDS encoding-check ${T_SOURCES}
        COMMENT "Checking source files encodings for target ${TARGET}"
    )

    # Add checking of each source file as a subcommand of encoding-check-${TARGET}
    foreach(file ${T_SOURCES})
        # POST_BUILD is what CMake assumes when no build event is given;
        # it is spelled out here to keep the call unambiguous.
        # VERBATIM makes argument escaping independent of the platform.
        add_custom_command(TARGET encoding-check-${TARGET} POST_BUILD
            COMMAND $<TARGET_FILE:encoding-check> ${TARGET} "${file}"
            WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
            VERBATIM
        )
    endforeach()

    # This adds dependency on encoding-check-${TARGET} to ${TARGET}
    # via the global-encoding-check
    add_dependencies(global-encoding-check encoding-check-${TARGET})
    add_dependencies(${TARGET} global-encoding-check)
endfunction()
|
119
src/build-utils/encoding-check.cpp
Normal file
119
src/build-utils/encoding-check.cpp
Normal file
|
@ -0,0 +1,119 @@
|
||||||
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The utf8_check() function scans the '\0'-terminated string starting
|
||||||
|
* at s. It returns a pointer to the first byte of the first malformed
|
||||||
|
* or overlong UTF-8 sequence found, or NULL if the string contains
|
||||||
|
* only correct UTF-8. It also spots UTF-8 sequences that could cause
|
||||||
|
* trouble if converted to UTF-16, namely surrogate characters
|
||||||
|
* (U+D800..U+DFFF) and non-Unicode positions (U+FFFE..U+FFFF). This
|
||||||
|
* routine is very likely to find a malformed sequence if the input
|
||||||
|
* uses any other encoding than UTF-8. It therefore can be used as a
|
||||||
|
* very effective heuristic for distinguishing between UTF-8 and other
|
||||||
|
* encodings.
|
||||||
|
*
|
||||||
|
* I wrote this code mainly as a specification of functionality; there
|
||||||
|
* are no doubt performance optimizations possible for certain CPUs.
|
||||||
|
*
|
||||||
|
* Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> -- 2005-03-30
|
||||||
|
* License: http://www.cl.cam.ac.uk/~mgk25/short-license.html
|
||||||
|
*/
|
||||||
|
|
||||||
|
unsigned char *utf8_check(unsigned char *s)
{
    // A continuation (trailing) byte has the bit pattern 10xxxxxx.
    const auto is_trail = [](unsigned char byte) { return (byte & 0xc0) == 0x80; };

    unsigned char *pos = s;

    while (*pos != 0) {
        const unsigned char lead = pos[0];

        // 0xxxxxxx
        if (lead < 0x80) {
            pos += 1;
            continue;
        }

        // 110xxxxx 10xxxxxx
        if ((lead & 0xe0) == 0xc0) {
            const bool overlong = (lead & 0xfe) == 0xc0;
            if (overlong || !is_trail(pos[1])) {
                return pos;
            }
            pos += 2;
            continue;
        }

        // 1110xxxx 10xxxxxx 10xxxxxx
        if ((lead & 0xf0) == 0xe0) {
            // The trailing bytes are tested in order, so nothing past the
            // terminating '\0' is ever read.
            if (!is_trail(pos[1]) || !is_trail(pos[2])) {
                return pos;
            }

            const bool overlong     = lead == 0xe0 && (pos[1] & 0xe0) == 0x80;
            const bool surrogate    = lead == 0xed && (pos[1] & 0xe0) == 0xa0;
            // U+FFFE or U+FFFF
            const bool noncharacter = lead == 0xef && pos[1] == 0xbf
                                      && (pos[2] & 0xfe) == 0xbe;

            if (overlong || surrogate || noncharacter) {
                return pos;
            }
            pos += 3;
            continue;
        }

        // 11110xxX 10xxxxxx 10xxxxxx 10xxxxxx
        if ((lead & 0xf8) == 0xf0) {
            if (!is_trail(pos[1]) || !is_trail(pos[2]) || !is_trail(pos[3])) {
                return pos;
            }

            const bool overlong    = lead == 0xf0 && (pos[1] & 0xf0) == 0x80;
            // Anything above U+10FFFF
            const bool out_of_range = lead > 0xf4 || (lead == 0xf4 && pos[1] > 0x8f);

            if (overlong || out_of_range) {
                return pos;
            }
            pos += 4;
            continue;
        }

        // Stray continuation byte or an invalid lead byte
        return pos;
    }

    return NULL;
}
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
if (argc != 3) {
|
||||||
|
std::cerr << "Usage: " << argv[0] << " <program/library> <file>" << std::endl;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char* target = argv[1];
|
||||||
|
const char* filename = argv[2];
|
||||||
|
|
||||||
|
const auto error_exit = [=](const char* error) {
|
||||||
|
std::cerr << "\n\tError: " << error << ": " << filename << "\n"
|
||||||
|
<< "\tTarget: " << target << "\n"
|
||||||
|
<< std::endl;
|
||||||
|
std::exit(-2);
|
||||||
|
};
|
||||||
|
|
||||||
|
std::ifstream file(filename, std::ios::binary | std::ios::ate);
|
||||||
|
const auto size = file.tellg();
|
||||||
|
|
||||||
|
if (size == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
file.seekg(0, std::ios::beg);
|
||||||
|
std::vector<char> buffer(size);
|
||||||
|
|
||||||
|
if (file.read(buffer.data(), size)) {
|
||||||
|
buffer.push_back('\0');
|
||||||
|
|
||||||
|
// Check UTF-8 validity
|
||||||
|
if (utf8_check(reinterpret_cast<unsigned char*>(buffer.data())) != nullptr) {
|
||||||
|
error_exit("Source file does not contain (valid) UTF-8");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check against a BOM mark
|
||||||
|
if (buffer.size() >= 3
|
||||||
|
&& buffer[0] == '\xef'
|
||||||
|
&& buffer[1] == '\xbb'
|
||||||
|
&& buffer[2] == '\xbf') {
|
||||||
|
error_exit("Source file is valid UTF-8 but contains a BOM mark");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
error_exit("Could not read source file");
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -187,6 +187,8 @@ add_library(libslic3r STATIC
|
||||||
SLA/SLARasterWriter.cpp
|
SLA/SLARasterWriter.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
|
encoding_check(libslic3r)
|
||||||
|
|
||||||
if (SLIC3R_PCH AND NOT SLIC3R_SYNTAXONLY)
|
if (SLIC3R_PCH AND NOT SLIC3R_SYNTAXONLY)
|
||||||
add_precompiled_header(libslic3r pchheader.hpp FORCEINCLUDE)
|
add_precompiled_header(libslic3r pchheader.hpp FORCEINCLUDE)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
|
@ -5,3 +5,5 @@ add_library(semver STATIC
|
||||||
semver.c
|
semver.c
|
||||||
semver.h
|
semver.h
|
||||||
)
|
)
|
||||||
|
|
||||||
|
encoding_check(semver)
|
||||||
|
|
|
@ -163,6 +163,8 @@ endif ()
|
||||||
|
|
||||||
add_library(libslic3r_gui STATIC ${SLIC3R_GUI_SOURCES})
|
add_library(libslic3r_gui STATIC ${SLIC3R_GUI_SOURCES})
|
||||||
|
|
||||||
|
encoding_check(libslic3r_gui)
|
||||||
|
|
||||||
target_link_libraries(libslic3r_gui libslic3r avrdude cereal imgui ${GLEW_LIBRARIES})
|
target_link_libraries(libslic3r_gui libslic3r avrdude cereal imgui ${GLEW_LIBRARIES})
|
||||||
if (SLIC3R_PCH AND NOT SLIC3R_SYNTAXONLY)
|
if (SLIC3R_PCH AND NOT SLIC3R_SYNTAXONLY)
|
||||||
add_precompiled_header(libslic3r_gui pchheader.hpp FORCEINCLUDE)
|
add_precompiled_header(libslic3r_gui pchheader.hpp FORCEINCLUDE)
|
||||||
|
|
Loading…
Reference in a new issue