diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9f3dbcec8..31cb24f24 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,6 @@ project(PrusaSlicer-native) +add_subdirectory(build-utils) add_subdirectory(admesh) add_subdirectory(avrdude) # boost/nowide diff --git a/src/avrdude/CMakeLists.txt b/src/avrdude/CMakeLists.txt index f2204db0c..e6748a5aa 100644 --- a/src/avrdude/CMakeLists.txt +++ b/src/avrdude/CMakeLists.txt @@ -100,6 +100,9 @@ add_dependencies(avrdude gen_conf_h) add_executable(avrdude-slic3r main-standalone.cpp) target_link_libraries(avrdude-slic3r avrdude) +encoding_check(avrdude) +encoding_check(avrdude-slic3r) + if (WIN32) target_compile_definitions(avrdude PRIVATE WIN32NATIVE=1) if(MSVC) diff --git a/src/build-utils/CMakeLists.txt b/src/build-utils/CMakeLists.txt new file mode 100644 index 000000000..3b3961b56 --- /dev/null +++ b/src/build-utils/CMakeLists.txt @@ -0,0 +1,39 @@ + +add_executable(encoding-check encoding-check.cpp) + +# A global no-op target which depends on all encodings checks, +# and on which in turn all checked targets depend. +# This is done to make encoding checks the first thing to be +# performed before actually compiling any sources of the checked targets +# to make the check fail as early as possible. 
# Global no-op target: it depends on every per-target encoding check, and every
# checked target depends on it, so all checks run before checked sources compile.
add_custom_target(global-encoding-check
    ALL
    DEPENDS encoding-check
)

# encoding_check(<target>)
#
# Adds a source file encoding check to <target> using the encoding-check binary.
# For every entry of the target's SOURCES property the binary is run as
#     encoding-check <target> <file>
# from the calling directory (CMAKE_CURRENT_SOURCE_DIR). The commands hang off a
# helper target named encoding-check-<target>, which is wired into
# global-encoding-check, on which <target> is then made to depend.
function(encoding_check TARGET)
    # Obtain target source files
    get_target_property(T_SOURCES ${TARGET} SOURCES)
    if(NOT T_SOURCES)
        # get_target_property() yields "T_SOURCES-NOTFOUND" when the property is
        # unset; treat that as "nothing to check" instead of passing it on as a file.
        set(T_SOURCES "")
    endif()

    # Define top-level encoding check target for this ${TARGET}
    add_custom_target(encoding-check-${TARGET}
        DEPENDS encoding-check ${T_SOURCES}
        COMMENT "Checking source files encodings for target ${TARGET}"
    )

    # Add checking of each source file as a subcommand of encoding-check-${TARGET}.
    # POST_BUILD is what CMake assumed when no build-event keyword was given;
    # spelling it out keeps the call valid on CMake versions that require it.
    foreach(file ${T_SOURCES})
        add_custom_command(TARGET encoding-check-${TARGET} POST_BUILD
            COMMAND $<TARGET_FILE:encoding-check> ${TARGET} ${file}
            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
            VERBATIM
        )
    endforeach()

    # This adds dependency on encoding-check-${TARGET} to ${TARGET}
    # via the global-encoding-check
    add_dependencies(global-encoding-check encoding-check-${TARGET})
    add_dependencies(${TARGET} global-encoding-check)
endfunction()
/*
 * The utf8_check() function scans the '\0'-terminated string starting
 * at s. It returns a pointer to the first byte of the first malformed
 * or overlong UTF-8 sequence found, or a null pointer if the string
 * contains only correct UTF-8. It also spots UTF-8 sequences that could
 * cause trouble if converted to UTF-16, namely surrogate characters
 * (U+D800..U+DFFF) and non-Unicode positions (U+FFFE..U+FFFF). This
 * routine is very likely to find a malformed sequence if the input
 * uses any other encoding than UTF-8. It therefore can be used as a
 * very effective heuristic for distinguishing between UTF-8 and other
 * encodings.
 *
 * I wrote this code mainly as a specification of functionality; there
 * are no doubt performance optimizations possible for certain CPUs.
 *
 * Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> -- 2005-03-30
 * License: http://www.cl.cam.ac.uk/~mgk25/short-license.html
 */

unsigned char *utf8_check(unsigned char *s)
{
    // Every continuation-byte test below short-circuits on the first failure,
    // so a terminating '\0' in the middle of a sequence is rejected before any
    // byte beyond it is read.
    while (*s) {
        if (*s < 0x80) {
            // 0xxxxxxx
            s++;
        } else if ((s[0] & 0xe0) == 0xc0) {
            // 110xxxxx 10xxxxxx
            if ((s[1] & 0xc0) != 0x80 ||
                (s[0] & 0xfe) == 0xc0) {                        // overlong?
                return s;
            } else {
                s += 2;
            }
        } else if ((s[0] & 0xf0) == 0xe0) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            if ((s[1] & 0xc0) != 0x80 ||
                (s[2] & 0xc0) != 0x80 ||
                (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) ||    // overlong?
                (s[0] == 0xed && (s[1] & 0xe0) == 0xa0) ||    // surrogate?
                (s[0] == 0xef && s[1] == 0xbf &&
                 (s[2] & 0xfe) == 0xbe)) {                      // U+FFFE or U+FFFF?
                return s;
            } else {
                s += 3;
            }
        } else if ((s[0] & 0xf8) == 0xf0) {
            // 11110xxX 10xxxxxx 10xxxxxx 10xxxxxx
            if ((s[1] & 0xc0) != 0x80 ||
                (s[2] & 0xc0) != 0x80 ||
                (s[3] & 0xc0) != 0x80 ||
                (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) ||    // overlong?
                (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4) { // > U+10FFFF?
                return s;
            } else {
                s += 4;
            }
        } else {
            // Stray continuation byte or a lead byte of an unsupported length
            return s;
        }
    }

    return nullptr;
}


// Returns true if the whole of `buffer` is valid UTF-8.
//
// `buffer` must be non-empty and its last byte must be a '\0' sentinel.
// utf8_check() stops at the first '\0', so a file with an embedded NUL byte is
// validated one NUL-separated segment at a time; otherwise everything after
// the first NUL would silently go unchecked.
static bool is_valid_utf8(std::vector<char> &buffer)
{
    unsigned char *cursor = reinterpret_cast<unsigned char*>(buffer.data());
    unsigned char *const sentinel = cursor + (buffer.size() - 1);

    for (;;) {
        if (utf8_check(cursor) != nullptr) {
            return false;
        }

        // The segment is valid: advance to the '\0' that ended it.
        while (*cursor) {
            ++cursor;
        }

        if (cursor == sentinel) {
            return true;
        }

        // Embedded NUL (itself valid UTF-8): continue with the next segment.
        ++cursor;
    }
}


// Usage: encoding-check <program/library> <file>
//
// Verifies that <file> is valid UTF-8 without a byte order mark. The first
// argument is only echoed in error messages.
//
// Returns 0 when the file is empty or passes both checks, -1 on wrong usage;
// terminates via std::exit(-2) when the file cannot be opened or read, is not
// valid UTF-8, or starts with a BOM.
int main(int argc, char const *argv[])
{
    if (argc != 3) {
        std::cerr << "Usage: " << argv[0] << " <program/library> <file>" << std::endl;
        return -1;
    }

    const char* target = argv[1];
    const char* filename = argv[2];

    const auto error_exit = [=](const char* error) {
        std::cerr << "\n\tError: " << error << ": " << filename << "\n"
            << "\tTarget: " << target << "\n"
            << std::endl;
        std::exit(-2);
    };

    // Open positioned at the end so that tellg() reports the file size.
    std::ifstream file(filename, std::ios::binary | std::ios::ate);
    if (!file) {
        error_exit("Could not open source file");
    }

    const std::streamoff size = file.tellg();
    if (size < 0) {
        // tellg() signals failure with -1; never turn that into a buffer size.
        error_exit("Could not read source file");
    }

    if (size == 0) {
        return 0;
    }

    file.seekg(0, std::ios::beg);

    // One extra value-initialized byte serves as the '\0' sentinel required by
    // utf8_check(), without reallocating the buffer after the read.
    std::vector<char> buffer(static_cast<std::size_t>(size) + 1);

    if (!file.read(buffer.data(), size)) {
        error_exit("Could not read source file");
    }

    // Check UTF-8 validity
    if (!is_valid_utf8(buffer)) {
        error_exit("Source file does not contain (valid) UTF-8");
    }

    // Check against a BOM mark
    if (size >= 3
        && buffer[0] == '\xef'
        && buffer[1] == '\xbb'
        && buffer[2] == '\xbf') {
        error_exit("Source file is valid UTF-8 but contains a BOM mark");
    }

    return 0;
}
b/src/slic3r/CMakeLists.txt @@ -163,6 +163,8 @@ endif () add_library(libslic3r_gui STATIC ${SLIC3R_GUI_SOURCES}) +encoding_check(libslic3r_gui) + target_link_libraries(libslic3r_gui libslic3r avrdude cereal imgui ${GLEW_LIBRARIES}) if (SLIC3R_PCH AND NOT SLIC3R_SYNTAXONLY) add_precompiled_header(libslic3r_gui pchheader.hpp FORCEINCLUDE)