Optimization of GCodeViewer vertex buffer generator:

Rounding by round_to_bin() is now 2x faster on MSVC.
2022-08-18 13:32:30 +02:00 · 2022-08-18 13:32:30 +02:00 · 55835aa050
commit 55835aa050
parent c03085a1f6
4 changed files with 62 additions and 2 deletions
--- a/src/libslic3r/libslic3r.h
+++ b/src/libslic3r/libslic3r.h
@ -345,6 +345,25 @@ constexpr T NaN = std::numeric_limits<T>::quiet_NaN();
 constexpr float NaNf = NaN<float>;
 constexpr double NaNd = NaN<double>;
 // Rounds a double to the nearest integer of type I; ties are rounded towards +infinity:
 //    1.5  is rounded to  2,     1.49 is rounded to  1,
 //    0.5  is rounded to  1,     0.49 is rounded to  0,
 //   -0.5  is rounded to  0,    -0.51 is rounded to -1,
 //   -1.5  is rounded to -1,    -1.51 is rounded to -2.
 // If the input is not a valid float (it is infinity or NaN) or if the result does not fit into I,
 // the float to int conversion produces a max int on Intel and +-max int on ARM.
 template<typename I>
 inline IntegerOnly<I, I> fast_round_up(double a)
 {
    // The largest double below 0.5 needs special handling: adding 0.5 to it is not exact
    // and the sum gets rounded to 1.0, thus floor() would return 1 instead of 0. See
    // "Why does Java Math.round(0.49999999999999994) return 1?"
    // https://stackoverflow.com/questions/9902968/why-does-math-round0-49999999999999994-return-1
    if (a == 0.49999999999999994)
        return I(0);
    // General case: shift by one half and truncate towards -infinity.
    return I(floor(a + 0.5));
 }
 } // namespace Slic3r
 #endif
--- a/src/slic3r/GUI/GCodeViewer.cpp
+++ b/src/slic3r/GUI/GCodeViewer.cpp
@ -51,7 +51,7 @@ static EMoveType buffer_type(unsigned char id) {
 // Equivalent to conversion to string with sprintf(buf, "%.2g", value) and conversion back to float, but faster.
 static float round_to_bin(const float value)
 {
-//    assert(value > 0);
+//    assert(value >= 0);
    constexpr float const scale    [5] = { 100.f,  1000.f,  10000.f,  100000.f,  1000000.f };
    constexpr float const invscale [5] = { 0.01f,  0.001f,  0.0001f,  0.00001f,  0.000001f };
    constexpr float const threshold[5] = { 0.095f, 0.0095f, 0.00095f, 0.000095f, 0.0000095f };
@ -59,7 +59,12 @@ static float round_to_bin(const float value)
    int                   i            = 0;
    // While the scaling factor is not yet large enough to get two integer digits after scaling and rounding:
    for (; value < threshold[i] && i < 4; ++ i) ;
-    return std::round(value * scale[i]) * invscale[i];
+    // At least on MSVC std::round() calls a complex function, which is pretty expensive.
    // Our fast_round_up() is much cheaper and it can be inlined.
 //    return std::round(value * scale[i]) * invscale[i];
    double a = value * scale[i];
    assert(std::abs(a) < double(std::numeric_limits<int64_t>::max()));
    return fast_round_up<int64_t>(a) * invscale[i];
 }
 void GCodeViewer::VBuffer::reset()
--- a/tests/libslic3r/CMakeLists.txt
+++ b/tests/libslic3r/CMakeLists.txt
@ -21,6 +21,7 @@ add_executable(${_TEST_NAME}_tests
 	test_meshboolean.cpp
 	test_marchingsquares.cpp
 	test_timeutils.cpp
 	test_utils.cpp
 	test_voronoi.cpp
    test_optimizers.cpp
    test_png_io.cpp
--- a/tests/libslic3r/test_utils.cpp
+++ b/tests/libslic3r/test_utils.cpp
@ -0,0 +1,35 @@
 #include <catch2/catch.hpp>
 #include "libslic3r/libslic3r.h"
 // Verifies fast_round_up<int>() around the ties at +-0.5 and +-1.5: a tie is expected
 // to be rounded towards +infinity, anything else to the nearest integer.
 SCENARIO("Test fast_round_up()") {
    using namespace Slic3r;
    THEN("fast_round_up<int>(1.5) is 2") {
        REQUIRE(fast_round_up<int>(1.5) == 2);
    }
    THEN("fast_round_up<int>(1.499999999999999) is 1") {
        REQUIRE(fast_round_up<int>(1.499999999999999) == 1);
    }
    THEN("fast_round_up<int>(0.5) is 1") {
        REQUIRE(fast_round_up<int>(0.5) == 1);
    }
    // The largest double below 0.5: a naive floor(a + 0.5) would return 1 here,
    // because a + 0.5 is not representable and it is rounded to 1.0.
    THEN("fast_round_up<int>(0.49999999999999994) is 0") {
        REQUIRE(fast_round_up<int>(0.49999999999999994) == 0);
    }
    // Mirror image of the case above: the smallest magnitude just short of the -0.5 tie.
    THEN("fast_round_up<int>(-0.49999999999999994) is 0") {
        REQUIRE(fast_round_up<int>(-0.49999999999999994) == 0);
    }
    THEN("fast_round_up<int>(-0.5) is 0") {
        REQUIRE(fast_round_up<int>(-0.5) == 0);
    }
    THEN("fast_round_up<int>(-0.51) is -1") {
        REQUIRE(fast_round_up<int>(-0.51) == -1);
    }
    THEN("fast_round_up<int>(-1.5) is -1") {
        REQUIRE(fast_round_up<int>(-1.5) == -1);
    }
    THEN("fast_round_up<int>(-1.51) is -2") {
        REQUIRE(fast_round_up<int>(-1.51) == -2);
    }
 }