From 55835aa05076954f3960b35084ebcce000ddbe29 Mon Sep 17 00:00:00 2001
From: Vojtech Bubnik <bubnikv@gmail.com>
Date: Thu, 18 Aug 2022 13:32:30 +0200
Subject: [PATCH] Optimization of GCodeViewer vertex buffer generator: Rounding
 by round_to_bin() is now 2x faster on MSVC.

---
 src/libslic3r/libslic3r.h      | 19 ++++++++++++++++++
 src/slic3r/GUI/GCodeViewer.cpp |  9 +++++++--
 tests/libslic3r/CMakeLists.txt |  1 +
 tests/libslic3r/test_utils.cpp | 35 ++++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100644 tests/libslic3r/test_utils.cpp
diff --git a/src/libslic3r/libslic3r.h b/src/libslic3r/libslic3r.h
index 6f71930ab..2285c29a6 100644
--- a/src/libslic3r/libslic3r.h
+++ b/src/libslic3r/libslic3r.h
@@ -345,6 +345,25 @@ constexpr T NaN = std::numeric_limits<T>::quiet_NaN();
 constexpr float NaNf = NaN<float>;
 constexpr double NaNd = NaN<double>;
 
+// Rounds a to the nearest integer of type I; ties (x.5) are rounded up, towards +infinity:
+// 1.5 is rounded to 2,
+// 1.49 is rounded to 1,
+// 0.5 is rounded to 1,
+// 0.49 is rounded to 0,
+// -0.5 is rounded to 0,
+// -0.51 is rounded to -1,
+// -1.5 is rounded to -1,
+// -1.51 is rounded to -2.
+// If the input is not a valid float (it is infinity, NaN, or it does not fit into I),
+// the float to int conversion produces a max int on Intel and +-max int on ARM.
+template<typename I>
+inline IntegerOnly<I, I> fast_round_up(double a)
+{
+    // Special case: 0.49999999999999994 + 0.5 evaluates to 1.0 in double precision, so floor() would yield 1. See
+    // https://stackoverflow.com/questions/9902968/why-does-math-round0-49999999999999994-return-1
+    return a == 0.49999999999999994 ? I(0) : I(floor(a + 0.5));
+}
+
 } // namespace Slic3r
 
 #endif
diff --git a/src/slic3r/GUI/GCodeViewer.cpp b/src/slic3r/GUI/GCodeViewer.cpp
index 1e28d1287..8167507b4 100644
--- a/src/slic3r/GUI/GCodeViewer.cpp
+++ b/src/slic3r/GUI/GCodeViewer.cpp
@@ -51,7 +51,7 @@ static EMoveType buffer_type(unsigned char id) {
 // Equivalent to conversion to string with sprintf(buf, "%.2g", value) and conversion back to float, but faster.
 static float round_to_bin(const float value)
 {
-//    assert(value > 0);
+//    assert(value >= 0);
     constexpr float const scale    [5] = { 100.f,  1000.f,  10000.f,  100000.f,  1000000.f };
     constexpr float const invscale [5] = { 0.01f,  0.001f,  0.0001f,  0.00001f,  0.000001f };
     constexpr float const threshold[5] = { 0.095f, 0.0095f, 0.00095f, 0.000095f, 0.0000095f };
@@ -59,7 +59,12 @@ static float round_to_bin(const float value)
     int                   i            = 0;
     // While the scaling factor is not yet large enough to get two integer digits after scaling and rounding:
     for (; value < threshold[i] && i < 4; ++ i) ;
-    return std::round(value * scale[i]) * invscale[i];
+    // At least on MSVC, std::round() calls a complex function, which is pretty expensive.
+    // Our fast_round_up() is much cheaper and it can be inlined.
+//    return std::round(value * scale[i]) * invscale[i];
+    double a = value * scale[i];
+    assert(std::abs(a) < double(std::numeric_limits<int64_t>::max()));
+    return fast_round_up<int64_t>(a) * invscale[i];
 }
 
 void GCodeViewer::VBuffer::reset()
diff --git a/tests/libslic3r/CMakeLists.txt b/tests/libslic3r/CMakeLists.txt
index cf89b2246..c47fa6109 100644
--- a/tests/libslic3r/CMakeLists.txt
+++ b/tests/libslic3r/CMakeLists.txt
@@ -21,6 +21,7 @@ add_executable(${_TEST_NAME}_tests
 	test_meshboolean.cpp
 	test_marchingsquares.cpp
 	test_timeutils.cpp
+	test_utils.cpp
 	test_voronoi.cpp
     test_optimizers.cpp
     test_png_io.cpp
diff --git a/tests/libslic3r/test_utils.cpp b/tests/libslic3r/test_utils.cpp
new file mode 100644
index 000000000..74d409496
--- /dev/null
+++ b/tests/libslic3r/test_utils.cpp
@@ -0,0 +1,35 @@
+#include <catch2/catch.hpp>
+
+#include "libslic3r/libslic3r.h"
+
+SCENARIO("Test fast_round_up()") {
+    using namespace Slic3r;
+    // Ties (x.5) are expected to round towards +infinity, everything else to the nearest integer.
+    THEN("fast_round_up<int>(1.5) is 2") {
+        REQUIRE(fast_round_up<int>(1.5) == 2);
+    }
+    THEN("fast_round_up<int>(1.499999999999999) is 1") {
+        REQUIRE(fast_round_up<int>(1.499999999999999) == 1);
+    }
+    THEN("fast_round_up<int>(0.5) is 1") {
+        REQUIRE(fast_round_up<int>(0.5) == 1);
+    }
+    THEN("fast_round_up<int>(0.49999999999999994) is 0") {
+        REQUIRE(fast_round_up<int>(0.49999999999999994) == 0);
+    }
+    THEN("fast_round_up<int>(-0.5) is 0") {
+        REQUIRE(fast_round_up<int>(-0.5) == 0);
+    }
+    THEN("fast_round_up<int>(-0.51) is -1") {
+        REQUIRE(fast_round_up<int>(-0.51) == -1);
+    }
+    THEN("fast_round_up<int>(-0.49) is 0") {
+        REQUIRE(fast_round_up<int>(-0.49) == 0);
+    }
+    THEN("fast_round_up<int>(-1.5) is -1") {
+        REQUIRE(fast_round_up<int>(-1.5) == -1);
+    }
+    THEN("fast_round_up<int>(-1.51) is -2") {
+        REQUIRE(fast_round_up<int>(-1.51) == -2);
+    }
+}