Add new execution framework

Inspired by std::execution
2021-03-17 19:42:58 +01:00 · 2021-03-17 19:42:58 +01:00 · 7760d3fbc4
commit 7760d3fbc4
parent 4293a68aaa
6 changed files with 285 additions and 108 deletions
--- a/src/libslic3r/CMakeLists.txt
+++ b/src/libslic3r/CMakeLists.txt
@ -219,6 +219,9 @@ add_library(libslic3r STATIC
    SimplifyMeshImpl.hpp
    SimplifyMesh.cpp
    MarchingSquares.hpp
+    Execution/Execution.hpp
+    Execution/ExecutionSeq.hpp
+    Execution/ExecutionTBB.hpp
    Optimize/Optimizer.hpp
    Optimize/NLoptOptimizer.hpp
    Optimize/BruteforceOptimizer.hpp
--- a/src/libslic3r/Execution/Execution.hpp
+++ b/src/libslic3r/Execution/Execution.hpp
@ -0,0 +1,100 @@
+#ifndef EXECUTION_HPP
+#define EXECUTION_HPP
+
+#include <type_traits>
+#include <utility>
+#include <cstddef>
+#include <iterator>
+
+#include "libslic3r/libslic3r.h"
+
+namespace Slic3r {
+
+// Strips the reference first and the top-level cv-qualifiers second, like
+// C++20 std::remove_cvref_t. The order matters: a reference type is never
+// cv-qualified itself, so removing cv first would turn `const T &` into
+// `const T` instead of `T`.
+// NOTE: MTUtils.hpp declares an alias with the same name; the two
+// declarations have to stay equivalent.
+template<class T>
+using remove_cvref_t = std::remove_cv_t<std::remove_reference_t<T>>;
+
+// Type trait marking execution policies. It is false by default; specialize
+// it to derive from std::true_type for every valid execution policy type.
+template<class EP>
+struct IsExecutionPolicy_ : std::false_type {};
+
+// Value of the trait above, evaluated on remove_cvref_t<EP>.
+template<class EP>
+constexpr bool IsExecutionPolicy = IsExecutionPolicy_<remove_cvref_t<EP>>::value;
+
+// SFINAE helper: names T (void by default) only if EP is an execution policy.
+template<class EP, class T = void>
+using ExecutionPolicyOnly = typename std::enable_if<IsExecutionPolicy<EP>, T>::type;
+
+namespace execution {
+
+// Customization point of the framework: this struct needs to be specialized
+// for each execution policy. See ExecutionSeq.hpp and ExecutionTBB.hpp for
+// examples. The primary template is empty.
+template<class EP, class En = void> struct Traits {};
+
+// Traits looked up on remove_cvref_t<EP> instead of EP itself.
+template<class EP> using AsTraits = Traits<remove_cvref_t<EP>>;
+
+// Each execution policy should declare two types of mutexes: a spin lock and
+// a blocking mutex. They are looked up through AsTraits, the same way as
+// every other operation of the policy.
+template<class EP> using SpinningMutex = typename AsTraits<EP>::SpinningMutex;
+template<class EP> using BlockingMutex = typename AsTraits<EP>::BlockingMutex;
+
+// Returns the number of threads available for concurrency, as reported by
+// the Traits specialization of the given execution policy.
+template<class EP, class = ExecutionPolicyOnly<EP>>
+size_t max_concurrency(const EP &ep)
+{
+    using PolicyTraits = AsTraits<EP>;
+
+    return PolicyTraits::max_concurrency(ep);
+}
+
+// Loop over [from, to) calling fn, with the execution policy passed as the
+// first argument. The granularity can be given explicitly (1 by default);
+// max_concurrency() can be used to choose it for optimal results. The call is
+// forwarded to the for_each() of the policy's Traits specialization.
+template<class EP, class It, class Fn, class = ExecutionPolicyOnly<EP>>
+void for_each(const EP &ep, It from, It to, Fn &&fn, size_t granularity = 1)
+{
+    using PolicyTraits = AsTraits<EP>;
+
+    PolicyTraits::for_each(ep, from, to, std::forward<Fn>(fn), granularity);
+}
+
+// Reduce operation over [from, to) with the execution policy passed as the
+// first argument.
+// mergefn has T(const T&, const T&) signature
+// accessfn has T(I) signature if I is an integral type and
+// T(const I::value_type &) if I is an iterator type.
+// The call is forwarded to the reduce() of the policy's Traits specialization
+// and its result is returned.
+template<class EP, class I, class MergeFn, class T, class AccessFn,
+         class = ExecutionPolicyOnly<EP>>
+T reduce(const EP &ep, I from, I to, const T &init, MergeFn &&mergefn,
+         AccessFn &&accessfn, size_t granularity = 1)
+{
+    using PolicyTraits = AsTraits<EP>;
+
+    return PolicyTraits::reduce(ep, from, to, init,
+                                std::forward<MergeFn>(mergefn),
+                                std::forward<AccessFn>(accessfn),
+                                granularity);
+}
+
+// An overload of reduce to be used with iterators as 'from' and 'to'
+// arguments when no access function is needed: every element is handed to
+// mergefn as a copy of itself.
+// The element is taken by const reference, matching the documented
+// T(const I::value_type &) accessor signature, so const iterators work too.
+// The value type is queried through std::iterator_traits instead of a nested
+// I::value_type.
+template<class EP,
+         class I,
+         class MergeFn,
+         class T,
+         class = ExecutionPolicyOnly<EP>,
+         class = IteratorOnly<I> >
+T reduce(const EP &ep,
+         I         from,
+         I         to,
+         const T & init,
+         MergeFn &&mergefn,
+         size_t    granularity = 1)
+{
+    using Value = typename std::iterator_traits<I>::value_type;
+
+    return reduce(
+        ep, from, to, init, std::forward<MergeFn>(mergefn),
+        [](const Value &v) { return v; }, granularity);
+}
+
+} // namespace execution
+} // namespace Slic3r
+
+#endif // EXECUTION_HPP
--- a/src/libslic3r/Execution/ExecutionSeq.hpp
+++ b/src/libslic3r/Execution/ExecutionSeq.hpp
@ -0,0 +1,84 @@
+#ifndef EXECUTIONSEQ_HPP
+#define EXECUTIONSEQ_HPP
+
+#ifdef PRUSASLICER_USE_EXECUTION_STD // Conflicts with our version of TBB
+#include <execution>
+#endif
+
+#include "Execution.hpp"
+
+namespace Slic3r {
+
+// Execution policy implementing dummy sequential algorithms
+struct ExecutionSeq {};
+
+// Registers ExecutionSeq as a valid execution policy.
+template<> struct IsExecutionPolicy_<ExecutionSeq> : public std::true_type {};
+
+// Ready-made ExecutionSeq instance to pass as the policy argument.
+static constexpr ExecutionSeq ex_seq = {};
+
+// Trait telling sequential execution policies apart. False by default; it
+// derives from std::false_type like IsExecutionPolicy_ does, so the primary
+// template and its specializations share the std::integral_constant
+// interface.
+template<class EP> struct IsSequentialEP_ : public std::false_type {};
+
+template<> struct IsSequentialEP_<ExecutionSeq>: public std::true_type {};
+#ifdef PRUSASLICER_USE_EXECUTION_STD
+// The standard sequenced policy is accepted as a sequential policy as well.
+template<> struct IsExecutionPolicy_<std::execution::sequenced_policy>: public std::true_type {};
+template<> struct IsSequentialEP_<std::execution::sequenced_policy>: public std::true_type {};
+#endif
+
+// Value of IsSequentialEP_ evaluated on remove_cvref_t<EP>.
+template<class EP>
+constexpr bool IsSequentialEP = IsSequentialEP_<remove_cvref_t<EP>>::value;
+
+// SFINAE helper: names R (EP itself by default) only for sequential policies.
+template<class EP, class R = EP>
+using SequentialEPOnly = std::enable_if_t<IsSequentialEP<EP>, R>;
+
+// Traits shared by every policy for which IsSequentialEP is true. All the
+// algorithms run as plain loops on the calling thread and the granularity
+// argument is ignored.
+template<class EP>
+struct execution::Traits<EP, SequentialEPOnly<EP, void>> {
+private:
+    // Mutex stand-in whose lock() and unlock() do nothing.
+    struct _Mtx { inline void lock() {} inline void unlock() {} };
+
+    // Overload enabled through IteratorOnly: calls fn with each dereferenced
+    // position of [from, to).
+    template<class Fn, class It>
+    static IteratorOnly<It, void> loop_(It from, It to, Fn &&fn)
+    {
+        auto cursor = from;
+        while (cursor != to) {
+            fn(*cursor);
+            ++cursor;
+        }
+    }
+
+    // Overload enabled through IntegerOnly: calls fn with each index of
+    // [from, to).
+    template<class Fn, class I>
+    static IntegerOnly<I, void> loop_(I from, I to, Fn &&fn)
+    {
+        I idx = from;
+        while (idx < to) {
+            fn(idx);
+            ++idx;
+        }
+    }
+
+public:
+    // Both mutex flavours are the no-op _Mtx.
+    using SpinningMutex = _Mtx;
+    using BlockingMutex = _Mtx;
+
+    // Calls fn for everything in [from, to), in order.
+    template<class It, class Fn>
+    static void for_each(const EP &,
+                         It   from,
+                         It   to,
+                         Fn &&fn,
+                         size_t /* ignore granularity */ = 1)
+    {
+        loop_(from, to, std::forward<Fn>(fn));
+    }
+
+    // Left fold: starts from init and merges in access(item) for every item
+    // of [from, to), in order.
+    template<class I, class MergeFn, class T, class AccessFn>
+    static T reduce(const EP &,
+                    I         from,
+                    I         to,
+                    const T & init,
+                    MergeFn  &&mergefn,
+                    AccessFn &&access,
+                    size_t   /*granularity*/ = 1
+                    )
+    {
+        T result = init;
+        auto accumulate = [&](auto &item) {
+            result = mergefn(result, access(item));
+        };
+        loop_(from, to, accumulate);
+
+        return result;
+    }
+
+    // A sequential policy always reports a single thread.
+    static size_t max_concurrency(const EP &) { return 1; }
+};
+
+} // namespace Slic3r
+
+#endif // EXECUTIONSEQ_HPP
--- a/src/libslic3r/Execution/ExecutionTBB.hpp
+++ b/src/libslic3r/Execution/ExecutionTBB.hpp
@ -0,0 +1,77 @@
+#ifndef EXECUTIONTBB_HPP
+#define EXECUTIONTBB_HPP
+
+#include <tbb/spin_mutex.h>
+#include <tbb/mutex.h>
+#include <tbb/parallel_for.h>
+#include <tbb/parallel_reduce.h>
+#include <tbb/task_arena.h>
+
+#include "Execution.hpp"
+
+namespace Slic3r {
+
+// Execution policy using Intel TBB library under the hood.
+struct ExecutionTBB {};
+// Registers ExecutionTBB as a valid execution policy.
+template<> struct IsExecutionPolicy_<ExecutionTBB> : public std::true_type {};
+
+// Ready-made ExecutionTBB instance to pass as the policy argument.
+static constexpr ExecutionTBB ex_tbb = {};
+
+// Traits of the ExecutionTBB policy: the algorithms are implemented with
+// tbb::parallel_for and tbb::parallel_reduce over a tbb::blocked_range.
+template<> struct execution::Traits<ExecutionTBB> {
+private:
+
+    // tbb::blocked_range requires a positive grain size. A granularity of 0
+    // (e.g. the result of n / max_concurrency() for a small n) is treated
+    // as 1.
+    static size_t grain_(size_t granularity)
+    {
+        return granularity > 0 ? granularity : static_cast<size_t>(1);
+    }
+
+    // Overload enabled through IteratorOnly: calls fn with each element of
+    // the sub-range.
+    template<class Fn, class It>
+    static IteratorOnly<It, void> loop_(const tbb::blocked_range<It> &range, Fn &&fn)
+    {
+        for (auto &el : range) fn(el);
+    }
+
+    // Overload enabled through IntegerOnly: calls fn with each index of the
+    // sub-range.
+    template<class Fn, class I>
+    static IntegerOnly<I, void> loop_(const tbb::blocked_range<I> &range, Fn &&fn)
+    {
+        for (I i = range.begin(); i < range.end(); ++i) fn(i);
+    }
+
+public:
+    using SpinningMutex = tbb::spin_mutex;
+    using BlockingMutex = tbb::mutex;
+
+    // Calls fn for everything in [from, to) through tbb::parallel_for.
+    // granularity is the grain size of the blocked_range; it defaults to 1
+    // like in the sequential traits.
+    template<class It, class Fn>
+    static void for_each(const ExecutionTBB &,
+                         It from, It to, Fn &&fn, size_t granularity = 1)
+    {
+        tbb::parallel_for(tbb::blocked_range{from, to, grain_(granularity)},
+                          [&fn](const auto &range) {
+            // This body runs once per sub-range, so fn is passed on as an
+            // lvalue instead of being forwarded again and again.
+            loop_(range, fn);
+        });
+    }
+
+    // Reduces [from, to) through tbb::parallel_reduce: every sub-range is
+    // folded with mergefn(acc, access(i)) starting from the value TBB hands
+    // in, and the partial results are combined with mergefn.
+    template<class I, class MergeFn, class T, class AccessFn>
+    static T reduce(const ExecutionTBB &,
+                    I          from,
+                    I          to,
+                    const T   &init,
+                    MergeFn  &&mergefn,
+                    AccessFn &&access,
+                    size_t     granularity = 1
+                    )
+    {
+        return tbb::parallel_reduce(
+            tbb::blocked_range{from, to, grain_(granularity)}, init,
+            [&](const auto &range, T subinit) {
+                T acc = subinit;
+                loop_(range, [&](auto &i) { acc = mergefn(acc, access(i)); });
+                return acc;
+            },
+            std::forward<MergeFn>(mergefn));
+    }
+
+    // Concurrency limit of the task arena of the calling thread.
+    static size_t max_concurrency(const ExecutionTBB &)
+    {
+        return tbb::this_task_arena::max_concurrency();
+    }
+};
+
+}
+
+#endif // EXECUTIONTBB_HPP
--- a/src/libslic3r/MTUtils.hpp
+++ b/src/libslic3r/MTUtils.hpp
@ -106,13 +106,8 @@ template<class C> bool all_of(const C &container)
                       });
 }

-template<class T> struct remove_cvref
-{
-    using type =
-        typename std::remove_cv<typename std::remove_reference<T>::type>::type;
-};
-
-template<class T> using remove_cvref_t = typename remove_cvref<T>::type;
+// Strips the reference first and the top-level cv-qualifiers second, exactly
+// like the remove_cvref struct this alias replaces. Removing cv first would
+// turn `const T &` into `const T`, because a reference type is never
+// cv-qualified itself.
+// NOTE: Execution/Execution.hpp declares an alias with the same name; the two
+// declarations have to stay equivalent.
+template<class T>
+using remove_cvref_t = std::remove_cv_t<std::remove_reference_t<T>>;

 /// Exactly like Matlab https://www.mathworks.com/help/matlab/ref/linspace.html
 template<class T, class I, class = IntegerOnly<I>>
--- a/src/libslic3r/SLA/Concurrency.hpp
+++ b/src/libslic3r/SLA/Concurrency.hpp
@ -1,16 +1,10 @@
 #ifndef SLA_CONCURRENCY_H
 #define SLA_CONCURRENCY_H

-#include <tbb/spin_mutex.h>
-#include <tbb/mutex.h>
-#include <tbb/parallel_for.h>
-#include <tbb/parallel_reduce.h>
-#include <tbb/task_arena.h>
+// FIXME: Deprecated. The _ccr wrappers below only forward to the execution
+// framework in libslic3r/Execution.

-#include <algorithm>
-#include <numeric>
-
-#include <libslic3r/libslic3r.h>
+#include <libslic3r/Execution/ExecutionSeq.hpp>
+#include <libslic3r/Execution/ExecutionTBB.hpp>

 namespace Slic3r {
 namespace sla {
@ -23,124 +17,48 @@ template<bool> struct _ccr {};

 template<> struct _ccr<true>
 {
-    using SpinningMutex = tbb::spin_mutex;
-    using BlockingMutex = tbb::mutex;
-
-    template<class Fn, class It>
-    static IteratorOnly<It, void> loop_(const tbb::blocked_range<It> &range, Fn &&fn)
-    {
-        for (auto &el : range) fn(el);
-    }
-
-    template<class Fn, class I>
-    static IntegerOnly<I, void> loop_(const tbb::blocked_range<I> &range, Fn &&fn)
-    {
-        for (I i = range.begin(); i < range.end(); ++i) fn(i);
-    }
+    using SpinningMutex = execution::SpinningMutex<ExecutionTBB>;
+    using BlockingMutex = execution::BlockingMutex<ExecutionTBB>;

    template<class It, class Fn>
    static void for_each(It from, It to, Fn &&fn, size_t granularity = 1)
    {
-        tbb::parallel_for(tbb::blocked_range{from, to, granularity},
-                          [&fn](const auto &range) {
-            loop_(range, std::forward<Fn>(fn));
-        });
+        execution::for_each(ex_tbb, from, to, std::forward<Fn>(fn), granularity);
    }

-    template<class I, class MergeFn, class T, class AccessFn>
-    static T reduce(I          from,
-                    I          to,
-                    const T   &init,
-                    MergeFn  &&mergefn,
-                    AccessFn &&access,
-                    size_t     granularity = 1
-                    )
+    template<class...Args>
+    static auto reduce(Args&&...args)
    {
-        return tbb::parallel_reduce(
-            tbb::blocked_range{from, to, granularity}, init,
-            [&](const auto &range, T subinit) {
-                T acc = subinit;
-                loop_(range, [&](auto &i) { acc = mergefn(acc, access(i)); });
-                return acc;
-            },
-            std::forward<MergeFn>(mergefn));
-    }
-
-    template<class I, class MergeFn, class T>
-    static IteratorOnly<I, T> reduce(I         from,
-                                     I         to,
-                                     const T & init,
-                                     MergeFn &&mergefn,
-                                     size_t    granularity = 1)
-    {
-        return reduce(
-            from, to, init, std::forward<MergeFn>(mergefn),
-            [](typename I::value_type &i) { return i; }, granularity);
+        return execution::reduce(ex_tbb, std::forward<Args>(args)...);
    }

    static size_t max_concurreny()
    {
-        return tbb::this_task_arena::max_concurrency();
+        return execution::max_concurrency(ex_tbb);
    }
 };

 template<> struct _ccr<false>
 {
-private:
-    struct _Mtx { inline void lock() {} inline void unlock() {} };
-
-public:
-    using SpinningMutex = _Mtx;
-    using BlockingMutex = _Mtx;
-
-    template<class Fn, class It>
-    static IteratorOnly<It, void> loop_(It from, It to, Fn &&fn)
-    {
-        for (auto it = from; it != to; ++it) fn(*it);
-    }
-
-    template<class Fn, class I>
-    static IntegerOnly<I, void> loop_(I from, I to, Fn &&fn)
-    {
-        for (I i = from; i < to; ++i) fn(i);
-    }
+    using SpinningMutex = execution::SpinningMutex<ExecutionSeq>;
+    using BlockingMutex = execution::BlockingMutex<ExecutionSeq>;

    template<class It, class Fn>
-    static void for_each(It   from,
-                         It   to,
-                         Fn &&fn,
-                         size_t /* ignore granularity */ = 1)
+    static void for_each(It from, It to, Fn &&fn, size_t granularity = 1)
    {
-        loop_(from, to, std::forward<Fn>(fn));
+        execution::for_each(ex_seq, from, to, std::forward<Fn>(fn), granularity);
    }

-    template<class I, class MergeFn, class T, class AccessFn>
-    static T reduce(I         from,
-                    I         to,
-                    const T & init,
-                    MergeFn  &&mergefn,
-                    AccessFn &&access,
-                    size_t   /*granularity*/ = 1
-                    )
+    template<class...Args>
+    static auto reduce(Args&&...args)
    {
-        T acc = init;
-        loop_(from, to, [&](auto &i) { acc = mergefn(acc, access(i)); });
-        return acc;
+        return execution::reduce(ex_seq, std::forward<Args>(args)...);
    }

-    template<class I, class MergeFn, class T>
-    static IteratorOnly<I, T> reduce(I          from,
-                                     I          to,
-                                     const T   &init,
-                                     MergeFn  &&mergefn,
-                                     size_t     /*granularity*/ = 1
-                                     )
+    static size_t max_concurreny()
    {
-        return reduce(from, to, init, std::forward<MergeFn>(mergefn),
-                      [](typename I::value_type &i) { return i; });
+        return execution::max_concurrency(ex_seq);
    }
-
-    static size_t max_concurreny() { return 1; }
 };

 using ccr = _ccr<USE_FULL_CONCURRENCY>;