Add new execution framework

Inspired by std::execution
2021-03-17 19:42:58 +01:00 · 2021-03-17 19:42:58 +01:00 · 7760d3fbc4
commit 7760d3fbc4
parent 4293a68aaa
6 changed files with 285 additions and 108 deletions
--- a/src/libslic3r/CMakeLists.txt
+++ b/src/libslic3r/CMakeLists.txt
@ -219,6 +219,9 @@ add_library(libslic3r STATIC
    SimplifyMeshImpl.hpp
    SimplifyMesh.cpp
    MarchingSquares.hpp
    Execution/Execution.hpp
    Execution/ExecutionSeq.hpp
    Execution/ExecutionTBB.hpp
    Optimize/Optimizer.hpp
    Optimize/NLoptOptimizer.hpp
    Optimize/BruteforceOptimizer.hpp
--- a/src/libslic3r/Execution/Execution.hpp
+++ b/src/libslic3r/Execution/Execution.hpp
@ -0,0 +1,100 @@
 #ifndef EXECUTION_HPP
 #define EXECUTION_HPP
 #include <type_traits>
 #include <utility>
 #include <cstddef>
 #include <iterator>
 #include "libslic3r/libslic3r.h"
 namespace Slic3r {
 // Strips the reference and then the top-level cv-qualifiers from T, like
 // C++20's std::remove_cvref_t. The reference has to be removed first:
 // a reference type has no top-level cv-qualifiers of its own, so applying
 // std::remove_cv_t directly to e.g. 'const T &' would leave the const in place.
 template<class T>
 using remove_cvref_t = std::remove_cv_t<std::remove_reference_t<T>>;
 // Trait telling whether EP is an execution policy. The primary template
 // answers 'false'; override (specialize) it for valid execution policies by
 // deriving the specialization from std::true_type.
 template<class EP> struct IsExecutionPolicy_ : public std::false_type {};
 // Convenience constant: the value of IsExecutionPolicy_ for EP after its
 // cv-qualifiers and reference have been stripped with remove_cvref_t.
 template<class EP> constexpr bool IsExecutionPolicy =
    IsExecutionPolicy_<remove_cvref_t<EP>>::value;
 // SFINAE helper: names the type T (void by default) only when EP is an
 // execution policy, otherwise substitution fails.
 template<class EP, class T = void>
 using ExecutionPolicyOnly = std::enable_if_t<IsExecutionPolicy<EP>, T>;
 namespace execution {
 // This struct needs to be specialized for each execution policy.
 // See ExecutionSeq.hpp and ExecutionTBB.hpp for example.
 template<class EP, class En = void> struct Traits {};
 template<class EP> using AsTraits = Traits<remove_cvref_t<EP>>;
 // Each execution policy should declare two types of mutexes. A a spin lock and
 // a blocking mutex.
 template<class EP> using SpinningMutex = typename Traits<EP>::SpinningMutex;
 template<class EP> using BlockingMutex = typename Traits<EP>::BlockingMutex;
 // Query the available threads for concurrency.
 template<class EP, class = ExecutionPolicyOnly<EP> >
 size_t max_concurrency(const EP &ep)
 {
    return AsTraits<EP>::max_concurrency(ep);
 }
 // foreach loop with the execution policy passed as argument. Granularity can
 // be specified explicitly. max_concurrency() can be used for optimal results.
 template<class EP, class It, class Fn, class = ExecutionPolicyOnly<EP>>
 void for_each(const EP &ep, It from, It to, Fn &&fn, size_t granularity = 1)
 {
    AsTraits<EP>::for_each(ep, from, to, std::forward<Fn>(fn), granularity);
 }
 // A reduce operation with the execution policy passed as argument.
 // mergefn has T(const T&, const T&) signature
 // accessfn has T(I) signature if I is an integral type and
 // T(const I::value_type &) if I is an iterator type.
 template<class EP,
         class I,
         class MergeFn,
         class T,
         class AccessFn,
         class = ExecutionPolicyOnly<EP> >
 T reduce(const EP & ep,
         I          from,
         I          to,
         const T &  init,
         MergeFn && mergefn,
         AccessFn &&accessfn,
         size_t     granularity = 1)
 {
    return AsTraits<EP>::reduce(ep, from, to, init,
                                std::forward<MergeFn>(mergefn),
                                std::forward<AccessFn>(accessfn),
                                granularity);
 }
 // An overload of reduce method to be used with iterators as 'from' and 'to'
 // arguments. The elements themselves are the values to be merged: the access
 // function used is an identity returning a copy of the element it is given.
 template<class EP,
          class I,
          class MergeFn,
          class T,
          class = ExecutionPolicyOnly<EP>,
          class = IteratorOnly<I> >
 T reduce(const EP &ep,
          I         from,
          I         to,
          const T & init,
          MergeFn &&mergefn,
          size_t    granularity = 1)
 {
    // The element type comes from std::iterator_traits so that raw pointers
    // can serve as iterators, and the element is taken by const reference
    // (matching the documented accessfn signature) so that ranges of const
    // elements, e.g. those of a const_iterator, are accepted as well.
    using Value = typename std::iterator_traits<I>::value_type;
    return reduce(
        ep, from, to, init, std::forward<MergeFn>(mergefn),
        [](const Value &i) { return i; }, granularity);
 }
 } // namespace execution
 } // namespace Slic3r
 #endif // EXECUTION_HPP
--- a/src/libslic3r/Execution/ExecutionSeq.hpp
+++ b/src/libslic3r/Execution/ExecutionSeq.hpp
@ -0,0 +1,84 @@
 #ifndef EXECUTIONSEQ_HPP
 #define EXECUTIONSEQ_HPP
 #ifdef PRUSASLICER_USE_EXECUTION_STD // Conflicts with our version of TBB
 #include <execution>
 #endif
 #include "Execution.hpp"
 namespace Slic3r {
 // Execution policy implementing dummy sequential algorithms
 // (an empty tag type, it only selects the implementation).
 struct ExecutionSeq {};
 // Register ExecutionSeq as a valid execution policy.
 template<> struct IsExecutionPolicy_<ExecutionSeq> : public std::true_type {};
 // Ready-made policy object that can be passed to the execution:: functions.
 static constexpr ExecutionSeq ex_seq = {};
 // Trait telling whether EP is a sequential execution policy. The primary
 // template answers 'false'; it derives from std::false_type to mirror the
 // specializations (which derive from std::true_type) and IsExecutionPolicy_.
 template<class EP> struct IsSequentialEP_ : public std::false_type {};
 // ExecutionSeq is a sequential policy.
 template<> struct IsSequentialEP_<ExecutionSeq>: public std::true_type {};
 // When building with <execution>, std::execution::sequenced_policy is
 // registered both as an execution policy and as a sequential one.
 #ifdef PRUSASLICER_USE_EXECUTION_STD
 template<> struct IsExecutionPolicy_<std::execution::sequenced_policy>: public std::true_type {};
 template<> struct IsSequentialEP_<std::execution::sequenced_policy>: public std::true_type {};
 #endif
 // Convenience constant: the value of IsSequentialEP_ for EP after its
 // cv-qualifiers and reference have been stripped with remove_cvref_t.
 template<class EP>
 constexpr bool IsSequentialEP = IsSequentialEP_<remove_cvref_t<EP>>::value;
 // SFINAE helper: names the type R (EP itself by default) only when EP is a
 // sequential execution policy, otherwise substitution fails.
 template<class EP, class R = EP>
 using SequentialEPOnly = std::enable_if_t<IsSequentialEP<EP>, R>;
 // Traits of the sequential execution policies (every EP for which
 // IsSequentialEP holds): each algorithm is a plain loop run by the caller.
 template<class EP>
 struct execution::Traits<EP, SequentialEPOnly<EP, void>> {
 private:
    // Mutex stand-in whose lock() and unlock() do nothing. The name avoids a
    // leading underscore followed by an uppercase letter, as such identifiers
    // are reserved by the C++ standard.
    struct DummyMutex { inline void lock() {} inline void unlock() {} };
    // Calls fn with each element of the iterator range [from, to).
    template<class Fn, class It>
    static IteratorOnly<It, void> loop_(It from, It to, Fn &&fn)
    {
        for (auto it = from; it != to; ++it) fn(*it);
    }
    // Calls fn with each integer of [from, to).
    template<class Fn, class I>
    static IntegerOnly<I, void> loop_(I from, I to, Fn &&fn)
    {
        for (I i = from; i < to; ++i) fn(i);
    }
 public:
    // Both mutex kinds are the do-nothing mutex.
    using SpinningMutex = DummyMutex;
    using BlockingMutex = DummyMutex;
    // Calls fn for every item between from and to, in order. The granularity
    // argument is accepted for interface compatibility and ignored.
    template<class It, class Fn>
    static void for_each(const EP &,
                         It   from,
                         It   to,
                         Fn &&fn,
                         size_t /* ignore granularity */ = 1)
    {
        loop_(from, to, std::forward<Fn>(fn));
    }
    // Folds the items between from and to into a single value: starting from
    // init, the accumulator is repeatedly replaced by
    // mergefn(accumulator, access(item)). The granularity is ignored.
    template<class I, class MergeFn, class T, class AccessFn>
    static T reduce(const EP &,
                    I         from,
                    I         to,
                    const T & init,
                    MergeFn  &&mergefn,
                    AccessFn &&access,
                    size_t   /*granularity*/ = 1
                    )
    {
        T acc = init;
        loop_(from, to, [&](auto &i) { acc = mergefn(acc, access(i)); });
        return acc;
    }
    // A sequential policy always reports a concurrency of one.
    static size_t max_concurrency(const EP &) { return 1; }
 };
 } // namespace Slic3r
 #endif // EXECUTIONSEQ_HPP
--- a/src/libslic3r/Execution/ExecutionTBB.hpp
+++ b/src/libslic3r/Execution/ExecutionTBB.hpp
@ -0,0 +1,77 @@
 #ifndef EXECUTIONTBB_HPP
 #define EXECUTIONTBB_HPP
 #include <tbb/spin_mutex.h>
 #include <tbb/mutex.h>
 #include <tbb/parallel_for.h>
 #include <tbb/parallel_reduce.h>
 #include <tbb/task_arena.h>
 #include "Execution.hpp"
 namespace Slic3r {
 // Tag type of the TBB based execution policy (empty, it only selects the
 // implementation).
 struct ExecutionTBB {};
 // Register ExecutionTBB as a valid execution policy.
 template<> struct IsExecutionPolicy_<ExecutionTBB> : public std::true_type {};
 // Execution policy using Intel TBB library under the hood.
 static constexpr ExecutionTBB ex_tbb = {};
 // Traits of the ExecutionTBB policy: the algorithms are implemented with
 // tbb::parallel_for and tbb::parallel_reduce over a tbb::blocked_range.
 template<> struct execution::Traits<ExecutionTBB> {
 private:
    // tbb::blocked_range requires a positive grain size, so a granularity of
    // zero is treated as one.
    static size_t grain_(size_t granularity)
    {
        return granularity > 0 ? granularity : 1;
    }
    // Calls fn with each element of a sub-range delimited by iterators.
    template<class Fn, class It>
    static IteratorOnly<It, void> loop_(const tbb::blocked_range<It> &range, Fn &&fn)
    {
        for (auto &el : range) fn(el);
    }
    // Calls fn with each integer of a sub-range of integers.
    template<class Fn, class I>
    static IntegerOnly<I, void> loop_(const tbb::blocked_range<I> &range, Fn &&fn)
    {
        for (I i = range.begin(); i < range.end(); ++i) fn(i);
    }
 public:
    using SpinningMutex = tbb::spin_mutex;
    using BlockingMutex = tbb::mutex;
    // Calls fn for every item between from and to using tbb::parallel_for.
    // 'granularity' is the grain size of the blocked range; it defaults to 1
    // like in reduce() and in the sequential traits.
    template<class It, class Fn>
    static void for_each(const ExecutionTBB &,
                         It from, It to, Fn &&fn, size_t granularity = 1)
    {
        tbb::parallel_for(tbb::blocked_range{from, to, grain_(granularity)},
                          [&fn](const auto &range) {
            // The body is invoked once per sub-range, so the functor is
            // passed on as an lvalue rather than forwarded.
            loop_(range, fn);
        });
    }
    // Folds the items between from and to into a single value using
    // tbb::parallel_reduce. Within a sub-range the accumulator starts from the
    // value handed over by TBB and is repeatedly replaced by
    // mergefn(accumulator, access(item)); partial results are combined with
    // mergefn as well.
    template<class I, class MergeFn, class T, class AccessFn>
    static T reduce(const ExecutionTBB &,
                    I          from,
                    I          to,
                    const T   &init,
                    MergeFn  &&mergefn,
                    AccessFn &&access,
                    size_t     granularity = 1
                    )
    {
        return tbb::parallel_reduce(
            tbb::blocked_range{from, to, grain_(granularity)}, init,
            [&](const auto &range, T subinit) {
                T acc = subinit;
                loop_(range, [&](auto &i) { acc = mergefn(acc, access(i)); });
                return acc;
            },
            std::forward<MergeFn>(mergefn));
    }
    // Concurrency limit of the task arena the caller is running in.
    static size_t max_concurrency(const ExecutionTBB &)
    {
        return tbb::this_task_arena::max_concurrency();
    }
 };
 }
 #endif // EXECUTIONTBB_HPP
--- a/src/libslic3r/MTUtils.hpp
+++ b/src/libslic3r/MTUtils.hpp
@ -106,13 +106,8 @@ template<class C> bool all_of(const C &container)
                       });
 }
-template<class T> struct remove_cvref
+template<class T>
-{
+using remove_cvref_t = std::remove_reference_t<std::remove_cv_t<T>>;
    using type =
        typename std::remove_cv<typename std::remove_reference<T>::type>::type;
 };
 template<class T> using remove_cvref_t = typename remove_cvref<T>::type;
 /// Exactly like Matlab https://www.mathworks.com/help/matlab/ref/linspace.html
 template<class T, class I, class = IntegerOnly<I>>
--- a/src/libslic3r/SLA/Concurrency.hpp
+++ b/src/libslic3r/SLA/Concurrency.hpp
@ -1,16 +1,10 @@
 #ifndef SLA_CONCURRENCY_H
 #define SLA_CONCURRENCY_H
-#include <tbb/spin_mutex.h>
+// FIXME: Deprecated
 #include <tbb/mutex.h>
 #include <tbb/parallel_for.h>
 #include <tbb/parallel_reduce.h>
 #include <tbb/task_arena.h>
-#include <algorithm>
+#include <libslic3r/Execution/ExecutionSeq.hpp>
-#include <numeric>
+#include <libslic3r/Execution/ExecutionTBB.hpp>
 #include <libslic3r/libslic3r.h>
 namespace Slic3r {
 namespace sla {
@ -23,124 +17,48 @@ template<bool> struct _ccr {};
 template<> struct _ccr<true>
 {
-    using SpinningMutex = tbb::spin_mutex;
+    using SpinningMutex = execution::SpinningMutex<ExecutionTBB>;
-    using BlockingMutex = tbb::mutex;
+    using BlockingMutex = execution::BlockingMutex<ExecutionTBB>;
    template<class Fn, class It>
    static IteratorOnly<It, void> loop_(const tbb::blocked_range<It> &range, Fn &&fn)
    {
        for (auto &el : range) fn(el);
    }
    template<class Fn, class I>
    static IntegerOnly<I, void> loop_(const tbb::blocked_range<I> &range, Fn &&fn)
    {
        for (I i = range.begin(); i < range.end(); ++i) fn(i);
    }
    template<class It, class Fn>
    static void for_each(It from, It to, Fn &&fn, size_t granularity = 1)
    {
-        tbb::parallel_for(tbb::blocked_range{from, to, granularity},
+        execution::for_each(ex_tbb, from, to, std::forward<Fn>(fn), granularity);
                          [&fn](const auto &range) {
            loop_(range, std::forward<Fn>(fn));
        });
    }
-    template<class I, class MergeFn, class T, class AccessFn>
+    template<class...Args>
-    static T reduce(I          from,
+    static auto reduce(Args&&...args)
                    I          to,
                    const T   &init,
                    MergeFn  &&mergefn,
                    AccessFn &&access,
                    size_t     granularity = 1
                    )
    {
-        return tbb::parallel_reduce(
+        return execution::reduce(ex_tbb, std::forward<Args>(args)...);
            tbb::blocked_range{from, to, granularity}, init,
            [&](const auto &range, T subinit) {
                T acc = subinit;
                loop_(range, [&](auto &i) { acc = mergefn(acc, access(i)); });
                return acc;
            },
            std::forward<MergeFn>(mergefn));
    }
    template<class I, class MergeFn, class T>
    static IteratorOnly<I, T> reduce(I         from,
                                     I         to,
                                     const T & init,
                                     MergeFn &&mergefn,
                                     size_t    granularity = 1)
    {
        return reduce(
            from, to, init, std::forward<MergeFn>(mergefn),
            [](typename I::value_type &i) { return i; }, granularity);
    }
    static size_t max_concurreny()
    {
-        return tbb::this_task_arena::max_concurrency();
+        return execution::max_concurrency(ex_tbb);
    }
 };
 template<> struct _ccr<false>
 {
-private:
+    using SpinningMutex = execution::SpinningMutex<ExecutionSeq>;
-    struct _Mtx { inline void lock() {} inline void unlock() {} };
+    using BlockingMutex = execution::BlockingMutex<ExecutionSeq>;
 public:
    using SpinningMutex = _Mtx;
    using BlockingMutex = _Mtx;
    template<class Fn, class It>
    static IteratorOnly<It, void> loop_(It from, It to, Fn &&fn)
    {
        for (auto it = from; it != to; ++it) fn(*it);
    }
    template<class Fn, class I>
    static IntegerOnly<I, void> loop_(I from, I to, Fn &&fn)
    {
        for (I i = from; i < to; ++i) fn(i);
    }
    template<class It, class Fn>
-    static void for_each(It   from,
+    static void for_each(It from, It to, Fn &&fn, size_t granularity = 1)
                         It   to,
                         Fn &&fn,
                         size_t /* ignore granularity */ = 1)
    {
-        loop_(from, to, std::forward<Fn>(fn));
+        execution::for_each(ex_seq, from, to, std::forward<Fn>(fn), granularity);
    }
-    template<class I, class MergeFn, class T, class AccessFn>
+    template<class...Args>
-    static T reduce(I         from,
+    static auto reduce(Args&&...args)
                    I         to,
                    const T & init,
                    MergeFn  &&mergefn,
                    AccessFn &&access,
                    size_t   /*granularity*/ = 1
                    )
    {
-        T acc = init;
+        return execution::reduce(ex_seq, std::forward<Args>(args)...);
        loop_(from, to, [&](auto &i) { acc = mergefn(acc, access(i)); });
        return acc;
    }
-    template<class I, class MergeFn, class T>
+    static size_t max_concurreny()
    static IteratorOnly<I, T> reduce(I          from,
                                     I          to,
                                     const T   &init,
                                     MergeFn  &&mergefn,
                                     size_t     /*granularity*/ = 1
                                     )
    {
-        return reduce(from, to, init, std::forward<MergeFn>(mergefn),
+        return execution::max_concurrency(ex_seq);
                      [](typename I::value_type &i) { return i; });
    }
    static size_t max_concurreny() { return 1; }
 };
 using ccr = _ccr<USE_FULL_CONCURRENCY>;