replace ccr_::enumerate with flexible for_each

enumerate is unusual and would only work effectively with random access iterators. This for_each takes advantage of tbb::blocked_range.
2020-08-05 15:49:36 +02:00 · 2020-08-05 15:49:36 +02:00 · 929cea59f3
commit 929cea59f3
parent 9486901b93
7 changed files with 169 additions and 125 deletions
--- a/src/libslic3r/SLA/Concurrency.hpp
+++ b/src/libslic3r/SLA/Concurrency.hpp
@ -4,6 +4,8 @@
 #include <tbb/spin_mutex.h>
 #include <tbb/mutex.h>
 #include <tbb/parallel_for.h>
+#include <algorithm>
+#include <libslic3r/libslic3r.h>

 namespace Slic3r {
 namespace sla {
@ -17,16 +19,29 @@ template<bool> struct _ccr {};
 template<> struct _ccr<true>
 {
    using SpinningMutex = tbb::spin_mutex;
-    using BlockingMutex  = tbb::mutex;
-    
+    using BlockingMutex = tbb::mutex;
+
+    // Calls fn(*it) for every it in [from, to). The range is handed to
+    // tbb::parallel_for as a tbb::blocked_range with `granularity` as its
+    // grain size. fn is captured by reference and invoked from inside
+    // tbb::parallel_for, so it has to tolerate concurrent calls.
+    // NOTE(review): tbb::blocked_range needs `It` to support <, - and +,
+    // which in practice means random access iterators - confirm in TBB docs.
    template<class It, class Fn>
-    static inline void enumerate(It from, It to, Fn fn)
+    static IteratorOnly<It, void> for_each(It     from,
+                                           It     to,
+                                           Fn &&  fn,
+                                           size_t granularity = 1)
    {
-        auto   iN = to - from;
-        size_t N  = iN < 0 ? 0 : size_t(iN);
-        
-        tbb::parallel_for(size_t(0), N, [from, fn](size_t n) {
-            fn(*(from + decltype(iN)(n)), n);
+        tbb::parallel_for(tbb::blocked_range{from, to, granularity},
+                          [&fn](const auto &range) {
+                              // auto&& also binds when operator* yields a
+                              // temporary (proxy iterators).
+                              for (auto &&el : range) fn(el);
+                          });
+    }
+
+    // Calls fn(i) for every index i in [from, to), chunked by
+    // tbb::blocked_range with `granularity` as the grain size.
+    // `from` and `to` must have the same integer type I.
+    template<class I, class Fn>
+    static IntegerOnly<I, void> for_each(I      from,
+                                         I      to,
+                                         Fn &&  fn,
+                                         size_t granularity = 1)
+    {
+        tbb::parallel_for(tbb::blocked_range{from, to, granularity},
+                          [&fn](const auto &range) {
+            for (I i = range.begin(); i < range.end(); ++i) fn(i);
        });
    }
 };
@ -39,11 +54,23 @@ private:
 public:
    using SpinningMutex = _Mtx;
    using BlockingMutex = _Mtx;
-    
+
    template<class It, class Fn>
-    static inline void enumerate(It from, It to, Fn fn)
+    // Sequential variant: visits [from, to) in order with a plain loop.
+    // The granularity argument is accepted for interface parity and ignored.
+    static IteratorOnly<It, void> for_each(It   from,
+                                           It   to,
+                                           Fn &&fn,
+                                           size_t /* ignore granularity */ = 1)
    {
-        for (auto it = from; it != to; ++it) fn(*it, size_t(it - from));
+        while (from != to) {
+            fn(*from);
+            ++from;
+        }
+    }
+
+    // Sequential variant for index ranges: calls fn(idx) for each idx in
+    // [from, to) in ascending order. The granularity argument is ignored.
+    template<class I, class Fn>
+    static IntegerOnly<I, void> for_each(I    from,
+                                         I    to,
+                                         Fn &&fn,
+                                         size_t /* ignore granularity */ = 1)
+    {
+        I idx = from;
+        while (idx < to) {
+            fn(idx);
+            ++idx;
+        }
    }
 };

--- a/src/libslic3r/SLA/IndexedMesh.cpp
+++ b/src/libslic3r/SLA/IndexedMesh.cpp
@ -320,10 +320,10 @@ PointSet normals(const PointSet& points,
    PointSet ret(range.size(), 3);

    //    for (size_t ridx = 0; ridx < range.size(); ++ridx)
-    ccr::enumerate(
-        range.begin(), range.end(),
-        [&ret, &mesh, &points, thr, eps](unsigned el, size_t ridx) {
+    ccr::for_each(size_t(0), range.size(),
+        [&ret, &mesh, &points, thr, eps, &range](size_t ridx) {
            thr();
+            unsigned el = range[ridx];
            auto  eidx   = Eigen::Index(el);
            int   faceid = 0;
            Vec3d p;
--- a/src/libslic3r/SLA/SupportPointGenerator.cpp
+++ b/src/libslic3r/SLA/SupportPointGenerator.cpp
@ -4,6 +4,7 @@
 #include <tbb/parallel_for.h>

 #include "SupportPointGenerator.hpp"
+#include "Concurrency.hpp"
 #include "Model.hpp"
 #include "ExPolygon.hpp"
 #include "SVG.hpp"
@ -87,27 +88,28 @@ void SupportPointGenerator::project_onto_mesh(std::vector<sla::SupportPoint>& po
    // The function  makes sure that all the points are really exactly placed on the mesh.

    // Use a reasonable granularity to account for the worker thread synchronization cost.
-    tbb::parallel_for(tbb::blocked_range<size_t>(0, points.size(), 64),
-        [this, &points](const tbb::blocked_range<size_t>& range) {
-            for (size_t point_id = range.begin(); point_id < range.end(); ++ point_id) {
-                if ((point_id % 16) == 0)
-                    // Don't call the following function too often as it flushes CPU write caches due to synchronization primitves.
-                    m_throw_on_cancel();
-                Vec3f& p = points[point_id].pos;
-                // Project the point upward and downward and choose the closer intersection with the mesh.
-                sla::IndexedMesh::hit_result hit_up   = m_emesh.query_ray_hit(p.cast<double>(), Vec3d(0., 0., 1.));
-                sla::IndexedMesh::hit_result hit_down = m_emesh.query_ray_hit(p.cast<double>(), Vec3d(0., 0., -1.));
+    static constexpr size_t gransize = 64;

-                bool up   = hit_up.is_hit();
-                bool down = hit_down.is_hit();
+    ccr_par::for_each(size_t(0), points.size(), [this, &points](size_t idx)
+    {
+        if ((idx % 16) == 0)
+            // Don't call the following function too often as it flushes CPU write caches due to synchronization primitives.
+            m_throw_on_cancel();

-                if (!up && !down)
-                    continue;
+        Vec3f& p = points[idx].pos;
+        // Project the point upward and downward and choose the closer intersection with the mesh.
+        sla::IndexedMesh::hit_result hit_up   = m_emesh.query_ray_hit(p.cast<double>(), Vec3d(0., 0., 1.));
+        sla::IndexedMesh::hit_result hit_down = m_emesh.query_ray_hit(p.cast<double>(), Vec3d(0., 0., -1.));

-                sla::IndexedMesh::hit_result& hit = (!down || (hit_up.distance() < hit_down.distance())) ? hit_up : hit_down;
-                p = p + (hit.distance() * hit.direction()).cast<float>();
-            }
-        });
+        bool up   = hit_up.is_hit();
+        bool down = hit_down.is_hit();
+
+        if (!up && !down)
+            return;
+
+        sla::IndexedMesh::hit_result& hit = (!down || (hit_up.distance() < hit_down.distance())) ? hit_up : hit_down;
+        p = p + (hit.distance() * hit.direction()).cast<float>();
+    }, gransize);
 }

 static std::vector<SupportPointGenerator::MyLayer> make_layers(
@ -126,78 +128,80 @@ static std::vector<SupportPointGenerator::MyLayer> make_layers(
    //const float pixel_area = pow(wxGetApp().preset_bundle->project_config.option<ConfigOptionFloat>("display_width") / wxGetApp().preset_bundle->project_config.option<ConfigOptionInt>("display_pixels_x"), 2.f); //
    const float pixel_area = pow(0.047f, 2.f);

-    // Use a reasonable granularity to account for the worker thread synchronization cost.
-    tbb::parallel_for(tbb::blocked_range<size_t>(0, layers.size(), 32),
-        [&layers, &slices, &heights, pixel_area, throw_on_cancel](const tbb::blocked_range<size_t>& range) {
-            for (size_t layer_id = range.begin(); layer_id < range.end(); ++ layer_id) {
-                if ((layer_id % 8) == 0)
-                    // Don't call the following function too often as it flushes CPU write caches due to synchronization primitves.
-                    throw_on_cancel();
-                SupportPointGenerator::MyLayer &layer   = layers[layer_id];
-                const ExPolygons		 &islands = slices[layer_id];
-                //FIXME WTF?
-                const float height = (layer_id>2 ? heights[layer_id-3] : heights[0]-(heights[1]-heights[0]));
-                layer.islands.reserve(islands.size());
-                for (const ExPolygon &island : islands) {
-                    float area = float(island.area() * SCALING_FACTOR * SCALING_FACTOR);
-                    if (area >= pixel_area)
-                        //FIXME this is not a correct centroid of a polygon with holes.
-                        layer.islands.emplace_back(layer, island, get_extents(island.contour), Slic3r::unscale(island.contour.centroid()).cast<float>(), area, height);
-                }
-            }
-        });
+    // Both bounds are spelled as size_t so that for_each deduces a single
+    // index type on every platform (0ul is not size_t everywhere).
+    ccr_par::for_each(size_t(0), layers.size(),
+        [&layers, &slices, &heights, pixel_area, throw_on_cancel](size_t layer_id)
+    {
+        if ((layer_id % 8) == 0)
+            // Don't call the following function too often as it flushes
+            // CPU write caches due to synchronization primitives.
+            throw_on_cancel();
+
+        SupportPointGenerator::MyLayer &layer   = layers[layer_id];
+        const ExPolygons &              islands = slices[layer_id];
+        // FIXME WTF?
+        const float height = (layer_id > 2 ?
+                                  heights[layer_id - 3] :
+                                  heights[0] - (heights[1] - heights[0]));
+        layer.islands.reserve(islands.size());
+        for (const ExPolygon &island : islands) {
+            float area = float(island.area() * SCALING_FACTOR * SCALING_FACTOR);
+            if (area >= pixel_area)
+                // FIXME this is not a correct centroid of a polygon with holes.
+                layer.islands.emplace_back(layer, island, get_extents(island.contour),
+                                           unscaled<float>(island.contour.centroid()), area, height);
+        }
+    }, 32 /*gransize*/);

    // Calculate overlap of successive layers. Link overlapping islands.
-    tbb::parallel_for(tbb::blocked_range<size_t>(1, layers.size(), 8),
-        [&layers, &heights, throw_on_cancel](const tbb::blocked_range<size_t>& range) {
-        for (size_t layer_id = range.begin(); layer_id < range.end(); ++layer_id) {
-            if ((layer_id % 2) == 0)
-                // Don't call the following function too often as it flushes CPU write caches due to synchronization primitves.
-                throw_on_cancel();
-            SupportPointGenerator::MyLayer &layer_above = layers[layer_id];
-            SupportPointGenerator::MyLayer &layer_below = layers[layer_id - 1];
-            //FIXME WTF?
-            const float layer_height = (layer_id!=0 ? heights[layer_id]-heights[layer_id-1] : heights[0]);
-            const float safe_angle = 5.f * (float(M_PI)/180.f); // smaller number - less supports
-            const float between_layers_offset =  float(scale_(layer_height / std::tan(safe_angle)));
-            const float slope_angle = 75.f * (float(M_PI)/180.f); // smaller number - less supports
-            const float slope_offset = float(scale_(layer_height / std::tan(slope_angle)));
-            //FIXME This has a quadratic time complexity, it will be excessively slow for many tiny islands.
-            for (SupportPointGenerator::Structure &top : layer_above.islands) {
-                for (SupportPointGenerator::Structure &bottom : layer_below.islands) {
-                    float overlap_area = top.overlap_area(bottom);
-                    if (overlap_area > 0) {
-                        top.islands_below.emplace_back(&bottom, overlap_area);
-                        bottom.islands_above.emplace_back(&top, overlap_area);
-                    }
-                }
-                if (! top.islands_below.empty()) {
-                    Polygons top_polygons    = to_polygons(*top.polygon);
-                    Polygons bottom_polygons = top.polygons_below();
-                    top.overhangs = diff_ex(top_polygons, bottom_polygons);
-                    if (! top.overhangs.empty()) {
-                        top.overhangs_area = 0.f;
-                        std::vector<std::pair<ExPolygon*, float>> expolys_with_areas;
-                        for (ExPolygon &ex : top.overhangs) {
-                            float area = float(ex.area());
-                            expolys_with_areas.emplace_back(&ex, area);
-                            top.overhangs_area += area;
-                        }
-                        std::sort(expolys_with_areas.begin(), expolys_with_areas.end(),
+    // The start index is written as size_t(1) so that both bounds deduce the
+    // same integer type for for_each (1ul is not size_t on every platform).
+    ccr_par::for_each(size_t(1), layers.size(),
+                      [&layers, &heights, throw_on_cancel] (size_t layer_id)
+    {
+      if ((layer_id % 2) == 0)
+          // Don't call the following function too often as it flushes CPU write caches due to synchronization primitives.
+          throw_on_cancel();
+      SupportPointGenerator::MyLayer &layer_above = layers[layer_id];
+      SupportPointGenerator::MyLayer &layer_below = layers[layer_id - 1];
+      //FIXME WTF?
+      const float layer_height = (layer_id!=0 ? heights[layer_id]-heights[layer_id-1] : heights[0]);
+      const float safe_angle = 5.f * (float(M_PI)/180.f); // smaller number - less supports
+      const float between_layers_offset =  float(scale_(layer_height / std::tan(safe_angle)));
+      const float slope_angle = 75.f * (float(M_PI)/180.f); // smaller number - less supports
+      const float slope_offset = float(scale_(layer_height / std::tan(slope_angle)));
+      //FIXME This has a quadratic time complexity, it will be excessively slow for many tiny islands.
+      for (SupportPointGenerator::Structure &top : layer_above.islands) {
+          for (SupportPointGenerator::Structure &bottom : layer_below.islands) {
+              float overlap_area = top.overlap_area(bottom);
+              if (overlap_area > 0) {
+                  top.islands_below.emplace_back(&bottom, overlap_area);
+                  bottom.islands_above.emplace_back(&top, overlap_area);
+              }
+          }
+          if (! top.islands_below.empty()) {
+              Polygons top_polygons    = to_polygons(*top.polygon);
+              Polygons bottom_polygons = top.polygons_below();
+              top.overhangs = diff_ex(top_polygons, bottom_polygons);
+              if (! top.overhangs.empty()) {
+                  top.overhangs_area = 0.f;
+                  std::vector<std::pair<ExPolygon*, float>> expolys_with_areas;
+                  for (ExPolygon &ex : top.overhangs) {
+                      float area = float(ex.area());
+                      expolys_with_areas.emplace_back(&ex, area);
+                      top.overhangs_area += area;
+                  }
+                  std::sort(expolys_with_areas.begin(), expolys_with_areas.end(),
                            [](const std::pair<ExPolygon*, float> &p1, const std::pair<ExPolygon*, float> &p2)
-                                { return p1.second > p2.second; });
-                        ExPolygons overhangs_sorted;
-                        for (auto &p : expolys_with_areas)
-                            overhangs_sorted.emplace_back(std::move(*p.first));
-                        top.overhangs = std::move(overhangs_sorted);
-                        top.overhangs_area *= float(SCALING_FACTOR * SCALING_FACTOR);
-                        top.overhangs_slopes = diff_ex(top_polygons, offset(bottom_polygons, slope_offset));
-                        top.dangling_areas = diff_ex(top_polygons, offset(bottom_polygons, between_layers_offset));
-                    }
-                }
-            }
-        }
-    });
+                            { return p1.second > p2.second; });
+                  ExPolygons overhangs_sorted;
+                  for (auto &p : expolys_with_areas)
+                      overhangs_sorted.emplace_back(std::move(*p.first));
+                  top.overhangs = std::move(overhangs_sorted);
+                  top.overhangs_area *= float(SCALING_FACTOR * SCALING_FACTOR);
+                  top.overhangs_slopes = diff_ex(top_polygons, offset(bottom_polygons, slope_offset));
+                  top.dangling_areas = diff_ex(top_polygons, offset(bottom_polygons, between_layers_offset));
+              }
+          }
+      }
+    }, 8 /* gransize */);

    return layers;
 }
--- a/src/libslic3r/SLA/SupportTreeBuildsteps.cpp
+++ b/src/libslic3r/SLA/SupportTreeBuildsteps.cpp
@ -209,14 +209,16 @@ IndexedMesh::hit_result SupportTreeBuildsteps::pinhead_mesh_intersect(
    // of the pinhead robe (side) surface. The result will be the smallest
    // hit distance.

-    ccr::enumerate(hits.begin(), hits.end(),
-                   [&m, &rings, sd](HitResult &hit, size_t i) {
+    ccr::for_each(size_t(0), hits.size(),
+                  [&m, &rings, sd, &hits](size_t i) {

       // Point on the circle on the pin sphere
       Vec3d ps = rings.pinring(i);
       // This is the point on the circle on the back sphere
       Vec3d p = rings.backring(i);

+       auto &hit = hits[i];
+
       // Point ps is not on mesh but can be inside or
       // outside as well. This would cause many problems
       // with ray-casting. To detect the position we will
@ -265,8 +267,10 @@ IndexedMesh::hit_result SupportTreeBuildsteps::bridge_mesh_intersect(
    // Hit results
    std::array<Hit, SAMPLES> hits;

-    ccr::enumerate(hits.begin(), hits.end(),
-                [this, r, src, /*ins_check,*/ &ring, dir, sd] (Hit &hit, size_t i) {
+    ccr::for_each(size_t(0), hits.size(),
+                 [this, r, src, /*ins_check,*/ &ring, dir, sd, &hits] (size_t i)
+    {
+        Hit &hit = hits[i];

        // Point on the circle on the pin sphere
        Vec3d p = ring.get(i, src, r + sd);
@ -744,10 +748,10 @@ void SupportTreeBuildsteps::filter()
        }
    };

-    ccr::enumerate(filtered_indices.begin(), filtered_indices.end(),
-                   [this, &filterfn](unsigned fidx, size_t i) {
-                       filterfn(fidx, i, m_cfg.head_back_radius_mm);
-                   });
+    // size_t(0) keeps both bounds the same type for for_each's deduction.
+    ccr::for_each(size_t(0), filtered_indices.size(),
+                  [this, &filterfn, &filtered_indices] (size_t i) {
+                      filterfn(filtered_indices[i], i, m_cfg.head_back_radius_mm);
+                  });

    for (size_t i = 0; i < heads.size(); ++i)
        if (heads[i].is_valid()) {
@ -1033,8 +1037,8 @@ void SupportTreeBuildsteps::routing_to_model()
    // If it can be routed there with a bridge shorter than
    // min_bridge_distance.

-    ccr::enumerate(m_iheads_onmodel.begin(), m_iheads_onmodel.end(),
-                   [this] (const unsigned idx, size_t) {
+    ccr::for_each(m_iheads_onmodel.begin(), m_iheads_onmodel.end(),
+                  [this] (const unsigned idx) {
        m_thr();

        auto& head = m_builder.head(idx);
--- a/src/libslic3r/SLAPrint.hpp
+++ b/src/libslic3r/SLAPrint.hpp
@ -385,12 +385,13 @@ public:
    template<class Fn> void draw_layers(size_t layer_num, Fn &&drawfn)
    {
        m_layers.resize(layer_num);
-        sla::ccr::enumerate(m_layers.begin(), m_layers.end(),
-                            [this, &drawfn](sla::EncodedRaster& enc, size_t idx) {
-                                auto rst = create_raster();
-                                drawfn(*rst, idx);
-                                enc = rst->encode(get_encoder());
-                            });
+        // For each layer index: create a raster, let drawfn paint into it,
+        // then store the encoded result in m_layers[idx].
+        // The start index is size_t(0) so that both bounds deduce the same
+        // integer type for for_each (0ul is not size_t on every platform).
+        sla::ccr::for_each(size_t(0), m_layers.size(),
+                           [this, &drawfn] (size_t idx) {
+                               sla::EncodedRaster& enc = m_layers[idx];
+                               auto rst = create_raster();
+                               drawfn(*rst, idx);
+                               enc = rst->encode(get_encoder());
+                           });
    }
 };

--- a/src/libslic3r/SLAPrintSteps.cpp
+++ b/src/libslic3r/SLAPrintSteps.cpp
@ -264,11 +264,12 @@ void SLAPrint::Steps::slice_model(SLAPrintObject &po)
        std::vector<ExPolygons> interior_slices;
        interior_slicer.slice(slice_grid, SlicingMode::Regular, closing_r, &interior_slices, thr);

-        sla::ccr::enumerate(interior_slices.begin(), interior_slices.end(),
-                            [&po](const ExPolygons &slice, size_t i) {
-                                po.m_model_slices[i] =
-                                    diff_ex(po.m_model_slices[i], slice);
-                            });
+        // size_t(0) keeps both bounds the same type for for_each's deduction.
+        sla::ccr::for_each(size_t(0), interior_slices.size(),
+                           [&po, &interior_slices] (size_t i) {
+                              const ExPolygons &slice = interior_slices[i];
+                              po.m_model_slices[i] =
+                                  diff_ex(po.m_model_slices[i], slice);
+                           });
    }
    
    auto mit = slindex_it;
@ -679,14 +680,16 @@ void SLAPrint::Steps::merge_slices_and_eval_stats() {
    using Lock = std::lock_guard<sla::ccr::SpinningMutex>;
    
    // Going to parallel:
-    auto printlayerfn = [
+    auto printlayerfn = [this,
            // functions and read only vars
            areafn, area_fill, display_area, exp_time, init_exp_time, fast_tilt, slow_tilt, delta_fade_time,
            
            // write vars
            &mutex, &models_volume, &supports_volume, &estim_time, &slow_layers,
-            &fast_layers, &fade_layer_time](PrintLayer& layer, size_t sliced_layer_cnt)
+            &fast_layers, &fade_layer_time](size_t sliced_layer_cnt)
    {
+        PrintLayer &layer = m_print->m_printer_input[sliced_layer_cnt];
+
        // vector of slice record references
        auto& slicerecord_references = layer.slices();
        
@ -789,7 +792,7 @@ void SLAPrint::Steps::merge_slices_and_eval_stats() {
    
    // sequential version for debugging:
    // for(size_t i = 0; i < m_printer_input.size(); ++i) printlayerfn(i);
-    sla::ccr::enumerate(printer_input.begin(), printer_input.end(), printlayerfn);
+    // size_t(0) keeps both bounds the same type for for_each's deduction.
+    sla::ccr::for_each(size_t(0), printer_input.size(), printlayerfn);
    
    auto SCALING2 = SCALING_FACTOR * SCALING_FACTOR;
    print_statistics.support_used_material = supports_volume * SCALING2;
--- a/src/libslic3r/libslic3r.h
+++ b/src/libslic3r/libslic3r.h
@ -261,6 +261,11 @@ using IntegerOnly = std::enable_if_t<std::is_integral<T>::value, O>;
 template<class T, class O = T>
 using ArithmeticOnly = std::enable_if_t<std::is_arithmetic<T>::value, O>;

+// Resolves to O only when std::iterator_traits<T> names a value_type that
+// is not void; otherwise the alias is ill-formed and drops out of overload
+// resolution.
+template<class T, class O = T>
+using IteratorOnly = std::enable_if_t<
+    !std::is_same<typename std::iterator_traits<T>::value_type, void>::value,
+    O>;
+
+
 } // namespace Slic3r

 #endif