replace ccr_::enumerate with flexible for_each

enumerate is unusual and would only work effectively with random access iterators

this for_each takes advantage of tbb blocked_range
replace ccr_::enumerate with flexible for_each

enumerate is unusual and would only work effectively with random access iterators

this for_each takes advantage of tbb blocked_range
This commit is contained in:
tamasmeszaros 2020-08-05 15:49:36 +02:00
parent 9486901b93
commit 929cea59f3
7 changed files with 169 additions and 125 deletions

View File

@ -4,6 +4,8 @@
#include <tbb/spin_mutex.h>
#include <tbb/mutex.h>
#include <tbb/parallel_for.h>
#include <algorithm>
#include <libslic3r/libslic3r.h>
namespace Slic3r {
namespace sla {
@ -17,16 +19,29 @@ template<bool> struct _ccr {};
template<> struct _ccr<true>
{
using SpinningMutex = tbb::spin_mutex;
using BlockingMutex = tbb::mutex;
using BlockingMutex = tbb::mutex;
template<class It, class Fn>
static inline void enumerate(It from, It to, Fn fn)
static IteratorOnly<It, void> for_each(It from,
It to,
Fn && fn,
size_t granularity = 1)
{
auto iN = to - from;
size_t N = iN < 0 ? 0 : size_t(iN);
tbb::parallel_for(size_t(0), N, [from, fn](size_t n) {
fn(*(from + decltype(iN)(n)), n);
tbb::parallel_for(tbb::blocked_range{from, to, granularity},
[&fn, from](const auto &range) {
for (auto &el : range) fn(el);
});
}
template<class I, class Fn>
static IntegerOnly<I, void> for_each(I from,
I to,
Fn && fn,
size_t granularity = 1)
{
tbb::parallel_for(tbb::blocked_range{from, to, granularity},
[&fn](const auto &range) {
for (I i = range.begin(); i < range.end(); ++i) fn(i);
});
}
};
@ -39,11 +54,23 @@ private:
public:
using SpinningMutex = _Mtx;
using BlockingMutex = _Mtx;
template<class It, class Fn>
static inline void enumerate(It from, It to, Fn fn)
static IteratorOnly<It, void> for_each(It from,
It to,
Fn &&fn,
size_t /* ignore granularity */ = 1)
{
for (auto it = from; it != to; ++it) fn(*it, size_t(it - from));
for (auto it = from; it != to; ++it) fn(*it);
}
template<class I, class Fn>
static IntegerOnly<I, void> for_each(I from,
I to,
Fn &&fn,
size_t /* ignore granularity */ = 1)
{
for (I i = from; i < to; ++i) fn(i);
}
};

View File

@ -320,10 +320,10 @@ PointSet normals(const PointSet& points,
PointSet ret(range.size(), 3);
// for (size_t ridx = 0; ridx < range.size(); ++ridx)
ccr::enumerate(
range.begin(), range.end(),
[&ret, &mesh, &points, thr, eps](unsigned el, size_t ridx) {
ccr::for_each(size_t(0), range.size(),
[&ret, &mesh, &points, thr, eps, &range](size_t ridx) {
thr();
unsigned el = range[ridx];
auto eidx = Eigen::Index(el);
int faceid = 0;
Vec3d p;

View File

@ -4,6 +4,7 @@
#include <tbb/parallel_for.h>
#include "SupportPointGenerator.hpp"
#include "Concurrency.hpp"
#include "Model.hpp"
#include "ExPolygon.hpp"
#include "SVG.hpp"
@ -87,27 +88,28 @@ void SupportPointGenerator::project_onto_mesh(std::vector<sla::SupportPoint>& po
// The function makes sure that all the points are really exactly placed on the mesh.
// Use a reasonable granularity to account for the worker thread synchronization cost.
tbb::parallel_for(tbb::blocked_range<size_t>(0, points.size(), 64),
[this, &points](const tbb::blocked_range<size_t>& range) {
for (size_t point_id = range.begin(); point_id < range.end(); ++ point_id) {
if ((point_id % 16) == 0)
// Don't call the following function too often as it flushes CPU write caches due to synchronization primitves.
m_throw_on_cancel();
Vec3f& p = points[point_id].pos;
// Project the point upward and downward and choose the closer intersection with the mesh.
sla::IndexedMesh::hit_result hit_up = m_emesh.query_ray_hit(p.cast<double>(), Vec3d(0., 0., 1.));
sla::IndexedMesh::hit_result hit_down = m_emesh.query_ray_hit(p.cast<double>(), Vec3d(0., 0., -1.));
static constexpr size_t gransize = 64;
bool up = hit_up.is_hit();
bool down = hit_down.is_hit();
ccr_par::for_each(size_t(0), points.size(), [this, &points](size_t idx)
{
if ((idx % 16) == 0)
// Don't call the following function too often as it flushes CPU write caches due to synchronization primitves.
m_throw_on_cancel();
if (!up && !down)
continue;
Vec3f& p = points[idx].pos;
// Project the point upward and downward and choose the closer intersection with the mesh.
sla::IndexedMesh::hit_result hit_up = m_emesh.query_ray_hit(p.cast<double>(), Vec3d(0., 0., 1.));
sla::IndexedMesh::hit_result hit_down = m_emesh.query_ray_hit(p.cast<double>(), Vec3d(0., 0., -1.));
sla::IndexedMesh::hit_result& hit = (!down || (hit_up.distance() < hit_down.distance())) ? hit_up : hit_down;
p = p + (hit.distance() * hit.direction()).cast<float>();
}
});
bool up = hit_up.is_hit();
bool down = hit_down.is_hit();
if (!up && !down)
return;
sla::IndexedMesh::hit_result& hit = (!down || (hit_up.distance() < hit_down.distance())) ? hit_up : hit_down;
p = p + (hit.distance() * hit.direction()).cast<float>();
}, gransize);
}
static std::vector<SupportPointGenerator::MyLayer> make_layers(
@ -126,78 +128,80 @@ static std::vector<SupportPointGenerator::MyLayer> make_layers(
//const float pixel_area = pow(wxGetApp().preset_bundle->project_config.option<ConfigOptionFloat>("display_width") / wxGetApp().preset_bundle->project_config.option<ConfigOptionInt>("display_pixels_x"), 2.f); //
const float pixel_area = pow(0.047f, 2.f);
// Use a reasonable granularity to account for the worker thread synchronization cost.
tbb::parallel_for(tbb::blocked_range<size_t>(0, layers.size(), 32),
[&layers, &slices, &heights, pixel_area, throw_on_cancel](const tbb::blocked_range<size_t>& range) {
for (size_t layer_id = range.begin(); layer_id < range.end(); ++ layer_id) {
if ((layer_id % 8) == 0)
// Don't call the following function too often as it flushes CPU write caches due to synchronization primitves.
throw_on_cancel();
SupportPointGenerator::MyLayer &layer = layers[layer_id];
const ExPolygons &islands = slices[layer_id];
//FIXME WTF?
const float height = (layer_id>2 ? heights[layer_id-3] : heights[0]-(heights[1]-heights[0]));
layer.islands.reserve(islands.size());
for (const ExPolygon &island : islands) {
float area = float(island.area() * SCALING_FACTOR * SCALING_FACTOR);
if (area >= pixel_area)
//FIXME this is not a correct centroid of a polygon with holes.
layer.islands.emplace_back(layer, island, get_extents(island.contour), Slic3r::unscale(island.contour.centroid()).cast<float>(), area, height);
}
}
});
ccr_par::for_each(0ul, layers.size(),
[&layers, &slices, &heights, pixel_area, throw_on_cancel](size_t layer_id)
{
if ((layer_id % 8) == 0)
// Don't call the following function too often as it flushes
// CPU write caches due to synchronization primitves.
throw_on_cancel();
SupportPointGenerator::MyLayer &layer = layers[layer_id];
const ExPolygons & islands = slices[layer_id];
// FIXME WTF?
const float height = (layer_id > 2 ?
heights[layer_id - 3] :
heights[0] - (heights[1] - heights[0]));
layer.islands.reserve(islands.size());
for (const ExPolygon &island : islands) {
float area = float(island.area() * SCALING_FACTOR * SCALING_FACTOR);
if (area >= pixel_area)
// FIXME this is not a correct centroid of a polygon with holes.
layer.islands.emplace_back(layer, island, get_extents(island.contour),
unscaled<float>(island.contour.centroid()), area, height);
}
}, 32 /*gransize*/);
// Calculate overlap of successive layers. Link overlapping islands.
tbb::parallel_for(tbb::blocked_range<size_t>(1, layers.size(), 8),
[&layers, &heights, throw_on_cancel](const tbb::blocked_range<size_t>& range) {
for (size_t layer_id = range.begin(); layer_id < range.end(); ++layer_id) {
if ((layer_id % 2) == 0)
// Don't call the following function too often as it flushes CPU write caches due to synchronization primitves.
throw_on_cancel();
SupportPointGenerator::MyLayer &layer_above = layers[layer_id];
SupportPointGenerator::MyLayer &layer_below = layers[layer_id - 1];
//FIXME WTF?
const float layer_height = (layer_id!=0 ? heights[layer_id]-heights[layer_id-1] : heights[0]);
const float safe_angle = 5.f * (float(M_PI)/180.f); // smaller number - less supports
const float between_layers_offset = float(scale_(layer_height / std::tan(safe_angle)));
const float slope_angle = 75.f * (float(M_PI)/180.f); // smaller number - less supports
const float slope_offset = float(scale_(layer_height / std::tan(slope_angle)));
//FIXME This has a quadratic time complexity, it will be excessively slow for many tiny islands.
for (SupportPointGenerator::Structure &top : layer_above.islands) {
for (SupportPointGenerator::Structure &bottom : layer_below.islands) {
float overlap_area = top.overlap_area(bottom);
if (overlap_area > 0) {
top.islands_below.emplace_back(&bottom, overlap_area);
bottom.islands_above.emplace_back(&top, overlap_area);
}
}
if (! top.islands_below.empty()) {
Polygons top_polygons = to_polygons(*top.polygon);
Polygons bottom_polygons = top.polygons_below();
top.overhangs = diff_ex(top_polygons, bottom_polygons);
if (! top.overhangs.empty()) {
top.overhangs_area = 0.f;
std::vector<std::pair<ExPolygon*, float>> expolys_with_areas;
for (ExPolygon &ex : top.overhangs) {
float area = float(ex.area());
expolys_with_areas.emplace_back(&ex, area);
top.overhangs_area += area;
}
std::sort(expolys_with_areas.begin(), expolys_with_areas.end(),
ccr_par::for_each(1ul, layers.size(),
[&layers, &heights, throw_on_cancel] (size_t layer_id)
{
if ((layer_id % 2) == 0)
// Don't call the following function too often as it flushes CPU write caches due to synchronization primitves.
throw_on_cancel();
SupportPointGenerator::MyLayer &layer_above = layers[layer_id];
SupportPointGenerator::MyLayer &layer_below = layers[layer_id - 1];
//FIXME WTF?
const float layer_height = (layer_id!=0 ? heights[layer_id]-heights[layer_id-1] : heights[0]);
const float safe_angle = 5.f * (float(M_PI)/180.f); // smaller number - less supports
const float between_layers_offset = float(scale_(layer_height / std::tan(safe_angle)));
const float slope_angle = 75.f * (float(M_PI)/180.f); // smaller number - less supports
const float slope_offset = float(scale_(layer_height / std::tan(slope_angle)));
//FIXME This has a quadratic time complexity, it will be excessively slow for many tiny islands.
for (SupportPointGenerator::Structure &top : layer_above.islands) {
for (SupportPointGenerator::Structure &bottom : layer_below.islands) {
float overlap_area = top.overlap_area(bottom);
if (overlap_area > 0) {
top.islands_below.emplace_back(&bottom, overlap_area);
bottom.islands_above.emplace_back(&top, overlap_area);
}
}
if (! top.islands_below.empty()) {
Polygons top_polygons = to_polygons(*top.polygon);
Polygons bottom_polygons = top.polygons_below();
top.overhangs = diff_ex(top_polygons, bottom_polygons);
if (! top.overhangs.empty()) {
top.overhangs_area = 0.f;
std::vector<std::pair<ExPolygon*, float>> expolys_with_areas;
for (ExPolygon &ex : top.overhangs) {
float area = float(ex.area());
expolys_with_areas.emplace_back(&ex, area);
top.overhangs_area += area;
}
std::sort(expolys_with_areas.begin(), expolys_with_areas.end(),
[](const std::pair<ExPolygon*, float> &p1, const std::pair<ExPolygon*, float> &p2)
{ return p1.second > p2.second; });
ExPolygons overhangs_sorted;
for (auto &p : expolys_with_areas)
overhangs_sorted.emplace_back(std::move(*p.first));
top.overhangs = std::move(overhangs_sorted);
top.overhangs_area *= float(SCALING_FACTOR * SCALING_FACTOR);
top.overhangs_slopes = diff_ex(top_polygons, offset(bottom_polygons, slope_offset));
top.dangling_areas = diff_ex(top_polygons, offset(bottom_polygons, between_layers_offset));
}
}
}
}
});
{ return p1.second > p2.second; });
ExPolygons overhangs_sorted;
for (auto &p : expolys_with_areas)
overhangs_sorted.emplace_back(std::move(*p.first));
top.overhangs = std::move(overhangs_sorted);
top.overhangs_area *= float(SCALING_FACTOR * SCALING_FACTOR);
top.overhangs_slopes = diff_ex(top_polygons, offset(bottom_polygons, slope_offset));
top.dangling_areas = diff_ex(top_polygons, offset(bottom_polygons, between_layers_offset));
}
}
}
}, 8 /* gransize */);
return layers;
}

View File

@ -209,14 +209,16 @@ IndexedMesh::hit_result SupportTreeBuildsteps::pinhead_mesh_intersect(
// of the pinhead robe (side) surface. The result will be the smallest
// hit distance.
ccr::enumerate(hits.begin(), hits.end(),
[&m, &rings, sd](HitResult &hit, size_t i) {
ccr::for_each(size_t(0), hits.size(),
[&m, &rings, sd, &hits](size_t i) {
// Point on the circle on the pin sphere
Vec3d ps = rings.pinring(i);
// This is the point on the circle on the back sphere
Vec3d p = rings.backring(i);
auto &hit = hits[i];
// Point ps is not on mesh but can be inside or
// outside as well. This would cause many problems
// with ray-casting. To detect the position we will
@ -265,8 +267,10 @@ IndexedMesh::hit_result SupportTreeBuildsteps::bridge_mesh_intersect(
// Hit results
std::array<Hit, SAMPLES> hits;
ccr::enumerate(hits.begin(), hits.end(),
[this, r, src, /*ins_check,*/ &ring, dir, sd] (Hit &hit, size_t i) {
ccr::for_each(size_t(0), hits.size(),
[this, r, src, /*ins_check,*/ &ring, dir, sd, &hits] (size_t i)
{
Hit &hit = hits[i];
// Point on the circle on the pin sphere
Vec3d p = ring.get(i, src, r + sd);
@ -744,10 +748,10 @@ void SupportTreeBuildsteps::filter()
}
};
ccr::enumerate(filtered_indices.begin(), filtered_indices.end(),
[this, &filterfn](unsigned fidx, size_t i) {
filterfn(fidx, i, m_cfg.head_back_radius_mm);
});
ccr::for_each(0ul, filtered_indices.size(),
[this, &filterfn, &filtered_indices] (size_t i) {
filterfn(filtered_indices[i], i, m_cfg.head_back_radius_mm);
});
for (size_t i = 0; i < heads.size(); ++i)
if (heads[i].is_valid()) {
@ -1033,8 +1037,8 @@ void SupportTreeBuildsteps::routing_to_model()
// If it can be routed there with a bridge shorter than
// min_bridge_distance.
ccr::enumerate(m_iheads_onmodel.begin(), m_iheads_onmodel.end(),
[this] (const unsigned idx, size_t) {
ccr::for_each(m_iheads_onmodel.begin(), m_iheads_onmodel.end(),
[this] (const unsigned idx) {
m_thr();
auto& head = m_builder.head(idx);

View File

@ -385,12 +385,13 @@ public:
template<class Fn> void draw_layers(size_t layer_num, Fn &&drawfn)
{
m_layers.resize(layer_num);
sla::ccr::enumerate(m_layers.begin(), m_layers.end(),
[this, &drawfn](sla::EncodedRaster& enc, size_t idx) {
auto rst = create_raster();
drawfn(*rst, idx);
enc = rst->encode(get_encoder());
});
sla::ccr::for_each(0ul, m_layers.size(),
[this, &drawfn] (size_t idx) {
sla::EncodedRaster& enc = m_layers[idx];
auto rst = create_raster();
drawfn(*rst, idx);
enc = rst->encode(get_encoder());
});
}
};

View File

@ -264,11 +264,12 @@ void SLAPrint::Steps::slice_model(SLAPrintObject &po)
std::vector<ExPolygons> interior_slices;
interior_slicer.slice(slice_grid, SlicingMode::Regular, closing_r, &interior_slices, thr);
sla::ccr::enumerate(interior_slices.begin(), interior_slices.end(),
[&po](const ExPolygons &slice, size_t i) {
po.m_model_slices[i] =
diff_ex(po.m_model_slices[i], slice);
});
sla::ccr::for_each(0ul, interior_slices.size(),
[&po, &interior_slices] (size_t i) {
const ExPolygons &slice = interior_slices[i];
po.m_model_slices[i] =
diff_ex(po.m_model_slices[i], slice);
});
}
auto mit = slindex_it;
@ -679,14 +680,16 @@ void SLAPrint::Steps::merge_slices_and_eval_stats() {
using Lock = std::lock_guard<sla::ccr::SpinningMutex>;
// Going to parallel:
auto printlayerfn = [
auto printlayerfn = [this,
// functions and read only vars
areafn, area_fill, display_area, exp_time, init_exp_time, fast_tilt, slow_tilt, delta_fade_time,
// write vars
&mutex, &models_volume, &supports_volume, &estim_time, &slow_layers,
&fast_layers, &fade_layer_time](PrintLayer& layer, size_t sliced_layer_cnt)
&fast_layers, &fade_layer_time](size_t sliced_layer_cnt)
{
PrintLayer &layer = m_print->m_printer_input[sliced_layer_cnt];
// vector of slice record references
auto& slicerecord_references = layer.slices();
@ -789,7 +792,7 @@ void SLAPrint::Steps::merge_slices_and_eval_stats() {
// sequential version for debugging:
// for(size_t i = 0; i < m_printer_input.size(); ++i) printlayerfn(i);
sla::ccr::enumerate(printer_input.begin(), printer_input.end(), printlayerfn);
sla::ccr::for_each(0ul, printer_input.size(), printlayerfn);
auto SCALING2 = SCALING_FACTOR * SCALING_FACTOR;
print_statistics.support_used_material = supports_volume * SCALING2;

View File

@ -261,6 +261,11 @@ using IntegerOnly = std::enable_if_t<std::is_integral<T>::value, O>;
template<class T, class O = T>
using ArithmeticOnly = std::enable_if_t<std::is_arithmetic<T>::value, O>;
template<class T, class O = T>
using IteratorOnly = std::enable_if_t<
!std::is_same_v<typename std::iterator_traits<T>::value_type, void>, O
>;
} // namespace Slic3r
#endif