replace ccr_::enumerate with flexible for_each
enumerate is unusual and would only work effectively with random access iterators. This for_each takes advantage of tbb::blocked_range.
This commit is contained in:
parent
9486901b93
commit
929cea59f3
@ -4,6 +4,8 @@
|
||||
#include <tbb/spin_mutex.h>
|
||||
#include <tbb/mutex.h>
|
||||
#include <tbb/parallel_for.h>
|
||||
#include <algorithm>
|
||||
#include <libslic3r/libslic3r.h>
|
||||
|
||||
namespace Slic3r {
|
||||
namespace sla {
|
||||
@ -17,16 +19,29 @@ template<bool> struct _ccr {};
|
||||
template<> struct _ccr<true>
|
||||
{
|
||||
using SpinningMutex = tbb::spin_mutex;
|
||||
using BlockingMutex = tbb::mutex;
|
||||
|
||||
using BlockingMutex = tbb::mutex;
|
||||
|
||||
template<class It, class Fn>
|
||||
static inline void enumerate(It from, It to, Fn fn)
|
||||
static IteratorOnly<It, void> for_each(It from,
|
||||
It to,
|
||||
Fn && fn,
|
||||
size_t granularity = 1)
|
||||
{
|
||||
auto iN = to - from;
|
||||
size_t N = iN < 0 ? 0 : size_t(iN);
|
||||
|
||||
tbb::parallel_for(size_t(0), N, [from, fn](size_t n) {
|
||||
fn(*(from + decltype(iN)(n)), n);
|
||||
tbb::parallel_for(tbb::blocked_range{from, to, granularity},
|
||||
[&fn, from](const auto &range) {
|
||||
for (auto &el : range) fn(el);
|
||||
});
|
||||
}
|
||||
|
||||
template<class I, class Fn>
|
||||
static IntegerOnly<I, void> for_each(I from,
|
||||
I to,
|
||||
Fn && fn,
|
||||
size_t granularity = 1)
|
||||
{
|
||||
tbb::parallel_for(tbb::blocked_range{from, to, granularity},
|
||||
[&fn](const auto &range) {
|
||||
for (I i = range.begin(); i < range.end(); ++i) fn(i);
|
||||
});
|
||||
}
|
||||
};
|
||||
@ -39,11 +54,23 @@ private:
|
||||
public:
|
||||
using SpinningMutex = _Mtx;
|
||||
using BlockingMutex = _Mtx;
|
||||
|
||||
|
||||
template<class It, class Fn>
|
||||
static inline void enumerate(It from, It to, Fn fn)
|
||||
static IteratorOnly<It, void> for_each(It from,
|
||||
It to,
|
||||
Fn &&fn,
|
||||
size_t /* ignore granularity */ = 1)
|
||||
{
|
||||
for (auto it = from; it != to; ++it) fn(*it, size_t(it - from));
|
||||
for (auto it = from; it != to; ++it) fn(*it);
|
||||
}
|
||||
|
||||
template<class I, class Fn>
|
||||
static IntegerOnly<I, void> for_each(I from,
|
||||
I to,
|
||||
Fn &&fn,
|
||||
size_t /* ignore granularity */ = 1)
|
||||
{
|
||||
for (I i = from; i < to; ++i) fn(i);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -320,10 +320,10 @@ PointSet normals(const PointSet& points,
|
||||
PointSet ret(range.size(), 3);
|
||||
|
||||
// for (size_t ridx = 0; ridx < range.size(); ++ridx)
|
||||
ccr::enumerate(
|
||||
range.begin(), range.end(),
|
||||
[&ret, &mesh, &points, thr, eps](unsigned el, size_t ridx) {
|
||||
ccr::for_each(size_t(0), range.size(),
|
||||
[&ret, &mesh, &points, thr, eps, &range](size_t ridx) {
|
||||
thr();
|
||||
unsigned el = range[ridx];
|
||||
auto eidx = Eigen::Index(el);
|
||||
int faceid = 0;
|
||||
Vec3d p;
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <tbb/parallel_for.h>
|
||||
|
||||
#include "SupportPointGenerator.hpp"
|
||||
#include "Concurrency.hpp"
|
||||
#include "Model.hpp"
|
||||
#include "ExPolygon.hpp"
|
||||
#include "SVG.hpp"
|
||||
@ -87,27 +88,28 @@ void SupportPointGenerator::project_onto_mesh(std::vector<sla::SupportPoint>& po
|
||||
// The function makes sure that all the points are really exactly placed on the mesh.
|
||||
|
||||
// Use a reasonable granularity to account for the worker thread synchronization cost.
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(0, points.size(), 64),
|
||||
[this, &points](const tbb::blocked_range<size_t>& range) {
|
||||
for (size_t point_id = range.begin(); point_id < range.end(); ++ point_id) {
|
||||
if ((point_id % 16) == 0)
|
||||
// Don't call the following function too often as it flushes CPU write caches due to synchronization primitives.
|
||||
m_throw_on_cancel();
|
||||
Vec3f& p = points[point_id].pos;
|
||||
// Project the point upward and downward and choose the closer intersection with the mesh.
|
||||
sla::IndexedMesh::hit_result hit_up = m_emesh.query_ray_hit(p.cast<double>(), Vec3d(0., 0., 1.));
|
||||
sla::IndexedMesh::hit_result hit_down = m_emesh.query_ray_hit(p.cast<double>(), Vec3d(0., 0., -1.));
|
||||
static constexpr size_t gransize = 64;
|
||||
|
||||
bool up = hit_up.is_hit();
|
||||
bool down = hit_down.is_hit();
|
||||
ccr_par::for_each(size_t(0), points.size(), [this, &points](size_t idx)
|
||||
{
|
||||
if ((idx % 16) == 0)
|
||||
// Don't call the following function too often as it flushes CPU write caches due to synchronization primitives.
|
||||
m_throw_on_cancel();
|
||||
|
||||
if (!up && !down)
|
||||
continue;
|
||||
Vec3f& p = points[idx].pos;
|
||||
// Project the point upward and downward and choose the closer intersection with the mesh.
|
||||
sla::IndexedMesh::hit_result hit_up = m_emesh.query_ray_hit(p.cast<double>(), Vec3d(0., 0., 1.));
|
||||
sla::IndexedMesh::hit_result hit_down = m_emesh.query_ray_hit(p.cast<double>(), Vec3d(0., 0., -1.));
|
||||
|
||||
sla::IndexedMesh::hit_result& hit = (!down || (hit_up.distance() < hit_down.distance())) ? hit_up : hit_down;
|
||||
p = p + (hit.distance() * hit.direction()).cast<float>();
|
||||
}
|
||||
});
|
||||
bool up = hit_up.is_hit();
|
||||
bool down = hit_down.is_hit();
|
||||
|
||||
if (!up && !down)
|
||||
return;
|
||||
|
||||
sla::IndexedMesh::hit_result& hit = (!down || (hit_up.distance() < hit_down.distance())) ? hit_up : hit_down;
|
||||
p = p + (hit.distance() * hit.direction()).cast<float>();
|
||||
}, gransize);
|
||||
}
|
||||
|
||||
static std::vector<SupportPointGenerator::MyLayer> make_layers(
|
||||
@ -126,78 +128,80 @@ static std::vector<SupportPointGenerator::MyLayer> make_layers(
|
||||
//const float pixel_area = pow(wxGetApp().preset_bundle->project_config.option<ConfigOptionFloat>("display_width") / wxGetApp().preset_bundle->project_config.option<ConfigOptionInt>("display_pixels_x"), 2.f); //
|
||||
const float pixel_area = pow(0.047f, 2.f);
|
||||
|
||||
// Use a reasonable granularity to account for the worker thread synchronization cost.
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(0, layers.size(), 32),
|
||||
[&layers, &slices, &heights, pixel_area, throw_on_cancel](const tbb::blocked_range<size_t>& range) {
|
||||
for (size_t layer_id = range.begin(); layer_id < range.end(); ++ layer_id) {
|
||||
if ((layer_id % 8) == 0)
|
||||
// Don't call the following function too often as it flushes CPU write caches due to synchronization primitives.
|
||||
throw_on_cancel();
|
||||
SupportPointGenerator::MyLayer &layer = layers[layer_id];
|
||||
const ExPolygons &islands = slices[layer_id];
|
||||
//FIXME WTF?
|
||||
const float height = (layer_id>2 ? heights[layer_id-3] : heights[0]-(heights[1]-heights[0]));
|
||||
layer.islands.reserve(islands.size());
|
||||
for (const ExPolygon &island : islands) {
|
||||
float area = float(island.area() * SCALING_FACTOR * SCALING_FACTOR);
|
||||
if (area >= pixel_area)
|
||||
//FIXME this is not a correct centroid of a polygon with holes.
|
||||
layer.islands.emplace_back(layer, island, get_extents(island.contour), Slic3r::unscale(island.contour.centroid()).cast<float>(), area, height);
|
||||
}
|
||||
}
|
||||
});
|
||||
ccr_par::for_each(0ul, layers.size(),
|
||||
[&layers, &slices, &heights, pixel_area, throw_on_cancel](size_t layer_id)
|
||||
{
|
||||
if ((layer_id % 8) == 0)
|
||||
// Don't call the following function too often as it flushes
|
||||
// CPU write caches due to synchronization primitives.
|
||||
throw_on_cancel();
|
||||
|
||||
SupportPointGenerator::MyLayer &layer = layers[layer_id];
|
||||
const ExPolygons & islands = slices[layer_id];
|
||||
// FIXME WTF?
|
||||
const float height = (layer_id > 2 ?
|
||||
heights[layer_id - 3] :
|
||||
heights[0] - (heights[1] - heights[0]));
|
||||
layer.islands.reserve(islands.size());
|
||||
for (const ExPolygon &island : islands) {
|
||||
float area = float(island.area() * SCALING_FACTOR * SCALING_FACTOR);
|
||||
if (area >= pixel_area)
|
||||
// FIXME this is not a correct centroid of a polygon with holes.
|
||||
layer.islands.emplace_back(layer, island, get_extents(island.contour),
|
||||
unscaled<float>(island.contour.centroid()), area, height);
|
||||
}
|
||||
}, 32 /*gransize*/);
|
||||
|
||||
// Calculate overlap of successive layers. Link overlapping islands.
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(1, layers.size(), 8),
|
||||
[&layers, &heights, throw_on_cancel](const tbb::blocked_range<size_t>& range) {
|
||||
for (size_t layer_id = range.begin(); layer_id < range.end(); ++layer_id) {
|
||||
if ((layer_id % 2) == 0)
|
||||
// Don't call the following function too often as it flushes CPU write caches due to synchronization primitives.
|
||||
throw_on_cancel();
|
||||
SupportPointGenerator::MyLayer &layer_above = layers[layer_id];
|
||||
SupportPointGenerator::MyLayer &layer_below = layers[layer_id - 1];
|
||||
//FIXME WTF?
|
||||
const float layer_height = (layer_id!=0 ? heights[layer_id]-heights[layer_id-1] : heights[0]);
|
||||
const float safe_angle = 5.f * (float(M_PI)/180.f); // smaller number - less supports
|
||||
const float between_layers_offset = float(scale_(layer_height / std::tan(safe_angle)));
|
||||
const float slope_angle = 75.f * (float(M_PI)/180.f); // smaller number - less supports
|
||||
const float slope_offset = float(scale_(layer_height / std::tan(slope_angle)));
|
||||
//FIXME This has a quadratic time complexity, it will be excessively slow for many tiny islands.
|
||||
for (SupportPointGenerator::Structure &top : layer_above.islands) {
|
||||
for (SupportPointGenerator::Structure &bottom : layer_below.islands) {
|
||||
float overlap_area = top.overlap_area(bottom);
|
||||
if (overlap_area > 0) {
|
||||
top.islands_below.emplace_back(&bottom, overlap_area);
|
||||
bottom.islands_above.emplace_back(&top, overlap_area);
|
||||
}
|
||||
}
|
||||
if (! top.islands_below.empty()) {
|
||||
Polygons top_polygons = to_polygons(*top.polygon);
|
||||
Polygons bottom_polygons = top.polygons_below();
|
||||
top.overhangs = diff_ex(top_polygons, bottom_polygons);
|
||||
if (! top.overhangs.empty()) {
|
||||
top.overhangs_area = 0.f;
|
||||
std::vector<std::pair<ExPolygon*, float>> expolys_with_areas;
|
||||
for (ExPolygon &ex : top.overhangs) {
|
||||
float area = float(ex.area());
|
||||
expolys_with_areas.emplace_back(&ex, area);
|
||||
top.overhangs_area += area;
|
||||
}
|
||||
std::sort(expolys_with_areas.begin(), expolys_with_areas.end(),
|
||||
ccr_par::for_each(1ul, layers.size(),
|
||||
[&layers, &heights, throw_on_cancel] (size_t layer_id)
|
||||
{
|
||||
if ((layer_id % 2) == 0)
|
||||
// Don't call the following function too often as it flushes CPU write caches due to synchronization primitives.
|
||||
throw_on_cancel();
|
||||
SupportPointGenerator::MyLayer &layer_above = layers[layer_id];
|
||||
SupportPointGenerator::MyLayer &layer_below = layers[layer_id - 1];
|
||||
//FIXME WTF?
|
||||
const float layer_height = (layer_id!=0 ? heights[layer_id]-heights[layer_id-1] : heights[0]);
|
||||
const float safe_angle = 5.f * (float(M_PI)/180.f); // smaller number - less supports
|
||||
const float between_layers_offset = float(scale_(layer_height / std::tan(safe_angle)));
|
||||
const float slope_angle = 75.f * (float(M_PI)/180.f); // smaller number - less supports
|
||||
const float slope_offset = float(scale_(layer_height / std::tan(slope_angle)));
|
||||
//FIXME This has a quadratic time complexity, it will be excessively slow for many tiny islands.
|
||||
for (SupportPointGenerator::Structure &top : layer_above.islands) {
|
||||
for (SupportPointGenerator::Structure &bottom : layer_below.islands) {
|
||||
float overlap_area = top.overlap_area(bottom);
|
||||
if (overlap_area > 0) {
|
||||
top.islands_below.emplace_back(&bottom, overlap_area);
|
||||
bottom.islands_above.emplace_back(&top, overlap_area);
|
||||
}
|
||||
}
|
||||
if (! top.islands_below.empty()) {
|
||||
Polygons top_polygons = to_polygons(*top.polygon);
|
||||
Polygons bottom_polygons = top.polygons_below();
|
||||
top.overhangs = diff_ex(top_polygons, bottom_polygons);
|
||||
if (! top.overhangs.empty()) {
|
||||
top.overhangs_area = 0.f;
|
||||
std::vector<std::pair<ExPolygon*, float>> expolys_with_areas;
|
||||
for (ExPolygon &ex : top.overhangs) {
|
||||
float area = float(ex.area());
|
||||
expolys_with_areas.emplace_back(&ex, area);
|
||||
top.overhangs_area += area;
|
||||
}
|
||||
std::sort(expolys_with_areas.begin(), expolys_with_areas.end(),
|
||||
[](const std::pair<ExPolygon*, float> &p1, const std::pair<ExPolygon*, float> &p2)
|
||||
{ return p1.second > p2.second; });
|
||||
ExPolygons overhangs_sorted;
|
||||
for (auto &p : expolys_with_areas)
|
||||
overhangs_sorted.emplace_back(std::move(*p.first));
|
||||
top.overhangs = std::move(overhangs_sorted);
|
||||
top.overhangs_area *= float(SCALING_FACTOR * SCALING_FACTOR);
|
||||
top.overhangs_slopes = diff_ex(top_polygons, offset(bottom_polygons, slope_offset));
|
||||
top.dangling_areas = diff_ex(top_polygons, offset(bottom_polygons, between_layers_offset));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
{ return p1.second > p2.second; });
|
||||
ExPolygons overhangs_sorted;
|
||||
for (auto &p : expolys_with_areas)
|
||||
overhangs_sorted.emplace_back(std::move(*p.first));
|
||||
top.overhangs = std::move(overhangs_sorted);
|
||||
top.overhangs_area *= float(SCALING_FACTOR * SCALING_FACTOR);
|
||||
top.overhangs_slopes = diff_ex(top_polygons, offset(bottom_polygons, slope_offset));
|
||||
top.dangling_areas = diff_ex(top_polygons, offset(bottom_polygons, between_layers_offset));
|
||||
}
|
||||
}
|
||||
}
|
||||
}, 8 /* gransize */);
|
||||
|
||||
return layers;
|
||||
}
|
||||
|
@ -209,14 +209,16 @@ IndexedMesh::hit_result SupportTreeBuildsteps::pinhead_mesh_intersect(
|
||||
// of the pinhead robe (side) surface. The result will be the smallest
|
||||
// hit distance.
|
||||
|
||||
ccr::enumerate(hits.begin(), hits.end(),
|
||||
[&m, &rings, sd](HitResult &hit, size_t i) {
|
||||
ccr::for_each(size_t(0), hits.size(),
|
||||
[&m, &rings, sd, &hits](size_t i) {
|
||||
|
||||
// Point on the circle on the pin sphere
|
||||
Vec3d ps = rings.pinring(i);
|
||||
// This is the point on the circle on the back sphere
|
||||
Vec3d p = rings.backring(i);
|
||||
|
||||
auto &hit = hits[i];
|
||||
|
||||
// Point ps is not on mesh but can be inside or
|
||||
// outside as well. This would cause many problems
|
||||
// with ray-casting. To detect the position we will
|
||||
@ -265,8 +267,10 @@ IndexedMesh::hit_result SupportTreeBuildsteps::bridge_mesh_intersect(
|
||||
// Hit results
|
||||
std::array<Hit, SAMPLES> hits;
|
||||
|
||||
ccr::enumerate(hits.begin(), hits.end(),
|
||||
[this, r, src, /*ins_check,*/ &ring, dir, sd] (Hit &hit, size_t i) {
|
||||
ccr::for_each(size_t(0), hits.size(),
|
||||
[this, r, src, /*ins_check,*/ &ring, dir, sd, &hits] (size_t i)
|
||||
{
|
||||
Hit &hit = hits[i];
|
||||
|
||||
// Point on the circle on the pin sphere
|
||||
Vec3d p = ring.get(i, src, r + sd);
|
||||
@ -744,10 +748,10 @@ void SupportTreeBuildsteps::filter()
|
||||
}
|
||||
};
|
||||
|
||||
ccr::enumerate(filtered_indices.begin(), filtered_indices.end(),
|
||||
[this, &filterfn](unsigned fidx, size_t i) {
|
||||
filterfn(fidx, i, m_cfg.head_back_radius_mm);
|
||||
});
|
||||
ccr::for_each(0ul, filtered_indices.size(),
|
||||
[this, &filterfn, &filtered_indices] (size_t i) {
|
||||
filterfn(filtered_indices[i], i, m_cfg.head_back_radius_mm);
|
||||
});
|
||||
|
||||
for (size_t i = 0; i < heads.size(); ++i)
|
||||
if (heads[i].is_valid()) {
|
||||
@ -1033,8 +1037,8 @@ void SupportTreeBuildsteps::routing_to_model()
|
||||
// If it can be routed there with a bridge shorter than
|
||||
// min_bridge_distance.
|
||||
|
||||
ccr::enumerate(m_iheads_onmodel.begin(), m_iheads_onmodel.end(),
|
||||
[this] (const unsigned idx, size_t) {
|
||||
ccr::for_each(m_iheads_onmodel.begin(), m_iheads_onmodel.end(),
|
||||
[this] (const unsigned idx) {
|
||||
m_thr();
|
||||
|
||||
auto& head = m_builder.head(idx);
|
||||
|
@ -385,12 +385,13 @@ public:
|
||||
template<class Fn> void draw_layers(size_t layer_num, Fn &&drawfn)
|
||||
{
|
||||
m_layers.resize(layer_num);
|
||||
sla::ccr::enumerate(m_layers.begin(), m_layers.end(),
|
||||
[this, &drawfn](sla::EncodedRaster& enc, size_t idx) {
|
||||
auto rst = create_raster();
|
||||
drawfn(*rst, idx);
|
||||
enc = rst->encode(get_encoder());
|
||||
});
|
||||
sla::ccr::for_each(0ul, m_layers.size(),
|
||||
[this, &drawfn] (size_t idx) {
|
||||
sla::EncodedRaster& enc = m_layers[idx];
|
||||
auto rst = create_raster();
|
||||
drawfn(*rst, idx);
|
||||
enc = rst->encode(get_encoder());
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -264,11 +264,12 @@ void SLAPrint::Steps::slice_model(SLAPrintObject &po)
|
||||
std::vector<ExPolygons> interior_slices;
|
||||
interior_slicer.slice(slice_grid, SlicingMode::Regular, closing_r, &interior_slices, thr);
|
||||
|
||||
sla::ccr::enumerate(interior_slices.begin(), interior_slices.end(),
|
||||
[&po](const ExPolygons &slice, size_t i) {
|
||||
po.m_model_slices[i] =
|
||||
diff_ex(po.m_model_slices[i], slice);
|
||||
});
|
||||
sla::ccr::for_each(0ul, interior_slices.size(),
|
||||
[&po, &interior_slices] (size_t i) {
|
||||
const ExPolygons &slice = interior_slices[i];
|
||||
po.m_model_slices[i] =
|
||||
diff_ex(po.m_model_slices[i], slice);
|
||||
});
|
||||
}
|
||||
|
||||
auto mit = slindex_it;
|
||||
@ -679,14 +680,16 @@ void SLAPrint::Steps::merge_slices_and_eval_stats() {
|
||||
using Lock = std::lock_guard<sla::ccr::SpinningMutex>;
|
||||
|
||||
// Going to parallel:
|
||||
auto printlayerfn = [
|
||||
auto printlayerfn = [this,
|
||||
// functions and read only vars
|
||||
areafn, area_fill, display_area, exp_time, init_exp_time, fast_tilt, slow_tilt, delta_fade_time,
|
||||
|
||||
// write vars
|
||||
&mutex, &models_volume, &supports_volume, &estim_time, &slow_layers,
|
||||
&fast_layers, &fade_layer_time](PrintLayer& layer, size_t sliced_layer_cnt)
|
||||
&fast_layers, &fade_layer_time](size_t sliced_layer_cnt)
|
||||
{
|
||||
PrintLayer &layer = m_print->m_printer_input[sliced_layer_cnt];
|
||||
|
||||
// vector of slice record references
|
||||
auto& slicerecord_references = layer.slices();
|
||||
|
||||
@ -789,7 +792,7 @@ void SLAPrint::Steps::merge_slices_and_eval_stats() {
|
||||
|
||||
// sequential version for debugging:
|
||||
// for(size_t i = 0; i < m_printer_input.size(); ++i) printlayerfn(i);
|
||||
sla::ccr::enumerate(printer_input.begin(), printer_input.end(), printlayerfn);
|
||||
sla::ccr::for_each(0ul, printer_input.size(), printlayerfn);
|
||||
|
||||
auto SCALING2 = SCALING_FACTOR * SCALING_FACTOR;
|
||||
print_statistics.support_used_material = supports_volume * SCALING2;
|
||||
|
@ -261,6 +261,11 @@ using IntegerOnly = std::enable_if_t<std::is_integral<T>::value, O>;
|
||||
template<class T, class O = T>
|
||||
using ArithmeticOnly = std::enable_if_t<std::is_arithmetic<T>::value, O>;
|
||||
|
||||
// SFINAE helper: names the type O (T itself by default) when
// std::iterator_traits<T> provides a value_type that is not void. For any
// other T the alias is ill-formed, which removes a template constrained
// with it from overload resolution.
template<class T, class O = T>
using IteratorOnly = std::enable_if_t<
    !std::is_same<typename std::iterator_traits<T>::value_type, void>::value,
    O>;
|
||||
|
||||
} // namespace Slic3r
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user