From 40a882d01e39c5c787400b0f06618fd874860051 Mon Sep 17 00:00:00 2001 From: bubnikv Date: Thu, 13 Jul 2017 15:52:19 +0200 Subject: [PATCH] Experimental feature, which may make the Clipper offsets run faster due to avoiding the 128bit multiply operations: A filtered predicate is tried to calculate SlopesEqual() to minimize the invocation of 128bit multiply operations. --- xs/src/clipper.cpp | 240 ++++++++++++------------------ xs/src/clipper.hpp | 29 ++-- xs/src/libslic3r/ClipperUtils.hpp | 2 + 3 files changed, 110 insertions(+), 161 deletions(-) diff --git a/xs/src/clipper.cpp b/xs/src/clipper.cpp index 4d40d4aa6..415c54bab 100644 --- a/xs/src/clipper.cpp +++ b/xs/src/clipper.cpp @@ -130,67 +130,37 @@ bool PolyNode::IsHole() const } //------------------------------------------------------------------------------ -#ifndef use_int32 - //------------------------------------------------------------------------------ // Int128 class (enables safe math on signed 64bit integers) -// eg Int128 val1((long64)9223372036854775807); //ie 2^63 -1 -// Int128 val2((long64)9223372036854775807); +// eg Int128 val1((int64_t)9223372036854775807); //ie 2^63 -1 +// Int128 val2((int64_t)9223372036854775807); // Int128 val3 = val1 * val2; // val3.AsString => "85070591730234615847396907784232501249" (8.5e+37) //------------------------------------------------------------------------------ - +// Used by the SlopesEqual() functions. class Int128 { public: - ulong64 lo; - long64 hi; + uint64_t lo; + int64_t hi; - Int128(long64 _lo = 0) - { - lo = (ulong64)_lo; - if (_lo < 0) hi = -1; else hi = 0; - } - - - Int128(const Int128 &val): lo(val.lo), hi(val.hi){} - - Int128(const long64& _hi, const ulong64& _lo): lo(_lo), hi(_hi){} + Int128(int64_t _lo = 0) : lo((uint64_t)_lo), hi((_lo < 0) ? -1 : 0) {} + Int128(const Int128 &val) : lo(val.lo), hi(val.hi) {} + Int128(const int64_t& _hi, const uint64_t& _lo) : lo(_lo), hi(_hi) {} - Int128& operator = (const long64 &val) + Int128& operator = (const int64_t &val) { - lo = (ulong64)val; - if (val < 0) hi = -1; else hi = 0; + lo = (uint64_t)val; + hi = (val < 0) ? -1 : 0; return *this; } - bool operator == (const Int128 &val) const - {return (hi == val.hi && lo == val.lo);} - - bool operator != (const Int128 &val) const - { return !(*this == val);} - - bool operator > (const Int128 &val) const - { - if (hi != val.hi) - return hi > val.hi; - else - return lo > val.lo; - } - - bool operator < (const Int128 &val) const - { - if (hi != val.hi) - return hi < val.hi; - else - return lo < val.lo; - } - - bool operator >= (const Int128 &val) const - { return !(*this < val);} - - bool operator <= (const Int128 &val) const - { return !(*this > val);} + bool operator == (const Int128 &val) const { return hi == val.hi && lo == val.lo; } + bool operator != (const Int128 &val) const { return ! (*this == val); } + bool operator > (const Int128 &val) const { return (hi == val.hi) ? lo > val.lo : hi > val.hi; } + bool operator < (const Int128 &val) const { return (hi == val.hi) ? lo < val.lo : hi < val.hi; } + bool operator >= (const Int128 &val) const { return ! (*this < val); } + bool operator <= (const Int128 &val) const { return ! (*this > val); } Int128& operator += (const Int128 &rhs) { @@ -220,58 +190,47 @@ class Int128 return result; } - Int128 operator-() const //unary negation - { - if (lo == 0) - return Int128(-hi, 0); - else - return Int128(~hi, ~lo + 1); - } + Int128 operator-() const { return (lo == 0) ? Int128(-hi, 0) : Int128(~hi, ~lo + 1); } operator double() const { const double shift64 = 18446744073709551616.0; //2^64 - if (hi < 0) - { - if (lo == 0) return (double)hi * shift64; - else return -(double)(~lo + ~hi * shift64); - } - else - return (double)(lo + hi * shift64); + return (hi < 0) ? + ((lo == 0) ? + (double)hi * shift64 : + -(double)(~lo + ~hi * shift64)) : + (double)(lo + hi * shift64); } + static inline Int128 Multiply(int64_t lhs, int64_t rhs) + { + bool negate = (lhs < 0) != (rhs < 0); + + if (lhs < 0) lhs = -lhs; + uint64_t int1Hi = uint64_t(lhs) >> 32; + uint64_t int1Lo = uint64_t(lhs & 0xFFFFFFFF); + + if (rhs < 0) rhs = -rhs; + uint64_t int2Hi = uint64_t(rhs) >> 32; + uint64_t int2Lo = uint64_t(rhs & 0xFFFFFFFF); + + //because the high (sign) bits in both int1Hi & int2Hi have been zeroed, + //there's no risk of 64 bit overflow in the following assignment + //(ie: $7FFFFFFF*$FFFFFFFF + $7FFFFFFF*$FFFFFFFF < 64bits) + uint64_t a = int1Hi * int2Hi; + uint64_t b = int1Lo * int2Lo; + //Result = A shl 64 + C shl 32 + B ... + uint64_t c = int1Hi * int2Lo + int1Lo * int2Hi; + + Int128 tmp; + tmp.hi = int64_t(a + (c >> 32)); + tmp.lo = int64_t(c << 32); + tmp.lo += int64_t(b); + if (tmp.lo < b) tmp.hi++; + if (negate) tmp = -tmp; + return tmp; + } }; -//------------------------------------------------------------------------------ - -inline Int128 Int128Mul (long64 lhs, long64 rhs) -{ - bool negate = (lhs < 0) != (rhs < 0); - - if (lhs < 0) lhs = -lhs; - ulong64 int1Hi = ulong64(lhs) >> 32; - ulong64 int1Lo = ulong64(lhs & 0xFFFFFFFF); - - if (rhs < 0) rhs = -rhs; - ulong64 int2Hi = ulong64(rhs) >> 32; - ulong64 int2Lo = ulong64(rhs & 0xFFFFFFFF); - - //because the high (sign) bits in both int1Hi & int2Hi have been zeroed, - //there's no risk of 64 bit overflow in the following assignment - //(ie: $7FFFFFFF*$FFFFFFFF + $7FFFFFFF*$FFFFFFFF < 64bits) - ulong64 a = int1Hi * int2Hi; - ulong64 b = int1Lo * int2Lo; - //Result = A shl 64 + C shl 32 + B ... - ulong64 c = int1Hi * int2Lo + int1Lo * int2Hi; - - Int128 tmp; - tmp.hi = long64(a + (c >> 32)); - tmp.lo = long64(c << 32); - tmp.lo += long64(b); - if (tmp.lo < b) tmp.hi++; - if (negate) tmp = -tmp; - return tmp; -}; -#endif //------------------------------------------------------------------------------ // Miscellaneous global functions @@ -330,11 +289,8 @@ int PointInPolygon(const IntPoint &pt, const Path &path) for(size_t i = 1; i <= cnt; ++i) { IntPoint ipNext = (i == cnt ? path[0] : path[i]); - if (ipNext.Y == pt.Y) - { - if ((ipNext.X == pt.X) || (ip.Y == pt.Y && - ((ipNext.X > pt.X) == (ip.X < pt.X)))) return -1; - } + if (ipNext.Y == pt.Y && ((ipNext.X == pt.X) || (ip.Y == pt.Y && ((ipNext.X > pt.X) == (ip.X < pt.X))))) + return -1; if ((ip.Y < pt.Y) != (ipNext.Y < pt.Y)) { if (ip.X >= pt.X) @@ -342,8 +298,7 @@ int PointInPolygon(const IntPoint &pt, const Path &path) if (ipNext.X > pt.X) result = 1 - result; else { - double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) - - (double)(ipNext.X - pt.X) * (ip.Y - pt.Y); + double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) - (double)(ipNext.X - pt.X) * (ip.Y - pt.Y); if (!d) return -1; if ((d > 0) == (ipNext.Y > ip.Y)) result = 1 - result; } @@ -351,8 +306,7 @@ int PointInPolygon(const IntPoint &pt, const Path &path) { if (ipNext.X > pt.X) { - double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) - - (double)(ipNext.X - pt.X) * (ip.Y - pt.Y); + double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) - (double)(ipNext.X - pt.X) * (ip.Y - pt.Y); if (!d) return -1; if ((d > 0) == (ipNext.Y > ip.Y)) result = 1 - result; } @@ -384,8 +338,7 @@ int PointInPolygon (const IntPoint &pt, OutPt *op) if (op->Next->Pt.X > pt.X) result = 1 - result; else { - double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) - - (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y); + double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) - (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y); if (!d) return -1; if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y)) result = 1 - result; } @@ -393,8 +346,7 @@ int PointInPolygon (const IntPoint &pt, OutPt *op) { if (op->Next->Pt.X > pt.X) { - double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) - - (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y); + double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) - (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y); if (!d) return -1; if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y)) result = 1 - result; } @@ -423,39 +375,43 @@ bool Poly2ContainsPoly1(OutPt *OutPt1, OutPt *OutPt2) } //---------------------------------------------------------------------- +// Approximate calculation of SlopesEqual() for "UseFullInt64Range" +// Returns true if the slopes are unequal for sure, +// otherwise returns false if the slopes may or may not be equal. +inline bool SlopesUnequalFilter(cInt dx1, cInt dy1, cInt dx2, cInt dy2) { + // Round dx1, dy1, dx2, dy2 to 31 bits. + dx1 = (dx1 + (1 << 30)) >> 32; + dy1 = (dy1 + (1 << 30)) >> 32; + dx2 = (dx2 + (1 << 30)) >> 32; + dy2 = (dy2 + (1 << 30)) >> 32; + // Result fits 63 bits, it is an approximate of the determinant divided by 2^64. + cInt discr = std::abs(dy1 * dx2 - dx1 * dy2); + // Maximum absolute of the remainder of the exact determinant, divided by 2^64. + cInt error = ((std::abs(dx1) + std::abs(dy1) + std::abs(dx2) + std::abs(dy2)) >> 1) + 1; + return discr > error; +} + +inline bool SlopesEqual(const cInt dx1, const cInt dy1, const cInt dx2, const cInt dy2, bool UseFullInt64Range) { + return (UseFullInt64Range) ? + // |dx1| < 2^63, |dx2| < 2^63 etc, +#if 1 + // Instead of jumping to 128bit multiply on a 32bit or 64bit CPU, + // calculate an approximate value of the determinant and its error. + // If the determinant is above the error, the slopes are certainly not equal. + ! SlopesUnequalFilter(dx1, dy1, dx2, dy2) && +#endif + Int128::Multiply(dy1, dx2) == Int128::Multiply(dx1, dy2) : + // |dx1| < 2^31, |dx2| < 2^31 etc, + // therefore the following computation could be done with 64bit arithmetics. + dy1 * dx2 == dx1 * dy2; +} inline bool SlopesEqual(const TEdge &e1, const TEdge &e2, bool UseFullInt64Range) -{ -#ifndef use_int32 - if (UseFullInt64Range) - return Int128Mul(e1.Delta.Y, e2.Delta.X) == Int128Mul(e1.Delta.X, e2.Delta.Y); - else -#endif - return e1.Delta.Y * e2.Delta.X == e1.Delta.X * e2.Delta.Y; -} -//------------------------------------------------------------------------------ + { return SlopesEqual(e1.Delta.X, e1.Delta.Y, e2.Delta.X, e2.Delta.Y, UseFullInt64Range); } +inline bool SlopesEqual(const IntPoint &pt1, const IntPoint &pt2, const IntPoint &pt3, bool UseFullInt64Range) + { return SlopesEqual(pt1.X-pt2.X, pt1.Y-pt2.Y, pt2.X-pt3.X, pt2.Y-pt3.Y, UseFullInt64Range); } +inline bool SlopesEqual(const IntPoint &pt1, const IntPoint &pt2, const IntPoint &pt3, const IntPoint &pt4, bool UseFullInt64Range) + { return SlopesEqual(pt1.X-pt2.X, pt1.Y-pt2.Y, pt3.X-pt4.X, pt3.Y-pt4.Y, UseFullInt64Range); } -inline bool SlopesEqual(const IntPoint &pt1, const IntPoint &pt2, - const IntPoint &pt3, bool UseFullInt64Range) -{ -#ifndef use_int32 - if (UseFullInt64Range) - return Int128Mul(pt1.Y-pt2.Y, pt2.X-pt3.X) == Int128Mul(pt1.X-pt2.X, pt2.Y-pt3.Y); - else -#endif - return (pt1.Y-pt2.Y)*(pt2.X-pt3.X) == (pt1.X-pt2.X)*(pt2.Y-pt3.Y); -} -//------------------------------------------------------------------------------ - -inline bool SlopesEqual(const IntPoint &pt1, const IntPoint &pt2, - const IntPoint &pt3, const IntPoint &pt4, bool UseFullInt64Range) -{ -#ifndef use_int32 - if (UseFullInt64Range) - return Int128Mul(pt1.Y-pt2.Y, pt3.X-pt4.X) == Int128Mul(pt1.X-pt2.X, pt3.Y-pt4.Y); - else -#endif - return (pt1.Y-pt2.Y)*(pt3.X-pt4.X) == (pt1.X-pt2.X)*(pt3.Y-pt4.Y); -} //------------------------------------------------------------------------------ inline bool IsHorizontal(TEdge &e) @@ -473,8 +429,9 @@ inline double GetDx(const IntPoint &pt1, const IntPoint &pt2) inline cInt TopX(TEdge &edge, const cInt currentY) { - return ( currentY == edge.Top.Y ) ? - edge.Top.X : edge.Bot.X + Round(edge.Dx *(currentY - edge.Bot.Y)); + return (currentY == edge.Top.Y) ? + edge.Top.X : + edge.Bot.X + Round(edge.Dx *(currentY - edge.Bot.Y)); } //------------------------------------------------------------------------------ @@ -519,10 +476,9 @@ void IntersectPoint(TEdge &Edge1, TEdge &Edge2, IntPoint &ip) b2 = Edge2.Bot.X - Edge2.Bot.Y * Edge2.Dx; double q = (b2-b1) / (Edge1.Dx - Edge2.Dx); ip.Y = Round(q); - if (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx)) - ip.X = Round(Edge1.Dx * q + b1); - else - ip.X = Round(Edge2.Dx * q + b2); + ip.X = (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx)) ? + Round(Edge1.Dx * q + b1) : + Round(Edge2.Dx * q + b2); } if (ip.Y < Edge1.Top.Y || ip.Y < Edge2.Top.Y) diff --git a/xs/src/clipper.hpp b/xs/src/clipper.hpp index aabe4f53b..2e7069619 100644 --- a/xs/src/clipper.hpp +++ b/xs/src/clipper.hpp @@ -34,11 +34,9 @@ #ifndef clipper_hpp #define clipper_hpp -#define CLIPPER_VERSION "6.2.6" +#include -//use_int32: When enabled 32bit ints are used instead of 64bit ints. This -//improve performance but coordinate values are limited to the range +/- 46340 -//#define use_int32 +#define CLIPPER_VERSION "6.2.6" //use_xyz: adds a Z member to IntPoint. Adds a minor cost to perfomance. //#define use_xyz @@ -68,18 +66,12 @@ enum PolyType { ptSubject, ptClip }; //see http://glprogramming.com/red/chapter11.html enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative }; -#ifdef use_int32 - typedef int cInt; - static cInt const loRange = 0x7FFF; - static cInt const hiRange = 0x7FFF; -#else - typedef signed long long cInt; - static cInt const loRange = 0x3FFFFFFF; - static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL; - typedef signed long long long64; //used by Int128 class - typedef unsigned long long ulong64; - -#endif +// Point coordinate type +typedef int64_t cInt; +// Maximum cInt value to allow a cross product calculation using 32bit expressions. +static cInt const loRange = 0x3FFFFFFF; +// Maximum allowed cInt value. +static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL; struct IntPoint { cInt X; @@ -262,12 +254,11 @@ enum EdgeSide { esLeft = 1, esRight = 2}; }; // Point of an output polygon. - // 36B on 64bit system with not use_int32 and not use_xyz. + // 36B on 64bit system without use_xyz. struct OutPt { // 4B int Idx; - // 8B (if use_int32 and not use_xyz) or 16B (if not use_int32 and not use_xyz) - // or 12B (if use_int32 and use_xyz) or 24B (if not use_int32 and use_xyz) + // 16B without use_xyz / 24B with use_xyz IntPoint Pt; // 4B on 32bit system, 8B on 64bit system OutPt *Next; diff --git a/xs/src/libslic3r/ClipperUtils.hpp b/xs/src/libslic3r/ClipperUtils.hpp index 3c0bec1ec..247a53d8f 100644 --- a/xs/src/libslic3r/ClipperUtils.hpp +++ b/xs/src/libslic3r/ClipperUtils.hpp @@ -15,7 +15,9 @@ using ClipperLib::jtSquare; // Factor to convert from coord_t (which is int32) to an int64 type used by the Clipper library // for general offsetting (the offset(), offset2(), offset_ex() functions) and for the safety offset, // which is optionally executed by other functions (union, intersection, diff). +// This scaling (cca 130t) is applied over the usual SCALING_FACTOR. // By the way, is the scalling for offset needed at all? +// The reason to apply this scaling may be to match the resolution of the double mantissa. #define CLIPPER_OFFSET_POWER_OF_2 17 // 2^17=131072 #define CLIPPER_OFFSET_SCALE (1 << CLIPPER_OFFSET_POWER_OF_2)