Experimental feature, which may make the Clipper offsets run faster

due to avoiding the 128bit multiply operations:
A filtered predicate is tried to calculate SlopesEqual()
to minimize the invocation of 128bit multiply operations.
This commit is contained in:
bubnikv 2017-07-13 15:52:19 +02:00
parent bd93d2f334
commit 40a882d01e
3 changed files with 110 additions and 161 deletions

View File

@ -130,67 +130,37 @@ bool PolyNode::IsHole() const
}
//------------------------------------------------------------------------------
#ifndef use_int32
//------------------------------------------------------------------------------
// Int128 class (enables safe math on signed 64bit integers)
// eg Int128 val1((long64)9223372036854775807); //ie 2^63 -1
// Int128 val2((long64)9223372036854775807);
// eg Int128 val1((int64_t)9223372036854775807); //ie 2^63 -1
// Int128 val2((int64_t)9223372036854775807);
// Int128 val3 = val1 * val2;
// val3.AsString => "85070591730234615847396907784232501249" (8.5e+37)
//------------------------------------------------------------------------------
// Used by the SlopesEqual() functions.
class Int128
{
public:
ulong64 lo;
long64 hi;
uint64_t lo;
int64_t hi;
Int128(long64 _lo = 0)
{
lo = (ulong64)_lo;
if (_lo < 0) hi = -1; else hi = 0;
}
Int128(const Int128 &val): lo(val.lo), hi(val.hi){}
Int128(const long64& _hi, const ulong64& _lo): lo(_lo), hi(_hi){}
Int128(int64_t _lo = 0) : lo((uint64_t)_lo), hi((_lo < 0) ? -1 : 0) {}
Int128(const Int128 &val) : lo(val.lo), hi(val.hi) {}
Int128(const int64_t& _hi, const uint64_t& _lo) : lo(_lo), hi(_hi) {}
Int128& operator = (const long64 &val)
Int128& operator = (const int64_t &val)
{
lo = (ulong64)val;
if (val < 0) hi = -1; else hi = 0;
lo = (uint64_t)val;
hi = (val < 0) ? -1 : 0;
return *this;
}
bool operator == (const Int128 &val) const
{return (hi == val.hi && lo == val.lo);}
bool operator != (const Int128 &val) const
{ return !(*this == val);}
bool operator > (const Int128 &val) const
{
if (hi != val.hi)
return hi > val.hi;
else
return lo > val.lo;
}
bool operator < (const Int128 &val) const
{
if (hi != val.hi)
return hi < val.hi;
else
return lo < val.lo;
}
bool operator >= (const Int128 &val) const
{ return !(*this < val);}
bool operator <= (const Int128 &val) const
{ return !(*this > val);}
bool operator == (const Int128 &val) const { return hi == val.hi && lo == val.lo; }
bool operator != (const Int128 &val) const { return ! (*this == val); }
bool operator > (const Int128 &val) const { return (hi == val.hi) ? lo > val.lo : hi > val.hi; }
bool operator < (const Int128 &val) const { return (hi == val.hi) ? lo < val.lo : hi < val.hi; }
bool operator >= (const Int128 &val) const { return ! (*this < val); }
bool operator <= (const Int128 &val) const { return ! (*this > val); }
Int128& operator += (const Int128 &rhs)
{
@ -220,58 +190,47 @@ class Int128
return result;
}
Int128 operator-() const //unary negation
{
if (lo == 0)
return Int128(-hi, 0);
else
return Int128(~hi, ~lo + 1);
}
Int128 operator-() const { return (lo == 0) ? Int128(-hi, 0) : Int128(~hi, ~lo + 1); }
operator double() const
{
const double shift64 = 18446744073709551616.0; //2^64
if (hi < 0)
{
if (lo == 0) return (double)hi * shift64;
else return -(double)(~lo + ~hi * shift64);
}
else
return (double)(lo + hi * shift64);
return (hi < 0) ?
((lo == 0) ?
(double)hi * shift64 :
-(double)(~lo + ~hi * shift64)) :
(double)(lo + hi * shift64);
}
static inline Int128 Multiply(int64_t lhs, int64_t rhs)
{
bool negate = (lhs < 0) != (rhs < 0);
if (lhs < 0) lhs = -lhs;
uint64_t int1Hi = uint64_t(lhs) >> 32;
uint64_t int1Lo = uint64_t(lhs & 0xFFFFFFFF);
if (rhs < 0) rhs = -rhs;
uint64_t int2Hi = uint64_t(rhs) >> 32;
uint64_t int2Lo = uint64_t(rhs & 0xFFFFFFFF);
//because the high (sign) bits in both int1Hi & int2Hi have been zeroed,
//there's no risk of 64 bit overflow in the following assignment
//(ie: $7FFFFFFF*$FFFFFFFF + $7FFFFFFF*$FFFFFFFF < 64bits)
uint64_t a = int1Hi * int2Hi;
uint64_t b = int1Lo * int2Lo;
//Result = A shl 64 + C shl 32 + B ...
uint64_t c = int1Hi * int2Lo + int1Lo * int2Hi;
Int128 tmp;
tmp.hi = int64_t(a + (c >> 32));
tmp.lo = int64_t(c << 32);
tmp.lo += int64_t(b);
if (tmp.lo < b) tmp.hi++;
if (negate) tmp = -tmp;
return tmp;
}
};
//------------------------------------------------------------------------------
inline Int128 Int128Mul (long64 lhs, long64 rhs)
{
bool negate = (lhs < 0) != (rhs < 0);
if (lhs < 0) lhs = -lhs;
ulong64 int1Hi = ulong64(lhs) >> 32;
ulong64 int1Lo = ulong64(lhs & 0xFFFFFFFF);
if (rhs < 0) rhs = -rhs;
ulong64 int2Hi = ulong64(rhs) >> 32;
ulong64 int2Lo = ulong64(rhs & 0xFFFFFFFF);
//because the high (sign) bits in both int1Hi & int2Hi have been zeroed,
//there's no risk of 64 bit overflow in the following assignment
//(ie: $7FFFFFFF*$FFFFFFFF + $7FFFFFFF*$FFFFFFFF < 64bits)
ulong64 a = int1Hi * int2Hi;
ulong64 b = int1Lo * int2Lo;
//Result = A shl 64 + C shl 32 + B ...
ulong64 c = int1Hi * int2Lo + int1Lo * int2Hi;
Int128 tmp;
tmp.hi = long64(a + (c >> 32));
tmp.lo = long64(c << 32);
tmp.lo += long64(b);
if (tmp.lo < b) tmp.hi++;
if (negate) tmp = -tmp;
return tmp;
};
#endif
//------------------------------------------------------------------------------
// Miscellaneous global functions
@ -330,11 +289,8 @@ int PointInPolygon(const IntPoint &pt, const Path &path)
for(size_t i = 1; i <= cnt; ++i)
{
IntPoint ipNext = (i == cnt ? path[0] : path[i]);
if (ipNext.Y == pt.Y)
{
if ((ipNext.X == pt.X) || (ip.Y == pt.Y &&
((ipNext.X > pt.X) == (ip.X < pt.X)))) return -1;
}
if (ipNext.Y == pt.Y && ((ipNext.X == pt.X) || (ip.Y == pt.Y && ((ipNext.X > pt.X) == (ip.X < pt.X)))))
return -1;
if ((ip.Y < pt.Y) != (ipNext.Y < pt.Y))
{
if (ip.X >= pt.X)
@ -342,8 +298,7 @@ int PointInPolygon(const IntPoint &pt, const Path &path)
if (ipNext.X > pt.X) result = 1 - result;
else
{
double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) -
(double)(ipNext.X - pt.X) * (ip.Y - pt.Y);
double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) - (double)(ipNext.X - pt.X) * (ip.Y - pt.Y);
if (!d) return -1;
if ((d > 0) == (ipNext.Y > ip.Y)) result = 1 - result;
}
@ -351,8 +306,7 @@ int PointInPolygon(const IntPoint &pt, const Path &path)
{
if (ipNext.X > pt.X)
{
double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) -
(double)(ipNext.X - pt.X) * (ip.Y - pt.Y);
double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) - (double)(ipNext.X - pt.X) * (ip.Y - pt.Y);
if (!d) return -1;
if ((d > 0) == (ipNext.Y > ip.Y)) result = 1 - result;
}
@ -384,8 +338,7 @@ int PointInPolygon (const IntPoint &pt, OutPt *op)
if (op->Next->Pt.X > pt.X) result = 1 - result;
else
{
double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) -
(double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y);
double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) - (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y);
if (!d) return -1;
if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y)) result = 1 - result;
}
@ -393,8 +346,7 @@ int PointInPolygon (const IntPoint &pt, OutPt *op)
{
if (op->Next->Pt.X > pt.X)
{
double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) -
(double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y);
double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) - (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y);
if (!d) return -1;
if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y)) result = 1 - result;
}
@ -423,39 +375,43 @@ bool Poly2ContainsPoly1(OutPt *OutPt1, OutPt *OutPt2)
}
//----------------------------------------------------------------------
// Approximate calculation of SlopesEqual() for "UseFullInt64Range"
// Returns true if the slopes are unequal for sure,
// otherwise returns false if the slopes may or may not be equal.
inline bool SlopesUnequalFilter(cInt dx1, cInt dy1, cInt dx2, cInt dy2) {
// Round dx1, dy1, dx2, dy2 to 31 bits.
dx1 = (dx1 + (1 << 30)) >> 32;
dy1 = (dy1 + (1 << 30)) >> 32;
dx2 = (dx2 + (1 << 30)) >> 32;
dy2 = (dy2 + (1 << 30)) >> 32;
// Result fits 63 bits, it is an approximate of the determinant divided by 2^64.
cInt discr = std::abs(dy1 * dx2 - dx1 * dy2);
// Maximum absolute of the remainder of the exact determinant, divided by 2^64.
cInt error = ((std::abs(dx1) + std::abs(dy1) + std::abs(dx2) + std::abs(dy2)) >> 1) + 1;
return discr > error;
}
inline bool SlopesEqual(const cInt dx1, const cInt dy1, const cInt dx2, const cInt dy2, bool UseFullInt64Range) {
return (UseFullInt64Range) ?
// |dx1| < 2^63, |dx2| < 2^63 etc,
#if 1
// Instead of jumping to 128bit multiply on a 32bit or 64bit CPU,
// calculate an approximate value of the determinant and its error.
// If the determinant is above the error, the slopes are certainly not equal.
! SlopesUnequalFilter(dx1, dy1, dx2, dy2) &&
#endif
Int128::Multiply(dy1, dx2) == Int128::Multiply(dx1, dy2) :
// |dx1| < 2^31, |dx2| < 2^31 etc,
// therefore the following computation could be done with 64bit arithmetics.
dy1 * dx2 == dx1 * dy2;
}
inline bool SlopesEqual(const TEdge &e1, const TEdge &e2, bool UseFullInt64Range)
{
#ifndef use_int32
if (UseFullInt64Range)
return Int128Mul(e1.Delta.Y, e2.Delta.X) == Int128Mul(e1.Delta.X, e2.Delta.Y);
else
#endif
return e1.Delta.Y * e2.Delta.X == e1.Delta.X * e2.Delta.Y;
}
//------------------------------------------------------------------------------
{ return SlopesEqual(e1.Delta.X, e1.Delta.Y, e2.Delta.X, e2.Delta.Y, UseFullInt64Range); }
inline bool SlopesEqual(const IntPoint &pt1, const IntPoint &pt2, const IntPoint &pt3, bool UseFullInt64Range)
{ return SlopesEqual(pt1.X-pt2.X, pt1.Y-pt2.Y, pt2.X-pt3.X, pt2.Y-pt3.Y, UseFullInt64Range); }
inline bool SlopesEqual(const IntPoint &pt1, const IntPoint &pt2, const IntPoint &pt3, const IntPoint &pt4, bool UseFullInt64Range)
{ return SlopesEqual(pt1.X-pt2.X, pt1.Y-pt2.Y, pt3.X-pt4.X, pt3.Y-pt4.Y, UseFullInt64Range); }
inline bool SlopesEqual(const IntPoint &pt1, const IntPoint &pt2,
const IntPoint &pt3, bool UseFullInt64Range)
{
#ifndef use_int32
if (UseFullInt64Range)
return Int128Mul(pt1.Y-pt2.Y, pt2.X-pt3.X) == Int128Mul(pt1.X-pt2.X, pt2.Y-pt3.Y);
else
#endif
return (pt1.Y-pt2.Y)*(pt2.X-pt3.X) == (pt1.X-pt2.X)*(pt2.Y-pt3.Y);
}
//------------------------------------------------------------------------------
inline bool SlopesEqual(const IntPoint &pt1, const IntPoint &pt2,
const IntPoint &pt3, const IntPoint &pt4, bool UseFullInt64Range)
{
#ifndef use_int32
if (UseFullInt64Range)
return Int128Mul(pt1.Y-pt2.Y, pt3.X-pt4.X) == Int128Mul(pt1.X-pt2.X, pt3.Y-pt4.Y);
else
#endif
return (pt1.Y-pt2.Y)*(pt3.X-pt4.X) == (pt1.X-pt2.X)*(pt3.Y-pt4.Y);
}
//------------------------------------------------------------------------------
inline bool IsHorizontal(TEdge &e)
@ -473,8 +429,9 @@ inline double GetDx(const IntPoint &pt1, const IntPoint &pt2)
inline cInt TopX(TEdge &edge, const cInt currentY)
{
return ( currentY == edge.Top.Y ) ?
edge.Top.X : edge.Bot.X + Round(edge.Dx *(currentY - edge.Bot.Y));
return (currentY == edge.Top.Y) ?
edge.Top.X :
edge.Bot.X + Round(edge.Dx *(currentY - edge.Bot.Y));
}
//------------------------------------------------------------------------------
@ -519,10 +476,9 @@ void IntersectPoint(TEdge &Edge1, TEdge &Edge2, IntPoint &ip)
b2 = Edge2.Bot.X - Edge2.Bot.Y * Edge2.Dx;
double q = (b2-b1) / (Edge1.Dx - Edge2.Dx);
ip.Y = Round(q);
if (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx))
ip.X = Round(Edge1.Dx * q + b1);
else
ip.X = Round(Edge2.Dx * q + b2);
ip.X = (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx)) ?
Round(Edge1.Dx * q + b1) :
Round(Edge2.Dx * q + b2);
}
if (ip.Y < Edge1.Top.Y || ip.Y < Edge2.Top.Y)

View File

@ -34,11 +34,9 @@
#ifndef clipper_hpp
#define clipper_hpp
#define CLIPPER_VERSION "6.2.6"
#include <inttypes.h>
//use_int32: When enabled 32bit ints are used instead of 64bit ints. This
//improve performance but coordinate values are limited to the range +/- 46340
//#define use_int32
#define CLIPPER_VERSION "6.2.6"
//use_xyz: adds a Z member to IntPoint. Adds a minor cost to perfomance.
//#define use_xyz
@ -68,18 +66,12 @@ enum PolyType { ptSubject, ptClip };
//see http://glprogramming.com/red/chapter11.html
enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative };
#ifdef use_int32
typedef int cInt;
static cInt const loRange = 0x7FFF;
static cInt const hiRange = 0x7FFF;
#else
typedef signed long long cInt;
static cInt const loRange = 0x3FFFFFFF;
static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL;
typedef signed long long long64; //used by Int128 class
typedef unsigned long long ulong64;
#endif
// Point coordinate type
typedef int64_t cInt;
// Maximum cInt value to allow a cross product calculation using 32bit expressions.
static cInt const loRange = 0x3FFFFFFF;
// Maximum allowed cInt value.
static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL;
struct IntPoint {
cInt X;
@ -262,12 +254,11 @@ enum EdgeSide { esLeft = 1, esRight = 2};
};
// Point of an output polygon.
// 36B on 64bit system with not use_int32 and not use_xyz.
// 36B on 64bit system without use_xyz.
struct OutPt {
// 4B
int Idx;
// 8B (if use_int32 and not use_xyz) or 16B (if not use_int32 and not use_xyz)
// or 12B (if use_int32 and use_xyz) or 24B (if not use_int32 and use_xyz)
// 16B without use_xyz / 24B with use_xyz
IntPoint Pt;
// 4B on 32bit system, 8B on 64bit system
OutPt *Next;

View File

@ -15,7 +15,9 @@ using ClipperLib::jtSquare;
// Factor to convert from coord_t (which is int32) to an int64 type used by the Clipper library
// for general offsetting (the offset(), offset2(), offset_ex() functions) and for the safety offset,
// which is optionally executed by other functions (union, intersection, diff).
// This scaling (cca 130t) is applied over the usual SCALING_FACTOR.
// By the way, is the scalling for offset needed at all?
// The reason to apply this scaling may be to match the resolution of the double mantissa.
#define CLIPPER_OFFSET_POWER_OF_2 17
// 2^17=131072
#define CLIPPER_OFFSET_SCALE (1 << CLIPPER_OFFSET_POWER_OF_2)