Experimental feature, which may make the Clipper offsets run faster

due to avoiding the 128bit multiply operations: A filtered predicate is tried to calculate SlopesEqual() to minimize the invocation of 128bit multiply operations.
2017-07-13 15:52:19 +02:00 · 2017-07-13 15:52:19 +02:00 · 40a882d01e
commit 40a882d01e
parent bd93d2f334
3 changed files with 110 additions and 161 deletions
--- a/xs/src/clipper.cpp
+++ b/xs/src/clipper.cpp
@ -130,67 +130,37 @@ bool PolyNode::IsHole() const
 }  
 //------------------------------------------------------------------------------
 #ifndef use_int32
 //------------------------------------------------------------------------------
 // Int128 class (enables safe math on signed 64bit integers)
-// eg Int128 val1((long64)9223372036854775807); //ie 2^63 -1
+// eg Int128 val1((int64_t)9223372036854775807); //ie 2^63 -1
-//    Int128 val2((long64)9223372036854775807);
+//    Int128 val2((int64_t)9223372036854775807);
 //    Int128 val3 = val1 * val2;
 //    val3.AsString => "85070591730234615847396907784232501249" (8.5e+37)
 //------------------------------------------------------------------------------
-
+// Used by the SlopesEqual() functions.
 class Int128
 {
  public:
-    ulong64 lo;
+    uint64_t lo;
-    long64 hi;
+    int64_t  hi;
    Int128(long64 _lo = 0)
    {
      lo = (ulong64)_lo;   
      if (_lo < 0)  hi = -1; else hi = 0; 
    }
    Int128(int64_t _lo = 0) : lo((uint64_t)_lo), hi((_lo < 0) ? -1 : 0) {}
    Int128(const Int128 &val) : lo(val.lo), hi(val.hi) {}
    Int128(const int64_t& _hi, const uint64_t& _lo) : lo(_lo), hi(_hi) {}
-    Int128(const long64& _hi, const ulong64& _lo): lo(_lo), hi(_hi){}
+    Int128& operator = (const int64_t &val)
    Int128& operator = (const long64 &val)
    {
-      lo = (ulong64)val;
+      lo = (uint64_t)val;
-      if (val < 0) hi = -1; else hi = 0;
+      hi = (val < 0) ? -1 : 0;
      return *this;
    }
-    bool operator == (const Int128 &val) const
+    bool operator == (const Int128 &val) const { return hi == val.hi && lo == val.lo; }
-      {return (hi == val.hi && lo == val.lo);}
+    bool operator != (const Int128 &val) const { return ! (*this == val); }
-
+    bool operator >  (const Int128 &val) const { return (hi == val.hi) ? lo > val.lo : hi > val.hi; }
-    bool operator != (const Int128 &val) const
+    bool operator <  (const Int128 &val) const { return (hi == val.hi) ? lo < val.lo : hi < val.hi; }
-      { return !(*this == val);}
+    bool operator >= (const Int128 &val) const { return ! (*this < val); }
-
+    bool operator <= (const Int128 &val) const { return ! (*this > val); }
    bool operator > (const Int128 &val) const
    {
      if (hi != val.hi)
        return hi > val.hi;
      else
        return lo > val.lo;
    }
    bool operator < (const Int128 &val) const
    {
      if (hi != val.hi)
        return hi < val.hi;
      else
        return lo < val.lo;
    }
    bool operator >= (const Int128 &val) const
      { return !(*this < val);}
    bool operator <= (const Int128 &val) const
      { return !(*this > val);}
    Int128& operator += (const Int128 &rhs)
    {
@ -220,58 +190,47 @@ class Int128
      return result;
    }
-    Int128 operator-() const //unary negation
+    Int128 operator-() const { return (lo == 0) ? Int128(-hi, 0) : Int128(~hi, ~lo + 1); }
    {
      if (lo == 0)
        return Int128(-hi, 0);
      else
        return Int128(~hi, ~lo + 1);
    }
    operator double() const
    {
      const double shift64 = 18446744073709551616.0; //2^64
-      if (hi < 0)
+      return (hi < 0) ?
-      {
+        ((lo == 0) ? 
-        if (lo == 0) return (double)hi * shift64;
+          (double)hi * shift64 :
-        else return -(double)(~lo + ~hi * shift64);
+          -(double)(~lo + ~hi * shift64)) :
-      }
+        (double)(lo + hi * shift64);
      else
        return (double)(lo + hi * shift64);
    }
-};
+    static inline Int128 Multiply(int64_t lhs, int64_t rhs)
 //------------------------------------------------------------------------------
 inline Int128 Int128Mul (long64 lhs, long64 rhs)
    {
      bool negate = (lhs < 0) != (rhs < 0);
      if (lhs < 0) lhs = -lhs;
-  ulong64 int1Hi = ulong64(lhs) >> 32;
+      uint64_t int1Hi = uint64_t(lhs) >> 32;
-  ulong64 int1Lo = ulong64(lhs & 0xFFFFFFFF);
+      uint64_t int1Lo = uint64_t(lhs & 0xFFFFFFFF);
      if (rhs < 0) rhs = -rhs;
-  ulong64 int2Hi = ulong64(rhs) >> 32;
+      uint64_t int2Hi = uint64_t(rhs) >> 32;
-  ulong64 int2Lo = ulong64(rhs & 0xFFFFFFFF);
+      uint64_t int2Lo = uint64_t(rhs & 0xFFFFFFFF);
      //because the high (sign) bits in both int1Hi & int2Hi have been zeroed,
      //there's no risk of 64 bit overflow in the following assignment
      //(ie: $7FFFFFFF*$FFFFFFFF + $7FFFFFFF*$FFFFFFFF < 64bits)
-  ulong64 a = int1Hi * int2Hi;
+      uint64_t a = int1Hi * int2Hi;
-  ulong64 b = int1Lo * int2Lo;
+      uint64_t b = int1Lo * int2Lo;
      //Result = A shl 64 + C shl 32 + B ...
-  ulong64 c = int1Hi * int2Lo + int1Lo * int2Hi;
+      uint64_t c = int1Hi * int2Lo + int1Lo * int2Hi;
      Int128 tmp;
-  tmp.hi = long64(a + (c >> 32));
+      tmp.hi = int64_t(a + (c >> 32));
-  tmp.lo = long64(c << 32);
+      tmp.lo = int64_t(c << 32);
-  tmp.lo += long64(b);
+      tmp.lo += int64_t(b);
      if (tmp.lo < b) tmp.hi++;
      if (negate) tmp = -tmp;
      return tmp;
    }
 };
 #endif
 //------------------------------------------------------------------------------
 // Miscellaneous global functions
@ -330,11 +289,8 @@ int PointInPolygon(const IntPoint &pt, const Path &path)
  for(size_t i = 1; i <= cnt; ++i)
  {
    IntPoint ipNext = (i == cnt ? path[0] : path[i]);
-    if (ipNext.Y == pt.Y)
+    if (ipNext.Y == pt.Y && ((ipNext.X == pt.X) || (ip.Y == pt.Y && ((ipNext.X > pt.X) == (ip.X < pt.X)))))
-    {
+      return -1;
        if ((ipNext.X == pt.X) || (ip.Y == pt.Y && 
          ((ipNext.X > pt.X) == (ip.X < pt.X)))) return -1;
    }
    if ((ip.Y < pt.Y) != (ipNext.Y < pt.Y))
    {
      if (ip.X >= pt.X)
@ -342,8 +298,7 @@ int PointInPolygon(const IntPoint &pt, const Path &path)
        if (ipNext.X > pt.X) result = 1 - result;
        else
        {
-          double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) - 
+          double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) - (double)(ipNext.X - pt.X) * (ip.Y - pt.Y);
            (double)(ipNext.X - pt.X) * (ip.Y - pt.Y);
          if (!d) return -1;
          if ((d > 0) == (ipNext.Y > ip.Y)) result = 1 - result;
        }
@ -351,8 +306,7 @@ int PointInPolygon(const IntPoint &pt, const Path &path)
      {
        if (ipNext.X > pt.X)
        {
-          double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) - 
+          double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) - (double)(ipNext.X - pt.X) * (ip.Y - pt.Y);
            (double)(ipNext.X - pt.X) * (ip.Y - pt.Y);
          if (!d) return -1;
          if ((d > 0) == (ipNext.Y > ip.Y)) result = 1 - result;
        }
@ -384,8 +338,7 @@ int PointInPolygon (const IntPoint &pt, OutPt *op)
        if (op->Next->Pt.X > pt.X) result = 1 - result;
        else
        {
-          double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) - 
+          double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) - (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y);
            (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y);
          if (!d) return -1;
          if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y)) result = 1 - result;
        }
@ -393,8 +346,7 @@ int PointInPolygon (const IntPoint &pt, OutPt *op)
      {
        if (op->Next->Pt.X > pt.X)
        {
-          double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) - 
+          double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) - (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y);
            (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y);
          if (!d) return -1;
          if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y)) result = 1 - result;
        }
@ -423,39 +375,43 @@ bool Poly2ContainsPoly1(OutPt *OutPt1, OutPt *OutPt2)
 }
 //----------------------------------------------------------------------
 // Approximate calculation of SlopesEqual() for "UseFullInt64Range"
 // Returns true if the slopes are unequal for sure, 
 // otherwise returns false if the slopes may or may not be equal.
 inline bool SlopesUnequalFilter(cInt dx1, cInt dy1, cInt dx2, cInt dy2) {
  // Round dx1, dy1, dx2, dy2 to 31 bits.
  dx1 = (dx1 + (1 << 30)) >> 32;
  dy1 = (dy1 + (1 << 30)) >> 32;
  dx2 = (dx2 + (1 << 30)) >> 32;
  dy2 = (dy2 + (1 << 30)) >> 32;
  // Result fits 63 bits, it is an approximate of the determinant divided by 2^64.
  cInt discr = std::abs(dy1 * dx2 - dx1 * dy2);
  // Maximum absolute of the remainder of the exact determinant, divided by 2^64.
  cInt error = ((std::abs(dx1) + std::abs(dy1) + std::abs(dx2) + std::abs(dy2)) >> 1) + 1;
  return discr > error;
 }
 inline bool SlopesEqual(const cInt dx1, const cInt dy1, const cInt dx2, const cInt dy2, bool UseFullInt64Range) {
  return (UseFullInt64Range) ?
    // |dx1| < 2^63, |dx2| < 2^63 etc,
 #if 1
    // Instead of jumping to 128bit multiply on a 32bit or 64bit CPU,
    // calculate an approximate value of the determinant and its error.
    // If the determinant is above the error, the slopes are certainly not equal.
    ! SlopesUnequalFilter(dx1, dy1, dx2, dy2) && 
 #endif
    Int128::Multiply(dy1, dx2) == Int128::Multiply(dx1, dy2) :
    // |dx1| < 2^31, |dx2| < 2^31 etc,
    // therefore the following computation could be done with 64bit arithmetics. 
    dy1 * dx2 == dx1 * dy2;
 }
 inline bool SlopesEqual(const TEdge &e1, const TEdge &e2, bool UseFullInt64Range)
-{
+  { return SlopesEqual(e1.Delta.X, e1.Delta.Y, e2.Delta.X, e2.Delta.Y, UseFullInt64Range); }
-#ifndef use_int32
+inline bool SlopesEqual(const IntPoint &pt1, const IntPoint &pt2, const IntPoint &pt3, bool UseFullInt64Range)
-  if (UseFullInt64Range)
+  { return SlopesEqual(pt1.X-pt2.X, pt1.Y-pt2.Y, pt2.X-pt3.X, pt2.Y-pt3.Y, UseFullInt64Range); }
-    return Int128Mul(e1.Delta.Y, e2.Delta.X) == Int128Mul(e1.Delta.X, e2.Delta.Y);
+inline bool SlopesEqual(const IntPoint &pt1, const IntPoint &pt2, const IntPoint &pt3, const IntPoint &pt4, bool UseFullInt64Range)
-  else 
+  { return SlopesEqual(pt1.X-pt2.X, pt1.Y-pt2.Y, pt3.X-pt4.X, pt3.Y-pt4.Y, UseFullInt64Range); }
 #endif
    return e1.Delta.Y * e2.Delta.X == e1.Delta.X * e2.Delta.Y;
 }
 //------------------------------------------------------------------------------
 inline bool SlopesEqual(const IntPoint &pt1, const IntPoint &pt2,
  const IntPoint &pt3, bool UseFullInt64Range)
 {
 #ifndef use_int32
  if (UseFullInt64Range)
    return Int128Mul(pt1.Y-pt2.Y, pt2.X-pt3.X) == Int128Mul(pt1.X-pt2.X, pt2.Y-pt3.Y);
  else 
 #endif
    return (pt1.Y-pt2.Y)*(pt2.X-pt3.X) == (pt1.X-pt2.X)*(pt2.Y-pt3.Y);
 }
 //------------------------------------------------------------------------------
 inline bool SlopesEqual(const IntPoint &pt1, const IntPoint &pt2,
  const IntPoint &pt3, const IntPoint &pt4, bool UseFullInt64Range)
 {
 #ifndef use_int32
  if (UseFullInt64Range)
    return Int128Mul(pt1.Y-pt2.Y, pt3.X-pt4.X) == Int128Mul(pt1.X-pt2.X, pt3.Y-pt4.Y);
  else 
 #endif
    return (pt1.Y-pt2.Y)*(pt3.X-pt4.X) == (pt1.X-pt2.X)*(pt3.Y-pt4.Y);
 }
 //------------------------------------------------------------------------------
 inline bool IsHorizontal(TEdge &e)
@ -474,7 +430,8 @@ inline double GetDx(const IntPoint &pt1, const IntPoint &pt2)
 inline cInt TopX(TEdge &edge, const cInt currentY)
 {
  return (currentY == edge.Top.Y) ?
-    edge.Top.X : edge.Bot.X + Round(edge.Dx *(currentY - edge.Bot.Y));
+    edge.Top.X : 
    edge.Bot.X + Round(edge.Dx *(currentY - edge.Bot.Y));
 }
 //------------------------------------------------------------------------------
@ -519,10 +476,9 @@ void IntersectPoint(TEdge &Edge1, TEdge &Edge2, IntPoint &ip)
    b2 = Edge2.Bot.X - Edge2.Bot.Y * Edge2.Dx;
    double q = (b2-b1) / (Edge1.Dx - Edge2.Dx);
    ip.Y = Round(q);
-    if (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx))
+    ip.X = (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx)) ? 
-      ip.X = Round(Edge1.Dx * q + b1);
+      Round(Edge1.Dx * q + b1) :
-    else 
+      Round(Edge2.Dx * q + b2);
      ip.X = Round(Edge2.Dx * q + b2);
  }
  if (ip.Y < Edge1.Top.Y || ip.Y < Edge2.Top.Y) 
--- a/xs/src/clipper.hpp
+++ b/xs/src/clipper.hpp
@ -34,11 +34,9 @@
 #ifndef clipper_hpp
 #define clipper_hpp
-#define CLIPPER_VERSION "6.2.6"
+#include <inttypes.h>
-//use_int32: When enabled 32bit ints are used instead of 64bit ints. This
+#define CLIPPER_VERSION "6.2.6"
 //improve performance but coordinate values are limited to the range +/- 46340
 //#define use_int32
 //use_xyz: adds a Z member to IntPoint. Adds a minor cost to perfomance.
 //#define use_xyz
@ -68,18 +66,12 @@ enum PolyType { ptSubject, ptClip };
 //see http://glprogramming.com/red/chapter11.html
 enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative };
-#ifdef use_int32
+// Point coordinate type
-  typedef int cInt;
+typedef int64_t cInt;
-  static cInt const loRange = 0x7FFF;
+// Maximum cInt value to allow a cross product calculation using 32bit expressions.
  static cInt const hiRange = 0x7FFF;
 #else
  typedef signed long long cInt;
 static cInt const loRange = 0x3FFFFFFF;
 // Maximum allowed cInt value.
 static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL;
  typedef signed long long long64;     //used by Int128 class
  typedef unsigned long long ulong64;
 #endif
 struct IntPoint {
  cInt X;
@ -262,12 +254,11 @@ enum EdgeSide { esLeft = 1, esRight = 2};
  };
  // Point of an output polygon.
-  // 36B on 64bit system with not use_int32 and not use_xyz.
+  // 36B on 64bit system without use_xyz.
  struct OutPt {
    // 4B
    int       Idx;
-    // 8B (if use_int32 and not use_xyz) or 16B (if not use_int32 and not use_xyz)
+    // 16B without use_xyz / 24B with use_xyz
    // or 12B (if use_int32 and use_xyz) or 24B (if not use_int32 and use_xyz)
    IntPoint  Pt;
    // 4B on 32bit system, 8B on 64bit system
    OutPt    *Next;
--- a/xs/src/libslic3r/ClipperUtils.hpp
+++ b/xs/src/libslic3r/ClipperUtils.hpp
@ -15,7 +15,9 @@ using ClipperLib::jtSquare;
 // Factor to convert from coord_t (which is int32) to an int64 type used by the Clipper library
 // for general offsetting (the offset(), offset2(), offset_ex() functions) and for the safety offset,
 // which is optionally executed by other functions (union, intersection, diff).
 // This scaling (cca 130t) is applied over the usual SCALING_FACTOR.
 // By the way, is the scalling for offset needed at all?
 // The reason to apply this scaling may be to match the resolution of the double mantissa.
 #define CLIPPER_OFFSET_POWER_OF_2 17
 // 2^17=131072
 #define CLIPPER_OFFSET_SCALE (1 << CLIPPER_OFFSET_POWER_OF_2)