Fixed conflicts after merging with branch eigenize

2018-08-23 15:37:38 +02:00 · 2018-08-23 15:37:38 +02:00 · 66ce638439
commit 66ce638439
parent a5fcdeec23 76d60070eb
211 changed files with 4309 additions and 4920 deletions
--- a/lib/Slic3r/ExPolygon.pm
+++ b/lib/Slic3r/ExPolygon.pm
@ -7,18 +7,6 @@ use warnings;
 use List::Util qw(first);
 use Slic3r::Geometry::Clipper qw(union_ex diff_pl);
 sub wkt {
    my $self = shift;
    return sprintf "POLYGON(%s)", 
        join ',', map "($_)", map { join ',', map "$_->[0] $_->[1]", @$_ } @$self;
 }
 sub dump_perl {
    my $self = shift;
    return sprintf "[%s]", 
        join ',', map "[$_]", map { join ',', map "[$_->[0],$_->[1]]", @$_ } @$self;
 }
 sub offset {
    my $self = shift;
    return Slic3r::Geometry::Clipper::offset(\@$self, @_);
--- a/lib/Slic3r/GUI/2DBed.pm
+++ b/lib/Slic3r/GUI/2DBed.pm
@ -1,4 +1,5 @@
 # Bed shape dialog
 # still used by the Slic3r::GUI::Controller::ManualControlDialog Perl module.
 package Slic3r::GUI::2DBed;
 use strict;
--- a/lib/Slic3r/Point.pm
+++ b/lib/Slic3r/Point.pm
@ -7,11 +7,6 @@ sub new_scale {
    return $class->new(map Slic3r::Geometry::scale($_), @_);
 }
 sub dump_perl {
    my $self = shift;
    return sprintf "[%s,%s]", @$self;
 }
 package Slic3r::Pointf;
 use strict;
 use warnings;
--- a/lib/Slic3r/Polyline.pm
+++ b/lib/Slic3r/Polyline.pm
@ -10,9 +10,4 @@ sub new_scale {
    return $class->new(map [ Slic3r::Geometry::scale($_->[X]), Slic3r::Geometry::scale($_->[Y]) ], @points);
 }
 sub dump_perl {
    my $self = shift;
    return sprintf "[%s]", join ',', map "[$_->[0],$_->[1]]", @$self;
 }
 1;
--- a/xs/lib/Slic3r/XS.pm
+++ b/xs/lib/Slic3r/XS.pm
@ -33,16 +33,6 @@ use overload
    '@{}' => sub { $_[0]->arrayref },
    'fallback' => 1;
 package Slic3r::Point3;
 use overload
    '@{}' => sub { [ $_[0]->x, $_[0]->y, $_[0]->z ] },  #,
    'fallback' => 1;
 sub pp {
    my ($self) = @_;
    return [ @$self ];
 }
 package Slic3r::Pointf;
 use overload
    '@{}' => sub { $_[0]->arrayref },
--- a/xs/src/admesh/connect.cpp
+++ b/xs/src/admesh/connect.cpp
@ -25,11 +25,11 @@
 #include <string.h>
 #include <math.h>
 #include <boost/detail/endian.hpp>
 #include "stl.h"
 static void stl_match_neighbors_exact(stl_file *stl,
                                      stl_hash_edge *edge_a, stl_hash_edge *edge_b);
 static void stl_match_neighbors_nearby(stl_file *stl,
                                       stl_hash_edge *edge_a, stl_hash_edge *edge_b);
 static void stl_record_neighbors(stl_file *stl,
@ -43,7 +43,6 @@ static int stl_load_edge_nearby(stl_file *stl, stl_hash_edge *edge,
 static void insert_hash_edge(stl_file *stl, stl_hash_edge edge,
                             void (*match_neighbors)(stl_file *stl,
                                 stl_hash_edge *edge_a, stl_hash_edge *edge_b));
 static int stl_get_hash_for_edge(int M, stl_hash_edge *edge);
 static int stl_compare_function(stl_hash_edge *edge_a, stl_hash_edge *edge_b);
 static void stl_free_edges(stl_file *stl);
 static void stl_remove_facet(stl_file *stl, int facet_number);
@ -82,37 +81,20 @@ stl_check_facets_exact(stl_file *stl) {
  for(i = 0; i < stl->stats.number_of_facets; i++) {
    facet = stl->facet_start[i];
-    // Positive and negative zeros are possible in the floats, which are considered equal by the FP unit.
+    // If any two of the three vertices are found to be exactly the same, call them degenerate and remove the facet.
-    // When using a memcmp on raw floats, those numbers report to be different.
+    if (facet.vertex[0] == facet.vertex[1] ||
-    // Unify all +0 and -0 to +0 to make the floats equal under memcmp.
+        facet.vertex[1] == facet.vertex[2] ||
-    {
+        facet.vertex[0] == facet.vertex[2]) {
      uint32_t *f = (uint32_t*)&facet;
      for (int j = 0; j < 12; ++ j, ++ f) // 3x vertex + normal: 4x3 = 12 floats
        if (*f == 0x80000000)
            // Negative zero, switch to positive zero.
            *f = 0;
    }
    /* If any two of the three vertices are found to be exactly the same, call them degenerate and remove the facet. */
    if(   !memcmp(&facet.vertex[0], &facet.vertex[1],
                  sizeof(stl_vertex))
          || !memcmp(&facet.vertex[1], &facet.vertex[2],
                     sizeof(stl_vertex))
          || !memcmp(&facet.vertex[0], &facet.vertex[2],
                     sizeof(stl_vertex))) {
      stl->stats.degenerate_facets += 1;
      stl_remove_facet(stl, i);
-      i--;
+      -- i;
      continue;
    }
    for(j = 0; j < 3; j++) {
      edge.facet_number = i;
      edge.which_edge = j;
-      stl_load_edge_exact(stl, &edge, &facet.vertex[j],
+      stl_load_edge_exact(stl, &edge, &facet.vertex[j], &facet.vertex[(j + 1) % 3]);
-                          &facet.vertex[(j + 1) % 3]);
+      insert_hash_edge(stl, edge, stl_record_neighbors);
      insert_hash_edge(stl, edge, stl_match_neighbors_exact);
    }
  }
  stl_free_edges(stl);
@ -131,28 +113,33 @@ stl_load_edge_exact(stl_file *stl, stl_hash_edge *edge,
  if (stl->error) return;
  {
-    float diff_x = ABS(a->x - b->x);
+    stl_vertex diff = (*a - *b).cwiseAbs();
-    float diff_y = ABS(a->y - b->y);
+    float max_diff = std::max(diff(0), std::max(diff(1), diff(2)));
-    float diff_z = ABS(a->z - b->z);
+    stl->stats.shortest_edge = std::min(max_diff, stl->stats.shortest_edge);
    float max_diff = STL_MAX(diff_x, diff_y);
    max_diff = STL_MAX(diff_z, max_diff);
    stl->stats.shortest_edge = STL_MIN(max_diff, stl->stats.shortest_edge);
  }
  // Ensure identical vertex ordering of equal edges.
  // This method is numerically robust.
-  if ((a->x != b->x) ? 
+  if (stl_vertex_lower(*a, *b)) {
        (a->x < b->x) : 
        ((a->y != b->y) ? 
            (a->y < b->y) : 
            (a->z < b->z))) {
    memcpy(&edge->key[0], a, sizeof(stl_vertex));
    memcpy(&edge->key[3], b, sizeof(stl_vertex));
  } else {
-    memcpy(&edge->key[0], b, sizeof(stl_vertex));
+    std::swap(a, b);
    memcpy(&edge->key[3], a, sizeof(stl_vertex));
    edge->which_edge += 3; /* this edge is loaded backwards */
  }
  memcpy(&edge->key[0],                  a->data(), sizeof(stl_vertex));
  memcpy(&edge->key[sizeof(stl_vertex)], b->data(), sizeof(stl_vertex));
  // Switch negative zeros to positive zeros, so memcmp will consider them to be equal.
  for (size_t i = 0; i < 6; ++ i) {
    unsigned char *p = edge->key + i * 4;
 #ifdef BOOST_LITTLE_ENDIAN
    if (p[0] == 0 && p[1] == 0 && p[2] == 0 && p[3] == 0x80)
      // Negative zero, switch to positive zero.
      p[3] = 0;
 #else /* BOOST_LITTLE_ENDIAN */
    if (p[0] == 0x80 && p[1] == 0 && p[2] == 0 && p[3] == 0)
      // Negative zero, switch to positive zero.
      p[0] = 0;
 #endif /* BOOST_LITTLE_ENDIAN */
  }
 }
 static void
@ -188,21 +175,17 @@ stl_initialize_facet_check_exact(stl_file *stl) {
  }
 }
-static void
+static void insert_hash_edge(stl_file *stl, stl_hash_edge edge,
 insert_hash_edge(stl_file *stl, stl_hash_edge edge,
                 void (*match_neighbors)(stl_file *stl,
-                     stl_hash_edge *edge_a, stl_hash_edge *edge_b)) {
+                     stl_hash_edge *edge_a, stl_hash_edge *edge_b))
-  stl_hash_edge *link;
+{
  stl_hash_edge *new_edge;
  stl_hash_edge *temp;
  int            chain_number;
  if (stl->error) return;
-  chain_number = stl_get_hash_for_edge(stl->M, &edge);
+  int            chain_number = edge.hash(stl->M);
-
+  stl_hash_edge *link = stl->heads[chain_number];
  link = stl->heads[chain_number];
  stl_hash_edge *new_edge;
  stl_hash_edge *temp;
  if(link == stl->tail) {
    /* This list doesn't have any edges currently in it.  Add this one. */
    new_edge = (stl_hash_edge*)malloc(sizeof(stl_hash_edge));
@ -252,30 +235,17 @@ insert_hash_edge(stl_file *stl, stl_hash_edge edge,
  }
 }
-
+// Return 1 if the edges are not matched.
-static int
+static inline int stl_compare_function(stl_hash_edge *edge_a, stl_hash_edge *edge_b)
-stl_get_hash_for_edge(int M, stl_hash_edge *edge) {
+{
-  return ((edge->key[0] / 23 + edge->key[1] / 19 + edge->key[2] / 17
+    // Don't match edges of the same facet
-           + edge->key[3] /13  + edge->key[4] / 11 + edge->key[5] / 7 ) % M);
+    return (edge_a->facet_number == edge_b->facet_number) || (*edge_a != *edge_b);
 }
-static int
+void stl_check_facets_nearby(stl_file *stl, float tolerance)
-stl_compare_function(stl_hash_edge *edge_a, stl_hash_edge *edge_b) {
+{
-  if(edge_a->facet_number == edge_b->facet_number) {
+  if (stl->error)
-    return 1;			/* Don't match edges of the same facet */
+    return;
  } else {
    return memcmp(edge_a, edge_b, SIZEOF_EDGE_SORT);
  }
 }
 void
 stl_check_facets_nearby(stl_file *stl, float tolerance) {
  stl_hash_edge  edge[3];
  stl_facet      facet;
  int            i;
  int            j;
  if (stl->error) return;
  if(   (stl->stats.connected_facets_1_edge == stl->stats.number_of_facets)
        && (stl->stats.connected_facets_2_edge == stl->stats.number_of_facets)
@ -286,27 +256,19 @@ stl_check_facets_nearby(stl_file *stl, float tolerance) {
  stl_initialize_facet_check_nearby(stl);
-  for(i = 0; i < stl->stats.number_of_facets; i++) {
+  for (int i = 0; i < stl->stats.number_of_facets; ++ i) {
-    facet = stl->facet_start[i];
+    //FIXME is the copy necessary?
-    // Positive and negative zeros are possible in the floats, which are considered equal by the FP unit.
+    stl_facet facet = stl->facet_start[i];
-    // When using a memcmp on raw floats, those numbers report to be different.
+    for (int j = 0; j < 3; j++) {
    // Unify all +0 and -0 to +0 to make the floats equal under memcmp.
    {
      uint32_t *f = (uint32_t*)&facet;
      for (int j = 0; j < 12; ++ j, ++ f) // 3x vertex + normal: 4x3 = 12 floats
        if (*f == 0x80000000)
            // Negative zero, switch to positive zero.
            *f = 0;
    }
    for(j = 0; j < 3; j++) {
      if(stl->neighbors_start[i].neighbor[j] == -1) {
-        edge[j].facet_number = i;
+        stl_hash_edge edge;
-        edge[j].which_edge = j;
+        edge.facet_number = i;
-        if(stl_load_edge_nearby(stl, &edge[j], &facet.vertex[j],
+        edge.which_edge = j;
        if(stl_load_edge_nearby(stl, &edge, &facet.vertex[j],
                                &facet.vertex[(j + 1) % 3],
                                tolerance)) {
          /* only insert edges that have different keys */
-          insert_hash_edge(stl, edge[j], stl_match_neighbors_nearby);
+          insert_hash_edge(stl, edge, stl_match_neighbors_nearby);
        }
      }
    }
@ -315,27 +277,17 @@ stl_check_facets_nearby(stl_file *stl, float tolerance) {
  stl_free_edges(stl);
 }
-static int
+static int stl_load_edge_nearby(stl_file *stl, stl_hash_edge *edge, stl_vertex *a, stl_vertex *b, float tolerance)
-stl_load_edge_nearby(stl_file *stl, stl_hash_edge *edge,
+{
                     stl_vertex *a, stl_vertex *b, float tolerance) {
  // Index of a grid cell spaced by tolerance.
-  uint32_t vertex1[3] = {
+  typedef Eigen::Matrix<int32_t,  3, 1, Eigen::DontAlign> Vec3i;
-    (uint32_t)((a->x - stl->stats.min.x) / tolerance),
+  Vec3i vertex1 = (*a / tolerance).cast<int32_t>();
-    (uint32_t)((a->y - stl->stats.min.y) / tolerance),
+  Vec3i vertex2 = (*b / tolerance).cast<int32_t>();
-    (uint32_t)((a->z - stl->stats.min.z) / tolerance)
+  static_assert(sizeof(Vec3i) == 12, "size of Vec3i incorrect");
  };
  uint32_t vertex2[3] = {
    (uint32_t)((b->x - stl->stats.min.x) / tolerance),
    (uint32_t)((b->y - stl->stats.min.y) / tolerance),
    (uint32_t)((b->z - stl->stats.min.z) / tolerance)
  };
-  if(   (vertex1[0] == vertex2[0])
+  if (vertex1 == vertex2)
-        && (vertex1[1] == vertex2[1])
+    // Both vertices hash to the same value
        && (vertex1[2] == vertex2[2])) {
    /* Both vertices hash to the same value */
    return 0;
  }
  // Ensure identical vertex ordering of edges whose vertices land in equal grid cells.
  // This method is numerically robust.
@ -344,30 +296,27 @@ stl_load_edge_nearby(stl_file *stl, stl_hash_edge *edge,
        ((vertex1[1] != vertex2[1]) ? 
            (vertex1[1] < vertex2[1]) : 
            (vertex1[2] < vertex2[2]))) {
-    memcpy(&edge->key[0], vertex1, sizeof(stl_vertex));
+    memcpy(&edge->key[0],                  vertex1.data(), sizeof(stl_vertex));
-    memcpy(&edge->key[3], vertex2, sizeof(stl_vertex));
+    memcpy(&edge->key[sizeof(stl_vertex)], vertex2.data(), sizeof(stl_vertex));
  } else {
-    memcpy(&edge->key[0], vertex2, sizeof(stl_vertex));
+    memcpy(&edge->key[0],                  vertex2.data(), sizeof(stl_vertex));
-    memcpy(&edge->key[3], vertex1, sizeof(stl_vertex));
+    memcpy(&edge->key[sizeof(stl_vertex)], vertex1.data(), sizeof(stl_vertex));
    edge->which_edge += 3; /* this edge is loaded backwards */
  }
  return 1;
 }
-static void
+static void stl_free_edges(stl_file *stl)
-stl_free_edges(stl_file *stl) {
+{
-  int i;
+  if (stl->error)
-  stl_hash_edge *temp;
+    return;
  if (stl->error) return;
  if(stl->stats.malloced != stl->stats.freed) {
-    for(i = 0; i < stl->M; i++) {
+    for (int i = 0; i < stl->M; i++) {
-      for(temp = stl->heads[i]; stl->heads[i] != stl->tail;
+      for (stl_hash_edge *temp = stl->heads[i]; stl->heads[i] != stl->tail; temp = stl->heads[i]) {
          temp = stl->heads[i]) {
        stl->heads[i] = stl->heads[i]->next;
        free(temp);
-        stl->stats.freed++;
+        ++ stl->stats.freed;
      }
    }
  }
@ -375,8 +324,8 @@ stl_free_edges(stl_file *stl) {
  free(stl->tail);
 }
-static void
+static void stl_initialize_facet_check_nearby(stl_file *stl)
-stl_initialize_facet_check_nearby(stl_file *stl) {
+{
  int i;
  if (stl->error) return;
@ -467,16 +416,8 @@ stl_record_neighbors(stl_file *stl,
  }
 }
-static void
+static void stl_match_neighbors_nearby(stl_file *stl, stl_hash_edge *edge_a, stl_hash_edge *edge_b)
-stl_match_neighbors_exact(stl_file *stl,
+{
                          stl_hash_edge *edge_a, stl_hash_edge *edge_b) {
  if (stl->error) return;
  stl_record_neighbors(stl, edge_a, edge_b);
 }
 static void
 stl_match_neighbors_nearby(stl_file *stl,
                           stl_hash_edge *edge_a, stl_hash_edge *edge_b) {
  int facet1;
  int facet2;
  int vertex1;
@ -517,9 +458,7 @@ stl_match_neighbors_nearby(stl_file *stl,
 }
-static void
+static void stl_change_vertices(stl_file *stl, int facet_num, int vnot, stl_vertex new_vertex) {
 stl_change_vertices(stl_file *stl, int facet_num, int vnot,
                    stl_vertex new_vertex) {
  int first_facet;
  int direction;
  int next_edge;
@ -551,30 +490,30 @@ stl_change_vertices(stl_file *stl, int facet_num, int vnot,
      }
    }
 #if 0
-    if (stl->facet_start[facet_num].vertex[pivot_vertex].x == new_vertex.x &&
+    if (stl->facet_start[facet_num].vertex[pivot_vertex](0) == new_vertex(0) &&
-        stl->facet_start[facet_num].vertex[pivot_vertex].y == new_vertex.y &&
+        stl->facet_start[facet_num].vertex[pivot_vertex](1) == new_vertex(1) &&
-        stl->facet_start[facet_num].vertex[pivot_vertex].z == new_vertex.z)
+        stl->facet_start[facet_num].vertex[pivot_vertex](2) == new_vertex(2))
      printf("Changing vertex %f,%f,%f: Same !!!\r\n", 
-        new_vertex.x, new_vertex.y, new_vertex.z);
+        new_vertex(0), new_vertex(1), new_vertex(2));
    else {
-      if (stl->facet_start[facet_num].vertex[pivot_vertex].x != new_vertex.x)
+      if (stl->facet_start[facet_num].vertex[pivot_vertex](0) != new_vertex(0))
        printf("Changing coordinate x, vertex %e (0x%08x) to %e(0x%08x)\r\n", 
-          stl->facet_start[facet_num].vertex[pivot_vertex].x,
+          stl->facet_start[facet_num].vertex[pivot_vertex](0),
-          *reinterpret_cast<const int*>(&stl->facet_start[facet_num].vertex[pivot_vertex].x),
+          *reinterpret_cast<const int*>(&stl->facet_start[facet_num].vertex[pivot_vertex](0)),
-          new_vertex.x,
+          new_vertex(0),
-          *reinterpret_cast<const int*>(&new_vertex.x));
+          *reinterpret_cast<const int*>(&new_vertex(0)));
-      if (stl->facet_start[facet_num].vertex[pivot_vertex].y != new_vertex.y)
+      if (stl->facet_start[facet_num].vertex[pivot_vertex](1) != new_vertex(1))
        printf("Changing coordinate x, vertex %e (0x%08x) to %e(0x%08x)\r\n", 
-          stl->facet_start[facet_num].vertex[pivot_vertex].y,
+          stl->facet_start[facet_num].vertex[pivot_vertex](1),
-          *reinterpret_cast<const int*>(&stl->facet_start[facet_num].vertex[pivot_vertex].y),
+          *reinterpret_cast<const int*>(&stl->facet_start[facet_num].vertex[pivot_vertex](1)),
-          new_vertex.y,
+          new_vertex(1),
-          *reinterpret_cast<const int*>(&new_vertex.y));
+          *reinterpret_cast<const int*>(&new_vertex(1)));
-      if (stl->facet_start[facet_num].vertex[pivot_vertex].z != new_vertex.z)
+      if (stl->facet_start[facet_num].vertex[pivot_vertex](2) != new_vertex(2))
        printf("Changing coordinate x, vertex %e (0x%08x) to %e(0x%08x)\r\n", 
-          stl->facet_start[facet_num].vertex[pivot_vertex].z,
+          stl->facet_start[facet_num].vertex[pivot_vertex](2),
-          *reinterpret_cast<const int*>(&stl->facet_start[facet_num].vertex[pivot_vertex].z),
+          *reinterpret_cast<const int*>(&stl->facet_start[facet_num].vertex[pivot_vertex](2)),
-          new_vertex.z,
+          new_vertex(2),
-          *reinterpret_cast<const int*>(&new_vertex.z));
+          *reinterpret_cast<const int*>(&new_vertex(2)));
    }
 #endif
    stl->facet_start[facet_num].vertex[pivot_vertex] = new_vertex;
@ -595,7 +534,6 @@ Try using a smaller tolerance or don't do a nearby check\n");
  }
 }
 static void
 stl_which_vertices_to_change(stl_file *stl, stl_hash_edge *edge_a,
                             stl_hash_edge *edge_b, int *facet1, int *vertex1,
@ -622,11 +560,10 @@ stl_which_vertices_to_change(stl_file *stl, stl_hash_edge *edge_a,
    v1b = (edge_b->which_edge + 1) % 3;
  }
-  /* Of the first pair, which vertex, if any, should be changed */
+  // Of the first pair, which vertex, if any, should be changed
-  if(!memcmp(&stl->facet_start[edge_a->facet_number].vertex[v1a],
+  if(stl->facet_start[edge_a->facet_number].vertex[v1a] == 
-             &stl->facet_start[edge_b->facet_number].vertex[v1b],
+     stl->facet_start[edge_b->facet_number].vertex[v1b]) {
-             sizeof(stl_vertex))) {
+    // These facets are already equal.  No need to change.
    /* These facets are already equal.  No need to change. */
    *facet1 = -1;
  } else {
    if(   (stl->neighbors_start[edge_a->facet_number].neighbor[v1a] == -1)
@ -644,10 +581,9 @@ stl_which_vertices_to_change(stl_file *stl, stl_hash_edge *edge_a,
  }
  /* Of the second pair, which vertex, if any, should be changed */
-  if(!memcmp(&stl->facet_start[edge_a->facet_number].vertex[v2a],
+  if(stl->facet_start[edge_a->facet_number].vertex[v2a] == 
-             &stl->facet_start[edge_b->facet_number].vertex[v2b],
+     stl->facet_start[edge_b->facet_number].vertex[v2b]) {
-             sizeof(stl_vertex))) {
+    // These facets are already equal.  No need to change.
    /* These facets are already equal.  No need to change. */
    *facet2 = -1;
  } else {
    if(   (stl->neighbors_start[edge_a->facet_number].neighbor[v2a] == -1)
@ -718,40 +654,35 @@ in stl_remove_facet: neighbor = %d numfacets = %d this is wrong\n",
  }
 }
-void
+void stl_remove_unconnected_facets(stl_file *stl)
-stl_remove_unconnected_facets(stl_file *stl) {
+{
  /* A couple of things need to be done here.  One is to remove any */
  /* completely unconnected facets (0 edges connected) since these are */
  /* useless and could be completely wrong.   The second thing that needs to */
  /* be done is to remove any degenerate facets that were created during */
  /* stl_check_facets_nearby(). */
  if (stl->error)
    return;
-  int i;
+  // remove degenerate facets
-
+  for (int i = 0; i < stl->stats.number_of_facets; ++ i) {
-  if (stl->error) return;
+    if(stl->facet_start[i].vertex[0] == stl->facet_start[i].vertex[1] ||
-
+       stl->facet_start[i].vertex[0] == stl->facet_start[i].vertex[2] ||
-  /* remove degenerate facets */
+       stl->facet_start[i].vertex[1] == stl->facet_start[i].vertex[2]) {
  for(i = 0; i < stl->stats.number_of_facets; i++) {
    if(   !memcmp(&stl->facet_start[i].vertex[0],
                  &stl->facet_start[i].vertex[1], sizeof(stl_vertex))
          || !memcmp(&stl->facet_start[i].vertex[1],
                     &stl->facet_start[i].vertex[2], sizeof(stl_vertex))
          || !memcmp(&stl->facet_start[i].vertex[0],
                     &stl->facet_start[i].vertex[2], sizeof(stl_vertex))) {
      stl_remove_degenerate(stl, i);
      i--;
    }
  }
  if(stl->stats.connected_facets_1_edge < stl->stats.number_of_facets) {
-    /* remove completely unconnected facets */
+    // remove completely unconnected facets
-    for(i = 0; i < stl->stats.number_of_facets; i++) {
+    for (int i = 0; i < stl->stats.number_of_facets; i++) {
-      if(   (stl->neighbors_start[i].neighbor[0] == -1)
+      if (stl->neighbors_start[i].neighbor[0] == -1 &&
-            && (stl->neighbors_start[i].neighbor[1] == -1)
+          stl->neighbors_start[i].neighbor[1] == -1 &&
-            && (stl->neighbors_start[i].neighbor[2] == -1)) {
+          stl->neighbors_start[i].neighbor[2] == -1) {
-        /* This facet is completely unconnected.  Remove it. */
+        // This facet is completely unconnected.  Remove it.
        stl_remove_facet(stl, i);
-        i--;
+        -- i;
      }
    }
  }
@ -771,30 +702,24 @@ stl_remove_degenerate(stl_file *stl, int facet) {
  if (stl->error) return;
-  if(   !memcmp(&stl->facet_start[facet].vertex[0],
+  if (stl->facet_start[facet].vertex[0] == stl->facet_start[facet].vertex[1] &&
-                &stl->facet_start[facet].vertex[1], sizeof(stl_vertex))
+      stl->facet_start[facet].vertex[1] == stl->facet_start[facet].vertex[2]) {
        && !memcmp(&stl->facet_start[facet].vertex[1],
                   &stl->facet_start[facet].vertex[2], sizeof(stl_vertex))) {
    /* all 3 vertices are equal.  Just remove the facet.  I don't think*/
    /* this is really possible, but just in case... */
    printf("removing a facet in stl_remove_degenerate\n");
    stl_remove_facet(stl, facet);
    return;
  }
-  if(!memcmp(&stl->facet_start[facet].vertex[0],
+  if (stl->facet_start[facet].vertex[0] == stl->facet_start[facet].vertex[1]) {
             &stl->facet_start[facet].vertex[1], sizeof(stl_vertex))) {
    edge1 = 1;
    edge2 = 2;
    edge3 = 0;
-  } else if(!memcmp(&stl->facet_start[facet].vertex[1],
+  } else if (stl->facet_start[facet].vertex[1] == stl->facet_start[facet].vertex[2]) {
                    &stl->facet_start[facet].vertex[2], sizeof(stl_vertex))) {
    edge1 = 0;
    edge2 = 2;
    edge3 = 1;
-  } else if(!memcmp(&stl->facet_start[facet].vertex[2],
+  } else if (stl->facet_start[facet].vertex[2] == stl->facet_start[facet].vertex[0]) {
                    &stl->facet_start[facet].vertex[0], sizeof(stl_vertex))) {
    edge1 = 0;
    edge2 = 1;
    edge3 = 2;
@ -883,7 +808,7 @@ stl_fill_holes(stl_file *stl) {
      stl_load_edge_exact(stl, &edge, &facet.vertex[j],
                          &facet.vertex[(j + 1) % 3]);
-      insert_hash_edge(stl, edge, stl_match_neighbors_exact);
+      insert_hash_edge(stl, edge, stl_record_neighbors);
    }
  }
@ -939,7 +864,7 @@ stl_fill_holes(stl_file *stl) {
            stl_load_edge_exact(stl, &edge, &new_facet.vertex[k],
                                &new_facet.vertex[(k + 1) % 3]);
-            insert_hash_edge(stl, edge, stl_match_neighbors_exact);
+            insert_hash_edge(stl, edge, stl_record_neighbors);
          }
          break;
        } else {
@ -977,9 +902,7 @@ stl_add_facet(stl_file *stl, stl_facet *new_facet) {
  stl->facet_start[stl->stats.number_of_facets] = *new_facet;
  /* note that the normal vector is not set here, just initialized to 0 */
-  stl->facet_start[stl->stats.number_of_facets].normal.x = 0.0;
+  stl->facet_start[stl->stats.number_of_facets].normal = stl_normal::Zero();
  stl->facet_start[stl->stats.number_of_facets].normal.y = 0.0;
  stl->facet_start[stl->stats.number_of_facets].normal.z = 0.0;
  stl->neighbors_start[stl->stats.number_of_facets].neighbor[0] = -1;
  stl->neighbors_start[stl->stats.number_of_facets].neighbor[1] = -1;
--- a/xs/src/admesh/normals.cpp
+++ b/xs/src/admesh/normals.cpp
@ -27,12 +27,6 @@
 #include "stl.h"
 static void stl_reverse_vector(float v[]) {
  v[0] *= -1;
  v[1] *= -1;
  v[2] *= -1;
 }
 static int stl_check_normal_vector(stl_file *stl, int facet_num, int normal_fix_flag);
 static void
@ -228,102 +222,52 @@ static int stl_check_normal_vector(stl_file *stl, int facet_num, int normal_fix_
  /* Returns 2 if the normal is not within tolerance and backwards */
  /* Returns 4 if the status is unknown. */
  float normal[3];
  float test_norm[3];
  stl_facet *facet;
  facet = &stl->facet_start[facet_num];
  stl_normal normal;
  stl_calculate_normal(normal, facet);
  stl_normalize_vector(normal);
  stl_normal normal_dif = (normal - facet->normal).cwiseAbs();
-  if(   (ABS(normal[0] - facet->normal.x) < 0.001)
+  const float eps = 0.001f;
-        && (ABS(normal[1] - facet->normal.y) < 0.001)
+  if (normal_dif(0) < eps && normal_dif(1) < eps && normal_dif(2) < eps) {
        && (ABS(normal[2] - facet->normal.z) < 0.001)) {
    /* It is not really necessary to change the values here */
    /* but just for consistency, I will. */
-    facet->normal.x = normal[0];
+    facet->normal = normal;
    facet->normal.y = normal[1];
    facet->normal.z = normal[2];
    return 0;
  }
-  test_norm[0] = facet->normal.x;
+  stl_normal test_norm = facet->normal;
  test_norm[1] = facet->normal.y;
  test_norm[2] = facet->normal.z;
  stl_normalize_vector(test_norm);
-  if(   (ABS(normal[0] - test_norm[0]) < 0.001)
+  normal_dif = (normal - test_norm).cwiseAbs();
-        && (ABS(normal[1] - test_norm[1]) < 0.001)
+  if (normal_dif(0) < eps && normal_dif(1) < eps && normal_dif(2) < eps) {
        && (ABS(normal[2] - test_norm[2]) < 0.001)) {
    if(normal_fix_flag) {
-      facet->normal.x = normal[0];
+      facet->normal = normal;
      facet->normal.y = normal[1];
      facet->normal.z = normal[2];
      stl->stats.normals_fixed += 1;
    }
    return 1;
  }
-  stl_reverse_vector(test_norm);
+  test_norm *= -1.f;
-  if(   (ABS(normal[0] - test_norm[0]) < 0.001)
+  normal_dif = (normal - test_norm).cwiseAbs();
-        && (ABS(normal[1] - test_norm[1]) < 0.001)
+  if (normal_dif(0) < eps && normal_dif(1) < eps && normal_dif(2) < eps) {
-        && (ABS(normal[2] - test_norm[2]) < 0.001)) {
+    // Facet is backwards.
    /* Facet is backwards. */
    if(normal_fix_flag) {
-      facet->normal.x = normal[0];
+      facet->normal = normal;
      facet->normal.y = normal[1];
      facet->normal.z = normal[2];
      stl->stats.normals_fixed += 1;
    }
    return 2;
  }
  if(normal_fix_flag) {
-    facet->normal.x = normal[0];
+    facet->normal = normal;
    facet->normal.y = normal[1];
    facet->normal.z = normal[2];
    stl->stats.normals_fixed += 1;
  }
  return 4;
 }
-void stl_calculate_normal(float normal[], stl_facet *facet) {
+void stl_fix_normal_values(stl_file *stl) {
  float v1[3] = {
    facet->vertex[1].x - facet->vertex[0].x,
    facet->vertex[1].y - facet->vertex[0].y,
    facet->vertex[1].z - facet->vertex[0].z
  };
  float v2[3] = {
    facet->vertex[2].x - facet->vertex[0].x,
    facet->vertex[2].y - facet->vertex[0].y,
    facet->vertex[2].z - facet->vertex[0].z
  };
  normal[0] = (float)((double)v1[1] * (double)v2[2]) - ((double)v1[2] * (double)v2[1]);
  normal[1] = (float)((double)v1[2] * (double)v2[0]) - ((double)v1[0] * (double)v2[2]);
  normal[2] = (float)((double)v1[0] * (double)v2[1]) - ((double)v1[1] * (double)v2[0]);
 }
 void stl_normalize_vector(float v[]) {
  double length;
  double factor;
  float min_normal_length;
  length = sqrt((double)v[0] * (double)v[0] + (double)v[1] * (double)v[1] + (double)v[2] * (double)v[2]);
  min_normal_length = 0.000000000001;
  if(length < min_normal_length) {
    v[0] = 0.0;
    v[1] = 0.0;
    v[2] = 0.0;
    return;
  }
  factor = 1.0 / length;
  v[0] *= factor;
  v[1] *= factor;
  v[2] *= factor;
 }
 void
 stl_fix_normal_values(stl_file *stl) {
  int i;
  if (stl->error) return;
@ -333,20 +277,16 @@ stl_fix_normal_values(stl_file *stl) {
  }
 }
-void
+void stl_reverse_all_facets(stl_file *stl)
-stl_reverse_all_facets(stl_file *stl) {
+{
-  int i;
+  if (stl->error)
-  float normal[3];
+  	return;
-  if (stl->error) return;
+  stl_normal normal;
-
+  for(int i = 0; i < stl->stats.number_of_facets; i++) {
  for(i = 0; i < stl->stats.number_of_facets; i++) {
    stl_reverse_facet(stl, i);
    stl_calculate_normal(normal, &stl->facet_start[i]);
    stl_normalize_vector(normal);
-    stl->facet_start[i].normal.x = normal[0];
+    stl->facet_start[i].normal = normal;
    stl->facet_start[i].normal.y = normal[1];
    stl->facet_start[i].normal.z = normal[2];
  }
 }
--- a/xs/src/admesh/shared.cpp
+++ b/xs/src/admesh/shared.cpp
@ -169,7 +169,7 @@ stl_write_off(stl_file *stl, char *file) {
  for(i = 0; i < stl->stats.shared_vertices; i++) {
    fprintf(fp, "\t%f %f %f\n",
-            stl->v_shared[i].x, stl->v_shared[i].y, stl->v_shared[i].z);
+            stl->v_shared[i](0), stl->v_shared[i](1), stl->v_shared[i](2));
  }
  for(i = 0; i < stl->stats.number_of_facets; i++) {
    fprintf(fp, "\t3 %d %d %d\n", stl->v_indices[i].vertex[0],
@ -216,10 +216,10 @@ stl_write_vrml(stl_file *stl, char *file) {
  for(i = 0; i < (stl->stats.shared_vertices - 1); i++) {
    fprintf(fp, "\t\t\t\t%f %f %f,\n",
-            stl->v_shared[i].x, stl->v_shared[i].y, stl->v_shared[i].z);
+            stl->v_shared[i](0), stl->v_shared[i](1), stl->v_shared[i](2));
  }
  fprintf(fp, "\t\t\t\t%f %f %f]\n",
-          stl->v_shared[i].x, stl->v_shared[i].y, stl->v_shared[i].z);
+          stl->v_shared[i](0), stl->v_shared[i](1), stl->v_shared[i](2));
  fprintf(fp, "\t\t}\n");
  fprintf(fp, "\t\tDEF STLTriangles IndexedFaceSet {\n");
  fprintf(fp, "\t\t\tcoordIndex [\n");
@ -254,7 +254,7 @@ void stl_write_obj (stl_file *stl, char *file) {
  }
  for (i = 0; i < stl->stats.shared_vertices; i++) {
-    fprintf(fp, "v %f %f %f\n", stl->v_shared[i].x, stl->v_shared[i].y, stl->v_shared[i].z);
+    fprintf(fp, "v %f %f %f\n", stl->v_shared[i](0), stl->v_shared[i](1), stl->v_shared[i](2));
  }
  for (i = 0; i < stl->stats.number_of_facets; i++) {
    fprintf(fp, "f %d %d %d\n", stl->v_indices[i].vertex[0]+1, stl->v_indices[i].vertex[1]+1, stl->v_indices[i].vertex[2]+1);
--- a/xs/src/admesh/stl.h
+++ b/xs/src/admesh/stl.h
@ -27,9 +27,7 @@
 #include <stdint.h>
 #include <stddef.h>
-#define STL_MAX(A,B) ((A)>(B)? (A):(B))
+#include <Eigen/Geometry> 
 #define STL_MIN(A,B) ((A)<(B)? (A):(B))
 #define ABS(X)  ((X) < 0 ? -(X) : (X))
 // Size of the binary STL header, free form.
 #define LABEL_SIZE             80
@ -39,31 +37,16 @@
 #define HEADER_SIZE            84
 #define STL_MIN_FILE_SIZE      284
 #define ASCII_LINES_PER_FACET  7
 // Comparing an edge by memcmp, 2x3x4 bytes = 24
 #define SIZEOF_EDGE_SORT       24
 typedef struct {
  float x;
  float y;
  float z;
 } stl_vertex;
 typedef Eigen::Matrix<float, 3, 1, Eigen::DontAlign> stl_vertex;
 typedef Eigen::Matrix<float, 3, 1, Eigen::DontAlign> stl_normal;
 static_assert(sizeof(stl_vertex) == 12, "size of stl_vertex incorrect");
 typedef struct {
  float x;
  float y;
  float z;
 } stl_normal;
 static_assert(sizeof(stl_normal) == 12, "size of stl_normal incorrect");
 typedef char stl_extra[2];
 typedef struct {
  stl_normal normal;
  stl_vertex vertex[3];
-  stl_extra  extra;
+  char       extra[2];
 } stl_facet;
 #define SIZEOF_STL_FACET       50
@ -81,8 +64,12 @@ typedef struct {
 } stl_edge;
 typedef struct stl_hash_edge {
-  // Key of a hash edge: 2x binary copy of a floating point vertex.
+  // Key of a hash edge: sorted vertices of the edge.
-  uint32_t       key[6];
+  unsigned char key[2 * sizeof(stl_vertex)];
  // Compare two keys.
  bool operator==(const stl_hash_edge &rhs) { return memcmp(key, rhs.key, sizeof(key)) == 0; }
  bool operator!=(const stl_hash_edge &rhs) { return ! (*this == rhs); }
  int  hash(int M) const { return ((key[0] / 23 + key[1] / 19 + key[2] / 17 + key[3] /13  + key[4] / 11 + key[5] / 7 ) % M); }
  // Index of a facet owning this edge.
  int            facet_number;
  // Index of this edge inside the facet with an index of facet_number.
@ -91,8 +78,6 @@ typedef struct stl_hash_edge {
  struct stl_hash_edge  *next;
 } stl_hash_edge;
 static_assert(offsetof(stl_hash_edge, facet_number) == SIZEOF_EDGE_SORT, "size of stl_hash_edge.key incorrect");
 typedef struct {
  // Index of a neighbor facet.
  int   neighbor[3];
@ -179,8 +164,8 @@ extern void stl_fix_normal_values(stl_file *stl);
 extern void stl_reverse_all_facets(stl_file *stl);
 extern void stl_translate(stl_file *stl, float x, float y, float z);
 extern void stl_translate_relative(stl_file *stl, float x, float y, float z);
-extern void stl_scale_versor(stl_file *stl, float versor[3]);
+extern void stl_scale_versor(stl_file *stl, const stl_vertex &versor);
-extern void stl_scale(stl_file *stl, float factor);
+inline void stl_scale(stl_file *stl, float factor) { stl_scale_versor(stl, stl_vertex(factor, factor, factor)); }
 extern void stl_rotate_x(stl_file *stl, float angle);
 extern void stl_rotate_y(stl_file *stl, float angle);
 extern void stl_rotate_z(stl_file *stl, float angle);
@ -195,8 +180,20 @@ extern void stl_write_obj(stl_file *stl, char *file);
 extern void stl_write_off(stl_file *stl, char *file);
 extern void stl_write_dxf(stl_file *stl, char *file, char *label);
 extern void stl_write_vrml(stl_file *stl, char *file);
-extern void stl_calculate_normal(float normal[], stl_facet *facet);
+inline void stl_calculate_normal(stl_normal &normal, stl_facet *facet) {
-extern void stl_normalize_vector(float v[]);
+  normal = (facet->vertex[1] - facet->vertex[0]).cross(facet->vertex[2] - facet->vertex[0]);
 }
 // Normalize the vector in place.
 // The length is evaluated in double precision. A vector shorter than 1e-12
 // is replaced by the zero vector instead of being divided by a tiny length.
 inline void stl_normalize_vector(stl_normal &normal) {
  double length = normal.cast<double>().norm();
  if (length < 0.000000000001)
    normal = stl_normal::Zero();
  else
    normal *= (1.0 / length);
 }
 // Strict lexicographic "less than" of two vertices: the first coordinate
 // (in the order 0, 1, 2) that differs decides the result; when the first two
 // coordinates are equal, the third one is compared.
 inline bool stl_vertex_lower(const stl_vertex &a, const stl_vertex &b) {
  return (a(0) != b(0)) ? (a(0) < b(0)) :
        ((a(1) != b(1)) ? (a(1) < b(1)) : (a(2) < b(2)));
 }
 extern void stl_calculate_volume(stl_file *stl);
 extern void stl_repair(stl_file *stl, int fixall_flag, int exact_flag, int tolerance_flag, float tolerance, int increment_flag, float increment, int nearby_flag, int iterations, int remove_unconnected_flag, int fill_holes_flag, int normal_directions_flag, int normal_values_flag, int reverse_all_flag, int verbose_flag);
@ -204,8 +201,8 @@ extern void stl_repair(stl_file *stl, int fixall_flag, int exact_flag, int toler
 extern void stl_initialize(stl_file *stl);
 extern void stl_count_facets(stl_file *stl, const char *file);
 extern void stl_allocate(stl_file *stl);
-extern void stl_read(stl_file *stl, int first_facet, int first);
+extern void stl_read(stl_file *stl, int first_facet, bool first);
-extern void stl_facet_stats(stl_file *stl, stl_facet facet, int first);
+extern void stl_facet_stats(stl_file *stl, stl_facet facet, bool &first);
 extern void stl_reallocate(stl_file *stl);
 extern void stl_add_facet(stl_file *stl, stl_facet *new_facet);
 extern void stl_get_size(stl_file *stl);
--- a/xs/src/admesh/stl_io.cpp
+++ b/xs/src/admesh/stl_io.cpp
@ -44,9 +44,9 @@ stl_print_edges(stl_file *stl, FILE *file) {
  for(i = 0; i < edges_allocated; i++) {
    fprintf(file, "%d, %f, %f, %f, %f, %f, %f\n",
            stl->edge_start[i].facet_number,
-            stl->edge_start[i].p1.x, stl->edge_start[i].p1.y,
+            stl->edge_start[i].p1(0), stl->edge_start[i].p1(1),
-            stl->edge_start[i].p1.z, stl->edge_start[i].p2.x,
+            stl->edge_start[i].p1(2), stl->edge_start[i].p2(0),
-            stl->edge_start[i].p2.y, stl->edge_start[i].p2.z);
+            stl->edge_start[i].p2(1), stl->edge_start[i].p2(2));
  }
 }
@ -75,11 +75,11 @@ File type          : ASCII STL file\n");
 Header             : %s\n", stl->stats.header);
  fprintf(file, "============== Size ==============\n");
  fprintf(file, "Min X = % f, Max X = % f\n",
-          stl->stats.min.x, stl->stats.max.x);
+          stl->stats.min(0), stl->stats.max(0));
  fprintf(file, "Min Y = % f, Max Y = % f\n",
-          stl->stats.min.y, stl->stats.max.y);
+          stl->stats.min(1), stl->stats.max(1));
  fprintf(file, "Min Z = % f, Max Z = % f\n",
-          stl->stats.min.z, stl->stats.max.z);
+          stl->stats.min(2), stl->stats.max(2));
  fprintf(file, "\
 ========= Facet Status ========== Original ============ Final ====\n");
@ -149,18 +149,18 @@ stl_write_ascii(stl_file *stl, const char *file, const char *label) {
  for(i = 0; i < stl->stats.number_of_facets; i++) {
    fprintf(fp, "  facet normal % .8E % .8E % .8E\n",
-            stl->facet_start[i].normal.x, stl->facet_start[i].normal.y,
+            stl->facet_start[i].normal(0), stl->facet_start[i].normal(1),
-            stl->facet_start[i].normal.z);
+            stl->facet_start[i].normal(2));
    fprintf(fp, "    outer loop\n");
    fprintf(fp, "      vertex % .8E % .8E % .8E\n",
-            stl->facet_start[i].vertex[0].x, stl->facet_start[i].vertex[0].y,
+            stl->facet_start[i].vertex[0](0), stl->facet_start[i].vertex[0](1),
-            stl->facet_start[i].vertex[0].z);
+            stl->facet_start[i].vertex[0](2));
    fprintf(fp, "      vertex % .8E % .8E % .8E\n",
-            stl->facet_start[i].vertex[1].x, stl->facet_start[i].vertex[1].y,
+            stl->facet_start[i].vertex[1](0), stl->facet_start[i].vertex[1](1),
-            stl->facet_start[i].vertex[1].z);
+            stl->facet_start[i].vertex[1](2));
    fprintf(fp, "      vertex % .8E % .8E % .8E\n",
-            stl->facet_start[i].vertex[2].x, stl->facet_start[i].vertex[2].y,
+            stl->facet_start[i].vertex[2](0), stl->facet_start[i].vertex[2](1),
-            stl->facet_start[i].vertex[2].z);
+            stl->facet_start[i].vertex[2](2));
    fprintf(fp, "    endloop\n");
    fprintf(fp, "  endfacet\n");
  }
@ -264,9 +264,9 @@ void
 stl_write_vertex(stl_file *stl, int facet, int vertex) {
  if (stl->error) return;
  printf("  vertex %d/%d % .8E % .8E % .8E\n", vertex, facet,
-         stl->facet_start[facet].vertex[vertex].x,
+         stl->facet_start[facet].vertex[vertex](0),
-         stl->facet_start[facet].vertex[vertex].y,
+         stl->facet_start[facet].vertex[vertex](1),
-         stl->facet_start[facet].vertex[vertex].z);
+         stl->facet_start[facet].vertex[vertex](2));
 }
 void
@ -309,10 +309,10 @@ stl_write_quad_object(stl_file *stl, char *file) {
  int       i;
  int       j;
  char      *error_msg;
-  stl_vertex connect_color;
+  stl_vertex connect_color = stl_vertex::Zero();
-  stl_vertex uncon_1_color;
+  stl_vertex uncon_1_color = stl_vertex::Zero();
-  stl_vertex uncon_2_color;
+  stl_vertex uncon_2_color = stl_vertex::Zero();
-  stl_vertex uncon_3_color;
+  stl_vertex uncon_3_color = stl_vertex::Zero();
  stl_vertex color;
  if (stl->error) return;
@ -330,19 +330,6 @@ stl_write_quad_object(stl_file *stl, char *file) {
    return;
  }
  connect_color.x = 0.0;
  connect_color.y = 0.0;
  connect_color.z = 1.0;
  uncon_1_color.x = 0.0;
  uncon_1_color.y = 1.0;
  uncon_1_color.z = 0.0;
  uncon_2_color.x = 1.0;
  uncon_2_color.y = 1.0;
  uncon_2_color.z = 1.0;
  uncon_3_color.x = 1.0;
  uncon_3_color.y = 0.0;
  uncon_3_color.z = 0.0;
  fprintf(fp, "CQUAD\n");
  for(i = 0; i < stl->stats.number_of_facets; i++) {
    j = ((stl->neighbors_start[i].neighbor[0] == -1) +
@ -358,21 +345,21 @@ stl_write_quad_object(stl_file *stl, char *file) {
      color = uncon_3_color;
    }
    fprintf(fp, "%f %f %f    %1.1f %1.1f %1.1f 1\n",
-            stl->facet_start[i].vertex[0].x,
+            stl->facet_start[i].vertex[0](0),
-            stl->facet_start[i].vertex[0].y,
+            stl->facet_start[i].vertex[0](1),
-            stl->facet_start[i].vertex[0].z, color.x, color.y, color.z);
+            stl->facet_start[i].vertex[0](2), color(0), color(1), color(2));
    fprintf(fp, "%f %f %f    %1.1f %1.1f %1.1f 1\n",
-            stl->facet_start[i].vertex[1].x,
+            stl->facet_start[i].vertex[1](0),
-            stl->facet_start[i].vertex[1].y,
+            stl->facet_start[i].vertex[1](1),
-            stl->facet_start[i].vertex[1].z, color.x, color.y, color.z);
+            stl->facet_start[i].vertex[1](2), color(0), color(1), color(2));
    fprintf(fp, "%f %f %f    %1.1f %1.1f %1.1f 1\n",
-            stl->facet_start[i].vertex[2].x,
+            stl->facet_start[i].vertex[2](0),
-            stl->facet_start[i].vertex[2].y,
+            stl->facet_start[i].vertex[2](1),
-            stl->facet_start[i].vertex[2].z, color.x, color.y, color.z);
+            stl->facet_start[i].vertex[2](2), color(0), color(1), color(2));
    fprintf(fp, "%f %f %f    %1.1f %1.1f %1.1f 1\n",
-            stl->facet_start[i].vertex[2].x,
+            stl->facet_start[i].vertex[2](0),
-            stl->facet_start[i].vertex[2].y,
+            stl->facet_start[i].vertex[2](1),
-            stl->facet_start[i].vertex[2].z, color.x, color.y, color.z);
+            stl->facet_start[i].vertex[2](2), color(0), color(1), color(2));
  }
  fclose(fp);
 }
@ -409,17 +396,17 @@ stl_write_dxf(stl_file *stl, char *file, char *label) {
  for(i = 0; i < stl->stats.number_of_facets; i++) {
    fprintf(fp, "0\n3DFACE\n8\n0\n");
    fprintf(fp, "10\n%f\n20\n%f\n30\n%f\n",
-            stl->facet_start[i].vertex[0].x, stl->facet_start[i].vertex[0].y,
+            stl->facet_start[i].vertex[0](0), stl->facet_start[i].vertex[0](1),
-            stl->facet_start[i].vertex[0].z);
+            stl->facet_start[i].vertex[0](2));
    fprintf(fp, "11\n%f\n21\n%f\n31\n%f\n",
-            stl->facet_start[i].vertex[1].x, stl->facet_start[i].vertex[1].y,
+            stl->facet_start[i].vertex[1](0), stl->facet_start[i].vertex[1](1),
-            stl->facet_start[i].vertex[1].z);
+            stl->facet_start[i].vertex[1](2));
    fprintf(fp, "12\n%f\n22\n%f\n32\n%f\n",
-            stl->facet_start[i].vertex[2].x, stl->facet_start[i].vertex[2].y,
+            stl->facet_start[i].vertex[2](0), stl->facet_start[i].vertex[2](1),
-            stl->facet_start[i].vertex[2].z);
+            stl->facet_start[i].vertex[2](2));
    fprintf(fp, "13\n%f\n23\n%f\n33\n%f\n",
-            stl->facet_start[i].vertex[2].x, stl->facet_start[i].vertex[2].y,
+            stl->facet_start[i].vertex[2](0), stl->facet_start[i].vertex[2](1),
-            stl->facet_start[i].vertex[2].z);
+            stl->facet_start[i].vertex[2](2));
  }
  fprintf(fp, "0\nENDSEC\n0\nEOF\n");
--- a/xs/src/admesh/stlinit.cpp
+++ b/xs/src/admesh/stlinit.cpp
@ -40,7 +40,7 @@ stl_open(stl_file *stl, const char *file) {
  stl_initialize(stl);
  stl_count_facets(stl, file);
  stl_allocate(stl);
-  stl_read(stl, 0, 1);
+  stl_read(stl, 0, true);
  if (!stl->error) fclose(stl->fp);
 }
@ -227,7 +227,7 @@ stl_open_merge(stl_file *stl, char *file_to_merge) {
     Start at num_facets_so_far, the index to the first unused facet.  Also say
     that this isn't our first time so we should augment stats like min and max
     instead of erasing them. */
-  stl_read(stl, num_facets_so_far, 0);
+  stl_read(stl, num_facets_so_far, false);
  /* Restore the stl information we overwrote (for stl_read) so that it still accurately
     reflects the subject part: */
@ -255,8 +255,7 @@ stl_reallocate(stl_file *stl) {
 /* Reads the contents of the file pointed to by stl->fp into the stl structure,
   starting at facet first_facet.  The third argument (first) says whether this is
   our first time running this for the stl, in which case the max and min stats are reset. */
-void
+void stl_read(stl_file *stl, int first_facet, bool first) {
 stl_read(stl_file *stl, int first_facet, int first) {
  stl_facet facet;
  int   i;
@ -294,11 +293,11 @@ stl_read(stl_file *stl, int first_facet, int first) {
      assert(res_normal == 3);
      int res_outer_loop = fscanf(stl->fp, " outer loop");
      assert(res_outer_loop == 0);
-      int res_vertex1    = fscanf(stl->fp, " vertex %f %f %f", &facet.vertex[0].x, &facet.vertex[0].y, &facet.vertex[0].z);
+      int res_vertex1    = fscanf(stl->fp, " vertex %f %f %f", &facet.vertex[0](0), &facet.vertex[0](1), &facet.vertex[0](2));
      assert(res_vertex1 == 3);
-      int res_vertex2    = fscanf(stl->fp, " vertex %f %f %f", &facet.vertex[1].x, &facet.vertex[1].y, &facet.vertex[1].z);
+      int res_vertex2    = fscanf(stl->fp, " vertex %f %f %f", &facet.vertex[1](0), &facet.vertex[1](1), &facet.vertex[1](2));
      assert(res_vertex2 == 3);
-      int res_vertex3    = fscanf(stl->fp, " vertex %f %f %f", &facet.vertex[2].x, &facet.vertex[2].y, &facet.vertex[2].z);
+      int res_vertex3    = fscanf(stl->fp, " vertex %f %f %f", &facet.vertex[2](0), &facet.vertex[2](1), &facet.vertex[2](2));
      assert(res_vertex3 == 3);
      int res_endloop    = fscanf(stl->fp, " endloop");
      assert(res_endloop == 0);
@ -311,9 +310,9 @@ stl_read(stl_file *stl, int first_facet, int first) {
      }
      // The facet normal has been parsed as strings as a workaround for "not a number" values in the normal definition.
-	  if (sscanf(normal_buf[0], "%f", &facet.normal.x) != 1 ||
+	  if (sscanf(normal_buf[0], "%f", &facet.normal(0)) != 1 ||
-		  sscanf(normal_buf[1], "%f", &facet.normal.y) != 1 ||
+		  sscanf(normal_buf[1], "%f", &facet.normal(1)) != 1 ||
-		  sscanf(normal_buf[2], "%f", &facet.normal.z) != 1) {
+		  sscanf(normal_buf[2], "%f", &facet.normal(2)) != 1) {
 		  // Normal was mangled. Maybe denormals or "not a number" were stored?
 		  // Just reset the normal and silently ignore it.
 		  memset(&facet.normal, 0, sizeof(facet.normal));
@ -326,104 +325,45 @@ stl_read(stl_file *stl, int first_facet, int first) {
      // It may be worth to round these numbers to zero during loading to reduce the number of errors reported
      // during the STL import.
      for (size_t j = 0; j < 3; ++ j) {
-        if (facet.vertex[j].x > -1e-12f && facet.vertex[j].x < 1e-12f)
+        if (facet.vertex[j](0) > -1e-12f && facet.vertex[j](0) < 1e-12f)
-            printf("stl_read: facet %d.x = %e\r\n", j, facet.vertex[j].x);
+            printf("stl_read: facet %d(0) = %e\r\n", j, facet.vertex[j](0));
-        if (facet.vertex[j].y > -1e-12f && facet.vertex[j].y < 1e-12f)
+        if (facet.vertex[j](1) > -1e-12f && facet.vertex[j](1) < 1e-12f)
-            printf("stl_read: facet %d.y = %e\r\n", j, facet.vertex[j].y);
+            printf("stl_read: facet %d(1) = %e\r\n", j, facet.vertex[j](1));
-        if (facet.vertex[j].z > -1e-12f && facet.vertex[j].z < 1e-12f)
+        if (facet.vertex[j](2) > -1e-12f && facet.vertex[j](2) < 1e-12f)
-            printf("stl_read: facet %d.z = %e\r\n", j, facet.vertex[j].z);
+            printf("stl_read: facet %d(2) = %e\r\n", j, facet.vertex[j](2));
      }
 #endif
 #if 1
    {
      // Positive and negative zeros are possible in the floats, which are considered equal by the FP unit.
      // When using a memcmp on raw floats, those numbers report to be different.
      // Unify all +0 and -0 to +0 to make the floats equal under memcmp.
      uint32_t *f = (uint32_t*)&facet;
      for (int j = 0; j < 12; ++ j, ++ f) // 3x vertex + normal: 4x3 = 12 floats
        if (*f == 0x80000000)
          // Negative zero, switch to positive zero.
          *f = 0;
    }
 #else
    {
      // Due to the nature of the floating point numbers, close to zero values may be represented with significantly higher precision
      // than the rest of the vertices. Round them to zero.
      float *f = (float*)&facet;
      for (int j = 0; j < 12; ++ j, ++ f) // 3x vertex + normal: 4x3 = 12 floats
        if (*f > -1e-12f && *f < 1e-12f)
          // Close to zero, round to exact zero.
          *f = 0;
    }
 #endif
    /* Write the facet into memory. */
-    memcpy(stl->facet_start+i, &facet, SIZEOF_STL_FACET);
+    stl->facet_start[i] = facet;
    stl_facet_stats(stl, facet, first);
    first = 0;
  }
-  stl->stats.size.x = stl->stats.max.x - stl->stats.min.x;
+  stl->stats.size = stl->stats.max - stl->stats.min;
-  stl->stats.size.y = stl->stats.max.y - stl->stats.min.y;
+  stl->stats.bounding_diameter = stl->stats.size.norm();
  stl->stats.size.z = stl->stats.max.z - stl->stats.min.z;
  stl->stats.bounding_diameter = sqrt(
                                   stl->stats.size.x * stl->stats.size.x +
                                   stl->stats.size.y * stl->stats.size.y +
                                   stl->stats.size.z * stl->stats.size.z
                                 );
 }
-void
+void stl_facet_stats(stl_file *stl, stl_facet facet, bool &first)
-stl_facet_stats(stl_file *stl, stl_facet facet, int first) {
+{
-  float diff_x;
+  if (stl->error)
-  float diff_y;
+  	return;
  float diff_z;
  float max_diff;
-  if (stl->error) return;
+  // While we are going through all of the facets, let's find the
  // maximum and minimum values for x, y, and z
  /* while we are going through all of the facets, let's find the  */
  /* maximum and minimum values for x, y, and z  */
  /* Initialize the max and min values the first time through*/
  if (first) {
-    stl->stats.max.x = facet.vertex[0].x;
+	// Initialize the max and min values the first time through
-    stl->stats.min.x = facet.vertex[0].x;
+    stl->stats.min = facet.vertex[0];
-    stl->stats.max.y = facet.vertex[0].y;
+    stl->stats.max = facet.vertex[0];
-    stl->stats.min.y = facet.vertex[0].y;
+    stl_vertex diff = (facet.vertex[1] - facet.vertex[0]).cwiseAbs();
-    stl->stats.max.z = facet.vertex[0].z;
+    stl->stats.shortest_edge = std::max(diff(0), std::max(diff(1), diff(2)));
-    stl->stats.min.z = facet.vertex[0].z;
+    first = false;
    diff_x = ABS(facet.vertex[0].x - facet.vertex[1].x);
    diff_y = ABS(facet.vertex[0].y - facet.vertex[1].y);
    diff_z = ABS(facet.vertex[0].z - facet.vertex[1].z);
    max_diff = STL_MAX(diff_x, diff_y);
    max_diff = STL_MAX(diff_z, max_diff);
    stl->stats.shortest_edge = max_diff;
    first = 0;
  }
-  /* now find the max and min values */
+  // Now find the max and min values.
-  stl->stats.max.x = STL_MAX(stl->stats.max.x, facet.vertex[0].x);
+  for (size_t i = 0; i < 3; ++ i) {
-  stl->stats.min.x = STL_MIN(stl->stats.min.x, facet.vertex[0].x);
+  	stl->stats.min = stl->stats.min.cwiseMin(facet.vertex[i]);
-  stl->stats.max.y = STL_MAX(stl->stats.max.y, facet.vertex[0].y);
+  	stl->stats.max = stl->stats.max.cwiseMax(facet.vertex[i]);
-  stl->stats.min.y = STL_MIN(stl->stats.min.y, facet.vertex[0].y);
+  }
  stl->stats.max.z = STL_MAX(stl->stats.max.z, facet.vertex[0].z);
  stl->stats.min.z = STL_MIN(stl->stats.min.z, facet.vertex[0].z);
  stl->stats.max.x = STL_MAX(stl->stats.max.x, facet.vertex[1].x);
  stl->stats.min.x = STL_MIN(stl->stats.min.x, facet.vertex[1].x);
  stl->stats.max.y = STL_MAX(stl->stats.max.y, facet.vertex[1].y);
  stl->stats.min.y = STL_MIN(stl->stats.min.y, facet.vertex[1].y);
  stl->stats.max.z = STL_MAX(stl->stats.max.z, facet.vertex[1].z);
  stl->stats.min.z = STL_MIN(stl->stats.min.z, facet.vertex[1].z);
  stl->stats.max.x = STL_MAX(stl->stats.max.x, facet.vertex[2].x);
  stl->stats.min.x = STL_MIN(stl->stats.min.x, facet.vertex[2].x);
  stl->stats.max.y = STL_MAX(stl->stats.max.y, facet.vertex[2].y);
  stl->stats.min.y = STL_MIN(stl->stats.min.y, facet.vertex[2].y);
  stl->stats.max.z = STL_MAX(stl->stats.max.z, facet.vertex[2].z);
  stl->stats.min.z = STL_MIN(stl->stats.min.z, facet.vertex[2].z);
 }
 void
--- a/xs/src/admesh/util.cpp
+++ b/xs/src/admesh/util.cpp
@ -62,7 +62,7 @@ stl_verify_neighbors(stl_file *stl) {
        edge_b.p1 = stl->facet_start[neighbor].vertex[(vnot + 1) % 3];
        edge_b.p2 = stl->facet_start[neighbor].vertex[(vnot + 2) % 3];
      }
-      if(memcmp(&edge_a, &edge_b, SIZEOF_EDGE_SORT) != 0) {
+      if (edge_a.p1 != edge_b.p1 || edge_a.p2 != edge_b.p2) {
        /* These edges should match but they don't.  Print results. */
        printf("edge %d of facet %d doesn't match edge %d of facet %d\n",
               j, i, vnot + 1, neighbor);
@ -73,114 +73,67 @@ stl_verify_neighbors(stl_file *stl) {
  }
 }
-void
+void stl_translate(stl_file *stl, float x, float y, float z)
-stl_translate(stl_file *stl, float x, float y, float z) {
+{
-  int i;
+  if (stl->error)
-  int j;
+  	return;
  if (stl->error) return;
  for(i = 0; i < stl->stats.number_of_facets; i++) {
    for(j = 0; j < 3; j++) {
      stl->facet_start[i].vertex[j].x -= (stl->stats.min.x - x);
      stl->facet_start[i].vertex[j].y -= (stl->stats.min.y - y);
      stl->facet_start[i].vertex[j].z -= (stl->stats.min.z - z);
    }
  }
  stl->stats.max.x -= (stl->stats.min.x - x);
  stl->stats.max.y -= (stl->stats.min.y - y);
  stl->stats.max.z -= (stl->stats.min.z - z);
  stl->stats.min.x = x;
  stl->stats.min.y = y;
  stl->stats.min.z = z;
  stl_vertex new_min(x, y, z);
  stl_vertex shift = new_min - stl->stats.min;
  for (int i = 0; i < stl->stats.number_of_facets; ++ i)
    for (int j = 0; j < 3; ++ j)
      stl->facet_start[i].vertex[j] += shift;
  stl->stats.min = new_min;
  stl->stats.max += shift;
  stl_invalidate_shared_vertices(stl);
 }
 /* Translates the stl by x,y,z, relatively from wherever it is currently */
-void
+void stl_translate_relative(stl_file *stl, float x, float y, float z)
-stl_translate_relative(stl_file *stl, float x, float y, float z) {
+{
-  int i;
+  if (stl->error)
-  int j;
+  	return;
  if (stl->error) return;
  for(i = 0; i < stl->stats.number_of_facets; i++) {
    for(j = 0; j < 3; j++) {
      stl->facet_start[i].vertex[j].x += x;
      stl->facet_start[i].vertex[j].y += y;
      stl->facet_start[i].vertex[j].z += z;
    }
  }
  stl->stats.min.x += x;
  stl->stats.min.y += y;
  stl->stats.min.z += z;
  stl->stats.max.x += x;
  stl->stats.max.y += y;
  stl->stats.max.z += z;
  stl_vertex shift(x, y, z);
  for (int i = 0; i < stl->stats.number_of_facets; ++ i)
    for (int j = 0; j < 3; ++ j)
      stl->facet_start[i].vertex[j] += shift;
  stl->stats.min += shift;
  stl->stats.max += shift;
  stl_invalidate_shared_vertices(stl);
 }
-void
+void stl_scale_versor(stl_file *stl, const stl_vertex &versor)
-stl_scale_versor(stl_file *stl, float versor[3]) {
+{
-  int i;
+  if (stl->error)
-  int j;
+  	return;
  if (stl->error) return;
  /* scale extents */
  stl->stats.min.x *= versor[0];
  stl->stats.min.y *= versor[1];
  stl->stats.min.z *= versor[2];
  stl->stats.max.x *= versor[0];
  stl->stats.max.y *= versor[1];
  stl->stats.max.z *= versor[2];
  /* scale size */
  stl->stats.size.x *= versor[0];
  stl->stats.size.y *= versor[1];
  stl->stats.size.z *= versor[2];
  /* scale volume */
  if (stl->stats.volume > 0.0) {
    stl->stats.volume *= (versor[0] * versor[1] * versor[2]);
  }
  for(i = 0; i < stl->stats.number_of_facets; i++) {
    for(j = 0; j < 3; j++) {
      stl->facet_start[i].vertex[j].x *= versor[0];
      stl->facet_start[i].vertex[j].y *= versor[1];
      stl->facet_start[i].vertex[j].z *= versor[2];
    }
  }
  // Scale extents.
  auto s = versor.array();
  stl->stats.min.array() *= s;
  stl->stats.max.array() *= s;
  // Scale size.
  stl->stats.size.array() *= s;
  // Scale volume.
  if (stl->stats.volume > 0.0)
    stl->stats.volume *= versor(0) * versor(1) * versor(2);
  // Scale the mesh.
  for (int i = 0; i < stl->stats.number_of_facets; ++ i)
    for (int j = 0; j < 3; ++ j)
      stl->facet_start[i].vertex[j].array() *= s;
  stl_invalidate_shared_vertices(stl);
 }
-void
+static void calculate_normals(stl_file *stl) 
-stl_scale(stl_file *stl, float factor) {
+{
-  float versor[3];
+  if (stl->error)
-
+  	return;
  if (stl->error) return;
  versor[0] = factor;
  versor[1] = factor;
  versor[2] = factor;
  stl_scale_versor(stl, versor);
 }
 static void calculate_normals(stl_file *stl) {
  float normal[3];
  if (stl->error) return;
  stl_normal normal;
  for(uint32_t i = 0; i < stl->stats.number_of_facets; i++) {
    stl_calculate_normal(normal, &stl->facet_start[i]);
    stl_normalize_vector(normal);
-    stl->facet_start[i].normal.x = normal[0];
+    stl->facet_start[i].normal = normal;
    stl->facet_start[i].normal.y = normal[1];
    stl->facet_start[i].normal.z = normal[2];
  }
 }
@ -193,9 +146,9 @@ void stl_transform(stl_file *stl, float *trafo3x4) {
    for (i_vertex = 0; i_vertex < 3; ++ i_vertex) {
      stl_vertex &v_dst = vertices[i_vertex];
      stl_vertex  v_src = v_dst;
-      v_dst.x = trafo3x4[0] * v_src.x + trafo3x4[1] * v_src.y + trafo3x4[2]  * v_src.z + trafo3x4[3];
+      v_dst(0) = trafo3x4[0] * v_src(0) + trafo3x4[1] * v_src(1) + trafo3x4[2]  * v_src(2) + trafo3x4[3];
-      v_dst.y = trafo3x4[4] * v_src.x + trafo3x4[5] * v_src.y + trafo3x4[6]  * v_src.z + trafo3x4[7];
+      v_dst(1) = trafo3x4[4] * v_src(0) + trafo3x4[5] * v_src(1) + trafo3x4[6]  * v_src(2) + trafo3x4[7];
-      v_dst.z = trafo3x4[8] * v_src.x + trafo3x4[9] * v_src.y + trafo3x4[10] * v_src.z + trafo3x4[11];
+      v_dst(2) = trafo3x4[8] * v_src(0) + trafo3x4[9] * v_src(1) + trafo3x4[10] * v_src(2) + trafo3x4[11];
    }
  }
  stl_get_size(stl);
@ -214,8 +167,8 @@ stl_rotate_x(stl_file *stl, float angle) {
  for(i = 0; i < stl->stats.number_of_facets; i++) {
    for(j = 0; j < 3; j++) {
-      stl_rotate(&stl->facet_start[i].vertex[j].y,
+      stl_rotate(&stl->facet_start[i].vertex[j](1),
-                 &stl->facet_start[i].vertex[j].z, c, s);
+                 &stl->facet_start[i].vertex[j](2), c, s);
    }
  }
  stl_get_size(stl);
@ -234,8 +187,8 @@ stl_rotate_y(stl_file *stl, float angle) {
  for(i = 0; i < stl->stats.number_of_facets; i++) {
    for(j = 0; j < 3; j++) {
-      stl_rotate(&stl->facet_start[i].vertex[j].z,
+      stl_rotate(&stl->facet_start[i].vertex[j](2),
-                 &stl->facet_start[i].vertex[j].x, c, s);
+                 &stl->facet_start[i].vertex[j](0), c, s);
    }
  }
  stl_get_size(stl);
@ -254,8 +207,8 @@ stl_rotate_z(stl_file *stl, float angle) {
  for(i = 0; i < stl->stats.number_of_facets; i++) {
    for(j = 0; j < 3; j++) {
-      stl_rotate(&stl->facet_start[i].vertex[j].x,
+      stl_rotate(&stl->facet_start[i].vertex[j](0),
-                 &stl->facet_start[i].vertex[j].y, c, s);
+                 &stl->facet_start[i].vertex[j](1), c, s);
    }
  }
  stl_get_size(stl);
@ -272,142 +225,98 @@ stl_rotate(float *x, float *y, const double c, const double s) {
  *y = float(s * xold + c * yold);
 }
-extern void
+void stl_get_size(stl_file *stl)
-stl_get_size(stl_file *stl) {
+{
-  int i;
+  if (stl->error || stl->stats.number_of_facets == 0)
-  int j;
+  	return;
-
+  stl->stats.min = stl->facet_start[0].vertex[0];
-  if (stl->error) return;
+  stl->stats.max = stl->stats.min;
-  if (stl->stats.number_of_facets == 0) return;
+  for (int i = 0; i < stl->stats.number_of_facets; ++ i) {
-
+  	const stl_facet &face = stl->facet_start[i];
-  stl->stats.min.x = stl->facet_start[0].vertex[0].x;
+    for (int j = 0; j < 3; ++ j) {
-  stl->stats.min.y = stl->facet_start[0].vertex[0].y;
+      stl->stats.min = stl->stats.min.cwiseMin(face.vertex[j]);
-  stl->stats.min.z = stl->facet_start[0].vertex[0].z;
+      stl->stats.max = stl->stats.max.cwiseMax(face.vertex[j]);
  stl->stats.max.x = stl->facet_start[0].vertex[0].x;
  stl->stats.max.y = stl->facet_start[0].vertex[0].y;
  stl->stats.max.z = stl->facet_start[0].vertex[0].z;
  for(i = 0; i < stl->stats.number_of_facets; i++) {
    for(j = 0; j < 3; j++) {
      stl->stats.min.x = STL_MIN(stl->stats.min.x,
                                 stl->facet_start[i].vertex[j].x);
      stl->stats.min.y = STL_MIN(stl->stats.min.y,
                                 stl->facet_start[i].vertex[j].y);
      stl->stats.min.z = STL_MIN(stl->stats.min.z,
                                 stl->facet_start[i].vertex[j].z);
      stl->stats.max.x = STL_MAX(stl->stats.max.x,
                                 stl->facet_start[i].vertex[j].x);
      stl->stats.max.y = STL_MAX(stl->stats.max.y,
                                 stl->facet_start[i].vertex[j].y);
      stl->stats.max.z = STL_MAX(stl->stats.max.z,
                                 stl->facet_start[i].vertex[j].z);
    }
  }
-  stl->stats.size.x = stl->stats.max.x - stl->stats.min.x;
+  stl->stats.size = stl->stats.max - stl->stats.min;
-  stl->stats.size.y = stl->stats.max.y - stl->stats.min.y;
+  stl->stats.bounding_diameter = stl->stats.size.norm();
  stl->stats.size.z = stl->stats.max.z - stl->stats.min.z;
  stl->stats.bounding_diameter = sqrt(
                                   stl->stats.size.x * stl->stats.size.x +
                                   stl->stats.size.y * stl->stats.size.y +
                                   stl->stats.size.z * stl->stats.size.z
                                 );
 }
-void
+void stl_mirror_xy(stl_file *stl)
-stl_mirror_xy(stl_file *stl) {
+{
-  int i;
+  if (stl->error) 
-  int j;
+  	return;
  float temp_size;
-  if (stl->error) return;
+  for(int i = 0; i < stl->stats.number_of_facets; i++) {
-
+    for(int j = 0; j < 3; j++) {
-  for(i = 0; i < stl->stats.number_of_facets; i++) {
+      stl->facet_start[i].vertex[j](2) *= -1.0;
    for(j = 0; j < 3; j++) {
      stl->facet_start[i].vertex[j].z *= -1.0;
    }
  }
-  temp_size = stl->stats.min.z;
+  float temp_size = stl->stats.min(2);
-  stl->stats.min.z = stl->stats.max.z;
+  stl->stats.min(2) = stl->stats.max(2);
-  stl->stats.max.z = temp_size;
+  stl->stats.max(2) = temp_size;
-  stl->stats.min.z *= -1.0;
+  stl->stats.min(2) *= -1.0;
-  stl->stats.max.z *= -1.0;
+  stl->stats.max(2) *= -1.0;
  stl_reverse_all_facets(stl);
  stl->stats.facets_reversed -= stl->stats.number_of_facets;  /* for not altering stats */
 }
-void
+void stl_mirror_yz(stl_file *stl)
-stl_mirror_yz(stl_file *stl) {
+{
  int i;
  int j;
  float temp_size;
  if (stl->error) return;
-  for(i = 0; i < stl->stats.number_of_facets; i++) {
+  for (int i = 0; i < stl->stats.number_of_facets; i++) {
-    for(j = 0; j < 3; j++) {
+    for (int j = 0; j < 3; j++) {
-      stl->facet_start[i].vertex[j].x *= -1.0;
+      stl->facet_start[i].vertex[j](0) *= -1.0;
    }
  }
-  temp_size = stl->stats.min.x;
+  float temp_size = stl->stats.min(0);
-  stl->stats.min.x = stl->stats.max.x;
+  stl->stats.min(0) = stl->stats.max(0);
-  stl->stats.max.x = temp_size;
+  stl->stats.max(0) = temp_size;
-  stl->stats.min.x *= -1.0;
+  stl->stats.min(0) *= -1.0;
-  stl->stats.max.x *= -1.0;
+  stl->stats.max(0) *= -1.0;
  stl_reverse_all_facets(stl);
  stl->stats.facets_reversed -= stl->stats.number_of_facets;  /* for not altering stats */
 }
-void
+void stl_mirror_xz(stl_file *stl)
-stl_mirror_xz(stl_file *stl) {
+{
-  int i;
+  if (stl->error)
-  int j;
+  	return;
  float temp_size;
-  if (stl->error) return;
+  for (int i = 0; i < stl->stats.number_of_facets; i++) {
-
+    for (int j = 0; j < 3; j++) {
-  for(i = 0; i < stl->stats.number_of_facets; i++) {
+      stl->facet_start[i].vertex[j](1) *= -1.0;
    for(j = 0; j < 3; j++) {
      stl->facet_start[i].vertex[j].y *= -1.0;
    }
  }
-  temp_size = stl->stats.min.y;
+  float temp_size = stl->stats.min(1);
-  stl->stats.min.y = stl->stats.max.y;
+  stl->stats.min(1) = stl->stats.max(1);
-  stl->stats.max.y = temp_size;
+  stl->stats.max(1) = temp_size;
-  stl->stats.min.y *= -1.0;
+  stl->stats.min(1) *= -1.0;
-  stl->stats.max.y *= -1.0;
+  stl->stats.max(1) *= -1.0;
  stl_reverse_all_facets(stl);
  stl->stats.facets_reversed -= stl->stats.number_of_facets;  /* for not altering stats */
 }
-static float get_volume(stl_file *stl) {
+static float get_volume(stl_file *stl)
-  stl_vertex p0;
+{
-  stl_vertex p;
+  if (stl->error)
-  stl_normal n;
+  	return 0;
  float height;
  float area;
  float volume = 0.0;
-  if (stl->error) return 0;
+  // Choose a point, any point as the reference.
-
+  stl_vertex p0 = stl->facet_start[0].vertex[0];
-  /* Choose a point, any point as the reference */
+  float volume = 0.f;
-  p0.x = stl->facet_start[0].vertex[0].x;
+  for(uint32_t i = 0; i < stl->stats.number_of_facets; ++ i) {
-  p0.y = stl->facet_start[0].vertex[0].y;
+    // Do dot product to get distance from point to plane.
-  p0.z = stl->facet_start[0].vertex[0].z;
+    float height = stl->facet_start[i].normal.dot(stl->facet_start[i].vertex[0] - p0);
-
+    float area   = get_area(&stl->facet_start[i]);
  for(uint32_t i = 0; i < stl->stats.number_of_facets; i++) {
    p.x = stl->facet_start[i].vertex[0].x - p0.x;
    p.y = stl->facet_start[i].vertex[0].y - p0.y;
    p.z = stl->facet_start[i].vertex[0].z - p0.z;
    /* Do dot product to get distance from point to plane */
    n = stl->facet_start[i].normal;
    height = (n.x * p.x) + (n.y * p.y) + (n.z * p.z);
    area = get_area(&stl->facet_start[i]);
    volume += (area * height) / 3.0f;
  }
  return volume;
 }
-void stl_calculate_volume(stl_file *stl) {
+void stl_calculate_volume(stl_file *stl)
 {
  if (stl->error) return;
  stl->stats.volume = get_volume(stl);
  if(stl->stats.volume < 0.0) {
@ -416,35 +325,32 @@ void stl_calculate_volume(stl_file *stl) {
  }
 }
-static float get_area(stl_facet *facet) {
+static float get_area(stl_facet *facet)
-  double cross[3][3];
+{
  float sum[3];
  float n[3];
  float area;
  int i;
  /* cast to double before calculating cross product because large coordinates
     can result in overflowing product
    (bad area is responsible for bad volume and bad facets reversal) */
-  for(i = 0; i < 3; i++) {
+  double cross[3][3];
-    cross[i][0]=(((double)facet->vertex[i].y * (double)facet->vertex[(i + 1) % 3].z) -
+  for (int i = 0; i < 3; i++) {
-                 ((double)facet->vertex[i].z * (double)facet->vertex[(i + 1) % 3].y));
+    cross[i][0]=(((double)facet->vertex[i](1) * (double)facet->vertex[(i + 1) % 3](2)) -
-    cross[i][1]=(((double)facet->vertex[i].z * (double)facet->vertex[(i + 1) % 3].x) -
+                 ((double)facet->vertex[i](2) * (double)facet->vertex[(i + 1) % 3](1)));
-                 ((double)facet->vertex[i].x * (double)facet->vertex[(i + 1) % 3].z));
+    cross[i][1]=(((double)facet->vertex[i](2) * (double)facet->vertex[(i + 1) % 3](0)) -
-    cross[i][2]=(((double)facet->vertex[i].x * (double)facet->vertex[(i + 1) % 3].y) -
+                 ((double)facet->vertex[i](0) * (double)facet->vertex[(i + 1) % 3](2)));
-                 ((double)facet->vertex[i].y * (double)facet->vertex[(i + 1) % 3].x));
+    cross[i][2]=(((double)facet->vertex[i](0) * (double)facet->vertex[(i + 1) % 3](1)) -
                 ((double)facet->vertex[i](1) * (double)facet->vertex[(i + 1) % 3](0)));
  }
-  sum[0] = cross[0][0] + cross[1][0] + cross[2][0];
+  stl_normal sum;
-  sum[1] = cross[0][1] + cross[1][1] + cross[2][1];
+  sum(0) = cross[0][0] + cross[1][0] + cross[2][0];
-  sum[2] = cross[0][2] + cross[1][2] + cross[2][2];
+  sum(1) = cross[0][1] + cross[1][1] + cross[2][1];
  sum(2) = cross[0][2] + cross[1][2] + cross[2][2];
-  /* This should already be done.  But just in case, let's do it again */
+  // This should already be done.  But just in case, let's do it again.
  //FIXME this is questionable. the "sum" normal should be accurate, while the normal "n" may be calculated with a low accuracy.
  stl_normal n;
  stl_calculate_normal(n, facet);
  stl_normalize_vector(n);
-
+  return 0.5f * n.dot(sum);
  area = 0.5 * (n[0] * sum[0] + n[1] * sum[1] + n[2] * sum[2]);
  return area;
 }
 void stl_repair(stl_file *stl,
--- a/xs/src/eigen/Eigen/Cholesky
+++ b/xs/src/eigen/Eigen/Cholesky
@ -9,6 +9,7 @@
 #define EIGEN_CHOLESKY_MODULE_H
 #include "Core"
 #include "Jacobi"
 #include "src/Core/util/DisableStupidWarnings.h"
@ -31,7 +32,11 @@
 #include "src/Cholesky/LLT.h"
 #include "src/Cholesky/LDLT.h"
 #ifdef EIGEN_USE_LAPACKE
 #ifdef EIGEN_USE_MKL
 #include "mkl_lapacke.h"
 #else
 #include "src/misc/lapacke.h"
 #endif
 #include "src/Cholesky/LLT_LAPACKE.h"
 #endif
--- a/xs/src/eigen/Eigen/Core
+++ b/xs/src/eigen/Eigen/Core
@ -14,6 +14,22 @@
 // first thing Eigen does: stop the compiler from committing suicide
 #include "src/Core/util/DisableStupidWarnings.h"
 #if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA)
  #define EIGEN_CUDACC __CUDACC__
 #endif
 #if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA)
  #define EIGEN_CUDA_ARCH __CUDA_ARCH__
 #endif
 #if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
 #define EIGEN_CUDACC_VER  ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100))
 #elif defined(__CUDACC_VER__)
 #define EIGEN_CUDACC_VER __CUDACC_VER__
 #else
 #define EIGEN_CUDACC_VER 0
 #endif
 // Handle NVCC/CUDA/SYCL
 #if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
  // Do not try asserts on CUDA and SYCL!
@ -155,6 +171,9 @@
      #ifdef __AVX512DQ__
        #define EIGEN_VECTORIZE_AVX512DQ
      #endif
      #ifdef __AVX512ER__
        #define EIGEN_VECTORIZE_AVX512ER
      #endif
    #endif
    // include files
@ -229,7 +248,7 @@
 #if defined __CUDACC__
  #define EIGEN_VECTORIZE_CUDA
  #include <vector_types.h>
-  #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+  #if EIGEN_CUDACC_VER >= 70500
    #define EIGEN_HAS_CUDA_FP16
  #endif
 #endif
@ -352,6 +371,7 @@ using std::ptrdiff_t;
 #include "src/Core/MathFunctions.h"
 #include "src/Core/GenericPacketMath.h"
 #include "src/Core/MathFunctionsImpl.h"
 #include "src/Core/arch/Default/ConjHelper.h"
 #if defined EIGEN_VECTORIZE_AVX512
  #include "src/Core/arch/SSE/PacketMath.h"
@ -367,6 +387,7 @@ using std::ptrdiff_t;
  #include "src/Core/arch/AVX/MathFunctions.h"
  #include "src/Core/arch/AVX/Complex.h"
  #include "src/Core/arch/AVX/TypeCasting.h"
  #include "src/Core/arch/SSE/TypeCasting.h"
 #elif defined EIGEN_VECTORIZE_SSE
  #include "src/Core/arch/SSE/PacketMath.h"
  #include "src/Core/arch/SSE/MathFunctions.h"
--- a/xs/src/eigen/Eigen/Eigenvalues
+++ b/xs/src/eigen/Eigen/Eigenvalues
@ -45,7 +45,11 @@
 #include "src/Eigenvalues/GeneralizedEigenSolver.h"
 #include "src/Eigenvalues/MatrixBaseEigenvalues.h"
 #ifdef EIGEN_USE_LAPACKE
 #ifdef EIGEN_USE_MKL
 #include "mkl_lapacke.h"
 #else
 #include "src/misc/lapacke.h"
 #endif
 #include "src/Eigenvalues/RealSchur_LAPACKE.h"
 #include "src/Eigenvalues/ComplexSchur_LAPACKE.h"
 #include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h"
--- a/xs/src/eigen/Eigen/LU
+++ b/xs/src/eigen/Eigen/LU
@ -28,7 +28,11 @@
 #include "src/LU/FullPivLU.h"
 #include "src/LU/PartialPivLU.h"
 #ifdef EIGEN_USE_LAPACKE
 #ifdef EIGEN_USE_MKL
 #include "mkl_lapacke.h"
 #else
 #include "src/misc/lapacke.h"
 #endif
 #include "src/LU/PartialPivLU_LAPACKE.h"
 #endif
 #include "src/LU/Determinant.h"
--- a/xs/src/eigen/Eigen/QR
+++ b/xs/src/eigen/Eigen/QR
@ -36,7 +36,11 @@
 #include "src/QR/ColPivHouseholderQR.h"
 #include "src/QR/CompleteOrthogonalDecomposition.h"
 #ifdef EIGEN_USE_LAPACKE
 #ifdef EIGEN_USE_MKL
 #include "mkl_lapacke.h"
 #else
 #include "src/misc/lapacke.h"
 #endif
 #include "src/QR/HouseholderQR_LAPACKE.h"
 #include "src/QR/ColPivHouseholderQR_LAPACKE.h"
 #endif
--- a/xs/src/eigen/Eigen/QtAlignedMalloc
+++ b/xs/src/eigen/Eigen/QtAlignedMalloc
@ -27,7 +27,7 @@ void qFree(void *ptr)
 void *qRealloc(void *ptr, std::size_t size)
 {
  void* newPtr = Eigen::internal::aligned_malloc(size);
-  memcpy(newPtr, ptr, size);
+  std::memcpy(newPtr, ptr, size);
  Eigen::internal::aligned_free(ptr);
  return newPtr;
 }
--- a/xs/src/eigen/Eigen/SVD
+++ b/xs/src/eigen/Eigen/SVD
@ -37,7 +37,11 @@
 #include "src/SVD/JacobiSVD.h"
 #include "src/SVD/BDCSVD.h"
 #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
 #ifdef EIGEN_USE_MKL
 #include "mkl_lapacke.h"
 #else
 #include "src/misc/lapacke.h"
 #endif
 #include "src/SVD/JacobiSVD_LAPACKE.h"
 #endif
--- a/xs/src/eigen/Eigen/src/Cholesky/LDLT.h
+++ b/xs/src/eigen/Eigen/src/Cholesky/LDLT.h
@ -248,7 +248,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
    /** \brief Reports whether previous computation was successful.
      *
      * \returns \c Success if computation was succesful,
-      *          \c NumericalIssue if the matrix.appears to be negative.
+      *          \c NumericalIssue if the factorization failed because of a zero pivot.
      */
    ComputationInfo info() const
    {
@ -376,6 +376,8 @@ template<> struct ldlt_inplace<Lower>
      if((rs>0) && pivot_is_valid)
        A21 /= realAkk;
      else if(rs>0)
        ret = ret && (A21.array()==Scalar(0)).all();
      if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed
      else if(!pivot_is_valid) found_zero_pivot = true;
@ -568,13 +570,14 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons
  // more precisely, use pseudo-inverse of D (see bug 241)
  using std::abs;
  const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
-  // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
+  // In some previous versions, tolerance was set to the max of 1/highest (or rather numeric_limits::min())
-  // as motivated by LAPACK's xGELSS:
+  // and the maximal diagonal entry * epsilon as motivated by LAPACK's xGELSS:
  // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
  // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
  // diagonal element is not well justified and leads to numerical issues in some cases.
  // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
-  RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
+  // Using numeric_limits::min() gives us more robustness to denormals.
  RealScalar tolerance = (std::numeric_limits<RealScalar>::min)();
  for (Index i = 0; i < vecD.size(); ++i)
  {
--- a/xs/src/eigen/Eigen/src/Cholesky/LLT.h
+++ b/xs/src/eigen/Eigen/src/Cholesky/LLT.h
@ -41,14 +41,18 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
  * Example: \include LLT_example.cpp
  * Output: \verbinclude LLT_example.out
  *
  * \b Performance: for best performance, it is recommended to use a column-major storage format
  * with the Lower triangular part (the default), or, equivalently, a row-major storage format
  * with the Upper triangular part. Otherwise, you might get a 20% slowdown for the full factorization
  * step, and rank-updates can be up to 3 times slower.
  *
  * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
  *
  * Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered.
  * Therefore, the strict lower part does not have to store correct values.
  *
  * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
  */
 /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
  * Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
  * the strict lower part does not have to store correct values.
  */
 template<typename _MatrixType, int _UpLo> class LLT
 {
  public:
@ -146,7 +150,7 @@ template<typename _MatrixType, int _UpLo> class LLT
    }
    template<typename Derived>
-    void solveInPlace(MatrixBase<Derived> &bAndX) const;
+    void solveInPlace(const MatrixBase<Derived> &bAndX) const;
    template<typename InputType>
    LLT& compute(const EigenBase<InputType>& matrix);
@ -177,7 +181,7 @@ template<typename _MatrixType, int _UpLo> class LLT
    /** \brief Reports whether previous computation was successful.
      *
      * \returns \c Success if computation was succesful,
-      *          \c NumericalIssue if the matrix.appears to be negative.
+      *          \c NumericalIssue if the matrix.appears not to be positive definite.
      */
    ComputationInfo info() const
    {
@ -425,6 +429,7 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>
  eigen_assert(a.rows()==a.cols());
  const Index size = a.rows();
  m_matrix.resize(size, size);
  if (!internal::is_same_dense(m_matrix, a.derived()))
    m_matrix = a.derived();
  // Compute matrix L1 norm = max abs column sum.
@ -485,11 +490,14 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
  *
  * This version avoids a copy when the right hand side matrix b is not needed anymore.
  *
  * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
  * This function will const_cast it, so constness isn't honored here.
  *
  * \sa LLT::solve(), MatrixBase::llt()
  */
 template<typename MatrixType, int _UpLo>
 template<typename Derived>
-void LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
+void LLT<MatrixType,_UpLo>::solveInPlace(const MatrixBase<Derived> &bAndX) const
 {
  eigen_assert(m_isInitialized && "LLT is not initialized.");
  eigen_assert(m_matrix.rows()==bAndX.rows());
--- a/xs/src/eigen/Eigen/src/Core/Array.h
+++ b/xs/src/eigen/Eigen/src/Core/Array.h
@ -231,10 +231,16 @@ class Array
            : Base(other)
    { }
  private:
    struct PrivateType {};
  public:
    /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
+    EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other,
                              typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,
                                                           PrivateType>::type = PrivateType())
      : Base(other.derived())
    { }
--- a/xs/src/eigen/Eigen/src/Core/ArrayBase.h
+++ b/xs/src/eigen/Eigen/src/Core/ArrayBase.h
@ -175,7 +175,7 @@ template<typename Derived> class ArrayBase
  */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_STRONG_INLINE Derived &
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
 {
  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
@ -188,7 +188,7 @@ ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
  */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_STRONG_INLINE Derived &
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
 {
  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
@ -201,7 +201,7 @@ ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
  */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_STRONG_INLINE Derived &
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
 {
  call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
@ -214,7 +214,7 @@ ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
  */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_STRONG_INLINE Derived &
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
 {
  call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());
--- a/xs/src/eigen/Eigen/src/Core/ArrayWrapper.h
+++ b/xs/src/eigen/Eigen/src/Core/ArrayWrapper.h
@ -32,7 +32,8 @@ struct traits<ArrayWrapper<ExpressionType> >
  // Let's remove NestByRefBit
  enum {
    Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
-    Flags = Flags0 & ~NestByRefBit
+    LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
    Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
  };
 };
 }
@ -129,7 +130,8 @@ struct traits<MatrixWrapper<ExpressionType> >
  // Let's remove NestByRefBit
  enum {
    Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
-    Flags = Flags0 & ~NestByRefBit
+    LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
    Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
  };
 };
 }
--- a/xs/src/eigen/Eigen/src/Core/AssignEvaluator.h
+++ b/xs/src/eigen/Eigen/src/Core/AssignEvaluator.h
@ -39,7 +39,7 @@ public:
  enum {
    DstAlignment = DstEvaluator::Alignment,
    SrcAlignment = SrcEvaluator::Alignment,
-    DstHasDirectAccess = DstFlags & DirectAccessBit,
+    DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
    JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
  };
@ -83,7 +83,7 @@ private:
                       && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
                       && (EIGEN_UNALIGNED_VECTORIZE  || int(JointAlignment)>=int(InnerRequiredAlignment)),
    MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
-    MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
+    MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
                       && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
         so it's only good for large enough sizes. */
--- a/xs/src/eigen/Eigen/src/Core/Assign_MKL.h
+++ b/xs/src/eigen/Eigen/src/Core/Assign_MKL.h
@ -84,7 +84,8 @@ class vml_assign_traits
  struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>,   \
                   Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> {              \
    typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType;                                            \
-    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) {                   \
+    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) {                       \
      resize_if_allowed(dst, src, func);                                                                                        \
      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());                                                       \
      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) {                                              \
        VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(),                                                        \
@ -144,7 +145,8 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil,  Ceil,   _)
                   Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> {            \
    typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested,                                           \
                    const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType;                         \
-    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) {                 \
+    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) {                     \
      resize_if_allowed(dst, src, func);                                                                                      \
      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());                                                     \
      VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other);                                       \
      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal)                                              \
--- a/xs/src/eigen/Eigen/src/Core/CoreEvaluators.h
+++ b/xs/src/eigen/Eigen/src/Core/CoreEvaluators.h
@ -977,7 +977,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
    OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
                             ? int(outer_stride_at_compile_time<ArgType>::ret)
                             : int(inner_stride_at_compile_time<ArgType>::ret),
-    MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0,
+    MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0,
    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,    
    FlagsRowMajorBit = XprType::Flags&RowMajorBit,
@ -987,7 +987,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
    Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
    PacketAlignment = unpacket_traits<PacketScalar>::alignment,
-    Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
+    Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic)
                             && (OuterStrideAtCompileTime!=0)
                             && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
    Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
  };
  typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
@ -1018,14 +1020,16 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block)
    : m_argImpl(block.nestedExpression()), 
      m_startRow(block.startRow()), 
-      m_startCol(block.startCol()) 
+      m_startCol(block.startCol()),
      m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0)
  { }
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  enum {
-    RowsAtCompileTime = XprType::RowsAtCompileTime
+    RowsAtCompileTime = XprType::RowsAtCompileTime,
    ForwardLinearAccess = InnerPanel && bool(evaluator<ArgType>::Flags&LinearAccessBit)
  };
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@ -1037,6 +1041,9 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  CoeffReturnType coeff(Index index) const
  { 
    if (ForwardLinearAccess)
      return m_argImpl.coeff(m_linear_offset.value() + index); 
    else
      return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
  }
@ -1049,6 +1056,9 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  Scalar& coeffRef(Index index)
  { 
    if (ForwardLinearAccess)
      return m_argImpl.coeffRef(m_linear_offset.value() + index); 
    else
      return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
  }
@ -1063,6 +1073,9 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
  EIGEN_STRONG_INLINE
  PacketType packet(Index index) const 
  { 
    if (ForwardLinearAccess)
      return m_argImpl.template packet<LoadMode,PacketType>(m_linear_offset.value() + index);
    else
      return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
                                         RowsAtCompileTime == 1 ? index : 0);
  }
@ -1078,6 +1091,9 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
  EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketType& x) 
  {
    if (ForwardLinearAccess)
      return m_argImpl.template writePacket<StoreMode,PacketType>(m_linear_offset.value() + index, x);
    else
      return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
                                              RowsAtCompileTime == 1 ? index : 0,
                                              x);
@ -1087,6 +1103,7 @@ protected:
  evaluator<ArgType> m_argImpl;
  const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
  const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
  const variable_if_dynamic<Index, InnerPanel ? Dynamic : 0> m_linear_offset;
 };
 // TODO: This evaluator does not actually use the child evaluator; 
--- a/xs/src/eigen/Eigen/src/Core/CwiseNullaryOp.h
+++ b/xs/src/eigen/Eigen/src/Core/CwiseNullaryOp.h
@ -105,7 +105,7 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
  */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
 DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
 {
  return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
@ -150,7 +150,7 @@ DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
  */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
 DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
 {
  return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
@ -192,7 +192,7 @@ DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
  * \sa class CwiseNullaryOp
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Constant(Index size, const Scalar& value)
 {
  return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
@ -208,7 +208,7 @@ DenseBase<Derived>::Constant(Index size, const Scalar& value)
  * \sa class CwiseNullaryOp
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Constant(const Scalar& value)
 {
  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
@ -220,7 +220,7 @@ DenseBase<Derived>::Constant(const Scalar& value)
  * \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&)
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
 DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@ -232,7 +232,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const
  * \sa LinSpaced(Scalar,Scalar)
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
 DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@ -264,7 +264,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
  * \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
 DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@ -276,7 +276,7 @@ DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
  * Special version for fixed size types which does not require the size parameter.
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
 DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@ -286,7 +286,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
 /** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
 template<typename Derived>
-bool DenseBase<Derived>::isApproxToConstant
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant
 (const Scalar& val, const RealScalar& prec) const
 {
  typename internal::nested_eval<Derived,1>::type self(derived());
@ -301,7 +301,7 @@ bool DenseBase<Derived>::isApproxToConstant
  *
  * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
 template<typename Derived>
-bool DenseBase<Derived>::isConstant
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isConstant
 (const Scalar& val, const RealScalar& prec) const
 {
  return isApproxToConstant(val, prec);
@ -312,7 +312,7 @@ bool DenseBase<Derived>::isConstant
  * \sa setConstant(), Constant(), class CwiseNullaryOp
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
 {
  setConstant(val);
 }
@ -322,7 +322,7 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
  * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
 {
  return derived() = Constant(rows(), cols(), val);
 }
@ -337,7 +337,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
  * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived&
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
 PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
 {
  resize(size);
@ -356,7 +356,7 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
  * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived&
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
 PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
 {
  resize(rows, cols);
@ -380,7 +380,7 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
  * \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
@ -400,7 +400,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con
  * \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return setLinSpaced(size(), low, high);
@ -423,7 +423,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low,
  * \sa Zero(), Zero(Index)
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Zero(Index rows, Index cols)
 {
  return Constant(rows, cols, Scalar(0));
@ -446,7 +446,7 @@ DenseBase<Derived>::Zero(Index rows, Index cols)
  * \sa Zero(), Zero(Index,Index)
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Zero(Index size)
 {
  return Constant(size, Scalar(0));
@ -463,7 +463,7 @@ DenseBase<Derived>::Zero(Index size)
  * \sa Zero(Index), Zero(Index,Index)
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Zero()
 {
  return Constant(Scalar(0));
@ -478,7 +478,7 @@ DenseBase<Derived>::Zero()
  * \sa class CwiseNullaryOp, Zero()
  */
 template<typename Derived>
-bool DenseBase<Derived>::isZero(const RealScalar& prec) const
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isZero(const RealScalar& prec) const
 {
  typename internal::nested_eval<Derived,1>::type self(derived());
  for(Index j = 0; j < cols(); ++j)
@ -496,7 +496,7 @@ bool DenseBase<Derived>::isZero(const RealScalar& prec) const
  * \sa class CwiseNullaryOp, Zero()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
 {
  return setConstant(Scalar(0));
 }
@ -511,7 +511,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
  * \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived&
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
 PlainObjectBase<Derived>::setZero(Index newSize)
 {
  resize(newSize);
@ -529,7 +529,7 @@ PlainObjectBase<Derived>::setZero(Index newSize)
  * \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived&
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
 PlainObjectBase<Derived>::setZero(Index rows, Index cols)
 {
  resize(rows, cols);
@ -553,7 +553,7 @@ PlainObjectBase<Derived>::setZero(Index rows, Index cols)
  * \sa Ones(), Ones(Index), isOnes(), class Ones
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Ones(Index rows, Index cols)
 {
  return Constant(rows, cols, Scalar(1));
@ -576,7 +576,7 @@ DenseBase<Derived>::Ones(Index rows, Index cols)
  * \sa Ones(), Ones(Index,Index), isOnes(), class Ones
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Ones(Index newSize)
 {
  return Constant(newSize, Scalar(1));
@ -593,7 +593,7 @@ DenseBase<Derived>::Ones(Index newSize)
  * \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Ones()
 {
  return Constant(Scalar(1));
@ -608,7 +608,7 @@ DenseBase<Derived>::Ones()
  * \sa class CwiseNullaryOp, Ones()
  */
 template<typename Derived>
-bool DenseBase<Derived>::isOnes
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isOnes
 (const RealScalar& prec) const
 {
  return isApproxToConstant(Scalar(1), prec);
@ -622,7 +622,7 @@ bool DenseBase<Derived>::isOnes
  * \sa class CwiseNullaryOp, Ones()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
 {
  return setConstant(Scalar(1));
 }
@ -637,7 +637,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
  * \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived&
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
 PlainObjectBase<Derived>::setOnes(Index newSize)
 {
  resize(newSize);
@ -655,7 +655,7 @@ PlainObjectBase<Derived>::setOnes(Index newSize)
  * \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived&
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
 PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
 {
  resize(rows, cols);
@ -679,7 +679,7 @@ PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
  * \sa Identity(), setIdentity(), isIdentity()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
 MatrixBase<Derived>::Identity(Index rows, Index cols)
 {
  return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
@ -696,7 +696,7 @@ MatrixBase<Derived>::Identity(Index rows, Index cols)
  * \sa Identity(Index,Index), setIdentity(), isIdentity()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
 MatrixBase<Derived>::Identity()
 {
  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
@ -771,7 +771,7 @@ struct setIdentity_impl<Derived, true>
  * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
 {
  return internal::setIdentity_impl<Derived>::run(derived());
 }
@ -787,7 +787,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
  * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
 {
  derived().resize(rows, cols);
  return setIdentity();
@ -800,7 +800,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index
  * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);
@ -815,7 +815,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
  * \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return BasisReturnType(SquareMatrixType::Identity(),i);
@ -828,7 +828,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
  * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
 { return Derived::Unit(0); }
 /** \returns an expression of the Y axis unit vector (0,1{,0}^*)
@ -838,7 +838,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
  * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitZ(), MatrixBase::UnitW()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
 { return Derived::Unit(1); }
 /** \returns an expression of the Z axis unit vector (0,0,1{,0}^*)
@ -848,7 +848,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
  * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitW()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
 { return Derived::Unit(2); }
 /** \returns an expression of the W axis unit vector (0,0,0,1)
@ -858,7 +858,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
  * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
 { return Derived::Unit(3); }
 } // end namespace Eigen
--- a/xs/src/eigen/Eigen/src/Core/DenseBase.h
+++ b/xs/src/eigen/Eigen/src/Core/DenseBase.h
@ -296,7 +296,7 @@ template<typename Derived> class DenseBase
    EIGEN_DEVICE_FUNC
    Derived& operator=(const ReturnByValue<OtherDerived>& func);
-    /** \ínternal
+    /** \internal
      * Copies \a other into *this without evaluating other. \returns a reference to *this.
      * \deprecated */
    template<typename OtherDerived>
@ -484,9 +484,9 @@ template<typename Derived> class DenseBase
      return derived().coeff(0,0);
    }
-    bool all() const;
+    EIGEN_DEVICE_FUNC bool all() const;
-    bool any() const;
+    EIGEN_DEVICE_FUNC bool any() const;
-    Index count() const;
+    EIGEN_DEVICE_FUNC Index count() const;
    typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
    typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;
--- a/xs/src/eigen/Eigen/src/Core/Diagonal.h
+++ b/xs/src/eigen/Eigen/src/Core/Diagonal.h
@ -70,7 +70,10 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
    EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
    EIGEN_DEVICE_FUNC
-    explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
+    explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index)
    {
      eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() );
    }
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
--- a/xs/src/eigen/Eigen/src/Core/Dot.h
+++ b/xs/src/eigen/Eigen/src/Core/Dot.h
@ -31,7 +31,8 @@ struct dot_nocheck
  typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
  typedef typename conj_prod::result_type ResScalar;
  EIGEN_DEVICE_FUNC
-  static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
+  EIGEN_STRONG_INLINE
  static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
  {
    return a.template binaryExpr<conj_prod>(b).sum();
  }
@ -43,7 +44,8 @@ struct dot_nocheck<T, U, true>
  typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
  typedef typename conj_prod::result_type ResScalar;
  EIGEN_DEVICE_FUNC
-  static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
+  EIGEN_STRONG_INLINE
  static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
  {
    return a.transpose().template binaryExpr<conj_prod>(b).sum();
  }
@ -65,6 +67,7 @@ struct dot_nocheck<T, U, true>
 template<typename Derived>
 template<typename OtherDerived>
 EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE
 typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
 MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
 {
@ -102,7 +105,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
  * \sa lpNorm(), dot(), squaredNorm()
  */
 template<typename Derived>
-inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
+EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
 {
  return numext::sqrt(squaredNorm());
 }
@ -117,7 +120,7 @@ inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real Matr
  * \sa norm(), normalize()
  */
 template<typename Derived>
-inline const typename MatrixBase<Derived>::PlainObject
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::normalized() const
 {
  typedef typename internal::nested_eval<Derived,2>::type _Nested;
@ -139,7 +142,7 @@ MatrixBase<Derived>::normalized() const
  * \sa norm(), normalized()
  */
 template<typename Derived>
-inline void MatrixBase<Derived>::normalize()
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize()
 {
  RealScalar z = squaredNorm();
  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
@ -160,7 +163,7 @@ inline void MatrixBase<Derived>::normalize()
  * \sa stableNorm(), stableNormalize(), normalized()
  */
 template<typename Derived>
-inline const typename MatrixBase<Derived>::PlainObject
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::stableNormalized() const
 {
  typedef typename internal::nested_eval<Derived,3>::type _Nested;
@ -185,7 +188,7 @@ MatrixBase<Derived>::stableNormalized() const
  * \sa stableNorm(), stableNormalized(), normalize()
  */
 template<typename Derived>
-inline void MatrixBase<Derived>::stableNormalize()
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::stableNormalize()
 {
  RealScalar w = cwiseAbs().maxCoeff();
  RealScalar z = (derived()/w).squaredNorm();
--- a/xs/src/eigen/Eigen/src/Core/EigenBase.h
+++ b/xs/src/eigen/Eigen/src/Core/EigenBase.h
@ -14,6 +14,7 @@
 namespace Eigen {
 /** \class EigenBase
  * \ingroup Core_Module
  * 
  * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
  *
@ -128,6 +129,7 @@ template<typename Derived> struct EigenBase
  */
 template<typename Derived>
 template<typename OtherDerived>
 EIGEN_DEVICE_FUNC
 Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
 {
  call_assignment(derived(), other.derived());
@ -136,6 +138,7 @@ Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
 template<typename Derived>
 template<typename OtherDerived>
 EIGEN_DEVICE_FUNC
 Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
 {
  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
@ -144,6 +147,7 @@ Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
 template<typename Derived>
 template<typename OtherDerived>
 EIGEN_DEVICE_FUNC
 Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
 {
  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
--- a/xs/src/eigen/Eigen/src/Core/GeneralProduct.h
+++ b/xs/src/eigen/Eigen/src/Core/GeneralProduct.h
@ -24,9 +24,14 @@ template<int Rows, int Cols, int Depth> struct product_type_selector;
 template<int Size, int MaxSize> struct product_size_category
 {
-  enum { is_large = MaxSize == Dynamic ||
+  enum {
    #ifndef EIGEN_CUDA_ARCH
    is_large = MaxSize == Dynamic ||
               Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
               (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
    #else
    is_large = 0,
    #endif
    value = is_large  ? Large
          : Size == 1 ? 1
                      : Small
@ -379,8 +384,6 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
  *
  * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
  */
 #ifndef __CUDACC__
 template<typename Derived>
 template<typename OtherDerived>
 inline const Product<Derived, OtherDerived>
@ -412,8 +415,6 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
  return Product<Derived, OtherDerived>(derived(), other.derived());
 }
 #endif // __CUDACC__
 /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
  *
  * The returned product will behave like any other expressions: the coefficients of the product will be
--- a/xs/src/eigen/Eigen/src/Core/GenericPacketMath.h
+++ b/xs/src/eigen/Eigen/src/Core/GenericPacketMath.h
@ -230,7 +230,7 @@ pload1(const typename unpacket_traits<Packet>::type  *a) { return pset1<Packet>(
  * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
  * Currently, this function is only used for scalar * complex products.
  */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
 /** \internal \returns a packet with elements of \a *from quadrupled.
@ -278,7 +278,7 @@ inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
 }
 /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
 plset(const typename unpacket_traits<Packet>::type& a) { return a; }
 /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
@ -482,7 +482,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& fro
  * by the current computation.
  */
 template<typename Packet, int LoadMode>
-inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
 {
  return ploadt<Packet, LoadMode>(from);
 }
--- a/xs/src/eigen/Eigen/src/Core/Map.h
+++ b/xs/src/eigen/Eigen/src/Core/Map.h
@ -20,11 +20,17 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
 {
  typedef traits<PlainObjectType> TraitsBase;
  enum {
    PlainObjectTypeInnerSize = ((traits<PlainObjectType>::Flags&RowMajorBit)==RowMajorBit)
                             ? PlainObjectType::ColsAtCompileTime
                             : PlainObjectType::RowsAtCompileTime,
    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
                             ? int(PlainObjectType::InnerStrideAtCompileTime)
                             : int(StrideType::InnerStrideAtCompileTime),
    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
-                             ? int(PlainObjectType::OuterStrideAtCompileTime)
+                             ? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic
                                ? Dynamic
                                : int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize))
                             : int(StrideType::OuterStrideAtCompileTime),
    Alignment = int(MapOptions)&int(AlignedMask),
    Flags0 = TraitsBase::Flags & (~NestByRefBit),
@ -107,10 +113,11 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
-      return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
+      return int(StrideType::OuterStrideAtCompileTime) != 0 ? m_stride.outer()
-           : IsVectorAtCompileTime ? this->size()
+           : int(internal::traits<Map>::OuterStrideAtCompileTime) != Dynamic ? Index(internal::traits<Map>::OuterStrideAtCompileTime)
-           : int(Flags)&RowMajorBit ? this->cols()
+           : IsVectorAtCompileTime ? (this->size() * innerStride())
-           : this->rows();
+           : (int(Flags)&RowMajorBit) ? (this->cols() * innerStride())
           : (this->rows() * innerStride());
    }
    /** Constructor in the fixed-size case.
--- a/xs/src/eigen/Eigen/src/Core/MathFunctions.h
+++ b/xs/src/eigen/Eigen/src/Core/MathFunctions.h
@ -348,31 +348,7 @@ struct norm1_retval
 * Implementation of hypot                                                *
 ****************************************************************************/
-template<typename Scalar>
+template<typename Scalar> struct hypot_impl;
 struct hypot_impl
 {
  typedef typename NumTraits<Scalar>::Real RealScalar;
  static inline RealScalar run(const Scalar& x, const Scalar& y)
  {
    EIGEN_USING_STD_MATH(abs);
    EIGEN_USING_STD_MATH(sqrt);
    RealScalar _x = abs(x);
    RealScalar _y = abs(y);
    Scalar p, qp;
    if(_x>_y)
    {
      p = _x;
      qp = _y / p;
    }
    else
    {
      p = _y;
      qp = _x / p;
    }
    if(p==RealScalar(0)) return RealScalar(0);
    return p * sqrt(RealScalar(1) + qp*qp);
  }
 };
 template<typename Scalar>
 struct hypot_retval
@ -495,7 +471,7 @@ namespace std_fallback {
    typedef typename NumTraits<Scalar>::Real RealScalar;
    EIGEN_USING_STD_MATH(log);
    Scalar x1p = RealScalar(1) + x;
-    return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
+    return numext::equal_strict(x1p, Scalar(1)) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
  }
 }
@ -1061,11 +1037,24 @@ double log(const double &x) { return ::log(x); }
 template<typename T>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-typename NumTraits<T>::Real abs(const T &x) {
+typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
 abs(const T &x) {
  EIGEN_USING_STD_MATH(abs);
  return abs(x);
 }
 // Overload selected when T is neither signed nor complex according to NumTraits<T>:
 // the argument is returned as is, without calling any abs() function.
 template<typename T>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 typename internal::enable_if<!NumTraits<T>::IsSigned && !NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
 abs(const T &x)
 {
  return x;
 }
 #if defined(__SYCL_DEVICE_ONLY__)
 // Non-template float/double overloads, compiled only when __SYCL_DEVICE_ONLY__ is defined;
 // both forward to cl::sycl::fabs.
 EIGEN_ALWAYS_INLINE float abs(float x)
 {
  return cl::sycl::fabs(x);
 }
 EIGEN_ALWAYS_INLINE double abs(double x)
 {
  return cl::sycl::fabs(x);
 }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float abs(const float &x) { return ::fabsf(x); }
--- a/xs/src/eigen/Eigen/src/Core/MathFunctionsImpl.h
+++ b/xs/src/eigen/Eigen/src/Core/MathFunctionsImpl.h
@ -71,6 +71,29 @@ T generic_fast_tanh_float(const T& a_x)
  return pdiv(p, q);
 }
 /** \internal
  * \returns sqrt(x*x + y*y), written as p * sqrt(1 + (q/p)^2) where p is the larger
  * and q the smaller of the two inputs.
  *
  * Intended for non-negative inputs (hypot_impl passes absolute values), in which
  * case the ratio q/p is at most 1 and only that ratio gets squared.
  *
  * When both inputs compare equal the ratio is taken to be exactly 1 instead of
  * being computed by a division. For finite inputs this yields the same value as
  * q/p, and it avoids evaluating inf/inf (a NaN) when both inputs are +infinity,
  * so that case now returns +infinity.
  */
 template<typename RealScalar>
 EIGEN_STRONG_INLINE
 RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
 {
  EIGEN_USING_STD_MATH(sqrt);
  const RealScalar p = numext::maxi(x,y);
  // The larger input is zero: return early so the division below never has a zero denominator.
  if(p==RealScalar(0)) return RealScalar(0);
  const RealScalar q = numext::mini(y,x);
  // q==p means the ratio is 1; skipping the division keeps inf/inf from producing NaN.
  const RealScalar qp = (q==p) ? RealScalar(1) : RealScalar(q / p);
  return p * sqrt(RealScalar(1) + qp*qp);
 }
 /** \internal
  * Generic hypot implementation: takes the absolute value of both arguments and
  * forwards them to positive_real_hypot, returning a NumTraits<Scalar>::Real.
  */
 template<typename Scalar>
 struct hypot_impl
 {
  typedef typename NumTraits<Scalar>::Real RealScalar;
  static inline RealScalar run(const Scalar& x, const Scalar& y)
  {
    EIGEN_USING_STD_MATH(abs);
    // Both magnitudes are passed as RealScalar, matching the explicit template argument.
    return positive_real_hypot<RealScalar>(RealScalar(abs(x)), RealScalar(abs(y)));
  }
 };
 } // end namespace internal
 } // end namespace Eigen
--- a/xs/src/eigen/Eigen/src/Core/MatrixBase.h
+++ b/xs/src/eigen/Eigen/src/Core/MatrixBase.h
@ -160,20 +160,11 @@ template<typename Derived> class MatrixBase
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator-=(const MatrixBase<OtherDerived>& other);
 #ifdef __CUDACC__
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    const Product<Derived,OtherDerived,LazyProduct>
    operator*(const MatrixBase<OtherDerived> &other) const
    { return this->lazyProduct(other); }
 #else
    template<typename OtherDerived>
    const Product<Derived,OtherDerived>
    operator*(const MatrixBase<OtherDerived> &other) const;
 #endif
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    const Product<Derived,OtherDerived,LazyProduct>
@ -294,7 +285,7 @@ template<typename Derived> class MatrixBase
      *          fuzzy comparison such as isApprox()
      * \sa isApprox(), operator!= */
    template<typename OtherDerived>
-    inline bool operator==(const MatrixBase<OtherDerived>& other) const
+    EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const
    { return cwiseEqual(other).all(); }
    /** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other.
@ -302,7 +293,7 @@ template<typename Derived> class MatrixBase
      *          fuzzy comparison such as isApprox()
      * \sa isApprox(), operator== */
    template<typename OtherDerived>
-    inline bool operator!=(const MatrixBase<OtherDerived>& other) const
+    EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const
    { return cwiseNotEqual(other).any(); }
    NoAlias<Derived,Eigen::MatrixBase > noalias();
--- a/xs/src/eigen/Eigen/src/Core/NumTraits.h
+++ b/xs/src/eigen/Eigen/src/Core/NumTraits.h
@ -215,6 +215,8 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
  static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
  EIGEN_DEVICE_FUNC
  static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
  static inline int digits10() { return NumTraits<Scalar>::digits10(); }
 };
 template<> struct NumTraits<std::string>
--- a/xs/src/eigen/Eigen/src/Core/PlainObjectBase.h
+++ b/xs/src/eigen/Eigen/src/Core/PlainObjectBase.h
@ -577,6 +577,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
      * \a data pointers.
      *
      * Here is an example using strides:
      * \include Matrix_Map_stride.cpp
      * Output: \verbinclude Matrix_Map_stride.out
      *
      * \see class Map
      */
    //@{
--- a/xs/src/eigen/Eigen/src/Core/Product.h
+++ b/xs/src/eigen/Eigen/src/Core/Product.h
@ -97,8 +97,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option,
        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
    }
-    EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
-    EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
    EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
    EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
@ -127,7 +127,7 @@ public:
  using Base::derived;
  typedef typename Base::Scalar Scalar;
-  operator const Scalar() const
+  EIGEN_STRONG_INLINE operator const Scalar() const
  {
    return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
  }
@ -162,7 +162,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
  public:
-    EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const
    {
      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
@ -170,7 +170,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
      return internal::evaluator<Derived>(derived()).coeff(row,col);
    }
-    EIGEN_DEVICE_FUNC Scalar coeff(Index i) const
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const
    {
      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
--- a/xs/src/eigen/Eigen/src/Core/ProductEvaluators.h
+++ b/xs/src/eigen/Eigen/src/Core/ProductEvaluators.h
@ -32,7 +32,7 @@ struct evaluator<Product<Lhs, Rhs, Options> >
  typedef Product<Lhs, Rhs, Options> XprType;
  typedef product_evaluator<XprType> Base;
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
 };
 // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
@ -55,7 +55,7 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
                               const Product<Lhs, Rhs, DefaultProduct> > XprType;
  typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
    : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
  {}
 };
@ -68,7 +68,7 @@ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
  typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
    : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
        Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
        xpr.index() ))
@ -207,6 +207,12 @@ struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename
  static const bool value = true;
 };
 // Specialization for a dense coefficient-wise difference whose right-hand operand is a
 // default Product (an expression of the form "xpr - lhs*rhs"): such an expression is
 // flagged as assumed-aliasing by setting value to true.
 template<typename OtherXpr, typename Lhs, typename Rhs>
 struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
                                               const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
  static const bool value = true;
 };
 template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
 struct assignment_from_xpr_op_product
 {
@ -240,19 +246,19 @@ template<typename Lhs, typename Rhs>
 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
 {
  template<typename Dst>
-  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
  }
  template<typename Dst>
-  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
  }
  template<typename Dst>
-  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
 };
@ -306,25 +312,25 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
  };
  // Outer product, overwrite form: forwards to outer_product_selector_run with a
  // default-constructed `set` functor (declared earlier in this struct, outside
  // this view); is_row_major<Dst>() is passed as the last (tag) argument.
  template<typename Dst>
-  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
  }
  // Outer product, accumulate form: same dispatch as evalTo but with the `add`
  // functor (declared earlier in this struct, outside this view).
  template<typename Dst>
-  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
  }
  // Outer product, subtract form: same dispatch as evalTo but with the `sub`
  // functor (declared earlier in this struct, outside this view).
  template<typename Dst>
-  static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
  }
  // Outer product, scaled accumulate form: same dispatch as evalTo but with an
  // `adds` functor constructed from alpha (functor declared earlier in this
  // struct, outside this view).
  template<typename Dst>
-  static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
  }
@ -779,7 +785,11 @@ public:
    _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
    _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
    Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
-    Alignment = evaluator<MatrixType>::Alignment
+    Alignment = evaluator<MatrixType>::Alignment,
    AsScalarProduct =     (DiagonalType::SizeAtCompileTime==1)
                      ||  (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
                      ||  (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
  };
  diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
@ -791,6 +801,9 @@ public:
  // Linear-index coefficient of the diagonal product.
  // When AsScalarProduct is set, every matrix coefficient is multiplied by the
  // diagonal's coefficient 0; otherwise coefficient idx of the diagonal
  // multiplies coefficient idx of the matrix.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
  {
    if(AsScalarProduct)
      return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);
    else
      return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
  }
--- a/xs/src/eigen/Eigen/src/Core/Redux.h
+++ b/xs/src/eigen/Eigen/src/Core/Redux.h
@ -407,7 +407,7 @@ protected:
  */
 template<typename Derived>
 template<typename Func>
-typename internal::traits<Derived>::Scalar
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::redux(const Func& func) const
 {
  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
--- a/xs/src/eigen/Eigen/src/Core/Ref.h
+++ b/xs/src/eigen/Eigen/src/Core/Ref.h
@ -95,6 +95,8 @@ protected:
  template<typename Expression>
  EIGEN_DEVICE_FUNC void construct(Expression& expr)
  {
    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression);
    if(PlainObjectType::RowsAtCompileTime==1)
    {
      eigen_assert(expr.rows()==1 || expr.cols()==1);
--- a/xs/src/eigen/Eigen/src/Core/SelfAdjointView.h
+++ b/xs/src/eigen/Eigen/src/Core/SelfAdjointView.h
@ -71,7 +71,9 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
    // Stores `matrix` in m_matrix. The static assertion rejects, at compile
    // time, any UpLo value other than Lower or Upper.
    EIGEN_DEVICE_FUNC
    explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
-    {}
+    {
      EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY);
    }
    EIGEN_DEVICE_FUNC
    inline Index rows() const { return m_matrix.rows(); }
@ -189,7 +191,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
                                   TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);
    }
-    typedef SelfAdjointView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
+    typedef SelfAdjointView<const MatrixConjugateReturnType,UpLo> ConjugateReturnType;
    /** \sa MatrixBase::conjugate() const */
    EIGEN_DEVICE_FUNC
    inline const ConjugateReturnType conjugate() const
--- a/xs/src/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/xs/src/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h
@ -15,33 +15,29 @@ namespace Eigen {
 // TODO generalize the scalar type of 'other'
 // In-place multiplication by a scalar: assigns a PlainObject::Constant
 // expression of this object's size, filled with `other`, through
 // call_assignment with mul_assign_op. Returns derived().
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
 {
  typedef typename Derived::PlainObject PlainObject;
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
  return derived();
 }
 // In-place addition of a scalar to an array expression: assigns a
 // PlainObject::Constant expression of this object's size, filled with `other`,
 // through call_assignment with add_assign_op. Returns derived().
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
 {
  typedef typename Derived::PlainObject PlainObject;
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
  return derived();
 }
 // In-place subtraction of a scalar from an array expression: assigns a
 // PlainObject::Constant expression of this object's size, filled with `other`,
 // through call_assignment with sub_assign_op. Returns derived().
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
 {
  typedef typename Derived::PlainObject PlainObject;
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
  return derived();
 }
 // In-place division by a scalar: assigns a PlainObject::Constant expression
 // of this object's size, filled with `other`, through call_assignment with
 // div_assign_op. Returns derived().
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
 {
  typedef typename Derived::PlainObject PlainObject;
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
  return derived();
 }
--- a/xs/src/eigen/Eigen/src/Core/Solve.h
+++ b/xs/src/eigen/Eigen/src/Core/Solve.h
@ -34,12 +34,12 @@ template<typename Decomposition, typename RhsType,typename StorageKind> struct s
 // Traits of a dense solve expression. PlainObject uses the right-hand side's
 // scalar type and storage options, Decomposition::ColsAtCompileTime rows and
 // RhsType::ColsAtCompileTime columns (with the matching Max* bounds).
 // The new version obtains the type through make_proper_matrix_type (defined
 // elsewhere) instead of naming Matrix<> directly.
 template<typename Decomposition, typename RhsType>
 struct solve_traits<Decomposition,RhsType,Dense>
 {
-  typedef Matrix<typename RhsType::Scalar,
+  typedef typename make_proper_matrix_type<typename RhsType::Scalar,
                 Decomposition::ColsAtCompileTime,
                 RhsType::ColsAtCompileTime,
                 RhsType::PlainObject::Options,
                 Decomposition::MaxColsAtCompileTime,
-                 RhsType::MaxColsAtCompileTime> PlainObject;  
+                 RhsType::MaxColsAtCompileTime>::type PlainObject;
 };
 template<typename Decomposition, typename RhsType>
--- a/xs/src/eigen/Eigen/src/Core/StableNorm.h
+++ b/xs/src/eigen/Eigen/src/Core/StableNorm.h
@ -165,12 +165,13 @@ MatrixBase<Derived>::stableNorm() const
  typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
  typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
-  DerivedCopy copy(derived());
+  const DerivedCopy copy(derived());
  enum {
    CanAlign = (   (int(DerivedCopyClean::Flags)&DirectAccessBit)
                || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough
-               ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT) // ifwe cannot allocate on the stack, then let's not bother about this optimization
+               ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT)
                 && (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization
  };
  typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
                                                   typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper;
--- a/xs/src/eigen/Eigen/src/Core/Transpositions.h
+++ b/xs/src/eigen/Eigen/src/Core/Transpositions.h
@ -384,7 +384,7 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
    const Product<OtherDerived, Transpose, AliasFreeProduct>
    operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt)
    {
-      return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt.derived());
+      return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt);
    }
    /** \returns the \a matrix with the inverse transpositions applied to the rows.
--- a/xs/src/eigen/Eigen/src/Core/arch/AVX/Complex.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/AVX/Complex.h
@ -204,23 +204,7 @@ template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
  }
 };
-template<> struct conj_helper<Packet8f, Packet4cf, false,false>
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
 {
  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
  { return padd(c, pmul(x,y)); }
  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
  { return Packet4cf(Eigen::internal::pmul(x, y.v)); }
 };
 template<> struct conj_helper<Packet4cf, Packet8f, false,false>
 {
  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
  { return padd(c, pmul(x,y)); }
  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
  { return Packet4cf(Eigen::internal::pmul(x.v, y)); }
 };
 template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
 {
@ -400,23 +384,7 @@ template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
  }
 };
-template<> struct conj_helper<Packet4d, Packet2cd, false,false>
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
 {
  EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
  { return padd(c, pmul(x,y)); }
  EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
  { return Packet2cd(Eigen::internal::pmul(x, y.v)); }
 };
 template<> struct conj_helper<Packet2cd, Packet4d, false,false>
 {
  EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
  { return padd(c, pmul(x,y)); }
  EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
  { return Packet2cd(Eigen::internal::pmul(x.v, y)); }
 };
 template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
 {
--- a/xs/src/eigen/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/AVX/PacketMath.h
@ -308,9 +308,9 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
 }
 // prefetch specializations for float, double and int, compiled only when
 // EIGEN_VECTORIZE_AVX512 is not defined. Each issues _mm_prefetch with the
 // _MM_HINT_T0 hint; the new version casts the address to SsePrefetchPtrType
 // (defined elsewhere) instead of a hard-coded const char*.
 #ifndef EIGEN_VECTORIZE_AVX512
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
 #endif
 template<> EIGEN_STRONG_INLINE float  pfirst<Packet8f>(const Packet8f& a) {
@ -333,9 +333,12 @@ template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
 {
   __m256d tmp = _mm256_shuffle_pd(a,a,5);
  return _mm256_permute2f128_pd(tmp, tmp, 1);
-
+  #if 0
  // This version is unlikely to be faster as _mm256_shuffle_ps and _mm256_permute_pd
  // exhibit the same latency/throughput, but it is here for future reference/benchmarking...
  __m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
    return _mm256_permute_pd(swap_halves,5);
  #endif
 }
 // pabs should be ok
--- a/xs/src/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h
@ -88,9 +88,9 @@ plog<Packet16f>(const Packet16f& _x) {
  //     x = x + x - 1.0;
  //   } else { x = x - 1.0; }
  __mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
-  Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps());
+  Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x);
  x = psub(x, p16f_1);
-  e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps()));
+  e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1));
  x = padd(x, tmp);
  Packet16f x2 = pmul(x, x);
@ -119,8 +119,9 @@ plog<Packet16f>(const Packet16f& _x) {
  x = padd(x, y2);
  // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
-  return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf,
+  return _mm512_mask_blend_ps(iszero_mask,
-                              _mm512_mask_blend_ps(invalid_mask, p16f_nan, x));
+                              _mm512_mask_blend_ps(invalid_mask, x, p16f_nan),
                              p16f_minus_inf);
 }
 #endif
@ -266,8 +267,7 @@ psqrt<Packet16f>(const Packet16f& _x) {
  // select only the inverse sqrt of positive normal inputs (denormals are
  // flushed to zero and cause infs as well).
  __mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
-  Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x),
+  Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(), _mm512_rsqrt14_ps(_x));
                                     _mm512_setzero_ps());
  // Do a single step of Newton's iteration.
  x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
@ -289,8 +289,7 @@ psqrt<Packet8d>(const Packet8d& _x) {
  // select only the inverse sqrt of positive normal inputs (denormals are
  // flushed to zero and cause infs as well).
  __mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
-  Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x),
+  Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(), _mm512_rsqrt14_pd(_x));
                                    _mm512_setzero_pd());
  // Do a first step of Newton's iteration.
  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
@ -333,20 +332,18 @@ prsqrt<Packet16f>(const Packet16f& _x) {
  // select only the inverse sqrt of positive normal inputs (denormals are
  // flushed to zero and cause infs as well).
  __mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
-  Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(),
+  Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x), _mm512_setzero_ps());
                                     _mm512_rsqrt14_ps(_x));
  // Fill in NaNs and Infs for the negative/zero entries.
  __mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
  Packet16f infs_and_nans = _mm512_mask_blend_ps(
-      neg_mask, p16f_nan,
+      neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf), p16f_nan);
      _mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps()));
  // Do a single step of Newton's iteration.
  x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
  // Insert NaNs and Infs in all the right places.
-  return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x);
+  return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans);
 }
 template <>
@ -363,14 +360,12 @@ prsqrt<Packet8d>(const Packet8d& _x) {
  // select only the inverse sqrt of positive normal inputs (denormals are
  // flushed to zero and cause infs as well).
  __mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
-  Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(),
+  Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x), _mm512_setzero_pd());
                                    _mm512_rsqrt14_pd(_x));
  // Fill in NaNs and Infs for the negative/zero entries.
  __mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
  Packet8d infs_and_nans = _mm512_mask_blend_pd(
-      neg_mask, p8d_nan,
+      neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf), p8d_nan);
      _mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd()));
  // Do a first step of Newton's iteration.
  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
@ -379,9 +374,9 @@ prsqrt<Packet8d>(const Packet8d& _x) {
  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
  // Insert NaNs and Infs in all the right places.
-  return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x);
+  return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans);
 }
-#else
+#elif defined(EIGEN_VECTORIZE_AVX512ER)
 template <>
 EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
  return _mm512_rsqrt28_ps(x);
--- a/xs/src/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h
@ -618,9 +618,9 @@ EIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) {
  pstore(to, pa);
 }
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
 template <>
 EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {
--- a/xs/src/eigen/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/AltiVec/Complex.h
@ -224,23 +224,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
  }
 };
-template<> struct conj_helper<Packet4f, Packet2cf, false,false>
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
 {
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
  { return padd(c, pmul(x,y)); }
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
  { return Packet2cf(internal::pmul<Packet4f>(x, y.v)); }
 };
 template<> struct conj_helper<Packet2cf, Packet4f, false,false>
 {
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
  { return padd(c, pmul(x,y)); }
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
  { return Packet2cf(internal::pmul<Packet4f>(x.v, y)); }
 };
 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
 {
@ -416,23 +400,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
    return pconj(internal::pmul(a, b));
  }
 };
 template<> struct conj_helper<Packet2d, Packet1cd, false,false>
 {
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
  { return padd(c, pmul(x,y)); }
-  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
  { return Packet1cd(internal::pmul<Packet2d>(x, y.v)); }
 };
 template<> struct conj_helper<Packet1cd, Packet2d, false,false>
 {
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
  { return padd(c, pmul(x,y)); }
  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
  { return Packet1cd(internal::pmul<Packet2d>(x.v, y)); }
 };
 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
 {
--- a/xs/src/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h
@ -388,10 +388,28 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
 template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); }
 template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; }
-template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
 {
  // With VSX: xvcmpgesp writes the per-lane (a >= b) mask into ret, then xxsel
  // uses that mask to choose between a and b. Without VSX: plain vec_min.
  // NOTE(review): presumably preferred over vec_min to pin down which operand
  // is returned when one of them is NaN -- confirm against the Power ISA.
  #ifdef __VSX__
  Packet4f ret;
  __asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
  return ret;
  #else
  return vec_min(a, b);
  #endif
 }
 template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
-template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
 {
  // With VSX: xvcmpgtsp writes the per-lane (b > a) mask into ret (note the
  // swapped operand order %x2,%x1), then xxsel uses that mask to choose between
  // a and b. Without VSX: plain vec_max.
  // NOTE(review): presumably preferred over vec_max to pin down which operand
  // is returned when one of them is NaN -- confirm against the Power ISA.
  #ifdef __VSX__
  Packet4f ret;
  __asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
  return ret;
  #else
  return vec_max(a, b);
  #endif
 }
 template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
 template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
@ -910,9 +928,19 @@ template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const
 // for some weird reasons, it has to be overloaded for packet of integers
 template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
-template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b)
 {
  // xvcmpgedp writes the per-lane (a >= b) mask into ret, then xxsel uses that
  // mask to choose between a and b. Unlike the Packet4f overload there is no
  // vec_min fallback in this body.
  Packet2d ret;
  __asm__ ("xvcmpgedp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
  return ret;
 }
-template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b)
 {
  // xvcmpgtdp writes the per-lane (b > a) mask into ret (operands swapped:
  // %x2,%x1), then xxsel uses that mask to choose between a and b. Unlike the
  // Packet4f overload there is no vec_max fallback in this body.
  Packet2d ret;
  __asm__ ("xvcmpgtdp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
  return ret;
 }
 template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
@ -1022,7 +1050,7 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) {
 // Two-lane blend. The selector entries are loaded into a Packet2l and compared
 // for equality with p2l_ONE to build a boolean mask; vec_sel then combines
 // elsePacket and thenPacket under that mask. The new version explicitly
 // reinterpret_casts the vec_cmpeq result to Packet2bl.
 template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
  Packet2l select = { ifPacket.select[0], ifPacket.select[1] };
-  Packet2bl mask = vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE));
+  Packet2bl mask = reinterpret_cast<Packet2bl>( vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE)) );
  return vec_sel(elsePacket, thenPacket, mask);
 }
 #endif // __VSX__
--- a/xs/src/eigen/Eigen/src/Core/arch/CUDA/Half.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/CUDA/Half.h
@ -13,7 +13,7 @@
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted.
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
@ -147,55 +147,55 @@ namespace half_impl {
 // versions to get the ALU speed increased), but you do save the
 // conversion steps back and forth.
-__device__ half operator + (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
  // The __device__ operators in this run forward to the half intrinsics
  // (__hadd, __hmul, __hsub, __hneg, __heq, __hne, __hlt, __hle, __hgt, __hge).
  return __hadd(a, b);
 }
-__device__ half operator * (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
  return __hmul(a, b);
 }
-__device__ half operator - (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
  return __hsub(a, b);
 }
-__device__ half operator / (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
  // Division is not done with a half intrinsic here: both operands are
  // converted to float, divided, and the quotient is converted back to half.
  float num = __half2float(a);
  float denom = __half2float(b);
  return __float2half(num / denom);
 }
-__device__ half operator - (const half& a) {
+EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
  return __hneg(a);
 }
-__device__ half& operator += (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {
  // Compound assignments are written in terms of the binary operators above.
  a = a + b;
  return a;
 }
-__device__ half& operator *= (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) {
  a = a * b;
  return a;
 }
-__device__ half& operator -= (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {
  a = a - b;
  return a;
 }
-__device__ half& operator /= (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {
  a = a / b;
  return a;
 }
-__device__ bool operator == (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {
  return __heq(a, b);
 }
-__device__ bool operator != (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {
  return __hne(a, b);
 }
-__device__ bool operator < (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {
  return __hlt(a, b);
 }
-__device__ bool operator <= (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {
  return __hle(a, b);
 }
-__device__ bool operator > (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {
  return __hgt(a, b);
 }
-__device__ bool operator >= (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
  return __hge(a, b);
 }
@ -238,10 +238,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b)
  return a;
 }
 // Equality of two halves, evaluated on their float conversions. The new
 // version routes the comparison through numext::equal_strict (defined
 // elsewhere) instead of using == directly.
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
-  return float(a) == float(b);
+  return numext::equal_strict(float(a),float(b));
 }
 // Inequality of two halves, evaluated on their float conversions. The new
 // version routes the comparison through numext::not_equal_strict (defined
 // elsewhere) instead of using != directly.
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
-  return float(a) != float(b);
+  return numext::not_equal_strict(float(a), float(b));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
  return float(a) < float(b);
@ -386,11 +386,15 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
  return result;
 }
 // Exponential of a half. Calls hexp directly when EIGEN_CUDACC_VER >= 80000
 // and EIGEN_CUDA_ARCH is defined and >= 530; otherwise computes ::expf on the
 // float conversion and converts the result back to half.
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
 #if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
  return half(hexp(a));
 #else
   return half(::expf(float(a)));
 #endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
-  return Eigen::half(::hlog(a));
+  return half(::hlog(a));
 #else
  return half(::logf(float(a)));
 #endif
@ -402,7 +406,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
  return half(::log10f(float(a)));
 }
 // Square root of a half. Calls hsqrt directly when EIGEN_CUDACC_VER >= 80000
 // and EIGEN_CUDA_ARCH is defined and >= 530; otherwise computes ::sqrtf on the
 // float conversion and converts the result back to half.
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
 #if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
  return half(hsqrt(a));
 #else
    return half(::sqrtf(float(a)));
 #endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
  return half(::powf(float(a), float(b)));
@ -420,10 +428,18 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
  return half(::tanhf(float(a)));
 }
 // Floor of a half. Calls hfloor directly when EIGEN_CUDACC_VER >= 80000 and
 // EIGEN_CUDA_ARCH is defined and >= 300 (a lower threshold than the 530 used
 // by exp/sqrt); otherwise computes ::floorf on the float conversion.
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
 #if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
  return half(hfloor(a));
 #else
  return half(::floorf(float(a)));
 #endif
 }
 // Ceiling of a half. Calls hceil directly when EIGEN_CUDACC_VER >= 80000 and
 // EIGEN_CUDA_ARCH is defined and >= 300; otherwise computes ::ceilf on the
 // float conversion and converts the result back to half.
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
 #if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
  return half(hceil(a));
 #else
  return half(::ceilf(float(a)));
 #endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
@ -474,9 +490,59 @@ template<> struct is_arithmetic<half> { enum { value = true }; };
 } // end namespace internal
 }  // end namespace Eigen
 // std::numeric_limits specialization for Eigen::half. The special values are
 // built from raw 16-bit patterns via Eigen::half_impl::raw_uint16_to_half,
 // except round_error(), which is constructed from the value 0.5.
 namespace std {
 template<>
 struct numeric_limits<Eigen::half> {
  static const bool is_specialized = true;
  static const bool is_signed = true;
  static const bool is_integer = false;
  static const bool is_exact = false;
  static const bool has_infinity = true;
  static const bool has_quiet_NaN = true;
  static const bool has_signaling_NaN = true;
  static const float_denorm_style has_denorm = denorm_present;
  static const bool has_denorm_loss = false;
  static const std::float_round_style round_style = std::round_to_nearest;
  static const bool is_iec559 = false;
  // NOTE(review): is_bounded is false although (max)() and lowest() below are
  // specific bit patterns distinct from infinity() -- confirm this is intended.
  static const bool is_bounded = false;
  static const bool is_modulo = false;
  static const int digits = 11;
  static const int digits10 = 3;      // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
  static const int max_digits10 = 5;  // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
  static const int radix = 2;
  static const int min_exponent = -13;
  static const int min_exponent10 = -4;
  static const int max_exponent = 16;
  static const int max_exponent10 = 4;
  static const bool traps = true;
  static const bool tinyness_before = false;
  static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
  static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
  static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
  static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
  static Eigen::half round_error() { return Eigen::half(0.5); }
  static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
  // NOTE(review): quiet_NaN() and signaling_NaN() return the same bit pattern
  // (0x7e00) even though has_signaling_NaN is true -- confirm this is intended.
  static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
  static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
  static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
 };
 }
 namespace Eigen {
 template<> struct NumTraits<Eigen::half>
    : GenericNumTraits<Eigen::half>
 {
  enum {
    IsSigned = true,
    IsInteger = false,
    IsComplex = false,
    RequireInitialization = false
  };
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
    return half_impl::raw_uint16_to_half(0x0800);
  }
@ -507,7 +573,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
  return Eigen::half(::expf(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
  return Eigen::half(::hlog(a));
 #else
  return Eigen::half(::logf(float(a)));
--- a/xs/src/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h
@ -291,7 +291,7 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
 EIGEN_DEVICE_FUNC inline void
 ptranspose(PacketBlock<float4,4>& kernel) {
-  double tmp = kernel.packet[0].y;
+  float tmp = kernel.packet[0].y;
  kernel.packet[0].y = kernel.packet[1].x;
  kernel.packet[1].x = tmp;
--- a/xs/src/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@ -275,7 +275,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
  return __floats2half2_rn(r1, r2);
 }
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
 template<>  __device__ EIGEN_STRONG_INLINE
 half2 plog<half2>(const half2& a) {
--- a/xs/src/eigen/Eigen/src/Core/arch/Default/ConjHelper.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/Default/ConjHelper.h
@ -0,0 +1,29 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #ifndef EIGEN_ARCH_CONJ_HELPER_H
 #define EIGEN_ARCH_CONJ_HELPER_H
 // Generates the two conj_helper specializations for mixed products between a
 // real packet type and a complex packet type, in both operand orders
 // (PACKET_REAL, PACKET_CPLX) and (PACKET_CPLX, PACKET_REAL), with both boolean
 // template flags set to false (presumably the conjugate-lhs / conjugate-rhs
 // flags -- confirm against the primary conj_helper template).
 //  - pmul multiplies the real packet with the complex packet's .v member as
 //    PACKET_REAL values and wraps the result back into a PACKET_CPLX.
 //  - pmadd returns c + x*y, built from padd and the pmul above.
 // Note the argument order of the macro: complex packet type first, real second.
 #define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL)                                                          \
  template<> struct conj_helper<PACKET_REAL, PACKET_CPLX, false,false> {                                          \
    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const \
    { return padd(c, pmul(x,y)); }                                                                                \
    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const                        \
    { return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); }                                           \
  };                                                                                                              \
                                                                                                                  \
  template<> struct conj_helper<PACKET_CPLX, PACKET_REAL, false,false> {                                          \
    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const \
    { return padd(c, pmul(x,y)); }                                                                                \
    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const                        \
    { return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); }                                           \
  };
 #endif // EIGEN_ARCH_CONJ_HELPER_H
--- a/xs/src/eigen/Eigen/src/Core/arch/NEON/Complex.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/NEON/Complex.h
@ -67,7 +67,7 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)
 {
  float32x2_t r64;
-  r64 = vld1_f32((float *)&from);
+  r64 = vld1_f32((const float *)&from);
  return Packet2cf(vcombine_f32(r64, r64));
 }
@ -142,7 +142,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
  to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
 }
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *   addr) { EIGEN_ARM_PREFETCH((float *)addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *   addr) { EIGEN_ARM_PREFETCH((const float *)addr); }
 template<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)
 {
@ -265,6 +265,8 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
  }
 };
 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
 {
  // TODO optimize it for NEON
@ -275,7 +277,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
  s = vmulq_f32(b.v, b.v);
  rev_s = vrev64q_f32(s);
-  return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
+  return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s,rev_s)));
 }
 EIGEN_DEVICE_FUNC inline void
@ -381,7 +383,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *   addr) { EIGEN_ARM_PREFETCH((double *)addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *   addr) { EIGEN_ARM_PREFETCH((const double *)addr); }
 template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
 {
@ -456,6 +458,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
  }
 };
 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
 {
  // TODO optimize it for NEON
--- a/xs/src/eigen/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/NEON/PacketMath.h
@ -36,12 +36,43 @@ namespace internal {
 #endif
 #endif
 #if EIGEN_COMP_MSVC
 // In MSVC's arm_neon.h header file, all NEON vector types
 // are aliases to the same underlying type __n128.
 // We thus have to wrap them to make them different C++ types.
 // (See also bug 1428)
 // Thin wrapper giving a raw vector type T its own distinct C++ type.
 // unique_id is not used by the body; it only makes instantiations that share
 // the same T different types. The wrapper converts implicitly to and from T.
 template<typename T,int unique_id>
 struct eigen_packet_wrapper
 {
  // The wrapped value; left uninitialized by the default constructor.
  T m_val;

  eigen_packet_wrapper() {}
  eigen_packet_wrapper(const T &value) : m_val(value) {}

  // Assigning a raw T overwrites the wrapped value.
  eigen_packet_wrapper& operator=(const T &value) {
    this->m_val = value;
    return *this;
  }

  // Implicit access to the wrapped value, mutable and read-only.
  operator T&() { return this->m_val; }
  operator const T&() const { return this->m_val; }
 };
 typedef eigen_packet_wrapper<float32x2_t,0> Packet2f;
 typedef eigen_packet_wrapper<float32x4_t,1> Packet4f;
 typedef eigen_packet_wrapper<int32x4_t  ,2> Packet4i;
 typedef eigen_packet_wrapper<int32x2_t  ,3> Packet2i;
 typedef eigen_packet_wrapper<uint32x4_t ,4> Packet4ui;
 #else
 typedef float32x2_t Packet2f;
 typedef float32x4_t Packet4f;
 typedef int32x4_t   Packet4i;
 typedef int32x2_t   Packet2i;
 typedef uint32x4_t  Packet4ui;
 #endif // EIGEN_COMP_MSVC
 #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
  const Packet4f p4f_##NAME = pset1<Packet4f>(X)
@ -51,14 +82,17 @@ typedef uint32x4_t  Packet4ui;
 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
  const Packet4i p4i_##NAME = pset1<Packet4i>(X)
-// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
+#if EIGEN_ARCH_ARM64
-// which available on LLVM and GCC (at least)
+  // __builtin_prefetch tends to do nothing on ARM64 compilers because the
-#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
+  // prefetch instructions there are too detailed for __builtin_prefetch to map
  // meaningfully to them.
  #define EIGEN_ARM_PREFETCH(ADDR)  __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) : );
 #elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
  #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
 #elif defined __pld
  #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
-#elif !EIGEN_ARCH_ARM64
+#elif EIGEN_ARCH_ARM32
-  #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( "   pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
+  #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : );
 #else
  // by default no explicit prefetching
  #define EIGEN_ARM_PREFETCH(ADDR)
@ -113,7 +147,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t&    from)
 template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
 {
-  const float32_t f[] = {0, 1, 2, 3};
+  const float f[] = {0, 1, 2, 3};
  Packet4f countdown = vld1q_f32(f);
  return vaddq_f32(pset1<Packet4f>(a), countdown);
 }
--- a/xs/src/eigen/Eigen/src/Core/arch/SSE/Complex.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/SSE/Complex.h
@ -128,7 +128,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
                                     _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
 }
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *   addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
 template<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)
 {
@ -229,23 +229,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
  }
 };
-template<> struct conj_helper<Packet4f, Packet2cf, false,false>
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
 {
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
  { return padd(c, pmul(x,y)); }
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
  { return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
 };
 template<> struct conj_helper<Packet2cf, Packet4f, false,false>
 {
  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
  { return padd(c, pmul(x,y)); }
  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
  { return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
 };
 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
 {
@ -340,7 +324,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *   addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
 template<> EIGEN_STRONG_INLINE std::complex<double>  pfirst<Packet1cd>(const Packet1cd& a)
 {
@ -430,23 +414,7 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
  }
 };
-template<> struct conj_helper<Packet2d, Packet1cd, false,false>
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
 {
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
  { return padd(c, pmul(x,y)); }
  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
  { return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
 };
 template<> struct conj_helper<Packet1cd, Packet2d, false,false>
 {
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
  { return padd(c, pmul(x,y)); }
  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
  { return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
 };
 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
 {
--- a/xs/src/eigen/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/SSE/PacketMath.h
@ -409,10 +409,16 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double&
  pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
 }
 // Pointer type that addresses are cast to before being passed to
 // _mm_prefetch: const void* when EIGEN_COMP_PGI is set, const char* otherwise.
 #if EIGEN_COMP_PGI
 typedef const void * SsePrefetchPtrType;
 #else
 typedef const char * SsePrefetchPtrType;
 #endif
 #ifndef EIGEN_VECTORIZE_AVX
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
 #endif
 #if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
@ -876,4 +882,14 @@ template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, co
 } // end namespace Eigen
 #if EIGEN_COMP_PGI
 // PGI++ does not define the following intrinsics in C++ mode.
 // Each replacement takes its argument by value and returns that local copy's
 // storage viewed as the destination vector type through a reference cast;
 // no numeric conversion of the contents is performed.
 static inline __m128  _mm_castpd_ps   (__m128d x) { return reinterpret_cast<__m128&>(x);  }
 static inline __m128i _mm_castpd_si128(__m128d x) { return reinterpret_cast<__m128i&>(x); }
 static inline __m128d _mm_castps_pd   (__m128  x) { return reinterpret_cast<__m128d&>(x); }
 static inline __m128i _mm_castps_si128(__m128  x) { return reinterpret_cast<__m128i&>(x); }
 static inline __m128  _mm_castsi128_ps(__m128i x) { return reinterpret_cast<__m128&>(x);  }
 static inline __m128d _mm_castsi128_pd(__m128i x) { return reinterpret_cast<__m128d&>(x); }
 #endif
 #endif // EIGEN_PACKET_MATH_SSE_H
--- a/xs/src/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h
@ -14,6 +14,7 @@ namespace Eigen {
 namespace internal {
 #ifndef EIGEN_VECTORIZE_AVX
 template <>
 struct type_casting_traits<float, int> {
  enum {
@ -23,11 +24,6 @@ struct type_casting_traits<float, int> {
  };
 };
 template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
  return _mm_cvttps_epi32(a);
 }
 template <>
 struct type_casting_traits<int, float> {
  enum {
@ -37,11 +33,6 @@ struct type_casting_traits<int, float> {
  };
 };
 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
  return _mm_cvtepi32_ps(a);
 }
 template <>
 struct type_casting_traits<double, float> {
  enum {
@ -51,10 +42,6 @@ struct type_casting_traits<double, float> {
  };
 };
 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
  return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
 }
 template <>
 struct type_casting_traits<float, double> {
  enum {
@ -63,6 +50,19 @@ struct type_casting_traits<float, double> {
    TgtCoeffRatio = 2
  };
 };
 #endif
 // Casts a packet of four floats to a packet of four 32-bit integers by
 // forwarding to _mm_cvttps_epi32 (the truncating float->int32 conversion).
 template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
  return _mm_cvttps_epi32(a);
 }
 // Casts a packet of four 32-bit integers to a packet of four floats by
 // forwarding to _mm_cvtepi32_ps.
 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
  return _mm_cvtepi32_ps(a);
 }
 // Casts two packets of two doubles into a single packet of four floats:
 // each input is narrowed with _mm_cvtpd_ps, then the shuffle picks lanes
 // 0 and 1 of the first result followed by lanes 0 and 1 of the second.
 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
  const Packet4f narrowed_a = _mm_cvtpd_ps(a);
  const Packet4f narrowed_b = _mm_cvtpd_ps(b);
  return _mm_shuffle_ps(narrowed_a, narrowed_b, (1 << 2) | (1 << 6));
 }
 template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
  // Simply discard the second half of the input
--- a/xs/src/eigen/Eigen/src/Core/arch/ZVector/Complex.h
+++ b/xs/src/eigen/Eigen/src/Core/arch/ZVector/Complex.h
@ -336,6 +336,9 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
  }
 };
 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
 {
  // TODO optimize it for AltiVec
--- a/xs/src/eigen/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/xs/src/eigen/Eigen/src/Core/functors/BinaryFunctors.h
@ -255,7 +255,7 @@ struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,Rh
 /** \internal
-  * \brief Template functor to compute the hypot of two scalars
+  * \brief Template functor to compute the hypot of two \b positive \b and \b real scalars
  *
  * \sa MatrixBase::stableNorm(), class Redux
  */
@ -263,22 +263,15 @@ template<typename Scalar>
 struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
 {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
-//   typedef typename NumTraits<Scalar>::Real result_type;
+
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar &y) const
  {
-    EIGEN_USING_STD_MATH(sqrt)
+    // This functor is used by hypotNorm only for which it is faster to first apply abs
-    Scalar p, qp;
+    // on all coefficients prior to reduction through hypot.
-    if(_x>_y)
+    // This way we avoid calling abs on positive and real entries, and this also permits
-    {
+    // to seamlessly handle complexes. Otherwise we would have to handle both real and complexes
-      p = _x;
+    // through the same functor...
-      qp = _y / p;
+    return internal::positive_real_hypot(x,y);
    }
    else
    {
      p = _y;
      qp = _x / p;
    }
    return p * sqrt(Scalar(1) + qp*qp);
  }
 };
 template<typename Scalar>
--- a/xs/src/eigen/Eigen/src/Core/functors/NullaryFunctors.h
+++ b/xs/src/eigen/Eigen/src/Core/functors/NullaryFunctors.h
@ -44,16 +44,16 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
 {
  linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
    m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
    m_interPacket(plset<Packet>(0)),
    m_flip(numext::abs(high)<numext::abs(low))
  {}
  template<typename IndexType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const {
    typedef typename NumTraits<Scalar>::Real RealScalar;
    if(m_flip)
-      return (i==0)? m_low : (m_high - (m_size1-i)*m_step);
+      return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step);
    else
-      return (i==m_size1)? m_high : (m_low + i*m_step);
+      return (i==m_size1)? m_high : (m_low + RealScalar(i)*m_step);
  }
  template<typename IndexType>
@ -63,7 +63,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
    // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
    if(m_flip)
    {
-      Packet pi = padd(pset1<Packet>(Scalar(i-m_size1)),m_interPacket);
+      Packet pi = plset<Packet>(Scalar(i-m_size1));
      Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
      if(i==0)
        res = pinsertfirst(res, m_low);
@ -71,7 +71,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
    }
    else
    {
-      Packet pi = padd(pset1<Packet>(Scalar(i)),m_interPacket);
+      Packet pi = plset<Packet>(Scalar(i));
      Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
      if(i==m_size1-unpacket_traits<Packet>::size+1)
        res = pinsertlast(res, m_high);
@ -83,7 +83,6 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
  const Scalar m_high;
  const Index m_size1;
  const Scalar m_step;
  const Packet m_interPacket;
  const bool m_flip;
 };
--- a/xs/src/eigen/Eigen/src/Core/functors/StlFunctors.h
+++ b/xs/src/eigen/Eigen/src/Core/functors/StlFunctors.h
@ -83,13 +83,17 @@ struct functor_traits<std::binder1st<T> >
 { enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
 #endif
 // The two specializations below are compiled only when __cplusplus is older
 // than 201703L and EIGEN_COMP_MSVC is below 1910.
 #if (__cplusplus < 201703L) && (EIGEN_COMP_MSVC < 1910)
 // std::unary_negate is deprecated since c++17 and will be removed in c++20
 // Cost is the wrapped functor's cost plus one; packet access is disabled.
 template<typename T>
 struct functor_traits<std::unary_negate<T> >
 { enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
 // std::binary_negate is deprecated since c++17 and will be removed in c++20
 // Cost is the wrapped functor's cost plus one; packet access is disabled.
 template<typename T>
 struct functor_traits<std::binary_negate<T> >
 { enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
 #endif
 #ifdef EIGEN_STDEXT_SUPPORT
--- a/xs/src/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/xs/src/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@ -269,10 +269,13 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
    enum {
      IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
      LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0,
-      RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0
+      RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0,
      SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0
    };
    Index size = mat.cols();
    if(SkipDiag)
      size--;
    Index depth = actualLhs.cols();
    typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar,
@ -283,10 +286,11 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
    internal::general_matrix_matrix_triangular_product<Index,
      typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
      typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
-      IsRowMajor ? RowMajor : ColMajor, UpLo>
+      IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)>
      ::run(size, depth,
-            &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
+            &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(),
-            mat.data(), mat.outerStride(), actualAlpha, blocking);
+            &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(),
            mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking);
  }
 };
@ -294,6 +298,7 @@ template<typename MatrixType, unsigned int UpLo>
 template<typename ProductType>
 TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta)
 {
  EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED);
  eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols());
  general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta);
--- a/xs/src/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
+++ b/xs/src/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con
  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
                          const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
  { \
-    if (lhs==rhs) { \
+    /* Use the rank-update kernel only when lhs and rhs alias and UpLo carries nothing but Lower/Upper bits. */ \
+    /* The mask is parenthesized because == binds tighter than &: previously the comparison ran first and its bool result was ANDed with UpLo. */ \
+    if ( lhs==rhs && ((UpLo&(Lower|Upper))==UpLo) ) { \
      general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
      ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
    } else { \
@ -88,7 +88,7 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
   BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
   char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \
   EIGTYPE beta(1); \
-   BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \
+   BLASFUNC(&uplo, &trans, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), lhs, &lda, (const BLASTYPE*)&numext::real_ref(beta), res, &ldc); \
  } \
 };
@ -125,9 +125,13 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
  } \
 };
-
+#ifdef EIGEN_USE_MKL
 EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk)
 EIGEN_BLAS_RANKUPDATE_R(float,  float,  ssyrk)
 #else
 EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)
 EIGEN_BLAS_RANKUPDATE_R(float,  float,  ssyrk_)
 #endif
 // TODO handle complex cases
 // EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)
--- a/xs/src/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
+++ b/xs/src/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
@ -46,7 +46,7 @@ namespace internal {
 // gemm specialization
-#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASPREFIX) \
+#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASFUNC) \
 template< \
  typename Index, \
  int LhsStorageOrder, bool ConjugateLhs, \
@ -100,13 +100,20 @@ static void run(Index rows, Index cols, Index depth, \
    ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
  } else b = _rhs; \
 \
-  BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+  BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
 }};
-GEMM_SPECIALIZATION(double,   d,  double, d)
+#ifdef EIGEN_USE_MKL
-GEMM_SPECIALIZATION(float,    f,  float,  s)
+GEMM_SPECIALIZATION(double,   d,  double, dgemm)
-GEMM_SPECIALIZATION(dcomplex, cd, double, z)
+GEMM_SPECIALIZATION(float,    f,  float,  sgemm)
-GEMM_SPECIALIZATION(scomplex, cf, float,  c)
+GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, zgemm)
 GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8,  cgemm)
 #else
 GEMM_SPECIALIZATION(double,   d,  double, dgemm_)
 GEMM_SPECIALIZATION(float,    f,  float,  sgemm_)
 GEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_)
 GEMM_SPECIALIZATION(scomplex, cf, float,  cgemm_)
 #endif
 } // end namespace internal
--- a/xs/src/eigen/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/xs/src/eigen/Eigen/src/Core/products/GeneralMatrixVector.h
@ -183,8 +183,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,C
    alignmentPattern = AllAligned;
  }
-  const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
+  const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1;
-  const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
+  const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3;
  Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
  for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
@ -457,8 +457,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
    alignmentPattern = AllAligned;
  }
-  const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
+  const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1;
-  const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
+  const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3;
  Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
  for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
--- a/xs/src/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
+++ b/xs/src/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
@ -85,7 +85,7 @@ EIGEN_BLAS_GEMV_SPECIALIZE(float)
 EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
 EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
-#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASPREFIX) \
+#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
 template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
 struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
 { \
@ -113,14 +113,21 @@ static void run( \
    x_ptr=x_tmp.data(); \
    incx=1; \
  } else x_ptr=rhs; \
-  BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
+  BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
 }\
 };
-EIGEN_BLAS_GEMV_SPECIALIZATION(double,   double, d)
+#ifdef EIGEN_USE_MKL
-EIGEN_BLAS_GEMV_SPECIALIZATION(float,    float,  s)
+EIGEN_BLAS_GEMV_SPECIALIZATION(double,   double, dgemv)
-EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z)
+EIGEN_BLAS_GEMV_SPECIALIZATION(float,    float,  sgemv)
-EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float,  c)
+EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv)
 EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv)
 #else
 EIGEN_BLAS_GEMV_SPECIALIZATION(double,   double, dgemv_)
 EIGEN_BLAS_GEMV_SPECIALIZATION(float,    float,  sgemv_)
 EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_)
 EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float,  cgemv_)
 #endif
 } // end namespace internal
--- a/xs/src/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
+++ b/xs/src/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
@ -40,7 +40,7 @@ namespace internal {
 /* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
-#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
@ -81,13 +81,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
    } else b = _rhs; \
 \
-    BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
 \
  } \
 };
-#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
@ -144,20 +144,26 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
    } \
 \
-    BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
 \
  } \
 };
-EIGEN_BLAS_SYMM_L(double, double, d, d)
+#ifdef EIGEN_USE_MKL
-EIGEN_BLAS_SYMM_L(float, float, f, s)
+EIGEN_BLAS_SYMM_L(double, double, d, dsymm)
-EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z)
+EIGEN_BLAS_SYMM_L(float, float, f, ssymm)
-EIGEN_BLAS_HEMM_L(scomplex, float, cf, c)
+EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm)
-
+EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm)
 #else
 EIGEN_BLAS_SYMM_L(double, double, d, dsymm_)
 EIGEN_BLAS_SYMM_L(float, float, f, ssymm_)
 EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_)
 EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
 #endif
 /* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
-#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
@ -197,13 +203,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
    } else b = _lhs; \
 \
-    BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
 \
  } \
 };
-#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
@ -259,15 +265,21 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
    } \
 \
-    BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
  } \
 };
-EIGEN_BLAS_SYMM_R(double, double, d, d)
+#ifdef EIGEN_USE_MKL
-EIGEN_BLAS_SYMM_R(float, float, f, s)
+EIGEN_BLAS_SYMM_R(double, double, d, dsymm)
-EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z)
+EIGEN_BLAS_SYMM_R(float, float, f, ssymm)
-EIGEN_BLAS_HEMM_R(scomplex, float, cf, c)
+EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm)
-
+EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm)
 #else
 EIGEN_BLAS_SYMM_R(double, double, d, dsymm_)
 EIGEN_BLAS_SYMM_R(float, float, f, ssymm_)
 EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_)
 EIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_)
 #endif
 } // end namespace internal
 } // end namespace Eigen
--- a/xs/src/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
+++ b/xs/src/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
@ -95,14 +95,21 @@ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
    x_tmp=map_x.conjugate(); \
    x_ptr=x_tmp.data(); \
  } else x_ptr=_rhs; \
-  BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
+  BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
 }\
 };
 // Instantiate the symmetric/hermitian matrix * vector specialization once per
 // scalar type. Macro arguments, in order: the Eigen scalar type, the pointer
 // type the operands are cast to for the BLAS call, and the BLAS routine that
 // is invoked.
 // With EIGEN_USE_MKL the routine names have no trailing underscore and the
 // complex cases cast to MKL_Complex16 / MKL_Complex8; otherwise the
 // underscore-suffixed names are used and complex operands are cast to
 // double / float pointers.
 #ifdef EIGEN_USE_MKL
 EIGEN_BLAS_SYMV_SPECIALIZATION(double,   double, dsymv)
 EIGEN_BLAS_SYMV_SPECIALIZATION(float,    float,  ssymv)
 EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
 EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8,  chemv)
 #else
 EIGEN_BLAS_SYMV_SPECIALIZATION(double,   double, dsymv_)
 EIGEN_BLAS_SYMV_SPECIALIZATION(float,    float,  ssymv_)
 EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)
 EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float,  chemv_)
 #endif
 } // end namespace internal
--- a/xs/src/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/xs/src/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h
@ -137,7 +137,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
-    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert()));
+    // To work around an "error: member reference base type 'Matrix<...>
    // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
    // not a structure or union" compilation error in nvcc (tested V8.0.61),
    // create a dummy internal::constructor_without_unaligned_array_assert
    // object to pass to the Matrix constructor.
    internal::constructor_without_unaligned_array_assert a;
    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a);
    triangularBuffer.setZero();
    if((Mode&ZeroDiag)==ZeroDiag)
      triangularBuffer.diagonal().setZero();
@ -284,7 +290,8 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
-    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert()));
+    internal::constructor_without_unaligned_array_assert a;
    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a);
    triangularBuffer.setZero();
    if((Mode&ZeroDiag)==ZeroDiag)
      triangularBuffer.diagonal().setZero();
@ -393,6 +400,8 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
 {
  template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha)
  {
    typedef typename Lhs::Scalar  LhsScalar;
    typedef typename Rhs::Scalar  RhsScalar;
    typedef typename Dest::Scalar Scalar;
    typedef internal::blas_traits<Lhs> LhsBlasTraits;
@ -405,8 +414,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
-    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
+    LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
-                               * RhsBlasTraits::extractScalarFactor(a_rhs);
+    RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
    Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
              Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
@ -431,6 +441,21 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
        &dst.coeffRef(0,0), dst.outerStride(),    // result info
        actualAlpha, blocking
      );
    // Apply correction if the diagonal is unit and a scalar factor was nested:
    if ((Mode&UnitDiag)==UnitDiag)
    {
      if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
      {
        Index diagSize = (std::min)(lhs.rows(),lhs.cols());
        dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);
      }
      else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
      {
        Index diagSize = (std::min)(rhs.rows(),rhs.cols());
        dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
      }
    }
  }
 };
--- a/xs/src/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
+++ b/xs/src/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
@ -75,7 +75,7 @@ EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true)
 EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false)
 // implements col-major += alpha * op(triangular) * op(general)
-#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, int Mode, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
@ -172,7 +172,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
   } \
   /*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \
 /* call ?trmm*/ \
-   BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
+   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
 \
 /* Add op(a_triangular)*b into res*/ \
   Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@ -180,13 +180,20 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
  } \
 };
-EIGEN_BLAS_TRMM_L(double, double, d, d)
+#ifdef EIGEN_USE_MKL
-EIGEN_BLAS_TRMM_L(dcomplex, double, cd, z)
+EIGEN_BLAS_TRMM_L(double, double, d, dtrmm)
-EIGEN_BLAS_TRMM_L(float, float, f, s)
+EIGEN_BLAS_TRMM_L(dcomplex, MKL_Complex16, cd, ztrmm)
-EIGEN_BLAS_TRMM_L(scomplex, float, cf, c)
+EIGEN_BLAS_TRMM_L(float, float, f, strmm)
 EIGEN_BLAS_TRMM_L(scomplex, MKL_Complex8, cf, ctrmm)
 #else
 EIGEN_BLAS_TRMM_L(double, double, d, dtrmm_)
 EIGEN_BLAS_TRMM_L(dcomplex, double, cd, ztrmm_)
 EIGEN_BLAS_TRMM_L(float, float, f, strmm_)
 EIGEN_BLAS_TRMM_L(scomplex, float, cf, ctrmm_)
 #endif
 // implements col-major += alpha * op(general) * op(triangular)
-#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, int Mode, \
          int LhsStorageOrder, bool ConjugateLhs, \
          int RhsStorageOrder, bool ConjugateRhs> \
@ -282,7 +289,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
   } \
   /*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \
 /* call ?trmm*/ \
-   BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
+   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
 \
 /* Add op(a_triangular)*b into res*/ \
   Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@ -290,11 +297,17 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
  } \
 };
-EIGEN_BLAS_TRMM_R(double, double, d, d)
+#ifdef EIGEN_USE_MKL
-EIGEN_BLAS_TRMM_R(dcomplex, double, cd, z)
+EIGEN_BLAS_TRMM_R(double, double, d, dtrmm)
-EIGEN_BLAS_TRMM_R(float, float, f, s)
+EIGEN_BLAS_TRMM_R(dcomplex, MKL_Complex16, cd, ztrmm)
-EIGEN_BLAS_TRMM_R(scomplex, float, cf, c)
+EIGEN_BLAS_TRMM_R(float, float, f, strmm)
-
+EIGEN_BLAS_TRMM_R(scomplex, MKL_Complex8, cf, ctrmm)
 #else
 EIGEN_BLAS_TRMM_R(double, double, d, dtrmm_)
 EIGEN_BLAS_TRMM_R(dcomplex, double, cd, ztrmm_)
 EIGEN_BLAS_TRMM_R(float, float, f, strmm_)
 EIGEN_BLAS_TRMM_R(scomplex, float, cf, ctrmm_)
 #endif
 } // end namespace internal
 } // end namespace Eigen
--- a/xs/src/eigen/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/xs/src/eigen/Eigen/src/Core/products/TriangularMatrixVector.h
@ -221,8 +221,9 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
    typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+    LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
-                                  * RhsBlasTraits::extractScalarFactor(rhs);
+    RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
    ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
@ -274,6 +275,12 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
      else
        dest = MappedDest(actualDestPtr, dest.size());
    }
    if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
    {
      Index diagSize = (std::min)(lhs.rows(),lhs.cols());
      dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
    }
  }
 };
@ -295,8 +302,9 @@ template<int Mode> struct trmv_selector<Mode,RowMajor>
    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+    LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
-                                  * RhsBlasTraits::extractScalarFactor(rhs);
+    RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
    ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
    enum {
      DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
@ -326,6 +334,12 @@ template<int Mode> struct trmv_selector<Mode,RowMajor>
            actualRhsPtr,1,
            dest.data(),dest.innerStride(),
            actualAlpha);
    if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
    {
      Index diagSize = (std::min)(lhs.rows(),lhs.cols());
      dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
    }
  }
 };
--- a/xs/src/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
+++ b/xs/src/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
@ -71,7 +71,7 @@ EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)
 EIGEN_BLAS_TRMV_SPECIALIZE(scomplex)
 // implements col-major: res += alpha * op(triangular) * vector
-#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
 template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
 struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
  enum { \
@ -121,10 +121,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
   diag = IsUnitDiag ? 'U' : 'N'; \
 \
 /* call ?TRMV*/ \
-   BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
+   BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
 \
 /* Add op(a_tr)rhs into res*/ \
-   BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
+   BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
 /* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
   if (size<(std::max)(rows,cols)) { \
     if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
@ -142,18 +142,25 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
       m = convert_index<BlasIndex>(size); \
       n = convert_index<BlasIndex>(cols-size); \
     } \
-     BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
+     BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
   } \
  } \
 };
-EIGEN_BLAS_TRMV_CM(double,   double, d,  d)
+#ifdef EIGEN_USE_MKL
-EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z)
+EIGEN_BLAS_TRMV_CM(double,   double, d,  d,)
-EIGEN_BLAS_TRMV_CM(float,    float,  f,  s)
+EIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,)
-EIGEN_BLAS_TRMV_CM(scomplex, float,  cf, c)
+EIGEN_BLAS_TRMV_CM(float,    float,  f,  s,)
 EIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8,  cf, c,)
 #else
 EIGEN_BLAS_TRMV_CM(double,   double, d,  d, _)
 EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _)
 EIGEN_BLAS_TRMV_CM(float,    float,  f,  s, _)
 EIGEN_BLAS_TRMV_CM(scomplex, float,  cf, c, _)
 #endif
 // implements row-major: res += alpha * op(triangular) * vector
-#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
 template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
 struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
  enum { \
@ -203,10 +210,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
   diag = IsUnitDiag ? 'U' : 'N'; \
 \
 /* call ?TRMV*/ \
-   BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
+   BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
 \
 /* Add op(a_tr)rhs into res*/ \
-   BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
+   BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
 /* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
   if (size<(std::max)(rows,cols)) { \
     if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
@ -224,15 +231,22 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
       m = convert_index<BlasIndex>(size); \
       n = convert_index<BlasIndex>(cols-size); \
     } \
-     BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
+     BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
   } \
  } \
 };
-EIGEN_BLAS_TRMV_RM(double,   double, d,  d)
+#ifdef EIGEN_USE_MKL
-EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z)
+EIGEN_BLAS_TRMV_RM(double,   double, d,  d,)
-EIGEN_BLAS_TRMV_RM(float,    float,  f,  s)
+EIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,)
-EIGEN_BLAS_TRMV_RM(scomplex, float,  cf, c)
+EIGEN_BLAS_TRMV_RM(float,    float,  f,  s,)
 EIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8,  cf, c,)
 #else
 EIGEN_BLAS_TRMV_RM(double,   double, d,  d,_)
 EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_)
 EIGEN_BLAS_TRMV_RM(float,    float,  f,  s,_)
 EIGEN_BLAS_TRMV_RM(scomplex, float,  cf, c,_)
 #endif
 } // end namespace internal
--- a/xs/src/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
+++ b/xs/src/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
@ -38,7 +38,7 @@ namespace Eigen {
 namespace internal {
 // implements LeftSide op(triangular)^-1 * general
-#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \
+#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
 template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
 struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
 { \
@ -80,18 +80,24 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
   } \
   if (IsUnitDiag) diag='U'; \
 /* call ?trsm*/ \
-   BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
+   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
 } \
 };
-EIGEN_BLAS_TRSM_L(double,   double, d)
+#ifdef EIGEN_USE_MKL
-EIGEN_BLAS_TRSM_L(dcomplex, double, z)
+EIGEN_BLAS_TRSM_L(double,   double, dtrsm)
-EIGEN_BLAS_TRSM_L(float,    float,  s)
+EIGEN_BLAS_TRSM_L(dcomplex, MKL_Complex16, ztrsm)
-EIGEN_BLAS_TRSM_L(scomplex, float,  c)
+EIGEN_BLAS_TRSM_L(float,    float,  strsm)
-
+EIGEN_BLAS_TRSM_L(scomplex, MKL_Complex8, ctrsm)
 #else
 EIGEN_BLAS_TRSM_L(double,   double, dtrsm_)
 EIGEN_BLAS_TRSM_L(dcomplex, double, ztrsm_)
 EIGEN_BLAS_TRSM_L(float,    float,  strsm_)
 EIGEN_BLAS_TRSM_L(scomplex, float,  ctrsm_)
 #endif
 // implements RightSide general * op(triangular)^-1
-#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASPREFIX) \
+#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
 template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
 struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
 { \
@ -133,16 +139,22 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
   } \
   if (IsUnitDiag) diag='U'; \
 /* call ?trsm*/ \
-   BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
+   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
   /*std::cout << "TRMS_L specialization!\n";*/ \
 } \
 };
-EIGEN_BLAS_TRSM_R(double,   double, d)
+#ifdef EIGEN_USE_MKL
-EIGEN_BLAS_TRSM_R(dcomplex, double, z)
+EIGEN_BLAS_TRSM_R(double,   double, dtrsm)
-EIGEN_BLAS_TRSM_R(float,    float,  s)
+EIGEN_BLAS_TRSM_R(dcomplex, MKL_Complex16, ztrsm)
-EIGEN_BLAS_TRSM_R(scomplex, float,  c)
+EIGEN_BLAS_TRSM_R(float,    float,  strsm)
-
+EIGEN_BLAS_TRSM_R(scomplex, MKL_Complex8,  ctrsm)
 #else
 EIGEN_BLAS_TRSM_R(double,   double, dtrsm_)
 EIGEN_BLAS_TRSM_R(dcomplex, double, ztrsm_)
 EIGEN_BLAS_TRSM_R(float,    float,  strsm_)
 EIGEN_BLAS_TRSM_R(scomplex, float,  ctrsm_)
 #endif
 } // end namespace internal
--- a/xs/src/eigen/Eigen/src/Core/util/MKL_support.h
+++ b/xs/src/eigen/Eigen/src/Core/util/MKL_support.h
@ -49,10 +49,11 @@
  #define EIGEN_USE_LAPACKE
 #endif
-#if defined(EIGEN_USE_MKL_VML)
+#if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL)
  #define EIGEN_USE_MKL
 #endif
 #if defined EIGEN_USE_MKL
 #   include <mkl.h> 
 /*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/
@ -108,6 +109,10 @@
 #endif
 #endif
 // Include the bundled misc/blas.h only when BLAS is requested and MKL is not
 // in use.
 // NOTE(review): presumably <mkl.h> already supplies the BLAS prototypes in the
 // MKL case, which would make this header redundant or conflicting - confirm.
 #if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL)
 #include "../../misc/blas.h"
 #endif
 namespace Eigen {
 typedef std::complex<double> dcomplex;
@ -121,8 +126,5 @@ typedef int BlasIndex;
 } // end namespace Eigen
 #if defined(EIGEN_USE_BLAS)
 #include "../../misc/blas.h"
 #endif
 #endif // EIGEN_MKL_SUPPORT_H
--- a/xs/src/eigen/Eigen/src/Core/util/Macros.h
+++ b/xs/src/eigen/Eigen/src/Core/util/Macros.h
@ -13,7 +13,7 @@
 #define EIGEN_WORLD_VERSION 3
 #define EIGEN_MAJOR_VERSION 3
-#define EIGEN_MINOR_VERSION 3
+#define EIGEN_MINOR_VERSION 5
 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
                                      (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@ -399,7 +399,7 @@
 // Does the compiler support variadic templates?
 #ifndef EIGEN_HAS_VARIADIC_TEMPLATES
 #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
-  && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) )
+  && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) )
    // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
    //    this prevents nvcc from crashing when compiling Eigen on Tegra X1
 #define EIGEN_HAS_VARIADIC_TEMPLATES 1
@ -413,7 +413,7 @@
 #ifdef __CUDACC__
 // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
-#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500))
+#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500))
  #define EIGEN_HAS_CONSTEXPR 1
 #endif
 #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
@ -487,11 +487,13 @@
 // EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC and ICC,
 // but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
 // but GCC is still doing fine with just inline.
 // The #ifndef guard leaves any EIGEN_STRONG_INLINE definition made before this point untouched.
 #ifndef EIGEN_STRONG_INLINE
 #if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
 #define EIGEN_STRONG_INLINE __forceinline
 #else
 #define EIGEN_STRONG_INLINE inline
 #endif
 #endif
 // EIGEN_ALWAYS_INLINE is the strongest, it has the effect of making the function inline and adding every possible
 // attribute to maximize inlining. This should only be used when really necessary: in particular,
@ -812,7 +814,8 @@ namespace Eigen {
 // just an empty macro !
 #define EIGEN_EMPTY
-#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 ||  defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
+#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0)
  // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
  #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
    using Base::operator =;
 #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
@ -986,7 +989,13 @@ namespace Eigen {
 #   define EIGEN_NOEXCEPT
 #   define EIGEN_NOEXCEPT_IF(x)
 #   define EIGEN_NO_THROW throw()
 #   if EIGEN_COMP_MSVC
      // MSVC does not support exception specifications (warning C4290),
      // and they are deprecated in c++11 anyway.
 #     define EIGEN_EXCEPTION_SPEC(X) throw()
 #   else
 #     define EIGEN_EXCEPTION_SPEC(X) throw(X)
 #   endif
 #endif
 #endif // EIGEN_MACROS_H
--- a/xs/src/eigen/Eigen/src/Core/util/Memory.h
+++ b/xs/src/eigen/Eigen/src/Core/util/Memory.h
@ -70,7 +70,7 @@ inline void throw_std_bad_alloc()
    throw std::bad_alloc();
  #else
    std::size_t huge = static_cast<std::size_t>(-1);
-    new int[huge];
+    ::operator new(huge);
  #endif
 }
@ -493,7 +493,7 @@ template<typename T> struct smart_copy_helper<T,true> {
    IntPtr size = IntPtr(end)-IntPtr(start);
    if(size==0) return;
    eigen_internal_assert(start!=0 && end!=0 && target!=0);
-    memcpy(target, start, size);
+    std::memcpy(target, start, size);
  }
 };
@ -696,7 +696,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
 /** \class aligned_allocator
 * \ingroup Core_Module
 *
-* \brief STL compatible allocator to use with with 16 byte aligned types
+* \brief STL compatible allocator to use with types requiring a non-standard alignment.
 *
 * The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
 * By default, it will thus provide at least 16 bytes alignment and more in following cases:
 *  - 32 bytes alignment if AVX is enabled.
 *  - 64 bytes alignment if AVX512 is enabled.
 *
 * This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
 * \link TopicPreprocessorDirectivesPerformance there \endlink.
 *
 * Example:
 * \code
--- a/xs/src/eigen/Eigen/src/Core/util/Meta.h
+++ b/xs/src/eigen/Eigen/src/Core/util/Meta.h
@ -485,6 +485,26 @@ T div_ceil(const T &a, const T &b)
  return (a+b-1) / b;
 }
 // The aim of the following functions is to bypass -Wfloat-equal warnings
 // when we really want a strict equality comparison on floating points.
 //
 // NOTE(review): std::equal_to / std::not_equal_to are declared in
 // <functional>; confirm that header is included before this point.

 // Generic case: returns x == y.
 template<typename X, typename Y> EIGEN_STRONG_INLINE
 bool equal_strict(const X& x,const Y& y) { return x == y; }
 // float specialization: performs the comparison through std::equal_to<float>.
 template<> EIGEN_STRONG_INLINE
 bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }
 // double specialization: performs the comparison through std::equal_to<double>.
 template<> EIGEN_STRONG_INLINE
 bool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); }
 // Generic case: returns x != y.
 template<typename X, typename Y> EIGEN_STRONG_INLINE
 bool not_equal_strict(const X& x,const Y& y) { return x != y; }
 // float specialization: performs the comparison through std::not_equal_to<float>.
 template<> EIGEN_STRONG_INLINE
 bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }
 // double specialization: performs the comparison through std::not_equal_to<double>.
 template<> EIGEN_STRONG_INLINE
 bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
 } // end namespace numext
 } // end namespace Eigen
--- a/xs/src/eigen/Eigen/src/Core/util/StaticAssert.h
+++ b/xs/src/eigen/Eigen/src/Core/util/StaticAssert.h
@ -24,6 +24,7 @@
 *
 */
 #ifndef EIGEN_STATIC_ASSERT
 #ifndef EIGEN_NO_STATIC_ASSERT
  #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600))
@ -44,64 +45,65 @@
    struct static_assertion<true>
    {
      enum {
-        YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX,
+        YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1,
-        YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES,
+        YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1,
-        YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES,
+        YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1,
-        THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
+        THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1,
-        THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
+        THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1,
-        THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE,
+        THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1,
-        OUT_OF_RANGE_ACCESS,
+        OUT_OF_RANGE_ACCESS=1,
-        YOU_MADE_A_PROGRAMMING_MISTAKE,
+        YOU_MADE_A_PROGRAMMING_MISTAKE=1,
-        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT,
+        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1,
-        EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,
+        EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1,
-        YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR,
+        YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1,
-        YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR,
+        YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1,
-        UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC,
+        UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1,
-        THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES,
+        THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1,
-        FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED,
+        FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1,
-        NUMERIC_TYPE_MUST_BE_REAL,
+        NUMERIC_TYPE_MUST_BE_REAL=1,
-        COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED,
+        COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1,
-        WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED,
+        WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1,
-        THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE,
+        THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1,
-        INVALID_MATRIX_PRODUCT,
+        INVALID_MATRIX_PRODUCT=1,
-        INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS,
+        INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1,
-        INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION,
+        INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1,
-        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY,
+        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1,
-        THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES,
+        THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1,
-        THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES,
+        THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1,
-        INVALID_MATRIX_TEMPLATE_PARAMETERS,
+        INVALID_MATRIX_TEMPLATE_PARAMETERS=1,
-        INVALID_MATRIXBASE_TEMPLATE_PARAMETERS,
+        INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1,
-        BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
+        BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1,
-        THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX,
+        THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1,
-        THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE,
+        THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1,
-        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES,
+        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1,
-        YOU_ALREADY_SPECIFIED_THIS_STRIDE,
+        YOU_ALREADY_SPECIFIED_THIS_STRIDE=1,
-        INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION,
+        INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1,
-        THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD,
+        THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1,
-        PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1,
+        PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1,
-        THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS,
+        THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1,
-        YOU_CANNOT_MIX_ARRAYS_AND_MATRICES,
+        YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1,
-        YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION,
+        YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1,
-        THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY,
+        THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1,
-        YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT,
+        YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1,
-        THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS,
+        THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1,
-        THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS,
+        THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1,
-        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL,
+        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1,
-        THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES,
+        THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1,
-        YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED,
+        YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1,
-        YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
+        YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1,
-        THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE,
+        THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1,
-        THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
+        THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1,
-        OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
+        OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1,
-        IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
+        IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1,
-        STORAGE_LAYOUT_DOES_NOT_MATCH,
+        STORAGE_LAYOUT_DOES_NOT_MATCH=1,
-        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE,
+        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1,
-        THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS,
+        THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1,
-        MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
+        MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1,
-        THIS_TYPE_IS_NOT_SUPPORTED,
+        THIS_TYPE_IS_NOT_SUPPORTED=1,
-        STORAGE_KIND_MUST_MATCH,
+        STORAGE_KIND_MUST_MATCH=1,
-        STORAGE_INDEX_MUST_MATCH,
+        STORAGE_INDEX_MUST_MATCH=1,
-        CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY
+        CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1,
        SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1
      };
    };
@ -131,7 +133,7 @@
  #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG);
 #endif // EIGEN_NO_STATIC_ASSERT
-
+#endif // EIGEN_STATIC_ASSERT
 // static assertion failing if the type \a TYPE is not a vector type
 #define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \
--- a/xs/src/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
+++ b/xs/src/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
@ -311,7 +311,6 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
    // Aliases:
    Map<VectorType> v(reinterpret_cast<Scalar*>(m_tmp.data()), size);
    ComplexVectorType &cv = m_tmp;
    const MatrixType &mZ = m_realQZ.matrixZ();
    const MatrixType &mS = m_realQZ.matrixS();
    const MatrixType &mT = m_realQZ.matrixT();
@ -351,7 +350,7 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
              }
            }
          }
-          m_eivec.col(i).real().noalias() = mZ.transpose() * v;
+          m_eivec.col(i).real().noalias() = m_realQZ.matrixZ().transpose() * v;
          m_eivec.col(i).real().normalize();
          m_eivec.col(i).imag().setConstant(0);
        }
@ -400,7 +399,7 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
                              / (alpha*mT.coeffRef(j,j) - static_cast<Scalar>(beta*mS.coeffRef(j,j)));
            }
          }
-          m_eivec.col(i+1).noalias() = (mZ.transpose() * cv);
+          m_eivec.col(i+1).noalias() = (m_realQZ.matrixZ().transpose() * cv);
          m_eivec.col(i+1).normalize();
          m_eivec.col(i) = m_eivec.col(i+1).conjugate();
        }
--- a/xs/src/eigen/Eigen/src/Eigenvalues/RealSchur.h
+++ b/xs/src/eigen/Eigen/src/Eigenvalues/RealSchur.h
@ -303,7 +303,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
  Scalar exshift(0);   // sum of exceptional shifts
  Scalar norm = computeNormOfT();
-  if(norm!=0)
+  if(norm!=Scalar(0))
  {
    while (iu >= 0)
    {
@ -327,7 +327,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
      else // No convergence yet
      {
        // The firstHouseholderVector vector has to be initialized to something to get rid of a silly GCC warning (-O1 -Wall -DNDEBUG )
-        Vector3s firstHouseholderVector(0,0,0), shiftInfo;
+        Vector3s firstHouseholderVector = Vector3s::Zero(), shiftInfo;
        computeShift(iu, iter, exshift, shiftInfo);
        iter = iter + 1;
        totalIter = totalIter + 1;
--- a/xs/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h
+++ b/xs/src/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h
@ -37,7 +37,7 @@ namespace Eigen {
 /** \internal Specialization for the data types supported by LAPACKe */
-#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW, LAPACKE_COLROW ) \
+#define EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW ) \
 template<> template<typename InputType> inline \
 SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
 SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, int options) \
@ -47,7 +47,7 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c
          && (options&EigVecMask)!=EigVecMask \
          && "invalid option parameter"); \
  bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \
-  lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), lda, matrix_order, info; \
+  lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), lda, info; \
  m_eivalues.resize(n,1); \
  m_subdiag.resize(n-1); \
  m_eivec = matrix; \
@ -63,27 +63,24 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c
  } \
 \
  lda = internal::convert_index<lapack_int>(m_eivec.outerStride()); \
-  matrix_order=LAPACKE_COLROW; \
  char jobz, uplo='L'/*, range='A'*/; \
  jobz = computeEigenvectors ? 'V' : 'N'; \
 \
-  info = LAPACKE_##LAPACKE_NAME( matrix_order, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \
+  info = LAPACKE_##LAPACKE_NAME( LAPACK_COL_MAJOR, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \
  m_info = (info==0) ? Success : NoConvergence; \
  m_isInitialized = true; \
  m_eigenvectorsOk = computeEigenvectors; \
  return *this; \
 }
 #define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME )              \
        EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, ColMajor )  \
        EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, RowMajor ) 
-EIGEN_LAPACKE_EIG_SELFADJ(double,   double,                double, dsyev, ColMajor, LAPACK_COL_MAJOR)
+EIGEN_LAPACKE_EIG_SELFADJ(double,   double,                double, dsyev)
-EIGEN_LAPACKE_EIG_SELFADJ(float,    float,                 float,  ssyev, ColMajor, LAPACK_COL_MAJOR)
+EIGEN_LAPACKE_EIG_SELFADJ(float,    float,                 float,  ssyev)
-EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, ColMajor, LAPACK_COL_MAJOR)
+EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev)
-EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float,  float,  cheev, ColMajor, LAPACK_COL_MAJOR)
+EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float,  float,  cheev)
-EIGEN_LAPACKE_EIG_SELFADJ(double,   double,                double, dsyev, RowMajor, LAPACK_ROW_MAJOR)
-EIGEN_LAPACKE_EIG_SELFADJ(float,    float,                 float,  ssyev, RowMajor, LAPACK_ROW_MAJOR)
-EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, RowMajor, LAPACK_ROW_MAJOR)
-EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float,  float,  cheev, RowMajor, LAPACK_ROW_MAJOR)
 } // end namespace Eigen
--- a/xs/src/eigen/Eigen/src/Geometry/AngleAxis.h
+++ b/xs/src/eigen/Eigen/src/Geometry/AngleAxis.h
@ -178,7 +178,7 @@ EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const Quaterni
  if (n != Scalar(0))
  {
    m_angle = Scalar(2)*atan2(n, abs(q.w()));
-    if(q.w() < 0)
+    if(q.w() < Scalar(0))
      n = -n;
    m_axis  = q.vec() / n;
  }
--- a/xs/src/eigen/Eigen/src/Geometry/Quaternion.h
+++ b/xs/src/eigen/Eigen/src/Geometry/Quaternion.h
@ -43,6 +43,11 @@ class QuaternionBase : public RotationBase<Derived, 3>
  typedef typename internal::traits<Derived>::Scalar Scalar;
  typedef typename NumTraits<Scalar>::Real RealScalar;
  typedef typename internal::traits<Derived>::Coefficients Coefficients;
  typedef typename Coefficients::CoeffReturnType CoeffReturnType;
  typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
                                        Scalar&, CoeffReturnType>::type NonConstCoeffReturnType;
  enum {
    Flags = Eigen::internal::traits<Derived>::Flags
  };
@ -58,22 +63,22 @@ class QuaternionBase : public RotationBase<Derived, 3>
  /** \returns the \c x coefficient */
-  EIGEN_DEVICE_FUNC inline Scalar x() const { return this->derived().coeffs().coeff(0); }
+  EIGEN_DEVICE_FUNC inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); }
  /** \returns the \c y coefficient */
-  EIGEN_DEVICE_FUNC inline Scalar y() const { return this->derived().coeffs().coeff(1); }
+  EIGEN_DEVICE_FUNC inline CoeffReturnType y() const { return this->derived().coeffs().coeff(1); }
  /** \returns the \c z coefficient */
-  EIGEN_DEVICE_FUNC inline Scalar z() const { return this->derived().coeffs().coeff(2); }
+  EIGEN_DEVICE_FUNC inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); }
  /** \returns the \c w coefficient */
-  EIGEN_DEVICE_FUNC inline Scalar w() const { return this->derived().coeffs().coeff(3); }
+  EIGEN_DEVICE_FUNC inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); }
-  /** \returns a reference to the \c x coefficient */
+  /** \returns a reference to the \c x coefficient (if Derived is a non-const lvalue) */
-  EIGEN_DEVICE_FUNC inline Scalar& x() { return this->derived().coeffs().coeffRef(0); }
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); }
-  /** \returns a reference to the \c y coefficient */
+  /** \returns a reference to the \c y coefficient (if Derived is a non-const lvalue) */
-  EIGEN_DEVICE_FUNC inline Scalar& y() { return this->derived().coeffs().coeffRef(1); }
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); }
-  /** \returns a reference to the \c z coefficient */
+  /** \returns a reference to the \c z coefficient (if Derived is a non-const lvalue) */
-  EIGEN_DEVICE_FUNC inline Scalar& z() { return this->derived().coeffs().coeffRef(2); }
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); }
-  /** \returns a reference to the \c w coefficient */
+  /** \returns a reference to the \c w coefficient (if Derived is a non-const lvalue) */
-  EIGEN_DEVICE_FUNC inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); }
  /** \returns a read-only vector expression of the imaginary part (x,y,z) */
  EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
@ -423,7 +428,7 @@ typedef Map<Quaternion<double>, Aligned>  QuaternionMapAlignedd;
 // Generic Quaternion * Quaternion product
 // This product can be specialized for a given architecture via the Arch template argument.
 namespace internal {
-template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options> struct quat_product
+template<int Arch, class Derived1, class Derived2, typename Scalar> struct quat_product
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
    return Quaternion<Scalar>
@ -446,8 +451,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c
  EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value),
   YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
  return internal::quat_product<Architecture::Target, Derived, OtherDerived,
-                         typename internal::traits<Derived>::Scalar,
+                         typename internal::traits<Derived>::Scalar>::run(*this, other);
-                         EIGEN_PLAIN_ENUM_MIN(internal::traits<Derived>::Alignment, internal::traits<OtherDerived>::Alignment)>::run(*this, other);
 }
 /** \sa operator*(Quaternion) */
@ -672,7 +676,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar>
 // Generic conjugate of a Quaternion
 namespace internal {
-template<int Arch, class Derived, typename Scalar, int _Options> struct quat_conj
+template<int Arch, class Derived, typename Scalar> struct quat_conj
 {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
    return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z());
@ -691,8 +695,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar>
 QuaternionBase<Derived>::conjugate() const
 {
  return internal::quat_conj<Architecture::Target, Derived,
-                         typename internal::traits<Derived>::Scalar,
+                         typename internal::traits<Derived>::Scalar>::run(*this);
-                         internal::traits<Derived>::Alignment>::run(*this);
 }
--- a/xs/src/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h
+++ b/xs/src/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h
@ -16,17 +16,23 @@ namespace Eigen {
 namespace internal {
 template<class Derived, class OtherDerived>
-struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16>
+struct quat_product<Architecture::SSE, Derived, OtherDerived, float>
 {
  enum {
    AAlignment = traits<Derived>::Alignment,
    BAlignment = traits<OtherDerived>::Alignment,
    ResAlignment = traits<Quaternion<float> >::Alignment
  };
  static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
  {
    Quaternion<float> res;
    const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f);
-    __m128 a = _a.coeffs().template packet<Aligned16>(0);
+    __m128 a = _a.coeffs().template packet<AAlignment>(0);
-    __m128 b = _b.coeffs().template packet<Aligned16>(0);
+    __m128 b = _b.coeffs().template packet<BAlignment>(0);
    __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
    __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));
-    pstore(&res.x(),
+    pstoret<float,Packet4f,ResAlignment>(
              &res.x(),
              _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)),
                                    _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0),
                                               vec4f_swizzle1(b,1,2,0,0))),
@ -36,14 +42,17 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16>
  }
 };
-template<class Derived, int Alignment>
+template<class Derived>
-struct quat_conj<Architecture::SSE, Derived, float, Alignment>
+struct quat_conj<Architecture::SSE, Derived, float>
 {
  enum {
    ResAlignment = traits<Quaternion<float> >::Alignment
  };
  static inline Quaternion<float> run(const QuaternionBase<Derived>& q)
  {
    Quaternion<float> res;
    const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f);
-    pstore(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<Alignment>(0)));
+    pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
    return res;
  }
 };
@ -52,6 +61,9 @@ struct quat_conj<Architecture::SSE, Derived, float, Alignment>
 template<typename VectorLhs,typename VectorRhs>
 struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
 {
  enum {
    ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment
  };
  static inline typename plain_matrix_type<VectorLhs>::type
  run(const VectorLhs& lhs, const VectorRhs& rhs)
  {
@ -60,7 +72,7 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
    __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3));
    __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3));
    typename plain_matrix_type<VectorLhs>::type res;
-    pstore(&res.x(),_mm_sub_ps(mul1,mul2));
+    pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2));
    return res;
  }
 };
@ -68,9 +80,14 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
-template<class Derived, class OtherDerived, int Alignment>
+template<class Derived, class OtherDerived>
-struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
+struct quat_product<Architecture::SSE, Derived, OtherDerived, double>
 {
  enum {
    BAlignment = traits<OtherDerived>::Alignment,
    ResAlignment = traits<Quaternion<double> >::Alignment
  };
  static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
  {
  const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
@ -78,8 +95,8 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
  Quaternion<double> res;
  const double* a = _a.coeffs().data();
-  Packet2d b_xy = _b.coeffs().template packet<Alignment>(0);
+  Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0);
-  Packet2d b_zw = _b.coeffs().template packet<Alignment>(2);
+  Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2);
  Packet2d a_xx = pset1<Packet2d>(a[0]);
  Packet2d a_yy = pset1<Packet2d>(a[1]);
  Packet2d a_zz = pset1<Packet2d>(a[2]);
@ -97,9 +114,9 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
  t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw));
 #ifdef EIGEN_VECTORIZE_SSE3
  EIGEN_UNUSED_VARIABLE(mask)
-  pstore(&res.x(), _mm_addsub_pd(t1, preverse(t2)));
+  pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2)));
 #else
-  pstore(&res.x(), padd(t1, pxor(mask,preverse(t2))));
+  pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2))));
 #endif
  /*
@ -111,25 +128,28 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
  t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy));
 #ifdef EIGEN_VECTORIZE_SSE3
  EIGEN_UNUSED_VARIABLE(mask)
-  pstore(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2)));
+  pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2)));
 #else
-  pstore(&res.z(), psub(t1, pxor(mask,preverse(t2))));
+  pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2))));
 #endif
  return res;
 }
 };
-template<class Derived, int Alignment>
+template<class Derived>
-struct quat_conj<Architecture::SSE, Derived, double, Alignment>
+struct quat_conj<Architecture::SSE, Derived, double>
 {
  enum {
    ResAlignment = traits<Quaternion<double> >::Alignment
  };
  static inline Quaternion<double> run(const QuaternionBase<Derived>& q)
  {
    Quaternion<double> res;
    const __m128d mask0 = _mm_setr_pd(-0.,-0.);
    const __m128d mask2 = _mm_setr_pd(-0.,0.);
-    pstore(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<Alignment>(0)));
+    pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
-    pstore(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<Alignment>(2)));
+    pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2)));
    return res;
  }
 };
--- a/xs/src/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h
+++ b/xs/src/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h
@ -152,14 +152,29 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar>
    {
      // Compute the inverse squared-norm of each column of mat
      m_invdiag.resize(mat.cols());
      if(MatType::IsRowMajor)
      {
        m_invdiag.setZero();
        for(Index j=0; j<mat.outerSize(); ++j)
        {
-        RealScalar sum = mat.innerVector(j).squaredNorm();
+          for(typename MatType::InnerIterator it(mat,j); it; ++it)
-        if(sum>0)
+            m_invdiag(it.index()) += numext::abs2(it.value());
        }
        for(Index j=0; j<mat.cols(); ++j)
          if(numext::real(m_invdiag(j))>RealScalar(0))
            m_invdiag(j) = RealScalar(1)/numext::real(m_invdiag(j));
      }
      else
      {
        for(Index j=0; j<mat.outerSize(); ++j)
        {
          RealScalar sum = mat.col(j).squaredNorm();
          if(sum>RealScalar(0))
            m_invdiag(j) = RealScalar(1)/sum;
          else
            m_invdiag(j) = RealScalar(1);
        }
      }
      Base::m_isInitialized = true;
      return *this;
    }
--- a/xs/src/eigen/Eigen/src/Jacobi/Jacobi.h
+++ b/xs/src/eigen/Eigen/src/Jacobi/Jacobi.h
@ -298,30 +298,40 @@ inline void MatrixBase<Derived>::applyOnTheRight(Index p, Index q, const JacobiR
 }
 namespace internal {
-template<typename VectorX, typename VectorY, typename OtherScalar>
+
-void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
+template<typename Scalar, typename OtherScalar,
         int SizeAtCompileTime, int MinAlignment, bool Vectorizable>
 struct apply_rotation_in_the_plane_selector
 {
-  typedef typename VectorX::Scalar Scalar;
+  static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
-  enum { PacketSize = packet_traits<Scalar>::size };
+  {
    for(Index i=0; i<size; ++i)
    {
      Scalar xi = *x;
      Scalar yi = *y;
      *x =  c * xi + numext::conj(s) * yi;
      *y = -s * xi + numext::conj(c) * yi;
      x += incrx;
      y += incry;
    }
  }
 };
 template<typename Scalar, typename OtherScalar,
         int SizeAtCompileTime, int MinAlignment>
 struct apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,true /* vectorizable */>
 {
  static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
  {
    enum {
      PacketSize = packet_traits<Scalar>::size,
      OtherPacketSize = packet_traits<OtherScalar>::size
    };
    typedef typename packet_traits<Scalar>::type Packet;
-  eigen_assert(xpr_x.size() == xpr_y.size());
+    typedef typename packet_traits<OtherScalar>::type OtherPacket;
  Index size = xpr_x.size();
  Index incrx = xpr_x.derived().innerStride();
  Index incry = xpr_y.derived().innerStride();
  Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0);
  Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0);
  OtherScalar c = j.c();
  OtherScalar s = j.s();
  if (c==OtherScalar(1) && s==OtherScalar(0))
    return;
    /*** dynamic-size vectorized paths ***/
-
+    if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1))
  if(VectorX::SizeAtCompileTime == Dynamic &&
    (VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
    ((incrx==1 && incry==1) || PacketSize == 1))
    {
      // both vectors are sequentially stored in memory => vectorization
      enum { Peeling = 2 };
@ -329,9 +339,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
      Index alignedStart = internal::first_default_aligned(y, size);
      Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
-    const Packet pc = pset1<Packet>(c);
+      const OtherPacket pc = pset1<OtherPacket>(c);
-    const Packet ps = pset1<Packet>(s);
+      const OtherPacket ps = pset1<OtherPacket>(s);
-    conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj;
+      conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;
      conj_helper<OtherPacket,Packet,false,false> pm;
      for(Index i=0; i<alignedStart; ++i)
      {
@ -350,8 +361,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
        {
          Packet xi = pload<Packet>(px);
          Packet yi = pload<Packet>(py);
-        pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
+          pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
-        pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
+          pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
          px += PacketSize;
          py += PacketSize;
        }
@ -365,10 +376,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
          Packet xi1  = ploadu<Packet>(px+PacketSize);
          Packet yi   = pload <Packet>(py);
          Packet yi1  = pload <Packet>(py+PacketSize);
-        pstoreu(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
+          pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
-        pstoreu(px+PacketSize, padd(pmul(pc,xi1),pcj.pmul(ps,yi1)));
+          pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1)));
-        pstore (py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
+          pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
-        pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pmul(ps,xi1)));
+          pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1)));
          px += Peeling*PacketSize;
          py += Peeling*PacketSize;
        }
@ -376,8 +387,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
        {
          Packet xi = ploadu<Packet>(x+peelingEnd);
          Packet yi = pload <Packet>(y+peelingEnd);
-        pstoreu(x+peelingEnd, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
+          pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
-        pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
+          pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
        }
      }
@ -391,21 +402,20 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
    }
    /*** fixed-size vectorized path ***/
-  else if(VectorX::SizeAtCompileTime != Dynamic &&
+    else if(SizeAtCompileTime != Dynamic && MinAlignment>0) // FIXME should be compared to the required alignment
          (VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
          (EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment
    {
-    const Packet pc = pset1<Packet>(c);
+      const OtherPacket pc = pset1<OtherPacket>(c);
-    const Packet ps = pset1<Packet>(s);
+      const OtherPacket ps = pset1<OtherPacket>(s);
-    conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj;
+      conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj;
      conj_helper<OtherPacket,Packet,false,false> pm;
      Scalar* EIGEN_RESTRICT px = x;
      Scalar* EIGEN_RESTRICT py = y;
      for(Index i=0; i<size; i+=PacketSize)
      {
        Packet xi = pload<Packet>(px);
        Packet yi = pload<Packet>(py);
-      pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
+        pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
-      pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
+        pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
        px += PacketSize;
        py += PacketSize;
      }
@ -414,16 +424,36 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
    /*** non-vectorized path ***/
    else
    {
-    for(Index i=0; i<size; ++i)
+      apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,false>::run(x,incrx,y,incry,size,c,s);
    {
      Scalar xi = *x;
      Scalar yi = *y;
      *x =  c * xi + numext::conj(s) * yi;
      *y = -s * xi + numext::conj(c) * yi;
      x += incrx;
      y += incry;
    }
  }
 };
 // Entry point that applies the Jacobi rotation j to the two vector expressions
 // xpr_x and xpr_y. It gathers raw pointers, strides and the rotation coefficients,
 // then hands the actual work to apply_rotation_in_the_plane_selector<...>::run.
 template<typename VectorX, typename VectorY, typename OtherScalar>
 void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
 {
  typedef typename VectorX::Scalar Scalar;
  // The vectorized selector is requested only when both expressions advertise
  // PacketAccessBit and Scalar and OtherScalar share the same packet size.
  const bool Vectorizable =    (VectorX::Flags & VectorY::Flags & PacketAccessBit)
                            && (int(packet_traits<Scalar>::size) == int(packet_traits<OtherScalar>::size));
  // Both operands must hold the same number of coefficients.
  eigen_assert(xpr_x.size() == xpr_y.size());
  Index size = xpr_x.size();
  // Inner strides are forwarded as the per-element increments of x and y.
  Index incrx = xpr_x.derived().innerStride();
  Index incry = xpr_y.derived().innerStride();
  // Raw pointers to the first coefficient of each operand.
  Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0);
  Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0);
  OtherScalar c = j.c();
  OtherScalar s = j.s();
  // c == 1 and s == 0 is the identity rotation: nothing to apply.
  if (c==OtherScalar(1) && s==OtherScalar(0))
    return;
  // Compile-time dispatch on the scalar types, the compile-time size of VectorX,
  // the smaller of the two evaluators' alignments, and the Vectorizable flag.
  apply_rotation_in_the_plane_selector<
    Scalar,OtherScalar,
    VectorX::SizeAtCompileTime,
    EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment),
    Vectorizable>::run(x,incrx,y,incry,size,c,s);
 }
 } // end namespace internal
--- a/xs/src/eigen/Eigen/src/LU/InverseImpl.h
+++ b/xs/src/eigen/Eigen/src/LU/InverseImpl.h
@ -404,7 +404,7 @@ inline void MatrixBase<Derived>::computeInverseWithCheck(
    const RealScalar& absDeterminantThreshold
  ) const
 {
-  RealScalar determinant;
+  Scalar determinant;
  // i'd love to put some static assertions there, but SFINAE means that they have no effect...
  eigen_assert(rows() == cols());
  computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold);
--- a/xs/src/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h
+++ b/xs/src/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h
@ -1004,7 +1004,7 @@ static IndexType find_ordering /* return the number of garbage collections */
    COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ;
    /* get pivot column from head of minimum degree list */
-    while (head [min_score] == COLAMD_EMPTY && min_score < n_col)
+    while (min_score < n_col && head [min_score] == COLAMD_EMPTY)
    {
      min_score++ ;
    }
--- a/xs/src/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h
+++ b/xs/src/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h
@ -64,28 +64,28 @@ namespace internal
    typedef typename _MatrixType::StorageIndex StorageIndex;
  };
-  void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm)
+  inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm)
  {
    // float overload: normalizes the degenerate cases below, then forwards
    // every argument to s_pastix.
    // An empty matrix (n == 0) is passed with null ptr/idx/vals arrays.
    if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
    // With no right-hand side, x is nulled and nbrhs is replaced by 1.
    if (nbrhs == 0) {x = NULL; nbrhs=1;}
    s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); 
  }
-  void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm)
+  inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm)
  {
    // double overload: normalizes the degenerate cases below, then forwards
    // every argument to d_pastix.
    // An empty matrix (n == 0) is passed with null ptr/idx/vals arrays.
    if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
    // With no right-hand side, x is nulled and nbrhs is replaced by 1.
    if (nbrhs == 0) {x = NULL; nbrhs=1;}
    d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); 
  }
-  void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm)
+  inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm)
  {
    // std::complex<float> overload: normalizes the degenerate cases below, then
    // forwards to c_pastix.
    // An empty matrix (n == 0) is passed with null ptr/idx/vals arrays.
    if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
    // With no right-hand side, x is nulled and nbrhs is replaced by 1.
    if (nbrhs == 0) {x = NULL; nbrhs=1;}
    // vals and x are reinterpreted as PASTIX_COMPLEX* for the call.
    c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<PASTIX_COMPLEX*>(vals), perm, invp, reinterpret_cast<PASTIX_COMPLEX*>(x), nbrhs, iparm, dparm); 
  }
-  void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm)
+  inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm)
  {
    if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
    if (nbrhs == 0) {x = NULL; nbrhs=1;}
--- a/xs/src/eigen/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/xs/src/eigen/Eigen/src/QR/ColPivHouseholderQR.h
@ -506,8 +506,8 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
    m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k);
  }
-  RealScalar threshold_helper =  numext::abs2<Scalar>(m_colNormsUpdated.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows);
+  RealScalar threshold_helper =  numext::abs2<RealScalar>(m_colNormsUpdated.maxCoeff() * NumTraits<RealScalar>::epsilon()) / RealScalar(rows);
-  RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<Scalar>::epsilon());
+  RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<RealScalar>::epsilon());
  m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
  m_maxpivot = RealScalar(0);
@ -553,11 +553,11 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
      // http://www.netlib.org/lapack/lawnspdf/lawn176.pdf
      // and used in LAPACK routines xGEQPF and xGEQP3.
      // See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html
-      if (m_colNormsUpdated.coeffRef(j) != 0) {
+      if (m_colNormsUpdated.coeffRef(j) != RealScalar(0)) {
        RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j);
        temp = (RealScalar(1) + temp) * (RealScalar(1) - temp);
-        temp = temp < 0 ? 0 : temp;
+        temp = temp <  RealScalar(0) ? RealScalar(0) : temp;
-        RealScalar temp2 = temp * numext::abs2<Scalar>(m_colNormsUpdated.coeffRef(j) /
+        RealScalar temp2 = temp * numext::abs2<RealScalar>(m_colNormsUpdated.coeffRef(j) /
                                                           m_colNormsDirect.coeffRef(j));
        if (temp2 <= norm_downdate_threshold) {
          // The updated norm has become too inaccurate so re-compute the column
--- a/xs/src/eigen/Eigen/src/SVD/BDCSVD.h
+++ b/xs/src/eigen/Eigen/src/SVD/BDCSVD.h
@ -11,7 +11,7 @@
 // Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
 // Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
 // Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
-// Copyright (C) 2014-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2014-2017 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@ -77,6 +77,7 @@ public:
  typedef _MatrixType MatrixType;
  typedef typename MatrixType::Scalar Scalar;
  typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
  typedef typename NumTraits<RealScalar>::Literal Literal;
  enum {
    RowsAtCompileTime = MatrixType::RowsAtCompileTime, 
    ColsAtCompileTime = MatrixType::ColsAtCompileTime, 
@ -259,7 +260,7 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
  //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows
  RealScalar scale = matrix.cwiseAbs().maxCoeff();
-  if(scale==RealScalar(0)) scale = RealScalar(1);
+  if(scale==Literal(0)) scale = Literal(1);
  MatrixX copy;
  if (m_isTranspose) copy = matrix.adjoint()/scale;
  else               copy = matrix/scale;
@ -351,13 +352,13 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co
    Index k1=0, k2=0;
    for(Index j=0; j<n; ++j)
    {
-      if( (A.col(j).head(n1).array()!=0).any() )
+      if( (A.col(j).head(n1).array()!=Literal(0)).any() )
      {
        A1.col(k1) = A.col(j).head(n1);
        B1.row(k1) = B.row(j);
        ++k1;
      }
-      if( (A.col(j).tail(n2).array()!=0).any() )
+      if( (A.col(j).tail(n2).array()!=Literal(0)).any() )
      {
        A2.col(k2) = A.col(j).tail(n2);
        B2.row(k2) = B.row(j);
@ -449,11 +450,11 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
    l = m_naiveU.row(1).segment(firstCol, k);
    f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1);
  }
-  if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1;
+  if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1);
  if (r0<considerZero)
  {
-    c0 = 1;
+    c0 = Literal(1);
-    s0 = 0;
+    s0 = Literal(0);
  }
  else
  {
@ -574,7 +575,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
  ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n);
  m_workspace.head(n) =  m_computed.block(firstCol, firstCol, n, n).diagonal();
  ArrayRef diag = m_workspace.head(n);
-  diag(0) = 0;
+  diag(0) = Literal(0);
  // Allocate space for singular values and vectors
  singVals.resize(n);
@ -590,7 +591,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
  // but others are interleaved and we must ignore them at this stage.
  // To this end, let's compute a permutation skipping them:
  Index actual_n = n;
-  while(actual_n>1 && diag(actual_n-1)==0) --actual_n;
+  while(actual_n>1 && diag(actual_n-1)==Literal(0)) --actual_n;
  Index m = 0; // size of the deflated problem
  for(Index k=0;k<actual_n;++k)
    if(abs(col0(k))>considerZero)
@ -691,11 +692,13 @@ template <typename MatrixType>
 typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift)
 {
  Index m = perm.size();
-  RealScalar res = 1;
+  RealScalar res = Literal(1);
  for(Index i=0; i<m; ++i)
  {
    Index j = perm(i);
-    res += numext::abs2(col0(j)) / ((diagShifted(j) - mu) * (diag(j) + shift + mu));
+    // The following expression could be rewritten to involve only a single division,
    // but this would make the expression more sensitive to overflow.
    res += (col0(j) / (diagShifted(j) - mu)) * (col0(j) / (diag(j) + shift + mu));
  }
  return res;
@ -707,19 +710,22 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
 {
  using std::abs;
  using std::swap;
  using std::sqrt;
  Index n = col0.size();
  Index actual_n = n;
-  while(actual_n>1 && col0(actual_n-1)==0) --actual_n;
+  // Note that here actual_n is computed based on col0(i)==0 instead of diag(i)==0 as above
  // because 1) we have diag(i)==0 => col0(i)==0 and 2) if col0(i)==0, then diag(i) is already a singular value.
  while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n;
  for (Index k = 0; k < n; ++k)
  {
-    if (col0(k) == 0 || actual_n==1)
+    if (col0(k) == Literal(0) || actual_n==1)
    {
      // if col0(k) == 0, then entry is deflated, so singular value is on diagonal
      // if actual_n==1, then the deflated problem is already diagonalized
      singVals(k) = k==0 ? col0(0) : diag(k);
-      mus(k) = 0;
+      mus(k) = Literal(0);
      shifts(k) = k==0 ? col0(0) : diag(k);
      continue;
    } 
@ -731,15 +737,17 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
      right = (diag(actual_n-1) + col0.matrix().norm());
    else
    {
-      // Skip deflated singular values
+      // Skip deflated singular values,
      // recall that at this stage we assume that z[j]!=0 and all entries for which z[j]==0 have been put aside.
      // This should be equivalent to using perm[]
      Index l = k+1;
-      while(col0(l)==0) { ++l; eigen_internal_assert(l<actual_n); }
+      while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); }
      right = diag(l);
    }
    // first decide whether it's closer to the left end or the right end
-    RealScalar mid = left + (right-left) / 2;
+    RealScalar mid = left + (right-left) / Literal(2);
-    RealScalar fMid = secularEq(mid, col0, diag, perm, diag, 0);
+    RealScalar fMid = secularEq(mid, col0, diag, perm, diag, Literal(0));
 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
    std::cout << right-left << "\n";
    std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right)   << "\n";
@ -755,7 +763,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
              << " "       << secularEq(0.8*(left+right), col0, diag, perm, diag, 0)
              << " "       << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n";
 #endif
-    RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right;
+    RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? left : right;
    // measure everything relative to shift
    Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n);
@ -785,13 +793,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
    // rational interpolation: fit a function of the form a / mu + b through the two previous
    // iterates and use its zero to compute the next iterate
-    bool useBisection = fPrev*fCur>0;
+    bool useBisection = fPrev*fCur>Literal(0);
-    while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
+    while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
    {
      ++m_numIters;
      // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples.
-      RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev);
+      RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev);
      RealScalar b = fCur - a / muCur;
      // And find mu such that f(mu)==0:
      RealScalar muZero = -a/b;
@ -803,8 +811,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
      fCur = fZero;
-      if (shift == left  && (muCur < 0 || muCur > right - left)) useBisection = true;
+      if (shift == left  && (muCur < Literal(0) || muCur > right - left)) useBisection = true;
-      if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true;
+      if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true;
      if (abs(fCur)>abs(fPrev)) useBisection = true;
    }
@ -817,14 +825,22 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
      RealScalar leftShifted, rightShifted;
      if (shift == left)
      {
-        leftShifted = (std::numeric_limits<RealScalar>::min)();
+        // to avoid overflow, we must have mu > max(real_min, |z(k)|/sqrt(real_max)),
        // the factor 2 is to be more conservative
        leftShifted = numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), Literal(2) * abs(col0(k)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
        // check that we did it right:
        eigen_internal_assert( (numext::isfinite)( (col0(k)/leftShifted)*(col0(k)/(diag(k)+shift+leftShifted)) ) );
        // I don't understand why the case k==0 would be special there:
        // if (k == 0) rightShifted = right - left; else
-        rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.6)); // theoretically we can take 0.5, but let's be safe
+        rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.51)); // theoretically we can take 0.5, but let's be safe
      }
      else
      {
-        leftShifted = -(right - left) * RealScalar(0.6);
+        leftShifted = -(right - left) * RealScalar(0.51);
        if(k+1<n)
          rightShifted = -numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), abs(col0(k+1)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
        else
          rightShifted = -(std::numeric_limits<RealScalar>::min)();
      }
@ -841,13 +857,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
        std::cout << k << " : " <<  fLeft << " * " << fRight << " == " << fLeft * fRight << "  ;  " << left << " - " << right << " -> " <<  leftShifted << " " << rightShifted << "   shift=" << shift << "\n";
      }
 #endif
-      eigen_internal_assert(fLeft * fRight < 0);
+      eigen_internal_assert(fLeft * fRight < Literal(0));
-      while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
+      while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
      {
-        RealScalar midShifted = (leftShifted + rightShifted) / 2;
+        RealScalar midShifted = (leftShifted + rightShifted) / Literal(2);
        fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
-        if (fLeft * fMid < 0)
+        if (fLeft * fMid < Literal(0))
        {
          rightShifted = midShifted;
        }
@ -858,7 +874,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
        }
      }
-      muCur = (leftShifted + rightShifted) / 2;
+      muCur = (leftShifted + rightShifted) / Literal(2);
    }
    singVals[k] = shift + muCur;
@ -892,8 +908,8 @@ void BDCSVD<MatrixType>::perturbCol0
  // The offset permits to skip deflated entries while computing zhat
  for (Index k = 0; k < n; ++k)
  {
-    if (col0(k) == 0) // deflated
+    if (col0(k) == Literal(0)) // deflated
-      zhat(k) = 0;
+      zhat(k) = Literal(0);
    else
    {
      // see equation (3.6)
@ -918,7 +934,7 @@ void BDCSVD<MatrixType>::perturbCol0
      std::cout << "zhat(" << k << ") =  sqrt( " << prod << ")  ;  " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n";
 #endif
      RealScalar tmp = sqrt(prod);
-      zhat(k) = col0(k) > 0 ? tmp : -tmp;
+      zhat(k) = col0(k) > Literal(0) ? tmp : -tmp;
    }
  }
 }
@ -934,7 +950,7 @@ void BDCSVD<MatrixType>::computeSingVecs
  for (Index k = 0; k < n; ++k)
  {
-    if (zhat(k) == 0)
+    if (zhat(k) == Literal(0))
    {
      U.col(k) = VectorType::Unit(n+1, k);
      if (m_compV) V.col(k) = VectorType::Unit(n, k);
@ -947,7 +963,7 @@ void BDCSVD<MatrixType>::computeSingVecs
        Index i = perm(l);
        U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
      }
-      U(n,k) = 0;      
+      U(n,k) = Literal(0);
      U.col(k).normalize();
      if (m_compV)
@ -958,7 +974,7 @@ void BDCSVD<MatrixType>::computeSingVecs
          Index i = perm(l);
          V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
        }
-        V(0,k) = -1;
+        V(0,k) = Literal(-1);
        V.col(k).normalize();
      }
    }
@ -979,15 +995,15 @@ void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index
  Index start = firstCol + shift;
  RealScalar c = m_computed(start, start);
  RealScalar s = m_computed(start+i, start);
-  RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s));
+  RealScalar r = numext::hypot(c,s);
-  if (r == 0)
+  if (r == Literal(0))
  {
-    m_computed(start+i, start+i) = 0;
+    m_computed(start+i, start+i) = Literal(0);
    return;
  }
  m_computed(start,start) = r;  
-  m_computed(start+i, start) = 0;
+  m_computed(start+i, start) = Literal(0);
-  m_computed(start+i, start+i) = 0;
+  m_computed(start+i, start+i) = Literal(0);
  JacobiRotation<RealScalar> J(c/r,-s/r);
  if (m_compU)  m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J);
@ -1020,7 +1036,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi
    << m_computed(firstColm + i+1, firstColm+i+1) << " "
    << m_computed(firstColm + i+2, firstColm+i+2) << "\n";
 #endif
-  if (r==0)
+  if (r==Literal(0))
  {
    m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);
    return;
@ -1029,7 +1045,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi
  s/=r;
  m_computed(firstColm + i, firstColm) = r;  
  m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i);
-  m_computed(firstColm + j, firstColm) = 0;
+  m_computed(firstColm + j, firstColm) = Literal(0);
  JacobiRotation<RealScalar> J(c,-s);
  if (m_compU)  m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J);
@ -1053,7 +1069,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
  RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff();
  RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag);
-  RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);
+  RealScalar epsilon_coarse = Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(m_naiveU.allFinite());
@ -1081,7 +1097,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
 #ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
      std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << "  (diag(" << i << ")=" << diag(i) << ")\n";
 #endif
-      col0(i) = 0;
+      col0(i) = Literal(0);
    }
  //condition 4.3
--- a/xs/src/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h
+++ b/xs/src/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h
@ -61,9 +61,10 @@ JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPiv
    u    = (LAPACKE_TYPE*)m_matrixU.data(); \
  } else { ldu=1; u=&dummy; }\
  MatrixType localV; \
-  ldvt = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \
+  lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \
  if (computeV()) { \
-    localV.resize(ldvt, m_cols); \
+    localV.resize(vt_rows, m_cols); \
    ldvt  = internal::convert_index<lapack_int>(localV.outerStride()); \
    vt   = (LAPACKE_TYPE*)localV.data(); \
  } else { ldvt=1; vt=&dummy; }\
  Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(m_diagSize, 1); \
--- a/xs/src/eigen/Eigen/src/SVD/UpperBidiagonalization.h
+++ b/xs/src/eigen/Eigen/src/SVD/UpperBidiagonalization.h
@ -159,6 +159,8 @@ void upperbidiagonalization_blocked_helper(MatrixType& A,
                                                      traits<MatrixType>::Flags & RowMajorBit> > Y)
 {
  typedef typename MatrixType::Scalar Scalar;
  typedef typename MatrixType::RealScalar RealScalar;
  typedef typename NumTraits<RealScalar>::Literal Literal;
  enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit };
  typedef InnerStride<int(StorageOrder) == int(ColMajor) ? 1 : Dynamic> ColInnerStride;
  typedef InnerStride<int(StorageOrder) == int(ColMajor) ? Dynamic : 1> RowInnerStride;
@ -263,7 +265,7 @@ void upperbidiagonalization_blocked_helper(MatrixType& A,
    SubMatType A10( A.block(bs,0, brows-bs,bs) );
    SubMatType A01( A.block(0,bs, bs,bcols-bs) );
    Scalar tmp = A01(bs-1,0);
-    A01(bs-1,0) = 1;
+    A01(bs-1,0) = Literal(1);
    A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint();
    A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01;
    A01(bs-1,0) = tmp;
--- a/Show More
+++ b/Show More