Updated the Eigen library to 3.3.5

bubnikv 2018-08-17 17:33:29 +02:00
parent 5b94f53cd7
commit 0eba590fc6
95 changed files with 1381 additions and 874 deletions

Eigen/Cholesky

@@ -9,6 +9,7 @@
 #define EIGEN_CHOLESKY_MODULE_H

 #include "Core"
+#include "Jacobi"

 #include "src/Core/util/DisableStupidWarnings.h"
@@ -31,7 +32,11 @@
 #include "src/Cholesky/LLT.h"
 #include "src/Cholesky/LDLT.h"
 #ifdef EIGEN_USE_LAPACKE
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
 #include "src/misc/lapacke.h"
+#endif
 #include "src/Cholesky/LLT_LAPACKE.h"
 #endif

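Note: this EIGEN_USE_MKL guard is repeated in every LAPACKE-enabled module below (Eigenvalues, LU, QR, SVD): when the MKL backend is requested, Eigen now includes MKL's own mkl_lapacke.h instead of the bundled src/misc/lapacke.h. A minimal sketch of how a user would exercise this path (assumes an MKL installation and the proper link line):

// Sketch: route Eigen's LAPACKE-backed decompositions through MKL.
#define EIGEN_USE_MKL_ALL   // implies EIGEN_USE_LAPACKE and EIGEN_USE_MKL
#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd M = Eigen::MatrixXd::Random(64, 64);
  Eigen::MatrixXd S = M * M.transpose() + 64.0 * Eigen::MatrixXd::Identity(64, 64); // SPD
  Eigen::LLT<Eigen::MatrixXd> llt(S); // dispatched to LAPACKE/MKL for double matrices
  return llt.info() == Eigen::Success ? 0 : 1;
}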
Eigen/Core

@@ -14,6 +14,22 @@
 // first thing Eigen does: stop the compiler from committing suicide
 #include "src/Core/util/DisableStupidWarnings.h"

+#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA)
+#define EIGEN_CUDACC __CUDACC__
+#endif
+
+#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA)
+#define EIGEN_CUDA_ARCH __CUDA_ARCH__
+#endif
+
+#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
+#define EIGEN_CUDACC_VER ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100))
+#elif defined(__CUDACC_VER__)
+#define EIGEN_CUDACC_VER __CUDACC_VER__
+#else
+#define EIGEN_CUDACC_VER 0
+#endif
+
 // Handle NVCC/CUDA/SYCL
 #if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
   // Do not try asserts on CUDA and SYCL!
@@ -155,6 +171,9 @@
 #ifdef __AVX512DQ__
 #define EIGEN_VECTORIZE_AVX512DQ
 #endif
+#ifdef __AVX512ER__
+#define EIGEN_VECTORIZE_AVX512ER
+#endif
 #endif

 // include files
@@ -229,7 +248,7 @@
 #if defined __CUDACC__
 #define EIGEN_VECTORIZE_CUDA
 #include <vector_types.h>
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#if EIGEN_CUDACC_VER >= 70500
 #define EIGEN_HAS_CUDA_FP16
 #endif
 #endif
@@ -352,6 +371,7 @@ using std::ptrdiff_t;
 #include "src/Core/MathFunctions.h"
 #include "src/Core/GenericPacketMath.h"
 #include "src/Core/MathFunctionsImpl.h"
+#include "src/Core/arch/Default/ConjHelper.h"

 #if defined EIGEN_VECTORIZE_AVX512
 #include "src/Core/arch/SSE/PacketMath.h"
@@ -367,6 +387,7 @@ using std::ptrdiff_t;
 #include "src/Core/arch/AVX/MathFunctions.h"
 #include "src/Core/arch/AVX/Complex.h"
 #include "src/Core/arch/AVX/TypeCasting.h"
+#include "src/Core/arch/SSE/TypeCasting.h"
 #elif defined EIGEN_VECTORIZE_SSE
 #include "src/Core/arch/SSE/PacketMath.h"
 #include "src/Core/arch/SSE/MathFunctions.h"

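Note: EIGEN_CUDACC_VER encodes the nvcc version as major*10000 + minor*100, so nvcc 9.2 yields 90200; it falls back to the legacy __CUDACC_VER__ (removed in CUDA 9) and to 0 in non-CUDA builds. That keeps version tests such as the FP16 check above working across nvcc releases. A hedged compile-time guard built on it (illustrative only):

// Sketch: fail early if half-precision support cannot be assumed.
#include <Eigen/Core>
#if defined(EIGEN_CUDACC) && EIGEN_CUDACC_VER < 70500
#error "This code assumes CUDA >= 7.5 (EIGEN_HAS_CUDA_FP16)."
#endif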
Eigen/Eigenvalues

@@ -45,7 +45,11 @@
 #include "src/Eigenvalues/GeneralizedEigenSolver.h"
 #include "src/Eigenvalues/MatrixBaseEigenvalues.h"
 #ifdef EIGEN_USE_LAPACKE
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
 #include "src/misc/lapacke.h"
+#endif
 #include "src/Eigenvalues/RealSchur_LAPACKE.h"
 #include "src/Eigenvalues/ComplexSchur_LAPACKE.h"
 #include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h"

Eigen/LU

@@ -28,7 +28,11 @@
 #include "src/LU/FullPivLU.h"
 #include "src/LU/PartialPivLU.h"
 #ifdef EIGEN_USE_LAPACKE
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
 #include "src/misc/lapacke.h"
+#endif
 #include "src/LU/PartialPivLU_LAPACKE.h"
 #endif
 #include "src/LU/Determinant.h"

Eigen/QR

@@ -36,7 +36,11 @@
 #include "src/QR/ColPivHouseholderQR.h"
 #include "src/QR/CompleteOrthogonalDecomposition.h"
 #ifdef EIGEN_USE_LAPACKE
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
 #include "src/misc/lapacke.h"
+#endif
 #include "src/QR/HouseholderQR_LAPACKE.h"
 #include "src/QR/ColPivHouseholderQR_LAPACKE.h"
 #endif

Eigen/QtAlignedMalloc

@@ -27,7 +27,7 @@ void qFree(void *ptr)
 void *qRealloc(void *ptr, std::size_t size)
 {
   void* newPtr = Eigen::internal::aligned_malloc(size);
-  memcpy(newPtr, ptr, size);
+  std::memcpy(newPtr, ptr, size);
   Eigen::internal::aligned_free(ptr);
   return newPtr;
 }

Eigen/SVD

@@ -37,7 +37,11 @@
 #include "src/SVD/JacobiSVD.h"
 #include "src/SVD/BDCSVD.h"
 #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
 #include "src/misc/lapacke.h"
+#endif
 #include "src/SVD/JacobiSVD_LAPACKE.h"
 #endif

Eigen/src/Cholesky/LDLT.h

@@ -248,7 +248,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
     /** \brief Reports whether previous computation was successful.
       *
       * \returns \c Success if computation was succesful,
-      *          \c NumericalIssue if the matrix.appears to be negative.
+      *          \c NumericalIssue if the factorization failed because of a zero pivot.
       */
     ComputationInfo info() const
     {
@@ -376,6 +376,8 @@ template<> struct ldlt_inplace<Lower>
       if((rs>0) && pivot_is_valid)
         A21 /= realAkk;
+      else if(rs>0)
+        ret = ret && (A21.array()==Scalar(0)).all();

       if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed
       else if(!pivot_is_valid) found_zero_pivot = true;
@@ -568,13 +570,14 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons
   // more precisely, use pseudo-inverse of D (see bug 241)
   using std::abs;
   const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
-  // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
-  // as motivated by LAPACK's xGELSS:
+  // In some previous versions, tolerance was set to the max of 1/highest (or rather numeric_limits::min())
+  // and the maximal diagonal entry * epsilon as motivated by LAPACK's xGELSS:
   // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
   // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
   // diagonal element is not well justified and leads to numerical issues in some cases.
   // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
-  RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
+  // Using numeric_limits::min() gives us more robustness to denormals.
+  RealScalar tolerance = (std::numeric_limits<RealScalar>::min)();
   for (Index i = 0; i < vecD.size(); ++i)
   {

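Note: with the new tolerance, solve() treats diagonal entries of D below numeric_limits::min() as zero pivots (pseudo-inverse of D), and info() now distinguishes a genuinely failed factorization from a benign semi-definite input. A small sketch of the resulting behavior (illustrative values, not from the commit):

#include <Eigen/Cholesky>
#include <iostream>

int main() {
  Eigen::Matrix2d A;
  A << 1, 0,
       0, 0;                                            // PSD with an exactly zero pivot
  Eigen::LDLT<Eigen::Matrix2d> ldlt(A);
  std::cout << (ldlt.info() == Eigen::Success) << "\n"; // semi-definite input still factors
  Eigen::Vector2d b(1.0, 0.0);
  std::cout << ldlt.solve(b).transpose() << "\n";       // pseudo-inverse of D handles the zero pivot
}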
Eigen/src/Cholesky/LLT.h

@@ -24,7 +24,7 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
   *
   * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
   * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
-  *             The other triangular part won't be read.
+  *               The other triangular part won't be read.
   *
   * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
   * matrix A such that A = LL^* = U^*U, where L is lower triangular.
@@ -41,14 +41,18 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
   * Example: \include LLT_example.cpp
   * Output: \verbinclude LLT_example.out
   *
+  * \b Performance: for best performance, it is recommended to use a column-major storage format
+  * with the Lower triangular part (the default), or, equivalently, a row-major storage format
+  * with the Upper triangular part. Otherwise, you might get a 20% slowdown for the full factorization
+  * step, and rank-updates can be up to 3 times slower.
+  *
   * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
   *
+  * Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered.
+  * Therefore, the strict lower part does not have to store correct values.
+  *
   * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
   */
-/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
- * Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
- * the strict lower part does not have to store correct values.
- */
 template<typename _MatrixType, int _UpLo> class LLT
 {
   public:
@@ -146,7 +150,7 @@ template<typename _MatrixType, int _UpLo> class LLT
     }

     template<typename Derived>
-    void solveInPlace(MatrixBase<Derived> &bAndX) const;
+    void solveInPlace(const MatrixBase<Derived> &bAndX) const;

     template<typename InputType>
     LLT& compute(const EigenBase<InputType>& matrix);
@@ -177,7 +181,7 @@ template<typename _MatrixType, int _UpLo> class LLT
     /** \brief Reports whether previous computation was successful.
       *
      * \returns \c Success if computation was succesful,
-      *          \c NumericalIssue if the matrix.appears to be negative.
+      *          \c NumericalIssue if the matrix.appears not to be positive definite.
      */
     ComputationInfo info() const
     {
@@ -425,7 +429,8 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>
   eigen_assert(a.rows()==a.cols());
   const Index size = a.rows();
   m_matrix.resize(size, size);
-  m_matrix = a.derived();
+  if (!internal::is_same_dense(m_matrix, a.derived()))
+    m_matrix = a.derived();

   // Compute matrix L1 norm = max abs column sum.
   m_l1_norm = RealScalar(0);
@@ -485,11 +490,14 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
   *
   * This version avoids a copy when the right hand side matrix b is not needed anymore.
   *
+  * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
+  * This function will const_cast it, so constness isn't honored here.
+  *
   * \sa LLT::solve(), MatrixBase::llt()
   */
 template<typename MatrixType, int _UpLo>
 template<typename Derived>
-void LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
+void LLT<MatrixType,_UpLo>::solveInPlace(const MatrixBase<Derived> &bAndX) const
 {
   eigen_assert(m_isInitialized && "LLT is not initialized.");
   eigen_assert(m_matrix.rows()==bAndX.rows());

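Note: per the new \b Performance paragraph, the fast path is column-major storage with the Lower triangle (Eigen's defaults); the const-ref solveInPlace() merely lets temporaries bind, constness is cast away internally. A short usage sketch:

#include <Eigen/Cholesky>
#include <iostream>

int main() {
  Eigen::MatrixXd M = Eigen::MatrixXd::Random(4, 4);
  Eigen::MatrixXd A = M * M.transpose() + 4.0 * Eigen::MatrixXd::Identity(4, 4); // SPD
  Eigen::LLT<Eigen::MatrixXd, Eigen::Lower> llt(A); // column-major + Lower: the recommended fast path
  Eigen::VectorXd b = Eigen::VectorXd::Ones(4);
  llt.solveInPlace(b);                              // b is overwritten with A^{-1} b
  std::cout << b.transpose() << std::endl;
}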
Eigen/src/Core/Array.h

@@ -231,10 +231,16 @@ class Array
       : Base(other)
     { }

+  private:
+    struct PrivateType {};
+  public:
+
     /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
+    EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other,
+                              typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,
+                                                           PrivateType>::type = PrivateType())
       : Base(other.derived())
     { }

Eigen/src/Core/ArrayBase.h

@@ -175,7 +175,7 @@ template<typename Derived> class ArrayBase
   */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_STRONG_INLINE Derived &
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
 {
   call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
@@ -188,7 +188,7 @@ ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
   */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_STRONG_INLINE Derived &
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
 {
   call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
@@ -201,7 +201,7 @@ ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
   */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_STRONG_INLINE Derived &
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
 {
   call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
@@ -214,7 +214,7 @@ ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
   */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_STRONG_INLINE Derived &
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
 {
   call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());

Eigen/src/Core/ArrayWrapper.h

@@ -32,7 +32,8 @@ struct traits<ArrayWrapper<ExpressionType> >
   // Let's remove NestByRefBit
   enum {
     Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
-    Flags = Flags0 & ~NestByRefBit
+    LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
+    Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
   };
 };
 }
@@ -129,7 +130,8 @@ struct traits<MatrixWrapper<ExpressionType> >
   // Let's remove NestByRefBit
   enum {
     Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
-    Flags = Flags0 & ~NestByRefBit
+    LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
+    Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
   };
 };
 }

Eigen/src/Core/AssignEvaluator.h

@@ -39,7 +39,7 @@ public:
   enum {
     DstAlignment = DstEvaluator::Alignment,
     SrcAlignment = SrcEvaluator::Alignment,
-    DstHasDirectAccess = DstFlags & DirectAccessBit,
+    DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
     JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
   };
@@ -83,7 +83,7 @@ private:
                    && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
                    && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
     MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
-    MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
+    MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
                    && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
     /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
        so it's only good for large enough sizes. */

Eigen/src/Core/Assign_MKL.h

@@ -84,7 +84,8 @@ class vml_assign_traits
   struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \
                     Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
     typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
-    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \
+    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
+      resize_if_allowed(dst, src, func); \
       eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
       if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
         VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
@@ -144,7 +145,8 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
                     Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
     typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
                           const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
-    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \
+    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
+      resize_if_allowed(dst, src, func); \
       eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
       VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \
       if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \

Eigen/src/Core/CoreEvaluators.h

@@ -977,7 +977,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
     OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
                              ? int(outer_stride_at_compile_time<ArgType>::ret)
                              : int(inner_stride_at_compile_time<ArgType>::ret),
-    MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0,
+    MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0,

     FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
     FlagsRowMajorBit = XprType::Flags&RowMajorBit,
@@ -987,7 +987,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
     Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,

     PacketAlignment = unpacket_traits<PacketScalar>::alignment,
-    Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
+    Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic)
+                             && (OuterStrideAtCompileTime!=0)
+                             && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
     Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
   };
   typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
@@ -1018,14 +1020,16 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
   EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block)
     : m_argImpl(block.nestedExpression()),
       m_startRow(block.startRow()),
-      m_startCol(block.startCol())
+      m_startCol(block.startCol()),
+      m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0)
   { }

   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;

   enum {
-    RowsAtCompileTime = XprType::RowsAtCompileTime
+    RowsAtCompileTime = XprType::RowsAtCompileTime,
+    ForwardLinearAccess = InnerPanel && bool(evaluator<ArgType>::Flags&LinearAccessBit)
   };

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -1037,7 +1041,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   CoeffReturnType coeff(Index index) const
   {
-    return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
+    if (ForwardLinearAccess)
+      return m_argImpl.coeff(m_linear_offset.value() + index);
+    else
+      return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -1049,7 +1056,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Scalar& coeffRef(Index index)
   {
-    return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
+    if (ForwardLinearAccess)
+      return m_argImpl.coeffRef(m_linear_offset.value() + index);
+    else
+      return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
   }

   template<int LoadMode, typename PacketType>
@@ -1063,8 +1073,11 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
   EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
-    return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
-                                       RowsAtCompileTime == 1 ? index : 0);
+    if (ForwardLinearAccess)
+      return m_argImpl.template packet<LoadMode,PacketType>(m_linear_offset.value() + index);
+    else
+      return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
+                                         RowsAtCompileTime == 1 ? index : 0);
   }

   template<int StoreMode, typename PacketType>
@@ -1078,15 +1091,19 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
   EIGEN_STRONG_INLINE
   void writePacket(Index index, const PacketType& x)
   {
-    return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
-                                             RowsAtCompileTime == 1 ? index : 0,
-                                             x);
+    if (ForwardLinearAccess)
+      return m_argImpl.template writePacket<StoreMode,PacketType>(m_linear_offset.value() + index, x);
+    else
+      return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
+                                               RowsAtCompileTime == 1 ? index : 0,
+                                               x);
   }

 protected:
   evaluator<ArgType> m_argImpl;
   const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
   const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
+  const variable_if_dynamic<Index, InnerPanel ? Dynamic : 0> m_linear_offset;
 };

 // TODO: This evaluator does not actually use the child evaluator;

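Note: for "inner panel" blocks (e.g. a single column of a column-major matrix) linear coefficient and packet access is now forwarded straight into the underlying expression through the precomputed m_linear_offset, instead of being rerouted through (row, col) indexing. A sketch of the access pattern this speeds up:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd A(8, 8);
  A.setRandom();
  auto col = A.col(3);                       // InnerPanel block, startCol()*rows() = 24
  std::cout << (col(5) == A(5, 3)) << "\n";  // linear access col(5) maps to storage offset 24 + 5
}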
Eigen/src/Core/CwiseNullaryOp.h

@@ -105,7 +105,7 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
   */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
 DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
 {
   return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
@@ -150,7 +150,7 @@ DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
   */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
 DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
 {
   return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
@@ -192,7 +192,7 @@ DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
   * \sa class CwiseNullaryOp
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Constant(Index size, const Scalar& value)
 {
   return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
@@ -208,7 +208,7 @@ DenseBase<Derived>::Constant(Index size, const Scalar& value)
   * \sa class CwiseNullaryOp
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Constant(const Scalar& value)
 {
   EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
@@ -220,7 +220,7 @@ DenseBase<Derived>::Constant(const Scalar& value)
   * \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&)
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
 DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -232,7 +232,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const
   * \sa LinSpaced(Scalar,Scalar)
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
 DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -264,7 +264,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
   * \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
 DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -276,7 +276,7 @@ DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
   * Special version for fixed size types which does not require the size parameter.
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
 DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -286,7 +286,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
 /** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
 template<typename Derived>
-bool DenseBase<Derived>::isApproxToConstant
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant
 (const Scalar& val, const RealScalar& prec) const
 {
   typename internal::nested_eval<Derived,1>::type self(derived());
@@ -301,7 +301,7 @@ bool DenseBase<Derived>::isApproxToConstant
   *
   * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
 template<typename Derived>
-bool DenseBase<Derived>::isConstant
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isConstant
 (const Scalar& val, const RealScalar& prec) const
 {
   return isApproxToConstant(val, prec);
@@ -312,7 +312,7 @@ bool DenseBase<Derived>::isConstant
   * \sa setConstant(), Constant(), class CwiseNullaryOp
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
 {
   setConstant(val);
 }
@@ -322,7 +322,7 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
   * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
 {
   return derived() = Constant(rows(), cols(), val);
 }
@@ -337,7 +337,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
   * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived&
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
 PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
 {
   resize(size);
@@ -356,7 +356,7 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
   * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived&
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
 PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
 {
   resize(rows, cols);
@@ -380,7 +380,7 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
   * \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
@@ -400,7 +400,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con
   * \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   return setLinSpaced(size(), low, high);
@@ -423,7 +423,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low,
   * \sa Zero(), Zero(Index)
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Zero(Index rows, Index cols)
 {
   return Constant(rows, cols, Scalar(0));
@@ -446,7 +446,7 @@ DenseBase<Derived>::Zero(Index rows, Index cols)
   * \sa Zero(), Zero(Index,Index)
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Zero(Index size)
 {
   return Constant(size, Scalar(0));
@@ -463,7 +463,7 @@ DenseBase<Derived>::Zero(Index size)
   * \sa Zero(Index), Zero(Index,Index)
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Zero()
 {
   return Constant(Scalar(0));
@@ -478,7 +478,7 @@ DenseBase<Derived>::Zero()
   * \sa class CwiseNullaryOp, Zero()
   */
 template<typename Derived>
-bool DenseBase<Derived>::isZero(const RealScalar& prec) const
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isZero(const RealScalar& prec) const
 {
   typename internal::nested_eval<Derived,1>::type self(derived());
   for(Index j = 0; j < cols(); ++j)
@@ -496,7 +496,7 @@ bool DenseBase<Derived>::isZero(const RealScalar& prec) const
   * \sa class CwiseNullaryOp, Zero()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
 {
   return setConstant(Scalar(0));
 }
@@ -511,7 +511,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
   * \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived&
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
 PlainObjectBase<Derived>::setZero(Index newSize)
 {
   resize(newSize);
@@ -529,7 +529,7 @@ PlainObjectBase<Derived>::setZero(Index newSize)
   * \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived&
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
 PlainObjectBase<Derived>::setZero(Index rows, Index cols)
 {
   resize(rows, cols);
@@ -553,7 +553,7 @@ PlainObjectBase<Derived>::setZero(Index rows, Index cols)
   * \sa Ones(), Ones(Index), isOnes(), class Ones
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Ones(Index rows, Index cols)
 {
   return Constant(rows, cols, Scalar(1));
@@ -576,7 +576,7 @@ DenseBase<Derived>::Ones(Index rows, Index cols)
   * \sa Ones(), Ones(Index,Index), isOnes(), class Ones
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Ones(Index newSize)
 {
   return Constant(newSize, Scalar(1));
@@ -593,7 +593,7 @@ DenseBase<Derived>::Ones(Index newSize)
   * \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
 DenseBase<Derived>::Ones()
 {
   return Constant(Scalar(1));
@@ -608,7 +608,7 @@ DenseBase<Derived>::Ones()
   * \sa class CwiseNullaryOp, Ones()
   */
 template<typename Derived>
-bool DenseBase<Derived>::isOnes
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isOnes
 (const RealScalar& prec) const
 {
   return isApproxToConstant(Scalar(1), prec);
@@ -622,7 +622,7 @@ bool DenseBase<Derived>::isOnes
   * \sa class CwiseNullaryOp, Ones()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
 {
   return setConstant(Scalar(1));
 }
@@ -637,7 +637,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
   * \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived&
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
 PlainObjectBase<Derived>::setOnes(Index newSize)
 {
   resize(newSize);
@@ -655,7 +655,7 @@ PlainObjectBase<Derived>::setOnes(Index newSize)
   * \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived&
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
 PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
 {
   resize(rows, cols);
@@ -679,7 +679,7 @@ PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
   * \sa Identity(), setIdentity(), isIdentity()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
 MatrixBase<Derived>::Identity(Index rows, Index cols)
 {
   return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
@@ -696,7 +696,7 @@ MatrixBase<Derived>::Identity(Index rows, Index cols)
   * \sa Identity(Index,Index), setIdentity(), isIdentity()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
 MatrixBase<Derived>::Identity()
 {
   EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
@@ -771,7 +771,7 @@ struct setIdentity_impl<Derived, true>
   * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
 {
   return internal::setIdentity_impl<Derived>::run(derived());
 }
@@ -787,7 +787,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
   * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
 {
   derived().resize(rows, cols);
   return setIdentity();
@@ -800,7 +800,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index
   * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);
@@ -815,7 +815,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
   * \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   return BasisReturnType(SquareMatrixType::Identity(),i);
@@ -828,7 +828,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
   * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
 { return Derived::Unit(0); }

 /** \returns an expression of the Y axis unit vector (0,1{,0}^*)
@@ -838,7 +838,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
   * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
 { return Derived::Unit(1); }

 /** \returns an expression of the Z axis unit vector (0,0,1{,0}^*)
@@ -848,7 +848,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
   * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
 { return Derived::Unit(2); }

 /** \returns an expression of the W axis unit vector (0,0,0,1)
@@ -858,7 +858,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
   * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
   */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
 { return Derived::Unit(3); }

 } // end namespace Eigen

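Note: the nullary factories (NullaryExpr, Constant, LinSpaced, Zero, Ones, Identity, Unit*) and their set* counterparts are now EIGEN_DEVICE_FUNC, which should make them callable from CUDA device code; host-side semantics are unchanged, as in this sketch:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::Matrix3f m = Eigen::Matrix3f::Identity();
  Eigen::Vector4f v = Eigen::Vector4f::LinSpaced(0.f, 1.f); // fixed-size overload, no size argument
  m.setConstant(2.f);
  std::cout << m << "\n" << v.transpose() << std::endl;
}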
Eigen/src/Core/DenseBase.h

@@ -296,7 +296,7 @@ template<typename Derived> class DenseBase
     EIGEN_DEVICE_FUNC
     Derived& operator=(const ReturnByValue<OtherDerived>& func);

-    /** \ínternal
+    /** \internal
       * Copies \a other into *this without evaluating other. \returns a reference to *this.
      * \deprecated */
     template<typename OtherDerived>
@@ -484,9 +484,9 @@ template<typename Derived> class DenseBase
       return derived().coeff(0,0);
     }

-    bool all() const;
-    bool any() const;
-    Index count() const;
+    EIGEN_DEVICE_FUNC bool all() const;
+    EIGEN_DEVICE_FUNC bool any() const;
+    EIGEN_DEVICE_FUNC Index count() const;

     typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
     typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;

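Note: all(), any() and count() are now declared EIGEN_DEVICE_FUNC, so these reductions can also be used inside CUDA kernels; typical host usage stays the same:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::Vector4d v(1.0, -2.0, 3.0, 4.0);
  std::cout << (v.array() > 0.0).all() << " "     // 0: not all entries are positive
            << (v.array() < 0.0).any() << " "     // 1: at least one negative entry
            << (v.array() > 0.0).count() << "\n"; // 3 positive entries
}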
Eigen/src/Core/Diagonal.h

@@ -70,7 +70,10 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
     EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)

     EIGEN_DEVICE_FUNC
-    explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
+    explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index)
+    {
+      eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() );
+    }

     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)

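Note: the Diagonal constructor now asserts that the requested index is in range (a_index <= cols() and -a_index <= rows()), so an out-of-range diagonal fires eigen_assert in debug builds instead of yielding a bogus view. An illustrative sketch:

#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd A(3, 4);
  A.setZero();
  auto d = A.diagonal(1);   // OK: 1 <= cols()
  // A.diagonal(5);         // would now trigger the new eigen_assert in debug builds
  return d.size() == 3 ? 0 : 1;
}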
Eigen/src/Core/Dot.h

@@ -31,7 +31,8 @@ struct dot_nocheck
  typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
  typedef typename conj_prod::result_type ResScalar;
  EIGEN_DEVICE_FUNC
-  static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
+  EIGEN_STRONG_INLINE
+  static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
  {
    return a.template binaryExpr<conj_prod>(b).sum();
  }
@@ -43,7 +44,8 @@ struct dot_nocheck<T, U, true>
  typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
  typedef typename conj_prod::result_type ResScalar;
  EIGEN_DEVICE_FUNC
-  static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
+  EIGEN_STRONG_INLINE
+  static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
  {
    return a.transpose().template binaryExpr<conj_prod>(b).sum();
  }
@@ -65,6 +67,7 @@ struct dot_nocheck<T, U, true>
template<typename Derived>
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE
typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
{
@@ -102,7 +105,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
 * \sa lpNorm(), dot(), squaredNorm()
 */
template<typename Derived>
-inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
+EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
{
  return numext::sqrt(squaredNorm());
}
@@ -117,7 +120,7 @@ inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real Matr
 * \sa norm(), normalize()
 */
template<typename Derived>
-inline const typename MatrixBase<Derived>::PlainObject
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::normalized() const
{
  typedef typename internal::nested_eval<Derived,2>::type _Nested;
@@ -139,7 +142,7 @@ MatrixBase<Derived>::normalized() const
 * \sa norm(), normalized()
 */
template<typename Derived>
-inline void MatrixBase<Derived>::normalize()
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize()
{
  RealScalar z = squaredNorm();
  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
@@ -160,7 +163,7 @@ inline void MatrixBase<Derived>::normalize()
 * \sa stableNorm(), stableNormalize(), normalized()
 */
template<typename Derived>
-inline const typename MatrixBase<Derived>::PlainObject
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::stableNormalized() const
{
  typedef typename internal::nested_eval<Derived,3>::type _Nested;
@@ -185,7 +188,7 @@ MatrixBase<Derived>::stableNormalized() const
 * \sa stableNorm(), stableNormalized(), normalize()
 */
template<typename Derived>
-inline void MatrixBase<Derived>::stableNormalize()
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::stableNormalize()
{
  RealScalar w = cwiseAbs().maxCoeff();
  RealScalar z = (derived()/w).squaredNorm();

View File

@@ -14,6 +14,7 @@
namespace Eigen {
/** \class EigenBase
+ * \ingroup Core_Module
 *
 * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
 *
@@ -128,6 +129,7 @@ template<typename Derived> struct EigenBase
 */
template<typename Derived>
template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
{
  call_assignment(derived(), other.derived());
@@ -136,6 +138,7 @@ Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
template<typename Derived>
template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
{
  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
@@ -144,6 +147,7 @@ Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
template<typename Derived>
template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
{
  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());

View File

@@ -24,12 +24,17 @@ template<int Rows, int Cols, int Depth> struct product_type_selector;
template<int Size, int MaxSize> struct product_size_category
{
-  enum { is_large = MaxSize == Dynamic ||
-                    Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
-                    (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
-         value = is_large ? Large
-               : Size == 1 ? 1
-                           : Small
+  enum {
+#ifndef EIGEN_CUDA_ARCH
+    is_large = MaxSize == Dynamic ||
+               Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
+               (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
+#else
+    is_large = 0,
+#endif
+    value = is_large ? Large
+          : Size == 1 ? 1
+                      : Small
  };
};
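The intent of the new branch: device-side compilation (EIGEN_CUDA_ARCH defined) never dispatches to the cache-blocked "Large" product kernels. A standalone replica of the selector, assuming the default threshold of 20 for EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD; the names below are illustrative, not Eigen's:

enum { Small = 3, Large = 2, Dynamic = -1 };
const int kThreshold = 20;  // stand-in for EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD

template<int Size, int MaxSize> struct size_category_sketch {
  enum {
#ifndef EIGEN_CUDA_ARCH
    is_large = MaxSize == Dynamic ||
               Size >= kThreshold ||
               (Size == Dynamic && MaxSize >= kThreshold),
#else
    is_large = 0,  // device code: the large-product paths are compiled out
#endif
    value = is_large ? Large : (Size == 1 ? 1 : Small)
  };
};

// Host-side expectations:
static_assert(size_category_sketch<1, 1>::value == 1,     "1x1 -> inner product");
static_assert(size_category_sketch<4, 4>::value == Small, "small fixed size");
static_assert(size_category_sketch<Dynamic, Dynamic>::value == Large, "dynamic -> large");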
@@ -379,8 +384,6 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
 *
 * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
 */
-#ifndef __CUDACC__
template<typename Derived>
template<typename OtherDerived>
inline const Product<Derived, OtherDerived>
@@ -412,8 +415,6 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
  return Product<Derived, OtherDerived>(derived(), other.derived());
}
-#endif // __CUDACC__
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
 *
 * The returned product will behave like any other expressions: the coefficients of the product will be

View File

@@ -230,7 +230,7 @@ pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(
 * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
 * Currently, this function is only used for scalar * complex products.
 */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
/** \internal \returns a packet with elements of \a *from quadrupled.
@@ -278,7 +278,7 @@ inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
}
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
plset(const typename unpacket_traits<Packet>::type& a) { return a; }
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
@@ -482,7 +482,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& fro
 * by the current computation.
 */
template<typename Packet, int LoadMode>
-inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
{
  return ploadt<Packet, LoadMode>(from);
}

View File

@@ -20,11 +20,17 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
{
  typedef traits<PlainObjectType> TraitsBase;
  enum {
+    PlainObjectTypeInnerSize = ((traits<PlainObjectType>::Flags&RowMajorBit)==RowMajorBit)
+                             ? PlainObjectType::ColsAtCompileTime
+                             : PlainObjectType::RowsAtCompileTime,
    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
                             ? int(PlainObjectType::InnerStrideAtCompileTime)
                             : int(StrideType::InnerStrideAtCompileTime),
    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
-                             ? int(PlainObjectType::OuterStrideAtCompileTime)
+                             ? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic
+                                ? Dynamic
+                                : int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize))
                             : int(StrideType::OuterStrideAtCompileTime),
    Alignment = int(MapOptions)&int(AlignedMask),
    Flags0 = TraitsBase::Flags & (~NestByRefBit),
@@ -107,10 +113,11 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
-     return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
-          : IsVectorAtCompileTime ? this->size()
-          : int(Flags)&RowMajorBit ? this->cols()
-          : this->rows();
+     return int(StrideType::OuterStrideAtCompileTime) != 0 ? m_stride.outer()
+          : int(internal::traits<Map>::OuterStrideAtCompileTime) != Dynamic ? Index(internal::traits<Map>::OuterStrideAtCompileTime)
+          : IsVectorAtCompileTime ? (this->size() * innerStride())
+          : (int(Flags)&RowMajorBit) ? (this->cols() * innerStride())
+          : (this->rows() * innerStride());
    }
    /** Constructor in the fixed-size case.
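With the recomputed compile-time outer stride, a Map whose stride type leaves the outer stride implicit now accounts for the inner stride. A small self-check; the sizes are illustrative (before this fix the same call reported rows() = 4 here):

#include <Eigen/Dense>
#include <cassert>
using namespace Eigen;

int main()
{
  float data[32] = {};
  // 4x4 column-major view over every second float: a column spans 4*2 floats.
  Map<Matrix4f, 0, InnerStride<2> > m(data);
  assert(m.innerStride() == 2);
  assert(m.outerStride() == 8);
}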

View File

@@ -348,31 +348,7 @@ struct norm1_retval
 * Implementation of hypot *
 ****************************************************************************/
-template<typename Scalar>
-struct hypot_impl
-{
-  typedef typename NumTraits<Scalar>::Real RealScalar;
-  static inline RealScalar run(const Scalar& x, const Scalar& y)
-  {
-    EIGEN_USING_STD_MATH(abs);
-    EIGEN_USING_STD_MATH(sqrt);
-    RealScalar _x = abs(x);
-    RealScalar _y = abs(y);
-    Scalar p, qp;
-    if(_x>_y)
-    {
-      p = _x;
-      qp = _y / p;
-    }
-    else
-    {
-      p = _y;
-      qp = _x / p;
-    }
-    if(p==RealScalar(0)) return RealScalar(0);
-    return p * sqrt(RealScalar(1) + qp*qp);
-  }
-};
+template<typename Scalar> struct hypot_impl;
template<typename Scalar>
struct hypot_retval
@@ -495,7 +471,7 @@ namespace std_fallback {
  typedef typename NumTraits<Scalar>::Real RealScalar;
  EIGEN_USING_STD_MATH(log);
  Scalar x1p = RealScalar(1) + x;
-  return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
+  return numext::equal_strict(x1p, Scalar(1)) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
}
}
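numext::equal_strict keeps the exact-equality guard of the classic log1p identity explicit (it replaces a plain == on floats). Why the identity x * log(u) / (u - 1), with u = 1 + x, is accurate — a standalone sketch:

#include <cmath>
#include <cstdio>

int main()
{
  double x = 1e-16;
  double u = 1.0 + x;                      // rounds to exactly 1.0
  double naive = std::log(u);              // 0.0: the tiny x is lost entirely
  // The same rounded u appears in numerator and denominator, so the
  // rounding error of forming 1+x cancels out of the quotient:
  double guarded = (u == 1.0) ? x : x * std::log(u) / (u - 1.0);
  std::printf("naive=%g guarded=%g log1p=%g\n", naive, guarded, std::log1p(x));
}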
@@ -1061,11 +1037,24 @@ double log(const double &x) { return ::log(x); }
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-typename NumTraits<T>::Real abs(const T &x) {
+typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
+abs(const T &x) {
  EIGEN_USING_STD_MATH(abs);
  return abs(x);
}
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type
+abs(const T &x) {
+  return x;
+}
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); }
+EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
#ifdef __CUDACC__
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float abs(const float &x) { return ::fabsf(x); }

View File

@@ -71,6 +71,29 @@ T generic_fast_tanh_float(const T& a_x)
  return pdiv(p, q);
}
+template<typename RealScalar>
+EIGEN_STRONG_INLINE
+RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
+{
+  EIGEN_USING_STD_MATH(sqrt);
+  RealScalar p, qp;
+  p = numext::maxi(x,y);
+  if(p==RealScalar(0)) return RealScalar(0);
+  qp = numext::mini(y,x) / p;
+  return p * sqrt(RealScalar(1) + qp*qp);
+}
+template<typename Scalar>
+struct hypot_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  static inline RealScalar run(const Scalar& x, const Scalar& y)
+  {
+    EIGEN_USING_STD_MATH(abs);
+    return positive_real_hypot<RealScalar>(abs(x), abs(y));
+  }
+};
} // end namespace internal
} // end namespace Eigen
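The relocated implementation keeps the overflow-safe formulation p * sqrt(1 + (q/p)^2) with p = max(|x|,|y|). A standalone illustration of why squaring the operands directly is not an option:

#include <cmath>
#include <cstdio>

int main()
{
  double x = 3e200, y = 4e200;             // true hypot is 5e200, representable
  double naive = std::sqrt(x*x + y*y);     // x*x overflows -> inf
  double p  = std::fmax(std::fabs(x), std::fabs(y));
  double qp = std::fmin(std::fabs(x), std::fabs(y)) / p;
  double scaled = (p == 0.0) ? 0.0 : p * std::sqrt(1.0 + qp*qp);
  std::printf("naive=%g scaled=%g\n", naive, scaled);   // inf vs 5e200
}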

View File

@@ -160,20 +160,11 @@ template<typename Derived> class MatrixBase
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator-=(const MatrixBase<OtherDerived>& other);
-#ifdef __CUDACC__
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
-   const Product<Derived,OtherDerived,LazyProduct>
-   operator*(const MatrixBase<OtherDerived> &other) const
-   { return this->lazyProduct(other); }
-#else
-   template<typename OtherDerived>
    const Product<Derived,OtherDerived>
    operator*(const MatrixBase<OtherDerived> &other) const;
-#endif
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    const Product<Derived,OtherDerived,LazyProduct>
@@ -294,7 +285,7 @@ template<typename Derived> class MatrixBase
      * fuzzy comparison such as isApprox()
      * \sa isApprox(), operator!= */
    template<typename OtherDerived>
-   inline bool operator==(const MatrixBase<OtherDerived>& other) const
+   EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const
    { return cwiseEqual(other).all(); }
    /** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other.
@@ -302,7 +293,7 @@ template<typename Derived> class MatrixBase
      * fuzzy comparison such as isApprox()
      * \sa isApprox(), operator== */
    template<typename OtherDerived>
-   inline bool operator!=(const MatrixBase<OtherDerived>& other) const
+   EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const
    { return cwiseNotEqual(other).any(); }
    NoAlias<Derived,Eigen::MatrixBase > noalias();

View File

@@ -215,6 +215,8 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
  static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
  EIGEN_DEVICE_FUNC
  static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
+
+  static inline int digits10() { return NumTraits<Scalar>::digits10(); }
};
template<> struct NumTraits<std::string>

View File

@@ -577,6 +577,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
 * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
 * \a data pointers.
 *
+ * Here is an example using strides:
+ * \include Matrix_Map_stride.cpp
+ * Output: \verbinclude Matrix_Map_stride.out
+ *
 * \see class Map
 */
//@{
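The referenced snippet files are not part of this diff; below is a plausible illustration of the kind of strided Map usage they document (the 2x2-corner view is my own example, not the snippet's content):

#include <Eigen/Dense>
#include <iostream>
using namespace Eigen;

int main()
{
  Matrix4i A;
  A <<  1,  2,  3,  4,
        5,  6,  7,  8,
        9, 10, 11, 12,
       13, 14, 15, 16;
  // View the top-left 2x2 corner through A's column-major storage:
  // columns are 4 ints apart (outer stride), rows 1 int apart (inner stride).
  Map<Matrix2i, 0, Stride<4,1> > corner(A.data());
  std::cout << corner << std::endl;  // 1 2
                                     // 5 6
}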

View File

@@ -97,8 +97,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option,
          && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
    }
-   EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
-   EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
+   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
+   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
    EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
    EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
@@ -127,7 +127,7 @@ public:
  using Base::derived;
  typedef typename Base::Scalar Scalar;
-  operator const Scalar() const
+  EIGEN_STRONG_INLINE operator const Scalar() const
  {
    return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
  }
@@ -162,7 +162,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
  public:
-   EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const
+   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const
    {
      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
@@ -170,7 +170,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
      return internal::evaluator<Derived>(derived()).coeff(row,col);
    }
-   EIGEN_DEVICE_FUNC Scalar coeff(Index i) const
+   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const
    {
      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );

View File

@@ -32,7 +32,7 @@ struct evaluator<Product<Lhs, Rhs, Options> >
  typedef Product<Lhs, Rhs, Options> XprType;
  typedef product_evaluator<XprType> Base;
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
};
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
@@ -55,7 +55,7 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
                 const Product<Lhs, Rhs, DefaultProduct> > XprType;
  typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
    : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
  {}
};
@@ -68,7 +68,7 @@ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
  typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
    : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
        Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
        xpr.index() ))
@@ -207,6 +207,12 @@ struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename
  static const bool value = true;
};
+template<typename OtherXpr, typename Lhs, typename Rhs>
+struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
+                                               const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
+  static const bool value = true;
+};
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
struct assignment_from_xpr_op_product
{
@@ -240,19 +246,19 @@ template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
{
  template<typename Dst>
-  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
  }
  template<typename Dst>
-  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
  }
  template<typename Dst>
-  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
};
@@ -306,25 +312,25 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
  };
  template<typename Dst>
-  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
  }
  template<typename Dst>
-  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
  }
  template<typename Dst>
-  static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
  }
  template<typename Dst>
-  static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
  }
@@ -779,7 +785,11 @@ public:
    _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
    _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
    Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
-   Alignment = evaluator<MatrixType>::Alignment
+   Alignment = evaluator<MatrixType>::Alignment,
+   AsScalarProduct = (DiagonalType::SizeAtCompileTime==1)
+                  || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
+                  || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
  };
  diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
@@ -791,7 +801,10 @@ public:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
  {
-   return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
+   if(AsScalarProduct)
+     return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);
+   else
+     return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
  }
protected:
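The new AsScalarProduct flag covers linear-access products whose diagonal factor is really a single coefficient, so coeff(idx) must keep reading entry 0 of the diagonal rather than entry idx. A sketch of such a case:

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  VectorXd d(1);                       // a 1x1 diagonal factor
  d << 2.0;
  RowVectorXd v(4);
  v << 1, 2, 3, 4;
  // 1x1 diagonal times 1x4 row vector: linear access must use d(0) for every idx.
  RowVectorXd r = d.asDiagonal() * v;  // == 2 * v
  return r(3) == 8.0 ? 0 : 1;
}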

View File

@@ -407,7 +407,7 @@ protected:
 */
template<typename Derived>
template<typename Func>
-typename internal::traits<Derived>::Scalar
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");

View File

@@ -95,6 +95,8 @@ protected:
  template<typename Expression>
  EIGEN_DEVICE_FUNC void construct(Expression& expr)
  {
+   EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression);
+
    if(PlainObjectType::RowsAtCompileTime==1)
    {
      eigen_assert(expr.rows()==1 || expr.cols()==1);

View File

@@ -71,7 +71,9 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
    EIGEN_DEVICE_FUNC
    explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
-   {}
+   {
+     EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY);
+   }
    EIGEN_DEVICE_FUNC
    inline Index rows() const { return m_matrix.rows(); }
@@ -189,7 +191,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
                  TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);
    }
-   typedef SelfAdjointView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
+   typedef SelfAdjointView<const MatrixConjugateReturnType,UpLo> ConjugateReturnType;
    /** \sa MatrixBase::conjugate() const */
    EIGEN_DEVICE_FUNC
    inline const ConjugateReturnType conjugate() const
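The template parameter is now validated up front, and conjugate() reuses UpLo rather than the unrelated Mode. A sketch of what the static assert rejects:

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  Matrix3d m = Matrix3d::Random();
  m.selfadjointView<Lower>();  // ok
  m.selfadjointView<Upper>();  // ok
  // m.selfadjointView<Lower|Upper>(); // now a compile error:
  //   SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY
}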

View File

@@ -15,33 +15,29 @@ namespace Eigen {
// TODO generalize the scalar type of 'other'
template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{
-  typedef typename Derived::PlainObject PlainObject;
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
  return derived();
}
template<typename Derived>
-EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
{
-  typedef typename Derived::PlainObject PlainObject;
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
  return derived();
}
template<typename Derived>
-EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
{
-  typedef typename Derived::PlainObject PlainObject;
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
  return derived();
}
template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{
-  typedef typename Derived::PlainObject PlainObject;
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
  return derived();
}

View File

@@ -34,12 +34,12 @@ template<typename Decomposition, typename RhsType,typename StorageKind> struct s
template<typename Decomposition, typename RhsType>
struct solve_traits<Decomposition,RhsType,Dense>
{
-  typedef Matrix<typename RhsType::Scalar,
+  typedef typename make_proper_matrix_type<typename RhsType::Scalar,
                 Decomposition::ColsAtCompileTime,
                 RhsType::ColsAtCompileTime,
                 RhsType::PlainObject::Options,
                 Decomposition::MaxColsAtCompileTime,
-                RhsType::MaxColsAtCompileTime> PlainObject;
+                RhsType::MaxColsAtCompileTime>::type PlainObject;
};
template<typename Decomposition, typename RhsType>

View File

@@ -165,12 +165,13 @@ MatrixBase<Derived>::stableNorm() const
  typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
  typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
-  DerivedCopy copy(derived());
+  const DerivedCopy copy(derived());
  enum {
    CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit)
                || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough
-              ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT) // ifwe cannot allocate on the stack, then let's not bother about this optimization
+              ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT)
+                && (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization
  };
  typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
                                         typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper;
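For reference, the difference between the two norms that this block-wise, rescaled code path serves:

#include <Eigen/Dense>
#include <iostream>
using namespace Eigen;

int main()
{
  Vector3d v = Vector3d::Constant(1e200);
  std::cout << v.norm()       << "\n";  // inf: squaredNorm() overflows double
  std::cout << v.stableNorm() << "\n";  // ~1.73205e200: rescaled block-wise sum
}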

View File

@@ -384,7 +384,7 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
const Product<OtherDerived, Transpose, AliasFreeProduct>
operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt)
{
-  return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt.derived());
+  return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt);
}
/** \returns the \a matrix with the inverse transpositions applied to the rows.

View File

@@ -204,23 +204,7 @@ template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
  }
};
-template<> struct conj_helper<Packet8f, Packet4cf, false,false>
-{
-  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
-  { return padd(c, pmul(x,y)); }
-  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
-  { return Packet4cf(Eigen::internal::pmul(x, y.v)); }
-};
-template<> struct conj_helper<Packet4cf, Packet8f, false,false>
-{
-  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
-  { return padd(c, pmul(x,y)); }
-  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
-  { return Packet4cf(Eigen::internal::pmul(x.v, y)); }
-};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{
@@ -400,23 +384,7 @@ template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
  }
};
-template<> struct conj_helper<Packet4d, Packet2cd, false,false>
-{
-  EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
-  { return padd(c, pmul(x,y)); }
-  EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
-  { return Packet2cd(Eigen::internal::pmul(x, y.v)); }
-};
-template<> struct conj_helper<Packet2cd, Packet4d, false,false>
-{
-  EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
-  { return padd(c, pmul(x,y)); }
-  EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
-  { return Packet2cd(Eigen::internal::pmul(x.v, y)); }
-};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{
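EIGEN_MAKE_CONJ_HELPER_CPLX_REAL — presumably supplied by the newly added src/Core/arch/Default/ConjHelper.h — stamps out the real-by-complex helper pair that each SIMD backend used to spell by hand. A sketch of what it plausibly expands to, reconstructed from the structs it replaces above (not the authoritative definition):

#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL)           \
  template<> struct conj_helper<PACKET_REAL, PACKET_CPLX, false, false> {    \
    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x,              \
        const PACKET_CPLX& y, const PACKET_CPLX& c) const                    \
    { return padd(c, pmul(x, y)); }                                          \
    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x,               \
        const PACKET_CPLX& y) const                                          \
    { return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); }      \
  };                                                                         \
  template<> struct conj_helper<PACKET_CPLX, PACKET_REAL, false, false> {    \
    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x,              \
        const PACKET_REAL& y, const PACKET_CPLX& c) const                    \
    { return padd(c, pmul(x, y)); }                                          \
    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x,               \
        const PACKET_REAL& y) const                                          \
    { return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); }      \
  };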

View File

@@ -308,9 +308,9 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
}
#ifndef EIGEN_VECTORIZE_AVX512
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
#endif
template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
@@ -333,9 +333,12 @@ template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
{
  __m256d tmp = _mm256_shuffle_pd(a,a,5);
  return _mm256_permute2f128_pd(tmp, tmp, 1);
+#if 0
+  // This version is unlikely to be faster as _mm256_shuffle_ps and _mm256_permute_pd
+  // exhibit the same latency/throughput, but it is here for future reference/benchmarking...
  __m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
  return _mm256_permute_pd(swap_halves,5);
+#endif
}
// pabs should be ok

View File

@@ -88,9 +88,9 @@ plog<Packet16f>(const Packet16f& _x) {
  //   x = x + x - 1.0;
  // } else { x = x - 1.0; }
  __mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
-  Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps());
+  Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x);
  x = psub(x, p16f_1);
-  e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps()));
+  e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1));
  x = padd(x, tmp);
  Packet16f x2 = pmul(x, x);
@@ -119,8 +119,9 @@ plog<Packet16f>(const Packet16f& _x) {
  x = padd(x, y2);
  // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
-  return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf,
-                              _mm512_mask_blend_ps(invalid_mask, p16f_nan, x));
+  return _mm512_mask_blend_ps(iszero_mask,
+                              _mm512_mask_blend_ps(invalid_mask, x, p16f_nan),
+                              p16f_minus_inf);
}
#endif
@@ -266,8 +267,7 @@ psqrt<Packet16f>(const Packet16f& _x) {
  // select only the inverse sqrt of positive normal inputs (denormals are
  // flushed to zero and cause infs as well).
  __mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
-  Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x),
-                                     _mm512_setzero_ps());
+  Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(), _mm512_rsqrt14_ps(_x));
  // Do a single step of Newton's iteration.
  x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
@@ -289,8 +289,7 @@ psqrt<Packet8d>(const Packet8d& _x) {
  // select only the inverse sqrt of positive normal inputs (denormals are
  // flushed to zero and cause infs as well).
  __mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
-  Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x),
-                                    _mm512_setzero_pd());
+  Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(), _mm512_rsqrt14_pd(_x));
  // Do a first step of Newton's iteration.
  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
@@ -333,20 +332,18 @@ prsqrt<Packet16f>(const Packet16f& _x) {
  // select only the inverse sqrt of positive normal inputs (denormals are
  // flushed to zero and cause infs as well).
  __mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
-  Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(),
-                                     _mm512_rsqrt14_ps(_x));
+  Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x), _mm512_setzero_ps());
  // Fill in NaNs and Infs for the negative/zero entries.
  __mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
  Packet16f infs_and_nans = _mm512_mask_blend_ps(
-      neg_mask, p16f_nan,
-      _mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps()));
+      neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf), p16f_nan);
  // Do a single step of Newton's iteration.
  x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
  // Insert NaNs and Infs in all the right places.
-  return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x);
+  return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans);
}
template <>
@@ -363,14 +360,12 @@ prsqrt<Packet8d>(const Packet8d& _x) {
  // select only the inverse sqrt of positive normal inputs (denormals are
  // flushed to zero and cause infs as well).
  __mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
-  Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(),
-                                    _mm512_rsqrt14_pd(_x));
+  Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x), _mm512_setzero_pd());
  // Fill in NaNs and Infs for the negative/zero entries.
  __mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
  Packet8d infs_and_nans = _mm512_mask_blend_pd(
-      neg_mask, p8d_nan,
-      _mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd()));
+      neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf), p8d_nan);
  // Do a first step of Newton's iteration.
  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
@@ -379,9 +374,9 @@ prsqrt<Packet8d>(const Packet8d& _x) {
  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
  // Insert NaNs and Infs in all the right places.
-  return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x);
+  return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans);
}
-#else
+#elif defined(EIGEN_VECTORIZE_AVX512ER)
template <>
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
  return _mm512_rsqrt28_ps(x);
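All of the blend fixes above follow from the operand order of the AVX-512 intrinsic: _mm512_mask_blend_ps(k, a, b) takes b in lanes where the mask bit is set and a elsewhere — the opposite of what the old calls assumed. A minimal check (requires -mavx512f and matching hardware):

#include <immintrin.h>
#include <cstdio>

int main()
{
  __m512 a = _mm512_set1_ps(1.0f);
  __m512 b = _mm512_set1_ps(2.0f);
  __mmask16 k = 0x0001;                      // only lane 0 set
  __m512 r = _mm512_mask_blend_ps(k, a, b);  // lane 0 <- b, all others <- a
  float out[16];
  _mm512_storeu_ps(out, r);
  std::printf("%g %g\n", out[0], out[1]);    // prints: 2 1
}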

View File

@@ -618,9 +618,9 @@ EIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) {
  pstore(to, pa);
}
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template <>
EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {

View File

@@ -224,23 +224,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
  }
};
-template<> struct conj_helper<Packet4f, Packet2cf, false,false>
-{
-  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
-  { return padd(c, pmul(x,y)); }
-  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
-  { return Packet2cf(internal::pmul<Packet4f>(x, y.v)); }
-};
-template<> struct conj_helper<Packet2cf, Packet4f, false,false>
-{
-  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
-  { return padd(c, pmul(x,y)); }
-  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
-  { return Packet2cf(internal::pmul<Packet4f>(x.v, y)); }
-};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
@@ -416,23 +400,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
    return pconj(internal::pmul(a, b));
  }
};
-template<> struct conj_helper<Packet2d, Packet1cd, false,false>
-{
-  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
-  { return padd(c, pmul(x,y)); }
-  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
-  { return Packet1cd(internal::pmul<Packet2d>(x, y.v)); }
-};
-template<> struct conj_helper<Packet1cd, Packet2d, false,false>
-{
-  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
-  { return padd(c, pmul(x,y)); }
-  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
-  { return Packet1cd(internal::pmul<Packet2d>(x.v, y)); }
-};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{

View File

@ -103,7 +103,7 @@ static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4u
static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
#else #else
static Packet16uc p16uc_FORWARD = p16uc_REVERSE32; static Packet16uc p16uc_FORWARD = p16uc_REVERSE32;
static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
@ -388,10 +388,28 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); } template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); }
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; } template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; }
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
{
#ifdef __VSX__
Packet4f ret;
__asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
return ret;
#else
return vec_min(a, b);
#endif
}
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
{
#ifdef __VSX__
Packet4f ret;
__asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
return ret;
#else
return vec_max(a, b);
#endif
}
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
 template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
@@ -764,7 +782,7 @@ typedef __vector __bool long Packet2bl;
 static Packet2l p2l_ONE  = { 1, 1 };
 static Packet2l p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO);
 static Packet2d p2d_ONE  = { 1.0, 1.0 };
 static Packet2d p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO);
 static Packet2d p2d_MZERO = { -0.0, -0.0 };
@@ -910,9 +928,19 @@ template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const
 // for some weird raisons, it has to be overloaded for packet of integers
 template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
-template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+  Packet2d ret;
+  __asm__ ("xvcmpgedp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
+  return ret;
+}
-template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+  Packet2d ret;
+  __asm__ ("xvcmpgtdp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
+  return ret;
+}
 template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
@@ -969,7 +997,7 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
   Packet2d v[2], sum;
   v[0] = vecs[0] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[0]), reinterpret_cast<Packet4f>(vecs[0]), 8));
   v[1] = vecs[1] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[1]), reinterpret_cast<Packet4f>(vecs[1]), 8));
 #ifdef _BIG_ENDIAN
   sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[0]), reinterpret_cast<Packet4f>(v[1]), 8));
 #else
@@ -1022,7 +1050,7 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) {
 template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
   Packet2l select = { ifPacket.select[0], ifPacket.select[1] };
-  Packet2bl mask = vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE));
+  Packet2bl mask = reinterpret_cast<Packet2bl>( vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE)) );
   return vec_sel(elsePacket, thenPacket, mask);
 }
 #endif // __VSX__
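Note: the vec_min/vec_max intrinsics are replaced by a compare-and-select pair, presumably so that NaN handling on VSX matches Eigen's scalar and SSE kernels. A scalar sketch of what the xvcmpgedp/xvcmpgtdp + xxsel sequences compute (an interpretation of the asm above, not Eigen code):

    // min: mask = (a >= b); result = mask ? b : a
    double pmin_sketch(double a, double b) {
      bool mask = (a >= b);  // xvcmpgedp: lane-wise all-ones where a >= b (false when either is NaN)
      return mask ? b : a;   // xxsel: pick b where the mask is set, otherwise a
    }
    // max: mask = (b > a); result = mask ? b : a
    double pmax_sketch(double a, double b) {
      bool mask = (b > a);   // xvcmpgtdp with the operands swapped
      return mask ? b : a;
    }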
View File
@@ -13,7 +13,7 @@
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted.
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
@@ -147,55 +147,55 @@ namespace half_impl {
 // versions to get the ALU speed increased), but you do save the
 // conversion steps back and forth.
-__device__ half operator + (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
   return __hadd(a, b);
 }
-__device__ half operator * (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
   return __hmul(a, b);
 }
-__device__ half operator - (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
   return __hsub(a, b);
 }
-__device__ half operator / (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
   float num = __half2float(a);
   float denom = __half2float(b);
   return __float2half(num / denom);
 }
-__device__ half operator - (const half& a) {
+EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
   return __hneg(a);
 }
-__device__ half& operator += (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {
   a = a + b;
   return a;
 }
-__device__ half& operator *= (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) {
   a = a * b;
   return a;
 }
-__device__ half& operator -= (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {
   a = a - b;
   return a;
 }
-__device__ half& operator /= (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {
   a = a / b;
   return a;
 }
-__device__ bool operator == (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {
   return __heq(a, b);
 }
-__device__ bool operator != (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {
   return __hne(a, b);
 }
-__device__ bool operator < (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {
   return __hlt(a, b);
 }
-__device__ bool operator <= (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {
   return __hle(a, b);
 }
-__device__ bool operator > (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {
   return __hgt(a, b);
 }
-__device__ bool operator >= (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
   return __hge(a, b);
 }
@@ -238,10 +238,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b)
   return a;
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
-  return float(a) == float(b);
+  return numext::equal_strict(float(a),float(b));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
-  return float(a) != float(b);
+  return numext::not_equal_strict(float(a), float(b));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
   return float(a) < float(b);
@@ -386,11 +386,15 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
   return result;
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
-  return half(::expf(float(a)));
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
+  return half(hexp(a));
+#else
+  return half(::expf(float(a)));
+#endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
-  return Eigen::half(::hlog(a));
+#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
+  return half(::hlog(a));
 #else
   return half(::logf(float(a)));
 #endif
@@ -402,7 +406,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
   return half(::log10f(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
-  return half(::sqrtf(float(a)));
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
+  return half(hsqrt(a));
+#else
+  return half(::sqrtf(float(a)));
+#endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
   return half(::powf(float(a), float(b)));
@@ -420,10 +428,18 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
   return half(::tanhf(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
+  return half(hfloor(a));
+#else
   return half(::floorf(float(a)));
+#endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
+  return half(hceil(a));
+#else
   return half(::ceilf(float(a)));
+#endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
@@ -474,9 +490,59 @@ template<> struct is_arithmetic<half> { enum { value = true }; };
 } // end namespace internal
+
+} // end namespace Eigen
+
+namespace std {
+template<>
+struct numeric_limits<Eigen::half> {
+  static const bool is_specialized = true;
+  static const bool is_signed = true;
+  static const bool is_integer = false;
+  static const bool is_exact = false;
+  static const bool has_infinity = true;
+  static const bool has_quiet_NaN = true;
+  static const bool has_signaling_NaN = true;
+  static const float_denorm_style has_denorm = denorm_present;
+  static const bool has_denorm_loss = false;
+  static const std::float_round_style round_style = std::round_to_nearest;
+  static const bool is_iec559 = false;
+  static const bool is_bounded = false;
+  static const bool is_modulo = false;
+  static const int digits = 11;
+  static const int digits10 = 3;      // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
+  static const int max_digits10 = 5;  // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
+  static const int radix = 2;
+  static const int min_exponent = -13;
+  static const int min_exponent10 = -4;
+  static const int max_exponent = 16;
+  static const int max_exponent10 = 4;
+  static const bool traps = true;
+  static const bool tinyness_before = false;
+
+  static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
+  static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
+  static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
+  static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
+  static Eigen::half round_error() { return Eigen::half(0.5); }
+  static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
+  static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
+  static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
+  static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
+};
+}
+
+namespace Eigen {
+
 template<> struct NumTraits<Eigen::half>
   : GenericNumTraits<Eigen::half>
 {
+  enum {
+    IsSigned = true,
+    IsInteger = false,
+    IsComplex = false,
+    RequireInitialization = false
+  };
+
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
     return half_impl::raw_uint16_to_half(0x0800);
   }
@@ -507,7 +573,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
   return Eigen::half(::expf(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
   return Eigen::half(::hlog(a));
 #else
   return Eigen::half(::logf(float(a)));
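Note: the new std::numeric_limits specialization makes Eigen::half usable in generic numeric code that queries traits. A minimal host-side usage sketch (assuming the half headers are pulled in via Eigen/Core, as in this tree):

    #include <Eigen/Core>
    #include <iostream>
    #include <limits>

    int main() {
      // All three come from the std::numeric_limits<Eigen::half> specialization above.
      Eigen::half eps = std::numeric_limits<Eigen::half>::epsilon();   // bit pattern 0x0800 = 2^-10
      Eigen::half hi  = (std::numeric_limits<Eigen::half>::max)();     // 0x7bff = 65504
      Eigen::half inf = std::numeric_limits<Eigen::half>::infinity();  // 0x7c00
      std::cout << float(eps) << ' ' << float(hi) << ' ' << float(inf) << '\n';
      return 0;
    }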
View File
@@ -291,7 +291,7 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
 EIGEN_DEVICE_FUNC inline void
 ptranspose(PacketBlock<float4,4>& kernel) {
-  double tmp = kernel.packet[0].y;
+  float tmp = kernel.packet[0].y;
   kernel.packet[0].y = kernel.packet[1].x;
   kernel.packet[1].x = tmp;
View File
@@ -275,7 +275,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
   return __floats2half2_rn(r1, r2);
 }
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
 template<> __device__ EIGEN_STRONG_INLINE
 half2 plog<half2>(const half2& a) {
View File
@@ -0,0 +1,29 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ARCH_CONJ_HELPER_H
+#define EIGEN_ARCH_CONJ_HELPER_H
+
+#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL) \
+  template<> struct conj_helper<PACKET_REAL, PACKET_CPLX, false,false> { \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const \
+    { return padd(c, pmul(x,y)); } \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const \
+    { return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); } \
+  }; \
+ \
+  template<> struct conj_helper<PACKET_CPLX, PACKET_REAL, false,false> { \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const \
+    { return padd(c, pmul(x,y)); } \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const \
+    { return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); } \
+  };
+
+#endif // EIGEN_ARCH_CONJ_HELPER_H
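Note: EIGEN_MAKE_CONJ_HELPER_CPLX_REAL centralizes the mixed real-packet × complex-packet conj_helper specializations that each SIMD backend previously spelled out by hand (the SSE file further below deletes exactly these). For (Packet2cf, Packet4f) the macro expands to roughly:

    // Sketch of the expansion for (PACKET_CPLX, PACKET_REAL) = (Packet2cf, Packet4f):
    template<> struct conj_helper<Packet4f, Packet2cf, false,false> {
      EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
      { return padd(c, pmul(x,y)); }
      EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
      { return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }  // real packet times the packed (re,im) lanes
    };
    template<> struct conj_helper<Packet2cf, Packet4f, false,false> { /* mirror image, using x.v * y */ };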
View File
@@ -67,7 +67,7 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
 {
   float32x2_t r64;
-  r64 = vld1_f32((float *)&from);
+  r64 = vld1_f32((const float *)&from);
   return Packet2cf(vcombine_f32(r64, r64));
 }
@@ -142,7 +142,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
   to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
 }
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((const float *)addr); }
 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
 {
@@ -265,6 +265,8 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
   }
 };
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
 {
   // TODO optimize it for NEON
@@ -275,7 +277,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
   s = vmulq_f32(b.v, b.v);
   rev_s = vrev64q_f32(s);
-  return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
+  return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s,rev_s)));
 }
 EIGEN_DEVICE_FUNC inline void
@@ -381,7 +383,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((const double *)addr); }
 template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
 {
@@ -456,6 +458,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
   }
 };
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
 {
   // TODO optimize it for NEON
View File
@@ -36,12 +36,43 @@ namespace internal {
 #endif
 #endif
+#if EIGEN_COMP_MSVC
+// In MSVC's arm_neon.h header file, all NEON vector types
+// are aliases to the same underlying type __n128.
+// We thus have to wrap them to make them different C++ types.
+// (See also bug 1428)
+template<typename T,int unique_id>
+struct eigen_packet_wrapper
+{
+  operator T&() { return m_val; }
+  operator const T&() const { return m_val; }
+  eigen_packet_wrapper() {}
+  eigen_packet_wrapper(const T &v) : m_val(v) {}
+  eigen_packet_wrapper& operator=(const T &v) {
+    m_val = v;
+    return *this;
+  }
+  T m_val;
+};
+typedef eigen_packet_wrapper<float32x2_t,0> Packet2f;
+typedef eigen_packet_wrapper<float32x4_t,1> Packet4f;
+typedef eigen_packet_wrapper<int32x4_t  ,2> Packet4i;
+typedef eigen_packet_wrapper<int32x2_t  ,3> Packet2i;
+typedef eigen_packet_wrapper<uint32x4_t ,4> Packet4ui;
+#else
 typedef float32x2_t Packet2f;
 typedef float32x4_t Packet4f;
 typedef int32x4_t   Packet4i;
 typedef int32x2_t   Packet2i;
 typedef uint32x4_t  Packet4ui;
+#endif // EIGEN_COMP_MSVC
 #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
   const Packet4f p4f_##NAME = pset1<Packet4f>(X)
@@ -51,14 +82,17 @@ typedef uint32x4_t Packet4ui;
 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
   const Packet4i p4i_##NAME = pset1<Packet4i>(X)
-// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
-// which available on LLVM and GCC (at least)
-#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
+#if EIGEN_ARCH_ARM64
+// __builtin_prefetch tends to do nothing on ARM64 compilers because the
+// prefetch instructions there are too detailed for __builtin_prefetch to map
+// meaningfully to them.
+#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) : );
+#elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
   #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
 #elif defined __pld
   #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
-#elif !EIGEN_ARCH_ARM64
-  #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
+#elif EIGEN_ARCH_ARM32
+  #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : );
 #else
   // by default no explicit prefetching
   #define EIGEN_ARM_PREFETCH(ADDR)
@@ -113,7 +147,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from)
 template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
 {
-  const float32_t f[] = {0, 1, 2, 3};
+  const float f[] = {0, 1, 2, 3};
   Packet4f countdown = vld1q_f32(f);
   return vaddq_f32(pset1<Packet4f>(a), countdown);
 }
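Note on the MSVC wrapper: when two packet typedefs name the same underlying type, template specializations on them collide. A compilable miniature of the problem and of eigen_packet_wrapper's cure (names are illustrative, not Eigen's):

    #include <cstdint>

    typedef uint64_t n128;        // stand-in for MSVC's __n128
    typedef n128 float32x4_alias; // what float32x4_t looks like under MSVC's arm_neon.h
    typedef n128 int32x4_alias;   // identical type -- specializing a template on both is an error

    // The wrapper gives each packet a distinct C++ type while converting freely to T:
    template<typename T, int unique_id>
    struct packet_wrapper {       // same shape as eigen_packet_wrapper above
      operator T&() { return m_val; }
      operator const T&() const { return m_val; }
      packet_wrapper() {}
      packet_wrapper(const T &v) : m_val(v) {}
      T m_val;
    };
    typedef packet_wrapper<n128,0> Packet4f_demo; // distinct from...
    typedef packet_wrapper<n128,1> Packet4i_demo; // ...this one, despite the same payload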
View File
@@ -128,7 +128,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
                           _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
 }
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
 {
@@ -229,23 +229,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
   }
 };
-template<> struct conj_helper<Packet4f, Packet2cf, false,false>
-{
-  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
-  { return padd(c, pmul(x,y)); }
-  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
-  { return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
-};
-template<> struct conj_helper<Packet2cf, Packet4f, false,false>
-{
-  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
-  { return padd(c, pmul(x,y)); }
-  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
-  { return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
-};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
 {
@@ -340,7 +324,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
 {
@@ -430,23 +414,7 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
   }
 };
-template<> struct conj_helper<Packet2d, Packet1cd, false,false>
-{
-  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
-  { return padd(c, pmul(x,y)); }
-  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
-  { return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
-};
-template<> struct conj_helper<Packet1cd, Packet2d, false,false>
-{
-  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
-  { return padd(c, pmul(x,y)); }
-  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
-  { return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
-};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
 {
View File
@@ -409,10 +409,16 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double&
   pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
 }
+#if EIGEN_COMP_PGI
+typedef const void * SsePrefetchPtrType;
+#else
+typedef const char * SsePrefetchPtrType;
+#endif
 #ifndef EIGEN_VECTORIZE_AVX
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
 #endif
 #if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
@@ -876,4 +882,14 @@ template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, co
 } // end namespace Eigen
+#if EIGEN_COMP_PGI
+// PGI++ does not define the following intrinsics in C++ mode.
+static inline __m128  _mm_castpd_ps   (__m128d x) { return reinterpret_cast<__m128&>(x); }
+static inline __m128i _mm_castpd_si128(__m128d x) { return reinterpret_cast<__m128i&>(x); }
+static inline __m128d _mm_castps_pd   (__m128  x) { return reinterpret_cast<__m128d&>(x); }
+static inline __m128i _mm_castps_si128(__m128  x) { return reinterpret_cast<__m128i&>(x); }
+static inline __m128  _mm_castsi128_ps(__m128i x) { return reinterpret_cast<__m128&>(x); }
+static inline __m128d _mm_castsi128_pd(__m128i x) { return reinterpret_cast<__m128d&>(x); }
+#endif
 #endif // EIGEN_PACKET_MATH_SSE_H
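Note: SsePrefetchPtrType papers over a prototype difference: most compilers declare _mm_prefetch taking const char*, while PGI declares it taking const void*. A standalone sketch mirroring the typedef above (illustrative helper name, not Eigen API):

    #include <xmmintrin.h>

    #if defined(__PGI)
    typedef const void* PrefetchPtr;  // PGI: void _mm_prefetch(const void*, int)
    #else
    typedef const char* PrefetchPtr;  // GCC/Clang/MSVC: void _mm_prefetch(const char*, int)
    #endif

    static inline void prefetch_float(const float* addr) {
      _mm_prefetch((PrefetchPtr)(addr), _MM_HINT_T0);  // one call site valid for both prototypes
    }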
View File
@@ -14,6 +14,7 @@ namespace Eigen {
 namespace internal {
+#ifndef EIGEN_VECTORIZE_AVX
 template <>
 struct type_casting_traits<float, int> {
   enum {
@@ -23,11 +24,6 @@ struct type_casting_traits<float, int> {
   };
 };
-template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
-  return _mm_cvttps_epi32(a);
-}
 template <>
 struct type_casting_traits<int, float> {
   enum {
@@ -37,11 +33,6 @@ struct type_casting_traits<int, float> {
   };
 };
-template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
-  return _mm_cvtepi32_ps(a);
-}
 template <>
 struct type_casting_traits<double, float> {
   enum {
@@ -51,10 +42,6 @@ struct type_casting_traits<double, float> {
   };
 };
-template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
-  return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
-}
 template <>
 struct type_casting_traits<float, double> {
   enum {
@@ -63,6 +50,19 @@ struct type_casting_traits<float, double> {
     TgtCoeffRatio = 2
   };
 };
+#endif
+template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
+  return _mm_cvttps_epi32(a);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
+  return _mm_cvtepi32_ps(a);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
+  return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
+}
 template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
   // Simply discard the second half of the input
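Note: the type_casting_traits are now compiled out under AVX while the pcast kernels stay available to other backends that reuse this header. A standalone sketch of what the three relocated casts compute:

    #include <emmintrin.h>  // SSE2
    #include <cstdio>

    int main() {
      __m128 f = _mm_set_ps(3.7f, -1.2f, 0.5f, 8.9f);
      __m128i i = _mm_cvttps_epi32(f);  // truncating float -> int, as in pcast<Packet4f,Packet4i>
      __m128 g = _mm_cvtepi32_ps(i);    // int -> float, as in pcast<Packet4i,Packet4f>
      float out[4];
      _mm_storeu_ps(out, g);
      std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 8 0 -1 3
      return 0;
    }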
View File
@@ -336,6 +336,9 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
   }
 };
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
 {
   // TODO optimize it for AltiVec
View File
@@ -255,7 +255,7 @@ struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,Rh
 /** \internal
-  * \brief Template functor to compute the hypot of two scalars
+  * \brief Template functor to compute the hypot of two \b positive \b and \b real scalars
   *
   * \sa MatrixBase::stableNorm(), class Redux
   */
@@ -263,22 +263,15 @@ template<typename Scalar>
 struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
 {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
-  // typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar &y) const
   {
-    EIGEN_USING_STD_MATH(sqrt)
-    Scalar p, qp;
-    if(_x>_y)
-    {
-      p = _x;
-      qp = _y / p;
-    }
-    else
-    {
-      p = _y;
-      qp = _x / p;
-    }
-    return p * sqrt(Scalar(1) + qp*qp);
+    // This functor is used by hypotNorm only for which it is faster to first apply abs
+    // on all coefficients prior to reduction through hypot.
+    // This way we avoid calling abs on positive and real entries, and this also permits
+    // to seamlessly handle complexes. Otherwise we would have to handle both real and complexes
+    // through the same functor...
+    return internal::positive_real_hypot(x,y);
   }
 };
 template<typename Scalar>
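Note: internal::positive_real_hypot itself is not shown in this diff, but the idea is the same scaled formulation the old inline code used: factor out the larger magnitude so the squares cannot overflow or underflow. A minimal sketch, assuming non-negative real inputs as the new functor name implies:

    #include <algorithm>
    #include <cmath>

    // hypot(x,y) = p * sqrt(1 + (m/p)^2) with p = max(x,y), m = min(x,y)
    double positive_real_hypot_sketch(double x, double y) {
      double p = std::max(x, y);
      double m = std::min(x, y);
      if (p == 0.0) return 0.0;   // avoid 0/0 when both inputs are zero
      double qp = m / p;          // qp <= 1, so qp*qp cannot overflow
      return p * std::sqrt(1.0 + qp * qp);
    }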
View File
@@ -44,16 +44,16 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
 {
   linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
     m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
-    m_interPacket(plset<Packet>(0)),
     m_flip(numext::abs(high)<numext::abs(low))
   {}
   template<typename IndexType>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const {
+    typedef typename NumTraits<Scalar>::Real RealScalar;
     if(m_flip)
-      return (i==0)? m_low : (m_high - (m_size1-i)*m_step);
+      return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step);
     else
-      return (i==m_size1)? m_high : (m_low + i*m_step);
+      return (i==m_size1)? m_high : (m_low + RealScalar(i)*m_step);
   }
   template<typename IndexType>
@@ -63,7 +63,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
   // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
   if(m_flip)
   {
-    Packet pi = padd(pset1<Packet>(Scalar(i-m_size1)),m_interPacket);
+    Packet pi = plset<Packet>(Scalar(i-m_size1));
     Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
     if(i==0)
       res = pinsertfirst(res, m_low);
@@ -71,7 +71,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
   }
   else
   {
-    Packet pi = padd(pset1<Packet>(Scalar(i)),m_interPacket);
+    Packet pi = plset<Packet>(Scalar(i));
     Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
     if(i==m_size1-unpacket_traits<Packet>::size+1)
       res = pinsertlast(res, m_high);
@@ -83,7 +83,6 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
   const Scalar m_high;
   const Index m_size1;
   const Scalar m_step;
-  const Packet m_interPacket;
   const bool m_flip;
 };
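Note: plset<Packet>(a) already produces {a, a+1, ...}, so the separately cached m_interPacket member was redundant, and the RealScalar casts keep the index-times-step product well-defined when Scalar is complex. Observable behaviour is unchanged, e.g.:

    #include <Eigen/Core>
    #include <iostream>

    int main() {
      // LinSpaced is the public entry point that exercises linspaced_op_impl.
      Eigen::VectorXf v = Eigen::VectorXf::LinSpaced(5, 0.f, 1.f);
      std::cout << v.transpose() << "\n";  // 0 0.25 0.5 0.75 1
      return 0;
    }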
View File
@@ -83,13 +83,17 @@ struct functor_traits<std::binder1st<T> >
 { enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
 #endif
+#if (__cplusplus < 201703L) && (EIGEN_COMP_MSVC < 1910)
+// std::unary_negate is deprecated since c++17 and will be removed in c++20
 template<typename T>
 struct functor_traits<std::unary_negate<T> >
 { enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
+// std::binary_negate is deprecated since c++17 and will be removed in c++20
 template<typename T>
 struct functor_traits<std::binary_negate<T> >
 { enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
+#endif
 #ifdef EIGEN_STDEXT_SUPPORT
View File
@@ -269,10 +269,13 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
   enum {
     IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
     LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0,
-    RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0
+    RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0,
+    SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0
   };
   Index size = mat.cols();
+  if(SkipDiag)
+    size--;
   Index depth = actualLhs.cols();
   typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar,
@@ -283,10 +286,11 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
   internal::general_matrix_matrix_triangular_product<Index,
     typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
     typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
-    IsRowMajor ? RowMajor : ColMajor, UpLo>
+    IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)>
     ::run(size, depth,
-          &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
-          mat.data(), mat.outerStride(), actualAlpha, blocking);
+          &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(),
+          &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(),
+          mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking);
 }
 };
@@ -294,6 +298,7 @@ template<typename MatrixType, unsigned int UpLo>
 template<typename ProductType>
 TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta)
 {
+  EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED);
   eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols());
   general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta);
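Note: the SkipDiag machinery lets a product be evaluated directly into a strictly triangular destination, with the operand and result pointers offset past the diagonal, while the new static assert rejects unit-diagonal destinations outright. A usage sketch of the case this is meant to enable (an assumption about the supported API surface, not taken from this diff):

    #include <Eigen/Dense>

    int main() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(4,3);
      Eigen::MatrixXd C = Eigen::MatrixXd::Zero(4,4);
      // Only the strictly lower part is computed and written; the diagonal stays 0.
      C.triangularView<Eigen::StrictlyLower>() += A * A.transpose();
      return 0;
    }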
View File
@@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con
   static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
     const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
   { \
-    if (lhs==rhs) { \
+    if ( lhs==rhs && ((UpLo&(Lower|Upper)==UpLo)) ) { \
       general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
       ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
     } else { \
@@ -88,7 +88,7 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
     BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
     char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \
     EIGTYPE beta(1); \
-    BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \
+    BLASFUNC(&uplo, &trans, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), lhs, &lda, (const BLASTYPE*)&numext::real_ref(beta), res, &ldc); \
   } \
 };
@@ -125,9 +125,13 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
   } \
 };
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk)
+EIGEN_BLAS_RANKUPDATE_R(float,  float,  ssyrk)
+#else
 EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)
 EIGEN_BLAS_RANKUPDATE_R(float,  float,  ssyrk_)
+#endif
 // TODO hanlde complex cases
 // EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)
View File
@@ -46,7 +46,7 @@ namespace internal {
 // gemm specialization
-#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASPREFIX) \
+#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASFUNC) \
 template< \
   typename Index, \
   int LhsStorageOrder, bool ConjugateLhs, \
@@ -100,13 +100,20 @@ static void run(Index rows, Index cols, Index depth, \
     ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
   } else b = _rhs; \
 \
-  BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+  BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
 }};
-GEMM_SPECIALIZATION(double,   d,  double, d)
-GEMM_SPECIALIZATION(float,    f,  float,  s)
-GEMM_SPECIALIZATION(dcomplex, cd, double, z)
-GEMM_SPECIALIZATION(scomplex, cf, float,  c)
+#ifdef EIGEN_USE_MKL
+GEMM_SPECIALIZATION(double,   d,  double,        dgemm)
+GEMM_SPECIALIZATION(float,    f,  float,         sgemm)
+GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, zgemm)
+GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8,  cgemm)
+#else
+GEMM_SPECIALIZATION(double,   d,  double, dgemm_)
+GEMM_SPECIALIZATION(float,    f,  float,  sgemm_)
+GEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_)
+GEMM_SPECIALIZATION(scomplex, cf, float,  cgemm_)
+#endif
 } // end namespase internal
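Note: the macro's last argument is now the full callable name rather than a one-letter prefix, so one expansion can bind either to a Fortran BLAS symbol (trailing underscore, complex scalars passed via pointers to their real storage) or to MKL's C prototypes with MKL_Complex8/MKL_Complex16. For reference, the Fortran-style entry point that dgemm_ resolves to has roughly this shape (standard BLAS; const-qualification and index width vary by vendor):

    extern "C" void dgemm_(const char* transa, const char* transb,
                           const int* m, const int* n, const int* k,
                           const double* alpha, const double* a, const int* lda,
                           const double* b, const int* ldb,
                           const double* beta, double* c, const int* ldc);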
View File
@@ -183,8 +183,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,C
     alignmentPattern = AllAligned;
   }
-  const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
-  const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
+  const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1;
+  const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3;
   Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
   for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
@@ -457,8 +457,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
     alignmentPattern = AllAligned;
   }
-  const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
-  const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
+  const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1;
+  const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3;
   Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
   for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
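Note: the old expressions tested the enum constant itself, which is non-zero, so the offsets never depended on the runtime alignment pattern. In miniature (illustrative enumerator values, not Eigen's actual ones):

    enum AlignmentPattern { NoneAligned, EvenAligned, FirstAligned, AllAligned };

    int offset1_old(AlignmentPattern /*ignored!*/, int alignmentStep) {
      return (FirstAligned && alignmentStep==1) ? 3 : 1;                   // left operand is always true
    }
    int offset1_new(AlignmentPattern alignmentPattern, int alignmentStep) {
      return (alignmentPattern==FirstAligned && alignmentStep==1) ? 3 : 1; // depends on the actual pattern
    }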
View File
@@ -85,7 +85,7 @@ EIGEN_BLAS_GEMV_SPECIALIZE(float)
 EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
 EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
-#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASPREFIX) \
+#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
 template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
 struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
 { \
@@ -113,14 +113,21 @@ static void run( \
     x_ptr=x_tmp.data(); \
     incx=1; \
   } else x_ptr=rhs; \
-  BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
+  BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
 }\
 };
-EIGEN_BLAS_GEMV_SPECIALIZATION(double,   double, d)
-EIGEN_BLAS_GEMV_SPECIALIZATION(float,    float,  s)
-EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z)
-EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float,  c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_GEMV_SPECIALIZATION(double,   double,        dgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(float,    float,         sgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv)
+#else
+EIGEN_BLAS_GEMV_SPECIALIZATION(double,   double, dgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(float,    float,  sgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float,  cgemv_)
+#endif
 } // end namespase internal
View File
@@ -40,7 +40,7 @@ namespace internal {
 /* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
-#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, \
           int LhsStorageOrder, bool ConjugateLhs, \
           int RhsStorageOrder, bool ConjugateRhs> \
@@ -81,13 +81,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
     ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
   } else b = _rhs; \
 \
-  BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+  BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
 \
 } \
 };
-#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, \
           int LhsStorageOrder, bool ConjugateLhs, \
           int RhsStorageOrder, bool ConjugateRhs> \
@@ -144,20 +144,26 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
     ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
   } \
 \
-  BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+  BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
 \
 } \
 };
-EIGEN_BLAS_SYMM_L(double,   double,  d, d)
-EIGEN_BLAS_SYMM_L(float,    float,   f, s)
-EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z)
-EIGEN_BLAS_HEMM_L(scomplex, float,  cf, c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMM_L(double,   double,        d,  dsymm)
+EIGEN_BLAS_SYMM_L(float,    float,         f,  ssymm)
+EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm)
+EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8,  cf, chemm)
+#else
+EIGEN_BLAS_SYMM_L(double,   double, d,  dsymm_)
+EIGEN_BLAS_SYMM_L(float,    float,  f,  ssymm_)
+EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_)
+EIGEN_BLAS_HEMM_L(scomplex, float,  cf, chemm_)
+#endif
 /* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
-#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, \
           int LhsStorageOrder, bool ConjugateLhs, \
           int RhsStorageOrder, bool ConjugateRhs> \
@@ -197,13 +203,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
     ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
   } else b = _lhs; \
 \
-  BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+  BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
 \
 } \
 };
-#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, \
           int LhsStorageOrder, bool ConjugateLhs, \
           int RhsStorageOrder, bool ConjugateRhs> \
@@ -259,15 +265,21 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
     ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
   } \
 \
-  BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+  BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
 } \
 };
-EIGEN_BLAS_SYMM_R(double,   double,  d, d)
-EIGEN_BLAS_SYMM_R(float,    float,   f, s)
-EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z)
-EIGEN_BLAS_HEMM_R(scomplex, float,  cf, c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMM_R(double,   double,        d,  dsymm)
+EIGEN_BLAS_SYMM_R(float,    float,         f,  ssymm)
+EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm)
+EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8,  cf, chemm)
+#else
+EIGEN_BLAS_SYMM_R(double,   double, d,  dsymm_)
+EIGEN_BLAS_SYMM_R(float,    float,  f,  ssymm_)
+EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_)
+EIGEN_BLAS_HEMM_R(scomplex, float,  cf, chemm_)
+#endif
 } // end namespace internal
 } // end namespace Eigen
View File
@@ -95,14 +95,21 @@ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
     x_tmp=map_x.conjugate(); \
     x_ptr=x_tmp.data(); \
   } else x_ptr=_rhs; \
-  BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
+  BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
 }\
 };
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMV_SPECIALIZATION(double,   double,        dsymv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(float,    float,         ssymv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8,  chemv)
+#else
 EIGEN_BLAS_SYMV_SPECIALIZATION(double,   double, dsymv_)
 EIGEN_BLAS_SYMV_SPECIALIZATION(float,    float,  ssymv_)
 EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)
 EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float,  chemv_)
+#endif
 } // end namespace internal
View File
@ -137,7 +137,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert())); // To work around an "error: member reference base type 'Matrix<...>
// (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
// not a structure or union" compilation error in nvcc (tested V8.0.61),
// create a dummy internal::constructor_without_unaligned_array_assert
// object to pass to the Matrix constructor.
internal::constructor_without_unaligned_array_assert a;
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a);
triangularBuffer.setZero(); triangularBuffer.setZero();
if((Mode&ZeroDiag)==ZeroDiag) if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero(); triangularBuffer.diagonal().setZero();
@ -284,7 +290,8 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert())); internal::constructor_without_unaligned_array_assert a;
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a);
triangularBuffer.setZero(); triangularBuffer.setZero();
if((Mode&ZeroDiag)==ZeroDiag) if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero(); triangularBuffer.diagonal().setZero();
@ -393,7 +400,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
{ {
template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha) template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha)
{ {
typedef typename Dest::Scalar Scalar; typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef typename Dest::Scalar Scalar;
typedef internal::blas_traits<Lhs> LhsBlasTraits; typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
@ -405,8 +414,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs); typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs); typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
* RhsBlasTraits::extractScalarFactor(a_rhs); RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType; Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
@ -431,6 +441,21 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
&dst.coeffRef(0,0), dst.outerStride(), // result info &dst.coeffRef(0,0), dst.outerStride(), // result info
actualAlpha, blocking actualAlpha, blocking
); );
// Apply correction if the diagonal is unit and a scalar factor was nested:
if ((Mode&UnitDiag)==UnitDiag)
{
if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
{
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);
}
else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
{
Index diagSize = (std::min)(rhs.rows(),rhs.cols());
dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
}
}
} }
}; };
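The correction block added above compensates for a nested scalar factor reaching the unit diagonal: the kernel scales the whole triangular factor, so the diagonal's contribution gets overscaled. A minimal standalone sketch of the identity being restored (illustration only, not part of the commit):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Matrix3d A = Eigen::Matrix3d::Random();
  Eigen::Vector3d b = Eigen::Vector3d::Random();
  double s = 2.0;
  // A nested factor must scale only the strictly triangular part;
  // the unit diagonal stays at 1:
  Eigen::Vector3d ref = (s*A).triangularView<Eigen::UnitLower>() * b;
  // Scaling the whole unit-lower product overshoots the diagonal by (s-1)*b,
  // which is exactly the term the patch subtracts:
  Eigen::Vector3d corrected = s*(A.triangularView<Eigen::UnitLower>()*b) - (s-1.0)*b;
  std::cout << (ref - corrected).norm() << std::endl; // ~0
  return 0;
}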

View File

@ -75,7 +75,7 @@ EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true)
EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false) EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false)
// implements col-major += alpha * op(triangular) * op(general) // implements col-major += alpha * op(triangular) * op(general)
#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ #define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, int Mode, \ template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@ -172,7 +172,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
} \ } \
/*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \ /*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \ /* call ?trmm*/ \
BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \ BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\ \
/* Add op(a_triangular)*b into res*/ \ /* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@ -180,13 +180,20 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
} \ } \
}; };
EIGEN_BLAS_TRMM_L(double, double, d, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMM_L(dcomplex, double, cd, z) EIGEN_BLAS_TRMM_L(double, double, d, dtrmm)
EIGEN_BLAS_TRMM_L(float, float, f, s) EIGEN_BLAS_TRMM_L(dcomplex, MKL_Complex16, cd, ztrmm)
EIGEN_BLAS_TRMM_L(scomplex, float, cf, c) EIGEN_BLAS_TRMM_L(float, float, f, strmm)
EIGEN_BLAS_TRMM_L(scomplex, MKL_Complex8, cf, ctrmm)
#else
EIGEN_BLAS_TRMM_L(double, double, d, dtrmm_)
EIGEN_BLAS_TRMM_L(dcomplex, double, cd, ztrmm_)
EIGEN_BLAS_TRMM_L(float, float, f, strmm_)
EIGEN_BLAS_TRMM_L(scomplex, float, cf, ctrmm_)
#endif
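The macro's last parameter is now the full BLAS symbol instead of a one-letter prefix, which lets the MKL branch bind to MKL's underscore-free entry points with the MKL_Complex* types while the generic branch keeps the Fortran symbols from Eigen's bundled misc/blas.h. For orientation, the conventional C declaration of the Fortran symbol used by the non-MKL branch looks roughly like this (sketch from the standard BLAS interface, not copied from the commit):

extern "C" void dtrmm_(const char* side, const char* uplo, const char* transa,
                       const char* diag, const int* m, const int* n,
                       const double* alpha, const double* a, const int* lda,
                       double* b, const int* ldb);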
// implements col-major += alpha * op(general) * op(triangular) // implements col-major += alpha * op(general) * op(triangular)
#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ #define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, int Mode, \ template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@ -282,7 +289,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
} \ } \
/*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \ /*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \ /* call ?trmm*/ \
BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \ BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\ \
/* Add op(a_triangular)*b into res*/ \ /* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@ -290,11 +297,17 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
} \ } \
}; };
EIGEN_BLAS_TRMM_R(double, double, d, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMM_R(dcomplex, double, cd, z) EIGEN_BLAS_TRMM_R(double, double, d, dtrmm)
EIGEN_BLAS_TRMM_R(float, float, f, s) EIGEN_BLAS_TRMM_R(dcomplex, MKL_Complex16, cd, ztrmm)
EIGEN_BLAS_TRMM_R(scomplex, float, cf, c) EIGEN_BLAS_TRMM_R(float, float, f, strmm)
EIGEN_BLAS_TRMM_R(scomplex, MKL_Complex8, cf, ctrmm)
#else
EIGEN_BLAS_TRMM_R(double, double, d, dtrmm_)
EIGEN_BLAS_TRMM_R(dcomplex, double, cd, ztrmm_)
EIGEN_BLAS_TRMM_R(float, float, f, strmm_)
EIGEN_BLAS_TRMM_R(scomplex, float, cf, ctrmm_)
#endif
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen

View File

@ -221,8 +221,9 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
* RhsBlasTraits::extractScalarFactor(rhs); RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
enum { enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
@ -274,6 +275,12 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
else else
dest = MappedDest(actualDestPtr, dest.size()); dest = MappedDest(actualDestPtr, dest.size());
} }
if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
{
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
}
} }
}; };
@ -295,8 +302,9 @@ template<int Mode> struct trmv_selector<Mode,RowMajor>
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
* RhsBlasTraits::extractScalarFactor(rhs); RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
enum { enum {
DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
@ -326,6 +334,12 @@ template<int Mode> struct trmv_selector<Mode,RowMajor>
actualRhsPtr,1, actualRhsPtr,1,
dest.data(),dest.innerStride(), dest.data(),dest.innerStride(),
actualAlpha); actualAlpha);
if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
{
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
}
} }
}; };

View File

@ -71,7 +71,7 @@ EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)
EIGEN_BLAS_TRMV_SPECIALIZE(scomplex) EIGEN_BLAS_TRMV_SPECIALIZE(scomplex)
// implements col-major: res += alpha * op(triangular) * vector // implements col-major: res += alpha * op(triangular) * vector
#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ #define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
enum { \ enum { \
@ -121,10 +121,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
diag = IsUnitDiag ? 'U' : 'N'; \ diag = IsUnitDiag ? 'U' : 'N'; \
\ \
/* call ?TRMV*/ \ /* call ?TRMV*/ \
BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \ BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\ \
/* Add op(a_tr)rhs into res*/ \ /* Add op(a_tr)rhs into res*/ \
BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \ BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \ /* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
if (size<(std::max)(rows,cols)) { \ if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
@ -142,18 +142,25 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
m = convert_index<BlasIndex>(size); \ m = convert_index<BlasIndex>(size); \
n = convert_index<BlasIndex>(cols-size); \ n = convert_index<BlasIndex>(cols-size); \
} \ } \
BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \ BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \ } \
} \ } \
}; };
EIGEN_BLAS_TRMV_CM(double, double, d, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z) EIGEN_BLAS_TRMV_CM(double, double, d, d,)
EIGEN_BLAS_TRMV_CM(float, float, f, s) EIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,)
EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c) EIGEN_BLAS_TRMV_CM(float, float, f, s,)
EIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8, cf, c,)
#else
EIGEN_BLAS_TRMV_CM(double, double, d, d, _)
EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _)
EIGEN_BLAS_TRMV_CM(float, float, f, s, _)
EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c, _)
#endif
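The added BLASPOSTFIX argument carries the trailing underscore through token pasting; a hypothetical expansion to illustrate:

// Illustration only (not from the commit):
// #define MAKE_TRMV(PRE,POST) PRE##trmv##POST
// MAKE_TRMV(d, _)  ->  dtrmv_   (Fortran BLAS symbol)
// MAKE_TRMV(d,  )  ->  dtrmv    (MKL entry point)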
// implements row-major: res += alpha * op(triangular) * vector // implements row-major: res += alpha * op(triangular) * vector
#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ #define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
enum { \ enum { \
@ -203,10 +210,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
diag = IsUnitDiag ? 'U' : 'N'; \ diag = IsUnitDiag ? 'U' : 'N'; \
\ \
/* call ?TRMV*/ \ /* call ?TRMV*/ \
BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \ BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\ \
/* Add op(a_tr)rhs into res*/ \ /* Add op(a_tr)rhs into res*/ \
BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \ BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \ /* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
if (size<(std::max)(rows,cols)) { \ if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
@ -224,15 +231,22 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
m = convert_index<BlasIndex>(size); \ m = convert_index<BlasIndex>(size); \
n = convert_index<BlasIndex>(cols-size); \ n = convert_index<BlasIndex>(cols-size); \
} \ } \
BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \ BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \ } \
} \ } \
}; };
EIGEN_BLAS_TRMV_RM(double, double, d, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z) EIGEN_BLAS_TRMV_RM(double, double, d, d,)
EIGEN_BLAS_TRMV_RM(float, float, f, s) EIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,)
EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c) EIGEN_BLAS_TRMV_RM(float, float, f, s,)
EIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8, cf, c,)
#else
EIGEN_BLAS_TRMV_RM(double, double, d, d,_)
EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_)
EIGEN_BLAS_TRMV_RM(float, float, f, s,_)
EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c,_)
#endif
} // end namespace internal } // end namespace internal

View File

@ -38,7 +38,7 @@ namespace Eigen {
namespace internal { namespace internal {
// implements LeftSide op(triangular)^-1 * general // implements LeftSide op(triangular)^-1 * general
#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \ #define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \ { \
@ -80,18 +80,24 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
} \ } \
if (IsUnitDiag) diag='U'; \ if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \ /* call ?trsm*/ \
BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \ BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
} \ } \
}; };
EIGEN_BLAS_TRSM_L(double, double, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRSM_L(dcomplex, double, z) EIGEN_BLAS_TRSM_L(double, double, dtrsm)
EIGEN_BLAS_TRSM_L(float, float, s) EIGEN_BLAS_TRSM_L(dcomplex, MKL_Complex16, ztrsm)
EIGEN_BLAS_TRSM_L(scomplex, float, c) EIGEN_BLAS_TRSM_L(float, float, strsm)
EIGEN_BLAS_TRSM_L(scomplex, MKL_Complex8, ctrsm)
#else
EIGEN_BLAS_TRSM_L(double, double, dtrsm_)
EIGEN_BLAS_TRSM_L(dcomplex, double, ztrsm_)
EIGEN_BLAS_TRSM_L(float, float, strsm_)
EIGEN_BLAS_TRSM_L(scomplex, float, ctrsm_)
#endif
// implements RightSide general * op(triangular)^-1 // implements RightSide general * op(triangular)^-1
#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASPREFIX) \ #define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \ { \
@ -133,16 +139,22 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
} \ } \
if (IsUnitDiag) diag='U'; \ if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \ /* call ?trsm*/ \
BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \ BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
/*std::cout << "TRMS_L specialization!\n";*/ \ /*std::cout << "TRMS_L specialization!\n";*/ \
} \ } \
}; };
EIGEN_BLAS_TRSM_R(double, double, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRSM_R(dcomplex, double, z) EIGEN_BLAS_TRSM_R(double, double, dtrsm)
EIGEN_BLAS_TRSM_R(float, float, s) EIGEN_BLAS_TRSM_R(dcomplex, MKL_Complex16, ztrsm)
EIGEN_BLAS_TRSM_R(scomplex, float, c) EIGEN_BLAS_TRSM_R(float, float, strsm)
EIGEN_BLAS_TRSM_R(scomplex, MKL_Complex8, ctrsm)
#else
EIGEN_BLAS_TRSM_R(double, double, dtrsm_)
EIGEN_BLAS_TRSM_R(dcomplex, double, ztrsm_)
EIGEN_BLAS_TRSM_R(float, float, strsm_)
EIGEN_BLAS_TRSM_R(scomplex, float, ctrsm_)
#endif
} // end namespace internal } // end namespace internal

View File

@ -49,10 +49,11 @@
#define EIGEN_USE_LAPACKE #define EIGEN_USE_LAPACKE
#endif #endif
#if defined(EIGEN_USE_MKL_VML) #if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL)
#define EIGEN_USE_MKL #define EIGEN_USE_MKL
#endif #endif
#if defined EIGEN_USE_MKL #if defined EIGEN_USE_MKL
# include <mkl.h> # include <mkl.h>
/*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/ /*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/
@ -108,6 +109,10 @@
#endif #endif
#endif #endif
#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL)
#include "../../misc/blas.h"
#endif
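With misc/blas.h now pulled in before namespace Eigen, and only when MKL does not already provide the declarations, the user-facing opt-in is unchanged. A typical use, assuming a BLAS implementation is linked:

// Define before the first Eigen include:
#define EIGEN_USE_BLAS        // route large products through an external BLAS
// #define EIGEN_USE_MKL_ALL  // or: BLAS + LAPACKE + VML via MKL
#include <Eigen/Dense>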
namespace Eigen { namespace Eigen {
typedef std::complex<double> dcomplex; typedef std::complex<double> dcomplex;
@ -121,8 +126,5 @@ typedef int BlasIndex;
} // end namespace Eigen } // end namespace Eigen
#if defined(EIGEN_USE_BLAS)
#include "../../misc/blas.h"
#endif
#endif // EIGEN_MKL_SUPPORT_H #endif // EIGEN_MKL_SUPPORT_H

View File

@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3 #define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 3 #define EIGEN_MAJOR_VERSION 3
#define EIGEN_MINOR_VERSION 3 #define EIGEN_MINOR_VERSION 5
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
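With EIGEN_MINOR_VERSION bumped to 5 (Eigen 3.3.5), client code can gate on the new release through the macro shown above:

#include <Eigen/Core>
#if !EIGEN_VERSION_AT_LEAST(3,3,5)
#error "This code requires Eigen 3.3.5 or newer"
#endif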
@ -399,7 +399,7 @@
// Does the compiler support variadic templates? // Does the compiler support variadic templates?
#ifndef EIGEN_HAS_VARIADIC_TEMPLATES #ifndef EIGEN_HAS_VARIADIC_TEMPLATES
#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
&& ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) )
// ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
// this prevents nvcc from crashing when compiling Eigen on Tegra X1 // this prevents nvcc from crashing when compiling Eigen on Tegra X1
#define EIGEN_HAS_VARIADIC_TEMPLATES 1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1
@ -413,7 +413,7 @@
#ifdef __CUDACC__ #ifdef __CUDACC__
// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500))
#define EIGEN_HAS_CONSTEXPR 1 #define EIGEN_HAS_CONSTEXPR 1
#endif #endif
#elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
@ -487,11 +487,13 @@
// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC, // EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline // but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
// but GCC is still doing fine with just inline. // but GCC is still doing fine with just inline.
#ifndef EIGEN_STRONG_INLINE
#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC #if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
#define EIGEN_STRONG_INLINE __forceinline #define EIGEN_STRONG_INLINE __forceinline
#else #else
#define EIGEN_STRONG_INLINE inline #define EIGEN_STRONG_INLINE inline
#endif #endif
#endif
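Wrapping the definition in #ifndef makes EIGEN_STRONG_INLINE user-overridable, e.g. to trade inlining aggressiveness for compile time (assumed user-side snippet):

// Must precede the first Eigen include:
#define EIGEN_STRONG_INLINE inline
#include <Eigen/Core>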
// EIGEN_ALWAYS_INLINE is the strongest: it has the effect of making the function inline and adding every possible // EIGEN_ALWAYS_INLINE is the strongest: it has the effect of making the function inline and adding every possible
// attribute to maximize inlining. This should only be used when really necessary: in particular, // attribute to maximize inlining. This should only be used when really necessary: in particular,
@ -812,7 +814,8 @@ namespace Eigen {
// just an empty macro ! // just an empty macro !
#define EIGEN_EMPTY #define EIGEN_EMPTY
#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) #if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0)
// for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =; using Base::operator =;
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
@ -986,7 +989,13 @@ namespace Eigen {
# define EIGEN_NOEXCEPT # define EIGEN_NOEXCEPT
# define EIGEN_NOEXCEPT_IF(x) # define EIGEN_NOEXCEPT_IF(x)
# define EIGEN_NO_THROW throw() # define EIGEN_NO_THROW throw()
# define EIGEN_EXCEPTION_SPEC(X) throw(X) # if EIGEN_COMP_MSVC
// MSVC does not support exception specifications (warning C4290),
// and they are deprecated in c++11 anyway.
# define EIGEN_EXCEPTION_SPEC(X) throw()
# else
# define EIGEN_EXCEPTION_SPEC(X) throw(X)
# endif
#endif #endif
#endif // EIGEN_MACROS_H #endif // EIGEN_MACROS_H

View File

@ -70,7 +70,7 @@ inline void throw_std_bad_alloc()
throw std::bad_alloc(); throw std::bad_alloc();
#else #else
std::size_t huge = static_cast<std::size_t>(-1); std::size_t huge = static_cast<std::size_t>(-1);
new int[huge]; ::operator new(huge);
#endif #endif
} }
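A hedged reading of the ::operator new change (the commit itself gives no rationale): a discarded new int[huge] expression may be elided by the optimizer and can trigger over-sized-allocation warnings, while a direct call to ::operator new must either succeed or throw, which is exactly what this fallback needs:

#include <cstddef>
#include <new>

// Sketch of the guarantee relied upon above:
void force_bad_alloc()
{
  std::size_t huge = static_cast<std::size_t>(-1);
  ::operator new(huge); // must throw std::bad_alloc; cannot be optimized out
}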
@ -493,7 +493,7 @@ template<typename T> struct smart_copy_helper<T,true> {
IntPtr size = IntPtr(end)-IntPtr(start); IntPtr size = IntPtr(end)-IntPtr(start);
if(size==0) return; if(size==0) return;
eigen_internal_assert(start!=0 && end!=0 && target!=0); eigen_internal_assert(start!=0 && end!=0 && target!=0);
memcpy(target, start, size); std::memcpy(target, start, size);
} }
}; };
@ -696,7 +696,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
/** \class aligned_allocator /** \class aligned_allocator
* \ingroup Core_Module * \ingroup Core_Module
* *
* \brief STL compatible allocator to use with with 16 byte aligned types * \brief STL compatible allocator to use with types requiring a non-standard alignment.
*
* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
* By default, it will thus provide at least 16 bytes alignment and more in the following cases:
* - 32 bytes alignment if AVX is enabled.
* - 64 bytes alignment if AVX512 is enabled.
*
* This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
* \link TopicPreprocessorDirectivesPerformance there \endlink.
* *
* Example: * Example:
* \code * \code
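A usage sketch for the allocator documented above, in the standard Eigen idiom for fixed-size vectorizable types:

#include <Eigen/Dense>
#include <vector>

// Vector4f is fixed-size vectorizable, so the container must over-align:
std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f> > pts(100);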

View File

@ -485,6 +485,26 @@ T div_ceil(const T &a, const T &b)
return (a+b-1) / b; return (a+b-1) / b;
} }
// The aim of the following functions is to bypass -Wfloat-equal warnings
// when we really want a strict equality comparison on floating points.
template<typename X, typename Y> EIGEN_STRONG_INLINE
bool equal_strict(const X& x,const Y& y) { return x == y; }
template<> EIGEN_STRONG_INLINE
bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }
template<> EIGEN_STRONG_INLINE
bool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); }
template<typename X, typename Y> EIGEN_STRONG_INLINE
bool not_equal_strict(const X& x,const Y& y) { return x != y; }
template<> EIGEN_STRONG_INLINE
bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }
template<> EIGEN_STRONG_INLINE
bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
} // end namespace numext } // end namespace numext
} // end namespace Eigen } // end namespace Eigen
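Sketch of how the new helpers are meant to be used (call sites are internal to Eigen; the wrapper below is hypothetical, for illustration only):

#include <Eigen/Core>

// Compiles warning-free under -Wfloat-equal, unlike a bare x == Scalar(0):
template<typename Scalar>
bool is_exactly_zero(const Scalar& x)
{
  return Eigen::numext::equal_strict(x, Scalar(0));
}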

View File

@ -24,6 +24,7 @@
* *
*/ */
#ifndef EIGEN_STATIC_ASSERT
#ifndef EIGEN_NO_STATIC_ASSERT #ifndef EIGEN_NO_STATIC_ASSERT
#if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600)) #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600))
@ -44,64 +45,65 @@
struct static_assertion<true> struct static_assertion<true>
{ {
enum { enum {
YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX, YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1,
YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES, YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1,
YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1,
THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE, THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1,
THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE, THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1,
THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE, THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1,
OUT_OF_RANGE_ACCESS, OUT_OF_RANGE_ACCESS=1,
YOU_MADE_A_PROGRAMMING_MISTAKE, YOU_MADE_A_PROGRAMMING_MISTAKE=1,
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1,
EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE, EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1,
YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR, YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1,
YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR, YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1,
UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC, UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1,
THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES, THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1,
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED, FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1,
NUMERIC_TYPE_MUST_BE_REAL, NUMERIC_TYPE_MUST_BE_REAL=1,
COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED, COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1,
WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1,
THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE, THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1,
INVALID_MATRIX_PRODUCT, INVALID_MATRIX_PRODUCT=1,
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS, INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1,
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION, INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1,
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY, YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1,
THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1,
THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES, THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1,
INVALID_MATRIX_TEMPLATE_PARAMETERS, INVALID_MATRIX_TEMPLATE_PARAMETERS=1,
INVALID_MATRIXBASE_TEMPLATE_PARAMETERS, INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1,
BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER, BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1,
THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX, THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1,
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE, THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1,
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES, THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1,
YOU_ALREADY_SPECIFIED_THIS_STRIDE, YOU_ALREADY_SPECIFIED_THIS_STRIDE=1,
INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION, INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1,
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD, THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1,
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1, PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1,
THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS, THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1,
YOU_CANNOT_MIX_ARRAYS_AND_MATRICES, YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1,
YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION, YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1,
THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY, THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1,
YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT, YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1,
THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS, THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1,
THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1,
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL, THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1,
THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES, THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1,
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED, YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1,
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED, YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1,
THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE, THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1,
THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH, THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1,
OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1,
IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1,
STORAGE_LAYOUT_DOES_NOT_MATCH, STORAGE_LAYOUT_DOES_NOT_MATCH=1,
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1,
THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1,
MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1,
THIS_TYPE_IS_NOT_SUPPORTED, THIS_TYPE_IS_NOT_SUPPORTED=1,
STORAGE_KIND_MUST_MATCH, STORAGE_KIND_MUST_MATCH=1,
STORAGE_INDEX_MUST_MATCH, STORAGE_INDEX_MUST_MATCH=1,
CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1,
SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1
}; };
}; };
@ -131,7 +133,7 @@
#define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG); #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG);
#endif // EIGEN_NO_STATIC_ASSERT #endif // EIGEN_NO_STATIC_ASSERT
#endif // EIGEN_STATIC_ASSERT
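The new outer #ifndef EIGEN_STATIC_ASSERT guard lets a project substitute its own mechanism before including Eigen (assumed user-side snippet, mirroring Eigen's own C++11 branch):

#define EIGEN_STATIC_ASSERT(CONDITION,MSG) static_assert(CONDITION, #MSG);
#include <Eigen/Core>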
// static assertion failing if the type \a TYPE is not a vector type // static assertion failing if the type \a TYPE is not a vector type
#define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \ #define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \

View File

@ -311,7 +311,6 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
// Aliases: // Aliases:
Map<VectorType> v(reinterpret_cast<Scalar*>(m_tmp.data()), size); Map<VectorType> v(reinterpret_cast<Scalar*>(m_tmp.data()), size);
ComplexVectorType &cv = m_tmp; ComplexVectorType &cv = m_tmp;
const MatrixType &mZ = m_realQZ.matrixZ();
const MatrixType &mS = m_realQZ.matrixS(); const MatrixType &mS = m_realQZ.matrixS();
const MatrixType &mT = m_realQZ.matrixT(); const MatrixType &mT = m_realQZ.matrixT();
@ -351,7 +350,7 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
} }
} }
} }
m_eivec.col(i).real().noalias() = mZ.transpose() * v; m_eivec.col(i).real().noalias() = m_realQZ.matrixZ().transpose() * v;
m_eivec.col(i).real().normalize(); m_eivec.col(i).real().normalize();
m_eivec.col(i).imag().setConstant(0); m_eivec.col(i).imag().setConstant(0);
} }
@ -400,7 +399,7 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
/ (alpha*mT.coeffRef(j,j) - static_cast<Scalar>(beta*mS.coeffRef(j,j))); / (alpha*mT.coeffRef(j,j) - static_cast<Scalar>(beta*mS.coeffRef(j,j)));
} }
} }
m_eivec.col(i+1).noalias() = (mZ.transpose() * cv); m_eivec.col(i+1).noalias() = (m_realQZ.matrixZ().transpose() * cv);
m_eivec.col(i+1).normalize(); m_eivec.col(i+1).normalize();
m_eivec.col(i) = m_eivec.col(i+1).conjugate(); m_eivec.col(i) = m_eivec.col(i+1).conjugate();
} }

View File

@ -303,7 +303,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
Scalar exshift(0); // sum of exceptional shifts Scalar exshift(0); // sum of exceptional shifts
Scalar norm = computeNormOfT(); Scalar norm = computeNormOfT();
if(norm!=0) if(norm!=Scalar(0))
{ {
while (iu >= 0) while (iu >= 0)
{ {
@ -327,7 +327,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
else // No convergence yet else // No convergence yet
{ {
// The firstHouseholderVector vector has to be initialized to something to get rid of a silly GCC warning (-O1 -Wall -DNDEBUG ) // The firstHouseholderVector vector has to be initialized to something to get rid of a silly GCC warning (-O1 -Wall -DNDEBUG )
Vector3s firstHouseholderVector(0,0,0), shiftInfo; Vector3s firstHouseholderVector = Vector3s::Zero(), shiftInfo;
computeShift(iu, iter, exshift, shiftInfo); computeShift(iu, iter, exshift, shiftInfo);
iter = iter + 1; iter = iter + 1;
totalIter = totalIter + 1; totalIter = totalIter + 1;

View File

@ -37,7 +37,7 @@ namespace Eigen {
/** \internal Specialization for the data types supported by LAPACKe */ /** \internal Specialization for the data types supported by LAPACKe */
#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW, LAPACKE_COLROW ) \ #define EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW ) \
template<> template<typename InputType> inline \ template<> template<typename InputType> inline \
SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, int options) \ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, int options) \
@ -47,7 +47,7 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c
&& (options&EigVecMask)!=EigVecMask \ && (options&EigVecMask)!=EigVecMask \
&& "invalid option parameter"); \ && "invalid option parameter"); \
bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \ bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \
lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), lda, matrix_order, info; \ lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), lda, info; \
m_eivalues.resize(n,1); \ m_eivalues.resize(n,1); \
m_subdiag.resize(n-1); \ m_subdiag.resize(n-1); \
m_eivec = matrix; \ m_eivec = matrix; \
@ -63,27 +63,24 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c
} \ } \
\ \
lda = internal::convert_index<lapack_int>(m_eivec.outerStride()); \ lda = internal::convert_index<lapack_int>(m_eivec.outerStride()); \
matrix_order=LAPACKE_COLROW; \
char jobz, uplo='L'/*, range='A'*/; \ char jobz, uplo='L'/*, range='A'*/; \
jobz = computeEigenvectors ? 'V' : 'N'; \ jobz = computeEigenvectors ? 'V' : 'N'; \
\ \
info = LAPACKE_##LAPACKE_NAME( matrix_order, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \ info = LAPACKE_##LAPACKE_NAME( LAPACK_COL_MAJOR, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \
m_info = (info==0) ? Success : NoConvergence; \ m_info = (info==0) ? Success : NoConvergence; \
m_isInitialized = true; \ m_isInitialized = true; \
m_eigenvectorsOk = computeEigenvectors; \ m_eigenvectorsOk = computeEigenvectors; \
return *this; \ return *this; \
} }
#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME ) \
EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, ColMajor ) \
EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, RowMajor )
EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, ColMajor, LAPACK_COL_MAJOR) EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev)
EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, ColMajor, LAPACK_COL_MAJOR) EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev)
EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, ColMajor, LAPACK_COL_MAJOR) EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev)
EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, ColMajor, LAPACK_COL_MAJOR) EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev)
EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, RowMajor, LAPACK_ROW_MAJOR)
EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, RowMajor, LAPACK_ROW_MAJOR)
EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, RowMajor, LAPACK_ROW_MAJOR)
EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, RowMajor, LAPACK_ROW_MAJOR)
} // end namespace Eigen } // end namespace Eigen

View File

@ -178,7 +178,7 @@ EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const Quaterni
if (n != Scalar(0)) if (n != Scalar(0))
{ {
m_angle = Scalar(2)*atan2(n, abs(q.w())); m_angle = Scalar(2)*atan2(n, abs(q.w()));
if(q.w() < 0) if(q.w() < Scalar(0))
n = -n; n = -n;
m_axis = q.vec() / n; m_axis = q.vec() / n;
} }

View File

@ -43,6 +43,11 @@ class QuaternionBase : public RotationBase<Derived, 3>
typedef typename internal::traits<Derived>::Scalar Scalar; typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename internal::traits<Derived>::Coefficients Coefficients; typedef typename internal::traits<Derived>::Coefficients Coefficients;
typedef typename Coefficients::CoeffReturnType CoeffReturnType;
typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
Scalar&, CoeffReturnType>::type NonConstCoeffReturnType;
enum { enum {
Flags = Eigen::internal::traits<Derived>::Flags Flags = Eigen::internal::traits<Derived>::Flags
}; };
@ -58,22 +63,22 @@ class QuaternionBase : public RotationBase<Derived, 3>
/** \returns the \c x coefficient */ /** \returns the \c x coefficient */
EIGEN_DEVICE_FUNC inline Scalar x() const { return this->derived().coeffs().coeff(0); } EIGEN_DEVICE_FUNC inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); }
/** \returns the \c y coefficient */ /** \returns the \c y coefficient */
EIGEN_DEVICE_FUNC inline Scalar y() const { return this->derived().coeffs().coeff(1); } EIGEN_DEVICE_FUNC inline CoeffReturnType y() const { return this->derived().coeffs().coeff(1); }
/** \returns the \c z coefficient */ /** \returns the \c z coefficient */
EIGEN_DEVICE_FUNC inline Scalar z() const { return this->derived().coeffs().coeff(2); } EIGEN_DEVICE_FUNC inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); }
/** \returns the \c w coefficient */ /** \returns the \c w coefficient */
EIGEN_DEVICE_FUNC inline Scalar w() const { return this->derived().coeffs().coeff(3); } EIGEN_DEVICE_FUNC inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); }
/** \returns a reference to the \c x coefficient */ /** \returns a reference to the \c x coefficient (if Derived is a non-const lvalue) */
EIGEN_DEVICE_FUNC inline Scalar& x() { return this->derived().coeffs().coeffRef(0); } EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); }
/** \returns a reference to the \c y coefficient */ /** \returns a reference to the \c y coefficient (if Derived is a non-const lvalue) */
EIGEN_DEVICE_FUNC inline Scalar& y() { return this->derived().coeffs().coeffRef(1); } EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); }
/** \returns a reference to the \c z coefficient */ /** \returns a reference to the \c z coefficient (if Derived is a non-const lvalue) */
EIGEN_DEVICE_FUNC inline Scalar& z() { return this->derived().coeffs().coeffRef(2); } EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); }
/** \returns a reference to the \c w coefficient */ /** \returns a reference to the \c w coefficient (if Derived is a non-const lvalue) */
EIGEN_DEVICE_FUNC inline Scalar& w() { return this->derived().coeffs().coeffRef(3); } EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); }
/** \returns a read-only vector expression of the imaginary part (x,y,z) */ /** \returns a read-only vector expression of the imaginary part (x,y,z) */
EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); } EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
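The CoeffReturnType / NonConstCoeffReturnType split is what allows read-only quaternion expressions, such as a const Map, to compile; a minimal sketch:

#include <Eigen/Geometry>

const double data[4] = {0.0, 0.0, 0.0, 1.0}; // stored as x, y, z, w
Eigen::Map<const Eigen::Quaterniond> q(data);
double w = q.w(); // returns by value (CoeffReturnType) on a const expression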
@ -423,7 +428,7 @@ typedef Map<Quaternion<double>, Aligned> QuaternionMapAlignedd;
// Generic Quaternion * Quaternion product // Generic Quaternion * Quaternion product
// This product can be specialized for a given architecture via the Arch template argument. // This product can be specialized for a given architecture via the Arch template argument.
namespace internal { namespace internal {
template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options> struct quat_product template<int Arch, class Derived1, class Derived2, typename Scalar> struct quat_product
{ {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
return Quaternion<Scalar> return Quaternion<Scalar>
@ -446,8 +451,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c
EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value), EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
return internal::quat_product<Architecture::Target, Derived, OtherDerived, return internal::quat_product<Architecture::Target, Derived, OtherDerived,
typename internal::traits<Derived>::Scalar, typename internal::traits<Derived>::Scalar>::run(*this, other);
EIGEN_PLAIN_ENUM_MIN(internal::traits<Derived>::Alignment, internal::traits<OtherDerived>::Alignment)>::run(*this, other);
} }
/** \sa operator*(Quaternion) */ /** \sa operator*(Quaternion) */
@ -672,7 +676,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar>
// Generic conjugate of a Quaternion // Generic conjugate of a Quaternion
namespace internal { namespace internal {
template<int Arch, class Derived, typename Scalar, int _Options> struct quat_conj template<int Arch, class Derived, typename Scalar> struct quat_conj
{ {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z()); return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z());
@ -691,8 +695,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar>
QuaternionBase<Derived>::conjugate() const QuaternionBase<Derived>::conjugate() const
{ {
return internal::quat_conj<Architecture::Target, Derived, return internal::quat_conj<Architecture::Target, Derived,
typename internal::traits<Derived>::Scalar, typename internal::traits<Derived>::Scalar>::run(*this);
internal::traits<Derived>::Alignment>::run(*this);
} }
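With the Alignment template parameter gone, the architecture-specific quat_product/quat_conj specializations match for any operand alignment and resolve it internally. For example, an unaligned quaternion map can now take the SSE path (sketch, assuming SSE is enabled):

#include <Eigen/Geometry>

float buf[4] = {0.f, 0.f, 0.f, 1.f};          // alignment not guaranteed
Eigen::Map<Eigen::Quaternionf, Eigen::Unaligned> qa(buf);
Eigen::Quaternionf qb = Eigen::Quaternionf::Identity();
Eigen::Quaternionf qc = qa * qb;              // per-operand loads, see below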

View File

@ -16,17 +16,23 @@ namespace Eigen {
namespace internal { namespace internal {
template<class Derived, class OtherDerived> template<class Derived, class OtherDerived>
struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16> struct quat_product<Architecture::SSE, Derived, OtherDerived, float>
{ {
enum {
AAlignment = traits<Derived>::Alignment,
BAlignment = traits<OtherDerived>::Alignment,
ResAlignment = traits<Quaternion<float> >::Alignment
};
static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
{ {
Quaternion<float> res; Quaternion<float> res;
const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f); const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f);
__m128 a = _a.coeffs().template packet<Aligned16>(0); __m128 a = _a.coeffs().template packet<AAlignment>(0);
__m128 b = _b.coeffs().template packet<Aligned16>(0); __m128 b = _b.coeffs().template packet<BAlignment>(0);
__m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
__m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1)); __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));
pstore(&res.x(), pstoret<float,Packet4f,ResAlignment>(
&res.x(),
_mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)), _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)),
_mm_mul_ps(vec4f_swizzle1(a,2,0,1,0), _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0),
vec4f_swizzle1(b,1,2,0,0))), vec4f_swizzle1(b,1,2,0,0))),
@ -36,14 +42,17 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16>
} }
}; };
template<class Derived, int Alignment> template<class Derived>
struct quat_conj<Architecture::SSE, Derived, float, Alignment> struct quat_conj<Architecture::SSE, Derived, float>
{ {
enum {
ResAlignment = traits<Quaternion<float> >::Alignment
};
static inline Quaternion<float> run(const QuaternionBase<Derived>& q) static inline Quaternion<float> run(const QuaternionBase<Derived>& q)
{ {
Quaternion<float> res; Quaternion<float> res;
const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f); const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f);
pstore(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<Alignment>(0))); pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
return res; return res;
} }
}; };
@ -52,6 +61,9 @@ struct quat_conj<Architecture::SSE, Derived, float, Alignment>
template<typename VectorLhs,typename VectorRhs> template<typename VectorLhs,typename VectorRhs>
struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
{ {
enum {
ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment
};
static inline typename plain_matrix_type<VectorLhs>::type static inline typename plain_matrix_type<VectorLhs>::type
run(const VectorLhs& lhs, const VectorRhs& rhs) run(const VectorLhs& lhs, const VectorRhs& rhs)
{ {
@ -60,7 +72,7 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
__m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3));
__m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3));
typename plain_matrix_type<VectorLhs>::type res; typename plain_matrix_type<VectorLhs>::type res;
pstore(&res.x(),_mm_sub_ps(mul1,mul2)); pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2));
return res; return res;
} }
}; };
@ -68,9 +80,14 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
template<class Derived, class OtherDerived, int Alignment> template<class Derived, class OtherDerived>
struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> struct quat_product<Architecture::SSE, Derived, OtherDerived, double>
{ {
enum {
BAlignment = traits<OtherDerived>::Alignment,
ResAlignment = traits<Quaternion<double> >::Alignment
};
static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
{ {
const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
@ -78,8 +95,8 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
Quaternion<double> res; Quaternion<double> res;
const double* a = _a.coeffs().data(); const double* a = _a.coeffs().data();
Packet2d b_xy = _b.coeffs().template packet<Alignment>(0); Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0);
Packet2d b_zw = _b.coeffs().template packet<Alignment>(2); Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2);
Packet2d a_xx = pset1<Packet2d>(a[0]); Packet2d a_xx = pset1<Packet2d>(a[0]);
Packet2d a_yy = pset1<Packet2d>(a[1]); Packet2d a_yy = pset1<Packet2d>(a[1]);
Packet2d a_zz = pset1<Packet2d>(a[2]); Packet2d a_zz = pset1<Packet2d>(a[2]);
@ -97,9 +114,9 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw)); t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw));
#ifdef EIGEN_VECTORIZE_SSE3 #ifdef EIGEN_VECTORIZE_SSE3
EIGEN_UNUSED_VARIABLE(mask) EIGEN_UNUSED_VARIABLE(mask)
pstore(&res.x(), _mm_addsub_pd(t1, preverse(t2))); pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2)));
#else #else
pstore(&res.x(), padd(t1, pxor(mask,preverse(t2)))); pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2))));
#endif #endif
/* /*
@ -111,25 +128,28 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy)); t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy));
#ifdef EIGEN_VECTORIZE_SSE3 #ifdef EIGEN_VECTORIZE_SSE3
EIGEN_UNUSED_VARIABLE(mask) EIGEN_UNUSED_VARIABLE(mask)
pstore(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2)));
#else #else
pstore(&res.z(), psub(t1, pxor(mask,preverse(t2)))); pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2))));
#endif #endif
return res; return res;
} }
}; };
template<class Derived, int Alignment> template<class Derived>
struct quat_conj<Architecture::SSE, Derived, double, Alignment> struct quat_conj<Architecture::SSE, Derived, double>
{ {
enum {
ResAlignment = traits<Quaternion<double> >::Alignment
};
static inline Quaternion<double> run(const QuaternionBase<Derived>& q) static inline Quaternion<double> run(const QuaternionBase<Derived>& q)
{ {
Quaternion<double> res; Quaternion<double> res;
const __m128d mask0 = _mm_setr_pd(-0.,-0.); const __m128d mask0 = _mm_setr_pd(-0.,-0.);
const __m128d mask2 = _mm_setr_pd(-0.,0.); const __m128d mask2 = _mm_setr_pd(-0.,0.);
pstore(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<Alignment>(0))); pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
pstore(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<Alignment>(2))); pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2)));
return res; return res;
} }
}; };
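The recurring pstoret<Scalar,Packet,Alignment> calls replace the former unconditional pstore; the helper dispatches at compile time between aligned and unaligned stores, so the result's alignment no longer has to match the operands':

// Illustration of the dispatch (names from Eigen's packet math):
//   pstoret<float, Packet4f, Aligned16>(p, v);  // aligned store, like pstore
//   pstoret<float, Packet4f, Unaligned>(p, v);  // unaligned store, like pstoreu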

View File

@ -152,13 +152,28 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar>
{ {
// Compute the inverse squared-norm of each column of mat // Compute the inverse squared-norm of each column of mat
m_invdiag.resize(mat.cols()); m_invdiag.resize(mat.cols());
for(Index j=0; j<mat.outerSize(); ++j) if(MatType::IsRowMajor)
{ {
RealScalar sum = mat.innerVector(j).squaredNorm(); m_invdiag.setZero();
if(sum>0) for(Index j=0; j<mat.outerSize(); ++j)
m_invdiag(j) = RealScalar(1)/sum; {
else for(typename MatType::InnerIterator it(mat,j); it; ++it)
m_invdiag(j) = RealScalar(1); m_invdiag(it.index()) += numext::abs2(it.value());
}
for(Index j=0; j<mat.cols(); ++j)
if(numext::real(m_invdiag(j))>RealScalar(0))
m_invdiag(j) = RealScalar(1)/numext::real(m_invdiag(j));
}
else
{
for(Index j=0; j<mat.outerSize(); ++j)
{
RealScalar sum = mat.col(j).squaredNorm();
if(sum>RealScalar(0))
m_invdiag(j) = RealScalar(1)/sum;
else
m_invdiag(j) = RealScalar(1);
}
} }
Base::m_isInitialized = true; Base::m_isInitialized = true;
return *this; return *this;
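The row-major branch fixes the least-squares preconditioner for row-major sparse inputs, where innerVector(j) is a row and the old loop therefore accumulated row norms instead of column norms. Typical affected usage (sketch):

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>

typedef Eigen::SparseMatrix<double, Eigen::RowMajor> SpMat;
SpMat A;            // m x n least-squares system, filled elsewhere
Eigen::VectorXd b;  // right-hand side
Eigen::LeastSquaresConjugateGradient<SpMat> lscg;
// lscg.compute(A); Eigen::VectorXd x = lscg.solve(b);
// now preconditioned with true column norms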

View File

@ -298,12 +298,144 @@ inline void MatrixBase<Derived>::applyOnTheRight(Index p, Index q, const JacobiR
} }
namespace internal { namespace internal {
template<typename Scalar, typename OtherScalar,
int SizeAtCompileTime, int MinAlignment, bool Vectorizable>
struct apply_rotation_in_the_plane_selector
{
static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
{
for(Index i=0; i<size; ++i)
{
Scalar xi = *x;
Scalar yi = *y;
*x = c * xi + numext::conj(s) * yi;
*y = -s * xi + numext::conj(c) * yi;
x += incrx;
y += incry;
}
}
};
template<typename Scalar, typename OtherScalar,
int SizeAtCompileTime, int MinAlignment>
struct apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,true /* vectorizable */>
{
static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
{
enum {
PacketSize = packet_traits<Scalar>::size,
OtherPacketSize = packet_traits<OtherScalar>::size
};
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<OtherScalar>::type OtherPacket;
/*** dynamic-size vectorized paths ***/
if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1))
{
// both vectors are sequentially stored in memory => vectorization
enum { Peeling = 2 };
Index alignedStart = internal::first_default_aligned(y, size);
Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
const OtherPacket pc = pset1<OtherPacket>(c);
const OtherPacket ps = pset1<OtherPacket>(s);
conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;
conj_helper<OtherPacket,Packet,false,false> pm;
for(Index i=0; i<alignedStart; ++i)
{
Scalar xi = x[i];
Scalar yi = y[i];
x[i] = c * xi + numext::conj(s) * yi;
y[i] = -s * xi + numext::conj(c) * yi;
}
Scalar* EIGEN_RESTRICT px = x + alignedStart;
Scalar* EIGEN_RESTRICT py = y + alignedStart;
if(internal::first_default_aligned(x, size)==alignedStart)
{
for(Index i=alignedStart; i<alignedEnd; i+=PacketSize)
{
Packet xi = pload<Packet>(px);
Packet yi = pload<Packet>(py);
pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
px += PacketSize;
py += PacketSize;
}
}
else
{
Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize);
for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize)
{
Packet xi = ploadu<Packet>(px);
Packet xi1 = ploadu<Packet>(px+PacketSize);
Packet yi = pload <Packet>(py);
Packet yi1 = pload <Packet>(py+PacketSize);
pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1)));
pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1)));
px += Peeling*PacketSize;
py += Peeling*PacketSize;
}
if(alignedEnd!=peelingEnd)
{
Packet xi = ploadu<Packet>(x+peelingEnd);
Packet yi = pload <Packet>(y+peelingEnd);
pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
}
}
for(Index i=alignedEnd; i<size; ++i)
{
Scalar xi = x[i];
Scalar yi = y[i];
x[i] = c * xi + numext::conj(s) * yi;
y[i] = -s * xi + numext::conj(c) * yi;
}
}
/*** fixed-size vectorized path ***/
else if(SizeAtCompileTime != Dynamic && MinAlignment>0) // FIXME should be compared to the required alignment
{
const OtherPacket pc = pset1<OtherPacket>(c);
const OtherPacket ps = pset1<OtherPacket>(s);
conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;
conj_helper<OtherPacket,Packet,false,false> pm;
Scalar* EIGEN_RESTRICT px = x;
Scalar* EIGEN_RESTRICT py = y;
for(Index i=0; i<size; i+=PacketSize)
{
Packet xi = pload<Packet>(px);
Packet yi = pload<Packet>(py);
pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
px += PacketSize;
py += PacketSize;
}
}
/*** non-vectorized path ***/
else
{
apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,false>::run(x,incrx,y,incry,size,c,s);
}
}
};
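
The selector now types the rotation coefficients (OtherScalar) independently of the vector elements (Scalar), and it carries two helpers: pm for the plain mixed product and pcj for the conjugating one. In every product the conjugation lands on the coefficient, exactly as in the scalar loops above; at scalar level the update reads:

    #include <complex>
    #include <cstddef>
    #include <vector>

    // Apply the rotation [c s; -s c]: the conjugate is taken of the
    // coefficient in the second factor of each update, never of the data.
    void applyRotation(std::vector<std::complex<double> >& x,
                       std::vector<std::complex<double> >& y,
                       std::complex<double> c, std::complex<double> s)
    {
      for (std::size_t i = 0; i < x.size(); ++i)
      {
        std::complex<double> xi = x[i], yi = y[i];
        x[i] =  c * xi + std::conj(s) * yi;
        y[i] = -s * xi + std::conj(c) * yi;
      }
    }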
template<typename VectorX, typename VectorY, typename OtherScalar> template<typename VectorX, typename VectorY, typename OtherScalar>
void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j) void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
{ {
typedef typename VectorX::Scalar Scalar; typedef typename VectorX::Scalar Scalar;
enum { PacketSize = packet_traits<Scalar>::size }; const bool Vectorizable = (VectorX::Flags & VectorY::Flags & PacketAccessBit)
typedef typename packet_traits<Scalar>::type Packet; && (int(packet_traits<Scalar>::size) == int(packet_traits<OtherScalar>::size));
eigen_assert(xpr_x.size() == xpr_y.size()); eigen_assert(xpr_x.size() == xpr_y.size());
Index size = xpr_x.size(); Index size = xpr_x.size();
Index incrx = xpr_x.derived().innerStride(); Index incrx = xpr_x.derived().innerStride();
@ -317,113 +449,11 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
if (c==OtherScalar(1) && s==OtherScalar(0)) if (c==OtherScalar(1) && s==OtherScalar(0))
return; return;
/*** dynamic-size vectorized paths ***/ apply_rotation_in_the_plane_selector<
Scalar,OtherScalar,
if(VectorX::SizeAtCompileTime == Dynamic && VectorX::SizeAtCompileTime,
(VectorX::Flags & VectorY::Flags & PacketAccessBit) && EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment),
((incrx==1 && incry==1) || PacketSize == 1)) Vectorizable>::run(x,incrx,y,incry,size,c,s);
{
// both vectors are sequentially stored in memory => vectorization
enum { Peeling = 2 };
Index alignedStart = internal::first_default_aligned(y, size);
Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
const Packet pc = pset1<Packet>(c);
const Packet ps = pset1<Packet>(s);
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj;
for(Index i=0; i<alignedStart; ++i)
{
Scalar xi = x[i];
Scalar yi = y[i];
x[i] = c * xi + numext::conj(s) * yi;
y[i] = -s * xi + numext::conj(c) * yi;
}
Scalar* EIGEN_RESTRICT px = x + alignedStart;
Scalar* EIGEN_RESTRICT py = y + alignedStart;
if(internal::first_default_aligned(x, size)==alignedStart)
{
for(Index i=alignedStart; i<alignedEnd; i+=PacketSize)
{
Packet xi = pload<Packet>(px);
Packet yi = pload<Packet>(py);
pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
px += PacketSize;
py += PacketSize;
}
}
else
{
Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize);
for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize)
{
Packet xi = ploadu<Packet>(px);
Packet xi1 = ploadu<Packet>(px+PacketSize);
Packet yi = pload <Packet>(py);
Packet yi1 = pload <Packet>(py+PacketSize);
pstoreu(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
pstoreu(px+PacketSize, padd(pmul(pc,xi1),pcj.pmul(ps,yi1)));
pstore (py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pmul(ps,xi1)));
px += Peeling*PacketSize;
py += Peeling*PacketSize;
}
if(alignedEnd!=peelingEnd)
{
Packet xi = ploadu<Packet>(x+peelingEnd);
Packet yi = pload <Packet>(y+peelingEnd);
pstoreu(x+peelingEnd, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
}
}
for(Index i=alignedEnd; i<size; ++i)
{
Scalar xi = x[i];
Scalar yi = y[i];
x[i] = c * xi + numext::conj(s) * yi;
y[i] = -s * xi + numext::conj(c) * yi;
}
}
/*** fixed-size vectorized path ***/
else if(VectorX::SizeAtCompileTime != Dynamic &&
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
(EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment
{
const Packet pc = pset1<Packet>(c);
const Packet ps = pset1<Packet>(s);
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj;
Scalar* EIGEN_RESTRICT px = x;
Scalar* EIGEN_RESTRICT py = y;
for(Index i=0; i<size; i+=PacketSize)
{
Packet xi = pload<Packet>(px);
Packet yi = pload<Packet>(py);
pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
px += PacketSize;
py += PacketSize;
}
}
/*** non-vectorized path ***/
else
{
for(Index i=0; i<size; ++i)
{
Scalar xi = *x;
Scalar yi = *y;
*x = c * xi + numext::conj(s) * yi;
*y = -s * xi + numext::conj(c) * yi;
x += incrx;
y += incry;
}
}
} }
} // end namespace internal } // end namespace internal

View File

@ -404,7 +404,7 @@ inline void MatrixBase<Derived>::computeInverseWithCheck(
const RealScalar& absDeterminantThreshold const RealScalar& absDeterminantThreshold
) const ) const
{ {
RealScalar determinant; Scalar determinant;
// i'd love to put some static assertions there, but SFINAE means that they have no effect... // i'd love to put some static assertions there, but SFINAE means that they have no effect...
eigen_assert(rows() == cols()); eigen_assert(rows() == cols());
computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold); computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold);
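
The type change matters for complex matrices: their determinant is itself complex, so declaring it RealScalar discarded the imaginary part before the invertibility test. A small usage sketch:

    #include <Eigen/Dense>
    #include <complex>
    #include <iostream>

    int main()
    {
      Eigen::Matrix2cd m;
      m << std::complex<double>(0, 1), std::complex<double>(0, 0),
           std::complex<double>(0, 0), std::complex<double>(1, 0);
      Eigen::Matrix2cd inv;
      std::complex<double> det;  // Scalar, as in the fix; RealScalar would lose Im(det)
      bool invertible;
      m.computeInverseAndDetWithCheck(inv, det, invertible, 1e-12);
      std::cout << "det = " << det << ", invertible = " << invertible << "\n";  // det = (0,1)
    }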

View File

@ -1004,7 +1004,7 @@ static IndexType find_ordering /* return the number of garbage collections */
COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ; COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ;
/* get pivot column from head of minimum degree list */ /* get pivot column from head of minimum degree list */
while (head [min_score] == COLAMD_EMPTY && min_score < n_col) while (min_score < n_col && head [min_score] == COLAMD_EMPTY)
{ {
min_score++ ; min_score++ ;
} }
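
The swapped operands are a plain bounds-safety fix: && evaluates left to right, so testing min_score < n_col first guarantees head[min_score] is never read one past the end. The same pattern in isolation:

    #include <vector>

    // Advance to the first non-empty degree list; -1 stands in for COLAMD_EMPTY.
    // The bound is tested before the array access.
    int firstNonEmpty(const std::vector<int>& head, int n_col)
    {
      int min_score = 0;
      while (min_score < n_col && head[min_score] == -1)
        ++min_score;
      return min_score;  // == n_col when every list is empty
    }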

View File

@ -64,28 +64,28 @@ namespace internal
typedef typename _MatrixType::StorageIndex StorageIndex; typedef typename _MatrixType::StorageIndex StorageIndex;
}; };
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm) inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm)
{ {
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) {x = NULL; nbrhs=1;} if (nbrhs == 0) {x = NULL; nbrhs=1;}
s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm);
} }
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm) inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm)
{ {
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) {x = NULL; nbrhs=1;} if (nbrhs == 0) {x = NULL; nbrhs=1;}
d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm);
} }
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm) inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm)
{ {
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) {x = NULL; nbrhs=1;} if (nbrhs == 0) {x = NULL; nbrhs=1;}
c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<PASTIX_COMPLEX*>(vals), perm, invp, reinterpret_cast<PASTIX_COMPLEX*>(x), nbrhs, iparm, dparm); c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<PASTIX_COMPLEX*>(vals), perm, invp, reinterpret_cast<PASTIX_COMPLEX*>(x), nbrhs, iparm, dparm);
} }
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm) inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm)
{ {
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) {x = NULL; nbrhs=1;} if (nbrhs == 0) {x = NULL; nbrhs=1;}
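
Marking these header-defined overloads inline is a one-definition-rule fix: without it, every translation unit that includes the PaStiX support header emits its own definition of eigen_pastix and the link fails with duplicate symbols. A minimal illustration with hypothetical names:

    // helpers.h -- included from two or more .cpp files (hypothetical header)
    inline int twice(int x) { return 2 * x; }  // OK: inline allows a definition per TU
    int thrice(int x)       { return 3 * x; }  // linker error once two TUs include this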

View File

@ -506,8 +506,8 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k); m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k);
} }
RealScalar threshold_helper = numext::abs2<Scalar>(m_colNormsUpdated.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows); RealScalar threshold_helper = numext::abs2<RealScalar>(m_colNormsUpdated.maxCoeff() * NumTraits<RealScalar>::epsilon()) / RealScalar(rows);
RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<Scalar>::epsilon()); RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<RealScalar>::epsilon());
m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case) m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
m_maxpivot = RealScalar(0); m_maxpivot = RealScalar(0);
@ -553,12 +553,12 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
// http://www.netlib.org/lapack/lawnspdf/lawn176.pdf // http://www.netlib.org/lapack/lawnspdf/lawn176.pdf
// and used in LAPACK routines xGEQPF and xGEQP3. // and used in LAPACK routines xGEQPF and xGEQP3.
// See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html // See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html
if (m_colNormsUpdated.coeffRef(j) != 0) { if (m_colNormsUpdated.coeffRef(j) != RealScalar(0)) {
RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j); RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j);
temp = (RealScalar(1) + temp) * (RealScalar(1) - temp); temp = (RealScalar(1) + temp) * (RealScalar(1) - temp);
temp = temp < 0 ? 0 : temp; temp = temp < RealScalar(0) ? RealScalar(0) : temp;
RealScalar temp2 = temp * numext::abs2<Scalar>(m_colNormsUpdated.coeffRef(j) / RealScalar temp2 = temp * numext::abs2<RealScalar>(m_colNormsUpdated.coeffRef(j) /
m_colNormsDirect.coeffRef(j)); m_colNormsDirect.coeffRef(j));
if (temp2 <= norm_downdate_threshold) { if (temp2 <= norm_downdate_threshold) {
// The updated norm has become too inaccurate so re-compute the column // The updated norm has become too inaccurate so re-compute the column
// norm directly. // norm directly.
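
For reference, the guarded update is the classical partial column norm downdate from LAWN 176: once the Householder step for column $k$ is applied, in exact arithmetic

$$\|a_j^{(k+1)}\|^2 = \|a_j^{(k)}\|^2 - |r_{kj}|^2, \qquad \|a_j^{(k+1)}\| = \|a_j^{(k)}\|\sqrt{(1+t)(1-t)}, \quad t = \frac{|r_{kj}|}{\|a_j^{(k)}\|}.$$

The recomputation triggers when $(1+t)(1-t)\,\bigl(\|a_j^{(k)}\|/\|a_j\|\bigr)^2 \le \sqrt{\varepsilon}$, i.e. when cancellation has eroded the running estimate; the switch from Scalar to RealScalar in abs2 and epsilon simply keeps every quantity real when the matrix is complex.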

View File

@ -11,7 +11,7 @@
// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr> // Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr> // Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
// Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk> // Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
// Copyright (C) 2014-2016 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2014-2017 Gael Guennebaud <gael.guennebaud@inria.fr>
// //
// Source Code Form is subject to the terms of the Mozilla // Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed // Public License v. 2.0. If a copy of the MPL was not distributed
@ -77,6 +77,7 @@ public:
typedef _MatrixType MatrixType; typedef _MatrixType MatrixType;
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
typedef typename NumTraits<RealScalar>::Literal Literal;
enum { enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime, RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime,
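
The new Literal typedef is threaded through the whole file below: every bare 0, 1, 2, 8 becomes Literal(0), Literal(1), and so on. NumTraits<RealScalar>::Literal defaults to RealScalar itself, but exotic scalar types (multiprecision, autodiff) can name a cheaper type that integer constants convert through. A hypothetical mirror of the pattern, with our own trait name:

    // Sketch only: MyNumTraits stands in for Eigen's NumTraits.
    template<typename T> struct MyNumTraits { typedef T Literal; };

    template<typename RealScalar>
    bool isZero(const RealScalar& x)
    {
      typedef typename MyNumTraits<RealScalar>::Literal Literal;
      return x == Literal(0);  // never compares against a raw int
    }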
@ -259,7 +260,7 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
//**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows
RealScalar scale = matrix.cwiseAbs().maxCoeff(); RealScalar scale = matrix.cwiseAbs().maxCoeff();
if(scale==RealScalar(0)) scale = RealScalar(1); if(scale==Literal(0)) scale = Literal(1);
MatrixX copy; MatrixX copy;
if (m_isTranspose) copy = matrix.adjoint()/scale; if (m_isTranspose) copy = matrix.adjoint()/scale;
else copy = matrix/scale; else copy = matrix/scale;
@ -351,13 +352,13 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co
Index k1=0, k2=0; Index k1=0, k2=0;
for(Index j=0; j<n; ++j) for(Index j=0; j<n; ++j)
{ {
if( (A.col(j).head(n1).array()!=0).any() ) if( (A.col(j).head(n1).array()!=Literal(0)).any() )
{ {
A1.col(k1) = A.col(j).head(n1); A1.col(k1) = A.col(j).head(n1);
B1.row(k1) = B.row(j); B1.row(k1) = B.row(j);
++k1; ++k1;
} }
if( (A.col(j).tail(n2).array()!=0).any() ) if( (A.col(j).tail(n2).array()!=Literal(0)).any() )
{ {
A2.col(k2) = A.col(j).tail(n2); A2.col(k2) = A.col(j).tail(n2);
B2.row(k2) = B.row(j); B2.row(k2) = B.row(j);
@ -449,11 +450,11 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
l = m_naiveU.row(1).segment(firstCol, k); l = m_naiveU.row(1).segment(firstCol, k);
f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1); f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1);
} }
if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1; if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1);
if (r0<considerZero) if (r0<considerZero)
{ {
c0 = 1; c0 = Literal(1);
s0 = 0; s0 = Literal(0);
} }
else else
{ {
@ -574,7 +575,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n); ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n);
m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal(); m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal();
ArrayRef diag = m_workspace.head(n); ArrayRef diag = m_workspace.head(n);
diag(0) = 0; diag(0) = Literal(0);
// Allocate space for singular values and vectors // Allocate space for singular values and vectors
singVals.resize(n); singVals.resize(n);
@ -590,7 +591,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
// but others are interleaved and we must ignore them at this stage. // but others are interleaved and we must ignore them at this stage.
// To this end, let's compute a permutation skipping them: // To this end, let's compute a permutation skipping them:
Index actual_n = n; Index actual_n = n;
while(actual_n>1 && diag(actual_n-1)==0) --actual_n; while(actual_n>1 && diag(actual_n-1)==Literal(0)) --actual_n;
Index m = 0; // size of the deflated problem Index m = 0; // size of the deflated problem
for(Index k=0;k<actual_n;++k) for(Index k=0;k<actual_n;++k)
if(abs(col0(k))>considerZero) if(abs(col0(k))>considerZero)
@ -691,11 +692,13 @@ template <typename MatrixType>
typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift) typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift)
{ {
Index m = perm.size(); Index m = perm.size();
RealScalar res = 1; RealScalar res = Literal(1);
for(Index i=0; i<m; ++i) for(Index i=0; i<m; ++i)
{ {
Index j = perm(i); Index j = perm(i);
res += numext::abs2(col0(j)) / ((diagShifted(j) - mu) * (diag(j) + shift + mu)); // The following expression could be rewritten to involve only a single division,
// but this would make the expression more sensitive to overflow.
res += (col0(j) / (diagShifted(j) - mu)) * (col0(j) / (diag(j) + shift + mu));
} }
return res; return res;
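
A concrete instance of the overflow remark in the new comment: with extreme but representable inputs, the single-division form blows up while the two-division form used above stays finite.

    #include <cstdio>

    int main()
    {
      double z = 1e200, d1 = 1e150, d2 = 1e120;
      double oneDiv = (z * z) / (d1 * d2);  // z*z overflows to inf before dividing
      double twoDiv = (z / d1) * (z / d2);  // 1e50 * 1e80 = 1e130, finite
      std::printf("oneDiv = %g, twoDiv = %g\n", oneDiv, twoDiv);  // inf vs 1e+130
    }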
@ -707,19 +710,22 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
{ {
using std::abs; using std::abs;
using std::swap; using std::swap;
using std::sqrt;
Index n = col0.size(); Index n = col0.size();
Index actual_n = n; Index actual_n = n;
while(actual_n>1 && col0(actual_n-1)==0) --actual_n; // Note that here actual_n is computed based on col0(i)==0 instead of diag(i)==0 as above
// because 1) we have diag(i)==0 => col0(i)==0 and 2) if col0(i)==0, then diag(i) is already a singular value.
while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n;
for (Index k = 0; k < n; ++k) for (Index k = 0; k < n; ++k)
{ {
if (col0(k) == 0 || actual_n==1) if (col0(k) == Literal(0) || actual_n==1)
{ {
// if col0(k) == 0, then entry is deflated, so singular value is on diagonal // if col0(k) == 0, then entry is deflated, so singular value is on diagonal
// if actual_n==1, then the deflated problem is already diagonalized // if actual_n==1, then the deflated problem is already diagonalized
singVals(k) = k==0 ? col0(0) : diag(k); singVals(k) = k==0 ? col0(0) : diag(k);
mus(k) = 0; mus(k) = Literal(0);
shifts(k) = k==0 ? col0(0) : diag(k); shifts(k) = k==0 ? col0(0) : diag(k);
continue; continue;
} }
@ -731,15 +737,17 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
right = (diag(actual_n-1) + col0.matrix().norm()); right = (diag(actual_n-1) + col0.matrix().norm());
else else
{ {
// Skip deflated singular values // Skip deflated singular values,
// recall that at this stage we assume that z[j]!=0 and all entries for which z[j]==0 have been put aside.
// This should be equivalent to using perm[]
Index l = k+1; Index l = k+1;
while(col0(l)==0) { ++l; eigen_internal_assert(l<actual_n); } while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); }
right = diag(l); right = diag(l);
} }
// first decide whether it's closer to the left end or the right end // first decide whether it's closer to the left end or the right end
RealScalar mid = left + (right-left) / 2; RealScalar mid = left + (right-left) / Literal(2);
RealScalar fMid = secularEq(mid, col0, diag, perm, diag, 0); RealScalar fMid = secularEq(mid, col0, diag, perm, diag, Literal(0));
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
std::cout << right-left << "\n"; std::cout << right-left << "\n";
std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right) << "\n"; std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right) << "\n";
@ -755,7 +763,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
<< " " << secularEq(0.8*(left+right), col0, diag, perm, diag, 0) << " " << secularEq(0.8*(left+right), col0, diag, perm, diag, 0)
<< " " << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n"; << " " << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n";
#endif #endif
RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right; RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? left : right;
// measure everything relative to shift // measure everything relative to shift
Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n); Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n);
@ -785,13 +793,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
// rational interpolation: fit a function of the form a / mu + b through the two previous // rational interpolation: fit a function of the form a / mu + b through the two previous
// iterates and use its zero to compute the next iterate // iterates and use its zero to compute the next iterate
bool useBisection = fPrev*fCur>0; bool useBisection = fPrev*fCur>Literal(0);
while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection) while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
{ {
++m_numIters; ++m_numIters;
// Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples. // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples.
RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev); RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev);
RealScalar b = fCur - a / muCur; RealScalar b = fCur - a / muCur;
// And find mu such that f(mu)==0: // And find mu such that f(mu)==0:
RealScalar muZero = -a/b; RealScalar muZero = -a/b;
@ -803,8 +811,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
fCur = fZero; fCur = fZero;
if (shift == left && (muCur < 0 || muCur > right - left)) useBisection = true; if (shift == left && (muCur < Literal(0) || muCur > right - left)) useBisection = true;
if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true; if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true;
if (abs(fCur)>abs(fPrev)) useBisection = true; if (abs(fCur)>abs(fPrev)) useBisection = true;
} }
@ -817,15 +825,23 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
RealScalar leftShifted, rightShifted; RealScalar leftShifted, rightShifted;
if (shift == left) if (shift == left)
{ {
leftShifted = (std::numeric_limits<RealScalar>::min)(); // to avoid overflow, we must have mu > max(real_min, |z(k)|/sqrt(real_max)),
// the factor 2 is to be more conservative
leftShifted = numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), Literal(2) * abs(col0(k)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
// check that we did it right:
eigen_internal_assert( (numext::isfinite)( (col0(k)/leftShifted)*(col0(k)/(diag(k)+shift+leftShifted)) ) );
// I don't understand why the case k==0 would be special there: // I don't understand why the case k==0 would be special there:
// if (k == 0) rightShifted = right - left; else // if (k == 0) rightShifted = right - left; else
rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.6)); // theoretically we can take 0.5, but let's be safe rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.51)); // theoretically we can take 0.5, but let's be safe
} }
else else
{ {
leftShifted = -(right - left) * RealScalar(0.6); leftShifted = -(right - left) * RealScalar(0.51);
rightShifted = -(std::numeric_limits<RealScalar>::min)(); if(k+1<n)
rightShifted = -numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), abs(col0(k+1)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
else
rightShifted = -(std::numeric_limits<RealScalar>::min)();
} }
RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift); RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift);
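
Where the new lower bound for leftShifted comes from: secularEq evaluates each term as $(z_k/\mu)\,(z_k/(d_k+\mathrm{shift}+\mu))$, and near the left end the first factor dominates, so it stays finite only if

$$\left|\frac{z_k}{\mu}\right| < \sqrt{\texttt{real\_max}} \quad\Longleftrightarrow\quad \mu > \frac{|z_k|}{\sqrt{\texttt{real\_max}}}.$$

Hence leftShifted = max(real_min, 2|z_k|/sqrt(real_max)), the factor 2 being the stated safety margin, and the eigen_internal_assert above checks exactly that this first term is finite.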
@ -841,13 +857,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n"; std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n";
} }
#endif #endif
eigen_internal_assert(fLeft * fRight < 0); eigen_internal_assert(fLeft * fRight < Literal(0));
while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted))) while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
{ {
RealScalar midShifted = (leftShifted + rightShifted) / 2; RealScalar midShifted = (leftShifted + rightShifted) / Literal(2);
fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift); fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
if (fLeft * fMid < 0) if (fLeft * fMid < Literal(0))
{ {
rightShifted = midShifted; rightShifted = midShifted;
} }
@ -858,7 +874,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
} }
} }
muCur = (leftShifted + rightShifted) / 2; muCur = (leftShifted + rightShifted) / Literal(2);
} }
singVals[k] = shift + muCur; singVals[k] = shift + muCur;
@ -892,8 +908,8 @@ void BDCSVD<MatrixType>::perturbCol0
// The offset permits to skip deflated entries while computing zhat // The offset permits to skip deflated entries while computing zhat
for (Index k = 0; k < n; ++k) for (Index k = 0; k < n; ++k)
{ {
if (col0(k) == 0) // deflated if (col0(k) == Literal(0)) // deflated
zhat(k) = 0; zhat(k) = Literal(0);
else else
{ {
// see equation (3.6) // see equation (3.6)
@ -918,7 +934,7 @@ void BDCSVD<MatrixType>::perturbCol0
std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n"; std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n";
#endif #endif
RealScalar tmp = sqrt(prod); RealScalar tmp = sqrt(prod);
zhat(k) = col0(k) > 0 ? tmp : -tmp; zhat(k) = col0(k) > Literal(0) ? tmp : -tmp;
} }
} }
} }
@ -934,7 +950,7 @@ void BDCSVD<MatrixType>::computeSingVecs
for (Index k = 0; k < n; ++k) for (Index k = 0; k < n; ++k)
{ {
if (zhat(k) == 0) if (zhat(k) == Literal(0))
{ {
U.col(k) = VectorType::Unit(n+1, k); U.col(k) = VectorType::Unit(n+1, k);
if (m_compV) V.col(k) = VectorType::Unit(n, k); if (m_compV) V.col(k) = VectorType::Unit(n, k);
@ -947,7 +963,7 @@ void BDCSVD<MatrixType>::computeSingVecs
Index i = perm(l); Index i = perm(l);
U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
} }
U(n,k) = 0; U(n,k) = Literal(0);
U.col(k).normalize(); U.col(k).normalize();
if (m_compV) if (m_compV)
@ -958,7 +974,7 @@ void BDCSVD<MatrixType>::computeSingVecs
Index i = perm(l); Index i = perm(l);
V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
} }
V(0,k) = -1; V(0,k) = Literal(-1);
V.col(k).normalize(); V.col(k).normalize();
} }
} }
@ -979,15 +995,15 @@ void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index
Index start = firstCol + shift; Index start = firstCol + shift;
RealScalar c = m_computed(start, start); RealScalar c = m_computed(start, start);
RealScalar s = m_computed(start+i, start); RealScalar s = m_computed(start+i, start);
RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); RealScalar r = numext::hypot(c,s);
if (r == 0) if (r == Literal(0))
{ {
m_computed(start+i, start+i) = 0; m_computed(start+i, start+i) = Literal(0);
return; return;
} }
m_computed(start,start) = r; m_computed(start,start) = r;
m_computed(start+i, start) = 0; m_computed(start+i, start) = Literal(0);
m_computed(start+i, start+i) = 0; m_computed(start+i, start+i) = Literal(0);
JacobiRotation<RealScalar> J(c/r,-s/r); JacobiRotation<RealScalar> J(c/r,-s/r);
if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J); if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J);
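
numext::hypot replaces sqrt(abs2(c) + abs2(s)) because the intermediate squares can overflow even when r itself is representable:

    #include <cmath>
    #include <cstdio>

    int main()
    {
      double c = 1e200, s = 1e200;
      std::printf("%g\n", std::sqrt(c * c + s * s));  // c*c overflows: prints inf
      std::printf("%g\n", std::hypot(c, s));          // ~1.41421e+200, correct
    }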
@ -1020,7 +1036,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi
<< m_computed(firstColm + i+1, firstColm+i+1) << " " << m_computed(firstColm + i+1, firstColm+i+1) << " "
<< m_computed(firstColm + i+2, firstColm+i+2) << "\n"; << m_computed(firstColm + i+2, firstColm+i+2) << "\n";
#endif #endif
if (r==0) if (r==Literal(0))
{ {
m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j); m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);
return; return;
@ -1029,7 +1045,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi
s/=r; s/=r;
m_computed(firstColm + i, firstColm) = r; m_computed(firstColm + i, firstColm) = r;
m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i); m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i);
m_computed(firstColm + j, firstColm) = 0; m_computed(firstColm + j, firstColm) = Literal(0);
JacobiRotation<RealScalar> J(c,-s); JacobiRotation<RealScalar> J(c,-s);
if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J); if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J);
@ -1053,7 +1069,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)(); const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff(); RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff();
RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag); RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag);
RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag); RealScalar epsilon_coarse = Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);
#ifdef EIGEN_BDCSVD_SANITY_CHECKS #ifdef EIGEN_BDCSVD_SANITY_CHECKS
assert(m_naiveU.allFinite()); assert(m_naiveU.allFinite());
@ -1081,7 +1097,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << " (diag(" << i << ")=" << diag(i) << ")\n"; std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << " (diag(" << i << ")=" << diag(i) << ")\n";
#endif #endif
col0(i) = 0; col0(i) = Literal(0);
} }
//condition 4.3 //condition 4.3

View File

@ -61,9 +61,10 @@ JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPiv
u = (LAPACKE_TYPE*)m_matrixU.data(); \ u = (LAPACKE_TYPE*)m_matrixU.data(); \
} else { ldu=1; u=&dummy; }\ } else { ldu=1; u=&dummy; }\
MatrixType localV; \ MatrixType localV; \
ldvt = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \ lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \
if (computeV()) { \ if (computeV()) { \
localV.resize(ldvt, m_cols); \ localV.resize(vt_rows, m_cols); \
ldvt = internal::convert_index<lapack_int>(localV.outerStride()); \
vt = (LAPACKE_TYPE*)localV.data(); \ vt = (LAPACKE_TYPE*)localV.data(); \
} else { ldvt=1; vt=&dummy; }\ } else { ldvt=1; vt=&dummy; }\
Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(m_diagSize, 1); \ Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(m_diagSize, 1); \
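
The reworked macro separates the requested row count of V^T (vt_rows) from ldvt, the leading dimension handed to LAPACK, and reads the latter from the matrix after allocation. That is the robust choice because the leading dimension depends on the storage order of the actual buffer, not on the requested shape; a small illustration:

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
      Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> vt(3, 5);
      // For this row-major matrix the leading dimension (outer stride) is the
      // column count 5, not the row count 3 that a shape-based guess would give.
      std::cout << vt.outerStride() << "\n";  // prints 5
    }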

View File

@ -159,6 +159,8 @@ void upperbidiagonalization_blocked_helper(MatrixType& A,
traits<MatrixType>::Flags & RowMajorBit> > Y) traits<MatrixType>::Flags & RowMajorBit> > Y)
{ {
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef typename NumTraits<RealScalar>::Literal Literal;
enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit }; enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit };
typedef InnerStride<int(StorageOrder) == int(ColMajor) ? 1 : Dynamic> ColInnerStride; typedef InnerStride<int(StorageOrder) == int(ColMajor) ? 1 : Dynamic> ColInnerStride;
typedef InnerStride<int(StorageOrder) == int(ColMajor) ? Dynamic : 1> RowInnerStride; typedef InnerStride<int(StorageOrder) == int(ColMajor) ? Dynamic : 1> RowInnerStride;
@ -263,7 +265,7 @@ void upperbidiagonalization_blocked_helper(MatrixType& A,
SubMatType A10( A.block(bs,0, brows-bs,bs) ); SubMatType A10( A.block(bs,0, brows-bs,bs) );
SubMatType A01( A.block(0,bs, bs,bcols-bs) ); SubMatType A01( A.block(0,bs, bs,bcols-bs) );
Scalar tmp = A01(bs-1,0); Scalar tmp = A01(bs-1,0);
A01(bs-1,0) = 1; A01(bs-1,0) = Literal(1);
A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint(); A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint();
A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01; A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01;
A01(bs-1,0) = tmp; A01(bs-1,0) = tmp;

View File

@ -94,7 +94,7 @@ class AmbiVector
Index allocSize = m_allocatedElements * sizeof(ListEl); Index allocSize = m_allocatedElements * sizeof(ListEl);
allocSize = (allocSize + sizeof(Scalar) - 1)/sizeof(Scalar); allocSize = (allocSize + sizeof(Scalar) - 1)/sizeof(Scalar);
Scalar* newBuffer = new Scalar[allocSize]; Scalar* newBuffer = new Scalar[allocSize];
memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl)); std::memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl));
delete[] m_buffer; delete[] m_buffer;
m_buffer = newBuffer; m_buffer = newBuffer;
} }

View File

@ -17,7 +17,9 @@ namespace internal {
template<typename Lhs, typename Rhs, typename ResultType> template<typename Lhs, typename Rhs, typename ResultType>
static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false) static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false)
{ {
typedef typename remove_all<Lhs>::type::Scalar Scalar; typedef typename remove_all<Lhs>::type::Scalar LhsScalar;
typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
typedef typename remove_all<ResultType>::type::Scalar ResScalar;
// make sure to call innerSize/outerSize since we fake the storage order. // make sure to call innerSize/outerSize since we fake the storage order.
Index rows = lhs.innerSize(); Index rows = lhs.innerSize();
@ -25,7 +27,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
eigen_assert(lhs.outerSize() == rhs.innerSize()); eigen_assert(lhs.outerSize() == rhs.innerSize());
ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0); ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, values, rows, 0); ei_declare_aligned_stack_constructed_variable(ResScalar, values, rows, 0);
ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0); ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0);
std::memset(mask,0,sizeof(bool)*rows); std::memset(mask,0,sizeof(bool)*rows);
@ -51,12 +53,12 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
Index nnz = 0; Index nnz = 0;
for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
{ {
Scalar y = rhsIt.value(); RhsScalar y = rhsIt.value();
Index k = rhsIt.index(); Index k = rhsIt.index();
for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
{ {
Index i = lhsIt.index(); Index i = lhsIt.index();
Scalar x = lhsIt.value(); LhsScalar x = lhsIt.value();
if(!mask[i]) if(!mask[i])
{ {
mask[i] = true; mask[i] = true;
@ -166,11 +168,12 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
{ {
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{ {
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix; typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRhs;
RowMajorMatrix rhsRow = rhs; typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;
RowMajorMatrix resRow(lhs.rows(), rhs.cols()); RowMajorRhs rhsRow = rhs;
internal::conservative_sparse_sparse_product_impl<RowMajorMatrix,Lhs,RowMajorMatrix>(rhsRow, lhs, resRow); RowMajorRes resRow(lhs.rows(), rhs.cols());
res = resRow; internal::conservative_sparse_sparse_product_impl<RowMajorRhs,Lhs,RowMajorRes>(rhsRow, lhs, resRow);
res = resRow;
} }
}; };
@ -179,10 +182,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
{ {
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{ {
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix; typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorLhs;
RowMajorMatrix lhsRow = lhs; typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;
RowMajorMatrix resRow(lhs.rows(), rhs.cols()); RowMajorLhs lhsRow = lhs;
internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorMatrix,RowMajorMatrix>(rhs, lhsRow, resRow); RowMajorRes resRow(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorLhs,RowMajorRes>(rhs, lhsRow, resRow);
res = resRow; res = resRow;
} }
}; };
@ -219,10 +223,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
{ {
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{ {
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix; typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;
ColMajorMatrix lhsCol = lhs; typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;
ColMajorMatrix resCol(lhs.rows(), rhs.cols()); ColMajorLhs lhsCol = lhs;
internal::conservative_sparse_sparse_product_impl<ColMajorMatrix,Rhs,ColMajorMatrix>(lhsCol, rhs, resCol); ColMajorRes resCol(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<ColMajorLhs,Rhs,ColMajorRes>(lhsCol, rhs, resCol);
res = resCol; res = resCol;
} }
}; };
@ -232,10 +237,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
{ {
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{ {
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix; typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;
ColMajorMatrix rhsCol = rhs; typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;
ColMajorMatrix resCol(lhs.rows(), rhs.cols()); ColMajorRhs rhsCol = rhs;
internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorMatrix,ColMajorMatrix>(lhs, rhsCol, resCol); ColMajorRes resCol(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorRhs,ColMajorRes>(lhs, rhsCol, resCol);
res = resCol; res = resCol;
} }
}; };
@ -263,7 +269,8 @@ namespace internal {
template<typename Lhs, typename Rhs, typename ResultType> template<typename Lhs, typename Rhs, typename ResultType>
static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res) static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{ {
typedef typename remove_all<Lhs>::type::Scalar Scalar; typedef typename remove_all<Lhs>::type::Scalar LhsScalar;
typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
Index cols = rhs.outerSize(); Index cols = rhs.outerSize();
eigen_assert(lhs.outerSize() == rhs.innerSize()); eigen_assert(lhs.outerSize() == rhs.innerSize());
@ -274,12 +281,12 @@ static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs,
{ {
for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
{ {
Scalar y = rhsIt.value(); RhsScalar y = rhsIt.value();
Index k = rhsIt.index(); Index k = rhsIt.index();
for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
{ {
Index i = lhsIt.index(); Index i = lhsIt.index();
Scalar x = lhsIt.value(); LhsScalar x = lhsIt.value();
res.coeffRef(i,j) += x * y; res.coeffRef(i,j) += x * y;
} }
} }
@ -310,9 +317,9 @@ struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMa
{ {
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{ {
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix; typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;
ColMajorMatrix lhsCol(lhs); ColMajorLhs lhsCol(lhs);
internal::sparse_sparse_to_dense_product_impl<ColMajorMatrix,Rhs,ResultType>(lhsCol, rhs, res); internal::sparse_sparse_to_dense_product_impl<ColMajorLhs,Rhs,ResultType>(lhsCol, rhs, res);
} }
}; };
@ -321,9 +328,9 @@ struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMa
{ {
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{ {
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix; typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;
ColMajorMatrix rhsCol(rhs); ColMajorRhs rhsCol(rhs);
internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorMatrix,ResultType>(lhs, rhsCol, res); internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorRhs,ResultType>(lhs, rhsCol, res);
} }
}; };
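
The point of splitting the single Scalar into LhsScalar, RhsScalar and ResScalar throughout this file is to let the two operands carry different scalar types, with each temporary typed after the side it copies. A usage sketch, assuming (as this change intends) that real-by-complex sparse products are accepted:

    #include <Eigen/SparseCore>
    #include <complex>
    #include <iostream>

    int main()
    {
      Eigen::SparseMatrix<double> a(2, 2);
      Eigen::SparseMatrix<std::complex<double> > b(2, 2);
      a.insert(0, 0) = 2.0;
      b.insert(0, 1) = std::complex<double>(0.0, 3.0);
      Eigen::SparseMatrix<std::complex<double> > c = a * b;  // mixed-scalar product
      std::cout << c.coeff(0, 1) << "\n";                    // (0,6)
    }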

View File

@ -47,6 +47,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
enum { enum {
Mode = _Mode, Mode = _Mode,
TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0),
RowsAtCompileTime = internal::traits<SparseSelfAdjointView>::RowsAtCompileTime, RowsAtCompileTime = internal::traits<SparseSelfAdjointView>::RowsAtCompileTime,
ColsAtCompileTime = internal::traits<SparseSelfAdjointView>::ColsAtCompileTime ColsAtCompileTime = internal::traits<SparseSelfAdjointView>::ColsAtCompileTime
}; };
@ -310,7 +311,7 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons
while (i && i.index()<j) ++i; while (i && i.index()<j) ++i;
if(i && i.index()==j) if(i && i.index()==j)
{ {
res(j,k) += alpha * i.value() * rhs(j,k); res.coeffRef(j,k) += alpha * i.value() * rhs.coeff(j,k);
++i; ++i;
} }
} }
@ -323,14 +324,14 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons
{ {
LhsScalar lhs_ij = i.value(); LhsScalar lhs_ij = i.value();
if(!LhsIsRowMajor) lhs_ij = numext::conj(lhs_ij); if(!LhsIsRowMajor) lhs_ij = numext::conj(lhs_ij);
res_j += lhs_ij * rhs(i.index(),k); res_j += lhs_ij * rhs.coeff(i.index(),k);
res(i.index(),k) += numext::conj(lhs_ij) * rhs_j; res(i.index(),k) += numext::conj(lhs_ij) * rhs_j;
} }
res(j,k) += alpha * res_j; res.coeffRef(j,k) += alpha * res_j;
// handle diagonal coeff // handle diagonal coeff
if (ProcessFirstHalf && i && (i.index()==j)) if (ProcessFirstHalf && i && (i.index()==j))
res(j,k) += alpha * i.value() * rhs(j,k); res.coeffRef(j,k) += alpha * i.value() * rhs.coeff(j,k);
} }
} }
} }
@ -368,7 +369,7 @@ struct generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, Pr
// transpose everything // transpose everything
Transpose<Dest> dstT(dst); Transpose<Dest> dstT(dst);
internal::sparse_selfadjoint_time_dense_product<RhsView::Mode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); internal::sparse_selfadjoint_time_dense_product<RhsView::TransposeMode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha);
} }
}; };
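
The new TransposeMode enum, used in the transposed product at the end of the hunk, just swaps the triangle flags: the transpose of a self-adjoint matrix stored in its upper triangle lives in its lower triangle, and vice versa. The same computation in isolation (the flag values mirror Eigen's Lower=0x1, Upper=0x2; treat that as an assumption):

    #include <iostream>

    enum { Lower = 0x1, Upper = 0x2 };

    constexpr unsigned transposeMode(unsigned mode)
    {
      return ((mode & Upper) ? Lower : 0u) | ((mode & Lower) ? Upper : 0u);
    }

    int main()
    {
      std::cout << transposeMode(Upper) << " "    // 1, i.e. Lower
                << transposeMode(Lower) << "\n";  // 2, i.e. Upper
    }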

View File

@ -21,7 +21,8 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
{ {
// return sparse_sparse_product_with_pruning_impl2(lhs,rhs,res); // return sparse_sparse_product_with_pruning_impl2(lhs,rhs,res);
typedef typename remove_all<Lhs>::type::Scalar Scalar; typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
typedef typename remove_all<ResultType>::type::Scalar ResScalar;
typedef typename remove_all<Lhs>::type::StorageIndex StorageIndex; typedef typename remove_all<Lhs>::type::StorageIndex StorageIndex;
// make sure to call innerSize/outerSize since we fake the storage order. // make sure to call innerSize/outerSize since we fake the storage order.
@ -31,7 +32,7 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
eigen_assert(lhs.outerSize() == rhs.innerSize()); eigen_assert(lhs.outerSize() == rhs.innerSize());
// allocate a temporary buffer // allocate a temporary buffer
AmbiVector<Scalar,StorageIndex> tempVector(rows); AmbiVector<ResScalar,StorageIndex> tempVector(rows);
// mimics a resizeByInnerOuter: // mimics a resizeByInnerOuter:
if(ResultType::IsRowMajor) if(ResultType::IsRowMajor)
@ -63,14 +64,14 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
{ {
// FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index()) // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())
tempVector.restart(); tempVector.restart();
Scalar x = rhsIt.value(); RhsScalar x = rhsIt.value();
for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt) for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt)
{ {
tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x; tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x;
} }
} }
res.startVec(j); res.startVec(j);
for (typename AmbiVector<Scalar,StorageIndex>::Iterator it(tempVector,tolerance); it; ++it) for (typename AmbiVector<ResScalar,StorageIndex>::Iterator it(tempVector,tolerance); it; ++it)
res.insertBackByOuterInner(j,it.index()) = it.value(); res.insertBackByOuterInner(j,it.index()) = it.value();
} }
res.finalize(); res.finalize();
@ -85,7 +86,6 @@ struct sparse_sparse_product_with_pruning_selector;
template<typename Lhs, typename Rhs, typename ResultType> template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor> struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
{ {
typedef typename traits<typename remove_all<Lhs>::type>::Scalar Scalar;
typedef typename ResultType::RealScalar RealScalar; typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
@ -129,8 +129,8 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,R
typedef typename ResultType::RealScalar RealScalar; typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{ {
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs; typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs;
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs; typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs;
ColMajorMatrixLhs colLhs(lhs); ColMajorMatrixLhs colLhs(lhs);
ColMajorMatrixRhs colRhs(rhs); ColMajorMatrixRhs colRhs(rhs);
internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,ColMajorMatrixRhs,ResultType>(colLhs, colRhs, res, tolerance); internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,ColMajorMatrixRhs,ResultType>(colLhs, colRhs, res, tolerance);
@ -149,7 +149,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,R
typedef typename ResultType::RealScalar RealScalar; typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{ {
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixLhs; typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixLhs;
RowMajorMatrixLhs rowLhs(lhs); RowMajorMatrixLhs rowLhs(lhs);
sparse_sparse_product_with_pruning_selector<RowMajorMatrixLhs,Rhs,ResultType,RowMajor,RowMajor>(rowLhs,rhs,res,tolerance); sparse_sparse_product_with_pruning_selector<RowMajorMatrixLhs,Rhs,ResultType,RowMajor,RowMajor>(rowLhs,rhs,res,tolerance);
} }
@ -161,7 +161,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,C
typedef typename ResultType::RealScalar RealScalar; typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{ {
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixRhs; typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixRhs;
RowMajorMatrixRhs rowRhs(rhs); RowMajorMatrixRhs rowRhs(rhs);
sparse_sparse_product_with_pruning_selector<Lhs,RowMajorMatrixRhs,ResultType,RowMajor,RowMajor,RowMajor>(lhs,rowRhs,res,tolerance); sparse_sparse_product_with_pruning_selector<Lhs,RowMajorMatrixRhs,ResultType,RowMajor,RowMajor,RowMajor>(lhs,rowRhs,res,tolerance);
} }
@ -173,7 +173,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,R
typedef typename ResultType::RealScalar RealScalar; typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{ {
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs; typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs;
ColMajorMatrixRhs colRhs(rhs); ColMajorMatrixRhs colRhs(rhs);
internal::sparse_sparse_product_with_pruning_impl<Lhs,ColMajorMatrixRhs,ResultType>(lhs, colRhs, res, tolerance); internal::sparse_sparse_product_with_pruning_impl<Lhs,ColMajorMatrixRhs,ResultType>(lhs, colRhs, res, tolerance);
} }
@ -185,7 +185,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,C
typedef typename ResultType::RealScalar RealScalar; typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{ {
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs; typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs;
ColMajorMatrixLhs colLhs(lhs); ColMajorMatrixLhs colLhs(lhs);
internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,Rhs,ResultType>(colLhs, rhs, res, tolerance); internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,Rhs,ResultType>(colLhs, rhs, res, tolerance);
} }

View File

@ -52,7 +52,7 @@ namespace internal {
* rank-revealing permutations. Use colsPermutation() to get it. * rank-revealing permutations. Use colsPermutation() to get it.
* *
* Q is the orthogonal matrix represented as products of Householder reflectors. * Q is the orthogonal matrix represented as products of Householder reflectors.
* Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose. * Use matrixQ() to get an expression and matrixQ().adjoint() to get the adjoint.
* You can then apply it to a vector. * You can then apply it to a vector.
* *
* R is the sparse triangular or trapezoidal matrix. The latter occurs when A is rank-deficient. * R is the sparse triangular or trapezoidal matrix. The latter occurs when A is rank-deficient.
@ -65,6 +65,7 @@ namespace internal {
* \implsparsesolverconcept * \implsparsesolverconcept
* *
* \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()). * \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()).
* \warning For complex matrices matrixQ().transpose() will actually return the adjoint matrix.
* *
*/ */
template<typename _MatrixType, typename _OrderingType> template<typename _MatrixType, typename _OrderingType>
@ -196,9 +197,9 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
Index rank = this->rank(); Index rank = this->rank();
// Compute Q^T * b; // Compute Q^* * b;
typename Dest::PlainObject y, b; typename Dest::PlainObject y, b;
y = this->matrixQ().transpose() * B; y = this->matrixQ().adjoint() * B;
b = y; b = y;
// Solve with the triangular matrix R // Solve with the triangular matrix R
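
At the user level the fix means complex least-squares solves now apply Q's adjoint rather than its transpose; the two coincide only for real matrices. A hedged sketch of the first half of the solve, mirroring the code above:

    #include <Eigen/Sparse>
    #include <Eigen/Dense>
    #include <complex>

    typedef Eigen::SparseMatrix<std::complex<double> > SpMat;

    // Apply Q^* to the right-hand side, as _solve_impl now does; the triangular
    // solve with R (omitted) completes the least-squares solution.
    Eigen::VectorXcd applyQAdjoint(const SpMat& A, const Eigen::VectorXcd& b)
    {
      Eigen::SparseQR<SpMat, Eigen::COLAMDOrdering<int> > qr(A);
      return qr.matrixQ().adjoint() * b;  // Q^* b, not Q^T b
    }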
@@ -604,7 +605,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
     // Get the references
     SparseQR_QProduct(const SparseQRType& qr, const Derived& other, bool transpose) :
       m_qr(qr),m_other(other),m_transpose(transpose) {}
-    inline Index rows() const { return m_transpose ? m_qr.rows() : m_qr.cols(); }
+    inline Index rows() const { return m_qr.matrixQ().rows(); }
     inline Index cols() const { return m_other.cols(); }
     // Assign to a vector
@@ -632,7 +633,10 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
       }
       else
       {
-        eigen_assert(m_qr.m_Q.rows() == m_other.rows() && "Non conforming object sizes");
+        eigen_assert(m_qr.matrixQ().cols() == m_other.rows() && "Non conforming object sizes");
+        res.conservativeResize(rows(), cols());
         // Compute res = Q * other column by column
         for(Index j = 0; j < res.cols(); j++)
         {
@@ -641,7 +645,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
           Scalar tau = Scalar(0);
           tau = m_qr.m_Q.col(k).dot(res.col(j));
           if(tau==Scalar(0)) continue;
-          tau = tau * m_qr.m_hcoeffs(k);
+          tau = tau * numext::conj(m_qr.m_hcoeffs(k));
           res.col(j) -= tau * m_qr.m_Q.col(k);
         }
       }
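The added numext::conj fixes the complex case: without it the loop effectively applied the reflectors of Q^* rather than Q. A standalone sketch of one loop iteration, with illustrative names that are not Eigen internals:

```cpp
#include <Eigen/Dense>
#include <complex>

// Applies one Householder reflector of Q to a column, mirroring the loop body above.
void apply_reflector(const Eigen::VectorXcd& q_k,  // k-th Householder vector
                     std::complex<double> h_k,     // k-th Householder coefficient
                     Eigen::VectorXcd& col)        // column transformed in place
{
    std::complex<double> tau = q_k.dot(col);  // Eigen's dot() conjugates the vector it is called on: q_k^* col
    if (tau == std::complex<double>(0)) return;
    tau *= std::conj(h_k);                    // the fix: conjugate h_k when applying Q
    col -= tau * q_k;
}
```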
@@ -650,7 +654,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
     const SparseQRType& m_qr;
     const Derived& m_other;
-    bool m_transpose;
+    bool m_transpose; // TODO this actually means adjoint
 };

 template<typename SparseQRType>
@@ -668,13 +672,14 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp
   {
     return SparseQR_QProduct<SparseQRType,Derived>(m_qr,other.derived(),false);
   }
+  // To use for operations with the adjoint of Q
   SparseQRMatrixQTransposeReturnType<SparseQRType> adjoint() const
   {
     return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr);
   }
   inline Index rows() const { return m_qr.rows(); }
-  inline Index cols() const { return (std::min)(m_qr.rows(),m_qr.cols()); }
-  // To use for operations with the transpose of Q
+  inline Index cols() const { return m_qr.rows(); }
+  // To use for operations with the transpose of Q FIXME this is the same as adjoint at the moment
   SparseQRMatrixQTransposeReturnType<SparseQRType> transpose() const
   {
     return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr);
@@ -682,6 +687,7 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp
   const SparseQRType& m_qr;
 };

+// TODO this actually represents the adjoint of Q
 template<typename SparseQRType>
 struct SparseQRMatrixQTransposeReturnType
 {
@@ -712,7 +718,7 @@ struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal:
   typedef typename DstXprType::StorageIndex StorageIndex;
   static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &/*func*/)
   {
-    typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows());
+    typename DstXprType::PlainObject idMat(src.rows(), src.cols());
     idMat.setIdentity();
     // Sort the sparse householder reflectors if needed
     const_cast<SparseQRType *>(&src.m_qr)->_sort_matrix_Q();
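This specialization runs when Q is materialized into an actual matrix; the identity is now sized from the return-type object, consistent with the corrected rows()/cols() above. Continuing the SparseQR sketch (a fragment, not a complete program):

```cpp
// Materializing Q triggers the Assignment specialization above: it builds
// an identity of size src.rows() x src.cols() (both A.rows() after the fix)
// and applies the Householder reflectors to it column by column.
Eigen::SparseMatrix<std::complex<double> > Q;
Q = qr.matrixQ();  // Q ends up A.rows() x A.rows()
```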


@@ -10,19 +10,37 @@
 #ifndef EIGEN_UMFPACKSUPPORT_H
 #define EIGEN_UMFPACKSUPPORT_H

 namespace Eigen {

 /* TODO extract L, extract U, compute det, etc... */

 // generic double/complex<double> wrapper functions:

 inline void umfpack_defaults(double control[UMFPACK_CONTROL], double)
 { umfpack_di_defaults(control); }

 inline void umfpack_defaults(double control[UMFPACK_CONTROL], std::complex<double>)
 { umfpack_zi_defaults(control); }

+inline void umfpack_report_info(double control[UMFPACK_CONTROL], double info[UMFPACK_INFO], double)
+{ umfpack_di_report_info(control, info);}
+
+inline void umfpack_report_info(double control[UMFPACK_CONTROL], double info[UMFPACK_INFO], std::complex<double>)
+{ umfpack_zi_report_info(control, info);}
+
+inline void umfpack_report_status(double control[UMFPACK_CONTROL], int status, double)
+{ umfpack_di_report_status(control, status);}
+
+inline void umfpack_report_status(double control[UMFPACK_CONTROL], int status, std::complex<double>)
+{ umfpack_zi_report_status(control, status);}
+
+inline void umfpack_report_control(double control[UMFPACK_CONTROL], double)
+{ umfpack_di_report_control(control);}
+
+inline void umfpack_report_control(double control[UMFPACK_CONTROL], std::complex<double>)
+{ umfpack_zi_report_control(control);}
+
 inline void umfpack_free_numeric(void **Numeric, double)
 { umfpack_di_free_numeric(Numeric); *Numeric = 0; }
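All of these wrappers use the same dispatch idiom: a trailing dummy Scalar argument selects the umfpack_di_* (double) or umfpack_zi_* (complex) routine by overload resolution, so the calling code stays scalar-generic. A self-contained sketch of the pattern, independent of UMFPACK:

```cpp
#include <complex>
#include <iostream>

// The trailing parameter is a value-initialized dummy that is never read;
// it exists only to steer overload resolution.
inline void report(double)               { std::cout << "real path (umfpack_di_*)\n"; }
inline void report(std::complex<double>) { std::cout << "complex path (umfpack_zi_*)\n"; }

template<typename Scalar>
void run()
{
    report(Scalar());  // resolves at compile time based on Scalar
}

int main()
{
    run<double>();                 // prints: real path (umfpack_di_*)
    run<std::complex<double> >();  // prints: complex path (umfpack_zi_*)
    return 0;
}
```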
@@ -156,6 +174,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
   public:

     typedef Array<double, UMFPACK_CONTROL, 1> UmfpackControl;
+    typedef Array<double, UMFPACK_INFO, 1> UmfpackInfo;

     UmfPackLU()
       : m_dummy(0,0), mp_matrix(m_dummy)
@@ -215,7 +234,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
       return m_q;
     }

     /** Computes the sparse LU decomposition of \a matrix
      * Note that the matrix should be column-major, and in compressed format for best performance.
      * \sa SparseMatrix::makeCompressed().
      */
@@ -240,7 +259,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
     {
       if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar());
       if(m_numeric)  umfpack_free_numeric(&m_numeric,Scalar());

       grab(matrix.derived());
       analyzePattern_impl();
@@ -267,7 +286,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
     {
       return m_control;
     }

     /** Provides access to the control settings array used by UmfPack.
      *
      * If this array contains NaN's, the default values are used.
@@ -278,7 +297,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
     {
       return m_control;
     }

     /** Performs a numeric decomposition of \a matrix
      *
      * The given matrix must have the same sparsity pattern as the matrix on which the pattern analysis has been performed.
@@ -293,10 +312,38 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
       umfpack_free_numeric(&m_numeric,Scalar());
       grab(matrix.derived());
       factorize_impl();
     }

+    /** Prints the current UmfPack control settings.
+      *
+      * \sa umfpackControl()
+      */
+    void umfpackReportControl()
+    {
+      umfpack_report_control(m_control.data(), Scalar());
+    }
+
+    /** Prints statistics collected by UmfPack.
+      *
+      * \sa analyzePattern(), compute()
+      */
+    void umfpackReportInfo()
+    {
+      eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()");
+      umfpack_report_info(m_control.data(), m_umfpackInfo.data(), Scalar());
+    }
+
+    /** Prints the status of the previous factorization operation performed by UmfPack (symbolic or numerical factorization).
+      *
+      * \sa analyzePattern(), compute()
+      */
+    void umfpackReportStatus() {
+      eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()");
+      umfpack_report_status(m_control.data(), m_fact_errorCode, Scalar());
+    }
+
     /** \internal */
     template<typename BDerived,typename XDerived>
     bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const;
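A hedged usage sketch of the three new reporting helpers (requires linking against UMFPACK; matrix contents are placeholders). Note that UMFPACK's report routines print little at the default print level, so raising Control[UMFPACK_PRL] via umfpackControl() may be needed to see full output:

```cpp
#include <Eigen/SparseCore>
#include <Eigen/UmfPackSupport>

int main()
{
    Eigen::SparseMatrix<double> A(2, 2);
    A.insert(0, 0) = 4.0;
    A.insert(1, 1) = 2.0;
    A.makeCompressed();

    Eigen::UmfPackLU<Eigen::SparseMatrix<double> > lu;
    lu.compute(A);               // analyzePattern() followed by factorize()

    lu.umfpackReportControl();   // echo the control settings in effect
    lu.umfpackReportInfo();      // statistics gathered in the info array
    lu.umfpackReportStatus();    // status of the last factorization step
    return 0;
}
```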
@@ -314,41 +361,42 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
       m_numeric  = 0;
       m_symbolic = 0;
       m_extractedDataAreDirty = true;
+      umfpack_defaults(m_control.data(), Scalar());
     }

     void analyzePattern_impl()
     {
-      umfpack_defaults(m_control.data(), Scalar());
-      int errorCode = 0;
-      errorCode = umfpack_symbolic(internal::convert_index<int>(mp_matrix.rows()),
-                                   internal::convert_index<int>(mp_matrix.cols()),
-                                   mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(),
-                                   &m_symbolic, m_control.data(), 0);
+      m_fact_errorCode = umfpack_symbolic(internal::convert_index<int>(mp_matrix.rows()),
+                                          internal::convert_index<int>(mp_matrix.cols()),
+                                          mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(),
+                                          &m_symbolic, m_control.data(), m_umfpackInfo.data());

       m_isInitialized = true;
-      m_info = errorCode ? InvalidInput : Success;
+      m_info = m_fact_errorCode ? InvalidInput : Success;
       m_analysisIsOk = true;
       m_factorizationIsOk = false;
       m_extractedDataAreDirty = true;
     }

     void factorize_impl()
     {
       m_fact_errorCode = umfpack_numeric(mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(),
-                                         m_symbolic, &m_numeric, m_control.data(), 0);
+                                         m_symbolic, &m_numeric, m_control.data(), m_umfpackInfo.data());

       m_info = m_fact_errorCode == UMFPACK_OK ? Success : NumericalIssue;
       m_factorizationIsOk = true;
       m_extractedDataAreDirty = true;
     }

     template<typename MatrixDerived>
     void grab(const EigenBase<MatrixDerived> &A)
     {
       mp_matrix.~UmfpackMatrixRef();
       ::new (&mp_matrix) UmfpackMatrixRef(A.derived());
     }

     void grab(const UmfpackMatrixRef &A)
     {
       if(&(A.derived()) != &mp_matrix)
@@ -357,19 +405,20 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
         ::new (&mp_matrix) UmfpackMatrixRef(A);
       }
     }

     // cached data to reduce reallocation, etc.
     mutable LUMatrixType m_l;
     int m_fact_errorCode;
     UmfpackControl m_control;
+    mutable UmfpackInfo m_umfpackInfo;

     mutable LUMatrixType m_u;
     mutable IntColVectorType m_p;
     mutable IntRowVectorType m_q;

     UmfpackMatrixType m_dummy;
     UmfpackMatrixRef mp_matrix;

     void* m_numeric;
     void* m_symbolic;
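The grab() overloads above rely on an idiom worth calling out: an Eigen::Ref-like member cannot simply be re-assigned to point at new data, so it is destroyed in place and reconstructed over the same storage with placement new. A standalone sketch of the idiom (types are illustrative, not Eigen's):

```cpp
#include <new>

// A non-reassignable view type, standing in for Eigen::Ref.
struct View
{
    const double* data;
    explicit View(const double* d) : data(d) {}
private:
    View& operator=(const View&);  // not implemented: rebinding must reconstruct
};

struct Holder
{
    View v;
    explicit Holder(const double* d) : v(d) {}

    void rebind(const double* d)
    {
        v.~View();           // end the old view's lifetime in place
        ::new (&v) View(d);  // construct the new view over the same storage
    }
};
```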
@@ -377,7 +426,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
     int m_factorizationIsOk;
     int m_analysisIsOk;
     mutable bool m_extractedDataAreDirty;

   private:
     UmfPackLU(const UmfPackLU& ) { }
 };
@@ -427,7 +476,7 @@ bool UmfPackLU<MatrixType>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBas
   eigen_assert((BDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major rhs yet");
   eigen_assert((XDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major result yet");
   eigen_assert(b.derived().data() != x.derived().data() && " Umfpack does not support inplace solve");

   int errorCode;
   Scalar* x_ptr = 0;
   Matrix<Scalar,Dynamic,1> x_tmp;
@@ -442,7 +491,7 @@ bool UmfPackLU<MatrixType>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBas
       x_ptr = &x.col(j).coeffRef(0);
     errorCode = umfpack_solve(UMFPACK_A,
                               mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(),
-                              x_ptr, &b.const_cast_derived().col(j).coeffRef(0), m_numeric, m_control.data(), 0);
+                              x_ptr, &b.const_cast_derived().col(j).coeffRef(0), m_numeric, m_control.data(), m_umfpackInfo.data());
     if(x.innerStride()!=1)
       x.col(j) = x_tmp;
     if (errorCode!=0)
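Continuing the UmfPackLU sketch above: solve() drives this routine column by column, and with this change UMFPACK's per-solve statistics land in the info array as well:

```cpp
Eigen::VectorXd b(2), x;
b << 1.0, 2.0;
x = lu.solve(b);  // one umfpack_solve() call per right-hand-side column
if (lu.info() == Eigen::Success)
{
    // x now holds the solution of A * x = b
}
```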


@@ -1,5 +1,5 @@
 THIS IS NOT THE COMPLETE EIGEN DISTRIBUTION. ONLY FILES NEEDED FOR COMPILING EIGEN INTO SLIC3R WERE PUT INTO THE SLIC3R SOURCE DISTRIBUTION.

-THIS DIRECTORY CONTAINS PIECES OF THE EIGEN 3.3.3 SOURCE DISTRIBUTION.
+THIS DIRECTORY CONTAINS PIECES OF THE EIGEN 3.3.5 b3f3d4950030 SOURCE DISTRIBUTION.

 **Eigen is a C++ template library for linear algebra: matrices, vectors, numerical solvers, and related algorithms.**