Updated the Eigen library to 3.3.5

This commit is contained in:
bubnikv 2018-08-17 17:33:29 +02:00
parent 5b94f53cd7
commit 0eba590fc6
95 changed files with 1381 additions and 874 deletions

View File

@ -9,6 +9,7 @@
#define EIGEN_CHOLESKY_MODULE_H
#include "Core"
#include "Jacobi"
#include "src/Core/util/DisableStupidWarnings.h"
@ -31,7 +32,11 @@
#include "src/Cholesky/LLT.h"
#include "src/Cholesky/LDLT.h"
#ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h"
#endif
#include "src/Cholesky/LLT_LAPACKE.h"
#endif

View File

@ -14,6 +14,22 @@
// first thing Eigen does: stop the compiler from committing suicide
#include "src/Core/util/DisableStupidWarnings.h"
#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA)
#define EIGEN_CUDACC __CUDACC__
#endif
#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA)
#define EIGEN_CUDA_ARCH __CUDA_ARCH__
#endif
#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
#define EIGEN_CUDACC_VER ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100))
#elif defined(__CUDACC_VER__)
#define EIGEN_CUDACC_VER __CUDACC_VER__
#else
#define EIGEN_CUDACC_VER 0
#endif
// Handle NVCC/CUDA/SYCL
#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
// Do not try asserts on CUDA and SYCL!
@ -155,6 +171,9 @@
#ifdef __AVX512DQ__
#define EIGEN_VECTORIZE_AVX512DQ
#endif
#ifdef __AVX512ER__
#define EIGEN_VECTORIZE_AVX512ER
#endif
#endif
// include files
@ -229,7 +248,7 @@
#if defined __CUDACC__
#define EIGEN_VECTORIZE_CUDA
#include <vector_types.h>
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
#if EIGEN_CUDACC_VER >= 70500
#define EIGEN_HAS_CUDA_FP16
#endif
#endif
@ -352,6 +371,7 @@ using std::ptrdiff_t;
#include "src/Core/MathFunctions.h"
#include "src/Core/GenericPacketMath.h"
#include "src/Core/MathFunctionsImpl.h"
#include "src/Core/arch/Default/ConjHelper.h"
#if defined EIGEN_VECTORIZE_AVX512
#include "src/Core/arch/SSE/PacketMath.h"
@ -367,6 +387,7 @@ using std::ptrdiff_t;
#include "src/Core/arch/AVX/MathFunctions.h"
#include "src/Core/arch/AVX/Complex.h"
#include "src/Core/arch/AVX/TypeCasting.h"
#include "src/Core/arch/SSE/TypeCasting.h"
#elif defined EIGEN_VECTORIZE_SSE
#include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/MathFunctions.h"

View File

@ -45,7 +45,11 @@
#include "src/Eigenvalues/GeneralizedEigenSolver.h"
#include "src/Eigenvalues/MatrixBaseEigenvalues.h"
#ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h"
#endif
#include "src/Eigenvalues/RealSchur_LAPACKE.h"
#include "src/Eigenvalues/ComplexSchur_LAPACKE.h"
#include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h"

View File

@ -28,7 +28,11 @@
#include "src/LU/FullPivLU.h"
#include "src/LU/PartialPivLU.h"
#ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h"
#endif
#include "src/LU/PartialPivLU_LAPACKE.h"
#endif
#include "src/LU/Determinant.h"

View File

@ -36,7 +36,11 @@
#include "src/QR/ColPivHouseholderQR.h"
#include "src/QR/CompleteOrthogonalDecomposition.h"
#ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h"
#endif
#include "src/QR/HouseholderQR_LAPACKE.h"
#include "src/QR/ColPivHouseholderQR_LAPACKE.h"
#endif

View File

@ -27,7 +27,7 @@ void qFree(void *ptr)
void *qRealloc(void *ptr, std::size_t size)
{
void* newPtr = Eigen::internal::aligned_malloc(size);
memcpy(newPtr, ptr, size);
std::memcpy(newPtr, ptr, size);
Eigen::internal::aligned_free(ptr);
return newPtr;
}

View File

@ -37,7 +37,11 @@
#include "src/SVD/JacobiSVD.h"
#include "src/SVD/BDCSVD.h"
#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h"
#endif
#include "src/SVD/JacobiSVD_LAPACKE.h"
#endif

View File

@ -248,7 +248,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
/** \brief Reports whether previous computation was successful.
*
* \returns \c Success if computation was succesful,
* \c NumericalIssue if the matrix.appears to be negative.
* \c NumericalIssue if the factorization failed because of a zero pivot.
*/
ComputationInfo info() const
{
@ -376,6 +376,8 @@ template<> struct ldlt_inplace<Lower>
if((rs>0) && pivot_is_valid)
A21 /= realAkk;
else if(rs>0)
ret = ret && (A21.array()==Scalar(0)).all();
if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed
else if(!pivot_is_valid) found_zero_pivot = true;
@ -568,13 +570,14 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons
// more precisely, use pseudo-inverse of D (see bug 241)
using std::abs;
const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
// In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
// as motivated by LAPACK's xGELSS:
// In some previous versions, tolerance was set to the max of 1/highest (or rather numeric_limits::min())
// and the maximal diagonal entry * epsilon as motivated by LAPACK's xGELSS:
// RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
// However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
// diagonal element is not well justified and leads to numerical issues in some cases.
// Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
// Using numeric_limits::min() gives us more robustness to denormals.
RealScalar tolerance = (std::numeric_limits<RealScalar>::min)();
for (Index i = 0; i < vecD.size(); ++i)
{

View File

@ -24,7 +24,7 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
*
* \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
* \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
* The other triangular part won't be read.
* The other triangular part won't be read.
*
* This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
* matrix A such that A = LL^* = U^*U, where L is lower triangular.
@ -41,14 +41,18 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
* Example: \include LLT_example.cpp
* Output: \verbinclude LLT_example.out
*
* \b Performance: for best performance, it is recommended to use a column-major storage format
* with the Lower triangular part (the default), or, equivalently, a row-major storage format
* with the Upper triangular part. Otherwise, you might get a 20% slowdown for the full factorization
* step, and rank-updates can be up to 3 times slower.
*
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
*
* Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered.
* Therefore, the strict lower part does not have to store correct values.
*
* \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
*/
/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
* the strict lower part does not have to store correct values.
*/
template<typename _MatrixType, int _UpLo> class LLT
{
public:
@ -146,7 +150,7 @@ template<typename _MatrixType, int _UpLo> class LLT
}
template<typename Derived>
void solveInPlace(MatrixBase<Derived> &bAndX) const;
void solveInPlace(const MatrixBase<Derived> &bAndX) const;
template<typename InputType>
LLT& compute(const EigenBase<InputType>& matrix);
@ -177,7 +181,7 @@ template<typename _MatrixType, int _UpLo> class LLT
/** \brief Reports whether previous computation was successful.
*
* \returns \c Success if computation was succesful,
* \c NumericalIssue if the matrix.appears to be negative.
* \c NumericalIssue if the matrix.appears not to be positive definite.
*/
ComputationInfo info() const
{
@ -425,7 +429,8 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
m_matrix.resize(size, size);
m_matrix = a.derived();
if (!internal::is_same_dense(m_matrix, a.derived()))
m_matrix = a.derived();
// Compute matrix L1 norm = max abs column sum.
m_l1_norm = RealScalar(0);
@ -485,11 +490,14 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
*
* This version avoids a copy when the right hand side matrix b is not needed anymore.
*
* \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
* This function will const_cast it, so constness isn't honored here.
*
* \sa LLT::solve(), MatrixBase::llt()
*/
template<typename MatrixType, int _UpLo>
template<typename Derived>
void LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
void LLT<MatrixType,_UpLo>::solveInPlace(const MatrixBase<Derived> &bAndX) const
{
eigen_assert(m_isInitialized && "LLT is not initialized.");
eigen_assert(m_matrix.rows()==bAndX.rows());

View File

@ -231,10 +231,16 @@ class Array
: Base(other)
{ }
private:
struct PrivateType {};
public:
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other,
typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,
PrivateType>::type = PrivateType())
: Base(other.derived())
{ }

View File

@ -175,7 +175,7 @@ template<typename Derived> class ArrayBase
*/
template<typename Derived>
template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
{
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
@ -188,7 +188,7 @@ ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
*/
template<typename Derived>
template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
{
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
@ -201,7 +201,7 @@ ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
*/
template<typename Derived>
template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
{
call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
@ -214,7 +214,7 @@ ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
*/
template<typename Derived>
template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
{
call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());

View File

@ -32,7 +32,8 @@ struct traits<ArrayWrapper<ExpressionType> >
// Let's remove NestByRefBit
enum {
Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
Flags = Flags0 & ~NestByRefBit
LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
};
};
}
@ -129,7 +130,8 @@ struct traits<MatrixWrapper<ExpressionType> >
// Let's remove NestByRefBit
enum {
Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
Flags = Flags0 & ~NestByRefBit
LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
};
};
}

View File

@ -39,7 +39,7 @@ public:
enum {
DstAlignment = DstEvaluator::Alignment,
SrcAlignment = SrcEvaluator::Alignment,
DstHasDirectAccess = DstFlags & DirectAccessBit,
DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
};
@ -83,7 +83,7 @@ private:
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
&& (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
&& (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */

View File

@ -84,7 +84,8 @@ class vml_assign_traits
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
resize_if_allowed(dst, src, func); \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
@ -144,7 +145,8 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
resize_if_allowed(dst, src, func); \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \

View File

@ -977,7 +977,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
? int(outer_stride_at_compile_time<ArgType>::ret)
: int(inner_stride_at_compile_time<ArgType>::ret),
MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0,
MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
@ -987,7 +987,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
PacketAlignment = unpacket_traits<PacketScalar>::alignment,
Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic)
&& (OuterStrideAtCompileTime!=0)
&& (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
};
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
@ -1018,14 +1020,16 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block)
: m_argImpl(block.nestedExpression()),
m_startRow(block.startRow()),
m_startCol(block.startCol())
m_startCol(block.startCol()),
m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0)
{ }
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
enum {
RowsAtCompileTime = XprType::RowsAtCompileTime
RowsAtCompileTime = XprType::RowsAtCompileTime,
ForwardLinearAccess = InnerPanel && bool(evaluator<ArgType>::Flags&LinearAccessBit)
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@ -1037,7 +1041,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{
return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
if (ForwardLinearAccess)
return m_argImpl.coeff(m_linear_offset.value() + index);
else
return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@ -1049,7 +1056,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index index)
{
return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
if (ForwardLinearAccess)
return m_argImpl.coeffRef(m_linear_offset.value() + index);
else
return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
}
template<int LoadMode, typename PacketType>
@ -1063,8 +1073,11 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_STRONG_INLINE
PacketType packet(Index index) const
{
return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0);
if (ForwardLinearAccess)
return m_argImpl.template packet<LoadMode,PacketType>(m_linear_offset.value() + index);
else
return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0);
}
template<int StoreMode, typename PacketType>
@ -1078,15 +1091,19 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x)
{
return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0,
x);
if (ForwardLinearAccess)
return m_argImpl.template writePacket<StoreMode,PacketType>(m_linear_offset.value() + index, x);
else
return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0,
x);
}
protected:
evaluator<ArgType> m_argImpl;
const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
const variable_if_dynamic<Index, InnerPanel ? Dynamic : 0> m_linear_offset;
};
// TODO: This evaluator does not actually use the child evaluator;

View File

@ -105,7 +105,7 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
*/
template<typename Derived>
template<typename CustomNullaryOp>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
{
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
@ -150,7 +150,7 @@ DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
*/
template<typename Derived>
template<typename CustomNullaryOp>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
{
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
@ -192,7 +192,7 @@ DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
* \sa class CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Constant(Index size, const Scalar& value)
{
return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
@ -208,7 +208,7 @@ DenseBase<Derived>::Constant(Index size, const Scalar& value)
* \sa class CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Constant(const Scalar& value)
{
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
@ -220,7 +220,7 @@ DenseBase<Derived>::Constant(const Scalar& value)
* \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&)
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@ -232,7 +232,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const
* \sa LinSpaced(Scalar,Scalar)
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@ -264,7 +264,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@ -276,7 +276,7 @@ DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
* Special version for fixed size types which does not require the size parameter.
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@ -286,7 +286,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
template<typename Derived>
bool DenseBase<Derived>::isApproxToConstant
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant
(const Scalar& val, const RealScalar& prec) const
{
typename internal::nested_eval<Derived,1>::type self(derived());
@ -301,7 +301,7 @@ bool DenseBase<Derived>::isApproxToConstant
*
* \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
template<typename Derived>
bool DenseBase<Derived>::isConstant
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isConstant
(const Scalar& val, const RealScalar& prec) const
{
return isApproxToConstant(val, prec);
@ -312,7 +312,7 @@ bool DenseBase<Derived>::isConstant
* \sa setConstant(), Constant(), class CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
{
setConstant(val);
}
@ -322,7 +322,7 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
* \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
{
return derived() = Constant(rows(), cols(), val);
}
@ -337,7 +337,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
* \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
{
resize(size);
@ -356,7 +356,7 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
* \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
{
resize(rows, cols);
@ -380,7 +380,7 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
* \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
@ -400,7 +400,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con
* \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return setLinSpaced(size(), low, high);
@ -423,7 +423,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low,
* \sa Zero(), Zero(Index)
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Zero(Index rows, Index cols)
{
return Constant(rows, cols, Scalar(0));
@ -446,7 +446,7 @@ DenseBase<Derived>::Zero(Index rows, Index cols)
* \sa Zero(), Zero(Index,Index)
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Zero(Index size)
{
return Constant(size, Scalar(0));
@ -463,7 +463,7 @@ DenseBase<Derived>::Zero(Index size)
* \sa Zero(Index), Zero(Index,Index)
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Zero()
{
return Constant(Scalar(0));
@ -478,7 +478,7 @@ DenseBase<Derived>::Zero()
* \sa class CwiseNullaryOp, Zero()
*/
template<typename Derived>
bool DenseBase<Derived>::isZero(const RealScalar& prec) const
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isZero(const RealScalar& prec) const
{
typename internal::nested_eval<Derived,1>::type self(derived());
for(Index j = 0; j < cols(); ++j)
@ -496,7 +496,7 @@ bool DenseBase<Derived>::isZero(const RealScalar& prec) const
* \sa class CwiseNullaryOp, Zero()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
{
return setConstant(Scalar(0));
}
@ -511,7 +511,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
* \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setZero(Index newSize)
{
resize(newSize);
@ -529,7 +529,7 @@ PlainObjectBase<Derived>::setZero(Index newSize)
* \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setZero(Index rows, Index cols)
{
resize(rows, cols);
@ -553,7 +553,7 @@ PlainObjectBase<Derived>::setZero(Index rows, Index cols)
* \sa Ones(), Ones(Index), isOnes(), class Ones
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Ones(Index rows, Index cols)
{
return Constant(rows, cols, Scalar(1));
@ -576,7 +576,7 @@ DenseBase<Derived>::Ones(Index rows, Index cols)
* \sa Ones(), Ones(Index,Index), isOnes(), class Ones
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Ones(Index newSize)
{
return Constant(newSize, Scalar(1));
@ -593,7 +593,7 @@ DenseBase<Derived>::Ones(Index newSize)
* \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Ones()
{
return Constant(Scalar(1));
@ -608,7 +608,7 @@ DenseBase<Derived>::Ones()
* \sa class CwiseNullaryOp, Ones()
*/
template<typename Derived>
bool DenseBase<Derived>::isOnes
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isOnes
(const RealScalar& prec) const
{
return isApproxToConstant(Scalar(1), prec);
@ -622,7 +622,7 @@ bool DenseBase<Derived>::isOnes
* \sa class CwiseNullaryOp, Ones()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
{
return setConstant(Scalar(1));
}
@ -637,7 +637,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
* \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setOnes(Index newSize)
{
resize(newSize);
@ -655,7 +655,7 @@ PlainObjectBase<Derived>::setOnes(Index newSize)
* \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived&
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
{
resize(rows, cols);
@ -679,7 +679,7 @@ PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
* \sa Identity(), setIdentity(), isIdentity()
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
MatrixBase<Derived>::Identity(Index rows, Index cols)
{
return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
@ -696,7 +696,7 @@ MatrixBase<Derived>::Identity(Index rows, Index cols)
* \sa Identity(Index,Index), setIdentity(), isIdentity()
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
MatrixBase<Derived>::Identity()
{
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
@ -771,7 +771,7 @@ struct setIdentity_impl<Derived, true>
* \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
{
return internal::setIdentity_impl<Derived>::run(derived());
}
@ -787,7 +787,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
* \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity()
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
{
derived().resize(rows, cols);
return setIdentity();
@ -800,7 +800,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index
* \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);
@ -815,7 +815,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
* \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return BasisReturnType(SquareMatrixType::Identity(),i);
@ -828,7 +828,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
{ return Derived::Unit(0); }
/** \returns an expression of the Y axis unit vector (0,1{,0}^*)
@ -838,7 +838,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
{ return Derived::Unit(1); }
/** \returns an expression of the Z axis unit vector (0,0,1{,0}^*)
@ -848,7 +848,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
{ return Derived::Unit(2); }
/** \returns an expression of the W axis unit vector (0,0,0,1)
@ -858,7 +858,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
{ return Derived::Unit(3); }
} // end namespace Eigen

View File

@ -296,7 +296,7 @@ template<typename Derived> class DenseBase
EIGEN_DEVICE_FUNC
Derived& operator=(const ReturnByValue<OtherDerived>& func);
/** \ínternal
/** \internal
* Copies \a other into *this without evaluating other. \returns a reference to *this.
* \deprecated */
template<typename OtherDerived>
@ -484,9 +484,9 @@ template<typename Derived> class DenseBase
return derived().coeff(0,0);
}
bool all() const;
bool any() const;
Index count() const;
EIGEN_DEVICE_FUNC bool all() const;
EIGEN_DEVICE_FUNC bool any() const;
EIGEN_DEVICE_FUNC Index count() const;
typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;

View File

@ -70,7 +70,10 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
EIGEN_DEVICE_FUNC
explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index)
{
eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() );
}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)

View File

@ -31,7 +31,8 @@ struct dot_nocheck
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
typedef typename conj_prod::result_type ResScalar;
EIGEN_DEVICE_FUNC
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
EIGEN_STRONG_INLINE
static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
return a.template binaryExpr<conj_prod>(b).sum();
}
@ -43,7 +44,8 @@ struct dot_nocheck<T, U, true>
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
typedef typename conj_prod::result_type ResScalar;
EIGEN_DEVICE_FUNC
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
EIGEN_STRONG_INLINE
static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
return a.transpose().template binaryExpr<conj_prod>(b).sum();
}
@ -65,6 +67,7 @@ struct dot_nocheck<T, U, true>
template<typename Derived>
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE
typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
{
@ -102,7 +105,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
* \sa lpNorm(), dot(), squaredNorm()
*/
template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
{
return numext::sqrt(squaredNorm());
}
@ -117,7 +120,7 @@ inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real Matr
* \sa norm(), normalize()
*/
template<typename Derived>
inline const typename MatrixBase<Derived>::PlainObject
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::normalized() const
{
typedef typename internal::nested_eval<Derived,2>::type _Nested;
@ -139,7 +142,7 @@ MatrixBase<Derived>::normalized() const
* \sa norm(), normalized()
*/
template<typename Derived>
inline void MatrixBase<Derived>::normalize()
EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize()
{
RealScalar z = squaredNorm();
// NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
@ -160,7 +163,7 @@ inline void MatrixBase<Derived>::normalize()
* \sa stableNorm(), stableNormalize(), normalized()
*/
template<typename Derived>
inline const typename MatrixBase<Derived>::PlainObject
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::stableNormalized() const
{
typedef typename internal::nested_eval<Derived,3>::type _Nested;
@ -185,7 +188,7 @@ MatrixBase<Derived>::stableNormalized() const
* \sa stableNorm(), stableNormalized(), normalize()
*/
template<typename Derived>
inline void MatrixBase<Derived>::stableNormalize()
EIGEN_STRONG_INLINE void MatrixBase<Derived>::stableNormalize()
{
RealScalar w = cwiseAbs().maxCoeff();
RealScalar z = (derived()/w).squaredNorm();

View File

@ -14,6 +14,7 @@
namespace Eigen {
/** \class EigenBase
* \ingroup Core_Module
*
* Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
*
@ -128,6 +129,7 @@ template<typename Derived> struct EigenBase
*/
template<typename Derived>
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
{
call_assignment(derived(), other.derived());
@ -136,6 +138,7 @@ Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
template<typename Derived>
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
{
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
@ -144,6 +147,7 @@ Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
template<typename Derived>
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
{
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());

View File

@ -24,12 +24,17 @@ template<int Rows, int Cols, int Depth> struct product_type_selector;
template<int Size, int MaxSize> struct product_size_category
{
enum { is_large = MaxSize == Dynamic ||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
(Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
value = is_large ? Large
: Size == 1 ? 1
: Small
enum {
#ifndef EIGEN_CUDA_ARCH
is_large = MaxSize == Dynamic ||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
(Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
#else
is_large = 0,
#endif
value = is_large ? Large
: Size == 1 ? 1
: Small
};
};
@ -379,8 +384,6 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
*
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
*/
#ifndef __CUDACC__
template<typename Derived>
template<typename OtherDerived>
inline const Product<Derived, OtherDerived>
@ -412,8 +415,6 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
return Product<Derived, OtherDerived>(derived(), other.derived());
}
#endif // __CUDACC__
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
*
* The returned product will behave like any other expressions: the coefficients of the product will be

View File

@ -230,7 +230,7 @@ pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(
* duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
* Currently, this function is only used for scalar * complex products.
*/
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
/** \internal \returns a packet with elements of \a *from quadrupled.
@ -278,7 +278,7 @@ inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
}
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
template<typename Packet> inline Packet
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
plset(const typename unpacket_traits<Packet>::type& a) { return a; }
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
@ -482,7 +482,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& fro
* by the current computation.
*/
template<typename Packet, int LoadMode>
inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
{
return ploadt<Packet, LoadMode>(from);
}

View File

@ -20,11 +20,17 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
{
typedef traits<PlainObjectType> TraitsBase;
enum {
PlainObjectTypeInnerSize = ((traits<PlainObjectType>::Flags&RowMajorBit)==RowMajorBit)
? PlainObjectType::ColsAtCompileTime
: PlainObjectType::RowsAtCompileTime,
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
? int(PlainObjectType::InnerStrideAtCompileTime)
: int(StrideType::InnerStrideAtCompileTime),
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
? int(PlainObjectType::OuterStrideAtCompileTime)
? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic
? Dynamic
: int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize))
: int(StrideType::OuterStrideAtCompileTime),
Alignment = int(MapOptions)&int(AlignedMask),
Flags0 = TraitsBase::Flags & (~NestByRefBit),
@ -107,10 +113,11 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
EIGEN_DEVICE_FUNC
inline Index outerStride() const
{
return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
: IsVectorAtCompileTime ? this->size()
: int(Flags)&RowMajorBit ? this->cols()
: this->rows();
return int(StrideType::OuterStrideAtCompileTime) != 0 ? m_stride.outer()
: int(internal::traits<Map>::OuterStrideAtCompileTime) != Dynamic ? Index(internal::traits<Map>::OuterStrideAtCompileTime)
: IsVectorAtCompileTime ? (this->size() * innerStride())
: (int(Flags)&RowMajorBit) ? (this->cols() * innerStride())
: (this->rows() * innerStride());
}
/** Constructor in the fixed-size case.

View File

@ -348,31 +348,7 @@ struct norm1_retval
* Implementation of hypot *
****************************************************************************/
template<typename Scalar>
struct hypot_impl
{
typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar& x, const Scalar& y)
{
EIGEN_USING_STD_MATH(abs);
EIGEN_USING_STD_MATH(sqrt);
RealScalar _x = abs(x);
RealScalar _y = abs(y);
Scalar p, qp;
if(_x>_y)
{
p = _x;
qp = _y / p;
}
else
{
p = _y;
qp = _x / p;
}
if(p==RealScalar(0)) return RealScalar(0);
return p * sqrt(RealScalar(1) + qp*qp);
}
};
template<typename Scalar> struct hypot_impl;
template<typename Scalar>
struct hypot_retval
@ -495,7 +471,7 @@ namespace std_fallback {
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_USING_STD_MATH(log);
Scalar x1p = RealScalar(1) + x;
return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
return numext::equal_strict(x1p, Scalar(1)) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
}
}
@ -1061,11 +1037,24 @@ double log(const double &x) { return ::log(x); }
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
typename NumTraits<T>::Real abs(const T &x) {
typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
abs(const T &x) {
EIGEN_USING_STD_MATH(abs);
return abs(x);
}
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type
abs(const T &x) {
return x;
}
#if defined(__SYCL_DEVICE_ONLY__)
EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); }
EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
#ifdef __CUDACC__
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float abs(const float &x) { return ::fabsf(x); }

View File

@ -71,6 +71,29 @@ T generic_fast_tanh_float(const T& a_x)
return pdiv(p, q);
}
template<typename RealScalar>
EIGEN_STRONG_INLINE
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
{
EIGEN_USING_STD_MATH(sqrt);
RealScalar p, qp;
p = numext::maxi(x,y);
if(p==RealScalar(0)) return RealScalar(0);
qp = numext::mini(y,x) / p;
return p * sqrt(RealScalar(1) + qp*qp);
}
template<typename Scalar>
struct hypot_impl
{
typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar& x, const Scalar& y)
{
EIGEN_USING_STD_MATH(abs);
return positive_real_hypot<RealScalar>(abs(x), abs(y));
}
};
} // end namespace internal
} // end namespace Eigen

View File

@ -160,20 +160,11 @@ template<typename Derived> class MatrixBase
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator-=(const MatrixBase<OtherDerived>& other);
#ifdef __CUDACC__
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
const Product<Derived,OtherDerived,LazyProduct>
operator*(const MatrixBase<OtherDerived> &other) const
{ return this->lazyProduct(other); }
#else
template<typename OtherDerived>
const Product<Derived,OtherDerived>
operator*(const MatrixBase<OtherDerived> &other) const;
#endif
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
const Product<Derived,OtherDerived,LazyProduct>
@ -294,7 +285,7 @@ template<typename Derived> class MatrixBase
* fuzzy comparison such as isApprox()
* \sa isApprox(), operator!= */
template<typename OtherDerived>
inline bool operator==(const MatrixBase<OtherDerived>& other) const
EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const
{ return cwiseEqual(other).all(); }
/** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other.
@ -302,7 +293,7 @@ template<typename Derived> class MatrixBase
* fuzzy comparison such as isApprox()
* \sa isApprox(), operator== */
template<typename OtherDerived>
inline bool operator!=(const MatrixBase<OtherDerived>& other) const
EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const
{ return cwiseNotEqual(other).any(); }
NoAlias<Derived,Eigen::MatrixBase > noalias();

View File

@ -215,6 +215,8 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
EIGEN_DEVICE_FUNC
static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
static inline int digits10() { return NumTraits<Scalar>::digits10(); }
};
template<> struct NumTraits<std::string>

View File

@ -577,6 +577,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
* \a data pointers.
*
* Here is an example using strides:
* \include Matrix_Map_stride.cpp
* Output: \verbinclude Matrix_Map_stride.out
*
* \see class Map
*/
//@{

View File

@ -97,8 +97,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option,
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
}
EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
@ -127,7 +127,7 @@ public:
using Base::derived;
typedef typename Base::Scalar Scalar;
operator const Scalar() const
EIGEN_STRONG_INLINE operator const Scalar() const
{
return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
}
@ -162,7 +162,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
public:
EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const
{
EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
@ -170,7 +170,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
return internal::evaluator<Derived>(derived()).coeff(row,col);
}
EIGEN_DEVICE_FUNC Scalar coeff(Index i) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const
{
EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );

View File

@ -32,7 +32,7 @@ struct evaluator<Product<Lhs, Rhs, Options> >
typedef Product<Lhs, Rhs, Options> XprType;
typedef product_evaluator<XprType> Base;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
};
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
@ -55,7 +55,7 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
const Product<Lhs, Rhs, DefaultProduct> > XprType;
typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
: Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
{}
};
@ -68,7 +68,7 @@ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
: Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
xpr.index() ))
@ -207,6 +207,12 @@ struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename
static const bool value = true;
};
template<typename OtherXpr, typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
static const bool value = true;
};
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
struct assignment_from_xpr_op_product
{
@ -240,19 +246,19 @@ template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
{
template<typename Dst>
static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
}
template<typename Dst>
static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
}
template<typename Dst>
static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
};
@ -306,25 +312,25 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
};
template<typename Dst>
static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
}
template<typename Dst>
static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
}
template<typename Dst>
static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
}
template<typename Dst>
static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{
internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
}
@ -779,7 +785,11 @@ public:
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
Alignment = evaluator<MatrixType>::Alignment
Alignment = evaluator<MatrixType>::Alignment,
AsScalarProduct = (DiagonalType::SizeAtCompileTime==1)
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
};
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
@ -791,7 +801,10 @@ public:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
{
return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
if(AsScalarProduct)
return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);
else
return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
}
protected:

View File

@ -407,7 +407,7 @@ protected:
*/
template<typename Derived>
template<typename Func>
typename internal::traits<Derived>::Scalar
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");

View File

@ -95,6 +95,8 @@ protected:
template<typename Expression>
EIGEN_DEVICE_FUNC void construct(Expression& expr)
{
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression);
if(PlainObjectType::RowsAtCompileTime==1)
{
eigen_assert(expr.rows()==1 || expr.cols()==1);

View File

@ -71,7 +71,9 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
EIGEN_DEVICE_FUNC
explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
{}
{
EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY);
}
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_matrix.rows(); }
@ -189,7 +191,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);
}
typedef SelfAdjointView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
typedef SelfAdjointView<const MatrixConjugateReturnType,UpLo> ConjugateReturnType;
/** \sa MatrixBase::conjugate() const */
EIGEN_DEVICE_FUNC
inline const ConjugateReturnType conjugate() const

View File

@ -15,33 +15,29 @@ namespace Eigen {
// TODO generalize the scalar type of 'other'
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
return derived();
}
template<typename Derived>
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
return derived();
}
template<typename Derived>
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
return derived();
}
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
return derived();
}

View File

@ -34,12 +34,12 @@ template<typename Decomposition, typename RhsType,typename StorageKind> struct s
template<typename Decomposition, typename RhsType>
struct solve_traits<Decomposition,RhsType,Dense>
{
typedef Matrix<typename RhsType::Scalar,
typedef typename make_proper_matrix_type<typename RhsType::Scalar,
Decomposition::ColsAtCompileTime,
RhsType::ColsAtCompileTime,
RhsType::PlainObject::Options,
Decomposition::MaxColsAtCompileTime,
RhsType::MaxColsAtCompileTime> PlainObject;
RhsType::MaxColsAtCompileTime>::type PlainObject;
};
template<typename Decomposition, typename RhsType>

View File

@ -165,12 +165,13 @@ MatrixBase<Derived>::stableNorm() const
typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
DerivedCopy copy(derived());
const DerivedCopy copy(derived());
enum {
CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit)
|| (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough
) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT) // ifwe cannot allocate on the stack, then let's not bother about this optimization
) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT)
&& (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization
};
typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper;

View File

@ -384,7 +384,7 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
const Product<OtherDerived, Transpose, AliasFreeProduct>
operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt)
{
return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt.derived());
return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt);
}
/** \returns the \a matrix with the inverse transpositions applied to the rows.

View File

@ -204,23 +204,7 @@ template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
}
};
template<> struct conj_helper<Packet8f, Packet4cf, false,false>
{
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
{ return Packet4cf(Eigen::internal::pmul(x, y.v)); }
};
template<> struct conj_helper<Packet4cf, Packet8f, false,false>
{
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
{ return Packet4cf(Eigen::internal::pmul(x.v, y)); }
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{
@ -400,23 +384,7 @@ template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
}
};
template<> struct conj_helper<Packet4d, Packet2cd, false,false>
{
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
{ return Packet2cd(Eigen::internal::pmul(x, y.v)); }
};
template<> struct conj_helper<Packet2cd, Packet4d, false,false>
{
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
{ return Packet2cd(Eigen::internal::pmul(x.v, y)); }
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{

View File

@ -308,9 +308,9 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
}
#ifndef EIGEN_VECTORIZE_AVX512
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
#endif
template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
@ -333,9 +333,12 @@ template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
{
__m256d tmp = _mm256_shuffle_pd(a,a,5);
return _mm256_permute2f128_pd(tmp, tmp, 1);
#if 0
// This version is unlikely to be faster as _mm256_shuffle_ps and _mm256_permute_pd
// exhibit the same latency/throughput, but it is here for future reference/benchmarking...
__m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
return _mm256_permute_pd(swap_halves,5);
#endif
}
// pabs should be ok

View File

@ -88,9 +88,9 @@ plog<Packet16f>(const Packet16f& _x) {
// x = x + x - 1.0;
// } else { x = x - 1.0; }
__mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps());
Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x);
x = psub(x, p16f_1);
e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps()));
e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1));
x = padd(x, tmp);
Packet16f x2 = pmul(x, x);
@ -119,8 +119,9 @@ plog<Packet16f>(const Packet16f& _x) {
x = padd(x, y2);
// Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf,
_mm512_mask_blend_ps(invalid_mask, p16f_nan, x));
return _mm512_mask_blend_ps(iszero_mask,
_mm512_mask_blend_ps(invalid_mask, x, p16f_nan),
p16f_minus_inf);
}
#endif
@ -266,8 +267,7 @@ psqrt<Packet16f>(const Packet16f& _x) {
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
__mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x),
_mm512_setzero_ps());
Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(), _mm512_rsqrt14_ps(_x));
// Do a single step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
@ -289,8 +289,7 @@ psqrt<Packet8d>(const Packet8d& _x) {
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
__mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x),
_mm512_setzero_pd());
Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(), _mm512_rsqrt14_pd(_x));
// Do a first step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
@ -333,20 +332,18 @@ prsqrt<Packet16f>(const Packet16f& _x) {
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
__mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(),
_mm512_rsqrt14_ps(_x));
Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x), _mm512_setzero_ps());
// Fill in NaNs and Infs for the negative/zero entries.
__mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
Packet16f infs_and_nans = _mm512_mask_blend_ps(
neg_mask, p16f_nan,
_mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps()));
neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf), p16f_nan);
// Do a single step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
// Insert NaNs and Infs in all the right places.
return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x);
return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans);
}
template <>
@ -363,14 +360,12 @@ prsqrt<Packet8d>(const Packet8d& _x) {
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
__mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(),
_mm512_rsqrt14_pd(_x));
Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x), _mm512_setzero_pd());
// Fill in NaNs and Infs for the negative/zero entries.
__mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
Packet8d infs_and_nans = _mm512_mask_blend_pd(
neg_mask, p8d_nan,
_mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd()));
neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf), p8d_nan);
// Do a first step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
@ -379,9 +374,9 @@ prsqrt<Packet8d>(const Packet8d& _x) {
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
// Insert NaNs and Infs in all the right places.
return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x);
return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans);
}
#else
#elif defined(EIGEN_VECTORIZE_AVX512ER)
template <>
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
return _mm512_rsqrt28_ps(x);

View File

@ -618,9 +618,9 @@ EIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) {
pstore(to, pa);
}
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template <>
EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {

View File

@ -224,23 +224,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
}
};
template<> struct conj_helper<Packet4f, Packet2cf, false,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
{ return Packet2cf(internal::pmul<Packet4f>(x, y.v)); }
};
template<> struct conj_helper<Packet2cf, Packet4f, false,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
{ return Packet2cf(internal::pmul<Packet4f>(x.v, y)); }
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
@ -416,23 +400,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
return pconj(internal::pmul(a, b));
}
};
template<> struct conj_helper<Packet2d, Packet1cd, false,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
{ return Packet1cd(internal::pmul<Packet2d>(x, y.v)); }
};
template<> struct conj_helper<Packet1cd, Packet2d, false,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
{ return Packet1cd(internal::pmul<Packet2d>(x.v, y)); }
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{

View File

@ -388,10 +388,28 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); }
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; }
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
{
#ifdef __VSX__
Packet4f ret;
__asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
return ret;
#else
return vec_min(a, b);
#endif
}
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
{
#ifdef __VSX__
Packet4f ret;
__asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
return ret;
#else
return vec_max(a, b);
#endif
}
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
@ -910,9 +928,19 @@ template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const
// for some weird raisons, it has to be overloaded for packet of integers
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b)
{
Packet2d ret;
__asm__ ("xvcmpgedp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
return ret;
}
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b)
{
Packet2d ret;
__asm__ ("xvcmpgtdp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
return ret;
}
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
@ -1022,7 +1050,7 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) {
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
Packet2l select = { ifPacket.select[0], ifPacket.select[1] };
Packet2bl mask = vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE));
Packet2bl mask = reinterpret_cast<Packet2bl>( vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE)) );
return vec_sel(elsePacket, thenPacket, mask);
}
#endif // __VSX__

View File

@ -13,7 +13,7 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
@ -147,55 +147,55 @@ namespace half_impl {
// versions to get the ALU speed increased), but you do save the
// conversion steps back and forth.
__device__ half operator + (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
return __hadd(a, b);
}
__device__ half operator * (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
return __hmul(a, b);
}
__device__ half operator - (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
return __hsub(a, b);
}
__device__ half operator / (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
float num = __half2float(a);
float denom = __half2float(b);
return __float2half(num / denom);
}
__device__ half operator - (const half& a) {
EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
return __hneg(a);
}
__device__ half& operator += (half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {
a = a + b;
return a;
}
__device__ half& operator *= (half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) {
a = a * b;
return a;
}
__device__ half& operator -= (half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {
a = a - b;
return a;
}
__device__ half& operator /= (half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {
a = a / b;
return a;
}
__device__ bool operator == (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {
return __heq(a, b);
}
__device__ bool operator != (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {
return __hne(a, b);
}
__device__ bool operator < (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {
return __hlt(a, b);
}
__device__ bool operator <= (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {
return __hle(a, b);
}
__device__ bool operator > (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {
return __hgt(a, b);
}
__device__ bool operator >= (const half& a, const half& b) {
EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
return __hge(a, b);
}
@ -238,10 +238,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b)
return a;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
return float(a) == float(b);
return numext::equal_strict(float(a),float(b));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
return float(a) != float(b);
return numext::not_equal_strict(float(a), float(b));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
return float(a) < float(b);
@ -386,11 +386,15 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
return result;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
return half(::expf(float(a)));
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
return half(hexp(a));
#else
return half(::expf(float(a)));
#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return Eigen::half(::hlog(a));
#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
return half(::hlog(a));
#else
return half(::logf(float(a)));
#endif
@ -402,7 +406,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
return half(::log10f(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
return half(::sqrtf(float(a)));
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
return half(hsqrt(a));
#else
return half(::sqrtf(float(a)));
#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
return half(::powf(float(a), float(b)));
@ -420,10 +428,18 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
return half(::tanhf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
return half(hfloor(a));
#else
return half(::floorf(float(a)));
#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
return half(hceil(a));
#else
return half(::ceilf(float(a)));
#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
@ -474,9 +490,59 @@ template<> struct is_arithmetic<half> { enum { value = true }; };
} // end namespace internal
} // end namespace Eigen
namespace std {
template<>
struct numeric_limits<Eigen::half> {
static const bool is_specialized = true;
static const bool is_signed = true;
static const bool is_integer = false;
static const bool is_exact = false;
static const bool has_infinity = true;
static const bool has_quiet_NaN = true;
static const bool has_signaling_NaN = true;
static const float_denorm_style has_denorm = denorm_present;
static const bool has_denorm_loss = false;
static const std::float_round_style round_style = std::round_to_nearest;
static const bool is_iec559 = false;
static const bool is_bounded = false;
static const bool is_modulo = false;
static const int digits = 11;
static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
static const int radix = 2;
static const int min_exponent = -13;
static const int min_exponent10 = -4;
static const int max_exponent = 16;
static const int max_exponent10 = 4;
static const bool traps = true;
static const bool tinyness_before = false;
static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
static Eigen::half round_error() { return Eigen::half(0.5); }
static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
};
}
namespace Eigen {
template<> struct NumTraits<Eigen::half>
: GenericNumTraits<Eigen::half>
{
enum {
IsSigned = true,
IsInteger = false,
IsComplex = false,
RequireInitialization = false
};
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
return half_impl::raw_uint16_to_half(0x0800);
}
@ -507,7 +573,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
return Eigen::half(::expf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
return Eigen::half(::hlog(a));
#else
return Eigen::half(::logf(float(a)));

View File

@ -291,7 +291,7 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<float4,4>& kernel) {
double tmp = kernel.packet[0].y;
float tmp = kernel.packet[0].y;
kernel.packet[0].y = kernel.packet[1].x;
kernel.packet[1].x = tmp;

View File

@ -275,7 +275,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
return __floats2half2_rn(r1, r2);
}
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
template<> __device__ EIGEN_STRONG_INLINE
half2 plog<half2>(const half2& a) {

View File

@ -0,0 +1,29 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_ARCH_CONJ_HELPER_H
#define EIGEN_ARCH_CONJ_HELPER_H
#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL) \
template<> struct conj_helper<PACKET_REAL, PACKET_CPLX, false,false> { \
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const \
{ return padd(c, pmul(x,y)); } \
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const \
{ return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); } \
}; \
\
template<> struct conj_helper<PACKET_CPLX, PACKET_REAL, false,false> { \
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const \
{ return padd(c, pmul(x,y)); } \
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const \
{ return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); } \
};
#endif // EIGEN_ARCH_CONJ_HELPER_H

View File

@ -67,7 +67,7 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
float32x2_t r64;
r64 = vld1_f32((float *)&from);
r64 = vld1_f32((const float *)&from);
return Packet2cf(vcombine_f32(r64, r64));
}
@ -142,7 +142,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
}
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((const float *)addr); }
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
@ -265,6 +265,8 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
}
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for NEON
@ -275,7 +277,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
s = vmulq_f32(b.v, b.v);
rev_s = vrev64q_f32(s);
return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s,rev_s)));
}
EIGEN_DEVICE_FUNC inline void
@ -381,7 +383,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((const double *)addr); }
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
{
@ -456,6 +458,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
}
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for NEON

View File

@ -36,12 +36,43 @@ namespace internal {
#endif
#endif
#if EIGEN_COMP_MSVC
// In MSVC's arm_neon.h header file, all NEON vector types
// are aliases to the same underlying type __n128.
// We thus have to wrap them to make them different C++ types.
// (See also bug 1428)
template<typename T,int unique_id>
struct eigen_packet_wrapper
{
operator T&() { return m_val; }
operator const T&() const { return m_val; }
eigen_packet_wrapper() {}
eigen_packet_wrapper(const T &v) : m_val(v) {}
eigen_packet_wrapper& operator=(const T &v) {
m_val = v;
return *this;
}
T m_val;
};
typedef eigen_packet_wrapper<float32x2_t,0> Packet2f;
typedef eigen_packet_wrapper<float32x4_t,1> Packet4f;
typedef eigen_packet_wrapper<int32x4_t ,2> Packet4i;
typedef eigen_packet_wrapper<int32x2_t ,3> Packet2i;
typedef eigen_packet_wrapper<uint32x4_t ,4> Packet4ui;
#else
typedef float32x2_t Packet2f;
typedef float32x4_t Packet4f;
typedef int32x4_t Packet4i;
typedef int32x2_t Packet2i;
typedef uint32x4_t Packet4ui;
#endif // EIGEN_COMP_MSVC
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
const Packet4f p4f_##NAME = pset1<Packet4f>(X)
@ -51,14 +82,17 @@ typedef uint32x4_t Packet4ui;
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
// which available on LLVM and GCC (at least)
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
#if EIGEN_ARCH_ARM64
// __builtin_prefetch tends to do nothing on ARM64 compilers because the
// prefetch instructions there are too detailed for __builtin_prefetch to map
// meaningfully to them.
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) : );
#elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
#define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
#elif defined __pld
#define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
#elif !EIGEN_ARCH_ARM64
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
#elif EIGEN_ARCH_ARM32
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : );
#else
// by default no explicit prefetching
#define EIGEN_ARM_PREFETCH(ADDR)
@ -113,7 +147,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from)
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
{
const float32_t f[] = {0, 1, 2, 3};
const float f[] = {0, 1, 2, 3};
Packet4f countdown = vld1q_f32(f);
return vaddq_f32(pset1<Packet4f>(a), countdown);
}

View File

@ -128,7 +128,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
}
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
@ -229,23 +229,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
}
};
template<> struct conj_helper<Packet4f, Packet2cf, false,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
{ return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
};
template<> struct conj_helper<Packet2cf, Packet4f, false,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
{ return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
@ -340,7 +324,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
{
@ -430,23 +414,7 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
}
};
template<> struct conj_helper<Packet2d, Packet1cd, false,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
{ return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
};
template<> struct conj_helper<Packet1cd, Packet2d, false,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
{ return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{

View File

@ -409,10 +409,16 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double&
pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
}
#if EIGEN_COMP_PGI
typedef const void * SsePrefetchPtrType;
#else
typedef const char * SsePrefetchPtrType;
#endif
#ifndef EIGEN_VECTORIZE_AVX
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
#endif
#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
@ -876,4 +882,14 @@ template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, co
} // end namespace Eigen
#if EIGEN_COMP_PGI
// PGI++ does not define the following intrinsics in C++ mode.
static inline __m128 _mm_castpd_ps (__m128d x) { return reinterpret_cast<__m128&>(x); }
static inline __m128i _mm_castpd_si128(__m128d x) { return reinterpret_cast<__m128i&>(x); }
static inline __m128d _mm_castps_pd (__m128 x) { return reinterpret_cast<__m128d&>(x); }
static inline __m128i _mm_castps_si128(__m128 x) { return reinterpret_cast<__m128i&>(x); }
static inline __m128 _mm_castsi128_ps(__m128i x) { return reinterpret_cast<__m128&>(x); }
static inline __m128d _mm_castsi128_pd(__m128i x) { return reinterpret_cast<__m128d&>(x); }
#endif
#endif // EIGEN_PACKET_MATH_SSE_H

View File

@ -14,6 +14,7 @@ namespace Eigen {
namespace internal {
#ifndef EIGEN_VECTORIZE_AVX
template <>
struct type_casting_traits<float, int> {
enum {
@ -23,11 +24,6 @@ struct type_casting_traits<float, int> {
};
};
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
return _mm_cvttps_epi32(a);
}
template <>
struct type_casting_traits<int, float> {
enum {
@ -37,11 +33,6 @@ struct type_casting_traits<int, float> {
};
};
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
return _mm_cvtepi32_ps(a);
}
template <>
struct type_casting_traits<double, float> {
enum {
@ -51,10 +42,6 @@ struct type_casting_traits<double, float> {
};
};
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
}
template <>
struct type_casting_traits<float, double> {
enum {
@ -63,6 +50,19 @@ struct type_casting_traits<float, double> {
TgtCoeffRatio = 2
};
};
#endif
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
return _mm_cvttps_epi32(a);
}
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
return _mm_cvtepi32_ps(a);
}
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
}
template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
// Simply discard the second half of the input

View File

@ -336,6 +336,9 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
}
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for AltiVec

View File

@ -255,7 +255,7 @@ struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,Rh
/** \internal
* \brief Template functor to compute the hypot of two scalars
* \brief Template functor to compute the hypot of two \b positive \b and \b real scalars
*
* \sa MatrixBase::stableNorm(), class Redux
*/
@ -263,22 +263,15 @@ template<typename Scalar>
struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
{
EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
// typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar &y) const
{
EIGEN_USING_STD_MATH(sqrt)
Scalar p, qp;
if(_x>_y)
{
p = _x;
qp = _y / p;
}
else
{
p = _y;
qp = _x / p;
}
return p * sqrt(Scalar(1) + qp*qp);
// This functor is used by hypotNorm only for which it is faster to first apply abs
// on all coefficients prior to reduction through hypot.
// This way we avoid calling abs on positive and real entries, and this also permits
// to seamlessly handle complexes. Otherwise we would have to handle both real and complexes
// through the same functor...
return internal::positive_real_hypot(x,y);
}
};
template<typename Scalar>

View File

@ -44,16 +44,16 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
{
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
m_interPacket(plset<Packet>(0)),
m_flip(numext::abs(high)<numext::abs(low))
{}
template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const {
typedef typename NumTraits<Scalar>::Real RealScalar;
if(m_flip)
return (i==0)? m_low : (m_high - (m_size1-i)*m_step);
return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step);
else
return (i==m_size1)? m_high : (m_low + i*m_step);
return (i==m_size1)? m_high : (m_low + RealScalar(i)*m_step);
}
template<typename IndexType>
@ -63,7 +63,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
if(m_flip)
{
Packet pi = padd(pset1<Packet>(Scalar(i-m_size1)),m_interPacket);
Packet pi = plset<Packet>(Scalar(i-m_size1));
Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
if(i==0)
res = pinsertfirst(res, m_low);
@ -71,7 +71,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
}
else
{
Packet pi = padd(pset1<Packet>(Scalar(i)),m_interPacket);
Packet pi = plset<Packet>(Scalar(i));
Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
if(i==m_size1-unpacket_traits<Packet>::size+1)
res = pinsertlast(res, m_high);
@ -83,7 +83,6 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
const Scalar m_high;
const Index m_size1;
const Scalar m_step;
const Packet m_interPacket;
const bool m_flip;
};

View File

@ -83,13 +83,17 @@ struct functor_traits<std::binder1st<T> >
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
#endif
#if (__cplusplus < 201703L) && (EIGEN_COMP_MSVC < 1910)
// std::unary_negate is deprecated since c++17 and will be removed in c++20
template<typename T>
struct functor_traits<std::unary_negate<T> >
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
// std::binary_negate is deprecated since c++17 and will be removed in c++20
template<typename T>
struct functor_traits<std::binary_negate<T> >
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
#endif
#ifdef EIGEN_STDEXT_SUPPORT

View File

@ -269,10 +269,13 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
enum {
IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0,
RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0
RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0,
SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0
};
Index size = mat.cols();
if(SkipDiag)
size--;
Index depth = actualLhs.cols();
typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar,
@ -283,10 +286,11 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
internal::general_matrix_matrix_triangular_product<Index,
typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
IsRowMajor ? RowMajor : ColMajor, UpLo>
IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)>
::run(size, depth,
&actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
mat.data(), mat.outerStride(), actualAlpha, blocking);
&actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(),
&actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(),
mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking);
}
};
@ -294,6 +298,7 @@ template<typename MatrixType, unsigned int UpLo>
template<typename ProductType>
TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta)
{
EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED);
eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols());
general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta);

View File

@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
{ \
if (lhs==rhs) { \
if ( lhs==rhs && ((UpLo&(Lower|Upper)==UpLo)) ) { \
general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
} else { \
@ -88,7 +88,7 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \
EIGTYPE beta(1); \
BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \
BLASFUNC(&uplo, &trans, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), lhs, &lda, (const BLASTYPE*)&numext::real_ref(beta), res, &ldc); \
} \
};
@ -125,9 +125,13 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
} \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk)
EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk)
#else
EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)
EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk_)
#endif
// TODO hanlde complex cases
// EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)

View File

@ -46,7 +46,7 @@ namespace internal {
// gemm specialization
#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASPREFIX) \
#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASFUNC) \
template< \
typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
@ -100,13 +100,20 @@ static void run(Index rows, Index cols, Index depth, \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _rhs; \
\
BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
}};
GEMM_SPECIALIZATION(double, d, double, d)
GEMM_SPECIALIZATION(float, f, float, s)
GEMM_SPECIALIZATION(dcomplex, cd, double, z)
GEMM_SPECIALIZATION(scomplex, cf, float, c)
#ifdef EIGEN_USE_MKL
GEMM_SPECIALIZATION(double, d, double, dgemm)
GEMM_SPECIALIZATION(float, f, float, sgemm)
GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, zgemm)
GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, cgemm)
#else
GEMM_SPECIALIZATION(double, d, double, dgemm_)
GEMM_SPECIALIZATION(float, f, float, sgemm_)
GEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_)
GEMM_SPECIALIZATION(scomplex, cf, float, cgemm_)
#endif
} // end namespase internal

View File

@ -183,8 +183,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,C
alignmentPattern = AllAligned;
}
const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1;
const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3;
Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
@ -457,8 +457,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
alignmentPattern = AllAligned;
}
const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1;
const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3;
Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)

View File

@ -85,7 +85,7 @@ EIGEN_BLAS_GEMV_SPECIALIZE(float)
EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASPREFIX) \
#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
{ \
@ -113,14 +113,21 @@ static void run( \
x_ptr=x_tmp.data(); \
incx=1; \
} else x_ptr=rhs; \
BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\
};
EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, d)
EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, s)
EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z)
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, c)
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv)
EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv)
EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv)
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv)
#else
EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv_)
EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv_)
EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_)
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, cgemv_)
#endif
} // end namespase internal

View File

@ -40,7 +40,7 @@ namespace internal {
/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@ -81,13 +81,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _rhs; \
\
BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@ -144,20 +144,26 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} \
\
BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
EIGEN_BLAS_SYMM_L(double, double, d, d)
EIGEN_BLAS_SYMM_L(float, float, f, s)
EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z)
EIGEN_BLAS_HEMM_L(scomplex, float, cf, c)
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_SYMM_L(double, double, d, dsymm)
EIGEN_BLAS_SYMM_L(float, float, f, ssymm)
EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm)
EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm)
#else
EIGEN_BLAS_SYMM_L(double, double, d, dsymm_)
EIGEN_BLAS_SYMM_L(float, float, f, ssymm_)
EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_)
EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
#endif
/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@ -197,13 +203,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _lhs; \
\
BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@ -259,15 +265,21 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} \
\
BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
} \
};
EIGEN_BLAS_SYMM_R(double, double, d, d)
EIGEN_BLAS_SYMM_R(float, float, f, s)
EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z)
EIGEN_BLAS_HEMM_R(scomplex, float, cf, c)
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_SYMM_R(double, double, d, dsymm)
EIGEN_BLAS_SYMM_R(float, float, f, ssymm)
EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm)
EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm)
#else
EIGEN_BLAS_SYMM_R(double, double, d, dsymm_)
EIGEN_BLAS_SYMM_R(float, float, f, ssymm_)
EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_)
EIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_)
#endif
} // end namespace internal
} // end namespace Eigen

View File

@ -95,14 +95,21 @@ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
x_tmp=map_x.conjugate(); \
x_ptr=x_tmp.data(); \
} else x_ptr=_rhs; \
BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv)
EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv)
EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
#else
EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_)
#endif
} // end namespace internal

View File

@ -137,7 +137,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert()));
// To work around an "error: member reference base type 'Matrix<...>
// (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
// not a structure or union" compilation error in nvcc (tested V8.0.61),
// create a dummy internal::constructor_without_unaligned_array_assert
// object to pass to the Matrix constructor.
internal::constructor_without_unaligned_array_assert a;
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a);
triangularBuffer.setZero();
if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero();
@ -284,7 +290,8 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert()));
internal::constructor_without_unaligned_array_assert a;
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a);
triangularBuffer.setZero();
if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero();
@ -393,7 +400,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
{
template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha)
{
typedef typename Dest::Scalar Scalar;
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef typename Dest::Scalar Scalar;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
@ -405,8 +414,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
* RhsBlasTraits::extractScalarFactor(a_rhs);
LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
@ -431,6 +441,21 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
&dst.coeffRef(0,0), dst.outerStride(), // result info
actualAlpha, blocking
);
// Apply correction if the diagonal is unit and a scalar factor was nested:
if ((Mode&UnitDiag)==UnitDiag)
{
if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
{
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);
}
else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
{
Index diagSize = (std::min)(rhs.rows(),rhs.cols());
dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
}
}
}
};

View File

@ -75,7 +75,7 @@ EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true)
EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false)
// implements col-major += alpha * op(triangular) * op(general)
#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@ -172,7 +172,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
} \
/*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \
BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\
/* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@ -180,13 +180,20 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
} \
};
EIGEN_BLAS_TRMM_L(double, double, d, d)
EIGEN_BLAS_TRMM_L(dcomplex, double, cd, z)
EIGEN_BLAS_TRMM_L(float, float, f, s)
EIGEN_BLAS_TRMM_L(scomplex, float, cf, c)
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMM_L(double, double, d, dtrmm)
EIGEN_BLAS_TRMM_L(dcomplex, MKL_Complex16, cd, ztrmm)
EIGEN_BLAS_TRMM_L(float, float, f, strmm)
EIGEN_BLAS_TRMM_L(scomplex, MKL_Complex8, cf, ctrmm)
#else
EIGEN_BLAS_TRMM_L(double, double, d, dtrmm_)
EIGEN_BLAS_TRMM_L(dcomplex, double, cd, ztrmm_)
EIGEN_BLAS_TRMM_L(float, float, f, strmm_)
EIGEN_BLAS_TRMM_L(scomplex, float, cf, ctrmm_)
#endif
// implements col-major += alpha * op(general) * op(triangular)
#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@ -282,7 +289,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
} \
/*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \
BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\
/* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@ -290,11 +297,17 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
} \
};
EIGEN_BLAS_TRMM_R(double, double, d, d)
EIGEN_BLAS_TRMM_R(dcomplex, double, cd, z)
EIGEN_BLAS_TRMM_R(float, float, f, s)
EIGEN_BLAS_TRMM_R(scomplex, float, cf, c)
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMM_R(double, double, d, dtrmm)
EIGEN_BLAS_TRMM_R(dcomplex, MKL_Complex16, cd, ztrmm)
EIGEN_BLAS_TRMM_R(float, float, f, strmm)
EIGEN_BLAS_TRMM_R(scomplex, MKL_Complex8, cf, ctrmm)
#else
EIGEN_BLAS_TRMM_R(double, double, d, dtrmm_)
EIGEN_BLAS_TRMM_R(dcomplex, double, cd, ztrmm_)
EIGEN_BLAS_TRMM_R(float, float, f, strmm_)
EIGEN_BLAS_TRMM_R(scomplex, float, cf, ctrmm_)
#endif
} // end namespace internal
} // end namespace Eigen

View File

@ -221,8 +221,9 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
* RhsBlasTraits::extractScalarFactor(rhs);
LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
@ -274,6 +275,12 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
else
dest = MappedDest(actualDestPtr, dest.size());
}
if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
{
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
}
}
};
@ -295,8 +302,9 @@ template<int Mode> struct trmv_selector<Mode,RowMajor>
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
* RhsBlasTraits::extractScalarFactor(rhs);
LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
enum {
DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
@ -326,6 +334,12 @@ template<int Mode> struct trmv_selector<Mode,RowMajor>
actualRhsPtr,1,
dest.data(),dest.innerStride(),
actualAlpha);
if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
{
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
}
}
};

View File

@ -71,7 +71,7 @@ EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)
EIGEN_BLAS_TRMV_SPECIALIZE(scomplex)
// implements col-major: res += alpha * op(triangular) * vector
#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
enum { \
@ -121,10 +121,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\
/* Add op(a_tr)rhs into res*/ \
BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
@ -142,18 +142,25 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
m = convert_index<BlasIndex>(size); \
n = convert_index<BlasIndex>(cols-size); \
} \
BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \
} \
};
EIGEN_BLAS_TRMV_CM(double, double, d, d)
EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z)
EIGEN_BLAS_TRMV_CM(float, float, f, s)
EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c)
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMV_CM(double, double, d, d,)
EIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,)
EIGEN_BLAS_TRMV_CM(float, float, f, s,)
EIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8, cf, c,)
#else
EIGEN_BLAS_TRMV_CM(double, double, d, d, _)
EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _)
EIGEN_BLAS_TRMV_CM(float, float, f, s, _)
EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c, _)
#endif
// implements row-major: res += alpha * op(triangular) * vector
#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
enum { \
@ -203,10 +210,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\
/* Add op(a_tr)rhs into res*/ \
BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
@ -224,15 +231,22 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
m = convert_index<BlasIndex>(size); \
n = convert_index<BlasIndex>(cols-size); \
} \
BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \
} \
};
EIGEN_BLAS_TRMV_RM(double, double, d, d)
EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z)
EIGEN_BLAS_TRMV_RM(float, float, f, s)
EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c)
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMV_RM(double, double, d, d,)
EIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,)
EIGEN_BLAS_TRMV_RM(float, float, f, s,)
EIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8, cf, c,)
#else
EIGEN_BLAS_TRMV_RM(double, double, d, d,_)
EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_)
EIGEN_BLAS_TRMV_RM(float, float, f, s,_)
EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c,_)
#endif
} // end namespase internal

View File

@ -38,7 +38,7 @@ namespace Eigen {
namespace internal {
// implements LeftSide op(triangular)^-1 * general
#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \
#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
@ -80,18 +80,24 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \
BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
} \
};
EIGEN_BLAS_TRSM_L(double, double, d)
EIGEN_BLAS_TRSM_L(dcomplex, double, z)
EIGEN_BLAS_TRSM_L(float, float, s)
EIGEN_BLAS_TRSM_L(scomplex, float, c)
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRSM_L(double, double, dtrsm)
EIGEN_BLAS_TRSM_L(dcomplex, MKL_Complex16, ztrsm)
EIGEN_BLAS_TRSM_L(float, float, strsm)
EIGEN_BLAS_TRSM_L(scomplex, MKL_Complex8, ctrsm)
#else
EIGEN_BLAS_TRSM_L(double, double, dtrsm_)
EIGEN_BLAS_TRSM_L(dcomplex, double, ztrsm_)
EIGEN_BLAS_TRSM_L(float, float, strsm_)
EIGEN_BLAS_TRSM_L(scomplex, float, ctrsm_)
#endif
// implements RightSide general * op(triangular)^-1
#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASPREFIX) \
#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
@ -133,16 +139,22 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \
BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
/*std::cout << "TRMS_L specialization!\n";*/ \
} \
};
EIGEN_BLAS_TRSM_R(double, double, d)
EIGEN_BLAS_TRSM_R(dcomplex, double, z)
EIGEN_BLAS_TRSM_R(float, float, s)
EIGEN_BLAS_TRSM_R(scomplex, float, c)
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRSM_R(double, double, dtrsm)
EIGEN_BLAS_TRSM_R(dcomplex, MKL_Complex16, ztrsm)
EIGEN_BLAS_TRSM_R(float, float, strsm)
EIGEN_BLAS_TRSM_R(scomplex, MKL_Complex8, ctrsm)
#else
EIGEN_BLAS_TRSM_R(double, double, dtrsm_)
EIGEN_BLAS_TRSM_R(dcomplex, double, ztrsm_)
EIGEN_BLAS_TRSM_R(float, float, strsm_)
EIGEN_BLAS_TRSM_R(scomplex, float, ctrsm_)
#endif
} // end namespace internal

View File

@ -49,10 +49,11 @@
#define EIGEN_USE_LAPACKE
#endif
#if defined(EIGEN_USE_MKL_VML)
#if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL)
#define EIGEN_USE_MKL
#endif
#if defined EIGEN_USE_MKL
# include <mkl.h>
/*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/
@ -108,6 +109,10 @@
#endif
#endif
#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL)
#include "../../misc/blas.h"
#endif
namespace Eigen {
typedef std::complex<double> dcomplex;
@ -121,8 +126,5 @@ typedef int BlasIndex;
} // end namespace Eigen
#if defined(EIGEN_USE_BLAS)
#include "../../misc/blas.h"
#endif
#endif // EIGEN_MKL_SUPPORT_H

View File

@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 3
#define EIGEN_MINOR_VERSION 3
#define EIGEN_MINOR_VERSION 5
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@ -399,7 +399,7 @@
// Does the compiler support variadic templates?
#ifndef EIGEN_HAS_VARIADIC_TEMPLATES
#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
&& ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) )
&& (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) )
// ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
// this prevents nvcc from crashing when compiling Eigen on Tegra X1
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
@ -413,7 +413,7 @@
#ifdef __CUDACC__
// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500))
#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500))
#define EIGEN_HAS_CONSTEXPR 1
#endif
#elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
@ -487,11 +487,13 @@
// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
// but GCC is still doing fine with just inline.
#ifndef EIGEN_STRONG_INLINE
#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
#define EIGEN_STRONG_INLINE __forceinline
#else
#define EIGEN_STRONG_INLINE inline
#endif
#endif
// EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible
// attribute to maximize inlining. This should only be used when really necessary: in particular,
@ -812,7 +814,8 @@ namespace Eigen {
// just an empty macro !
#define EIGEN_EMPTY
#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0)
// for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =;
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
@ -986,7 +989,13 @@ namespace Eigen {
# define EIGEN_NOEXCEPT
# define EIGEN_NOEXCEPT_IF(x)
# define EIGEN_NO_THROW throw()
# define EIGEN_EXCEPTION_SPEC(X) throw(X)
# if EIGEN_COMP_MSVC
// MSVC does not support exception specifications (warning C4290),
// and they are deprecated in c++11 anyway.
# define EIGEN_EXCEPTION_SPEC(X) throw()
# else
# define EIGEN_EXCEPTION_SPEC(X) throw(X)
# endif
#endif
#endif // EIGEN_MACROS_H

View File

@ -70,7 +70,7 @@ inline void throw_std_bad_alloc()
throw std::bad_alloc();
#else
std::size_t huge = static_cast<std::size_t>(-1);
new int[huge];
::operator new(huge);
#endif
}
@ -493,7 +493,7 @@ template<typename T> struct smart_copy_helper<T,true> {
IntPtr size = IntPtr(end)-IntPtr(start);
if(size==0) return;
eigen_internal_assert(start!=0 && end!=0 && target!=0);
memcpy(target, start, size);
std::memcpy(target, start, size);
}
};
@ -696,7 +696,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
/** \class aligned_allocator
* \ingroup Core_Module
*
* \brief STL compatible allocator to use with with 16 byte aligned types
* \brief STL compatible allocator to use with types requiring a non standrad alignment.
*
* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
* By default, it will thus provide at least 16 bytes alignment and more in following cases:
* - 32 bytes alignment if AVX is enabled.
* - 64 bytes alignment if AVX512 is enabled.
*
* This can be controled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
* \link TopicPreprocessorDirectivesPerformance there \endlink.
*
* Example:
* \code

View File

@ -485,6 +485,26 @@ T div_ceil(const T &a, const T &b)
return (a+b-1) / b;
}
// The aim of the following functions is to bypass -Wfloat-equal warnings
// when we really want a strict equality comparison on floating points.
template<typename X, typename Y> EIGEN_STRONG_INLINE
bool equal_strict(const X& x,const Y& y) { return x == y; }
template<> EIGEN_STRONG_INLINE
bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }
template<> EIGEN_STRONG_INLINE
bool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); }
template<typename X, typename Y> EIGEN_STRONG_INLINE
bool not_equal_strict(const X& x,const Y& y) { return x != y; }
template<> EIGEN_STRONG_INLINE
bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }
template<> EIGEN_STRONG_INLINE
bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
} // end namespace numext
} // end namespace Eigen

View File

@ -24,6 +24,7 @@
*
*/
#ifndef EIGEN_STATIC_ASSERT
#ifndef EIGEN_NO_STATIC_ASSERT
#if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600))
@ -44,64 +45,65 @@
struct static_assertion<true>
{
enum {
YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX,
YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES,
YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES,
THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE,
OUT_OF_RANGE_ACCESS,
YOU_MADE_A_PROGRAMMING_MISTAKE,
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT,
EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,
YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR,
YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR,
UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC,
THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES,
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED,
NUMERIC_TYPE_MUST_BE_REAL,
COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED,
WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED,
THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE,
INVALID_MATRIX_PRODUCT,
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS,
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION,
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY,
THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES,
THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES,
INVALID_MATRIX_TEMPLATE_PARAMETERS,
INVALID_MATRIXBASE_TEMPLATE_PARAMETERS,
BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX,
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE,
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES,
YOU_ALREADY_SPECIFIED_THIS_STRIDE,
INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION,
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD,
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1,
THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS,
YOU_CANNOT_MIX_ARRAYS_AND_MATRICES,
YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION,
THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY,
YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT,
THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS,
THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS,
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL,
THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES,
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED,
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE,
THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
STORAGE_LAYOUT_DOES_NOT_MATCH,
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE,
THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS,
MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
THIS_TYPE_IS_NOT_SUPPORTED,
STORAGE_KIND_MUST_MATCH,
STORAGE_INDEX_MUST_MATCH,
CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY
YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1,
YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1,
YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1,
THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1,
THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1,
THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1,
OUT_OF_RANGE_ACCESS=1,
YOU_MADE_A_PROGRAMMING_MISTAKE=1,
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1,
EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1,
YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1,
YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1,
UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1,
THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1,
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1,
NUMERIC_TYPE_MUST_BE_REAL=1,
COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1,
WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1,
THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1,
INVALID_MATRIX_PRODUCT=1,
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1,
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1,
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1,
THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1,
THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1,
INVALID_MATRIX_TEMPLATE_PARAMETERS=1,
INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1,
BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1,
THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1,
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1,
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1,
YOU_ALREADY_SPECIFIED_THIS_STRIDE=1,
INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1,
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1,
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1,
THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1,
YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1,
YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1,
THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1,
YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1,
THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1,
THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1,
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1,
THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1,
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1,
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1,
THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1,
THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1,
OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1,
IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1,
STORAGE_LAYOUT_DOES_NOT_MATCH=1,
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1,
THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1,
MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1,
THIS_TYPE_IS_NOT_SUPPORTED=1,
STORAGE_KIND_MUST_MATCH=1,
STORAGE_INDEX_MUST_MATCH=1,
CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1,
SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1
};
};
@ -131,7 +133,7 @@
#define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG);
#endif // EIGEN_NO_STATIC_ASSERT
#endif // EIGEN_STATIC_ASSERT
// static assertion failing if the type \a TYPE is not a vector type
#define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \

View File

@ -311,7 +311,6 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
// Aliases:
Map<VectorType> v(reinterpret_cast<Scalar*>(m_tmp.data()), size);
ComplexVectorType &cv = m_tmp;
const MatrixType &mZ = m_realQZ.matrixZ();
const MatrixType &mS = m_realQZ.matrixS();
const MatrixType &mT = m_realQZ.matrixT();
@ -351,7 +350,7 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
}
}
}
m_eivec.col(i).real().noalias() = mZ.transpose() * v;
m_eivec.col(i).real().noalias() = m_realQZ.matrixZ().transpose() * v;
m_eivec.col(i).real().normalize();
m_eivec.col(i).imag().setConstant(0);
}
@ -400,7 +399,7 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
/ (alpha*mT.coeffRef(j,j) - static_cast<Scalar>(beta*mS.coeffRef(j,j)));
}
}
m_eivec.col(i+1).noalias() = (mZ.transpose() * cv);
m_eivec.col(i+1).noalias() = (m_realQZ.matrixZ().transpose() * cv);
m_eivec.col(i+1).normalize();
m_eivec.col(i) = m_eivec.col(i+1).conjugate();
}

View File

@ -303,7 +303,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
Scalar exshift(0); // sum of exceptional shifts
Scalar norm = computeNormOfT();
if(norm!=0)
if(norm!=Scalar(0))
{
while (iu >= 0)
{
@ -327,7 +327,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
else // No convergence yet
{
// The firstHouseholderVector vector has to be initialized to something to get rid of a silly GCC warning (-O1 -Wall -DNDEBUG )
Vector3s firstHouseholderVector(0,0,0), shiftInfo;
Vector3s firstHouseholderVector = Vector3s::Zero(), shiftInfo;
computeShift(iu, iter, exshift, shiftInfo);
iter = iter + 1;
totalIter = totalIter + 1;

View File

@ -37,7 +37,7 @@ namespace Eigen {
/** \internal Specialization for the data types supported by LAPACKe */
#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW, LAPACKE_COLROW ) \
#define EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW ) \
template<> template<typename InputType> inline \
SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, int options) \
@ -47,7 +47,7 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c
&& (options&EigVecMask)!=EigVecMask \
&& "invalid option parameter"); \
bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \
lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), lda, matrix_order, info; \
lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), lda, info; \
m_eivalues.resize(n,1); \
m_subdiag.resize(n-1); \
m_eivec = matrix; \
@ -63,27 +63,24 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c
} \
\
lda = internal::convert_index<lapack_int>(m_eivec.outerStride()); \
matrix_order=LAPACKE_COLROW; \
char jobz, uplo='L'/*, range='A'*/; \
jobz = computeEigenvectors ? 'V' : 'N'; \
\
info = LAPACKE_##LAPACKE_NAME( matrix_order, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \
info = LAPACKE_##LAPACKE_NAME( LAPACK_COL_MAJOR, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \
m_info = (info==0) ? Success : NoConvergence; \
m_isInitialized = true; \
m_eigenvectorsOk = computeEigenvectors; \
return *this; \
}
#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME ) \
EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, ColMajor ) \
EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, RowMajor )
EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, ColMajor, LAPACK_COL_MAJOR)
EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, ColMajor, LAPACK_COL_MAJOR)
EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, ColMajor, LAPACK_COL_MAJOR)
EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, ColMajor, LAPACK_COL_MAJOR)
EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, RowMajor, LAPACK_ROW_MAJOR)
EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, RowMajor, LAPACK_ROW_MAJOR)
EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, RowMajor, LAPACK_ROW_MAJOR)
EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, RowMajor, LAPACK_ROW_MAJOR)
EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev)
EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev)
EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev)
EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev)
} // end namespace Eigen

View File

@ -178,7 +178,7 @@ EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const Quaterni
if (n != Scalar(0))
{
m_angle = Scalar(2)*atan2(n, abs(q.w()));
if(q.w() < 0)
if(q.w() < Scalar(0))
n = -n;
m_axis = q.vec() / n;
}

View File

@ -43,6 +43,11 @@ class QuaternionBase : public RotationBase<Derived, 3>
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename internal::traits<Derived>::Coefficients Coefficients;
typedef typename Coefficients::CoeffReturnType CoeffReturnType;
typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
Scalar&, CoeffReturnType>::type NonConstCoeffReturnType;
enum {
Flags = Eigen::internal::traits<Derived>::Flags
};
@ -58,22 +63,22 @@ class QuaternionBase : public RotationBase<Derived, 3>
/** \returns the \c x coefficient */
EIGEN_DEVICE_FUNC inline Scalar x() const { return this->derived().coeffs().coeff(0); }
EIGEN_DEVICE_FUNC inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); }
/** \returns the \c y coefficient */
EIGEN_DEVICE_FUNC inline Scalar y() const { return this->derived().coeffs().coeff(1); }
EIGEN_DEVICE_FUNC inline CoeffReturnType y() const { return this->derived().coeffs().coeff(1); }
/** \returns the \c z coefficient */
EIGEN_DEVICE_FUNC inline Scalar z() const { return this->derived().coeffs().coeff(2); }
EIGEN_DEVICE_FUNC inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); }
/** \returns the \c w coefficient */
EIGEN_DEVICE_FUNC inline Scalar w() const { return this->derived().coeffs().coeff(3); }
EIGEN_DEVICE_FUNC inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); }
/** \returns a reference to the \c x coefficient */
EIGEN_DEVICE_FUNC inline Scalar& x() { return this->derived().coeffs().coeffRef(0); }
/** \returns a reference to the \c y coefficient */
EIGEN_DEVICE_FUNC inline Scalar& y() { return this->derived().coeffs().coeffRef(1); }
/** \returns a reference to the \c z coefficient */
EIGEN_DEVICE_FUNC inline Scalar& z() { return this->derived().coeffs().coeffRef(2); }
/** \returns a reference to the \c w coefficient */
EIGEN_DEVICE_FUNC inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
/** \returns a reference to the \c x coefficient (if Derived is a non-const lvalue) */
EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); }
/** \returns a reference to the \c y coefficient (if Derived is a non-const lvalue) */
EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); }
/** \returns a reference to the \c z coefficient (if Derived is a non-const lvalue) */
EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); }
/** \returns a reference to the \c w coefficient (if Derived is a non-const lvalue) */
EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); }
/** \returns a read-only vector expression of the imaginary part (x,y,z) */
EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
@ -423,7 +428,7 @@ typedef Map<Quaternion<double>, Aligned> QuaternionMapAlignedd;
// Generic Quaternion * Quaternion product
// This product can be specialized for a given architecture via the Arch template argument.
namespace internal {
template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options> struct quat_product
template<int Arch, class Derived1, class Derived2, typename Scalar> struct quat_product
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
return Quaternion<Scalar>
@ -446,8 +451,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c
EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
return internal::quat_product<Architecture::Target, Derived, OtherDerived,
typename internal::traits<Derived>::Scalar,
EIGEN_PLAIN_ENUM_MIN(internal::traits<Derived>::Alignment, internal::traits<OtherDerived>::Alignment)>::run(*this, other);
typename internal::traits<Derived>::Scalar>::run(*this, other);
}
/** \sa operator*(Quaternion) */
@ -672,7 +676,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar>
// Generic conjugate of a Quaternion
namespace internal {
template<int Arch, class Derived, typename Scalar, int _Options> struct quat_conj
template<int Arch, class Derived, typename Scalar> struct quat_conj
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z());
@ -691,8 +695,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar>
QuaternionBase<Derived>::conjugate() const
{
return internal::quat_conj<Architecture::Target, Derived,
typename internal::traits<Derived>::Scalar,
internal::traits<Derived>::Alignment>::run(*this);
typename internal::traits<Derived>::Scalar>::run(*this);
}

View File

@ -16,17 +16,23 @@ namespace Eigen {
namespace internal {
template<class Derived, class OtherDerived>
struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16>
struct quat_product<Architecture::SSE, Derived, OtherDerived, float>
{
enum {
AAlignment = traits<Derived>::Alignment,
BAlignment = traits<OtherDerived>::Alignment,
ResAlignment = traits<Quaternion<float> >::Alignment
};
static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
{
Quaternion<float> res;
const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f);
__m128 a = _a.coeffs().template packet<Aligned16>(0);
__m128 b = _b.coeffs().template packet<Aligned16>(0);
__m128 a = _a.coeffs().template packet<AAlignment>(0);
__m128 b = _b.coeffs().template packet<BAlignment>(0);
__m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
__m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));
pstore(&res.x(),
pstoret<float,Packet4f,ResAlignment>(
&res.x(),
_mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)),
_mm_mul_ps(vec4f_swizzle1(a,2,0,1,0),
vec4f_swizzle1(b,1,2,0,0))),
@ -36,14 +42,17 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16>
}
};
template<class Derived, int Alignment>
struct quat_conj<Architecture::SSE, Derived, float, Alignment>
template<class Derived>
struct quat_conj<Architecture::SSE, Derived, float>
{
enum {
ResAlignment = traits<Quaternion<float> >::Alignment
};
static inline Quaternion<float> run(const QuaternionBase<Derived>& q)
{
Quaternion<float> res;
const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f);
pstore(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<Alignment>(0)));
pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
return res;
}
};
@ -52,6 +61,9 @@ struct quat_conj<Architecture::SSE, Derived, float, Alignment>
template<typename VectorLhs,typename VectorRhs>
struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
{
enum {
ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment
};
static inline typename plain_matrix_type<VectorLhs>::type
run(const VectorLhs& lhs, const VectorRhs& rhs)
{
@ -60,7 +72,7 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
__m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3));
__m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3));
typename plain_matrix_type<VectorLhs>::type res;
pstore(&res.x(),_mm_sub_ps(mul1,mul2));
pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2));
return res;
}
};
@ -68,9 +80,14 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
template<class Derived, class OtherDerived, int Alignment>
struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
template<class Derived, class OtherDerived>
struct quat_product<Architecture::SSE, Derived, OtherDerived, double>
{
enum {
BAlignment = traits<OtherDerived>::Alignment,
ResAlignment = traits<Quaternion<double> >::Alignment
};
static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
{
const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
@ -78,8 +95,8 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
Quaternion<double> res;
const double* a = _a.coeffs().data();
Packet2d b_xy = _b.coeffs().template packet<Alignment>(0);
Packet2d b_zw = _b.coeffs().template packet<Alignment>(2);
Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0);
Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2);
Packet2d a_xx = pset1<Packet2d>(a[0]);
Packet2d a_yy = pset1<Packet2d>(a[1]);
Packet2d a_zz = pset1<Packet2d>(a[2]);
@ -97,9 +114,9 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw));
#ifdef EIGEN_VECTORIZE_SSE3
EIGEN_UNUSED_VARIABLE(mask)
pstore(&res.x(), _mm_addsub_pd(t1, preverse(t2)));
pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2)));
#else
pstore(&res.x(), padd(t1, pxor(mask,preverse(t2))));
pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2))));
#endif
/*
@ -111,25 +128,28 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy));
#ifdef EIGEN_VECTORIZE_SSE3
EIGEN_UNUSED_VARIABLE(mask)
pstore(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2)));
pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2)));
#else
pstore(&res.z(), psub(t1, pxor(mask,preverse(t2))));
pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2))));
#endif
return res;
}
};
template<class Derived, int Alignment>
struct quat_conj<Architecture::SSE, Derived, double, Alignment>
template<class Derived>
struct quat_conj<Architecture::SSE, Derived, double>
{
enum {
ResAlignment = traits<Quaternion<double> >::Alignment
};
static inline Quaternion<double> run(const QuaternionBase<Derived>& q)
{
Quaternion<double> res;
const __m128d mask0 = _mm_setr_pd(-0.,-0.);
const __m128d mask2 = _mm_setr_pd(-0.,0.);
pstore(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<Alignment>(0)));
pstore(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<Alignment>(2)));
pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2)));
return res;
}
};

View File

@ -152,13 +152,28 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar>
{
// Compute the inverse squared-norm of each column of mat
m_invdiag.resize(mat.cols());
for(Index j=0; j<mat.outerSize(); ++j)
if(MatType::IsRowMajor)
{
RealScalar sum = mat.innerVector(j).squaredNorm();
if(sum>0)
m_invdiag(j) = RealScalar(1)/sum;
else
m_invdiag(j) = RealScalar(1);
m_invdiag.setZero();
for(Index j=0; j<mat.outerSize(); ++j)
{
for(typename MatType::InnerIterator it(mat,j); it; ++it)
m_invdiag(it.index()) += numext::abs2(it.value());
}
for(Index j=0; j<mat.cols(); ++j)
if(numext::real(m_invdiag(j))>RealScalar(0))
m_invdiag(j) = RealScalar(1)/numext::real(m_invdiag(j));
}
else
{
for(Index j=0; j<mat.outerSize(); ++j)
{
RealScalar sum = mat.col(j).squaredNorm();
if(sum>RealScalar(0))
m_invdiag(j) = RealScalar(1)/sum;
else
m_invdiag(j) = RealScalar(1);
}
}
Base::m_isInitialized = true;
return *this;

View File

@ -298,12 +298,144 @@ inline void MatrixBase<Derived>::applyOnTheRight(Index p, Index q, const JacobiR
}
namespace internal {
template<typename Scalar, typename OtherScalar,
int SizeAtCompileTime, int MinAlignment, bool Vectorizable>
struct apply_rotation_in_the_plane_selector
{
static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
{
for(Index i=0; i<size; ++i)
{
Scalar xi = *x;
Scalar yi = *y;
*x = c * xi + numext::conj(s) * yi;
*y = -s * xi + numext::conj(c) * yi;
x += incrx;
y += incry;
}
}
};
template<typename Scalar, typename OtherScalar,
int SizeAtCompileTime, int MinAlignment>
struct apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,true /* vectorizable */>
{
static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
{
enum {
PacketSize = packet_traits<Scalar>::size,
OtherPacketSize = packet_traits<OtherScalar>::size
};
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<OtherScalar>::type OtherPacket;
/*** dynamic-size vectorized paths ***/
if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1))
{
// both vectors are sequentially stored in memory => vectorization
enum { Peeling = 2 };
Index alignedStart = internal::first_default_aligned(y, size);
Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
const OtherPacket pc = pset1<OtherPacket>(c);
const OtherPacket ps = pset1<OtherPacket>(s);
conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;
conj_helper<OtherPacket,Packet,false,false> pm;
for(Index i=0; i<alignedStart; ++i)
{
Scalar xi = x[i];
Scalar yi = y[i];
x[i] = c * xi + numext::conj(s) * yi;
y[i] = -s * xi + numext::conj(c) * yi;
}
Scalar* EIGEN_RESTRICT px = x + alignedStart;
Scalar* EIGEN_RESTRICT py = y + alignedStart;
if(internal::first_default_aligned(x, size)==alignedStart)
{
for(Index i=alignedStart; i<alignedEnd; i+=PacketSize)
{
Packet xi = pload<Packet>(px);
Packet yi = pload<Packet>(py);
pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
px += PacketSize;
py += PacketSize;
}
}
else
{
Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize);
for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize)
{
Packet xi = ploadu<Packet>(px);
Packet xi1 = ploadu<Packet>(px+PacketSize);
Packet yi = pload <Packet>(py);
Packet yi1 = pload <Packet>(py+PacketSize);
pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1)));
pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1)));
px += Peeling*PacketSize;
py += Peeling*PacketSize;
}
if(alignedEnd!=peelingEnd)
{
Packet xi = ploadu<Packet>(x+peelingEnd);
Packet yi = pload <Packet>(y+peelingEnd);
pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
}
}
for(Index i=alignedEnd; i<size; ++i)
{
Scalar xi = x[i];
Scalar yi = y[i];
x[i] = c * xi + numext::conj(s) * yi;
y[i] = -s * xi + numext::conj(c) * yi;
}
}
/*** fixed-size vectorized path ***/
else if(SizeAtCompileTime != Dynamic && MinAlignment>0) // FIXME should be compared to the required alignment
{
const OtherPacket pc = pset1<OtherPacket>(c);
const OtherPacket ps = pset1<OtherPacket>(s);
conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj;
conj_helper<OtherPacket,Packet,false,false> pm;
Scalar* EIGEN_RESTRICT px = x;
Scalar* EIGEN_RESTRICT py = y;
for(Index i=0; i<size; i+=PacketSize)
{
Packet xi = pload<Packet>(px);
Packet yi = pload<Packet>(py);
pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
px += PacketSize;
py += PacketSize;
}
}
/*** non-vectorized path ***/
else
{
apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,false>::run(x,incrx,y,incry,size,c,s);
}
}
};
template<typename VectorX, typename VectorY, typename OtherScalar>
void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
{
typedef typename VectorX::Scalar Scalar;
enum { PacketSize = packet_traits<Scalar>::size };
typedef typename packet_traits<Scalar>::type Packet;
const bool Vectorizable = (VectorX::Flags & VectorY::Flags & PacketAccessBit)
&& (int(packet_traits<Scalar>::size) == int(packet_traits<OtherScalar>::size));
eigen_assert(xpr_x.size() == xpr_y.size());
Index size = xpr_x.size();
Index incrx = xpr_x.derived().innerStride();
@ -317,113 +449,11 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
if (c==OtherScalar(1) && s==OtherScalar(0))
return;
/*** dynamic-size vectorized paths ***/
if(VectorX::SizeAtCompileTime == Dynamic &&
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
((incrx==1 && incry==1) || PacketSize == 1))
{
// both vectors are sequentially stored in memory => vectorization
enum { Peeling = 2 };
Index alignedStart = internal::first_default_aligned(y, size);
Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
const Packet pc = pset1<Packet>(c);
const Packet ps = pset1<Packet>(s);
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj;
for(Index i=0; i<alignedStart; ++i)
{
Scalar xi = x[i];
Scalar yi = y[i];
x[i] = c * xi + numext::conj(s) * yi;
y[i] = -s * xi + numext::conj(c) * yi;
}
Scalar* EIGEN_RESTRICT px = x + alignedStart;
Scalar* EIGEN_RESTRICT py = y + alignedStart;
if(internal::first_default_aligned(x, size)==alignedStart)
{
for(Index i=alignedStart; i<alignedEnd; i+=PacketSize)
{
Packet xi = pload<Packet>(px);
Packet yi = pload<Packet>(py);
pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
px += PacketSize;
py += PacketSize;
}
}
else
{
Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize);
for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize)
{
Packet xi = ploadu<Packet>(px);
Packet xi1 = ploadu<Packet>(px+PacketSize);
Packet yi = pload <Packet>(py);
Packet yi1 = pload <Packet>(py+PacketSize);
pstoreu(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
pstoreu(px+PacketSize, padd(pmul(pc,xi1),pcj.pmul(ps,yi1)));
pstore (py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pmul(ps,xi1)));
px += Peeling*PacketSize;
py += Peeling*PacketSize;
}
if(alignedEnd!=peelingEnd)
{
Packet xi = ploadu<Packet>(x+peelingEnd);
Packet yi = pload <Packet>(y+peelingEnd);
pstoreu(x+peelingEnd, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
}
}
for(Index i=alignedEnd; i<size; ++i)
{
Scalar xi = x[i];
Scalar yi = y[i];
x[i] = c * xi + numext::conj(s) * yi;
y[i] = -s * xi + numext::conj(c) * yi;
}
}
/*** fixed-size vectorized path ***/
else if(VectorX::SizeAtCompileTime != Dynamic &&
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
(EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment
{
const Packet pc = pset1<Packet>(c);
const Packet ps = pset1<Packet>(s);
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj;
Scalar* EIGEN_RESTRICT px = x;
Scalar* EIGEN_RESTRICT py = y;
for(Index i=0; i<size; i+=PacketSize)
{
Packet xi = pload<Packet>(px);
Packet yi = pload<Packet>(py);
pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
px += PacketSize;
py += PacketSize;
}
}
/*** non-vectorized path ***/
else
{
for(Index i=0; i<size; ++i)
{
Scalar xi = *x;
Scalar yi = *y;
*x = c * xi + numext::conj(s) * yi;
*y = -s * xi + numext::conj(c) * yi;
x += incrx;
y += incry;
}
}
apply_rotation_in_the_plane_selector<
Scalar,OtherScalar,
VectorX::SizeAtCompileTime,
EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment),
Vectorizable>::run(x,incrx,y,incry,size,c,s);
}
} // end namespace internal

View File

@ -404,7 +404,7 @@ inline void MatrixBase<Derived>::computeInverseWithCheck(
const RealScalar& absDeterminantThreshold
) const
{
RealScalar determinant;
Scalar determinant;
// i'd love to put some static assertions there, but SFINAE means that they have no effect...
eigen_assert(rows() == cols());
computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold);

View File

@ -1004,7 +1004,7 @@ static IndexType find_ordering /* return the number of garbage collections */
COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ;
/* get pivot column from head of minimum degree list */
while (head [min_score] == COLAMD_EMPTY && min_score < n_col)
while (min_score < n_col && head [min_score] == COLAMD_EMPTY)
{
min_score++ ;
}

View File

@ -64,28 +64,28 @@ namespace internal
typedef typename _MatrixType::StorageIndex StorageIndex;
};
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm)
inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm)
{
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) {x = NULL; nbrhs=1;}
s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm);
}
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm)
inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm)
{
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) {x = NULL; nbrhs=1;}
d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm);
}
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm)
inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm)
{
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) {x = NULL; nbrhs=1;}
c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<PASTIX_COMPLEX*>(vals), perm, invp, reinterpret_cast<PASTIX_COMPLEX*>(x), nbrhs, iparm, dparm);
}
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm)
inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm)
{
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) {x = NULL; nbrhs=1;}

View File

@ -506,8 +506,8 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k);
}
RealScalar threshold_helper = numext::abs2<Scalar>(m_colNormsUpdated.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows);
RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<Scalar>::epsilon());
RealScalar threshold_helper = numext::abs2<RealScalar>(m_colNormsUpdated.maxCoeff() * NumTraits<RealScalar>::epsilon()) / RealScalar(rows);
RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<RealScalar>::epsilon());
m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
m_maxpivot = RealScalar(0);
@ -553,12 +553,12 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
// http://www.netlib.org/lapack/lawnspdf/lawn176.pdf
// and used in LAPACK routines xGEQPF and xGEQP3.
// See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html
if (m_colNormsUpdated.coeffRef(j) != 0) {
if (m_colNormsUpdated.coeffRef(j) != RealScalar(0)) {
RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j);
temp = (RealScalar(1) + temp) * (RealScalar(1) - temp);
temp = temp < 0 ? 0 : temp;
RealScalar temp2 = temp * numext::abs2<Scalar>(m_colNormsUpdated.coeffRef(j) /
m_colNormsDirect.coeffRef(j));
temp = temp < RealScalar(0) ? RealScalar(0) : temp;
RealScalar temp2 = temp * numext::abs2<RealScalar>(m_colNormsUpdated.coeffRef(j) /
m_colNormsDirect.coeffRef(j));
if (temp2 <= norm_downdate_threshold) {
// The updated norm has become too inaccurate so re-compute the column
// norm directly.

View File

@ -11,7 +11,7 @@
// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
// Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
// Copyright (C) 2014-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2014-2017 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@ -77,6 +77,7 @@ public:
typedef _MatrixType MatrixType;
typedef typename MatrixType::Scalar Scalar;
typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
typedef typename NumTraits<RealScalar>::Literal Literal;
enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
@ -259,7 +260,7 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
//**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows
RealScalar scale = matrix.cwiseAbs().maxCoeff();
if(scale==RealScalar(0)) scale = RealScalar(1);
if(scale==Literal(0)) scale = Literal(1);
MatrixX copy;
if (m_isTranspose) copy = matrix.adjoint()/scale;
else copy = matrix/scale;
@ -351,13 +352,13 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co
Index k1=0, k2=0;
for(Index j=0; j<n; ++j)
{
if( (A.col(j).head(n1).array()!=0).any() )
if( (A.col(j).head(n1).array()!=Literal(0)).any() )
{
A1.col(k1) = A.col(j).head(n1);
B1.row(k1) = B.row(j);
++k1;
}
if( (A.col(j).tail(n2).array()!=0).any() )
if( (A.col(j).tail(n2).array()!=Literal(0)).any() )
{
A2.col(k2) = A.col(j).tail(n2);
B2.row(k2) = B.row(j);
@ -449,11 +450,11 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
l = m_naiveU.row(1).segment(firstCol, k);
f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1);
}
if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1;
if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1);
if (r0<considerZero)
{
c0 = 1;
s0 = 0;
c0 = Literal(1);
s0 = Literal(0);
}
else
{
@ -574,7 +575,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n);
m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal();
ArrayRef diag = m_workspace.head(n);
diag(0) = 0;
diag(0) = Literal(0);
// Allocate space for singular values and vectors
singVals.resize(n);
@ -590,7 +591,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
// but others are interleaved and we must ignore them at this stage.
// To this end, let's compute a permutation skipping them:
Index actual_n = n;
while(actual_n>1 && diag(actual_n-1)==0) --actual_n;
while(actual_n>1 && diag(actual_n-1)==Literal(0)) --actual_n;
Index m = 0; // size of the deflated problem
for(Index k=0;k<actual_n;++k)
if(abs(col0(k))>considerZero)
@ -691,11 +692,13 @@ template <typename MatrixType>
typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift)
{
Index m = perm.size();
RealScalar res = 1;
RealScalar res = Literal(1);
for(Index i=0; i<m; ++i)
{
Index j = perm(i);
res += numext::abs2(col0(j)) / ((diagShifted(j) - mu) * (diag(j) + shift + mu));
// The following expression could be rewritten to involve only a single division,
// but this would make the expression more sensitive to overflow.
res += (col0(j) / (diagShifted(j) - mu)) * (col0(j) / (diag(j) + shift + mu));
}
return res;
@ -707,19 +710,22 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
{
using std::abs;
using std::swap;
using std::sqrt;
Index n = col0.size();
Index actual_n = n;
while(actual_n>1 && col0(actual_n-1)==0) --actual_n;
// Note that here actual_n is computed based on col0(i)==0 instead of diag(i)==0 as above
// because 1) we have diag(i)==0 => col0(i)==0 and 2) if col0(i)==0, then diag(i) is already a singular value.
while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n;
for (Index k = 0; k < n; ++k)
{
if (col0(k) == 0 || actual_n==1)
if (col0(k) == Literal(0) || actual_n==1)
{
// if col0(k) == 0, then entry is deflated, so singular value is on diagonal
// if actual_n==1, then the deflated problem is already diagonalized
singVals(k) = k==0 ? col0(0) : diag(k);
mus(k) = 0;
mus(k) = Literal(0);
shifts(k) = k==0 ? col0(0) : diag(k);
continue;
}
@ -731,15 +737,17 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
right = (diag(actual_n-1) + col0.matrix().norm());
else
{
// Skip deflated singular values
// Skip deflated singular values,
// recall that at this stage we assume that z[j]!=0 and all entries for which z[j]==0 have been put aside.
// This should be equivalent to using perm[]
Index l = k+1;
while(col0(l)==0) { ++l; eigen_internal_assert(l<actual_n); }
while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); }
right = diag(l);
}
// first decide whether it's closer to the left end or the right end
RealScalar mid = left + (right-left) / 2;
RealScalar fMid = secularEq(mid, col0, diag, perm, diag, 0);
RealScalar mid = left + (right-left) / Literal(2);
RealScalar fMid = secularEq(mid, col0, diag, perm, diag, Literal(0));
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
std::cout << right-left << "\n";
std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right) << "\n";
@ -755,7 +763,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
<< " " << secularEq(0.8*(left+right), col0, diag, perm, diag, 0)
<< " " << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n";
#endif
RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right;
RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? left : right;
// measure everything relative to shift
Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n);
@ -785,13 +793,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
// rational interpolation: fit a function of the form a / mu + b through the two previous
// iterates and use its zero to compute the next iterate
bool useBisection = fPrev*fCur>0;
while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
bool useBisection = fPrev*fCur>Literal(0);
while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
{
++m_numIters;
// Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples.
RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev);
RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev);
RealScalar b = fCur - a / muCur;
// And find mu such that f(mu)==0:
RealScalar muZero = -a/b;
@ -803,8 +811,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
fCur = fZero;
if (shift == left && (muCur < 0 || muCur > right - left)) useBisection = true;
if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true;
if (shift == left && (muCur < Literal(0) || muCur > right - left)) useBisection = true;
if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true;
if (abs(fCur)>abs(fPrev)) useBisection = true;
}
@ -817,15 +825,23 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
RealScalar leftShifted, rightShifted;
if (shift == left)
{
leftShifted = (std::numeric_limits<RealScalar>::min)();
// to avoid overflow, we must have mu > max(real_min, |z(k)|/sqrt(real_max)),
// the factor 2 is to be more conservative
leftShifted = numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), Literal(2) * abs(col0(k)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
// check that we did it right:
eigen_internal_assert( (numext::isfinite)( (col0(k)/leftShifted)*(col0(k)/(diag(k)+shift+leftShifted)) ) );
// I don't understand why the case k==0 would be special there:
// if (k == 0) rightShifted = right - left; else
rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.6)); // theoretically we can take 0.5, but let's be safe
rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.51)); // theoretically we can take 0.5, but let's be safe
}
else
{
leftShifted = -(right - left) * RealScalar(0.6);
rightShifted = -(std::numeric_limits<RealScalar>::min)();
leftShifted = -(right - left) * RealScalar(0.51);
if(k+1<n)
rightShifted = -numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), abs(col0(k+1)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
else
rightShifted = -(std::numeric_limits<RealScalar>::min)();
}
RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift);
@ -841,13 +857,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n";
}
#endif
eigen_internal_assert(fLeft * fRight < 0);
eigen_internal_assert(fLeft * fRight < Literal(0));
while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
{
RealScalar midShifted = (leftShifted + rightShifted) / 2;
RealScalar midShifted = (leftShifted + rightShifted) / Literal(2);
fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
if (fLeft * fMid < 0)
if (fLeft * fMid < Literal(0))
{
rightShifted = midShifted;
}
@ -858,7 +874,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
}
}
muCur = (leftShifted + rightShifted) / 2;
muCur = (leftShifted + rightShifted) / Literal(2);
}
singVals[k] = shift + muCur;
@ -892,8 +908,8 @@ void BDCSVD<MatrixType>::perturbCol0
// The offset permits to skip deflated entries while computing zhat
for (Index k = 0; k < n; ++k)
{
if (col0(k) == 0) // deflated
zhat(k) = 0;
if (col0(k) == Literal(0)) // deflated
zhat(k) = Literal(0);
else
{
// see equation (3.6)
@ -918,7 +934,7 @@ void BDCSVD<MatrixType>::perturbCol0
std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n";
#endif
RealScalar tmp = sqrt(prod);
zhat(k) = col0(k) > 0 ? tmp : -tmp;
zhat(k) = col0(k) > Literal(0) ? tmp : -tmp;
}
}
}
@ -934,7 +950,7 @@ void BDCSVD<MatrixType>::computeSingVecs
for (Index k = 0; k < n; ++k)
{
if (zhat(k) == 0)
if (zhat(k) == Literal(0))
{
U.col(k) = VectorType::Unit(n+1, k);
if (m_compV) V.col(k) = VectorType::Unit(n, k);
@ -947,7 +963,7 @@ void BDCSVD<MatrixType>::computeSingVecs
Index i = perm(l);
U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
}
U(n,k) = 0;
U(n,k) = Literal(0);
U.col(k).normalize();
if (m_compV)
@ -958,7 +974,7 @@ void BDCSVD<MatrixType>::computeSingVecs
Index i = perm(l);
V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
}
V(0,k) = -1;
V(0,k) = Literal(-1);
V.col(k).normalize();
}
}
@ -979,15 +995,15 @@ void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index
Index start = firstCol + shift;
RealScalar c = m_computed(start, start);
RealScalar s = m_computed(start+i, start);
RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s));
if (r == 0)
RealScalar r = numext::hypot(c,s);
if (r == Literal(0))
{
m_computed(start+i, start+i) = 0;
m_computed(start+i, start+i) = Literal(0);
return;
}
m_computed(start,start) = r;
m_computed(start+i, start) = 0;
m_computed(start+i, start+i) = 0;
m_computed(start+i, start) = Literal(0);
m_computed(start+i, start+i) = Literal(0);
JacobiRotation<RealScalar> J(c/r,-s/r);
if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J);
@ -1020,7 +1036,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi
<< m_computed(firstColm + i+1, firstColm+i+1) << " "
<< m_computed(firstColm + i+2, firstColm+i+2) << "\n";
#endif
if (r==0)
if (r==Literal(0))
{
m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);
return;
@ -1029,7 +1045,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi
s/=r;
m_computed(firstColm + i, firstColm) = r;
m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i);
m_computed(firstColm + j, firstColm) = 0;
m_computed(firstColm + j, firstColm) = Literal(0);
JacobiRotation<RealScalar> J(c,-s);
if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J);
@ -1053,7 +1069,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff();
RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag);
RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);
RealScalar epsilon_coarse = Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);
#ifdef EIGEN_BDCSVD_SANITY_CHECKS
assert(m_naiveU.allFinite());
@ -1081,7 +1097,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << " (diag(" << i << ")=" << diag(i) << ")\n";
#endif
col0(i) = 0;
col0(i) = Literal(0);
}
//condition 4.3

View File

@ -61,9 +61,10 @@ JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPiv
u = (LAPACKE_TYPE*)m_matrixU.data(); \
} else { ldu=1; u=&dummy; }\
MatrixType localV; \
ldvt = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \
lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \
if (computeV()) { \
localV.resize(ldvt, m_cols); \
localV.resize(vt_rows, m_cols); \
ldvt = internal::convert_index<lapack_int>(localV.outerStride()); \
vt = (LAPACKE_TYPE*)localV.data(); \
} else { ldvt=1; vt=&dummy; }\
Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(m_diagSize, 1); \

View File

@ -159,6 +159,8 @@ void upperbidiagonalization_blocked_helper(MatrixType& A,
traits<MatrixType>::Flags & RowMajorBit> > Y)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef typename NumTraits<RealScalar>::Literal Literal;
enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit };
typedef InnerStride<int(StorageOrder) == int(ColMajor) ? 1 : Dynamic> ColInnerStride;
typedef InnerStride<int(StorageOrder) == int(ColMajor) ? Dynamic : 1> RowInnerStride;
@ -263,7 +265,7 @@ void upperbidiagonalization_blocked_helper(MatrixType& A,
SubMatType A10( A.block(bs,0, brows-bs,bs) );
SubMatType A01( A.block(0,bs, bs,bcols-bs) );
Scalar tmp = A01(bs-1,0);
A01(bs-1,0) = 1;
A01(bs-1,0) = Literal(1);
A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint();
A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01;
A01(bs-1,0) = tmp;

View File

@ -94,7 +94,7 @@ class AmbiVector
Index allocSize = m_allocatedElements * sizeof(ListEl);
allocSize = (allocSize + sizeof(Scalar) - 1)/sizeof(Scalar);
Scalar* newBuffer = new Scalar[allocSize];
memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl));
std::memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl));
delete[] m_buffer;
m_buffer = newBuffer;
}

View File

@ -17,7 +17,9 @@ namespace internal {
template<typename Lhs, typename Rhs, typename ResultType>
static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false)
{
typedef typename remove_all<Lhs>::type::Scalar Scalar;
typedef typename remove_all<Lhs>::type::Scalar LhsScalar;
typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
typedef typename remove_all<ResultType>::type::Scalar ResScalar;
// make sure to call innerSize/outerSize since we fake the storage order.
Index rows = lhs.innerSize();
@ -25,7 +27,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
eigen_assert(lhs.outerSize() == rhs.innerSize());
ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, values, rows, 0);
ei_declare_aligned_stack_constructed_variable(ResScalar, values, rows, 0);
ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0);
std::memset(mask,0,sizeof(bool)*rows);
@ -51,12 +53,12 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
Index nnz = 0;
for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
{
Scalar y = rhsIt.value();
RhsScalar y = rhsIt.value();
Index k = rhsIt.index();
for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
{
Index i = lhsIt.index();
Scalar x = lhsIt.value();
LhsScalar x = lhsIt.value();
if(!mask[i])
{
mask[i] = true;
@ -166,11 +168,12 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
RowMajorMatrix rhsRow = rhs;
RowMajorMatrix resRow(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<RowMajorMatrix,Lhs,RowMajorMatrix>(rhsRow, lhs, resRow);
res = resRow;
typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRhs;
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;
RowMajorRhs rhsRow = rhs;
RowMajorRes resRow(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<RowMajorRhs,Lhs,RowMajorRes>(rhsRow, lhs, resRow);
res = resRow;
}
};
@ -179,10 +182,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
RowMajorMatrix lhsRow = lhs;
RowMajorMatrix resRow(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorMatrix,RowMajorMatrix>(rhs, lhsRow, resRow);
typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorLhs;
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;
RowMajorLhs lhsRow = lhs;
RowMajorRes resRow(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorLhs,RowMajorRes>(rhs, lhsRow, resRow);
res = resRow;
}
};
@ -219,10 +223,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
ColMajorMatrix lhsCol = lhs;
ColMajorMatrix resCol(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<ColMajorMatrix,Rhs,ColMajorMatrix>(lhsCol, rhs, resCol);
typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;
ColMajorLhs lhsCol = lhs;
ColMajorRes resCol(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<ColMajorLhs,Rhs,ColMajorRes>(lhsCol, rhs, resCol);
res = resCol;
}
};
@ -232,10 +237,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
ColMajorMatrix rhsCol = rhs;
ColMajorMatrix resCol(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorMatrix,ColMajorMatrix>(lhs, rhsCol, resCol);
typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;
ColMajorRhs rhsCol = rhs;
ColMajorRes resCol(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorRhs,ColMajorRes>(lhs, rhsCol, resCol);
res = resCol;
}
};
@ -263,7 +269,8 @@ namespace internal {
template<typename Lhs, typename Rhs, typename ResultType>
static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef typename remove_all<Lhs>::type::Scalar Scalar;
typedef typename remove_all<Lhs>::type::Scalar LhsScalar;
typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
Index cols = rhs.outerSize();
eigen_assert(lhs.outerSize() == rhs.innerSize());
@ -274,12 +281,12 @@ static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs,
{
for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
{
Scalar y = rhsIt.value();
RhsScalar y = rhsIt.value();
Index k = rhsIt.index();
for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
{
Index i = lhsIt.index();
Scalar x = lhsIt.value();
LhsScalar x = lhsIt.value();
res.coeffRef(i,j) += x * y;
}
}
@ -310,9 +317,9 @@ struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMa
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
ColMajorMatrix lhsCol(lhs);
internal::sparse_sparse_to_dense_product_impl<ColMajorMatrix,Rhs,ResultType>(lhsCol, rhs, res);
typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;
ColMajorLhs lhsCol(lhs);
internal::sparse_sparse_to_dense_product_impl<ColMajorLhs,Rhs,ResultType>(lhsCol, rhs, res);
}
};
@ -321,9 +328,9 @@ struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMa
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
ColMajorMatrix rhsCol(rhs);
internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorMatrix,ResultType>(lhs, rhsCol, res);
typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;
ColMajorRhs rhsCol(rhs);
internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorRhs,ResultType>(lhs, rhsCol, res);
}
};

View File

@ -47,6 +47,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
enum {
Mode = _Mode,
TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0),
RowsAtCompileTime = internal::traits<SparseSelfAdjointView>::RowsAtCompileTime,
ColsAtCompileTime = internal::traits<SparseSelfAdjointView>::ColsAtCompileTime
};
@ -310,7 +311,7 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons
while (i && i.index()<j) ++i;
if(i && i.index()==j)
{
res(j,k) += alpha * i.value() * rhs(j,k);
res.coeffRef(j,k) += alpha * i.value() * rhs.coeff(j,k);
++i;
}
}
@ -323,14 +324,14 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons
{
LhsScalar lhs_ij = i.value();
if(!LhsIsRowMajor) lhs_ij = numext::conj(lhs_ij);
res_j += lhs_ij * rhs(i.index(),k);
res_j += lhs_ij * rhs.coeff(i.index(),k);
res(i.index(),k) += numext::conj(lhs_ij) * rhs_j;
}
res(j,k) += alpha * res_j;
res.coeffRef(j,k) += alpha * res_j;
// handle diagonal coeff
if (ProcessFirstHalf && i && (i.index()==j))
res(j,k) += alpha * i.value() * rhs(j,k);
res.coeffRef(j,k) += alpha * i.value() * rhs.coeff(j,k);
}
}
}
@ -368,7 +369,7 @@ struct generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, Pr
// transpose everything
Transpose<Dest> dstT(dst);
internal::sparse_selfadjoint_time_dense_product<RhsView::Mode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha);
internal::sparse_selfadjoint_time_dense_product<RhsView::TransposeMode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha);
}
};

View File

@ -21,7 +21,8 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
{
// return sparse_sparse_product_with_pruning_impl2(lhs,rhs,res);
typedef typename remove_all<Lhs>::type::Scalar Scalar;
typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
typedef typename remove_all<ResultType>::type::Scalar ResScalar;
typedef typename remove_all<Lhs>::type::StorageIndex StorageIndex;
// make sure to call innerSize/outerSize since we fake the storage order.
@ -31,7 +32,7 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
eigen_assert(lhs.outerSize() == rhs.innerSize());
// allocate a temporary buffer
AmbiVector<Scalar,StorageIndex> tempVector(rows);
AmbiVector<ResScalar,StorageIndex> tempVector(rows);
// mimics a resizeByInnerOuter:
if(ResultType::IsRowMajor)
@ -63,14 +64,14 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
{
// FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())
tempVector.restart();
Scalar x = rhsIt.value();
RhsScalar x = rhsIt.value();
for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt)
{
tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x;
}
}
res.startVec(j);
for (typename AmbiVector<Scalar,StorageIndex>::Iterator it(tempVector,tolerance); it; ++it)
for (typename AmbiVector<ResScalar,StorageIndex>::Iterator it(tempVector,tolerance); it; ++it)
res.insertBackByOuterInner(j,it.index()) = it.value();
}
res.finalize();
@ -85,7 +86,6 @@ struct sparse_sparse_product_with_pruning_selector;
template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
{
typedef typename traits<typename remove_all<Lhs>::type>::Scalar Scalar;
typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
@ -129,8 +129,8 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,R
typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs;
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs;
typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs;
typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs;
ColMajorMatrixLhs colLhs(lhs);
ColMajorMatrixRhs colRhs(rhs);
internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,ColMajorMatrixRhs,ResultType>(colLhs, colRhs, res, tolerance);
@ -149,7 +149,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,R
typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixLhs;
typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixLhs;
RowMajorMatrixLhs rowLhs(lhs);
sparse_sparse_product_with_pruning_selector<RowMajorMatrixLhs,Rhs,ResultType,RowMajor,RowMajor>(rowLhs,rhs,res,tolerance);
}
@ -161,7 +161,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,C
typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixRhs;
typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixRhs;
RowMajorMatrixRhs rowRhs(rhs);
sparse_sparse_product_with_pruning_selector<Lhs,RowMajorMatrixRhs,ResultType,RowMajor,RowMajor,RowMajor>(lhs,rowRhs,res,tolerance);
}
@ -173,7 +173,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,R
typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs;
typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs;
ColMajorMatrixRhs colRhs(rhs);
internal::sparse_sparse_product_with_pruning_impl<Lhs,ColMajorMatrixRhs,ResultType>(lhs, colRhs, res, tolerance);
}
@ -185,7 +185,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,C
typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs;
typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs;
ColMajorMatrixLhs colLhs(lhs);
internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,Rhs,ResultType>(colLhs, rhs, res, tolerance);
}

View File

@ -52,7 +52,7 @@ namespace internal {
* rank-revealing permutations. Use colsPermutation() to get it.
*
* Q is the orthogonal matrix represented as products of Householder reflectors.
* Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose.
* Use matrixQ() to get an expression and matrixQ().adjoint() to get the adjoint.
* You can then apply it to a vector.
*
* R is the sparse triangular or trapezoidal matrix. The later occurs when A is rank-deficient.
@ -65,6 +65,7 @@ namespace internal {
* \implsparsesolverconcept
*
* \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()).
* \warning For complex matrices matrixQ().transpose() will actually return the adjoint matrix.
*
*/
template<typename _MatrixType, typename _OrderingType>
@ -196,9 +197,9 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
Index rank = this->rank();
// Compute Q^T * b;
// Compute Q^* * b;
typename Dest::PlainObject y, b;
y = this->matrixQ().transpose() * B;
y = this->matrixQ().adjoint() * B;
b = y;
// Solve with the triangular matrix R
@ -604,7 +605,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
// Get the references
SparseQR_QProduct(const SparseQRType& qr, const Derived& other, bool transpose) :
m_qr(qr),m_other(other),m_transpose(transpose) {}
inline Index rows() const { return m_transpose ? m_qr.rows() : m_qr.cols(); }
inline Index rows() const { return m_qr.matrixQ().rows(); }
inline Index cols() const { return m_other.cols(); }
// Assign to a vector
@ -632,7 +633,10 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
}
else
{
eigen_assert(m_qr.m_Q.rows() == m_other.rows() && "Non conforming object sizes");
eigen_assert(m_qr.matrixQ().cols() == m_other.rows() && "Non conforming object sizes");
res.conservativeResize(rows(), cols());
// Compute res = Q * other column by column
for(Index j = 0; j < res.cols(); j++)
{
@ -641,7 +645,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
Scalar tau = Scalar(0);
tau = m_qr.m_Q.col(k).dot(res.col(j));
if(tau==Scalar(0)) continue;
tau = tau * m_qr.m_hcoeffs(k);
tau = tau * numext::conj(m_qr.m_hcoeffs(k));
res.col(j) -= tau * m_qr.m_Q.col(k);
}
}
@ -650,7 +654,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
const SparseQRType& m_qr;
const Derived& m_other;
bool m_transpose;
bool m_transpose; // TODO this actually means adjoint
};
template<typename SparseQRType>
@ -668,13 +672,14 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp
{
return SparseQR_QProduct<SparseQRType,Derived>(m_qr,other.derived(),false);
}
// To use for operations with the adjoint of Q
SparseQRMatrixQTransposeReturnType<SparseQRType> adjoint() const
{
return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr);
}
inline Index rows() const { return m_qr.rows(); }
inline Index cols() const { return (std::min)(m_qr.rows(),m_qr.cols()); }
// To use for operations with the transpose of Q
inline Index cols() const { return m_qr.rows(); }
// To use for operations with the transpose of Q FIXME this is the same as adjoint at the moment
SparseQRMatrixQTransposeReturnType<SparseQRType> transpose() const
{
return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr);
@ -682,6 +687,7 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp
const SparseQRType& m_qr;
};
// TODO this actually represents the adjoint of Q
template<typename SparseQRType>
struct SparseQRMatrixQTransposeReturnType
{
@ -712,7 +718,7 @@ struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal:
typedef typename DstXprType::StorageIndex StorageIndex;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &/*func*/)
{
typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows());
typename DstXprType::PlainObject idMat(src.rows(), src.cols());
idMat.setIdentity();
// Sort the sparse householder reflectors if needed
const_cast<SparseQRType *>(&src.m_qr)->_sort_matrix_Q();

View File

@ -23,6 +23,24 @@ inline void umfpack_defaults(double control[UMFPACK_CONTROL], double)
inline void umfpack_defaults(double control[UMFPACK_CONTROL], std::complex<double>)
{ umfpack_zi_defaults(control); }
inline void umfpack_report_info(double control[UMFPACK_CONTROL], double info[UMFPACK_INFO], double)
{ umfpack_di_report_info(control, info);}
inline void umfpack_report_info(double control[UMFPACK_CONTROL], double info[UMFPACK_INFO], std::complex<double>)
{ umfpack_zi_report_info(control, info);}
inline void umfpack_report_status(double control[UMFPACK_CONTROL], int status, double)
{ umfpack_di_report_status(control, status);}
inline void umfpack_report_status(double control[UMFPACK_CONTROL], int status, std::complex<double>)
{ umfpack_zi_report_status(control, status);}
inline void umfpack_report_control(double control[UMFPACK_CONTROL], double)
{ umfpack_di_report_control(control);}
inline void umfpack_report_control(double control[UMFPACK_CONTROL], std::complex<double>)
{ umfpack_zi_report_control(control);}
inline void umfpack_free_numeric(void **Numeric, double)
{ umfpack_di_free_numeric(Numeric); *Numeric = 0; }
@ -156,6 +174,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
public:
typedef Array<double, UMFPACK_CONTROL, 1> UmfpackControl;
typedef Array<double, UMFPACK_INFO, 1> UmfpackInfo;
UmfPackLU()
: m_dummy(0,0), mp_matrix(m_dummy)
@ -297,6 +316,34 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
factorize_impl();
}
/** Prints the current UmfPack control settings.
*
* \sa umfpackControl()
*/
void umfpackReportControl()
{
umfpack_report_control(m_control.data(), Scalar());
}
/** Prints statistics collected by UmfPack.
*
* \sa analyzePattern(), compute()
*/
void umfpackReportInfo()
{
eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()");
umfpack_report_info(m_control.data(), m_umfpackInfo.data(), Scalar());
}
/** Prints the status of the previous factorization operation performed by UmfPack (symbolic or numerical factorization).
*
* \sa analyzePattern(), compute()
*/
void umfpackReportStatus() {
eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()");
umfpack_report_status(m_control.data(), m_fact_errorCode, Scalar());
}
/** \internal */
template<typename BDerived,typename XDerived>
bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const;
@ -314,19 +361,19 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
m_numeric = 0;
m_symbolic = 0;
m_extractedDataAreDirty = true;
umfpack_defaults(m_control.data(), Scalar());
}
void analyzePattern_impl()
{
umfpack_defaults(m_control.data(), Scalar());
int errorCode = 0;
errorCode = umfpack_symbolic(internal::convert_index<int>(mp_matrix.rows()),
internal::convert_index<int>(mp_matrix.cols()),
mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(),
&m_symbolic, m_control.data(), 0);
m_fact_errorCode = umfpack_symbolic(internal::convert_index<int>(mp_matrix.rows()),
internal::convert_index<int>(mp_matrix.cols()),
mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(),
&m_symbolic, m_control.data(), m_umfpackInfo.data());
m_isInitialized = true;
m_info = errorCode ? InvalidInput : Success;
m_info = m_fact_errorCode ? InvalidInput : Success;
m_analysisIsOk = true;
m_factorizationIsOk = false;
m_extractedDataAreDirty = true;
@ -334,8 +381,9 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
void factorize_impl()
{
m_fact_errorCode = umfpack_numeric(mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(),
m_symbolic, &m_numeric, m_control.data(), 0);
m_symbolic, &m_numeric, m_control.data(), m_umfpackInfo.data());
m_info = m_fact_errorCode == UMFPACK_OK ? Success : NumericalIssue;
m_factorizationIsOk = true;
@ -362,6 +410,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
mutable LUMatrixType m_l;
int m_fact_errorCode;
UmfpackControl m_control;
mutable UmfpackInfo m_umfpackInfo;
mutable LUMatrixType m_u;
mutable IntColVectorType m_p;
@ -442,7 +491,7 @@ bool UmfPackLU<MatrixType>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBas
x_ptr = &x.col(j).coeffRef(0);
errorCode = umfpack_solve(UMFPACK_A,
mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(),
x_ptr, &b.const_cast_derived().col(j).coeffRef(0), m_numeric, m_control.data(), 0);
x_ptr, &b.const_cast_derived().col(j).coeffRef(0), m_numeric, m_control.data(), m_umfpackInfo.data());
if(x.innerStride()!=1)
x.col(j) = x_tmp;
if (errorCode!=0)

View File

@ -1,5 +1,5 @@
THIS IS NOT THE COMPLETE EIGEN DISTRIBUTION. ONLY FILES NEEDED FOR COMPILING EIGEN INTO SLIC3R WERE PUT INTO THE SLIC3R SOURCE DISTRIBUTION.
THIS DIRECTORY CONTAINS PIECES OF THE EIGEN 3.3.3 SOURCE DISTRIBUTION.
THIS DIRECTORY CONTAINS PIECES OF THE EIGEN 3.3.5 b3f3d4950030 SOURCE DISTRIBUTION.
**Eigen is a C++ template library for linear algebra: matrices, vectors, numerical solvers, and related algorithms.**