[eigen] Blocking triangular Sylvester solver

```I am writing Sylvester-like solvers.  I began with triangular Sylvester
equation.  Now I have done with vectors, i.e. when the solution is a
column-vector or a row-vector.

However, when I look at Eigen/src/Core/products/TriangularSolverMatrix.h, I see
different blocking methods for on-the-left and on-the-right.  What is the best
blocking when the solution is a general matrix?

The attachment is my current work.  The TODO comment marks where I got stuck.

Regards,
Chen-Pang```
```// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Chen-Pang He <jdh8@xxxxxxxxxxxxxx>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_SYLVESTER_TRIANGULAR_H
#define EIGEN_SYLVESTER_TRIANGULAR_H

namespace Eigen {
/*!
* \ingroup Sylvester_module
*
* \brief This class solves triangular Sylvester equation.
*
* \param Time  Should be either #Continuous or #Discrete
* \param Lhs   The type of matrix \a A, which should be a TriangularView
* \param Rhs   The type of matrix \a B, which should be a TriangularView
*
* If Time is #Continuous, \c *this solves \f\$ AX + XB = C \f\$.  Otherwise, \c
* *this solves \f\$ AXB - X = C \f\$.  The matrices \a A and \a B are specified
* from the constructor, and the matrix \a C is specified from the member
* function solve() or solveInPlace().
*/
template <int Time, typename Lhs, typename Rhs>
class SylvesterTriangular;

namespace internal {

template <int Time, int Mode, int Index, int Size, bool Stop = Index==Size>
struct triangular_sylvester_unroller;

template <int Mode, int Index, int Size>
struct triangular_sylvester_unroller<Continuous,Mode,Index,Size,false>
{
template <typename Lhs, typename Derived>
static void run (const MatrixBase<Lhs>& lhs, const typename Derived::Scalar& rhs, Derived& vector)
{
enum {
IsLower = Mode & Lower,
I = IsLower ? Index : Size - Index - 1,
S = IsLower ? 0     : I + 1
};

if (Index)
vector.coeffRef(I) -= lhs.row(I).template segment<Index>(S).transpose()
.cwiseProduct(vector.template segment<Index>(S)).sum();

vector.coeffRef(I) /= lhs.coeff(I,I) + rhs;
triangular_sylvester_unroller<Continuous,Mode,Index+1,Size>::run(lhs, rhs, vector);
}
};

template <int Mode, int Index, int Size>
struct triangular_sylvester_unroller<Discrete,Mode,Index,Size,false>
{
template <typename Lhs, typename Derived>
static void run (const MatrixBase<Lhs>& lhs, const typename Derived::Scalar& rhs, Derived& vector)
{
enum {
IsLower = Mode & Lower,
I = IsLower ? Index : Size - Index - 1,
S = IsLower ? 0     : I + 1
};

if (Index)
vector.coeffRef(I) -= rhs * lhs.row(I).template segment<Index>(S).transpose()
.cwiseProduct(vector.template segment<Index>(S)).sum();

vector.coeffRef(I) /= lhs.coeff(I,I) * rhs - 1;
triangular_sylvester_unroller<Discrete,Mode,Index+1,Size>::run(lhs, rhs, vector);
}
};

template <int Time, int Mode, int Index, int Size>
struct triangular_sylvester_unroller<Time,Mode,Index,Size,true>
{
template <typename Lhs, typename Derived>
static void run (const MatrixBase<Lhs>& lhs, const typename Derived::Scalar& rhs, Derived& vector)
{}
};

template <int Time, int Mode, bool Conjugate, int StorageOrder>
struct triangular_sylvester_kernel;

template <int Mode, bool Conjugate>
struct triangular_sylvester_kernel<Continuous,Mode,Conjugate,RowMajor>
{
template <typename Scalar, typename Index>
static void run (Index size, const Scalar* _lhs, Index lhsStride, const Scalar& rhs, Scalar* vector)
{
enum { IsLower = Mode & Lower };
typedef Map< const Matrix<Scalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
typename conj_expr_if<Conjugate,LhsMap>::type cjLhs(LhsMap(_lhs, size, size, OuterStride<>(lhsStride)));

for (Index k = 0; k < size; ++k)
{
Index i = IsLower ? k : size-k-1;
Index s = IsLower ? 0 : i+1;

if (k)
vector[i] -= cjLhs.row(i).segment(s,k).transpose()
.cwiseProduct(Map< const Matrix<Scalar,Dynamic,1> >(vector+s, k)).sum();

vector[i] /= cjLhs.coeff(i,i) + rhs;
}
}
};

template <int Mode, bool Conjugate>
struct triangular_sylvester_kernel<Discrete,Mode,Conjugate,RowMajor>
{
template <typename Scalar, typename Index>
static void run (Index size, const Scalar* _lhs, Index lhsStride, const Scalar& rhs, Scalar* vector)
{
enum { IsLower = Mode & Lower };
typedef Map< const Matrix<Scalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
typename conj_expr_if<Conjugate,LhsMap>::type cjLhs(LhsMap(_lhs, size, size, OuterStride<>(lhsStride)));

for (Index k = 0; k < size; ++k)
{
Index i = IsLower ? k : size-k-1;
Index s = IsLower ? 0 : i+1;

if (k)
vector[i] -= rhs * cjLhs.row(i).segment(s,k).transpose()
.cwiseProduct(Map< const Matrix<Scalar,Dynamic,1> >(vector+s, k)).sum();

vector[i] /= cjLhs.coeff(i,i) * rhs - 1;
}
}
};

template <int Mode, bool Conjugate>
struct triangular_sylvester_kernel<Continuous,Mode,Conjugate,ColMajor>
{
template <typename Scalar, typename Index>
static void run (Index size, const Scalar* _lhs, Index lhsStride, const Scalar& rhs, Scalar* vector)
{
enum { IsLower = Mode & Lower };
typedef Map< const Matrix<Scalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
typename conj_expr_if<Conjugate,LhsMap>::type cjLhs(LhsMap(_lhs, size, size, OuterStride<>(lhsStride)));

for (Index k = 0; k < size; ++k)
{
Index r = size - k - 1;
Index i = IsLower ? k   : r;
Index s = IsLower ? i+1 : 0;

vector[i] /= cjLhs.coeff(i,i) + rhs;

if (r > 0)
Map< Matrix<Scalar,Dynamic,1> >(vector+s, r) -= vector[i] * cjLhs.col(i).segment(s,r);
}
}
};

template <int Mode, bool Conjugate>
struct triangular_sylvester_kernel<Discrete,Mode,Conjugate,ColMajor>
{
template <typename Scalar, typename Index>
static void run (Index size, const Scalar* _lhs, Index lhsStride, const Scalar& rhs, Scalar* vector)
{
enum { IsLower = Mode & Lower };
typedef Map< const Matrix<Scalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
typename conj_expr_if<Conjugate,LhsMap>::type cjLhs(LhsMap(_lhs, size, size, OuterStride<>(lhsStride)));

for (Index k = 0; k < size; ++k)
{
Index r = size - k - 1;
Index i = IsLower ? k   : r;
Index s = IsLower ? i+1 : 0;

vector[i] /= cjLhs.coeff(i,i) * rhs - 1;

if (r > 0)
Map< Matrix<Scalar,Dynamic,1> >(vector+s, r) -= vector[i] * cjLhs.col(i).segment(s,r);
}
}
};

template <int Time, int Mode, bool Conjugate, int StorageOrder>
struct triangular_sylvester_vector;

template <int Time, int Mode, bool Conjugate>
struct triangular_sylvester_vector<Time,Mode,Conjugate,RowMajor>
{
template <typename Scalar, typename Index>
static void run (Index size, const Scalar* _lhs, Index lhsStride, const Scalar& rhs, Scalar* vector)
{
enum { IsLower = Mode & Lower };
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
const_blas_data_mapper<Scalar,Index,RowMajor> lhs(_lhs, lhsStride);

for (Index pi = IsLower ? 0 : size;
IsLower ? pi<size : pi>0;
IsLower ? pi+=PanelWidth : pi-=PanelWidth)
{
Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);
Index r = IsLower ? pi : size - pi;  // remaining size
Index startRow = IsLower ? pi : pi-actualPanelWidth;
Index startCol = IsLower ? 0 : pi;

if (r > 0)
{
general_matrix_vector_product<Index,Scalar,RowMajor,Conjugate,Scalar,false>::run(
actualPanelWidth, r,
&lhs(startRow,startCol), lhsStride,
vector + startCol, 1,
vector + startRow, 1,
Time==Continuous ? -1 : -rhs);
}

triangular_sylvester_kernel<Time,Mode,Conjugate,RowMajor>::run(
actualPanelWidth,
&lhs(startRow,startRow), lhsStride,
rhs,
vector + startRow);
}
}
};

template <int Time, int Mode, bool Conjugate>
struct triangular_sylvester_vector<Time,Mode,Conjugate,ColMajor>
{
template <typename Scalar, typename Index>
static void run (Index size, const Scalar* _lhs, Index lhsStride, const Scalar& rhs, Scalar* vector)
{
enum { IsLower = Mode & Lower };
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
const_blas_data_mapper<Scalar,Index,ColMajor> lhs(_lhs, lhsStride);

for (Index pi = IsLower ? 0 : size;
IsLower ? pi<size : pi>0;
IsLower ? pi+=PanelWidth : pi-=PanelWidth)
{
Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);
Index startBlock = IsLower ? pi : pi-actualPanelWidth;
Index endBlock = IsLower ? pi+actualPanelWidth : 0;

triangular_sylvester_kernel<Time,Mode,Conjugate,ColMajor>::run(
actualPanelWidth,
&lhs(startBlock,startBlock), lhsStride,
rhs,
vector + startBlock);

Index r = IsLower ? size - endBlock : startBlock;

if (r > 0)
{
general_matrix_vector_product<Index,Scalar,ColMajor,Conjugate,Scalar,false>::run(
r, actualPanelWidth,
&lhs(endBlock,startBlock), lhsStride,
vector + startBlock, 1,
vector + endBlock, 1,
Time==Continuous ? -1 : -rhs);
}
}
}
};

template <int LhsMode, int RhsMode, bool LhsConjugate, bool RhsConjugate,
int LhsStorageOrder, int RhsStorageOrder, int StorageOrder>
struct triangular_sylvester_matrix;

template <int LhsMode, int RhsMode, bool LhsConjugate, bool RhsConjugate, int LhsStorageOrder, int RhsStorageOrder>
struct triangular_sylvester_matrix<LhsMode,RhsMode,LhsConjugate,RhsConjugate,LhsStorageOrder,RhsStorageOrder,RowMajor>
{
template <typename Scalar, typename Index>
static void run(
Index rows, Index cols,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* other, Index otherStride,
level3_blocking<Scalar,Scalar>& lhsBlocking,
level3_blocking<Scalar,Scalar>& rhsBlocking)
{
triangular_sylvester_matrix<
RhsMode^(Upper|Lower), LhsMode^(Upper|Lower),
NumTraits<Scalar>::IsComplex && RhsConjugate,
NumTraits<Scalar>::IsComplex && LhsConjugate,
RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
ColMajor>
::run(rows, cols, rhs, rhsStride, lhs, lhsStride, other, otherStride, rhsBlocking, lhsBlocking);
}
};

template <int LhsMode, int RhsMode, bool LhsConjugate, bool RhsConjugate, int LhsStorageOrder, int RhsStorageOrder>
struct triangular_sylvester_matrix<LhsMode,RhsMode,LhsConjugate,RhsConjugate,LhsStorageOrder,RhsStorageOrder,ColMajor>
{
template <typename Scalar, typename Index>
static EIGEN_DONT_INLINE void run(
Index, Index,
const Scalar*, Index,
const Scalar*, Index,
Scalar*, Index,
level3_blocking<Scalar,Scalar>&,
level3_blocking<Scalar,Scalar>&);
};

template <int LhsMode, int RhsMode, bool LhsConjugate, bool RhsConjugate, int LhsStorageOrder, int RhsStorageOrder>
template <typename Scalar, typename Index>
EIGEN_DONT_INLINE void
triangular_sylvester_matrix<LhsMode,RhsMode,LhsConjugate,RhsConjugate,LhsStorageOrder,RhsStorageOrder,ColMajor>::run(
Index rows, Index cols,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& lhsBlocking,
level3_blocking<Scalar,Scalar>& rhsBlocking)
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum { SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr, Traits::nr) };

const_blas_data_mapper<Scalar,Index,LhsStorageOrder> lhs(_lhs, lhsStride);
const_blas_data_mapper<Scalar,Index,RhsStorageOrder> rhs(_rhs, rhsStride);
blas_data_mapper<Scalar,Index,ColMajor> other(_other, otherStride);

// TODO
}

template <int Time, typename Derived, int LhsMode, int RhsMode,
int RowsCategory = product_size_category<Derived::RowsAtCompileTime, Derived::MaxRowsAtCompileTime>::value,
int ColsCategory = product_size_category<Derived::ColsAtCompileTime, Derived::MaxColsAtCompileTime>::value>
struct triangular_sylvester_selector
{
template <typename Lhs, typename Rhs>
static void run (const MatrixBase<Lhs>& lhs, const MatrixBase<Rhs>& rhs, Derived& other)
{
typedef typename Derived::Scalar Scalar;
typedef blas_traits<Lhs> LhsTraits;
typedef blas_traits<Rhs> RhsTraits;
typedef typename LhsTraits::DirectLinearAccessType ActualLhs;
typedef typename RhsTraits::DirectLinearAccessType ActualRhs;

enum {
LhsStorageOrder = Lhs::IsRowMajor ? RowMajor : ColMajor,
RhsStorageOrder = Rhs::IsRowMajor ? RowMajor : ColMajor,
StorageOrder = Derived::IsRowMajor ? RowMajor : ColMajor,
Rows = Derived::MaxRowsAtCompileTime,
Cols = Derived::MaxColsAtCompileTime
};

gemm_blocking_space<StorageOrder,Scalar,Scalar,Rows,Cols,Rows,4> lhsBlocking(other.rows(), other.cols(), lhs.rows());
gemm_blocking_space<StorageOrder,Scalar,Scalar,Rows,Cols,Cols,4> rhsBlocking(other.rows(), other.cols(), rhs.cols());

triangular_sylvester_matrix<LhsMode, RhsMode, LhsTraits::NeedToConjugate, RhsTraits::NeedToConjugate,
LhsStorageOrder, RhsStorageOrder, StorageOrder>
::run(
other.rows(), other.cols(),
actualLhs.data(), actualLhs.outerStride(),
actualRhs.data(), actualRhs.outerStride(),
other.data(), other.outerStride(),
lhsBlocking, rhsBlocking);
}
};

template <typename Derived, int LhsMode, int RhsMode>
struct triangular_sylvester_selector<Continuous,Derived,LhsMode,RhsMode,1,1>
{
template <typename Lhs, typename Rhs>
static void run (const MatrixBase<Lhs>& lhs, const MatrixBase<Rhs>& rhs, Derived& other)
{
other.coeffRef(0,0) /= lhs.coeff(0,0) + rhs.coeff(0,0);
}
};

template <typename Derived, int LhsMode, int RhsMode>
struct triangular_sylvester_selector<Discrete,Derived,LhsMode,RhsMode,1,1>
{
template <typename Lhs, typename Rhs>
static void run (const MatrixBase<Lhs>& lhs, const MatrixBase<Rhs>& rhs, Derived& other)
{
other.coeffRef(0,0) /= lhs.coeff(0,0) * rhs.coeff(0,0) - 1;
}
};

template <int Time, typename Derived, int LhsMode, int RhsMode>
struct triangular_sylvester_selector<Time,Derived,LhsMode,RhsMode,Small,1>
{
template <typename Lhs, typename Rhs>
static void run (const MatrixBase<Lhs>& lhs, const MatrixBase<Rhs>& rhs, Derived& vector)
{
triangular_sylvester_unroller<Time,LhsMode,0,Derived::SizeAtCompileTime>::run(lhs, rhs.coeff(0,0), vector);
}
};

template <int Time, typename Derived, int LhsMode, int RhsMode>
struct triangular_sylvester_selector<Time,Derived,LhsMode,RhsMode,Large,1>
{
template <typename Lhs, typename Rhs>
static void run (const MatrixBase<Lhs>& lhs, const MatrixBase<Rhs>& rhs, Derived& vector)
{
typedef blas_traits<Lhs> LhsTraits;
typedef typename LhsTraits::DirectLinearAccessType ActualLhs;
typedef typename Derived::Scalar Scalar;
typedef Map<Matrix<Scalar,Dynamic,1>, Aligned> Map;

bool useVectorDirectly = Derived::InnerStrideAtCompileTime == 1 || vector.innerStride() == 1;
ActualLhs actualLhs = LhsTraits::extract(lhs);

ei_declare_aligned_stack_constructed_variable(Scalar, actualVector, vector.size(), useVectorDirectly ? vector.data() : 0);

if (!useVectorDirectly)
Map(actualVector, vector.size()) = vector;

triangular_sylvester_vector<Time, LhsMode, LhsTraits::NeedToConjugate, Lhs::IsRowMajor ? RowMajor : ColMajor>
::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), rhs.coeff(0,0), actualVector);

if (!useVectorDirectly)
vector = Map(actualVector, vector.size());
}
};

template <int Time, typename Derived, int LhsMode, int RhsMode, int ColsCategory>
struct triangular_sylvester_selector<Time,Derived,LhsMode,RhsMode,1,ColsCategory>
{
template <typename Lhs, typename Rhs>
static void run (const MatrixBase<Lhs>& lhs, const MatrixBase<Rhs>& rhs, Derived& rowVector)
{
Transpose<Derived> vector(rowVector);
triangular_sylvester_selector<Time, Derived, RhsMode^(Upper|Lower), LhsMode^(Upper|Lower)>
::run(rhs.transpose(), lhs.transpose(), vector);
}
};

template <int Time, typename Lhs, typename Rhs, typename Derived>
class triangular_sylvester_retval
: public ReturnByValue< triangular_sylvester_retval<Time,Lhs,Rhs,Derived> >
{
private:
typedef SylvesterTriangular<Time,Lhs,Rhs> Solver;
typedef typename ReturnByValue<triangular_sylvester_retval>::Index Index;

public:
triangular_sylvester_retval(const Solver& solver, const Derived& other)
: _solver(solver), _other(other)
{}

template <typename Dest>
void evalTo (Dest& dst) const
{
dst = _other;
_solver.solveInPlace(dst);
}

Index rows() const { return _other.rows(); }
Index cols() const { return _other.cols(); }

private:
const Solver& _solver;
const Derived& _other;
};

template <int Time, typename Lhs, typename Rhs, typename Derived>
struct traits< triangular_sylvester_retval<Time,Lhs,Rhs,Derived> >
{
typedef typename plain_matrix_type_column_major<Derived>::type ReturnType;
};

} // namespace internal

template <int Time, typename Lhs, typename Rhs>
class SylvesterTriangular
{
public:
/*!
* \brief Construct from two triangular matrices
*
* \param[in] lhs  The matrix \a A, which should be triangular and square
* \param[in] rhs  The matrix \a B, which should be triangular and square
*/
SylvesterTriangular (const TriangularBase<Lhs>& lhs, const TriangularBase<Rhs>& rhs)
: _lhs(lhs.derived()), _rhs(rhs.derived())
{}

/*!
* \brief "In-place" version of solve().
*
* \param[in,out] other  The matrix \a C.  This function writes the
*                       solution \a X back here.
*
* This function solves the Sylvester equation.  If the template parameter
* \a Time is #Continuous, this solves \f\$ AX + XB = C \f\$.  Otherwise, it
* solves \f\$ AXB - X = C \f\$.
*
* \warning Like TriangularView::solveInPlace(), the parameter is only
* marked \b const to make the C++ compiler accept a temporary expression.
* This function will const_cast it, so constness isn't honored.
*/
template <typename Derived>
void solveInPlace (const MatrixBase<Derived>& other) const
{
Derived& solution = other.const_cast_derived();
internal::triangular_sylvester_selector<Time, Derived, Lhs::Mode, Rhs::Mode>
::run(_lhs.nestedExpression(), _rhs.nestedExpression(), solution);
}

/*!
* \brief Solve the Sylvester equation
*
* \param[in] other  The matrix \a C
* \return           The solution \a X, of the same size as \a C
*
* This function solves the Sylvester equation.  If the template parameter
* \a Time is #Continuous, this solves \f\$ AX + XB = C \f\$.  Otherwise, it
* solves \f\$ AXB - X = C \f\$.
*/
template <typename Derived>
const internal::triangular_sylvester_retval<Time,Lhs,Rhs,Derived>
solve (const MatrixBase<Derived>& other) const
{
return internal::triangular_sylvester_retval<Time,Lhs,Rhs,Derived>(*this, other.derived());
}

private:
const Lhs& _lhs;  //!< The matrix \a A.
const Rhs& _rhs;  //!< The matrix \a B.
};

/*!
* \ingroup Sylvester_module
*
* \brief This function solves triangular Sylvester equation.
*
* \tparam   Time  This defaults to #Continuous unless EIGEN_SYLVESTER_DEFAULT_TO_DISCRETE is defined.
* \param[in] lhs  The type of matrix \a A, which should be a TriangularView
* \param[in] rhs  The type of matrix \a B, which should be a TriangularView
* \return         A SylvesterTriangular containing \a A and \a B
*/
template <int Time, typename Lhs, typename Rhs>
SylvesterTriangular<Time,Lhs,Rhs>
sylvester (const TriangularBase<Lhs>& lhs, const TriangularBase<Rhs>& rhs)
{
return SylvesterTriangular<Time,Lhs,Rhs>(lhs,rhs);
}

template <typename Lhs, typename Rhs>
SylvesterTriangular<EIGEN_SYLVESTER_DEFAULT_TIME,Lhs,Rhs>
sylvester (const TriangularBase<Lhs>& lhs, const TriangularBase<Rhs>& rhs)
{
return SylvesterTriangular<EIGEN_SYLVESTER_DEFAULT_TIME,Lhs,Rhs>(lhs,rhs);
}

} // namespace Eigen

#endif // EIGEN_SYLVESTER_TRIANGULAR_H
```

 Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/