Re: [eigen] Pivoting for LDLT

[ Thread Index | Date Index | More lists.tuxfamily.org/eigen Archives ]

To: eigen <eigen@xxxxxxxxxxxxxxxxxxx>
Subject: Re: [eigen] Pivoting for LDLT
From: Jitse Niesen <jitse@xxxxxxxxxxxxxxxxx>
Date: Mon, 2 Feb 2009 14:03:11 +0000 (GMT)

On Mon, 2 Feb 2009, Keir Mierle wrote:

I implemented a simple pivoting strategy for LDLT, but am not convincedit's better.
It looks for the largest diagonal element during each iteration, thenpivots it to become the next processed row. Attached is a simple testthat compares the old LDLT vs the pivoting one; it shows that theresults are not fantastic. I find that there is consistently 30% or moreresidual in the pivoting version.


That surprises me.

When I try it out on 100 random matrices of size 100 x 100, I get:

Average residual:  nopiv 4.45718e-14    piv 4.07206e-14
Std.dev.:          nopiv 1.43516e-15    piv 1.01811e-15
Minimum:           nopiv 4.09651e-14    piv 3.77983e-14
Maximum:           nopiv 4.83319e-14    piv 4.28309e-14

So pivoting is slightly better, but the residual matrix LDL^T - A is evenwithout pivoting so small that it's not clear whether it's worth theeffort for random matrices. However, random matrices often worksurprisingly well.

I attach my code, which is the same as Keir but with some parts commentedout that I did not understand or found unnecessary (please let me know ifI commented out any essential parts).


Here is one example in which pivoting does vastly better:

    A << 1e-5*MatrixXd::Identity(s/2,s/2), MatrixXd::Identity(s/2,s/2),
      MatrixXd::Identity(s/2,s/2), 1e+5*MatrixXd::Identity(s/2,s/2);
    A += 1e-5*MatrixXd::Random(s,s);
    A *= A.transpose();
    // and remove all the bits with SelfAdjointEigenSolver

Incidentally, the documentation for setRandom() can be improved. It shouldmention what distribution is used and also something about setting theseed.


Cheers,
Jitse

#define EIGEN_DEFAULT_IO_FORMAT IOFormat(4, AlignCols, "  ", "\n", "", "", "", "")

#include <Eigen/Core>
#include <Eigen/Cholesky>
#include <Eigen/LU>
#include <Eigen/QR>
#include <Eigen/SVD>
#include <Eigen/Array>

using namespace Eigen;
using namespace std;

template<typename MatrixType>
void computeLDL_nopivot(const MatrixType& a, MatrixType &m_matrix)
{
  assert(a.rows()==a.cols());
  const int size = a.rows();
  m_matrix.resize(size, size);
  bool m_isPositiveDefinite = true;
  typedef typename MatrixType::RealScalar RealScalar;
  typedef typename MatrixType::Scalar Scalar;
  const RealScalar eps = precision<Scalar>();

  if (size<=1) {
    m_matrix = a;
    return;
  }

  Matrix<Scalar,MatrixType::RowsAtCompileTime,1> _temporary(size);

  m_matrix.row(0) = a.row(0).conjugate();
  m_matrix.col(0).end(size-1) = m_matrix.row(0).end(size-1) / m_matrix.coeff(0,0);
  for (int j = 1; j < size; ++j)
  {
    RealScalar tmp = ei_real(a.coeff(j,j) - (m_matrix.row(j).start(j) * m_matrix.col(j).start(j).conjugate()).coeff(0,0));
    m_matrix.coeffRef(j,j) = tmp;

    if (0 && tmp < eps) {
      m_isPositiveDefinite = false;
      cout << "dying; with no pivot tmp = " << tmp << " rank = " << (j+1) << endl;
      return;
    }

    int endSize = size-j-1;
    if (endSize > 0) {
      _temporary.end(endSize) = ( m_matrix.block(j+1,0, endSize, j)
                                  * m_matrix.col(j).start(j).conjugate() ).lazy();

      m_matrix.row(j).end(endSize) = a.row(j).end(endSize).conjugate()
                                   - _temporary.end(endSize).transpose();

      m_matrix.col(j).end(endSize) = m_matrix.row(j).end(endSize) / tmp;
    }
  }
}


// Computes a (PL)D(PL)^T decomposition. m_matrix holds the result P*L
// afterwords.
template<typename MatrixType>
void computeLDL_pivot(const MatrixType& a, MatrixType &m_matrix)
{
  assert(a.rows()==a.cols());
  const int size = a.rows();
  m_matrix.resize(size, size);
  bool m_isPositiveDefinite = true;
  typedef typename MatrixType::RealScalar RealScalar;
  typedef typename MatrixType::Scalar Scalar;
  const RealScalar eps = precision<Scalar>();

  if (size<=1) {
    m_matrix = a;
    return;
  }

  m_matrix = a; //k

  typedef Matrix<int, MatrixType::RowsAtCompileTime, 1> IntVectorType;
  IntVectorType transpositions(size);
  RealScalar biggest = RealScalar(0), biggest_in_corner;

  Matrix<Scalar,MatrixType::RowsAtCompileTime,1> _temporary(size);

  for (int j = 0; j < size; ++j)
  {
    // Find largest element diagonal and pivot it upward for processing next.
    int row_of_biggest_in_corner, col_of_biggest_in_corner;
    biggest_in_corner = m_matrix.diagonal().end(size-j).cwise().abs()
                       .maxCoeff(&row_of_biggest_in_corner,
                                 &col_of_biggest_in_corner);
    ei_assert(col_of_biggest_in_corner == 0); // XXX

    if(j == 0) biggest = biggest_in_corner;

    // Finish early if the matrix is not full rank.
    if(0 && ei_isMuchSmallerThan(biggest_in_corner, biggest))
    {
      m_isPositiveDefinite = false;
      for(int i = j; i < size; i++) transpositions.coeffRef(i) = i;
      cout << "dying; not full rank. biggest in corner is much smaller than biggest";
      break;
    }

    row_of_biggest_in_corner += j;
    transpositions.coeffRef(j) = row_of_biggest_in_corner;
    if(j != row_of_biggest_in_corner)
    {
      m_matrix.row(j).swap(m_matrix.row(row_of_biggest_in_corner));
      m_matrix.col(j).swap(m_matrix.col(row_of_biggest_in_corner));
    }

    if (j == 0) {
      m_matrix.row(0) = m_matrix.row(0).conjugate();
      m_matrix.col(0).end(size-1) = m_matrix.row(0).end(size-1) / m_matrix.coeff(0,0);
      continue;
    }

    RealScalar tmp = ei_real(m_matrix.coeff(j,j) - (m_matrix.row(j).start(j) * m_matrix.col(j).start(j).conjugate()).coeff(0,0));
    m_matrix.coeffRef(j,j) = tmp;

    if (0 && tmp < eps)
    {
      m_isPositiveDefinite = false;
      for(int i = j; i < size; i++) transpositions.coeffRef(i) = i;
      cout << "DYING; tmp = " << tmp << " rank = " << (j+1) << endl;
      break;
    }

    int endSize = size-j-1;
    if (endSize > 0) {
      _temporary.end(endSize) = ( m_matrix.block(j+1,0, endSize, j)
                                  * m_matrix.col(j).start(j).conjugate() ).lazy();

      m_matrix.row(j).end(endSize) = m_matrix.row(j).end(endSize).conjugate()  //kk
                                   - _temporary.end(endSize).transpose();

      m_matrix.col(j).end(endSize) = m_matrix.row(j).end(endSize) / tmp;
    }
  }

  // cout << "perms: " << transpositions.transpose() << endl;

  IntVectorType m_p(size);
  for(int k = 0; k < size; ++k) m_p.coeffRef(k) = k;
  for(int k = size-1; k >= 0; --k) {
    std::swap(m_p.coeffRef(k), m_p.coeffRef(transpositions.coeff(k)));
  }
  // cout << "m_p: " << m_p.transpose() << endl;

  // For now, make a permuted L matrix so the caller can use it as A = LL^T,
  // even though it's really PL, not L, that they are getting.
  _temporary = m_matrix.diagonal();
  MatrixType t_matrix = m_matrix.template part<Eigen::UnitLowerTriangular>();
  m_matrix = t_matrix;
  m_matrix.diagonal() = _temporary;
  for (int j = size-1; j >= 0; --j) {
    m_matrix.row(j).swap(m_matrix.row(transpositions.coeffRef(j)));
    m_matrix.col(j).swap(m_matrix.col(transpositions.coeffRef(j)));
  }

}

template<int s, typename T>
void tryDecomp(const T &A, double *res_nopiv, double *res_piv) {
  // cout << "rank =\n" << A.lu().rank() << endl;
  // cout << "Ap=\n" << A << endl;
  // cout << "eigenvalues:\n" << (A.template part<SelfAdjointBit>().eigenvalues()) << endl;

  Matrix<double, s, s> ldlt;
  Matrix<double, s, s> L, D, res, X;

  // No pivoting; original algorithm.
  computeLDL_nopivot(A, ldlt);
  L = ldlt.template part<Eigen::UnitLowerTriangular>();
  D = ldlt.diagonal().asDiagonal();
  res = L*D*L.transpose() - A;
  // cout << "nopiv res = \n" << (res.cwise().abs().cwise() >= 1e-10) << endl;
  // cout << "nopiv res = \n" << res << endl;
  // cout << "nopiv res = " << sqrt(res.cwise().square().sum()) << endl;
  *res_nopiv = sqrt(res.cwise().square().sum());

  // With pivoting; doesn't work as well.
  computeLDL_pivot(A, ldlt);
  L = ldlt; // NOT lower triangular anymore (this is really PL).
  L.diagonal().fill(1);
  D = ldlt.diagonal().asDiagonal();
  res = L*D*L.transpose() - A;
  // cout << "piv res = \n" << (res.cwise().abs().cwise() >= 1e-10) << endl;
  // cout << "piv res = \n" << res << endl;
  // cout << "piv res = " << sqrt(res.cwise().square().sum()) << endl;
  *res_piv = sqrt(res.cwise().square().sum());
}


int main() {
  const int s = 100;
  const int experiments = 100;

  Matrix<double, s, s> A;
  Matrix<double, s, 1> b;
  Matrix<double, experiments, 1> res_nopiv, res_piv;

  for (int i=0; i<experiments; i++) {
    A.setRandom();
    // A = A.cwise().abs();
    A *= A.transpose();
    // A /= 2;
    // cout << "A=\n" << A << endl;
    // cout << "eigenvalues orig:\n" << A.part<SelfAdjointBit>().eigenvalues() << endl;
  
    // Force A to be badly conditioned, but p.s.d. No pivoting consistently works
    // better.
    SelfAdjointEigenSolver<Matrix<double, s, s> > eis(A.part<SelfAdjointBit>());
    b = eis.eigenvalues().cwise().abs();
    b(0) *= 1e-9;
    b(1) *= 1e-11;
    b(4) *= 1e-11;
    b(8) *= 1e-14;
    A = eis.eigenvectors() * b.asDiagonal() * eis.eigenvectors().transpose();
    // A(4, 4) = 0.0;
    tryDecomp<s>(A, &res_nopiv[i], &res_piv[i]);
  
    // N*N^t is on order 1e-60; residual error from decomposition is indeed
    // 1e-75, which is about machine precision compared to 1e-60.
    // Matrix<double, s, s> N;
    // N = A.part<Eigen::UnitLowerTriangular>();
    // N *= 1e-30;
    // cout << "N \n" << N << endl;
    // cout << "\n\n\n";
    // cout << "This should work well with error about machine precision compared to the A matrix.";
    // cout << "Note that the tmp < eps check in trunk LDLT prevents this from solving.";
    // A = N * N.transpose();
    // tryDecomp<s>(A);
  }
  cout << "Average residual:  nopiv " << res_nopiv.sum() / experiments;
  cout << "    piv " << res_piv.sum() / experiments << endl;
  cout << "Std.dev.:          nopiv " << sqrt(res_nopiv.cwise().square().sum() / experiments - pow(res_nopiv.sum() / experiments, 2));
  cout << "    piv " << sqrt(res_piv.cwise().square().sum() / experiments - pow(res_piv.sum() / experiments, 2)) << endl;
  cout << "Minimum:           nopiv " << res_nopiv.minCoeff();
  cout << "    piv " << res_piv.minCoeff() << endl;
  cout << "Maximum:           nopiv " << res_nopiv.maxCoeff();
  cout << "    piv " << res_piv.maxCoeff() << endl;
}

Follow-Ups:
- Re: [eigen] Pivoting for LDLT
  - From: Benoit Jacob
- Re: [eigen] Pivoting for LDLT
  - From: Keir Mierle

References:
- [eigen] Pivoting for LDLT
  - From: Keir Mierle

Messages sorted by: [ date | thread ]
Prev by Date: Re: [eigen] sdgfdf
Next by Date: Re: [eigen] Pivoting for LDLT
Previous by thread: [eigen] Pivoting for LDLT
Next by thread: Re: [eigen] Pivoting for LDLT

Mail converted by MHonArc 2.6.19+

http://listengine.tuxfamily.org/