[eigen] RFC: BasicSVD class

```Attached is a draft of a faster SVD class.

Summary:
```
The BasicSVD is much faster and sufficiently accurate for many (most?) purposes.
```

* Singular Value Decomposition by eigenanalysis of the Gramian
*
```
* option 1: (default) Solves A=U*S*V' by finding the eigenvectors,values of A'A (or AA' if matrix is wide)
``` *           This is done using the SelfAdjointEigenSolver
```
* option 2: "fastMode" uses power iteration to quickly strip off the dominant eigenvectors one at a time. * This may be useful when only a few of the singular dimensions are desired (e.g. k<5)
``` *
```
* Currently limited to "Thin" or "Economy Sized" singular vectors. When decomposing a rectangular matrix, * either the U or V matrix will be an incomplete basis (tall). Any completion of this basis is valid and
``` * is left as an exercise to the reader.
*
* The "fast" mode uses power iteration (plus feedback)
*    This can be several times faster than the default mode, but
*   with caveats:
```
* *) convergence is slow and accuracy suffers in the case of nearly identical strongest singular values * *) speed is linear with K, so if K> 5ish, fastMode may actually be slower.
```

Output from testBasicSVD.cc (attached)
compiled with: g++ -Wall -O -g -msse4.2 -I /path/to/inc/ testBasicSVD.cc
explanation of fields:
```
values: K strongest singular values (in this case K=2) ( all lesser dimensions have singular values around 1) vectorwise errors: singular vector errors left|right for each of the K strongest dimensions subspace error: residual error of the true rank-K subspace projected onto the found subspace
```
begin output :
############## Type: float Problem Size: 300x200
================ JacobiSVD ================
elapsed time 0.358115s
values = 3.00011 2.00006
vectorwise errors (dB):-87.5|-92.3 -87.8|-92.2
subspace error -114 dB
================ BasicSVD ================
elapsed time 0.0242s
values = 3 2
vectorwise errors (dB):-133|-125 -112|-106
subspace error -109 dB
================ BasicSVD (fast mode) ================
elapsed time 0.0046s
values = 3 2
vectorwise errors (dB):-89.8|-86.3 -78.5|-81.8
subspace error -97.3 dB
############## Type: complex<double> Problem Size: 300x200
================ JacobiSVD ================
elapsed time 5.91s
values = 3 2
vectorwise errors (dB):-259|-265 -266|-268
subspace error -284 dB
================ BasicSVD ================
elapsed time 0.216s
values = 3 2
vectorwise errors (dB):-241|-237 -231|-231
subspace error -235 dB
================ BasicSVD (fast mode) ================
elapsed time 0.0411s
values = 3 2
vectorwise errors (dB):-305|-306 -178|-175
subspace error -178 dB

-- Mark Borgerding
```
```#ifndef BASIC_SVD_H
#define BASIC_SVD_H

#include <Eigen/Dense>

/**
* Singular Value Decomposition by eigenanalysis of the Gramian
*
* option 1: (default) Solves A=U*S*V' by finding the eigenvectors,values of A'A (or AA' if matrix is wide)
*           This is done using the SelfAdjointEigenSolver
* option 2: "fastMode" uses power iteration to quickly strip off the dominant eigenvectors one at a time.
*              This may be useful when only a few of the singular dimensions are desired (e.g. k<5)
*
* Currently limited to "Thin" or "Economy Sized" singular vectors. When decomposing a rectangular matrix,
* either the U or V matrix will be an incomplete basis (tall).  Any completion of this basis is valid and
* is left as an exercise to the reader.
*
* Mark Borgerding 2012
**/
template <typename _MatrixType>
class BasicSVD
{
public:
typedef typename _MatrixType::Scalar Scalar;
typedef Eigen::Matrix<Scalar, -1, -1> MatrixType;

typedef typename Eigen::NumTraits<typename MatrixType::Scalar>::Real RealScalar;
typedef typename Eigen::internal::plain_diag_type<MatrixType, RealScalar>::type SingularValuesType;
typedef typename Eigen::internal::plain_col_type<MatrixType>::type ColType;

BasicSVD():fast_(false) {}
BasicSVD(const _MatrixType & A,int K=-1)
:fast_(false)
{
compute(A,K);
}

/** The "fast" mode uses power iteration (plus feedback)
This can be several times faster than the default mode, but
with caveats:
*) convergence is slow and accuracy suffers in the case of nearly identical strongest singular values
*) speed is linear with K, so if K> 5ish, it may actually be slower.
Use at your own risk.                **/
void fastMode(bool yn) {fast_=yn;}  /// allow fastMode
bool fastMode() const {return fast_;}

const SingularValuesType & singularValues() const {return s_;} /// returns the diagonal positive real values along the S matrix
const MatrixType & matrixU() const {return u_;} /// returns the first K columns of the left singular vector orthonormal basis
const MatrixType & matrixV() const {return v_;}  /// returns the first K columns of the right singular vector orthonormal basis

BasicSVD& compute(_MatrixType A,int K=-1)  /// compute the K strongest dimensions of A = U*S*V'
{
int maxRank = std::min(A.rows(),A.cols());
if (K < 1 || K>maxRank)
K = maxRank;

// the rest of the algorithm assumes a tall matrix (rows>cols)
// when this is not the case, find the SVD for A.adjoint() then
// exchange u_ and v_ at the end (singular values stay the same)
bool swapped = false;
if (A.rows() < A.cols() ) {
MatrixType tmp = A;
swapped = true;
}
u_.resize(A.rows(),K);
v_.resize(A.cols(),K);
s_.resize(K);

// create the self-adjoint matrix A'*A
MatrixType X(A.cols(),A.cols());
X.setZero();

if (fast_)
usePowerIteration(X,K);
else
useEigenSolver(X,K);

// compute the left singular vectors from the right singular vectors
u_ = A*v_;
for (int k=0;k<K;++k)
u_.col(k).normalize();

if (swapped) {
MatrixType tmp = v_;
v_ = u_;
u_ = tmp;
}
return *this;
}

private:

void useEigenSolver( MatrixType & X ,int K)
{
v_ = es.eigenvectors().rightCols(K);
s_ = es.eigenvalues().tail(K);

// SelfAdjointEigenSolver returns the eigenvalues in increasing order,
// reverse them to SVD order
s_.reverseInPlace();
s_ = s_.array().sqrt();

for (int i=0;i<K/2;++i) {
ColType tmp = v_.col(i);
v_.col(i) = v_.col(K-1-i);
v_.col(K-1-i) = tmp;
}
}

void usePowerIteration( MatrixType & X ,int K)
{
MatrixType PX( X.rows(),X.cols() );
PX.setZero();
for (int k=0;k<K;++k) {
// subtract the orthogonal projection to the strongest eignevalue
PX = v_.col(k) * ( v_.col(k).adjoint() * X);
X -= PX;
}
}

void findStrongestEigenValue( const MatrixType & X , int k)
{
int n=X.cols();
ColType v = ColType::Random(n );
ColType vprev=v, vdif;
RealScalar eps = std::numeric_limits<RealScalar>::epsilon();

RealScalar mu=-999,muprev=1;
RealScalar damp=2;
v = v/v.norm();
int it;
for (it=0;it < 1e3;++it) {
muprev = mu;
#if USESAV
#else
v = X * vprev;
#endif
mu = v.norm();
v *= 1/mu;
vdif = v - vprev;
vprev = v;
RealScalar vdifnorm = vdif.norm();
if ( vdifnorm < .1) {
//if ( vdifnorm <= eps*n) break;
if (mu >= muprev) {
if ( mu/muprev-1 < eps )
break;
// Rayleigh quotient is still increasing
// overshoot the change to speed convergence
v += vdif*damp;
v.normalize();
damp *= 1.5;// get a little more aggressive since we are converging
damp = std::min<RealScalar>( 1000,damp); // if we don't cap this, we can get NaNs under some conditions
}else{
damp *= .5; // back off the feedback
}
}
}
v_.col(k) = v;
s_(k) = sqrt(mu);
}

bool fast_;
MatrixType u_;
SingularValuesType s_;
MatrixType v_;
};

#endif
```
```#include "BasicSVD.h"
#include <iostream>
#include <iomanip>
#include <typeinfo>
#include <sys/time.h>
#include <sys/types.h>

using namespace Eigen;
using namespace std;

static inline
double curtime(void)
{
struct timeval tv;
gettimeofday(&tv, NULL);
return (double)tv.tv_sec + (double)tv.tv_usec*.000001;
}

template <typename Derived>
Matrix<typename MatrixBase<Derived>::Scalar,Dynamic,Dynamic,0,Dynamic,Dynamic>
GramSchmidt(const MatrixBase<Derived>& A)
{
typedef typename Derived::PlainObject MatrixType;
typedef typename MatrixBase<Derived>::Scalar Scalar;
typedef typename NumTraits<typename MatrixBase<Derived>::Scalar>::Real RealScalar;
typedef Matrix<Scalar,Dynamic,1> VectorType;

int maxRank = std::min(A.rows(),A.cols());
MatrixType U(A.rows(),maxRank);
int outrank=0;
RealScalar tol = std::numeric_limits<RealScalar>::epsilon();
for (int k=0;k<A.cols();++k) {
VectorType a =  A.col(k);
RealScalar nb = a.norm(); // "before" norm
for (int k2=0;k2<outrank;k2++)
a -= U.col(k2) * ( U.col(k2).dot(a) ); // subtract orthogonal projections onto previous basis vectors
RealScalar na = a.norm(); // "after" norm

if ( na > 0 && na/nb > tol) {
a *= (1/na);
U.col(outrank) = a;
outrank++;
if (outrank==maxRank)
break;
}
}
if (outrank<U.cols())
U.conservativeResize(U.rows(),outrank);
return U;
}

template <typename T1,typename T2>
float dB_nmse(const T1 & vec1,const T2 & vec2)
{
return 20*log10( std::max<double>( (vec1-vec2.dot(vec1)*vec2 ).norm() / vec1.norm() , 1e-20) );
}

template <typename T>
void testType(const char * name, int m,int n,int k,double spread)
{
typedef Matrix<T, Dynamic, Dynamic> MatrixType;
typedef Matrix<typename MatrixType::RealScalar, Dynamic, 1 > ValuesType;
typedef DiagonalMatrix<typename MatrixType::RealScalar, Dynamic, Dynamic> DiagonalMatrixType ;
double t0,t1;

MatrixType Uin,Vin,X;
int rank=std::min(m,n);
{
t0=curtime();
MatrixType U = GramSchmidt( MatrixType::Random(m,k) );
MatrixType V = GramSchmidt( MatrixType::Random(n,k) );

ValuesType singvals(k);
if (k==1) {
singvals(0) = 2;
}else{
}
MatrixType X1 = U*DiagonalMatrixType( singvals )*V.adjoint(); // the truly low rank part of the matrix

MatrixType R = MatrixType::Random(m,n)/sqrt(m);// randomness to fill in the other dimensions
for (int i=0;i<5;++i) {
R -= U*(U.adjoint()*R); // subtract orthogonal projection onto the column space of X1
R -= (R*V)*V.adjoint(); // subtract orthogonal projection onto the row space of X1
}
X = X1 + R; // we can now add R to the low rank matrix X1 without affecting its dimensions.
/*
BasicSVD<MatrixType> bsvd;
bsvd.fastMode(true);
bsvd.compute(X,k+5);
ValuesType singvals2 = bsvd.singularValues();
cerr << "first few singular values:" << singvals2.transpose() << endl; */
Uin = U.leftCols(k);
Vin = V.leftCols(k);
t1=curtime();
}

cerr << "############## Type: " << name << " Problem Size: " << m<< "x" << n << endl;// " (gen took " << (t1-t0) << " seconds)\n";

bool skipJacobiSVD  = false;
if ( rank>400 ) {
cerr << "skipping JacobiSVD for time reasons\n";
skipJacobiSVD=true;
}

for (int approach=skipJacobiSVD;approach<3;++approach) {
cerr << "================ ";
t0 = curtime();
MatrixType U,V;
ValuesType S;
if (approach==0) {
cerr << "JacobiSVD "; // slow, but accurate
JacobiSVD<MatrixType> jsvd(X,ComputeThinU|ComputeThinV);
U = jsvd.matrixU().leftCols(k);
V = jsvd.matrixV().leftCols(k);
} else {
cerr << "BasicSVD "; // uses SelfAdjointEigenSolver or power iteration
BasicSVD<MatrixType> svds;
if (approach==2)  {
svds.fastMode(true);
cerr << "(fast mode) ";
}
svds.compute(X,k);
U = svds.matrixU();
V = svds.matrixV();
S = svds.singularValues();
}
t1 = curtime();
cerr << "================\nelapsed time " << (t1-t0)<< "s\n";

cerr << "values = " << S.transpose() << endl;

cerr << "vectorwise errors (dB):" << setprecision(3);
for (int i=0;i<k;++i) {
cerr << dB_nmse(Uin.col(i),U.col(i) ) << "|";
cerr << dB_nmse(Vin.col(i),V.col(i) ) << " ";
}
cerr << endl;

double err = (V-PV).norm()/sqrt((double)k);
cerr << "subspace error "
<< (20*log10(std::max<double>( 1e-20, err) ) )
<< " dB\n";
if (err > 1e-3) {
// gross check
cerr << "unacceptable subspace error!!!!!\n";
exit(1);
}
}
}

int main(int argc, char ** argv)
{
// compute the rank k SVD of a mxn matrix
int m = argc>1? atoi(argv[1]) : 300;
int n = argc>2? atoi(argv[2]) : 200;
int K = argc>3? atoi(argv[3]) : 2;
double spread = max<double>(0,argc>4? atof(argv[4]) : 1.0); // set to higher number to make the principal K values closer
srand(getpid());