[eigen] How to use Eigen in CUDA kernels for eigendecomposition?

[ Thread Index | Date Index | More lists.tuxfamily.org/eigen Archives ]


Dear list,

This is my first post to the list and I would like to use it to thank all the developers for their
astonishing work.  Keep it up guys!

Now to my problem: I am using the latest development version of Eigen and I want to calculate the
eigendecomposition of a matrix in a CUDA kernel.

I followed »Using custom scalar types« (http://eigen.tuxfamily.org/dox-3.2/TopicCustomizingEigen.htm
l#CustomScalarType) to implement support for thrust::complex.

The example below compiles but is unusable.  The approximately 800 lines of nvcc V8.0.61 output can
be view at https://gist.github.com/anonymous/757218ae4daec63ed021b143a38e76b0.  There, I see a lot
of warnings which involve calling a __host__ function from a __host__ __device__ function.  It
appears to me that some functions are not correctly decorated (yet).  I am very interested in
helping to fix this but have so far not worked with the internals of Eigen's expression template
code.  Could you please provide some help?

Best regards,
Henri

---

The compile command line is simply
$ nvcc -std=c++11 -I/path/to/eigen/ test.cu


#include <iostream>
#include <thrust/complex.h>
#include <Eigen/Eigenvalues>

static void HandleError(cudaError_t err, const char *file, int line)
{
  if (err != cudaSuccess)
  {
    std::cerr << cudaGetErrorString(err) << " in " << file << ' ' << line << '\n';
    exit(EXIT_FAILURE);
  }
}
#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))


namespace Eigen {
  template<typename _Real> struct NumTraits<thrust::complex<_Real> >
    : NumTraits<std::complex<_Real> >
  {
    typedef _Real Real;
    typedef typename NumTraits<_Real>::Literal Literal;
    enum {
      IsComplex = 1,
      RequireInitialization = NumTraits<_Real>::RequireInitialization,
      ReadCost = 2 * NumTraits<_Real>::ReadCost,
      AddCost = 2 * NumTraits<Real>::AddCost,
      MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
    };
  };
}


namespace thrust {
  template < typename T >
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  T real(complex<T> const& x) { return x.real(); }

  template < typename T >
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  T imag(complex<T> const& x) { return x.imag(); }
}


__global__ void test_kernel(int n, float * r) {
  typedef Eigen::Matrix<thrust::complex<float>,2,2> cuMatrixXcf;

  cuMatrixXcf m;
  m << 1,0, 0,1;
  m *= threadIdx.x;

  Eigen::SelfAdjointEigenSolver< cuMatrixXcf > solver;
  solver.compute(m, Eigen::EigenvaluesOnly);

  auto const& D = solver.eigenvalues();
  
  for (int i = 0; i < n; ++i)
    r[n*threadIdx.x + i] = D(i);
}


int main()
{
  float * r;
  HANDLE_ERROR( cudaMalloc(&r, 4*10*sizeof(float)) );

  test_kernel<<<10,1>>>(4,r);

  std::vector<float> ev(4*10);
  HANDLE_ERROR( cudaMemcpy(ev.data(), r, 4*10*sizeof(float), cudaMemcpyDeviceToHost) );

  for (auto const& p : ev)
    std::cout << p << ' ';
  std::cout << '\n';
}



Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/