Re: [eigen] Re: How to use Eigen in CUDA kernels for eigendecomposition?

[ Thread Index | Date Index | More lists.tuxfamily.org/eigen Archives ]


I did not pay attention, but currently only computeDirect is supported from CUDA kernels, and computeDirect supports only 2x2 and 3x3 real matrices.

To support the more general compute() function, all its dependencies should be qualified with EIGEN_DEVICE_FUNC, but I cannot guarantee that all of them can be compiled on the device without some refactoring. It's a shame that nvcc does not trigger a compile-time error when device code calling host-only code gets truly instantiated.

gael

On Mon, Jul 3, 2017 at 11:17 PM, Henri Menke <henrimenke@xxxxxxxxx> wrote:
On Mon, 2017-07-03 at 14:07 +0200, Gael Guennebaud wrote:
> Hi,
>
> is your code running fine despite these warnings?  It could be that those warnings are generated
> from code that is never called. Compiling with --expt-relaxed-constexpr will also help.

Thanks for your reply!

The warnings don't seem to have an influence.  It is also possible to get rid of them by simply using
C++98 code, in which case constexpr is ignored.  Unfortunately, the code below does not run when I
uncomment the solver.compute line.  The output of the code on my test machine is

CUDA error status: 0
CUDA error status: 0
CUDA error status: 77
an illegal memory access was encountered in test.cu 73

I ran the code with the offending line uncommented through cuda-memcheck (which somehow changed the
error status from 77 to 4).  The output of cuda-memcheck is attached (test.mem).  Running it in
cuda-gdb is not very helpful either (see attached test.gdb).  Do you have any advice how to debug
this code?

Cheers, Henri

---

#include <iostream>
#include <vector>
#include <thrust/complex.h>
#include <Eigen/Eigenvalues>

// Print the numeric status of every CUDA runtime call; on failure, also
// print the human-readable message with the call site and abort the program.
static void HandleError(cudaError_t err, const char *file, int line)
{
  std::cerr << "CUDA error status: " << err << '\n';
  if (err == cudaSuccess)
    return;
  std::cerr << cudaGetErrorString(err) << " in " << file << ' ' << line << '\n';
  exit(EXIT_FAILURE);
}
// Wrap a runtime call so failures report the caller's file/line, not this helper's.
#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))


namespace Eigen {
  // Teach Eigen the numeric properties of thrust::complex<T> by inheriting
  // from the std::complex<T> traits (per Eigen's "Using custom scalar types"
  // guide).  Required so Eigen matrices of thrust::complex compile at all.
  template<typename _Real> struct NumTraits<thrust::complex<_Real> >
    : NumTraits<std::complex<_Real> >
  {
    typedef _Real Real;
    typedef typename NumTraits<_Real>::Literal Literal;
    enum {
      IsComplex = 1,
      RequireInitialization = NumTraits<Real>::RequireInitialization,
      // A complex value stores two reals, hence the 2x read/add costs; a
      // complex multiply is 4 real multiplies + 2 real adds.  (Fixed the
      // original's inconsistent mix of NumTraits<_Real> and NumTraits<Real>;
      // both name the same type, but one spelling keeps this readable.)
      ReadCost = 2 * NumTraits<Real>::ReadCost,
      AddCost = 2 * NumTraits<Real>::AddCost,
      MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
    };
  };
}


namespace thrust {
  // Free-function real()/imag() overloads for thrust::complex, found via ADL.
  // NOTE(review): these presumably exist to satisfy Eigen's unqualified
  // real(x)/imag(x) calls from __device__ code — the nvcc warnings quoted
  // later in this thread complain about constexpr __host__ "real"/"imag"
  // being called from __host__ __device__ "abs".  EIGEN_DEVICE_FUNC marks
  // them __host__ __device__ so those call sites compile on the device.
  template < typename T >
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  T real(complex<T> const& x) { return x.real(); }

  template < typename T >
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  T imag(complex<T> const& x) { return x.imag(); }
}


// Builds tid * I (2x2 complex identity), runs the self-adjoint eigensolver,
// and writes n floats per thread into r.  Expects r to hold at least
// n * gridDim.x * blockDim.x floats.  The compute() call stays commented out:
// it is the line under discussion in this thread (only computeDirect is
// device-supported), so without it eigenvalues() returns unset storage.
__global__ void test_kernel(int n, float * r) {
  typedef Eigen::Matrix<thrust::complex<float>,2,2> cuMatrix2cf;

  // Global thread index.  The original used bare threadIdx.x, which is
  // always 0 under the <<<10,1>>> launch in main() (10 blocks of 1 thread),
  // so all blocks computed the same tid and wrote the same slots of r.
  int tid = blockIdx.x * blockDim.x + threadIdx.x;

  cuMatrix2cf m;
  m << 1,0, 0,1;
  m *= tid;

  Eigen::SelfAdjointEigenSolver< cuMatrix2cf > solver;
//solver.compute(m, Eigen::EigenvaluesOnly);

  Eigen::Vector2f const& D = solver.eigenvalues();

  for (int i = 0; i < n; ++i)
  {
    // D has exactly 2 entries (2x2 matrix).  The original read D(i) for i
    // up to n-1 with n == 4 — an out-of-bounds access on the device.
    r[n*tid + i] = (i < 2) ? D(i) : 0.0f;
  }
}

// Launch 10 single-thread blocks, copy back 4 floats per thread, print them.
int main()
{
  const int n = 4;         // floats written per GPU thread
  const int threads = 10;  // total GPU threads (10 blocks x 1 thread)

  float * r = NULL;
  HANDLE_ERROR( cudaMalloc(&r, n*threads*sizeof(float)) );

  test_kernel<<<threads,1>>>(n,r);
  HANDLE_ERROR( cudaGetLastError() );       // launch-configuration errors only
  // Kernel execution is asynchronous: without this sync, an in-kernel fault
  // (like the status-77 illegal access discussed above) only surfaces at the
  // later cudaMemcpy, which obscures where it actually happened.
  HANDLE_ERROR( cudaDeviceSynchronize() );

  // std::vector instead of new[]/delete[]: no leak on the early exit()
  // paths taken inside HANDLE_ERROR.  (<vector> is already included.)
  std::vector<float> ev(n*threads);
  HANDLE_ERROR( cudaMemcpy(ev.data(), r, n*threads*sizeof(float), cudaMemcpyDeviceToHost) );

  for (size_t i = 0; i < ev.size(); ++i)
    std::cout << ev[i] << ' ';
  std::cout << '\n';

  HANDLE_ERROR( cudaFree(r) );  // original ignored cudaFree's return value
}



> gael
>
> On Mon, Jul 3, 2017 at 6:36 AM, Henri Menke <henrimenke@xxxxxxxxx> wrote:
> > Dear list,
> >
> > I am very sorry.  I didn't compile the example with the latest development version but some
> > outdated
> > version from a GitHub mirror.  With the latest version from BitBucket it compiles without these
> > loads of __host__ __device__ warnings.  A few warnings are left however, namely
> >
> >
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1265): warning: calling a constexpr __host__
> > function("real")
> > from a __host__ __device__ function("abs") is not allowed. The experimental flag '--expt-
> > relaxed-
> > constexpr' can be used to allow this.
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1265): warning: calling a constexpr __host__
> > function("imag")
> > from a __host__ __device__ function("abs") is not allowed. The experimental flag '--expt-
> > relaxed-
> > constexpr' can be used to allow this.
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1265): warning: calling a constexpr __host__ function from
> > a
> > __host__ __device__ function is not allowed. The experimental flag '--expt-relaxed-constexpr'
> > can be
> > used to allow this.
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1265): warning: calling a constexpr __host__ function from
> > a
> > __host__ __device__ function is not allowed. The experimental flag '--expt-relaxed-constexpr'
> > can be
> > used to allow this.
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1270): warning: calling a constexpr __host__
> > function("real")
> > from a __host__ __device__ function("abs") is not allowed. The experimental flag '--expt-
> > relaxed-
> > constexpr' can be used to allow this.
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1270): warning: calling a constexpr __host__
> > function("imag")
> > from a __host__ __device__ function("abs") is not allowed. The experimental flag '--expt-
> > relaxed-
> > constexpr' can be used to allow this.
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1270): warning: calling a constexpr __host__ function from
> > a
> > __host__ __device__ function is not allowed. The experimental flag '--expt-relaxed-constexpr'
> > can be
> > used to allow this.
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1270): warning: calling a constexpr __host__ function from
> > a
> > __host__ __device__ function is not allowed. The experimental flag '--expt-relaxed-constexpr'
> > can be
> > used to allow this.
> >
> >
> >
> > This is slightly concerning.  Did I forget to define a function?  Because
> > thrust::abs(thrust::complex<T> const&) already exists.
> >
> > Cheers, Henri
> >
> > On Sun, 2017-07-02 at 13:01 +1200, Henri Menke wrote:
> > > Dear list,
> > >
> > > This is my first post to the list and I would like to use it to thank all the developers for
> > their
> > > astonishing work.  Keep it up guys!
> > >
> > > Now to my problem: I am using the latest development version of Eigen and I want to calculate
> > the
> > > eigendecomposition of a matrix in a CUDA kernel.
> > >
> > > I followed »Using custom scalar types« (http://eigen.tuxfamily.org/dox-3.2/TopicCustomizingEig
> > en.h
> > > tm
> > > l#CustomScalarType) to implement support for thrust::complex..
> > >
> > > The example below compiles but is unusable.  The approximately 800 lines of nvcc V8.0.61
> > output
> > > can
> > > be view at https://gist.github.com/anonymous/757218ae4daec63ed021b143a38e76b0.  There, I see a
> > lot
> > > of warnings which involve calling a __host__ function from a __host__ __device__ function.  It
> > > appears to me that some functions are not correctly decorated (yet).  I am very interested in
> > > helping to fix this but have so far not worked with the internals of Eigen's _expression_
> > template
> > > code.  Could you please provide some help?
> > >
> > > Best regards,
> > > Henri
> > >
> > > ---
> > >
> > > The compile command line is simply
> > > $ nvcc -std=c++11 -I/path/to/eigen/ test.cu
> > >
> > >
> > > #include <iostream>
> > > #include <thrust/complex.h>
> > > #include <Eigen/Eigenvalues>
> > >
> > > static void HandleError(cudaError_t err, const char *file, int line)
> > > {
> > >   if (err != cudaSuccess)
> > >   {
> > >     std::cerr << cudaGetErrorString(err) << " in " << file << ' ' << line << '\n';
> > >     exit(EXIT_FAILURE);
> > >   }
> > > }
> > > #define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))
> > >
> > >
> > > namespace Eigen {
> > >   template<typename _Real> struct NumTraits<thrust::complex<_Real> >
> > >     : NumTraits<std::complex<_Real> >
> > >   {
> > >     typedef _Real Real;
> > >     typedef typename NumTraits<_Real>::Literal Literal;
> > >     enum {
> > >       IsComplex = 1,
> > >       RequireInitialization = NumTraits<_Real>::RequireInitialization,
> > >       ReadCost = 2 * NumTraits<_Real>::ReadCost,
> > >       AddCost = 2 * NumTraits<Real>::AddCost,
> > >       MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
> > >     };
> > >   };
> > > }
> > >
> > >
> > > namespace thrust {
> > >   template < typename T >
> > >   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
> > >   T real(complex<T> const& x) { return x.real(); }
> > >
> > >   template < typename T >
> > >   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
> > >   T imag(complex<T> const& x) { return x.imag(); }
> > > }
> > >
> > >
> > > __global__ void test_kernel(int n, float * r) {
> > >   typedef Eigen::Matrix<thrust::complex<float>,2,2> cuMatrixXcf;
> > >
> > >   cuMatrixXcf m;
> > >   m << 1,0, 0,1;
> > >   m *= threadIdx.x;
> > >
> > >   Eigen::SelfAdjointEigenSolver< cuMatrixXcf > solver;
> > >   solver.compute(m, Eigen::EigenvaluesOnly);
> > >
> > >   auto const& D = solver.eigenvalues();
> > >   
> > >   for (int i = 0; i < n; ++i)
> > >     r[n*threadIdx.x + i] = D(i);
> > > }
> > >
> > >
> > > int main()
> > > {
> > >   float * r;
> > >   HANDLE_ERROR( cudaMalloc(&r, 4*10*sizeof(float)) );
> > >
> > >   test_kernel<<<10,1>>>(4,r);
> > >
> > >   std::vector<float> ev(4*10);
> > >   HANDLE_ERROR( cudaMemcpy(ev.data(), r, 4*10*sizeof(float), cudaMemcpyDeviceToHost) );
> > >
> > >   for (auto const& p : ev)
> > >     std::cout << p << ' ';
> > >   std::cout << '\n';
> > > }
> >
> >
> >



Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/