Re: [eigen] Re: How to use Eigen in CUDA kernels for eigendecomposition? |
[ Thread Index | Date Index | More lists.tuxfamily.org/eigen Archives ]
On Mon, 2017-07-03 at 14:07 +0200, Gael Guennebaud wrote:
> Hi,
>
> is your code running fine despite these warnings? It could be that those warnings are generated
> from code that is never called. Compiling with --expt-relaxed-constexpr will also help.
Thanks for your reply!
The warnings don't seem to have an influence. It also possible to get rid of them by simply using
C++98 code in which case constexpr is ignored. Unfortunately, the code below does not run when I
comment in the solver.compute line. The output of the code on my test machine is
CUDA error status: 0
CUDA error status: 0
CUDA error status: 77
an illegal memory access was encountered in test.cu 73
I ran the code with the offending line commented in through cuda-memcheck (which somehow changed the
error status from 77 to 4). The output of cuda-memcheck is attached (test.mem). Running it in
cuda-gdb is not very helpful either (see attached test.gdb). Do you have any advice how to debug
this code?
Cheers, Henri
---
#include <iostream>
#include <vector>
#include <thrust/complex.h>
#include <Eigen/Eigenvalues>
static void HandleError(cudaError_t err, const char *file, int line)
{
std::cerr << "CUDA error status: " << err << '\n';
if (err != cudaSuccess)
{
std::cerr << cudaGetErrorString(err) << " in " << file << ' ' << line << '\n';
exit(EXIT_FAILURE);
}
}
#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))
namespace Eigen {
template<typename _Real> struct NumTraits<thrust::complex<_Real> > typedef Eigen::Matrix<thrust::complex<
: NumTraits<std::complex<_Real> >
{
typedef _Real Real;
typedef typename NumTraits<_Real>::Literal Literal;
enum {
IsComplex = 1,
RequireInitialization = NumTraits<_Real>::RequireInitialization,
ReadCost = 2 * NumTraits<_Real>::ReadCost,
AddCost = 2 * NumTraits<Real>::AddCost,
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
};
};
}
namespace thrust {
template < typename T >
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T real(complex<T> const& x) { return x.real(); }
template < typename T >
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T imag(complex<T> const& x) { return x.imag(); }
}
__global__ void test_kernel(int n, float * r) {
float>,2,2> cuMatrix2cf;
cuMatrix2cf m;
m << 1,0, 0,1;
m *= threadIdx.x;
Eigen::SelfAdjointEigenSolver< cuMatrix2cf > solver;
//solver.compute(m, Eigen::EigenvaluesOnly);
Eigen::Vector2f const& D = solver.eigenvalues();
for (int i = 0; i < n; ++i)
{
r[n*threadIdx.x + i] = D(i);
}
}
int main()
{
float * r;
HANDLE_ERROR( cudaMalloc(&r, 4*10*sizeof(float)) );
test_kernel<<<10,1>>>(4,r);
HANDLE_ERROR( cudaGetLastError() );
float * ev = new float[4*10];
HANDLE_ERROR( cudaMemcpy(ev, r, 4*10*sizeof(float), cudaMemcpyDeviceToHost) );
for (size_t i = 0; i < 4*10; ++i)
std::cout << ev[i] << ' ';
std::cout << '\n';
delete[] ev;
cudaFree(r);
}
> gael
>
> On Mon, Jul 3, 2017 at 6:36 AM, Henri Menke <henrimenke@xxxxxxxxx> wrote:
> > Dear list,
> >
> > I am very sorry. I didn't compile the example with the latest development version but some
> > outdated
> > version from a GitHub mirror. With the latest version from BitBucket it compiles without these
> > loads of __host__ __device__ warnings. A few warnings are left however, namely
> >
> >
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1265): warning: calling a constexpr __host__
> > function("real")
> > from a __host__ __device__ function("abs") is not allowed. The experimental flag '--expt-
> > relaxed-
> > constexpr' can be used to allow this.
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1265): warning: calling a constexpr __host__
> > function("imag")
> > from a __host__ __device__ function("abs") is not allowed. The experimental flag '--expt-
> > relaxed-
> > constexpr' can be used to allow this.
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1265): warning: calling a constexpr __host__ function from
> > a
> > __host__ __device__ function is not allowed. The experimental flag '--expt-relaxed-constexpr'
> > can be
> > used to allow this.
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1265): warning: calling a constexpr __host__ function from
> > a
> > __host__ __device__ function is not allowed. The experimental flag '--expt-relaxed-constexpr'
> > can be
> > used to allow this.
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1270): warning: calling a constexpr __host__
> > function("real")
> > from a __host__ __device__ function("abs") is not allowed. The experimental flag '--expt-
> > relaxed-
> > constexpr' can be used to allow this.
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1270): warning: calling a constexpr __host__
> > function("imag")
> > from a __host__ __device__ function("abs") is not allowed. The experimental flag '--expt-
> > relaxed-
> > constexpr' can be used to allow this.
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1270): warning: calling a constexpr __host__ function from
> > a
> > __host__ __device__ function is not allowed. The experimental flag '--expt-relaxed-constexpr'
> > can be
> > used to allow this.
> >
> > eigen/Eigen/src/Core/MathFunctions.h(1270): warning: calling a constexpr __host__ function from
> > a
> > __host__ __device__ function is not allowed. The experimental flag '--expt-relaxed-constexpr'
> > can be
> > used to allow this.
> >
> >
> >
> > This is slightly concerning. Did I forget to define a function? Because
> > thrust::abs(thrust::complex<T> const&) already exists.
> >
> > Cheers, Henri
> >
> > On Sun, 2017-07-02 at 13:01 +1200, Henri Menke wrote:
> > > Dear list,
> > >
> > > This is my first post to the list and I would like to use it to thank all the developers for
> > their
> > > astonishing work. Keep it up guys!
> > >
> > > Now to my problem: I am using the latest development version of Eigen and I want to calculate
> > the
> > > eigendecomposition of a matrix in a CUDA kernel.
> > >
> > > I followed »Using custom scalar types« (http://eigen.tuxfamily.org/dox-3.2/TopicCustomizingEig
> > en.h
> > > tm
> > > l#CustomScalarType) to implement support for thrust::complex..
> > >
> > > The example below compiles but is unusable. The approximately 800 lines of nvcc V8.0.61
> > output
> > > can
> > > be view at https://gist.github.com/anonymous/ . There, I see a757218ae4daec63ed021b143a38e76 b0
> > lot
> > > of warnings which involve calling a __host__ function from a __host__ __device__ function. It
> > > appears to me that some functions are not correctly decorated (yet). I am very interested in
> > > helping to fix this but have so far not worked with the internals of Eigen's _expression_
> > template
> > > code. Could you please provide some help?
> > >
> > > Best regards,
> > > Henri
> > >
> > > ---
> > >
> > > The compile command line is simply
> > > $ nvcc -std=c++11 -I/path/to/eigen/ test.cu
> > >
> > >
> > > #include <iostream>
> > > #include <thrust/complex.h>
> > > #include <Eigen/Eigenvalues>
> > >
> > > static void HandleError(cudaError_t err, const char *file, int line)
> > > {
> > > if (err != cudaSuccess)
> > > {
> > > std::cerr << cudaGetErrorString(err) << " in " << file << ' ' << line << '\n';
> > > exit(EXIT_FAILURE);
> > > }
> > > }
> > > #define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))
> > >
> > >
> > > namespace Eigen {
> > > template<typename _Real> struct NumTraits<thrust::complex<_Real> >
> > > : NumTraits<std::complex<_Real> >
> > > {
> > > typedef _Real Real;
> > > typedef typename NumTraits<_Real>::Literal Literal;
> > > enum {
> > > IsComplex = 1,
> > > RequireInitialization = NumTraits<_Real>::RequireInitialization,
> > > ReadCost = 2 * NumTraits<_Real>::ReadCost,
> > > AddCost = 2 * NumTraits<Real>::AddCost,
> > > MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
> > > };
> > > };
> > > }
> > >
> > >
> > > namespace thrust {
> > > template < typename T >
> > > EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
> > > T real(complex<T> const& x) { return x.real(); }
> > >
> > > template < typename T >
> > > EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
> > > T imag(complex<T> const& x) { return x.imag(); }
> > > }
> > >
> > >
> > > __global__ void test_kernel(int n, float * r) {
> > > typedef Eigen::Matrix<thrust::complex<float>,2,2> cuMatrixXcf;
> > >
> > > cuMatrixXcf m;
> > > m << 1,0, 0,1;
> > > m *= threadIdx.x;
> > >
> > > Eigen::SelfAdjointEigenSolver< cuMatrixXcf > solver;
> > > solver.compute(m, Eigen::EigenvaluesOnly);
> > >
> > > auto const& D = solver.eigenvalues();
> > >
> > > for (int i = 0; i < n; ++i)
> > > r[n*threadIdx.x + i] = D(i);
> > > }
> > >
> > >
> > > int main()
> > > {
> > > float * r;
> > > HANDLE_ERROR( cudaMalloc(&r, 4*10*sizeof(float)) );
> > >
> > > test_kernel<<<10,1>>>(4,r);
> > >
> > > std::vector<float> ev(4*10);
> > > HANDLE_ERROR( cudaMemcpy(ev.data(), r, 4*10*sizeof(float), cudaMemcpyDeviceToHost) );
> > >
> > > for (auto const& p : ev)
> > > std::cout << p << ' ';
> > > std::cout << '\n';
> > > }
> >
> >
> >
Mail converted by MHonArc 2.6.19+ | http://listengine.tuxfamily.org/ |