[eigen] Re: perf issue with vector of size 2 ? |
[ Thread Index |
Date Index
| More lists.tuxfamily.org/eigen Archives
]
Now in the case C1=C2=2 or C1=C2=3 a similar behavior occurs:
I slightly changed the code
# C1=C2=2
../e2
time: 1.66
time: 2.56
time: 8.35
scalar product
time: 2.97
time: 2.68
time: 7.92
# C1=C2=3
../e2
time: 4.44
time: 5.67
time: 2.42
scalar product
time: 5.81
time: 5.91
time: 0.74
Again Eigen2 is very fast for C1=C2=3, but for C1=C2=2 it is way slower
than the "by hand" implementations.
Is there anything that explains this behavior?
Best regards
C.
--
Christophe Prud'homme
Université de Grenoble christophe.prudhomme@xxxxxxxxxxxxxxx
LJK - Room 55 Tel: +33476635497
51, rue des Mathématiques Fax: +33476631263
BP53 38041 Grenoble Cedex 9
<http://ljk.imag.fr/membres/Christophe.Prudhomme/>
#include <cmath>
#include <iostream>
#include <boost/multi_array.hpp>
#include <boost/timer.hpp>
#include <Eigen/Eigen>
// Micro-benchmark comparing three ways of evaluating small C1 x C2 kernels
// over an N x Q grid, each repeated P times:
//   1. boost::multi_array indexed [i][q][c1][c2] (component indices innermost),
//   2. boost::multi_array indexed [i][c1][c2][q] (q innermost),
//   3. a boost::multi_array of fixed-size Eigen matrices.
// The first group of three timings is the element-wise x*cos(x) kernel, the
// second group (after "scalar product") is the full contraction of two
// C1 x C2 blocks. Each timing is printed as "time: <seconds>".
//
// NOTE(review): none of the computed results is read after the timed loops,
// so an optimising compiler may be able to drop or hoist part of the work;
// inspect the generated code before drawing conclusions from the timings.
int main()
{
    const int P = 100000;      // number of repetitions of each kernel
    static const int N = 21;   // extent of the first grid index (i)
    static const int Q = 25;   // extent of the second grid index (q)
    static const int C1 = 3;   // rows of the small block
    static const int C2 = 3;   // columns of the small block

    typedef Eigen::Matrix<double, C1, C2> vector_type;

    // Layout 1: component indices innermost.
    boost::multi_array<double, 4> x(boost::extents[N][Q][1][1]);
    boost::multi_array<double, 4> x1(boost::extents[N][Q][C1][C2]);
    boost::multi_array<double, 4> x2(boost::extents[N][Q][C1][C2]);

    // Layout 2: q innermost for the block data.
    boost::multi_array<double, 4> w(boost::extents[N][Q][1][1]);
    boost::multi_array<double, 4> w1(boost::extents[N][C1][C2][Q]);
    boost::multi_array<double, 4> w2(boost::extents[N][C1][C2][Q]);

    // Layout 3: one Eigen matrix per grid point.
    // NOTE(review): for block sizes that Eigen treats as vectorizable (e.g.
    // 2 x 2 doubles) the default allocator used here may not satisfy Eigen's
    // alignment requirement on every platform -- confirm for the target.
    boost::multi_array<double, 2> y(boost::extents[N][Q]);
    boost::multi_array<vector_type, 2> y1(boost::extents[N][Q]);
    boost::multi_array<vector_type, 2> y2(boost::extents[N][Q]);

    // The double arrays above are never filled explicitly; they keep whatever
    // boost::multi_array's constructor gives them (presumably value-initialised
    // zeros -- confirm). Eigen's default constructor, however, leaves the
    // coefficients of a fixed-size matrix uninitialised, so y2 is set to zero
    // here: otherwise the Eigen kernels would read indeterminate values
    // (possibly NaN or denormal) and would not be timed on the same input as
    // the hand-written kernels.
    for (int i = 0; i < N; ++i)
        for (int q = 0; q < Q; ++q)
            y2[i][q].setZero();

    boost::timer ti;

    // --- element-wise kernel, layout 1 -------------------------------------
    for (int e = 0; e < P; ++e)
        for (int i = 0; i < N; ++i)
            for (int q = 0; q < Q; ++q)
                for (int c1 = 0; c1 < C1; ++c1)
                    for (int c2 = 0; c2 < C2; ++c2)
                        x1[i][q][c1][c2] = std::cos(x2[i][q][c1][c2]) * x2[i][q][c1][c2];
    std::cout << "time: " << ti.elapsed() << "\n";

    // --- element-wise kernel, layout 2 -------------------------------------
    ti.restart();
    for (int e = 0; e < P; ++e)
        for (int i = 0; i < N; ++i)
            for (int c1 = 0; c1 < C1; ++c1)
                for (int c2 = 0; c2 < C2; ++c2)
                    for (int q = 0; q < Q; ++q)
                        w1[i][c1][c2][q] = std::cos(w2[i][c1][c2][q]) * w2[i][c1][c2][q];
    std::cout << "time: " << ti.elapsed() << "\n";

    // --- element-wise kernel, Eigen (Eigen2 cwise() API) -------------------
    ti.restart();
    for (int e = 0; e < P; ++e)
        for (int i = 0; i < N; ++i)
            for (int q = 0; q < Q; ++q)
                y1[i][q] = y2[i][q].cwise() * y2[i][q].cwise().cos();
    std::cout << "time: " << ti.elapsed() << "\n";

    std::cout << "scalar product\n";

    // --- contraction, layout 1 ---------------------------------------------
    ti.restart();
    for (int e = 0; e < P; ++e)
        for (int i = 0; i < N; ++i)
            for (int q = 0; q < Q; ++q)
            {
                x[i][q][0][0] = 0;
                for (int c1 = 0; c1 < C1; ++c1)
                    for (int c2 = 0; c2 < C2; ++c2)
                        x[i][q][0][0] += x2[i][q][c1][c2] * x1[i][q][c1][c2];
            }
    std::cout << "time: " << ti.elapsed() << "\n";

    // --- contraction, layout 2 ---------------------------------------------
    ti.restart();
    for (int e = 0; e < P; ++e)
        for (int i = 0; i < N; ++i)
        {
            // Reset the accumulators for this i before summing over the
            // components, mirroring the reset done in the layout-1 kernel;
            // without it w would keep accumulating across all P repetitions
            // and the two kernels would not compute the same quantity.
            for (int q = 0; q < Q; ++q)
                w[i][q][0][0] = 0;

            for (int c1 = 0; c1 < C1; ++c1)
                for (int c2 = 0; c2 < C2; ++c2)
                    for (int q = 0; q < Q; ++q)
                        w[i][q][0][0] += w2[i][c1][c2][q] * w1[i][c1][c2][q];
        }
    std::cout << "time: " << ti.elapsed() << "\n";

    // --- contraction, Eigen ------------------------------------------------
    ti.restart();
    for (int e = 0; e < P; ++e)
        for (int i = 0; i < N; ++i)
            for (int q = 0; q < Q; ++q)
            {
                // trace(A^T * B) sums A(r,c)*B(r,c) over all coefficients,
                // i.e. the same contraction as the hand-written kernels.
                //y[i][q] = y2[i][q].dot(y1[i][q]);
                y[i][q] = (y2[i][q].transpose() * y1[i][q]).trace();
            }
    std::cout << "time: " << ti.elapsed() << "\n";
}