[eigen] strange SSE2 impact on Vector::array and Array

[ Thread Index | Date Index | More lists.tuxfamily.org/eigen Archives ]


Consider the following code:

// Minimal reproduction: performs the same in-place addition m times, once on
// ArrayXd objects and once on VectorXd objects viewed through .array().
// Declared as `int main()` because standard C++ requires main to return int
// (`void main()` is only accepted as a compiler extension).
int main(){

  const int n=100000,m=10000;
  {
    // Case 1: operands are Array objects.
    ArrayXd a(ArrayXd::Random(n)),b(ArrayXd::Random(n));
    for(int i=0;i<m;i++)
        a += b;
  }
  {
    // Case 2: operands are Vector objects accessed through .array().
    VectorXd a(VectorXd::Random(n)),b(VectorXd::Random(n));
    for(int i=0;i<m;i++)
        a.array() += b.array();
  }
  return 0;
}

I tested on MSVC10 (32-bit) with full optimization and inlining, and I get
the following timings (in seconds):

Eigen Vector 9.501
Eigen Array  9.504

If I compile using /arch:SSE2 and run on my Intel Core 2 Duo:

Eigen Vector 10.680
Eigen Array   8.258

The use of SSE2 seems to slow down the Vector and speed up the array.
How is that possible, if the assumption is correct, that Eigen does its
magic to boil everything down to zero-overhead optimized loops?

Thanks

PS: the code was just hacked together and unfortunately depends on tbb,
but I still attached it!
#include "stdafx.h"

using namespace Eigen;
using namespace std;
using tbb::tick_count;


void t2a(int n,int m)
{
	ArrayXd a(ArrayXd::Random(n)),b(ArrayXd::Random(n));

	tick_count t0 = tick_count::now();
	for(int i=0;i<m;i++)
		a += b;

	tick_count t1 = tick_count::now();
	cout<<"Eigen Array  "<<scientific <<setprecision(3)<<(t1-t0).seconds()<<"s"<<endl;
}

void t2c(int n,int m)
{
	VectorXd a(VectorXd::Random(n)),b(VectorXd::Random(n));

	tick_count t0 = tick_count::now();
	for(int i=0;i<m;i++)
		a.array() += b.array();

	tick_count t1 = tick_count::now();
	cout<<"Eigen Vector "<<scientific <<setprecision(3)<<(t1-t0).seconds()<<"s"<<endl;
}

void t2b(int n,int m){
	ArrayXd a(ArrayXd::Random(n)),b(ArrayXd::Random(n));
	double* a1 = a.data(), *b1 = b.data();

	tick_count t0 = tick_count::now();
	for(int i=0;i<m;i++)
		for(int j=0;j<n;j++)
			a1[j] += b1[j];

	tick_count t1 = tick_count::now();
	cout<<"Plain        "<<scientific <<setprecision(3)<<(t1-t0).seconds()<<"s"<<endl;
}

// Benchmark driver: sleeps briefly, then runs the Array (t2a) and Vector (t2c)
// benchmarks twice each, alternating between them.
// Declared as `int main()` because standard C++ requires main to return int
// (`void main()` is only accepted as a compiler extension).
int main(){
	// Pause for 1.5 seconds before any measurement starts.
	// NOTE(review): presumably to let the machine settle — intent not stated.
	tbb::this_tbb_thread::sleep(tick_count::interval_t(1.5));

	const int n=100000,m=100000;
	t2a(n,m);t2c(n,m);
	t2a(n,m);t2c(n,m);

	return 0;
}

//
////static vs. dynamic
//void main1(){
//	tbb::this_tbb_thread::sleep(tick_count::interval_t(1.5));
//
//	const int n=1000,m=100000;
//	{
//		VectorXd a(VectorXd::Random(n)),b(VectorXd::Random(n));
//
//		tick_count t0 = tick_count::now();
//		for(int i=0;i<m;i++)
//			a.array() *= b.array();
//		tick_count t1 = tick_count::now();
//		cout<<"One "<<scientific <<setprecision(3)<<(t1-t0).seconds()<<"s"<<endl;
//	}{
//		typedef Matrix<double,n,1> myV;
//		 myV a(myV::Random()),b(myV::Random());
//
//		tick_count t0 = tick_count::now();
//		for(int i=0;i<m;i++)
//			a.array() *= b.array();
//		tick_count t1 = tick_count::now();
//		cout<<"Two "<<scientific <<setprecision(3)<<(t1-t0).seconds()<<"s"<<endl;
//	}
//
//	
//}
// stdafx.h : include file for standard system include files,
// or frequently used project-specific include files
// that are changed only infrequently.
//

#pragma once

#include "targetver.h"

#include <tchar.h>

#include <time.h>
#include <iostream>
#include <map>
#include <numeric>
#include <iomanip>
#include <devel/Eigen/Core>
#include <tbb/tbb_thread.h>
#include <tbb/tick_count.h>



// TODO: reference additional headers that the program requires here.


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/