[eigen] Vectorized(SSE) integer multiplication

[ Thread Index | Date Index | More lists.tuxfamily.org/eigen Archives ]

To: eigen@xxxxxxxxxxxxxxxxxxx
Subject: [eigen] Vectorized(SSE) integer multiplication
From: Rohit Garg <rpg.314@xxxxxxxxx>
Date: Sun, 8 Mar 2009 11:32:36 +0530
Dkim-signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:mime-version:received:date:message-id:subject :from:to:content-type; bh=sanad5a9eF/86Ep/+mAekTUQ4bKpAs91P1I/Dkt+ZnU=; b=nQID/U9SG+uSHbmn6Rl1RKZUzQVe4mw2oph9VU+StVYWh3Mo9Uagm69E2+HHqT6Gum sRfxJQeinXe0X/Wf4wlMnyqC6pRUKv6JTPpEwaRy63a90ZTZJGzRma2vV9ARXif6GmBa KKl9k7owgB+ujONjcAlbM2pfV4Yyf7TRjkwdI=
Domainkey-signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=mime-version:date:message-id:subject:from:to:content-type; b=X+EXSC0jcUNubJZR2z5FEbsRhu9YefK3B8ow9TZVqRFDayDSw9Srtbn7EVj+YP1Gnp ynFsVhJNWOfCPF5F13jYgK+24/0h42U9YBCAqs+aTrntUHFIhLb2Nn+vJTyEGVEf7ojJ FPxjG5DQs7hcLUgj3Ys/phIoWLzXGorhMKH0M=

Hi,

This file has my vectorized implementation (SSE2) of the multiplication
of 4 integers. The Eigen routine was taken from the packetMath.h file.
The benchmarks show a small but noticeable difference.

~/Documents/numerical@rpg> g++ vec4i_mul.cpp -msse3 -O3 -march=native
~/Documents/numerical@rpg> ./a.out > /dev/null
1236491601 ei mul begins
1236491618 ei mul ends
1236491618my mul
1236491633 end

The macros could be defined better, I admit. They were taken from my
implementation of vec4i multiplication, which I wrote for my own needs
earlier. They are the same as for the quaternion routine I sent earlier,
so please consider unifying them.

BTW, this multiplication instruction that you (and I) are using does
only unsigned multiplication. Signed multiplication is there as a
single instruction in SSE4.1. So a small patch could be added for that
too. The exact intrinsic is _mm_mul_epi32. My CPU doesn't have that,
so I can't test it.

Regards,
-- 
Rohit Garg

http://rpg-314.blogspot.com/

Senior Undergraduate
Department of Physics
Indian Institute of Technology
Bombay

#include<iostream>
#include<ctime>
#include<pmmintrin.h>
using namespace std;

// Benchmark size: main() uses this both as the length of the input/output
// arrays and as the number of times each multiply pass over them is repeated.
const int testCount=25000;

// Packs four 2-bit lane selectors into an 8-bit shuffle immediate:
// p occupies bits 7:6, q bits 5:4, r bits 3:2 and s bits 1:0.
// Each argument is parenthesized so that expression arguments such as `a|b`
// are shifted as a whole instead of being split by operator precedence.
#define VECTOR4UI_SHUFFLE_MASK(p,q,r,s) ((((p)<<6)|((q)<<4)|((r)<<2)|(s)))

// Permutes the four 32-bit lanes of v with _mm_shuffle_epi32:
// result lane 0 = v[p], lane 1 = v[q], lane 2 = v[r], lane 3 = v[s].
// The selectors must be compile-time constants in the range 0..3.
#define vector4ui_swizzle(v,p,q,r,s) ((_mm_shuffle_epi32( (v), ((s)<<6)|((r)<<4)|((q)<<2)|(p))))

/**
 * Lane-wise product of four packed 32-bit integers (low 32 bits of each
 * product), built from the SSE2 intrinsic _mm_mul_epu32.
 *
 * _mm_mul_epu32 only multiplies lanes 0 and 2 and yields two 64-bit products,
 * so the work is done in two passes (even lanes, then odd lanes) and the low
 * dwords of the products are merged back into one vector.
 *
 * @param a first operand, four 32-bit lanes
 * @param b second operand, four 32-bit lanes
 * @return  vector whose lane i holds the low 32 bits of a[i] * b[i]
 */
inline __m128i ei_mul(const __m128i a, const __m128i b)
{
  // Selects the low dword of each 64-bit half and clears the high dword.
  const __m128i low_dwords = _mm_setr_epi32(0xffffffff,0,0xffffffff,0);

  // Products of lanes 0 and 2, left in dword positions 0 and 2.
  const __m128i even = _mm_and_si128(_mm_mul_epu32(a,b), low_dwords);

  // Shift both inputs down by one dword so lanes 1 and 3 land where
  // _mm_mul_epu32 reads its operands.
  const __m128i a_odd = _mm_srli_si128(a,4);
  const __m128i b_odd = _mm_srli_si128(b,4);
  const __m128i odd = _mm_and_si128(_mm_mul_epu32(a_odd,b_odd), low_dwords);

  // Move the odd products up into dword positions 1 and 3 and merge.
  return _mm_or_si128(even, _mm_slli_si128(odd, 4));
}

/**
 * Lane-wise product of four packed 32-bit integers (low 32 bits of each
 * product), using two _mm_mul_epu32 calls followed by shuffles instead of
 * masks and byte shifts.
 *
 * @param v1 first operand, four 32-bit lanes
 * @param v2 second operand, four 32-bit lanes
 * @return   vector whose lane i holds the low 32 bits of v1[i] * v2[i]
 */
inline __m128i my_mul(const __m128i v1, const __m128i v2)
    {
    // 64-bit products of lanes 0 and 2; their low dwords sit in positions 0 and 2.
    const __m128i evenProducts=_mm_mul_epu32(v1,v2);

    // Swap neighbours so lanes 1 and 3 occupy the positions _mm_mul_epu32 reads.
    const __m128i oddProducts=_mm_mul_epu32(vector4ui_swizzle(v1,1,0,3,2),
                                            vector4ui_swizzle(v2,1,0,3,2));

    // Collect the four low dwords into one register; the float shuffle is used
    // only because it can pick lanes from two sources. Order is [p2, p0, p3, p1].
    const __m128 gathered=_mm_shuffle_ps(_mm_castsi128_ps(evenProducts),
                                         _mm_castsi128_ps(oddProducts),
                                         VECTOR4UI_SHUFFLE_MASK(0,2,0,2));

    // Put the products back into lane order [p0, p1, p2, p3].
    return (vector4ui_swizzle(_mm_castps_si128(gathered),1,3,0,2));
    }

// 16-byte aligned scratch buffer that print() stores a vector into;
// the alignment is required by the aligned store _mm_store_si128.
int __attribute__((aligned(16))) printBuf[4];

/**
 * Writes the four 32-bit lanes of `a` to standard output as signed decimal
 * integers separated by single spaces, followed by a newline and a flush.
 * Uses the global printBuf as temporary storage.
 */
void print( __m128i a)
    {
    _mm_store_si128(reinterpret_cast<__m128i*>(printBuf), a);
    // First three lanes are each followed by a space; the last ends the line.
    for (int lane = 0; lane < 3; ++lane)
        std::cout << printBuf[lane] << ' ';
    std::cout << printBuf[3] << std::endl;
    }

/**
 * Benchmark driver: fills two input arrays with small integer vectors, runs
 * ei_mul and then my_mul over them testCount times each, and reports
 * wall-clock timestamps (whole seconds, from time()) on stderr around each
 * run. The products of the last pass of each run are printed to stdout so the
 * results of the two kernels can be compared.
 *
 * @return 0 always
 */
int main()
    {
    // Static storage: three arrays of testCount 16-byte vectors are roughly
    // 1.2 MB in total, which is too much to place on the stack safely.
    static __m128i num1[testCount], num2[testCount], out[testCount];

    for(int i=0; i<testCount; i++)
        {
        num1[i]=_mm_setr_epi32(i,i+1,i+2,i+3);
        num2[i]=_mm_setr_epi32(i+3,i+2,i+1,i);
        }

    cerr<<time(NULL)<<" ei mul begins\n";
    for(int j=0; j<testCount; j++)
        for(int i=0; i<testCount; i++)
            {
            out[i]=ei_mul(num1[i],num2[i]);
            }
    cerr<<time(NULL)<<" ei mul ends\n";
    for(int i=0; i<testCount; i++)
        print(out[i]);

    // Leading space added so the label is separated from the timestamp,
    // matching the other three progress messages.
    cerr<<time(NULL)<<" my mul\n";
    for(int j=0; j<testCount; j++)
        for(int i=0; i<testCount; i++)
            {
            out[i]=my_mul(num1[i],num2[i]);
            }
    cerr<<time(NULL)<<" end\n";
    for(int i=0; i<testCount; i++)
        print(out[i]);

    return 0;
    }

Follow-Ups:
- Re: [eigen] Vectorized(SSE) integer multiplication
  - From: Gael Guennebaud

Messages sorted by: [ date | thread ]
Prev by Date: [eigen] SVN access, was Vectorized quaternion multiplication.
Next by Date: Re: [eigen] Vectorized(SSE) integer multiplication
Previous by thread: [eigen] SVN access, was Vectorized quaternion multiplication.
Next by thread: Re: [eigen] Vectorized(SSE) integer multiplication

Mail converted by MHonArc 2.6.19+

http://listengine.tuxfamily.org/