[eigen] vectorization bug

[ Thread Index | Date Index | More lists.tuxfamily.org/eigen Archives ]


Hi List,

Here's a simple benchmark, a.cpp. It runs faster without vectorization than with it!

Trying to understand this, I added some asm comments in Assign.h, so my copy looks like this:

// Assignment kernel specialization for the LinearVectorization / NoUnrolling
// case, instrumented with asm("#...") statements so that each phase of run()
// can be located in the generated assembly.
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
{
  // Copies src into dst by linear index in three phases:
  //   [0, alignedStart)           one coefficient at a time,
  //   [alignedStart, alignedEnd)  one packet (packetSize coefficients) at a time,
  //   [alignedEnd, size)          one coefficient at a time.
  static void run(Derived1 &dst, const Derived2 &src)
  {
    asm("#begin"); // marker only: shows up as a "#begin" comment line in the .s output
    const int size = dst.size();
    const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
// 0 when the assign traits report an aligned destination; otherwise the value
// ei_alignmentOffset() returns for the address of the first coefficient
// (presumably the number of leading coefficients before the first aligned
// one -- TODO confirm against ei_alignmentOffset).
const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
                           : ei_alignmentOffset(&dst.coeffRef(0), size);
// alignedStart plus the part of the remaining (size - alignedStart) coefficients
// that fills whole packets; the integer division drops the remainder.
const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;

    asm("#unaligned start"); // marker: head loop follows

    // Head: coefficient-wise copy up to alignedStart.
    for(int index = 0; index < alignedStart; index++)
      dst.copyCoeff(index, src);
    asm("#aligned middle"); // marker: packet loop follows

    // Middle: packet-wise copy, advancing packetSize coefficients per iteration.
    for(int index = alignedStart; index < alignedEnd; index += packetSize)
    {
// Destination access is tagged Aligned; the source access mode comes from the
// assign traits (SrcAlignment).
dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
    }

    asm("#unaligned end"); // marker: tail loop follows

    // Tail: coefficient-wise copy of whatever did not fill a whole packet.
    for(int index = alignedEnd; index < size; index++)
      dst.copyCoeff(index, src);
    asm("#end"); // marker: end of the kernel
  }
};

I attach the resulting assembly (a.s). Can you see what's wrong?

Another thing: the Ones() part compiles to this:

	# Fill loop for a 24000-byte buffer. Only x87 scalar instructions are used
	# (fld1/fstl/fstpl); no packed SSE store appears, so each "packet" of two
	# doubles is written as two separate 8-byte stores.
	xorl	%edx, %edx		# byte offset = 0
.L107:
	movl	-24(%ebp), %eax		# destination base pointer, reloaded from the stack on every iteration
	fld1				# push the constant 1.0 onto the x87 stack
	fstl	(%eax,%edx)		# store it as a double at base+offset (no pop)
	fstpl	8(%eax,%edx)		# store it again at base+offset+8, then pop
	addl	$16, %edx		# advance by two doubles
	cmpl	$24000, %edx		# 24000 bytes = 3000 doubles
	jne	.L107

This is not vectorized, right?

Cheers,
Benoit

Cheers,
Benoit

----------------------------------------------------------------
This message was sent using IMP, the Internet Messaging Program.

#include <Eigen/Array> 
 
void test_eigen()
{
  typedef Eigen::Matrix<double, 3, Eigen::Dynamic> OBPositions;
  OBPositions pos1, pos2, pos3;
 
  // initialization
  asm("#BEGIN ONES");
  pos1 = OBPositions::Ones(3, 1000);
  asm("#ANOTHER ONES");
  pos2 = OBPositions::Ones(3, 1000);
  asm("#END ONES");
  pos3.resize(3, 1000);
 
  asm("#BEGIN MAIN LOOP");
  // benchmark
  for (int i = 0; i < 100000; ++i)
  {
    pos3 = pos1 + pos2;
  }
  asm("#END MAIN LOOP");
}
 
// Program entry point: runs the Eigen benchmark once and reports success.
int main()
{
  test_eigen();
  return 0;
}
	.file	"a.cpp"
	.text
	.p2align 4,,15
	.type	_GLOBAL__I__Z10test_eigenv, @function
_GLOBAL__I__Z10test_eigenv:
..LFB3234:
	pushl	%ebp
..LCFI0:
	movl	%esp, %ebp
..LCFI1:
	subl	$24, %esp
..LCFI2:
	movl	$_ZStL8__ioinit, (%esp)
	call	_ZNSt8ios_base4InitC1Ev
	movl	$__dso_handle, 8(%esp)
	movl	$_ZStL8__ioinit, 4(%esp)
	movl	$_ZNSt8ios_base4InitD1Ev, (%esp)
	call	__cxa_atexit
	leave
	ret
..LFE3234:
	.size	_GLOBAL__I__Z10test_eigenv, .-_GLOBAL__I__Z10test_eigenv
	.section	.ctors,"aw",@progbits
	.align 4
	.long	_GLOBAL__I__Z10test_eigenv
	.section	.rodata.str1.1,"aMS",@progbits,1
..LC0:
	.string	"vector::_M_fill_insert"
	.section	.text._ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_,"axG",@progbits,_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_,comdat
	.align 2
	.p2align 4,,15
	.weak	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_
	.type	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_, @function
_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_:
..LFB3081:
	pushl	%ebp
..LCFI3:
	movl	%esp, %ebp
..LCFI4:
	pushl	%edi
..LCFI5:
	pushl	%esi
..LCFI6:
	pushl	%ebx
..LCFI7:
	subl	$28, %esp
..LCFI8:
	movl	16(%ebp), %edx
	movl	20(%ebp), %esi
	testl	%edx, %edx
	je	.L37
	movl	8(%ebp), %eax
	movl	4(%eax), %ebx
	movl	8(%eax), %eax
	subl	%ebx, %eax
	sarl	$3, %eax
	imull	$-1431655765, %eax, %eax
	cmpl	%eax, 16(%ebp)
	jbe	.L45
	movl	8(%ebp), %edi
	movl	%ebx, %eax
	subl	(%edi), %eax
	sarl	$3, %eax
	imull	$-1431655765, %eax, %edx
	movl	$178956970, %eax
	subl	%edx, %eax
	cmpl	%eax, 16(%ebp)
	ja	.L46
	movl	16(%ebp), %eax
	cmpl	16(%ebp), %edx
	cmovae	%edx, %eax
	addl	%edx, %eax
	movl	%eax, -24(%ebp)
	jb	.L22
	cmpl	$178956970, %eax
	jbe	.L47
..L22:
	movl	$178956970, -24(%ebp)
	movl	$-16, %eax
..L25:
	movl	%eax, (%esp)
	call	_Znwj
	movl	8(%ebp), %ecx
	movl	%eax, -16(%ebp)
	movl	4(%ecx), %ebx
	jmp	.L24
	.p2align 4,,7
	.p2align 3
..L47:
	testl	%eax, %eax
	movl	$0, -16(%ebp)
	jne	.L48
..L24:
	movl	8(%ebp), %edi
	movl	-16(%ebp), %ecx
	movl	(%edi), %edi
	movl	%ecx, %eax
	movl	%edi, %edx
	movl	%edi, -20(%ebp)
	movl	12(%ebp), %edi
	cmpl	%edi, %edx
	je	.L27
	.p2align 4,,7
	.p2align 3
..L38:
	testl	%ecx, %ecx
	je	.L28
	fldl	(%edx)
	fstpl	(%ecx)
	fldl	8(%edx)
	fstpl	8(%ecx)
	fldl	16(%edx)
	fstpl	16(%ecx)
..L28:
	addl	$24, %edx
	addl	$24, %ecx
	cmpl	12(%ebp), %edx
	jne	.L38
	movl	-20(%ebp), %eax
	movl	12(%ebp), %edx
	movl	-16(%ebp), %ecx
	addl	$24, %eax
	subl	%eax, %edx
	movl	%edx, %eax
	shrl	$3, %eax
	imull	$178956971, %eax, %eax
	andl	$536870911, %eax
	leal	3(%eax,%eax,2), %eax
	leal	(%ecx,%eax,8), %eax
..L27:
	movl	%eax, %ecx
	movl	16(%ebp), %edx
	jmp	.L32
	.p2align 4,,7
	.p2align 3
..L49:
	addl	$24, %eax
..L32:
	testl	%eax, %eax
	je	.L30
	fldl	(%esi)
	fstpl	(%eax)
	fldl	8(%esi)
	fstpl	8(%eax)
	fldl	16(%esi)
	fstpl	16(%eax)
..L30:
	subl	$1, %edx
	jne	.L49
	movl	16(%ebp), %edi
	cmpl	12(%ebp), %ebx
	leal	(%edi,%edi,2), %eax
	leal	(%ecx,%eax,8), %esi
	je	.L33
	movl	12(%ebp), %edx
	movl	%esi, %eax
	.p2align 4,,7
	.p2align 3
..L35:
	testl	%eax, %eax
	je	.L34
	fldl	(%edx)
	fstpl	(%eax)
	fldl	8(%edx)
	fstpl	8(%eax)
	fldl	16(%edx)
	fstpl	16(%eax)
..L34:
	addl	$24, %edx
	addl	$24, %eax
	cmpl	%edx, %ebx
	jne	.L35
	movl	12(%ebp), %eax
	addl	$24, %eax
	subl	%eax, %ebx
	shrl	$3, %ebx
	imull	$178956971, %ebx, %eax
	andl	$536870911, %eax
	leal	3(%eax,%eax,2), %eax
	leal	(%esi,%eax,8), %esi
..L33:
	movl	-20(%ebp), %eax
	testl	%eax, %eax
	je	.L36
	movl	-20(%ebp), %eax
	movl	%eax, (%esp)
	call	_ZdlPv
..L36:
	movl	-16(%ebp), %ecx
	movl	8(%ebp), %edx
	movl	%ecx, (%edx)
	movl	%esi, 4(%edx)
	movl	-24(%ebp), %edi
	leal	(%edi,%edi,2), %eax
	leal	(%ecx,%eax,8), %eax
	movl	%eax, 8(%edx)
	jmp	.L37
..L53:
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	.p2align 4,,7
	.p2align 3
..L37:
	addl	$28, %esp
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.p2align 4,,7
	.p2align 3
..L45:
	movl	%ebx, %eax
	fldl	(%esi)
	subl	12(%ebp), %eax
	fldl	8(%esi)
	fldl	16(%esi)
	sarl	$3, %eax
	imull	$-1431655765, %eax, %esi
	cmpl	%esi, 16(%ebp)
	jae	.L6
	movl	16(%ebp), %ecx
	leal	(%ecx,%ecx,2), %eax
	movl	%ebx, %ecx
	leal	0(,%eax,8), %esi
	subl	%esi, %ecx
	cmpl	%ecx, %ebx
	je	.L7
	movl	%ebx, %edx
	movl	%ecx, %eax
	jmp	.L9
	.p2align 4,,7
	.p2align 3
..L50:
	addl	$24, %edx
..L9:
	testl	%edx, %edx
	je	.L8
	fldl	(%eax)
	fstpl	(%edx)
	fldl	8(%eax)
	fstpl	8(%edx)
	fldl	16(%eax)
	fstpl	16(%edx)
..L8:
	addl	$24, %eax
	cmpl	%eax, %ebx
	jne	.L50
..L7:
	movl	8(%ebp), %edi
	leal	(%ebx,%esi), %eax
	movl	%eax, 4(%edi)
	movl	%ecx, %eax
	subl	12(%ebp), %eax
	sarl	$3, %eax
	imull	$-1431655765, %eax, %eax
	testl	%eax, %eax
	jle	.L10
	xorl	%edx, %edx
	.p2align 4,,7
	.p2align 3
..L11:
	fldl	-24(%ecx,%edx)
	subl	$1, %eax
	fstpl	-24(%ebx,%edx)
	fldl	-16(%ecx,%edx)
	fstpl	-16(%ebx,%edx)
	fldl	-8(%ecx,%edx)
	fstpl	-8(%ebx,%edx)
	subl	$24, %edx
	testl	%eax, %eax
	jg	.L11
..L10:
	movl	12(%ebp), %edx
	addl	%esi, %edx
	cmpl	12(%ebp), %edx
	je	.L53
	fxch	%st(2)
	movl	12(%ebp), %eax
	jmp	.L12
	.p2align 4,,7
	.p2align 3
..L54:
	fxch	%st(2)
	fxch	%st(1)
..L12:
	fstl	(%eax)
	fxch	%st(1)
	fstl	8(%eax)
	fxch	%st(2)
	fstl	16(%eax)
	addl	$24, %eax
	cmpl	%eax, %edx
	jne	.L54
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	addl	$28, %esp
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.p2align 4,,7
	.p2align 3
..L6:
	movl	16(%ebp), %ecx
	subl	%esi, %ecx
	je	.L55
	movl	%ebx, %eax
	movl	%ecx, %edx
	jmp	.L15
	.p2align 4,,7
	.p2align 3
..L51:
	fxch	%st(1)
	fxch	%st(2)
	fxch	%st(1)
	addl	$24, %eax
..L15:
	testl	%eax, %eax
	je	.L56
	fxch	%st(2)
	fstl	(%eax)
	fxch	%st(1)
	fstl	8(%eax)
	fxch	%st(2)
	fstl	16(%eax)
	jmp	.L14
	.p2align 4,,7
	.p2align 3
..L56:
	fxch	%st(1)
	fxch	%st(2)
	fxch	%st(1)
	.p2align 4,,7
	.p2align 3
..L14:
	subl	$1, %edx
	jne	.L51
	jmp	.L13
..L55:
	fxch	%st(1)
	fxch	%st(2)
	fxch	%st(1)
	.p2align 4,,7
	.p2align 3
..L13:
	leal	(%ecx,%ecx,2), %eax
	leal	(%ebx,%eax,8), %edi
	movl	8(%ebp), %eax
	movl	%edi, 4(%eax)
	cmpl	12(%ebp), %ebx
	je	.L16
	movl	12(%ebp), %edx
	movl	%edi, %ecx
	movl	%edx, %eax
	jmp	.L19
	.p2align 4,,7
	.p2align 3
..L52:
	addl	$24, %ecx
..L19:
	testl	%ecx, %ecx
	je	.L17
	fldl	(%eax)
	fstpl	(%ecx)
	fldl	8(%eax)
	fstpl	8(%ecx)
	fldl	16(%eax)
	fstpl	16(%ecx)
..L17:
	addl	$24, %eax
	cmpl	%eax, %ebx
	jne	.L52
	fxch	%st(1)
	movl	8(%ebp), %ecx
	leal	(%esi,%esi,2), %eax
	leal	(%edi,%eax,8), %eax
	movl	%eax, 4(%ecx)
	jmp	.L20
	.p2align 4,,7
	.p2align 3
..L57:
	fxch	%st(1)
	fxch	%st(2)
..L20:
	fstl	(%edx)
	fxch	%st(2)
	fstl	8(%edx)
	fxch	%st(1)
	fstl	16(%edx)
	addl	$24, %edx
	cmpl	%edx, %ebx
	jne	.L57
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	addl	$28, %esp
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
..L16:
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	movl	8(%ebp), %edx
	leal	(%esi,%esi,2), %eax
	leal	(%edi,%eax,8), %eax
	movl	%eax, 4(%edx)
	jmp	.L37
..L46:
	movl	$.LC0, (%esp)
	call	_ZSt20__throw_length_errorPKc
..L48:
	movl	-24(%ebp), %edx
	leal	(%edx,%edx,2), %eax
	sall	$3, %eax
	jmp	.L25
..LFE3081:
	.size	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_, .-_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_
	.section	.text._ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_,"axG",@progbits,_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_,comdat
	.align 2
	.p2align 4,,15
	.weak	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_
	.type	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_, @function
_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_:
..LFB2930:
	pushl	%ebp
..LCFI9:
	movl	%esp, %ebp
..LCFI10:
	subl	$24, %esp
..LCFI11:
	movl	%ebx, -8(%ebp)
..LCFI12:
	movl	8(%ebp), %ebx
	movl	%esi, -4(%ebp)
..LCFI13:
	movl	12(%ebp), %edx
	movl	4(%ebx), %esi
	movl	(%ebx), %ecx
	movl	%esi, %eax
	subl	%ecx, %eax
	sarl	$3, %eax
	imull	$-1431655765, %eax, %eax
	cmpl	%eax, %edx
	jae	.L59
	leal	(%edx,%edx,2), %eax
	leal	(%ecx,%eax,8), %eax
	movl	%eax, 4(%ebx)
	movl	-8(%ebp), %ebx
	movl	-4(%ebp), %esi
	movl	%ebp, %esp
	popl	%ebp
	ret
	.p2align 4,,7
	.p2align 3
..L59:
	movl	16(%ebp), %ecx
	subl	%eax, %edx
	movl	%esi, 4(%esp)
	movl	%ebx, (%esp)
	movl	%edx, 8(%esp)
	movl	%ecx, 12(%esp)
	call	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_
	movl	-8(%ebp), %ebx
	movl	-4(%ebp), %esi
	movl	%ebp, %esp
	popl	%ebp
	ret
..LFE2930:
	.size	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_, .-_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_
	# std::vector<Eigen::Matrix<double,3,1,0,3,1>>::~vector() (complete-object
	# destructor, emitted as a weak COMDAT function).
	# Behaviour visible below: load the first word of *this; if it is null just
	# return, otherwise tail-call operator delete(void*) (_ZdlPv) on it.
	# NOTE: the archived listing had a doubled leading dot on the local labels and
	# on the ".-symbol" size expression (mail dot-stuffing artifact, presumably);
	# they are restored here so definitions match their references.
	.section	.text._ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev,"axG",@progbits,_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev,comdat
	.align 2
	.p2align 4,,15
	.weak	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
	.type	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev, @function
_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev:
.LFB2920:
	pushl	%ebp
.LCFI14:
	movl	%esp, %ebp
.LCFI15:
	movl	8(%ebp), %eax		# eax = this
	movl	(%eax), %eax		# eax = first word of the vector object
	testl	%eax, %eax
	je	.L64			# null: nothing to release
	movl	%eax, 8(%ebp)		# overwrite the incoming argument slot with the pointer
	popl	%ebp
	jmp	_ZdlPv			# tail call: operator delete(void*)
	.p2align 4,,7
	.p2align 3
.L64:
	popl	%ebp
	ret
.LFE2920:
	.size	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev, .-_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
..globl _Unwind_Resume
	.text
	.p2align 4,,15
..globl _Z11test_vectorv
	.type	_Z11test_vectorv, @function
_Z11test_vectorv:
..LFB2863:
	pushl	%ebp
..LCFI16:
	movl	%esp, %ebp
..LCFI17:
	pushl	%edi
..LCFI18:
	pushl	%esi
..LCFI19:
	pushl	%ebx
..LCFI20:
	subl	$172, %esp
..LCFI21:
	leal	-88(%ebp), %eax
	movl	%eax, 8(%esp)
	leal	-28(%ebp), %eax
	movl	$0, -28(%ebp)
	movl	$0, -24(%ebp)
	movl	$0, -20(%ebp)
	movl	$0, -40(%ebp)
	movl	$0, -36(%ebp)
	movl	$0, -32(%ebp)
	movl	$0, -52(%ebp)
	movl	$0, -48(%ebp)
	movl	$0, -44(%ebp)
	movl	$1000, 4(%esp)
	movl	%eax, (%esp)
..LEHB0:
	call	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_
..LEHE0:
	leal	-40(%ebp), %edx
	leal	-112(%ebp), %eax
	movl	%eax, 8(%esp)
	movl	$1000, 4(%esp)
	movl	%edx, -168(%ebp)
	movl	%edx, (%esp)
..LEHB1:
	call	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_
..LEHE1:
	leal	-52(%ebp), %edx
	leal	-136(%ebp), %eax
	movl	%eax, 8(%esp)
	movl	$1000, 4(%esp)
	movl	%edx, -164(%ebp)
	movl	%edx, (%esp)
..LEHB2:
	call	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_
..LEHE2:
	movl	-28(%ebp), %eax
	movl	%eax, -152(%ebp)
	xorl	%eax, %eax
..L70:
	movl	-152(%ebp), %edx
	fld1
	fstl	(%edx,%eax)
	fstl	8(%edx,%eax)
	fstpl	16(%edx,%eax)
	addl	$24, %eax
	cmpl	$24000, %eax
	jne	.L70
	movl	-40(%ebp), %esi
	movl	-36(%ebp), %eax
	subl	%esi, %eax
	sarl	$3, %eax
	imull	$-1431655765, %eax, %ecx
	testl	%ecx, %ecx
	je	.L72
	xorl	%edx, %edx
	xorl	%eax, %eax
..L73:
	leal	(%eax,%eax,2), %eax
	addl	$1, %edx
	leal	(%esi,%eax,8), %eax
	cmpl	%edx, %ecx
	fld1
	fstl	(%eax)
	fstl	8(%eax)
	fstpl	16(%eax)
	movl	%edx, %eax
	ja	.L73
..L72:
	movl	-52(%ebp), %eax
	movl	$0, -160(%ebp)
	movl	%eax, -148(%ebp)
	movl	-48(%ebp), %eax
	subl	-148(%ebp), %eax
	sarl	$3, %eax
	imull	$-1431655765, %eax, %edi
..L74:
	movl	$0, -156(%ebp)
	.p2align 4,,7
	.p2align 3
..L80:
	xorl	%ebx, %ebx
	xorl	%eax, %eax
	testl	%edi, %edi
	je	.L78
	.p2align 4,,7
	.p2align 3
..L91:
	flds	.LC3
	leal	(%eax,%eax,2), %eax
	addl	$1, %ebx
	fstl	-60(%ebp)
	movl	-152(%ebp), %edx
	sall	$3, %eax
	leal	(%esi,%eax), %ecx
	addl	%eax, %edx
	fmull	(%ecx)
	addl	-148(%ebp), %eax
	cmpl	%edi, %ebx
	movl	%ecx, -64(%ebp)
	faddl	(%edx)
	fstpl	(%eax)
	fldl	-60(%ebp)
	fldl	8(%ecx)
	fmul	%st(1), %st
	faddl	8(%edx)
	fstpl	8(%eax)
	fmull	16(%ecx)
	faddl	16(%edx)
	fstpl	16(%eax)
	movl	%ebx, %eax
	jb	.L91
..L78:
	addl	$1, -156(%ebp)
	cmpl	$1000, -156(%ebp)
	jne	.L80
	addl	$1, -160(%ebp)
	cmpl	$100000, -160(%ebp)
	jne	.L74
	movl	-164(%ebp), %eax
	movl	%eax, (%esp)
	call	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
	movl	-168(%ebp), %edx
	movl	%edx, (%esp)
	call	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
	leal	-28(%ebp), %eax
	movl	%eax, (%esp)
	call	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
	addl	$172, %esp
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
..L87:
..L67:
	movl	%eax, %ebx
	leal	-40(%ebp), %edx
	leal	-52(%ebp), %eax
	movl	%edx, -168(%ebp)
	movl	%eax, -164(%ebp)
..L88:
..L81:
..L89:
..L82:
..L90:
..L83:
	movl	-164(%ebp), %edx
	movl	%edx, (%esp)
	call	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
	movl	-168(%ebp), %eax
	movl	%eax, (%esp)
	call	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
	leal	-28(%ebp), %edx
	movl	%edx, (%esp)
	call	_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
	movl	%ebx, (%esp)
..LEHB3:
	call	_Unwind_Resume
..LEHE3:
..L85:
..L71:
	movl	%eax, %ebx
	jmp	.L81
..L86:
..L69:
	movl	%eax, %ebx
	leal	-52(%ebp), %eax
	movl	%eax, -164(%ebp)
	jmp	.L81
..LFE2863:
	.size	_Z11test_vectorv, .-_Z11test_vectorv
..globl __gxx_personality_v0
	.section	.gcc_except_table,"a",@progbits
..LLSDA2863:
	.byte	0xff
	.byte	0xff
	.byte	0x1
	.uleb128 .LLSDACSE2863-.LLSDACSB2863
..LLSDACSB2863:
	.uleb128 .LEHB0-.LFB2863
	.uleb128 .LEHE0-.LEHB0
	.uleb128 .L87-.LFB2863
	.uleb128 0x0
	.uleb128 .LEHB1-.LFB2863
	.uleb128 .LEHE1-.LEHB1
	.uleb128 .L86-.LFB2863
	.uleb128 0x0
	.uleb128 .LEHB2-.LFB2863
	.uleb128 .LEHE2-.LEHB2
	.uleb128 .L85-.LFB2863
	.uleb128 0x0
	.uleb128 .LEHB3-.LFB2863
	.uleb128 .LEHE3-.LEHB3
	.uleb128 0x0
	.uleb128 0x0
..LLSDACSE2863:
	.text
	.p2align 4,,15
..globl _Z10test_eigenv
	.type	_Z10test_eigenv, @function
_Z10test_eigenv:
..LFB2862:
	pushl	%ebp
..LCFI22:
	movl	%esp, %ebp
..LCFI23:
	pushl	%edi
..LCFI24:
	pushl	%esi
..LCFI25:
	pushl	%ebx
..LCFI26:
	xorl	%ebx, %ebx
	subl	$60, %esp
..LCFI27:
	movl	%ebx, %esi
	leal	-16(%ebp), %eax
	movl	$8, 8(%esp)
	movl	$16, 4(%esp)
	movl	%eax, (%esp)
	call	posix_memalign
	movl	$1, -20(%ebp)
	movl	$8, 8(%esp)
	movl	$16, 4(%esp)
	testl	%eax, %eax
	movl	%ebx, %eax
	cmove	-16(%ebp), %eax
	movl	%eax, -24(%ebp)
	leal	-16(%ebp), %eax
	movl	%eax, (%esp)
	call	posix_memalign
	movl	$1, -28(%ebp)
	movl	$8, 8(%esp)
	movl	$16, 4(%esp)
	testl	%eax, %eax
	movl	%ebx, %eax
	cmove	-16(%ebp), %eax
	movl	%eax, -32(%ebp)
	leal	-16(%ebp), %eax
	movl	%eax, (%esp)
	call	posix_memalign
	movl	$1, -36(%ebp)
	testl	%eax, %eax
	cmove	-16(%ebp), %esi
	movl	%esi, -40(%ebp)
#APP
# 15 "a.cpp" 1
	#BEGIN ONES
# 0 "" 2
#NO_APP
	movl	-20(%ebp), %eax
	leal	(%eax,%eax,2), %eax
	cmpl	$3000, %eax
	je	.L104
	movl	-24(%ebp), %eax
	movl	%eax, (%esp)
	call	free
	leal	-16(%ebp), %eax
	movl	$24000, 8(%esp)
	movl	$16, 4(%esp)
	movl	%eax, (%esp)
	call	posix_memalign
	testl	%eax, %eax
	cmove	-16(%ebp), %ebx
	movl	%ebx, -24(%ebp)
..L104:
	movl	$1000, -20(%ebp)
#APP
# 307 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#begin
# 0 "" 2
# 314 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#unaligned start
# 0 "" 2
# 318 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#aligned middle
# 0 "" 2
#NO_APP
	xorl	%edx, %edx
..L107:
	movl	-24(%ebp), %eax
	fld1
	fstl	(%eax,%edx)
	fstpl	8(%eax,%edx)
	addl	$16, %edx
	cmpl	$24000, %edx
	jne	.L107
#APP
# 325 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#unaligned end
# 0 "" 2
# 329 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#end
# 0 "" 2
# 17 "a.cpp" 1
	#ANOTHER ONES
# 0 "" 2
#NO_APP
	movl	-28(%ebp), %eax
	leal	(%eax,%eax,2), %eax
	cmpl	$3000, %eax
	je	.L108
	movl	-32(%ebp), %eax
	movl	%eax, (%esp)
	call	free
	leal	-16(%ebp), %eax
	movl	$24000, 8(%esp)
	movl	$16, 4(%esp)
	movl	%eax, (%esp)
	call	posix_memalign
	xorl	%edx, %edx
	testl	%eax, %eax
	cmove	-16(%ebp), %edx
	movl	%edx, -32(%ebp)
..L108:
	movl	$1000, -28(%ebp)
#APP
# 307 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#begin
# 0 "" 2
# 314 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#unaligned start
# 0 "" 2
# 318 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#aligned middle
# 0 "" 2
#NO_APP
	xorl	%edx, %edx
..L111:
	movl	-32(%ebp), %eax
	fld1
	fstl	(%eax,%edx)
	fstpl	8(%eax,%edx)
	addl	$16, %edx
	cmpl	$24000, %edx
	jne	.L111
#APP
# 325 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#unaligned end
# 0 "" 2
# 329 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#end
# 0 "" 2
# 19 "a.cpp" 1
	#END ONES
# 0 "" 2
#NO_APP
	movl	%esi, (%esp)
	call	free
	leal	-16(%ebp), %eax
	movl	$24000, 8(%esp)
	movl	$16, 4(%esp)
	movl	%eax, (%esp)
	call	posix_memalign
	xorl	%edx, %edx
	movl	$1000, -36(%ebp)
	testl	%eax, %eax
	cmove	-16(%ebp), %edx
	movl	%edx, -40(%ebp)
#APP
# 22 "a.cpp" 1
	#BEGIN MAIN LOOP
# 0 "" 2
#NO_APP
	movl	$0, -52(%ebp)
	.p2align 4,,7
	.p2align 3
..L122:
	movl	-20(%ebp), %eax
	leal	(%eax,%eax,2), %edi
	movl	%eax, -48(%ebp)
	movl	-36(%ebp), %eax
	leal	(%eax,%eax,2), %eax
	cmpl	%eax, %edi
	je	.L114
	movl	-40(%ebp), %eax
	movl	%eax, (%esp)
	call	free
	leal	0(,%edi,8), %eax
	movl	%eax, 8(%esp)
	leal	-16(%ebp), %eax
	movl	$16, 4(%esp)
	movl	%eax, (%esp)
	call	posix_memalign
	testl	%eax, %eax
	movl	$0, %eax
	cmove	-16(%ebp), %eax
	movl	%eax, -40(%ebp)
..L114:
#APP
# 307 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#begin
# 0 "" 2
#NO_APP
	movl	%edi, %eax
	shrl	$31, %eax
	leal	(%eax,%edi), %ecx
	andl	$-2, %ecx
#APP
# 314 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#unaligned start
# 0 "" 2
# 318 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#aligned middle
# 0 "" 2
#NO_APP
	testl	%ecx, %ecx
	jle	.L117
	xorl	%edx, %edx
	.p2align 4,,7
	.p2align 3
..L118:
	movl	-24(%ebp), %eax
	movapd	(%eax,%edx,8), %xmm0
	movl	-32(%ebp), %eax
	addpd	(%eax,%edx,8), %xmm0
	movl	-40(%ebp), %eax
	movapd	%xmm0, (%eax,%edx,8)
	addl	$2, %edx
	cmpl	%edx, %ecx
	jg	.L118
..L117:
#APP
# 325 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#unaligned end
# 0 "" 2
#NO_APP
	cmpl	%edi, %ecx
	jge	.L119
	leal	0(,%ecx,8), %ebx
	movl	-32(%ebp), %esi
	movl	%ebx, %edx
	movl	%ebx, %eax
	addl	-24(%ebp), %edx
	addl	-40(%ebp), %eax
	.p2align 4,,7
	.p2align 3
..L120:
	fldl	(%edx)
	addl	$1, %ecx
	addl	$8, %edx
	faddl	(%esi,%ebx)
	addl	$8, %ebx
	fstpl	(%eax)
	addl	$8, %eax
	cmpl	%edi, %ecx
	jl	.L120
..L119:
#APP
# 329 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
	#end
# 0 "" 2
#NO_APP
	addl	$1, -52(%ebp)
	cmpl	$100000, -52(%ebp)
	je	.L121
	movl	-48(%ebp), %eax
	movl	%eax, -36(%ebp)
	jmp	.L122
..L121:
#APP
# 28 "a.cpp" 1
	#END MAIN LOOP
# 0 "" 2
#NO_APP
	movl	-40(%ebp), %eax
	movl	%eax, (%esp)
	call	free
	movl	-32(%ebp), %eax
	movl	%eax, (%esp)
	call	free
	movl	-24(%ebp), %eax
	movl	%eax, (%esp)
	call	free
	addl	$60, %esp
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
..L123:
..L124:
..L125:
..LFE2862:
	.size	_Z10test_eigenv, .-_Z10test_eigenv
	.p2align 4,,15
..globl main
	.type	main, @function
main:
..LFB2864:
	leal	4(%esp), %ecx
..LCFI28:
	andl	$-16, %esp
	pushl	-4(%ecx)
..LCFI29:
	pushl	%ebp
..LCFI30:
	movl	%esp, %ebp
..LCFI31:
	pushl	%ecx
..LCFI32:
	subl	$4, %esp
..LCFI33:
	call	_Z10test_eigenv
	addl	$4, %esp
	xorl	%eax, %eax
	popl	%ecx
	popl	%ebp
	leal	-4(%ecx), %esp
	ret
..LFE2864:
	.size	main, .-main
	.local	_ZStL8__ioinit
	.comm	_ZStL8__ioinit,1,1
	.weakref	_ZL20__gthrw_pthread_oncePiPFvvE,pthread_once
	.weakref	_ZL27__gthrw_pthread_getspecificj,pthread_getspecific
	.weakref	_ZL27__gthrw_pthread_setspecificjPKv,pthread_setspecific
	.weakref	_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_,pthread_create
	.weakref	_ZL22__gthrw_pthread_cancelm,pthread_cancel
	.weakref	_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t,pthread_mutex_lock
	.weakref	_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t,pthread_mutex_trylock
	.weakref	_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t,pthread_mutex_unlock
	.weakref	_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t,pthread_mutex_init
	.weakref	_ZL30__gthrw_pthread_cond_broadcastP14pthread_cond_t,pthread_cond_broadcast
	.weakref	_ZL25__gthrw_pthread_cond_waitP14pthread_cond_tP15pthread_mutex_t,pthread_cond_wait
	.weakref	_ZL26__gthrw_pthread_key_createPjPFvPvE,pthread_key_create
	.weakref	_ZL26__gthrw_pthread_key_deletej,pthread_key_delete
	.weakref	_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t,pthread_mutexattr_init
	.weakref	_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti,pthread_mutexattr_settype
	.weakref	_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t,pthread_mutexattr_destroy
	.section	.rodata.cst4,"aM",@progbits,4
	.align 4
..LC3:
	.long	1075838976
	.section	.eh_frame,"a",@progbits
..Lframe1:
	.long	.LECIE1-.LSCIE1
..LSCIE1:
	.long	0x0
	.byte	0x1
	.string	"zPL"
	.uleb128 0x1
	.sleb128 -4
	.byte	0x8
	.uleb128 0x6
	.byte	0x0
	.long	__gxx_personality_v0
	.byte	0x0
	.byte	0xc
	.uleb128 0x4
	.uleb128 0x4
	.byte	0x88
	.uleb128 0x1
	.align 4
..LECIE1:
..LSFDE1:
	.long	.LEFDE1-.LASFDE1
..LASFDE1:
	.long	.LASFDE1-.Lframe1
	.long	.LFB3234
	.long	.LFE3234-.LFB3234
	.uleb128 0x4
	.long	0x0
	.byte	0x4
	.long	.LCFI0-.LFB3234
	.byte	0xe
	.uleb128 0x8
	.byte	0x85
	.uleb128 0x2
	.byte	0x4
	.long	.LCFI1-.LCFI0
	.byte	0xd
	.uleb128 0x5
	.align 4
..LEFDE1:
..LSFDE3:
	.long	.LEFDE3-.LASFDE3
..LASFDE3:
	.long	.LASFDE3-.Lframe1
	.long	.LFB3081
	.long	.LFE3081-.LFB3081
	.uleb128 0x4
	.long	0x0
	.byte	0x4
	.long	.LCFI3-.LFB3081
	.byte	0xe
	.uleb128 0x8
	.byte	0x85
	.uleb128 0x2
	.byte	0x4
	.long	.LCFI4-.LCFI3
	.byte	0xd
	.uleb128 0x5
	.byte	0x4
	.long	.LCFI8-.LCFI4
	.byte	0x83
	.uleb128 0x5
	.byte	0x86
	.uleb128 0x4
	.byte	0x87
	.uleb128 0x3
	.align 4
..LEFDE3:
..LSFDE5:
	.long	.LEFDE5-.LASFDE5
..LASFDE5:
	.long	.LASFDE5-.Lframe1
	.long	.LFB2930
	.long	.LFE2930-.LFB2930
	.uleb128 0x4
	.long	0x0
	.byte	0x4
	.long	.LCFI9-.LFB2930
	.byte	0xe
	.uleb128 0x8
	.byte	0x85
	.uleb128 0x2
	.byte	0x4
	.long	.LCFI10-.LCFI9
	.byte	0xd
	.uleb128 0x5
	.byte	0x4
	.long	.LCFI12-.LCFI10
	.byte	0x83
	.uleb128 0x4
	.byte	0x4
	.long	.LCFI13-.LCFI12
	.byte	0x86
	.uleb128 0x3
	.align 4
..LEFDE5:
..LSFDE9:
	.long	.LEFDE9-.LASFDE9
..LASFDE9:
	.long	.LASFDE9-.Lframe1
	.long	.LFB2863
	.long	.LFE2863-.LFB2863
	.uleb128 0x4
	.long	.LLSDA2863
	.byte	0x4
	.long	.LCFI16-.LFB2863
	.byte	0xe
	.uleb128 0x8
	.byte	0x85
	.uleb128 0x2
	.byte	0x4
	.long	.LCFI17-.LCFI16
	.byte	0xd
	.uleb128 0x5
	.byte	0x4
	.long	.LCFI21-.LCFI17
	.byte	0x83
	.uleb128 0x5
	.byte	0x86
	.uleb128 0x4
	.byte	0x87
	.uleb128 0x3
	.align 4
..LEFDE9:
	.ident	"GCC: (Ubuntu 4.3.0-1ubuntu1) 4.3.0"
	.section	.note.GNU-stack,"",@progbits


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/