[eigen] vectorization bug |
[ Thread Index |
Date Index
| More lists.tuxfamily.org/eigen Archives
]
Hi List,
Here's a simple benchmark, a.cpp. It runs faster without vectorization
than with!
Trying to understand this I added some asm comments in Assign.h, so my
copy looks like this:
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
{
static void run(Derived1 &dst, const Derived2 &src)
{
asm("#begin");
const int size = dst.size();
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
const int alignedStart =
ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
: ei_alignmentOffset(&dst.coeffRef(0), size);
const int alignedEnd = alignedStart +
((size-alignedStart)/packetSize)*packetSize;
asm("#unaligned start");
for(int index = 0; index < alignedStart; index++)
dst.copyCoeff(index, src);
asm("#aligned middle");
for(int index = alignedStart; index < alignedEnd; index += packetSize)
{
dst.template copyPacket<Derived2, Aligned,
ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
}
asm("#unaligned end");
for(int index = alignedEnd; index < size; index++)
dst.copyCoeff(index, src);
asm("#end");
}
};
I attach the resulting assembly (a.s). Can you see what's wrong?
Another thing. The ones() part compiles to this:
xorl %edx, %edx
.L107:
movl -24(%ebp), %eax
fld1
fstl (%eax,%edx)
fstpl 8(%eax,%edx)
addl $16, %edx
cmpl $24000, %edx
jne .L107
This is not vectorized, right??
Cheers,
Benoit
Cheers,
Benoit
----------------------------------------------------------------
This message was sent using IMP, the Internet Messaging Program.
#include <Eigen/Array>
void test_eigen()
{
typedef Eigen::Matrix<double, 3, Eigen::Dynamic> OBPositions;
OBPositions pos1, pos2, pos3;
// initialization
asm("#BEGIN ONES");
pos1 = OBPositions::Ones(3, 1000);
asm("#ANOTHER ONES");
pos2 = OBPositions::Ones(3, 1000);
asm("#END ONES");
pos3.resize(3, 1000);
asm("#BEGIN MAIN LOOP");
// benchmark
for (int i = 0; i < 100000; ++i)
{
pos3 = pos1 + pos2;
}
asm("#END MAIN LOOP");
}
// Program entry point: runs the Eigen benchmark once and exits successfully.
int main()
{
  test_eigen();
  return 0;
}
.file "a.cpp"
.text
.p2align 4,,15
.type _GLOBAL__I__Z10test_eigenv, @function
# Static-initialization routine of this translation unit (its address is
# placed in .ctors right after this function). It constructs the
# std::ios_base::Init object _ZStL8__ioinit and registers that object's
# destructor with __cxa_atexit.
# Local labels are restored to their single-dot names (.LFB3234, .LCFIn,
# .LFE3234), which is how the .eh_frame data refers to them.
_GLOBAL__I__Z10test_eigenv:
.LFB3234:
pushl %ebp
.LCFI0:
movl %esp, %ebp
.LCFI1:
subl $24, %esp
.LCFI2:
# std::ios_base::Init::Init(&__ioinit)
movl $_ZStL8__ioinit, (%esp)
call _ZNSt8ios_base4InitC1Ev
# __cxa_atexit(&std::ios_base::Init::~Init, &__ioinit, &__dso_handle)
movl $__dso_handle, 8(%esp)
movl $_ZStL8__ioinit, 4(%esp)
movl $_ZNSt8ios_base4InitD1Ev, (%esp)
call __cxa_atexit
leave
ret
.LFE3234:
.size _GLOBAL__I__Z10test_eigenv, .-_GLOBAL__I__Z10test_eigenv
.section .ctors,"aw",@progbits
.align 4
.long _GLOBAL__I__Z10test_eigenv
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "vector::_M_fill_insert"
.section .text._ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_,"axG",@progbits,_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_,comdat
.align 2
.p2align 4,,15
.weak _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_
.type _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_, @function
# std::vector<Eigen::Matrix<double,3,1,0,3,1>>::_M_fill_insert(pos, n, value)
# Stack arguments: 8(%ebp) = this, 12(%ebp) = pos, 16(%ebp) = n,
# 20(%ebp) = address of value. Each element is three doubles (24 bytes).
# The vector object holds three pointers: (this) = start, 4(this) = finish,
# 8(this) = end of storage.
# Byte distances are turned into element counts with "sarl $3" followed by a
# multiply by -1431655765 (0xAAAAAAAB, the inverse of 3 modulo 2^32).
# Local labels are restored to their single-dot names, matching the jump
# targets and the .eh_frame references.
_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_:
.LFB3081:
pushl %ebp
.LCFI3:
movl %esp, %ebp
.LCFI4:
pushl %edi
.LCFI5:
pushl %esi
.LCFI6:
pushl %ebx
.LCFI7:
subl $28, %esp
.LCFI8:
movl 16(%ebp), %edx
movl 20(%ebp), %esi
# n == 0: nothing to insert
testl %edx, %edx
je .L37
# spare capacity = (end of storage - finish) / 24; if n fits, insert in place
movl 8(%ebp), %eax
movl 4(%eax), %ebx
movl 8(%eax), %eax
subl %ebx, %eax
sarl $3, %eax
imull $-1431655765, %eax, %eax
cmpl %eax, 16(%ebp)
jbe .L45
# Reallocation path. edx = current element count; 178956970 is the largest
# element count used here; throw if n exceeds what is left below that limit.
movl 8(%ebp), %edi
movl %ebx, %eax
subl (%edi), %eax
sarl $3, %eax
imull $-1431655765, %eax, %edx
movl $178956970, %eax
subl %edx, %eax
cmpl %eax, 16(%ebp)
ja .L46
# new length = count + max(count, n), kept at -24(%ebp)
movl 16(%ebp), %eax
cmpl 16(%ebp), %edx
cmovae %edx, %eax
addl %edx, %eax
movl %eax, -24(%ebp)
jb .L22
cmpl $178956970, %eax
jbe .L47
.L22:
# length overflowed or exceeds the limit: clamp it; -16 is the byte count
# 178956970 * 24 taken modulo 2^32
movl $178956970, -24(%ebp)
movl $-16, %eax
.L25:
# allocate the new storage with operator new(unsigned); pointer -> -16(%ebp)
movl %eax, (%esp)
call _Znwj
movl 8(%ebp), %ecx
movl %eax, -16(%ebp)
movl 4(%ecx), %ebx
jmp .L24
.p2align 4,,7
.p2align 3
.L47:
# new length within the limit: zero length keeps a null storage pointer,
# otherwise .L48 computes the byte count and allocates
testl %eax, %eax
movl $0, -16(%ebp)
jne .L48
.L24:
# copy the elements in [start, pos) into the new storage
movl 8(%ebp), %edi
movl -16(%ebp), %ecx
movl (%edi), %edi
movl %ecx, %eax
movl %edi, %edx
movl %edi, -20(%ebp)
movl 12(%ebp), %edi
cmpl %edi, %edx
je .L27
.p2align 4,,7
.p2align 3
.L38:
# the destination is tested for null before each element copy
testl %ecx, %ecx
je .L28
fldl (%edx)
fstpl (%ecx)
fldl 8(%edx)
fstpl 8(%ecx)
fldl 16(%edx)
fstpl 16(%ecx)
.L28:
addl $24, %edx
addl $24, %ecx
cmpl 12(%ebp), %edx
jne .L38
# recompute the destination address just past the copied prefix
movl -20(%ebp), %eax
movl 12(%ebp), %edx
movl -16(%ebp), %ecx
addl $24, %eax
subl %eax, %edx
movl %edx, %eax
shrl $3, %eax
imull $178956971, %eax, %eax
andl $536870911, %eax
leal 3(%eax,%eax,2), %eax
leal (%ecx,%eax,8), %eax
.L27:
# write n copies of *value after the copied prefix
movl %eax, %ecx
movl 16(%ebp), %edx
jmp .L32
.p2align 4,,7
.p2align 3
.L49:
addl $24, %eax
.L32:
testl %eax, %eax
je .L30
fldl (%esi)
fstpl (%eax)
fldl 8(%esi)
fstpl 8(%eax)
fldl 16(%esi)
fstpl 16(%eax)
.L30:
subl $1, %edx
jne .L49
# esi = destination just past the n inserted copies; then copy [pos, finish)
movl 16(%ebp), %edi
cmpl 12(%ebp), %ebx
leal (%edi,%edi,2), %eax
leal (%ecx,%eax,8), %esi
je .L33
movl 12(%ebp), %edx
movl %esi, %eax
.p2align 4,,7
.p2align 3
.L35:
testl %eax, %eax
je .L34
fldl (%edx)
fstpl (%eax)
fldl 8(%edx)
fstpl 8(%eax)
fldl 16(%edx)
fstpl 16(%eax)
.L34:
addl $24, %edx
addl $24, %eax
cmpl %edx, %ebx
jne .L35
movl 12(%ebp), %eax
addl $24, %eax
subl %eax, %ebx
shrl $3, %ebx
imull $178956971, %ebx, %eax
andl $536870911, %eax
leal 3(%eax,%eax,2), %eax
leal (%esi,%eax,8), %esi
.L33:
# release the old storage with operator delete(void*) if it was non-null
movl -20(%ebp), %eax
testl %eax, %eax
je .L36
movl -20(%ebp), %eax
movl %eax, (%esp)
call _ZdlPv
.L36:
# publish new start, finish and end of storage (start + new length * 24)
movl -16(%ebp), %ecx
movl 8(%ebp), %edx
movl %ecx, (%edx)
movl %esi, 4(%edx)
movl -24(%ebp), %edi
leal (%edi,%edi,2), %eax
leal (%ecx,%eax,8), %eax
movl %eax, 8(%edx)
jmp .L37
.L53:
# discard the three value components still on the x87 stack
fstp %st(0)
fstp %st(0)
fstp %st(0)
.p2align 4,,7
.p2align 3
.L37:
# common return
addl $28, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.p2align 4,,7
.p2align 3
.L45:
# In-place path (enough spare capacity). The three components of *value are
# loaded onto the x87 stack; esi = number of elements in [pos, finish).
movl %ebx, %eax
fldl (%esi)
subl 12(%ebp), %eax
fldl 8(%esi)
fldl 16(%esi)
sarl $3, %eax
imull $-1431655765, %eax, %esi
cmpl %esi, 16(%ebp)
jae .L6
# n < elements after pos: first copy the last n elements past finish
movl 16(%ebp), %ecx
leal (%ecx,%ecx,2), %eax
movl %ebx, %ecx
leal 0(,%eax,8), %esi
subl %esi, %ecx
cmpl %ecx, %ebx
je .L7
movl %ebx, %edx
movl %ecx, %eax
jmp .L9
.p2align 4,,7
.p2align 3
.L50:
addl $24, %edx
.L9:
testl %edx, %edx
je .L8
fldl (%eax)
fstpl (%edx)
fldl 8(%eax)
fstpl 8(%edx)
fldl 16(%eax)
fstpl 16(%edx)
.L8:
addl $24, %eax
cmpl %eax, %ebx
jne .L50
.L7:
# finish += n * 24; then move [pos, old finish - n) up, walking backwards
movl 8(%ebp), %edi
leal (%ebx,%esi), %eax
movl %eax, 4(%edi)
movl %ecx, %eax
subl 12(%ebp), %eax
sarl $3, %eax
imull $-1431655765, %eax, %eax
testl %eax, %eax
jle .L10
xorl %edx, %edx
.p2align 4,,7
.p2align 3
.L11:
fldl -24(%ecx,%edx)
subl $1, %eax
fstpl -24(%ebx,%edx)
fldl -16(%ecx,%edx)
fstpl -16(%ebx,%edx)
fldl -8(%ecx,%edx)
fstpl -8(%ebx,%edx)
subl $24, %edx
testl %eax, %eax
jg .L11
.L10:
# overwrite [pos, pos + n) with the value held on the x87 stack
movl 12(%ebp), %edx
addl %esi, %edx
cmpl 12(%ebp), %edx
je .L53
fxch %st(2)
movl 12(%ebp), %eax
jmp .L12
.p2align 4,,7
.p2align 3
.L54:
fxch %st(2)
fxch %st(1)
.L12:
fstl (%eax)
fxch %st(1)
fstl 8(%eax)
fxch %st(2)
fstl 16(%eax)
addl $24, %eax
cmpl %eax, %edx
jne .L54
fstp %st(0)
fstp %st(0)
fstp %st(0)
addl $28, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.p2align 4,,7
.p2align 3
.L6:
# n >= elements after pos: first write (n - elements after pos) copies of
# the value at finish
movl 16(%ebp), %ecx
subl %esi, %ecx
je .L55
movl %ebx, %eax
movl %ecx, %edx
jmp .L15
.p2align 4,,7
.p2align 3
.L51:
fxch %st(1)
fxch %st(2)
fxch %st(1)
addl $24, %eax
.L15:
testl %eax, %eax
je .L56
fxch %st(2)
fstl (%eax)
fxch %st(1)
fstl 8(%eax)
fxch %st(2)
fstl 16(%eax)
jmp .L14
.p2align 4,,7
.p2align 3
.L56:
fxch %st(1)
fxch %st(2)
fxch %st(1)
.p2align 4,,7
.p2align 3
.L14:
subl $1, %edx
jne .L51
jmp .L13
.L55:
fxch %st(1)
fxch %st(2)
fxch %st(1)
.p2align 4,,7
.p2align 3
.L13:
# edi = finish after the extra copies; store it, then copy [pos, old finish)
# to edi
leal (%ecx,%ecx,2), %eax
leal (%ebx,%eax,8), %edi
movl 8(%ebp), %eax
movl %edi, 4(%eax)
cmpl 12(%ebp), %ebx
je .L16
movl 12(%ebp), %edx
movl %edi, %ecx
movl %edx, %eax
jmp .L19
.p2align 4,,7
.p2align 3
.L52:
addl $24, %ecx
.L19:
testl %ecx, %ecx
je .L17
fldl (%eax)
fstpl (%ecx)
fldl 8(%eax)
fstpl 8(%ecx)
fldl 16(%eax)
fstpl 16(%ecx)
.L17:
addl $24, %eax
cmpl %eax, %ebx
jne .L52
# finish = edi + (elements after pos) * 24; then overwrite [pos, old finish)
# with the value
fxch %st(1)
movl 8(%ebp), %ecx
leal (%esi,%esi,2), %eax
leal (%edi,%eax,8), %eax
movl %eax, 4(%ecx)
jmp .L20
.p2align 4,,7
.p2align 3
.L57:
fxch %st(1)
fxch %st(2)
.L20:
fstl (%edx)
fxch %st(2)
fstl 8(%edx)
fxch %st(1)
fstl 16(%edx)
addl $24, %edx
cmpl %edx, %ebx
jne .L57
fstp %st(0)
fstp %st(0)
fstp %st(0)
addl $28, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.L16:
# pos == old finish: nothing to relocate; pop the value and update finish
fstp %st(0)
fstp %st(0)
fstp %st(0)
movl 8(%ebp), %edx
leal (%esi,%esi,2), %eax
leal (%edi,%eax,8), %eax
movl %eax, 4(%edx)
jmp .L37
.L46:
# std::__throw_length_error("vector::_M_fill_insert")
movl $.LC0, (%esp)
call _ZSt20__throw_length_errorPKc
.L48:
# byte count = new length * 24, then allocate at .L25
movl -24(%ebp), %edx
leal (%edx,%edx,2), %eax
sall $3, %eax
jmp .L25
.LFE3081:
.size _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_, .-_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_
.section .text._ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_,"axG",@progbits,_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_,comdat
.align 2
.p2align 4,,15
.weak _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_
.type _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_, @function
# std::vector<Eigen::Matrix<double,3,1,0,3,1>>::resize(new_size, value)
# Stack arguments: 8(%ebp) = this, 12(%ebp) = new_size, 16(%ebp) = value.
# Elements are 24 bytes; "sarl $3" followed by a multiply by -1431655765
# (0xAAAAAAAB, the inverse of 3 modulo 2^32) converts the byte distance
# finish - start into an element count.
# Local labels are restored to their single-dot names, matching the jump
# target and the .eh_frame references.
_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_:
.LFB2930:
pushl %ebp
.LCFI9:
movl %esp, %ebp
.LCFI10:
subl $24, %esp
.LCFI11:
movl %ebx, -8(%ebp)
.LCFI12:
movl 8(%ebp), %ebx
movl %esi, -4(%ebp)
.LCFI13:
# edx = new_size, esi = finish, ecx = start, eax = current element count
movl 12(%ebp), %edx
movl 4(%ebx), %esi
movl (%ebx), %ecx
movl %esi, %eax
subl %ecx, %eax
sarl $3, %eax
imull $-1431655765, %eax, %eax
cmpl %eax, %edx
jae .L59
# shrinking: finish = start + new_size * 24
leal (%edx,%edx,2), %eax
leal (%ecx,%eax,8), %eax
movl %eax, 4(%ebx)
movl -8(%ebp), %ebx
movl -4(%ebp), %esi
movl %ebp, %esp
popl %ebp
ret
.p2align 4,,7
.p2align 3
.L59:
# new_size >= current count:
# _M_fill_insert(this, finish, new_size - count, value)
movl 16(%ebp), %ecx
subl %eax, %edx
movl %esi, 4(%esp)
movl %ebx, (%esp)
movl %edx, 8(%esp)
movl %ecx, 12(%esp)
call _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPS2_S4_EEjRKS2_
movl -8(%ebp), %ebx
movl -4(%ebp), %esi
movl %ebp, %esp
popl %ebp
ret
.LFE2930:
.size _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_, .-_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_
.section .text._ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev,"axG",@progbits,_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev,comdat
.align 2
.p2align 4,,15
.weak _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
.type _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev, @function
# std::vector<Eigen::Matrix<double,3,1,0,3,1>>::~vector()
# Loads the start pointer from the vector at 8(%ebp); when it is non-null the
# pointer replaces the argument slot and control tail-jumps to
# operator delete(void*). No per-element work is emitted here.
# Local labels and the ".-symbol" size expression are restored to their
# single-dot forms.
_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev:
.LFB2920:
pushl %ebp
.LCFI14:
movl %esp, %ebp
.LCFI15:
movl 8(%ebp), %eax
movl (%eax), %eax
testl %eax, %eax
je .L64
# tail call: operator delete(start)
movl %eax, 8(%ebp)
popl %ebp
jmp _ZdlPv
.p2align 4,,7
.p2align 3
.L64:
# null storage: nothing to release
popl %ebp
ret
.LFE2920:
.size _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev, .-_ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
# test_vector(): the std::vector counterpart of the Eigen benchmark. Its C++
# source is not part of the posted a.cpp, so the notes below describe only
# what the instructions do.
# Three vectors of 24-byte elements live at -28, -40 and -52(%ebp) (start,
# finish, end-of-storage pointers each). Each is resized to 1000 elements,
# the first two are filled with 1.0, and the third is repeatedly assigned
# element-wise (constant at .LC3) * second + first using x87 arithmetic.
# The .globl directives and local labels are restored to their single-dot
# forms, matching the jump targets, the exception table and .eh_frame.
.globl _Unwind_Resume
.text
.p2align 4,,15
.globl _Z11test_vectorv
.type _Z11test_vectorv, @function
_Z11test_vectorv:
.LFB2863:
pushl %ebp
.LCFI16:
movl %esp, %ebp
.LCFI17:
pushl %edi
.LCFI18:
pushl %esi
.LCFI19:
pushl %ebx
.LCFI20:
subl $172, %esp
.LCFI21:
# zero the three vector objects, then resize(1000, value) on the first one
leal -88(%ebp), %eax
movl %eax, 8(%esp)
leal -28(%ebp), %eax
movl $0, -28(%ebp)
movl $0, -24(%ebp)
movl $0, -20(%ebp)
movl $0, -40(%ebp)
movl $0, -36(%ebp)
movl $0, -32(%ebp)
movl $0, -52(%ebp)
movl $0, -48(%ebp)
movl $0, -44(%ebp)
movl $1000, 4(%esp)
movl %eax, (%esp)
.LEHB0:
call _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_
.LEHE0:
# resize the second vector; its address is kept at -168(%ebp)
leal -40(%ebp), %edx
leal -112(%ebp), %eax
movl %eax, 8(%esp)
movl $1000, 4(%esp)
movl %edx, -168(%ebp)
movl %edx, (%esp)
.LEHB1:
call _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_
.LEHE1:
# resize the third vector; its address is kept at -164(%ebp)
leal -52(%ebp), %edx
leal -136(%ebp), %eax
movl %eax, 8(%esp)
movl $1000, 4(%esp)
movl %edx, -164(%ebp)
movl %edx, (%esp)
.LEHB2:
call _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EE6resizeEjS2_
.LEHE2:
# fill the first vector: 24000 bytes, three doubles of 1.0 per element
movl -28(%ebp), %eax
movl %eax, -152(%ebp)
xorl %eax, %eax
.L70:
movl -152(%ebp), %edx
fld1
fstl (%edx,%eax)
fstl 8(%edx,%eax)
fstpl 16(%edx,%eax)
addl $24, %eax
cmpl $24000, %eax
jne .L70
# fill the second vector with 1.0; ecx = its element count (bytes / 24)
movl -40(%ebp), %esi
movl -36(%ebp), %eax
subl %esi, %eax
sarl $3, %eax
imull $-1431655765, %eax, %ecx
testl %ecx, %ecx
je .L72
xorl %edx, %edx
xorl %eax, %eax
.L73:
leal (%eax,%eax,2), %eax
addl $1, %edx
leal (%esi,%eax,8), %eax
cmpl %edx, %ecx
fld1
fstl (%eax)
fstl 8(%eax)
fstpl 16(%eax)
movl %edx, %eax
ja .L73
.L72:
# -148(%ebp) = start of the third vector, edi = its element count,
# -160(%ebp) = outer loop counter (runs to 100000)
movl -52(%ebp), %eax
movl $0, -160(%ebp)
movl %eax, -148(%ebp)
movl -48(%ebp), %eax
subl -148(%ebp), %eax
sarl $3, %eax
imull $-1431655765, %eax, %edi
.L74:
# -156(%ebp) = middle loop counter (runs to 1000)
movl $0, -156(%ebp)
.p2align 4,,7
.p2align 3
.L80:
xorl %ebx, %ebx
xorl %eax, %eax
testl %edi, %edi
je .L78
.p2align 4,,7
.p2align 3
.L91:
# per element i: third[i] = .LC3 * second[i] + first[i], one component at a
# time on the x87 stack; .LC3 is loaded as a single-precision constant
flds .LC3
leal (%eax,%eax,2), %eax
addl $1, %ebx
fstl -60(%ebp)
movl -152(%ebp), %edx
sall $3, %eax
leal (%esi,%eax), %ecx
addl %eax, %edx
fmull (%ecx)
addl -148(%ebp), %eax
cmpl %edi, %ebx
movl %ecx, -64(%ebp)
faddl (%edx)
fstpl (%eax)
fldl -60(%ebp)
fldl 8(%ecx)
fmul %st(1), %st
faddl 8(%edx)
fstpl 8(%eax)
fmull 16(%ecx)
faddl 16(%edx)
fstpl 16(%eax)
movl %ebx, %eax
jb .L91
.L78:
addl $1, -156(%ebp)
cmpl $1000, -156(%ebp)
jne .L80
addl $1, -160(%ebp)
cmpl $100000, -160(%ebp)
jne .L74
# normal exit: destroy the third, second and first vectors
movl -164(%ebp), %eax
movl %eax, (%esp)
call _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
movl -168(%ebp), %edx
movl %edx, (%esp)
call _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
leal -28(%ebp), %eax
movl %eax, (%esp)
call _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
addl $172, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
# Exception landing pads named by the call-site table (.L87, .L86, .L85):
# each saves the exception pointer from eax in ebx, destroys the three
# vectors and resumes unwinding.
.L87:
.L67:
movl %eax, %ebx
leal -40(%ebp), %edx
leal -52(%ebp), %eax
movl %edx, -168(%ebp)
movl %eax, -164(%ebp)
.L88:
.L81:
.L89:
.L82:
.L90:
.L83:
movl -164(%ebp), %edx
movl %edx, (%esp)
call _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
movl -168(%ebp), %eax
movl %eax, (%esp)
call _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
leal -28(%ebp), %edx
movl %edx, (%esp)
call _ZNSt6vectorIN5Eigen6MatrixIdLi3ELi1ELi0ELi3ELi1EEESaIS2_EED1Ev
movl %ebx, (%esp)
.LEHB3:
call _Unwind_Resume
.LEHE3:
.L85:
.L71:
movl %eax, %ebx
jmp .L81
.L86:
.L69:
movl %eax, %ebx
leal -52(%ebp), %eax
movl %eax, -164(%ebp)
jmp .L81
.LFE2863:
.size _Z11test_vectorv, .-_Z11test_vectorv
.globl __gxx_personality_v0
.section .gcc_except_table,"a",@progbits
.LLSDA2863:
.byte 0xff
.byte 0xff
.byte 0x1
.uleb128 .LLSDACSE2863-.LLSDACSB2863
.LLSDACSB2863:
.uleb128 .LEHB0-.LFB2863
.uleb128 .LEHE0-.LEHB0
.uleb128 .L87-.LFB2863
.uleb128 0x0
.uleb128 .LEHB1-.LFB2863
.uleb128 .LEHE1-.LEHB1
.uleb128 .L86-.LFB2863
.uleb128 0x0
.uleb128 .LEHB2-.LFB2863
.uleb128 .LEHE2-.LEHB2
.uleb128 .L85-.LFB2863
.uleb128 0x0
.uleb128 .LEHB3-.LFB2863
.uleb128 .LEHE3-.LEHB3
.uleb128 0x0
.uleb128 0x0
.LLSDACSE2863:
.text
.p2align 4,,15
# test_eigen(): the benchmark from a.cpp, with Eigen fully inlined.
# Stack slots, as used below:
#   -24 / -20(%ebp)  pos1 data pointer / column count
#   -32 / -28(%ebp)  pos2 data pointer / column count
#   -40 / -36(%ebp)  pos3 data pointer / column count
#   -16(%ebp)        output slot for posix_memalign
#   -52(%ebp)        main loop counter
# The "#APP ... #NO_APP" groups are the asm("#...") markers from a.cpp and
# Assign.h and mark where each phase of the assignment kernel begins.
# The .globl directive and local labels are restored to their single-dot
# forms, matching the jump targets in this function.
.globl _Z10test_eigenv
.type _Z10test_eigenv, @function
_Z10test_eigenv:
.LFB2862:
pushl %ebp
.LCFI22:
movl %esp, %ebp
.LCFI23:
pushl %edi
.LCFI24:
pushl %esi
.LCFI25:
pushl %ebx
.LCFI26:
xorl %ebx, %ebx
subl $60, %esp
.LCFI27:
# Initial storage for pos1, pos2 and pos3: three posix_memalign(&slot, 16, 8)
# calls. Each data pointer is set to the returned block when the call
# returns 0 and to null otherwise; each column count starts at 1.
movl %ebx, %esi
leal -16(%ebp), %eax
movl $8, 8(%esp)
movl $16, 4(%esp)
movl %eax, (%esp)
call posix_memalign
movl $1, -20(%ebp)
movl $8, 8(%esp)
movl $16, 4(%esp)
testl %eax, %eax
movl %ebx, %eax
cmove -16(%ebp), %eax
movl %eax, -24(%ebp)
leal -16(%ebp), %eax
movl %eax, (%esp)
call posix_memalign
movl $1, -28(%ebp)
movl $8, 8(%esp)
movl $16, 4(%esp)
testl %eax, %eax
movl %ebx, %eax
cmove -16(%ebp), %eax
movl %eax, -32(%ebp)
leal -16(%ebp), %eax
movl %eax, (%esp)
call posix_memalign
movl $1, -36(%ebp)
testl %eax, %eax
cmove -16(%ebp), %esi
movl %esi, -40(%ebp)
#APP
# 15 "a.cpp" 1
#BEGIN ONES
# 0 "" 2
#NO_APP
# pos1 = Ones(3, 1000): unless 3 * columns is already 3000, free the old
# block and allocate 24000 bytes with 16-byte alignment
movl -20(%ebp), %eax
leal (%eax,%eax,2), %eax
cmpl $3000, %eax
je .L104
movl -24(%ebp), %eax
movl %eax, (%esp)
call free
leal -16(%ebp), %eax
movl $24000, 8(%esp)
movl $16, 4(%esp)
movl %eax, (%esp)
call posix_memalign
testl %eax, %eax
cmove -16(%ebp), %ebx
movl %ebx, -24(%ebp)
.L104:
movl $1000, -20(%ebp)
#APP
# 307 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#begin
# 0 "" 2
# 314 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#unaligned start
# 0 "" 2
# 318 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#aligned middle
# 0 "" 2
#NO_APP
# "Aligned middle" loop for the ones fill: 16 bytes (two doubles) per
# iteration, but written with x87 fld1 / fstl / fstpl rather than an SSE
# store, and the data pointer is reloaded from the stack every iteration.
xorl %edx, %edx
.L107:
movl -24(%ebp), %eax
fld1
fstl (%eax,%edx)
fstpl 8(%eax,%edx)
addl $16, %edx
cmpl $24000, %edx
jne .L107
#APP
# 325 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#unaligned end
# 0 "" 2
# 329 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#end
# 0 "" 2
# 17 "a.cpp" 1
#ANOTHER ONES
# 0 "" 2
#NO_APP
# pos2 = Ones(3, 1000): same sequence as for pos1
movl -28(%ebp), %eax
leal (%eax,%eax,2), %eax
cmpl $3000, %eax
je .L108
movl -32(%ebp), %eax
movl %eax, (%esp)
call free
leal -16(%ebp), %eax
movl $24000, 8(%esp)
movl $16, 4(%esp)
movl %eax, (%esp)
call posix_memalign
xorl %edx, %edx
testl %eax, %eax
cmove -16(%ebp), %edx
movl %edx, -32(%ebp)
.L108:
movl $1000, -28(%ebp)
#APP
# 307 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#begin
# 0 "" 2
# 314 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#unaligned start
# 0 "" 2
# 318 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#aligned middle
# 0 "" 2
#NO_APP
xorl %edx, %edx
.L111:
movl -32(%ebp), %eax
fld1
fstl (%eax,%edx)
fstpl 8(%eax,%edx)
addl $16, %edx
cmpl $24000, %edx
jne .L111
#APP
# 325 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#unaligned end
# 0 "" 2
# 329 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#end
# 0 "" 2
# 19 "a.cpp" 1
#END ONES
# 0 "" 2
#NO_APP
# pos3.resize(3, 1000): free the initial block (still held in esi) and
# allocate 24000 bytes with 16-byte alignment
movl %esi, (%esp)
call free
leal -16(%ebp), %eax
movl $24000, 8(%esp)
movl $16, 4(%esp)
movl %eax, (%esp)
call posix_memalign
xorl %edx, %edx
movl $1000, -36(%ebp)
testl %eax, %eax
cmove -16(%ebp), %edx
movl %edx, -40(%ebp)
#APP
# 22 "a.cpp" 1
#BEGIN MAIN LOOP
# 0 "" 2
#NO_APP
movl $0, -52(%ebp)
.p2align 4,,7
.p2align 3
.L122:
# One pass of pos3 = pos1 + pos2. edi = 3 * pos1 columns (coefficient count);
# pos3 is reallocated only when its coefficient count differs.
movl -20(%ebp), %eax
leal (%eax,%eax,2), %edi
movl %eax, -48(%ebp)
movl -36(%ebp), %eax
leal (%eax,%eax,2), %eax
cmpl %eax, %edi
je .L114
movl -40(%ebp), %eax
movl %eax, (%esp)
call free
leal 0(,%edi,8), %eax
movl %eax, 8(%esp)
leal -16(%ebp), %eax
movl $16, 4(%esp)
movl %eax, (%esp)
call posix_memalign
testl %eax, %eax
movl $0, %eax
cmove -16(%ebp), %eax
movl %eax, -40(%ebp)
.L114:
#APP
# 307 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#begin
# 0 "" 2
#NO_APP
# ecx = alignedEnd: the coefficient count rounded toward zero to a multiple
# of 2 (the packet size for doubles); no code is emitted for alignedStart
movl %edi, %eax
shrl $31, %eax
leal (%eax,%edi), %ecx
andl $-2, %ecx
#APP
# 314 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#unaligned start
# 0 "" 2
# 318 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#aligned middle
# 0 "" 2
#NO_APP
testl %ecx, %ecx
jle .L117
xorl %edx, %edx
.p2align 4,,7
.p2align 3
.L118:
# SSE2 packet loop: aligned load, packed add, aligned store, two doubles per
# iteration. All three data pointers are reloaded from the stack on every
# iteration instead of being kept in registers.
movl -24(%ebp), %eax
movapd (%eax,%edx,8), %xmm0
movl -32(%ebp), %eax
addpd (%eax,%edx,8), %xmm0
movl -40(%ebp), %eax
movapd %xmm0, (%eax,%edx,8)
addl $2, %edx
cmpl %edx, %ecx
jg .L118
.L117:
#APP
# 325 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#unaligned end
# 0 "" 2
#NO_APP
# scalar tail for coefficients in [alignedEnd, size), done with x87
cmpl %edi, %ecx
jge .L119
leal 0(,%ecx,8), %ebx
movl -32(%ebp), %esi
movl %ebx, %edx
movl %ebx, %eax
addl -24(%ebp), %edx
addl -40(%ebp), %eax
.p2align 4,,7
.p2align 3
.L120:
fldl (%edx)
addl $1, %ecx
addl $8, %edx
faddl (%esi,%ebx)
addl $8, %ebx
fstpl (%eax)
addl $8, %eax
cmpl %edi, %ecx
jl .L120
.L119:
#APP
# 329 "cuisine/trunk/kdesupport/eigen2/Eigen/src/Core/Assign.h" 1
#end
# 0 "" 2
#NO_APP
# next pass: stop after 100000, otherwise record pos3's column count
addl $1, -52(%ebp)
cmpl $100000, -52(%ebp)
je .L121
movl -48(%ebp), %eax
movl %eax, -36(%ebp)
jmp .L122
.L121:
#APP
# 28 "a.cpp" 1
#END MAIN LOOP
# 0 "" 2
#NO_APP
# release pos3, pos2 and pos1 storage
movl -40(%ebp), %eax
movl %eax, (%esp)
call free
movl -32(%ebp), %eax
movl %eax, (%esp)
call free
movl -24(%ebp), %eax
movl %eax, (%esp)
call free
addl $60, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.L123:
.L124:
.L125:
.LFE2862:
.size _Z10test_eigenv, .-_Z10test_eigenv
.p2align 4,,15
# main(): realigns the stack to 16 bytes, calls test_eigen() and returns 0.
# The .globl directive and local labels are restored to their single-dot
# forms.
.globl main
.type main, @function
main:
.LFB2864:
# keep the incoming stack pointer in ecx, align esp down to 16 bytes and
# re-push the return address
leal 4(%esp), %ecx
.LCFI28:
andl $-16, %esp
pushl -4(%ecx)
.LCFI29:
pushl %ebp
.LCFI30:
movl %esp, %ebp
.LCFI31:
pushl %ecx
.LCFI32:
subl $4, %esp
.LCFI33:
call _Z10test_eigenv
addl $4, %esp
# return value 0
xorl %eax, %eax
popl %ecx
popl %ebp
# restore the original, unaligned stack pointer
leal -4(%ecx), %esp
ret
.LFE2864:
.size main, .-main
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.weakref _ZL20__gthrw_pthread_oncePiPFvvE,pthread_once
.weakref _ZL27__gthrw_pthread_getspecificj,pthread_getspecific
.weakref _ZL27__gthrw_pthread_setspecificjPKv,pthread_setspecific
.weakref _ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_,pthread_create
.weakref _ZL22__gthrw_pthread_cancelm,pthread_cancel
.weakref _ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t,pthread_mutex_lock
.weakref _ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t,pthread_mutex_trylock
.weakref _ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t,pthread_mutex_unlock
.weakref _ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t,pthread_mutex_init
.weakref _ZL30__gthrw_pthread_cond_broadcastP14pthread_cond_t,pthread_cond_broadcast
.weakref _ZL25__gthrw_pthread_cond_waitP14pthread_cond_tP15pthread_mutex_t,pthread_cond_wait
.weakref _ZL26__gthrw_pthread_key_createPjPFvPvE,pthread_key_create
.weakref _ZL26__gthrw_pthread_key_deletej,pthread_key_delete
.weakref _ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t,pthread_mutexattr_init
.weakref _ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti,pthread_mutexattr_settype
.weakref _ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t,pthread_mutexattr_destroy
.section .rodata.cst4,"aM",@progbits,4
.align 4
.LC3:
.long 1075838976
.section .eh_frame,"a",@progbits
.Lframe1:
.long .LECIE1-.LSCIE1
.LSCIE1:
.long 0x0
.byte 0x1
.string "zPL"
.uleb128 0x1
.sleb128 -4
.byte 0x8
.uleb128 0x6
.byte 0x0
.long __gxx_personality_v0
.byte 0x0
.byte 0xc
.uleb128 0x4
.uleb128 0x4
.byte 0x88
.uleb128 0x1
.align 4
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1
.LASFDE1:
.long .LASFDE1-.Lframe1
.long .LFB3234
.long .LFE3234-.LFB3234
.uleb128 0x4
.long 0x0
.byte 0x4
.long .LCFI0-.LFB3234
.byte 0xe
.uleb128 0x8
.byte 0x85
.uleb128 0x2
.byte 0x4
.long .LCFI1-.LCFI0
.byte 0xd
.uleb128 0x5
.align 4
.LEFDE1:
.LSFDE3:
.long .LEFDE3-.LASFDE3
.LASFDE3:
.long .LASFDE3-.Lframe1
.long .LFB3081
.long .LFE3081-.LFB3081
.uleb128 0x4
.long 0x0
.byte 0x4
.long .LCFI3-.LFB3081
.byte 0xe
.uleb128 0x8
.byte 0x85
.uleb128 0x2
.byte 0x4
.long .LCFI4-.LCFI3
.byte 0xd
.uleb128 0x5
.byte 0x4
.long .LCFI8-.LCFI4
.byte 0x83
.uleb128 0x5
.byte 0x86
.uleb128 0x4
.byte 0x87
.uleb128 0x3
.align 4
.LEFDE3:
.LSFDE5:
.long .LEFDE5-.LASFDE5
.LASFDE5:
.long .LASFDE5-.Lframe1
.long .LFB2930
.long .LFE2930-.LFB2930
.uleb128 0x4
.long 0x0
.byte 0x4
.long .LCFI9-.LFB2930
.byte 0xe
.uleb128 0x8
.byte 0x85
.uleb128 0x2
.byte 0x4
.long .LCFI10-.LCFI9
.byte 0xd
.uleb128 0x5
.byte 0x4
.long .LCFI12-.LCFI10
.byte 0x83
.uleb128 0x4
.byte 0x4
.long .LCFI13-.LCFI12
.byte 0x86
.uleb128 0x3
.align 4
.LEFDE5:
.LSFDE9:
.long .LEFDE9-.LASFDE9
.LASFDE9:
.long .LASFDE9-.Lframe1
.long .LFB2863
.long .LFE2863-.LFB2863
.uleb128 0x4
.long .LLSDA2863
.byte 0x4
.long .LCFI16-.LFB2863
.byte 0xe
.uleb128 0x8
.byte 0x85
.uleb128 0x2
.byte 0x4
.long .LCFI17-.LCFI16
.byte 0xd
.uleb128 0x5
.byte 0x4
.long .LCFI21-.LCFI17
.byte 0x83
.uleb128 0x5
.byte 0x86
.uleb128 0x4
.byte 0x87
.uleb128 0x3
.align 4
.LEFDE9:
.ident "GCC: (Ubuntu 4.3.0-1ubuntu1) 4.3.0"
.section .note.GNU-stack,"",@progbits