[ Thread Index |
Date Index
| More lists.liballeg.org/allegro-developers Archives
]
Finally IT WORKS !!!!
Ok, here's the code; it's a replacement for src/i386/iblit16.s
If MMX is disabled, it reverts to the non-MMX clear. If the destination is a
video bitmap, it uses the %es segment prefix; otherwise it omits the prefix to
save the decode time. It also checks for alignment and copies pixels until
alignment has been reached.
It's not as optimized as I would like it to be. I'll start working on that,
but I need people to test this one.
Also, is it possible for memory or system bitmaps not to be in %ds ? If so,
I'll need to change this code.
My early tests show the MMX function can clear 190.1 MB/sec of RAM, whereas
the non-MMX version can do 189 MB/sec. But the Allegro test program
reports 3980 versus 3820 in favor of the MMX version. (This is under Windows.)
But under DOS (which is for some reason slower than dos-in-win95), my test
shows the memory clear speed to be the same (170 MB/sec), but the video RAM
clear speed *DOUBLES* with MMX to 86 MB/sec from 42.5 MB/sec !
/* ______ ___ ___
* /\ _ \ /\_ \ /\_ \
* \ \ \L\ \\//\ \ \//\ \ __ __ _ __ ___
* \ \ __ \ \ \ \ \ \ \ /'__`\ /'_ `\/\`'__\/ __`\
* \ \ \/\ \ \_\ \_ \_\ \_/\ __//\ \L\ \ \ \//\ \L\ \
* \ \_\ \_\/\____\/\____\ \____\ \____ \ \_\\ \____/
* \/_/\/_/\/____/\/____/\/____/\/___L\ \/_/ \/___/
* /\____/
* \_/__/
*
* 16 bit bitmap blitting (written for speed, not readability :-)
*
* By Shawn Hargreaves.
*
* See readme.txt for copyright information.
*/
#include "asmdefs.inc"
#include "blit.inc"
#ifdef ALLEGRO_COLOR16
.text
/* void _linear_clear_to_color16(BITMAP *bitmap, int color);
 *  Fills the clip rectangle (BMP_CL..BMP_CR, BMP_CT..BMP_CB) of a linear
 *  16 bit bitmap with the specified color, one scanline at a time.
 *
 *  In:   ARG1 = bitmap, ARG2 = color (only the low 16 bits are used).
 *  Out:  nothing.
 *  Saved and restored here: %ebp, %ebx, %esi, %edi, %es.
 *
 *  Register roles inside the line loops:
 *     %edx = bitmap            %ebx = current line
 *     %esi = pixels per line   %ecx = pixels left on this line
 *     %edi = write address     %eax = color in both 16 bit halves
 *
 *  When assembled with ALLEGRO_MMX and cpu_mmx is non-zero, each line is
 *  filled with 8 byte MMX stores: one leading pixel is written if the
 *  address is not 32 bit aligned, then groups of four pixels, then the
 *  0-3 trailing pixels. Bitmaps whose BMP_ID has bit 31 set are written
 *  through an explicit %es: prefix; all others are written without a
 *  segment override. Otherwise the stosw / rep stosl version is used.
 */
FUNC(_linear_clear_to_color16)
   pushl %ebp
   movl %esp, %ebp
   pushl %ebx
   pushl %esi
   pushl %edi
   pushw %es

#ifdef ALLEGRO_MMX               /* only use MMX if the build supports it */

   movl GLOBL(cpu_mmx), %eax     /* and only if it is enabled at runtime */
   orl %eax, %eax
   jz clear_no_mmx

   movl ARG1, %edx               /* edx = bmp */
   movl BMP_CT(%edx), %ebx       /* line to start at */
   movw BMP_SEG(%edx), %es       /* select segment */
   movl BMP_CR(%edx), %esi       /* width to clear */
   subl BMP_CL(%edx), %esi
   cld

   movl ARG2, %eax               /* duplicate color twice */
   movl ARG2, %ecx
   shll $16, %eax
   andl $0xFFFF, %ecx
   orl %ecx, %eax
   pushl %eax                    /* parked on the stack: %eax is needed for
                                  * the line number passed to WRITE_BANK() */

   movl BMP_ID(%edx), %eax       /* bit 31 of the id: per the original note
                                  * this marks a video memory bitmap */
   andl $0x80000000, %eax
   jnz clearMMXseg               /* use the %es: prefixed clear for those */

   _align_
clearMMX_loop:
   movl %ebx, %eax
   WRITE_BANK()                  /* select bank */
   movl BMP_CL(%edx), %edi
   leal (%eax, %edi, 2), %edi    /* get line address */

   popl %eax                     /* get color:color back */
   movl %esi, %ecx               /* width to clear */
   movd %eax, %mm0               /* rebuild mm0 every line in case it was
                                  * clobbered by WRITE_BANK() */
   movd %eax, %mm1
   psllq $32, %mm0
   por %mm1, %mm0                /* mm0 = four copies of the color */
   pushl %eax                    /* park it again for the next line */

   testl %ecx, %ecx              /* nothing to draw on an empty line: without */
   jle clearMMX_no_long          /* this the alignment step below would take
                                  * the count to -1 and the store loop would
                                  * run away */

   testl $3, %edi                /* is destination aligned on 32 bits? */
   jz clearMMX_aligned
   movw %ax, (%edi)              /* else write one pixel to align it */
   addl $2, %edi
   decl %ecx
   jz clearMMX_no_long

clearMMX_aligned:
   movd %ecx, %mm2               /* remember the pixel count for the tail */
   shrl $2, %ecx                 /* four pixels per 8 byte store */
   jz clearMMX_finish_line       /* fewer than 4 pixels: no MMX stores */

clearMMX_continue_line:
   movq %mm0, (%edi)
   addl $8, %edi
   decl %ecx
   jnz clearMMX_continue_line

clearMMX_finish_line:
   movd %mm2, %ecx               /* bits 0-1 = number of trailing pixels */
   testl $2, %ecx                /* a pair of pixels left? */
   jz clearMMX_finish_line2
   movl %eax, (%edi)             /* written first so the 32 bit store stays
                                  * on the alignment left by the loop */
   addl $4, %edi

clearMMX_finish_line2:
   testl $1, %ecx                /* an odd pixel left? */
   jz clearMMX_no_long
   movw %ax, (%edi)

clearMMX_no_long:
   incl %ebx
   cmpl %ebx, BMP_CB(%edx)
   jg clearMMX_loop              /* and loop */

   popl %eax                     /* drop the parked color */
   emms                          /* clear FPU tag word */
   jmp clear_end

clearMMXseg:
   _align_
clearMMXseg_loop:                /* same as clearMMX_loop, but every store
                                  * goes through %es: */
   movl %ebx, %eax
   WRITE_BANK()                  /* select bank */
   movl BMP_CL(%edx), %edi
   leal (%eax, %edi, 2), %edi    /* get line address */

   popl %eax                     /* get color:color back */
   movl %esi, %ecx               /* width to clear */
   movd %eax, %mm0               /* rebuild mm0 every line in case it was
                                  * clobbered by WRITE_BANK() */
   movd %eax, %mm1
   psllq $32, %mm0
   por %mm1, %mm0                /* mm0 = four copies of the color */
   pushl %eax                    /* park it again for the next line */

   testl %ecx, %ecx              /* nothing to draw on an empty line */
   jle clearMMXseg_no_long

   testl $3, %edi                /* is destination aligned on 32 bits? */
   jz clearMMXseg_aligned
   movw %ax, %es:(%edi)          /* else write one pixel to align it */
   addl $2, %edi
   decl %ecx
   jz clearMMXseg_no_long

clearMMXseg_aligned:
   movd %ecx, %mm2               /* remember the pixel count for the tail */
   shrl $2, %ecx                 /* four pixels per 8 byte store */
   jz clearMMXseg_finish_line    /* fewer than 4 pixels: no MMX stores */

clearMMXseg_continue_line:
   movq %mm0, %es:(%edi)
   addl $8, %edi
   decl %ecx
   jnz clearMMXseg_continue_line

clearMMXseg_finish_line:
   movd %mm2, %ecx               /* bits 0-1 = number of trailing pixels */
   testl $2, %ecx                /* a pair of pixels left? */
   jz clearMMXseg_finish_line2
   movl %eax, %es:(%edi)
   addl $4, %edi

clearMMXseg_finish_line2:
   testl $1, %ecx                /* an odd pixel left? */
   jz clearMMXseg_no_long
   movw %ax, %es:(%edi)

clearMMXseg_no_long:
   incl %ebx
   cmpl %ebx, BMP_CB(%edx)
   jg clearMMXseg_loop           /* and loop */

   popl %eax                     /* drop the parked color */
   emms                          /* clear FPU tag word */
   jmp clear_end

#endif /* ALLEGRO_MMX */

clear_no_mmx:                    /* no MMX available: string store version */
   movl ARG1, %edx               /* edx = bmp */
   movl BMP_CT(%edx), %ebx       /* line to start at */
   movw BMP_SEG(%edx), %es       /* select segment */
   movl BMP_CR(%edx), %esi       /* width to clear */
   subl BMP_CL(%edx), %esi
   cld

   _align_
clear_loop:
   movl %ebx, %eax
   WRITE_BANK()                  /* select bank */
   movl BMP_CL(%edx), %edi
   leal (%eax, %edi, 2), %edi    /* get line address */

   movw ARG2, %ax                /* duplicate color twice */
   shll $16, %eax
   movw ARG2, %ax

   movl %esi, %ecx               /* width to clear */
   shrl $1, %ecx                 /* halve for 32 bit clear; CF = odd pixel,
                                  * ZF = no 32 bit stores needed */
   jnc clear_no_word
   stosw                         /* clear an odd word (leaves flags alone) */

clear_no_word:
   jz clear_no_long              /* still testing ZF from the shrl above */

   rep ; stosl                   /* clear the line */

clear_no_long:
   incl %ebx
   cmpl %ebx, BMP_CB(%edx)
   jg clear_loop                 /* and loop */

clear_end:
   UNWRITE_BANK()

   popw %es
   popl %edi
   popl %esi
   popl %ebx
   movl %ebp, %esp
   popl %ebp

   ret                           /* end of _linear_clear_to_color16() */
/* void _linear_blit16(BITMAP *source, BITMAP *dest, int source_x, source_y,
 *                     int dest_x, dest_y, int width, height);
 *  Normal forwards blitting routine for linear 16 bit bitmaps.
 *
 *  The width argument is halved in place so that each line can be copied
 *  with 32 bit string moves, and one of three line loops is chosen:
 *  longs plus one leftover word, longs only, or a single word.
 *
 *  Saved and restored here: %ebp, %es, %edi, %esi, %ebx.
 *
 *  NOTE(review): BLIT_LOOP and the B_* argument names are defined outside
 *  this view (presumably in blit.inc). From its use here BLIT_LOOP appears
 *  to take a label suffix, the pixel size in bytes and the per-line copy
 *  code - confirm against the macro definition.
 */
FUNC(_linear_blit16)
pushl %ebp
movl %esp, %ebp
pushw %es
pushl %edi
pushl %esi
pushl %ebx
movl B_DEST, %edx
movw BMP_SEG(%edx), %es /* load destination segment */
/* %ds is kept in %bx; nothing in this function restores it directly, so
 * presumably BLIT_LOOP swaps %ds and needs the copy - TODO confirm */
movw %ds, %bx /* save data segment selector */
cld /* for forward copy */
/* After the shift: ZF set means the width was 0 or 1, CF set means the
 * width was odd. Note the argument slot itself is modified. */
shrl $1, B_WIDTH /* halve counter for long copies */
/* NOTE(review): a width of 0 also takes this branch and copies one word */
jz blit_only_one_word
jnc blit_even_words
_align_
BLIT_LOOP(longs_and_word, 2, /* long at a time, plus leftover word */
rep ; movsl
movsw
)
jmp blit_done
_align_
blit_even_words:
BLIT_LOOP(even_words, 2, /* copy a long at a time */
rep ; movsl
)
jmp blit_done
_align_
blit_only_one_word:
BLIT_LOOP(only_one_word, 2, /* copy just the one word */
movsw
)
_align_
blit_done:
/* UNWRITE_BANK() is invoked once with each bitmap in %edx */
movl B_SOURCE, %edx
UNWRITE_BANK()
movl B_DEST, %edx
UNWRITE_BANK()
/* pops mirror the pushes at function entry */
popl %ebx
popl %esi
popl %edi
popw %es
movl %ebp, %esp
popl %ebp
ret /* end of _linear_blit16() */
/* void _linear_blit_backward16(BITMAP *source, BITMAP *dest, int source_x,
 *                              int source_y, int dest_x, dest_y, int width, height);
 *  Reverse blitting routine, for overlapping linear 16 bit bitmaps.
 *
 *  Copies from the last pixel of the last line back to the first pixel of
 *  the first line: the x and y arguments are first advanced to the far
 *  corner of the rectangle, then each line is copied with the direction
 *  flag set and the y arguments are decremented.
 *
 *  Saved and restored here: %ebp, %es, %edi, %esi, %ebx. The direction
 *  flag is cleared again before returning. The argument slots for the
 *  x, y and height values are modified in place.
 */
FUNC(_linear_blit_backward16)
pushl %ebp
movl %esp, %ebp
pushw %es
pushl %edi
pushl %esi
pushl %ebx
/* move both y positions to the last line (y + height - 1) */
movl B_HEIGHT, %eax /* y values go from high to low */
decl %eax
addl %eax, B_SOURCE_Y
addl %eax, B_DEST_Y
/* move both x positions to the last pixel (x + width - 1) */
movl B_WIDTH, %eax /* x values go from high to low */
decl %eax
addl %eax, B_SOURCE_X
addl %eax, B_DEST_X
movl B_DEST, %edx
movw BMP_SEG(%edx), %es /* load destination segment */
/* %bx keeps the caller's %ds so it can be put back after every line */
movw %ds, %bx /* save data segment selector */
_align_
blit_backwards_loop:
movl B_DEST, %edx /* destination bitmap */
movl B_DEST_Y, %eax /* line number */
WRITE_BANK() /* select bank */
/* %eax is used as the line's base address from here on */
movl B_DEST_X, %edi /* x offset */
leal (%eax, %edi, 2), %edi
movl B_SOURCE, %edx /* source bitmap */
movl B_SOURCE_Y, %eax /* line number */
READ_BANK() /* select bank */
movl B_SOURCE_X, %esi /* x offset */
leal (%eax, %esi, 2), %esi
movl B_WIDTH, %ecx /* x loop counter */
/* %ds is switched last: the B_* loads above come before the switch */
movw BMP_SEG(%edx), %ds /* load data segment */
std /* backwards */
rep ; movsw /* copy the line */
movw %bx, %ds /* restore data segment */
decl B_SOURCE_Y
decl B_DEST_Y
/* the jg below tests the flags left by this decrement */
decl B_HEIGHT
jg blit_backwards_loop /* and loop */
cld /* finished */
/* UNWRITE_BANK() is invoked once with each bitmap in %edx */
movl B_SOURCE, %edx
UNWRITE_BANK()
movl B_DEST, %edx
UNWRITE_BANK()
/* pops mirror the pushes at function entry */
popl %ebx
popl %esi
popl %edi
popw %es
movl %ebp, %esp
popl %ebp
ret /* end of _linear_blit_backward16() */
/* _linear_blit16_end:
 *  Empty routine that only returns. NOTE(review): it sits directly after
 *  the plain blit routines, so it presumably serves as an end-of-code
 *  marker symbol for them - confirm against the code that references it.
 */
FUNC(_linear_blit16_end)
ret
/* void _linear_masked_blit16(BITMAP *source, *dest, int source_x, source_y,
 *                            int dest_x, dest_y, int width, height);
 *  Masked blitting routine for linear 16 bit bitmaps: source pixels equal
 *  to the mask color read from the bitmap vtable are skipped, leaving the
 *  destination pixel untouched; all other pixels are copied.
 *
 *  Saved and restored here: %ebp, %es, %edi, %esi, %ebx.
 *
 *  NOTE(review): BLIT_LOOP is defined outside this view (presumably in
 *  blit.inc). The code passed to it relies on %esi / %edi pointing at the
 *  source / destination line, %ecx holding the pixel count and %edx
 *  holding a bitmap pointer on entry - confirm against the macro.
 */
FUNC(_linear_masked_blit16)
pushl %ebp
movl %esp, %ebp
pushw %es
pushl %edi
pushl %esi
pushl %ebx
movl B_DEST, %edx
movw BMP_SEG(%edx), %es
/* %ds is kept in %bx; presumably BLIT_LOOP needs the copy - TODO confirm */
movw %ds, %bx
cld
_align_
BLIT_LOOP(masked, 2,
/* %edx = mask color, fetched through the vtable of the bitmap in %edx */
movl BMP_VTABLE(%edx), %edx
movl VTABLE_MASK_COLOR(%edx), %edx
_align_
masked_blit_x_loop:
movw (%esi), %ax /* read a 16 bit source pixel */
addl $2, %esi
cmpw %ax, %dx /* compare it with the mask color */
je masked_blit_skip
movw %ax, %es:(%edi) /* write the pixel */
addl $2, %edi
decl %ecx
jg masked_blit_x_loop
jmp masked_blit_x_loop_done
_align_
masked_blit_skip:
addl $2, %edi /* masked pixel: step over the destination pixel */
decl %ecx
jg masked_blit_x_loop
masked_blit_x_loop_done:
)
/* only the destination bitmap gets an UNWRITE_BANK() here */
movl B_DEST, %edx
UNWRITE_BANK()
/* pops mirror the pushes at function entry */
popl %ebx
popl %esi
popl %edi
popw %es
movl %ebp, %esp
popl %ebp
ret /* end of _linear_masked_blit16() */
#endif /* ifdef ALLEGRO_COLOR16 */
- Robert J Ohannessian
There is always one more bug.