Re: [AD] Bug in Allegro's color convertors? |
[ Thread Index |
Date Index
| More lists.liballeg.org/allegro-developers Archives
]
Eric Botcazou wrote:
[snip]
However, these are the color convertors, and should draw on screen what the
user should be seeing (i.e.: it's an actual bug).
...provided that the user uses a correct color format to talk to them.
Right. The user uses 32-bpp. So I expect that the screen looks like it was
being drawn on hardware that supports 32-bpp.
Here's a patch to fix this. I have also optimized the MMX 32->24 convertor
to take 6.5 clocks/4 pixels instead of 10. The non-MMX version grew from 10
to 12.5 clocks though.
[snip]
--
- Robert J Ohannessian
"Microsoft code is probably O(n^20)" (my CS prof)
http://pages.infinit.net/voidstar/
Index: icolconv.s
===================================================================
RCS file: /cvsroot/alleg/allegro/src/misc/icolconv.s,v
retrieving revision 1.23
diff -u -b -r1.23 icolconv.s
--- icolconv.s 14 Jan 2002 11:05:27 -0000 1.23
+++ icolconv.s 13 Apr 2002 13:28:28 -0000
@@ -1250,6 +1250,10 @@
pushl %edi
/* init register values */
+ movl $0x00FFFFFF, %eax; /* Get RGB Mask */
+ movd %eax, %mm6; /* Low RGB mask in mm6 - [.....RGB] */
+ movd %eax, %mm5;
+ psllq $32, %mm5; /* High RGB mask in mm5 - [.RGB....] */
movl ARG1, %eax /* eax = src_rect */
movl GFXRECT_WIDTH(%eax), %ecx /* ecx = src_rect->width */
@@ -1289,24 +1293,33 @@
next_block_32_to_24:
movq (%esi), %mm0 /* mm0 = [.RGB1][.RGB0] */
movq 8(%esi), %mm1 /* mm1 = [.RGB3][.RGB2] */
- movq %mm0, %mm2
- movq %mm1, %mm3
- movq %mm1, %mm4
- psllq $48, %mm3
- psllq $40, %mm0
- psrlq $32, %mm2
- psrlq $40, %mm0
- psllq $24, %mm2
- por %mm3, %mm0
- por %mm2, %mm0
- psllq $8, %mm4
- psllq $40, %mm1
- psrlq $32, %mm4
- psrlq $56, %mm1
- por %mm4, %mm1
+
+ /* Convert from 32-bpp to 24-bpp: pack 4 x [.RGB] (16 bytes) into 12 bytes */
+ movq %mm0, %mm2; /* Make temp copies of the loaded value */
+ movq %mm1, %mm3;
+
+ pand %mm6, %mm0; /* mm0 = [.....RGB] (pixel 0) */
+ pand %mm5, %mm2; /* mm2 = [.RGB....] (pixel 1) */
+ pand %mm6, %mm1; /* mm1 = [.....RGB] (pixel 2) */
+ pand %mm5, %mm3; /* mm3 = [.RGB....] (pixel 3) */
+
+ psrlq $8, %mm2; /* Shift [.RGB....] -> [..RGB...] */
+ movq %mm1, %mm4;
+ psrlq $24, %mm3; /* Shift [.RGB....] -> [....RGB.]; movd below stores only the low dword */
+ psrlq $16, %mm1; /* [.....RGB] -> [.......R] */
+ por %mm2, %mm0; /* Combine first 2 pixels into [..RGBRGB] */
+
+ psllq $48, %mm4; /* [.....RGB] -> [GB......] */
+ por %mm3, %mm1; /* Combine last 2 pixels into [....RGBR] */
+
+ por %mm4, %mm0; /* Combine pixels 0, 1 and 2 into [GBRGBRGB] */
+ /* NOTE(review): the 6.5 cycles / 4 pixels figure predates the extra shift - re-measure */
+
+ addl $16, %esi;
+
movq %mm0, (%edi)
movd %mm1, 8(%edi)
- addl $16, %esi
+
addl $12, %edi
decl %ecx
@@ -1337,14 +1350,16 @@
movq (%esi), %mm0 /* read 2 pixels */
- movq %mm0, %mm1
+ /* Convert from 32-bpp to 24-bpp */
+ movq %mm0, %mm2; /* Make temp copies of the loaded value */
+
+ pand %mm6, %mm0; /* Mask RGB components */
+ pand %mm5, %mm2;
- psllq $40, %mm0
- psrlq $32, %mm1
- psrlq $40, %mm0
- psllq $24, %mm1
+ psrlq $8, %mm2; /* Shift [.RGB....] -> [..RGB...] */
- por %mm1, %mm0
+ por %mm2, %mm0; /* Combine first 2 pixels into [..RGBRGB] */
+ /* Done - 4 cycles / 2 pixels */
movd %mm0, (%edi)
psrlq $32, %mm0
@@ -2957,6 +2972,8 @@
INIT_REGISTERS_NO_MMX(SIZE_4, SIZE_3, LOOP_RATIO_1)
#endif
+ movl $0xFFFFFF, %ebp;
+
_align_
next_line_32_to_24_no_mmx:
movl MYLOCAL1, %ecx
@@ -2972,26 +2989,42 @@
/* 100% Pentium pairable loop */
/* 10 cycles = 9 cycles/4 pixels + 1 cycle loop */
next_block_32_to_24_no_mmx:
- movl 4(%esi), %ebx /* ebx = pixel2 */
- addl $12, %edi /* 4 pixels written */
- movl %ebx, %ebp /* ebp = pixel2 */
- movl 12(%esi), %edx /* edx = pixel4 */
- shll $8, %edx /* edx = pixel4 << 8 */
- movl (%esi), %eax /* eax = pixel1 */
- shll $24, %ebx /* ebx = b8 pixel2 << 24 */
- movb 10(%esi), %dl /* edx = pixel4 | r8 pixel3 */
- orl %eax, %ebx /* ebx = b8 pixel2 | pixel1 */
- movl %ebp, %eax /* eax = pixel2 */
- shrl $8, %eax /* eax = r8g8 pixel2 */
- movl %ebx, -12(%edi) /* write pixel1..b8 pixel2 */
- movl 8(%esi), %ebx /* ebx = pixel 3 */
- movl %edx, -4(%edi) /* write r8 pixel3..pixel4 */
- shll $16, %ebx /* ebx = g8b8 pixel3 << 16 */
- addl $16, %esi /* 4 pixels read */
- orl %ebx, %eax /* eax = g8b8 pixel3 | r8g8 pixel2 */
- decl %ecx
- movl %eax, -8(%edi) /* write g8r8 pixel2..b8g8 pixel3 */
- jnz next_block_32_to_24_no_mmx
+ movl 4(%esi), %ebx; /* Read 2[ARGB] */
+ addl $12, %edi; /* 4 pixels written (stores below use negative offsets) */
+
+ movl (%esi), %eax; /* Read 1[ARGB] */
+ movl %ebx, %edx; /* Copy pixel 2 */
+
+ shll $24, %edx; /* 2[ARGB] -> 2[B...] */
+ andl %ebp, %ebx; /* 2[ARGB] -> 2[.RGB] (ebp = 0xFFFFFF mask) */
+
+ shrl $8, %ebx; /* 2[.RGB] -> 2[..RG] */
+ andl %ebp, %eax; /* 1[ARGB] -> 1[.RGB] */
+
+ orl %edx, %eax; /* 2[B...] | 1[.RGB] -> 21[BRGB] */
+ movl 8(%esi), %edx; /* Read 3[ARGB] from the source; edi is the destination */
+
+ movl %eax, -12(%edi); /* Write [B2 R1 G1 B1] */
+ movl %edx, %eax; /* Copy pixel 3 */
+
+ shll $16, %edx; /* 3[ARGB] -> 3[GB..] */
+ andl %ebp, %eax; /* 3[ARGB] -> 3[.RGB] */
+
+ shrl $16, %eax; /* 3[.RGB] -> 3[...R] */
+ orl %edx, %ebx; /* 3[GB..] | 2[..RG] -> 32[GBRG] */
+
+ movl 12(%esi), %edx; /* Read 4[ARGB] */
+ movl %ebx, -8(%edi); /* Write [G3 B3 R2 G2] */
+
+ shll $8, %edx; /* 4[ARGB] -> 4[RGB.]; the shift discards alpha, no mask needed */
+ addl $16, %esi; /* We read 4 pixels */
+
+ orl %edx, %eax; /* 4[RGB.] | 3[...R] -> 43[RGBR] */
+ decl %ecx; /* Loop counter */
+
+ movl %eax, -4(%edi); /* Write [R4 G4 B4 R3] */
+
+ jnz next_block_32_to_24_no_mmx /* Loop */
popl %edx
@@ -3019,7 +3052,7 @@
movl 4(%esi), %ebx
addl $8, %esi
movl %ebx, %ecx
- andl $0xFFFFFF, %eax
+ andl %ebp, %eax
shll $24, %ebx
orl %ebx, %eax
shrl $8, %ecx