Re: [AD] Bug in Allegro's color convertors?

[ Thread Index | Date Index | More lists.liballeg.org/allegro-developers Archives ]


Eric Botcazou wrote:
[snip]
However, these are the color convertors, and should draw on screen what the
user should be seeing (i.e.: it's an actual bug).

...provided that the user uses a correct color format to talk to them.

Right. The user uses 32-bpp, so I expect the screen to look as if it were being drawn on hardware that supports 32-bpp.

Here's a patch to fix this. I have also optimized the MMX 32->24 convertor to take 6.5 clocks/4 pixels instead of 10. The non-MMX version grew from 10 to 12.5 clocks per 4 pixels, though.

[snip]

--
- Robert J Ohannessian
"Microsoft code is probably O(n^20)" (my CS prof)
http://pages.infinit.net/voidstar/
Index: icolconv.s
===================================================================
RCS file: /cvsroot/alleg/allegro/src/misc/icolconv.s,v
retrieving revision 1.23
diff -u -b -r1.23 icolconv.s
--- icolconv.s	14 Jan 2002 11:05:27 -0000	1.23
+++ icolconv.s	13 Apr 2002 13:28:28 -0000
@@ -1250,6 +1250,10 @@
    pushl %edi
 
    /* init register values */
+   movl $0x00FFFFFF, %eax;            /* Get RGB Mask */
+   movd %eax, %mm6;                   /* Low RGB mask in mm6  - [.....RGB] */
+   movd %eax, %mm5;
+   psllq $32, %mm5;                   /* High RGB mask in mm5 - [.RGB....] */
 
    movl ARG1, %eax                    /* eax = src_rect         */
    movl GFXRECT_WIDTH(%eax), %ecx     /* ecx = src_rect->width  */
@@ -1289,24 +1293,33 @@
       next_block_32_to_24:
          movq (%esi), %mm0         /* mm0 = [.RGB1][.RGB0] */
          movq 8(%esi), %mm1        /* mm1 = [.RGB3][.RGB2] */
-         movq %mm0, %mm2
-         movq %mm1, %mm3
-         movq %mm1, %mm4
-         psllq $48, %mm3
-         psllq $40, %mm0
-         psrlq $32, %mm2
-         psrlq $40, %mm0
-         psllq $24, %mm2
-         por %mm3, %mm0
-         por %mm2, %mm0
-         psllq $8, %mm4
-         psllq $40, %mm1
-         psrlq $32, %mm4
-         psrlq $56, %mm1
-         por %mm4, %mm1
+
+         /* Convert from 32-bpp to 24-bpp */
+         movq %mm0, %mm2;          /* Make temp copies of the loaded value */
+         movq %mm1, %mm3;
+         
+         pand %mm6, %mm0;          /* Mask RGB components */
+         pand %mm5, %mm2;
+         pand %mm6, %mm1;
+         pand %mm5, %mm3;
+         
+         psrlq $8, %mm2;           /* Shift [.RGB....] -> [..RGB...] */
+         movq %mm1, %mm4;
+         
+         psrlq $16, %mm1;          /* [.....RGB] -> [.......R] */
+         por %mm2, %mm0;           /* Combine first 2 pixels into [..RGBRGB] */
+         psrlq $24, %mm3;          /* [.RGB....] -> [....RGB.] so movd below stores pixel 3 */
+         psllq $48, %mm4;          /* [.....RGB] - > [GB......] */
+         por %mm3, %mm1;           /* Combine last 2 pixels into [....RGBR] */
+         
+         por %mm4, %mm0;           /* Combine pixels 0, 1 and the G,B of pixel 2 into [GBRGBRGB] */
+         /* Done - the 6.5 cycles / 4 pixels estimate predates the mm3 shift; re-measure */
+         
+         addl $16, %esi;
+
          movq %mm0, (%edi)
          movd %mm1, 8(%edi)
-         addl $16, %esi
+
          addl $12, %edi
 
          decl %ecx
@@ -1337,14 +1350,16 @@
 
          movq (%esi), %mm0         /* read 2 pixels */
 
-         movq %mm0, %mm1
+         /* Convert from 32-bpp to 24-bpp */
+         movq %mm0, %mm2;          /* Make temp copies of the loaded value */
+         
+         pand %mm6, %mm0;          /* Mask RGB components */
+         pand %mm5, %mm2;
 
-         psllq $40, %mm0
-         psrlq $32, %mm1
-         psrlq $40, %mm0
-         psllq $24, %mm1
+         psrlq $8, %mm2;           /* Shift [.RGB....] -> [..RGB...] */
 
-         por %mm1, %mm0
+         por %mm2, %mm0;           /* Combine first 2 pixels into [..RGBRGB] */
+         /* Done - 4 cycles / 2 pixels */
 
          movd %mm0, (%edi)
          psrlq $32, %mm0
@@ -2957,6 +2972,8 @@
    INIT_REGISTERS_NO_MMX(SIZE_4, SIZE_3, LOOP_RATIO_1)
 #endif
 
+   movl $0xFFFFFF, %ebp;
+
    _align_
    next_line_32_to_24_no_mmx:
       movl MYLOCAL1, %ecx
@@ -2972,26 +2989,42 @@
       /* 100% Pentium pairable loop */
       /* 10 cycles = 9 cycles/4 pixels + 1 cycle loop */
       next_block_32_to_24_no_mmx:
-         movl 4(%esi), %ebx     /* ebx = pixel2                    */
-         addl $12, %edi         /* 4 pixels written                */
-         movl %ebx, %ebp        /* ebp = pixel2                    */
-         movl 12(%esi), %edx    /* edx = pixel4                    */
-         shll $8, %edx          /* edx = pixel4 << 8               */
-         movl (%esi), %eax      /* eax = pixel1                    */
-         shll $24, %ebx         /* ebx = b8 pixel2 << 24           */
-         movb 10(%esi), %dl     /* edx = pixel4 | r8 pixel3        */
-         orl  %eax, %ebx        /* ebx = b8 pixel2 | pixel1        */
-         movl %ebp, %eax        /* eax = pixel2                    */
-         shrl $8, %eax          /* eax = r8g8 pixel2               */
-         movl %ebx, -12(%edi)   /* write pixel1..b8 pixel2         */
-         movl 8(%esi), %ebx     /* ebx = pixel 3                   */
-         movl %edx, -4(%edi)    /* write r8 pixel3..pixel4         */
-         shll $16, %ebx         /* ebx = g8b8 pixel3 << 16         */
-         addl $16, %esi         /* 4 pixels read                   */
-         orl  %ebx, %eax        /* eax = g8b8 pixel3 | r8g8 pixel2 */
-         decl %ecx
-         movl %eax, -8(%edi)    /* write g8r8 pixel2..b8g8 pixel3  */
-         jnz next_block_32_to_24_no_mmx
+         movl 4(%esi), %ebx;    /* Read 2[ARGB]            */
+         addl $12, %edi;        /* 4 pixels written        */
+
+         movl (%esi), %eax;     /* Read 1[ARGB]            */
+         movl %ebx, %edx;       /* Copy pixel 2 */
+         
+         shll $24, %edx;        /* 2[ARGB] -> 2[B...] */
+         andl %ebp, %ebx;       /* 2[ARGB] -> 2[.RGB] */
+         
+         shrl $8, %ebx;         /* 2[.RGB] -> 2[..RG] */
+         andl %ebp, %eax;       /* 1[ARGB] -> 1[.RGB] */
+         
+         orl %edx, %eax;        /* 2[B...] | 1[.RGB] -> 21[BRGB] */
+         movl 8(%esi), %edx;    /* Read 3[ARGB] from the source (esi), not the destination */
+         
+         movl %eax, -12(%edi);  /* Write [B2 R1 G1 B1] */
+         movl %edx, %eax;       /* Copy pixel 3 */
+         
+         shll $16, %edx;        /* 3[ARGB] -> 3[GB..] */
+         andl %ebp, %eax;       /* 3[ARGB] -> 3[.RGB] */
+         
+         shrl $16, %eax;        /* 3[.RGB] -> 3[...R] */
+         orl %edx, %ebx;        /* 3[GB..] | 2[..RG] -> 32[GBRG] */
+         
+         movl 12(%esi), %edx;   /* Read 4[ARGB] (displacement takes no '$' prefix) */
+         movl %ebx, -8(%edi);   /* Write [G3 B3 R2 G2] */
+         
+         shll $8, %edx;         /* 4[ARGB] -> 4[RGB.] */
+         addl $16, %esi;        /* We read 4 pixels */
+         
+         orl %edx, %eax;        /* 4[RGB.] | 3[...R] -> 43[RGBR] */
+         decl %ecx;             /* Loop counter */
+         
+         movl %eax, -4(%edi);   /* Write [R4 G4 B4 R3] */
+         
+         jnz next_block_32_to_24_no_mmx /* Loop */
 
       popl %edx
 
@@ -3019,7 +3052,7 @@
          movl 4(%esi), %ebx
          addl $8, %esi
          movl %ebx, %ecx
-         andl $0xFFFFFF, %eax
+         andl %ebp, %eax
          shll $24, %ebx
          orl %ebx, %eax
          shrl $8, %ecx


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/