Re: [AD] blender operations SRC and DST

[ Thread Index | Date Index | More lists.liballeg.org/allegro-developers Archives ]


On 06/08/2011 09:15 AM, Jon Rafkind wrote:
> On 06/08/2011 10:14 AM, Peter Wang wrote:
>> On 2011-06-08, Elias Pschernig <elias.pschernig@xxxxxxxxxx> wrote:
>>> On Wed, Jun 8, 2011 at 5:29 PM, Peter Wang <novalazy@xxxxxxxxxx> wrote:
>>>> It's much worse on my machine, 10-12%.  I'd want to see what we can do
>>>> to avoid that kind of regression.
>>>>
>>>> Peter
>>>>
>>>> before patch:
>>>> Plain blit: 659.283 FPS
>>>> Scaled blit: 663.27 FPS
>>>> Rotated blit: 678.819 FPS
>>>>
>>>> after patch:
>>>> Plain blit: 576.974 FPS
>>>> Scaled blit: 592.111 FPS
>>>> Rotated blit: 610.166 FPS
>>>>
>>> Looking at _al_blend_inline, I guess we can shuffle the code around so
>>> only the two new blend modes do anything differently but the existing
>>> ones remain as is.
>> Yes.  In addition (or afterwards) we might want to generate specialised
>> copies of _al_blend_inline for common operations, rather than relying on
>> the compiler to prune the dead branches in each inlined copy.  Manually
>> commenting out some of the (untaken) switch cases affects the benchmark
>> results so perhaps the compiler isn't doing that as well as I hoped.
>>
> Since the slowdowns come from scanline_drawers we could make it use a
> more specific al_blend_alpha_inline when it knows the arguments only
> deal with alpha (or ONE / ZERO). So like
>
> if (OP == ALLEGRO_ADD && src_mode == ALLEGRO_ALPHA ...){
>   ...
>   _al_blend_inline_alpha(...);
> } else {
>    /* do general blending */
>    _al_blend_inline(...);
> }
>
> I haven't tested this yet because it requires some non-trivial changes
> to make_scanline_drawers.py. Hm but probably I could just make local
> changes to scanline_drawers.c and see how it goes..

Making scanline_drawers.c call the alpha blender when it's only using
alpha channel factors seems to work. This patch adds
_al_blend_alpha_inline, which is what the old _al_blend_inline was. Now
_al_blend_inline deals with ALLEGRO_COLOR in its full generality. I also
changed make_scanline_drawers.py to call _al_blend_alpha_inline when
appropriate.

You'll have to apply this patch as well as the original patch that added
ALLEGRO_DST_COLOR to get things to compile. There's some cruft left over
that I will eventually clean up if this is all ok.

Before patch:
 Plain blit: 213.454 FPS
 Scaled blit: 213.883 FPS
 Rotated blit: 223.992 FPS

After:
 Plain blit: 207.739 FPS
 Scaled blit: 212.275 FPS
 Rotated blit: 220.875 FPS
Index: include/allegro5/internal/aintern_blend.h
===================================================================
--- include/allegro5/internal/aintern_blend.h	(revision 14764)
+++ include/allegro5/internal/aintern_blend.h	(working copy)
@@ -25,81 +25,275 @@
 
 #ifndef _AL_NO_BLEND_INLINE_FUNC
 
+/* only cares about alpha blending modes */
 static _AL_ALWAYS_INLINE float
-get_factor(enum ALLEGRO_BLEND_MODE operation, float alpha)
+get_alpha_factor(enum ALLEGRO_BLEND_MODE operation, float alpha)
 {
    switch (operation) {
        case ALLEGRO_ZERO: return 0;
        case ALLEGRO_ONE: return 1;
        case ALLEGRO_ALPHA: return alpha;
        case ALLEGRO_INVERSE_ALPHA: return 1 - alpha;
+       default: ASSERT(false); return 0;
    }
    ASSERT(false);
    return 0; /* silence warning in release build */
 }
 
+/* puts the blending factor in an ALLEGRO_COLOR object.
+ */
+static _AL_ALWAYS_INLINE void get_factor(enum ALLEGRO_BLEND_MODE operation, const ALLEGRO_COLOR * source, const ALLEGRO_COLOR * dest, ALLEGRO_COLOR * factor)
+{
+   switch(operation) {
+       case ALLEGRO_ZERO: {
+          factor->r = factor->g = factor->b = factor->a = 0;
+          break;
+       }
+       case ALLEGRO_ONE: {
+          factor->r = factor->g = factor->b = factor->a = 1;
+          break;
+       }
+       case ALLEGRO_ALPHA: {
+          factor->r = factor->g = factor->b = factor->a = source->a;
+          break;
+       }
+       case ALLEGRO_INVERSE_ALPHA: {
+          factor->r = factor->g = factor->b = factor->a = 1 - source->a;
+          break;
+       }
+       case ALLEGRO_SRC_COLOR: {
+           *factor = *source;
+           break;
+       }
+       case ALLEGRO_DST_COLOR: {
+           *factor = *dest;
+           break;
+       }
+       default: {
+           ASSERT(false);
+           break;
+       }
+   }
+}
 
+#if 0
 static _AL_ALWAYS_INLINE
+bool needs_color_factor(enum ALLEGRO_BLEND_MODE operation){
+   switch (operation){
+      case ALLEGRO_SRC_COLOR:
+      case ALLEGRO_DST_COLOR: return true;
+      case ALLEGRO_ZERO:
+      case ALLEGRO_ONE:
+      case ALLEGRO_ALPHA:
+      case ALLEGRO_INVERSE_ALPHA: return false;
+   }
+   ASSERT(false);
+   return 0;
+}
+
+static _AL_ALWAYS_INLINE
+void _al_blend_inline_color(
+   const ALLEGRO_COLOR *scol, const ALLEGRO_COLOR *dcol,
+   int op, int src_, int dst_, int aop, int asrc_, int adst_,
+   ALLEGRO_COLOR *result)
+{
+   float asrc, adst;
+   ALLEGRO_COLOR src, dst;
+
+   asrc = get_alpha_factor(asrc_, result->a);
+   adst = get_alpha_factor(adst_, result->a);
+
+
+      get_factor(src_, scol, dcol, &src);
+      get_factor(dst_, scol, dcol, &dst);
+
+      #define BLEND(c, src, dst) \
+         result->c = OP(result->c * src.c, dcol->c * dst.c);
+      switch (op) {
+         case ALLEGRO_ADD:
+            #define OP(x, y) _ALLEGRO_MIN(1, x + y)
+            BLEND(r, src, dst)
+            BLEND(g, src, dst)
+            BLEND(b, src, dst)
+            #undef OP
+            break;
+         case ALLEGRO_SRC_MINUS_DEST:
+            #define OP(x, y) _ALLEGRO_MAX(0, x - y)
+            BLEND(r, src, dst)
+            BLEND(g, src, dst)
+            BLEND(b, src, dst)
+            #undef OP
+            break;
+         case ALLEGRO_DEST_MINUS_SRC:
+            #define OP(x, y) _ALLEGRO_MAX(0, y - x)
+            BLEND(r, src, dst)
+            BLEND(g, src, dst)
+            BLEND(b, src, dst)
+            #undef OP
+            break;
+      }
+      #undef BLEND
+
+      #define BLEND(c, src, dst) \
+         result->c = OP(result->c * src, dcol->c * dst);
+      switch (aop) {
+         case ALLEGRO_ADD:
+            #define OP(x, y) _ALLEGRO_MIN(1, x + y)
+            BLEND(a, asrc, adst)
+            #undef OP
+            break;
+         case ALLEGRO_SRC_MINUS_DEST:
+            #define OP(x, y) _ALLEGRO_MAX(0, x - y)
+            BLEND(a, asrc, adst)
+            #undef OP
+            break;
+         case ALLEGRO_DEST_MINUS_SRC:
+            #define OP(x, y) _ALLEGRO_MAX(0, y - x)
+            BLEND(a, asrc, adst)
+            #undef OP
+            break;
+      }
+      #undef BLEND
+}
+#endif
+
+/* only call this if the blend modes are one of
+ * ALLEGRO_ONE, ALLEGRO_ZERO, ALLEGRO_ALPHA, ALLEGRO_INVERSE_ALPHA
+ */
+static _AL_ALWAYS_INLINE
+void _al_blend_alpha_inline(
+   const ALLEGRO_COLOR *scol, const ALLEGRO_COLOR *dcol,
+   int op, int src_, int dst_, int aop, int asrc_, int adst_,
+   ALLEGRO_COLOR *result)
+{
+   float asrc, adst;
+   float src, dst;
+
+   result->r = scol->r;
+   result->g = scol->g;
+   result->b = scol->b;
+   result->a = scol->a;
+
+   asrc = get_alpha_factor(asrc_, result->a);
+   adst = get_alpha_factor(adst_, result->a);
+
+      src = get_alpha_factor(src_, result->a);
+      dst = get_alpha_factor(dst_, result->a);
+
+      #define BLEND(c, src, dst) \
+         result->c = OP(result->c * src, dcol->c * dst);
+      switch (op) {
+         case ALLEGRO_ADD:
+            #define OP(x, y) _ALLEGRO_MIN(1, x + y)
+            BLEND(r, src, dst)
+            BLEND(g, src, dst)
+            BLEND(b, src, dst)
+            #undef OP
+            break;
+         case ALLEGRO_SRC_MINUS_DEST:
+            #define OP(x, y) _ALLEGRO_MAX(0, x - y)
+            BLEND(r, src, dst)
+            BLEND(g, src, dst)
+            BLEND(b, src, dst)
+            #undef OP
+            break;
+         case ALLEGRO_DEST_MINUS_SRC:
+            #define OP(x, y) _ALLEGRO_MAX(0, y - x)
+            BLEND(r, src, dst)
+            BLEND(g, src, dst)
+            BLEND(b, src, dst)
+            #undef OP
+            break;
+      }
+
+      switch (aop) {
+         case ALLEGRO_ADD:
+            #define OP(x, y) _ALLEGRO_MIN(1, x + y)
+            BLEND(a, asrc, adst)
+            #undef OP
+            break;
+         case ALLEGRO_SRC_MINUS_DEST:
+            #define OP(x, y) _ALLEGRO_MAX(0, x - y)
+            BLEND(a, asrc, adst)
+            #undef OP
+            break;
+         case ALLEGRO_DEST_MINUS_SRC:
+            #define OP(x, y) _ALLEGRO_MAX(0, y - x)
+            BLEND(a, asrc, adst)
+            #undef OP
+            break;
+      }
+      #undef BLEND
+}
+
+/* call this for general blending. it's a little slower than just using alpha */
+static _AL_ALWAYS_INLINE
 void _al_blend_inline(
    const ALLEGRO_COLOR *scol, const ALLEGRO_COLOR *dcol,
    int op, int src_, int dst_, int aop, int asrc_, int adst_,
    ALLEGRO_COLOR *result)
 {
-   float src, dst, asrc, adst;
+   float asrc, adst;
+   ALLEGRO_COLOR src, dst;
 
    result->r = scol->r;
    result->g = scol->g;
    result->b = scol->b;
    result->a = scol->a;
+   
+   asrc = get_alpha_factor(asrc_, result->a);
+   adst = get_alpha_factor(adst_, result->a);
 
-   src = get_factor(src_, result->a);
-   dst = get_factor(dst_, result->a);
-   asrc = get_factor(asrc_, result->a);
-   adst = get_factor(adst_, result->a);
+   get_factor(src_, scol, dcol, &src);
+   get_factor(dst_, scol, dcol, &dst);
 
-   #define BLEND(c, src, dst) \
-      result->c = OP(result->c * src, dcol->c * dst);
-   switch (op) {
-      case ALLEGRO_ADD:
-         #define OP(x, y) _ALLEGRO_MIN(1, x + y)
-         BLEND(r, src, dst)
-         BLEND(g, src, dst)
-         BLEND(b, src, dst)
-         #undef OP
-         break;
-      case ALLEGRO_SRC_MINUS_DEST:
-         #define OP(x, y) _ALLEGRO_MAX(0, x - y)
-         BLEND(r, src, dst)
-         BLEND(g, src, dst)
-         BLEND(b, src, dst)
-         #undef OP
-         break;
-      case ALLEGRO_DEST_MINUS_SRC:
-         #define OP(x, y) _ALLEGRO_MAX(0, y - x)
-         BLEND(r, src, dst)
-         BLEND(g, src, dst)
-         BLEND(b, src, dst)
-         #undef OP
-         break;
-   }
+      #define BLEND(c, src, dst) \
+         result->c = OP(result->c * src.c, dcol->c * dst.c);
+      switch (op) {
+         case ALLEGRO_ADD:
+            #define OP(x, y) _ALLEGRO_MIN(1, x + y)
+            BLEND(r, src, dst)
+            BLEND(g, src, dst)
+            BLEND(b, src, dst)
+            #undef OP
+            break;
+         case ALLEGRO_SRC_MINUS_DEST:
+            #define OP(x, y) _ALLEGRO_MAX(0, x - y)
+            BLEND(r, src, dst)
+            BLEND(g, src, dst)
+            BLEND(b, src, dst)
+            #undef OP
+            break;
+         case ALLEGRO_DEST_MINUS_SRC:
+            #define OP(x, y) _ALLEGRO_MAX(0, y - x)
+            BLEND(r, src, dst)
+            BLEND(g, src, dst)
+            BLEND(b, src, dst)
+            #undef OP
+            break;
+      }
+      #undef BLEND
 
-   switch (aop) {
-      case ALLEGRO_ADD:
-         #define OP(x, y) _ALLEGRO_MIN(1, x + y)
-         BLEND(a, asrc, adst)
-         #undef OP
-         break;
-      case ALLEGRO_SRC_MINUS_DEST:
-         #define OP(x, y) _ALLEGRO_MAX(0, x - y)
-         BLEND(a, asrc, adst)
-         #undef OP
-         break;
-      case ALLEGRO_DEST_MINUS_SRC:
-         #define OP(x, y) _ALLEGRO_MAX(0, y - x)
-         BLEND(a, asrc, adst)
-         #undef OP
-         break;
-   }
+      #define BLEND(c, src, dst) \
+         result->c = OP(result->c * src, dcol->c * dst);
+      switch (aop) {
+         case ALLEGRO_ADD:
+            #define OP(x, y) _ALLEGRO_MIN(1, x + y)
+            BLEND(a, asrc, adst)
+            #undef OP
+            break;
+         case ALLEGRO_SRC_MINUS_DEST:
+            #define OP(x, y) _ALLEGRO_MAX(0, x - y)
+            BLEND(a, asrc, adst)
+            #undef OP
+            break;
+         case ALLEGRO_DEST_MINUS_SRC:
+            #define OP(x, y) _ALLEGRO_MAX(0, y - x)
+            BLEND(a, asrc, adst)
+            #undef OP
+            break;
+      }
+      #undef BLEND
 }
 
 #endif
Index: misc/make_scanline_drawers.py
===================================================================
--- misc/make_scanline_drawers.py	(revision 14764)
+++ misc/make_scanline_drawers.py	(working copy)
@@ -16,12 +16,12 @@
 
 def make_drawer(name):
    global texture, grad, solid, shade, opaque, white
-   texture = (name.find("_texture_") != -1)
-   grad = (name.find("_grad_") != -1)
-   solid = (name.find("_solid_") != -1)
-   shade = (name.find("_shade") != -1)
-   opaque = (name.find("_opaque") != -1)
-   white = (name.find("_white") != -1)
+   texture = "_texture_" in name
+   grad = "_grad_" in name
+   solid = "_solid_" in name
+   shade = "_shade" in name
+   opaque = "_opaque" in name
+   white = "_white" in name
 
    if grad and solid:
       raise Exception("grad and solid")
@@ -148,7 +148,8 @@
             op_alpha='ALLEGRO_ADD',
             dst_mode='ALLEGRO_INVERSE_ALPHA',
             dst_alpha='ALLEGRO_INVERSE_ALPHA',
-            if_format='ALLEGRO_PIXEL_FORMAT_ARGB_8888'
+            if_format='ALLEGRO_PIXEL_FORMAT_ARGB_8888',
+            alpha_only=True
             )
       print "else"
       make_if_blender_loop(
@@ -158,7 +159,8 @@
             op_alpha='ALLEGRO_ADD',
             dst_mode='ALLEGRO_ONE',
             dst_alpha='ALLEGRO_ONE',
-            if_format='ALLEGRO_PIXEL_FORMAT_ARGB_8888'
+            if_format='ALLEGRO_PIXEL_FORMAT_ARGB_8888',
+            alpha_only=True
             )
       print "else"
 
@@ -193,7 +195,8 @@
       dst_alpha='dst_alpha',
       src_format='src_format',
       dst_format='dst_format',
-      if_format=None
+      if_format=None,
+      alpha_only=False
       ):
    print interp("""\
       if (op == #{op} &&
@@ -212,7 +215,8 @@
             op_alpha=op_alpha,
             dst_mode=dst_mode,
             dst_alpha=dst_alpha,
-            if_format=if_format
+            if_format=if_format,
+            alpha_only=alpha_only
             )
       print "else"
 
@@ -222,7 +226,8 @@
       src_alpha=src_alpha,
       op_alpha=op_alpha,
       dst_mode=dst_mode,
-      dst_alpha=dst_alpha)
+      dst_alpha=dst_alpha,
+      alpha_only=alpha_only)
 
    print "}"
 
@@ -237,7 +242,8 @@
       dst_format='dst_format',
       src_size='src_size',
       if_format=None,
-      copy_format=False
+      copy_format=False,
+      alpha_only=False
       ):
 
    if if_format:
@@ -282,7 +288,8 @@
             dst_format=dst_format,
             src_size=src_size,
             copy_format=copy_format,
-            tiling=False
+            tiling=False,
+            alpha_only=alpha_only
             )
          print "} else"
 
@@ -296,7 +303,8 @@
       src_format=src_format,
       dst_format=dst_format,
       src_size=src_size,
-      copy_format=copy_format
+      copy_format=copy_format,
+      alpha_only=alpha_only
       )
 
    print "}"
@@ -312,7 +320,8 @@
       dst_format='dst_format',
       src_size='src_size',
       copy_format=False,
-      tiling=True
+      tiling=True,
+      alpha_only=True
       ):
 
    print "{"
@@ -389,12 +398,15 @@
          }
          """)
    elif shade:
+      blend = "_al_blend_inline"
+      if alpha_only:
+         blend = "_al_blend_alpha_inline"
       print interp("""\
          {
             ALLEGRO_COLOR dst_color;
             ALLEGRO_COLOR result;
             _AL_INLINE_GET_PIXEL(#{dst_format}, dst_data, dst_color, false);
-            _al_blend_inline(&src_color, &dst_color,
+            #{blend}(&src_color, &dst_color,
                #{op}, #{src_mode}, #{dst_mode},
                #{op_alpha}, #{src_alpha}, #{dst_alpha},
                &result);


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/