Re: [AD] blender operations SRC and DST |
[ Thread Index |
Date Index
| More lists.liballeg.org/allegro-developers Archives
]
On 06/08/2011 09:15 AM, Jon Rafkind wrote:
> On 06/08/2011 10:14 AM, Peter Wang wrote:
>> On 2011-06-08, Elias Pschernig <elias.pschernig@xxxxxxxxxx> wrote:
>>> On Wed, Jun 8, 2011 at 5:29 PM, Peter Wang <novalazy@xxxxxxxxxx> wrote:
>>>> It's much worse on my machine, 10-12%. I'd want to see what we can do
>>>> to avoid that kind of regression.
>>>>
>>>> Peter
>>>>
>>>> before patch:
>>>> Plain blit: 659.283 FPS
>>>> Scaled blit: 663.27 FPS
>>>> Rotated blit: 678.819 FPS
>>>>
>>>> after patch:
>>>> Plain blit: 576.974 FPS
>>>> Scaled blit: 592.111 FPS
>>>> Rotated blit: 610.166 FPS
>>>>
>>> Looking at _al_blend_inline, I guess we can shuffle the code around so
>>> only the two new blend modes do anything differently but the existing
>>> ones remain as is.
>> Yes. In addition (or afterwards) we might want to generate specialised
>> copies of _al_blend_inline for common operations, rather than relying on
>> the compiler to prune the dead branches in each inlined copy. Manually
>> commenting out some of the (untaken) switch cases affects the benchmark
>> results so perhaps the compiler isn't doing that as well as I hoped.
>>
> Since the slowdowns come from scanline_drawers we could make it use a
> more specific al_blend_alpha_inline when it knows the arguments only
> deal with alpha (or ONE / ZERO). So like
>
> if (OP == ALLEGRO_ADD && src_mode == ALLEGRO_ALPHA ...){
> ...
> _al_blend_inline_alpha(...);
> } else {
> /* do general blending */
> _al_blend_inline(...);
> }
>
> I haven't tested this yet because it requires some non-trivial changes
> to make_scanline_drawers.py. Hm but probably I could just make local
> changes to scanline_drawers.c and see how it goes..
Making scanline_drawers.c call the alpha blender if it's only using
alpha channel factors seems to work. This patch adds
_al_blend_alpha_inline which is what the old _al_blend_inline was. Now
_al_blend_inline deals with ALLEGRO_COLOR in its full generality. Also I
changed make_scanline_drawers.py to call _al_blend_alpha_inline when
appropriate.
You'll have to apply this patch as well as the original patch that added
ALLEGRO_DST_COLOR to get things to compile. There's some leftover cruft that I
will eventually clean up if this is all ok.
Before patch:
Plain blit: 213.454 FPS
Scaled blit: 213.883 FPS
Rotated blit: 223.992 FPS
After:
Plain blit: 207.739 FPS
Scaled blit: 212.275 FPS
Rotated blit: 220.875 FPS
Index: include/allegro5/internal/aintern_blend.h
===================================================================
--- include/allegro5/internal/aintern_blend.h (revision 14764)
+++ include/allegro5/internal/aintern_blend.h (working copy)
@@ -25,81 +25,275 @@
#ifndef _AL_NO_BLEND_INLINE_FUNC
+/* only cares about alpha blending modes */
static _AL_ALWAYS_INLINE float
-get_factor(enum ALLEGRO_BLEND_MODE operation, float alpha)
+get_alpha_factor(enum ALLEGRO_BLEND_MODE operation, float alpha)
{
switch (operation) {
case ALLEGRO_ZERO: return 0;
case ALLEGRO_ONE: return 1;
case ALLEGRO_ALPHA: return alpha;
case ALLEGRO_INVERSE_ALPHA: return 1 - alpha;
+ default: ASSERT(false); return 0;
}
ASSERT(false);
return 0; /* silence warning in release build */
}
+/* puts the blending factor in an ALLEGRO_COLOR object.
+ */
+static _AL_ALWAYS_INLINE void get_factor(enum ALLEGRO_BLEND_MODE operation, const ALLEGRO_COLOR * source, const ALLEGRO_COLOR * dest, ALLEGRO_COLOR * factor)
+{
+ switch(operation) {
+ case ALLEGRO_ZERO: {
+ factor->r = factor->g = factor->b = factor->a = 0;
+ break;
+ }
+ case ALLEGRO_ONE: {
+ factor->r = factor->g = factor->b = factor->a = 1;
+ break;
+ }
+ case ALLEGRO_ALPHA: {
+ factor->r = factor->g = factor->b = factor->a = source->a;
+ break;
+ }
+ case ALLEGRO_INVERSE_ALPHA: {
+ factor->r = factor->g = factor->b = factor->a = 1 - source->a;
+ break;
+ }
+ case ALLEGRO_SRC_COLOR: {
+ *factor = *source;
+ break;
+ }
+ case ALLEGRO_DST_COLOR: {
+ *factor = *dest;
+ break;
+ }
+ default: {
+ ASSERT(false);
+ break;
+ }
+ }
+}
+#if 0
static _AL_ALWAYS_INLINE
+bool needs_color_factor(enum ALLEGRO_BLEND_MODE operation){
+ switch (operation){
+ case ALLEGRO_SRC_COLOR:
+ case ALLEGRO_DST_COLOR: return true;
+ case ALLEGRO_ZERO:
+ case ALLEGRO_ONE:
+ case ALLEGRO_ALPHA:
+ case ALLEGRO_INVERSE_ALPHA: return false;
+ }
+ ASSERT(false);
+ return 0;
+}
+
+static _AL_ALWAYS_INLINE
+void _al_blend_inline_color(
+ const ALLEGRO_COLOR *scol, const ALLEGRO_COLOR *dcol,
+ int op, int src_, int dst_, int aop, int asrc_, int adst_,
+ ALLEGRO_COLOR *result)
+{
+ float asrc, adst;
+ ALLEGRO_COLOR src, dst;
+
+ asrc = get_alpha_factor(asrc_, result->a);
+ adst = get_alpha_factor(adst_, result->a);
+
+
+ get_factor(src_, scol, dcol, &src);
+ get_factor(dst_, scol, dcol, &dst);
+
+ #define BLEND(c, src, dst) \
+ result->c = OP(result->c * src.c, dcol->c * dst.c);
+ switch (op) {
+ case ALLEGRO_ADD:
+ #define OP(x, y) _ALLEGRO_MIN(1, x + y)
+ BLEND(r, src, dst)
+ BLEND(g, src, dst)
+ BLEND(b, src, dst)
+ #undef OP
+ break;
+ case ALLEGRO_SRC_MINUS_DEST:
+ #define OP(x, y) _ALLEGRO_MAX(0, x - y)
+ BLEND(r, src, dst)
+ BLEND(g, src, dst)
+ BLEND(b, src, dst)
+ #undef OP
+ break;
+ case ALLEGRO_DEST_MINUS_SRC:
+ #define OP(x, y) _ALLEGRO_MAX(0, y - x)
+ BLEND(r, src, dst)
+ BLEND(g, src, dst)
+ BLEND(b, src, dst)
+ #undef OP
+ break;
+ }
+ #undef BLEND
+
+ #define BLEND(c, src, dst) \
+ result->c = OP(result->c * src, dcol->c * dst);
+ switch (aop) {
+ case ALLEGRO_ADD:
+ #define OP(x, y) _ALLEGRO_MIN(1, x + y)
+ BLEND(a, asrc, adst)
+ #undef OP
+ break;
+ case ALLEGRO_SRC_MINUS_DEST:
+ #define OP(x, y) _ALLEGRO_MAX(0, x - y)
+ BLEND(a, asrc, adst)
+ #undef OP
+ break;
+ case ALLEGRO_DEST_MINUS_SRC:
+ #define OP(x, y) _ALLEGRO_MAX(0, y - x)
+ BLEND(a, asrc, adst)
+ #undef OP
+ break;
+ }
+ #undef BLEND
+}
+#endif
+
+/* only call this if the blend modes are one of
+ * ALLEGRO_ONE, ALLEGRO_ZERO, ALLEGRO_ALPHA, ALLEGRO_INVERSE_ALPHA
+ */
+static _AL_ALWAYS_INLINE
+void _al_blend_alpha_inline(
+ const ALLEGRO_COLOR *scol, const ALLEGRO_COLOR *dcol,
+ int op, int src_, int dst_, int aop, int asrc_, int adst_,
+ ALLEGRO_COLOR *result)
+{
+ float asrc, adst;
+ float src, dst;
+
+ result->r = scol->r;
+ result->g = scol->g;
+ result->b = scol->b;
+ result->a = scol->a;
+
+ asrc = get_alpha_factor(asrc_, result->a);
+ adst = get_alpha_factor(adst_, result->a);
+
+ src = get_alpha_factor(src_, result->a);
+ dst = get_alpha_factor(dst_, result->a);
+
+ #define BLEND(c, src, dst) \
+ result->c = OP(result->c * src, dcol->c * dst);
+ switch (op) {
+ case ALLEGRO_ADD:
+ #define OP(x, y) _ALLEGRO_MIN(1, x + y)
+ BLEND(r, src, dst)
+ BLEND(g, src, dst)
+ BLEND(b, src, dst)
+ #undef OP
+ break;
+ case ALLEGRO_SRC_MINUS_DEST:
+ #define OP(x, y) _ALLEGRO_MAX(0, x - y)
+ BLEND(r, src, dst)
+ BLEND(g, src, dst)
+ BLEND(b, src, dst)
+ #undef OP
+ break;
+ case ALLEGRO_DEST_MINUS_SRC:
+ #define OP(x, y) _ALLEGRO_MAX(0, y - x)
+ BLEND(r, src, dst)
+ BLEND(g, src, dst)
+ BLEND(b, src, dst)
+ #undef OP
+ break;
+ }
+
+ switch (aop) {
+ case ALLEGRO_ADD:
+ #define OP(x, y) _ALLEGRO_MIN(1, x + y)
+ BLEND(a, asrc, adst)
+ #undef OP
+ break;
+ case ALLEGRO_SRC_MINUS_DEST:
+ #define OP(x, y) _ALLEGRO_MAX(0, x - y)
+ BLEND(a, asrc, adst)
+ #undef OP
+ break;
+ case ALLEGRO_DEST_MINUS_SRC:
+ #define OP(x, y) _ALLEGRO_MAX(0, y - x)
+ BLEND(a, asrc, adst)
+ #undef OP
+ break;
+ }
+ #undef BLEND
+}
+
+/* call this for general blending. its a little slower than just using alpha */
+static _AL_ALWAYS_INLINE
void _al_blend_inline(
const ALLEGRO_COLOR *scol, const ALLEGRO_COLOR *dcol,
int op, int src_, int dst_, int aop, int asrc_, int adst_,
ALLEGRO_COLOR *result)
{
- float src, dst, asrc, adst;
+ float asrc, adst;
+ ALLEGRO_COLOR src, dst;
result->r = scol->r;
result->g = scol->g;
result->b = scol->b;
result->a = scol->a;
+
+ asrc = get_alpha_factor(asrc_, result->a);
+ adst = get_alpha_factor(adst_, result->a);
- src = get_factor(src_, result->a);
- dst = get_factor(dst_, result->a);
- asrc = get_factor(asrc_, result->a);
- adst = get_factor(adst_, result->a);
+ get_factor(src_, scol, dcol, &src);
+ get_factor(dst_, scol, dcol, &dst);
- #define BLEND(c, src, dst) \
- result->c = OP(result->c * src, dcol->c * dst);
- switch (op) {
- case ALLEGRO_ADD:
- #define OP(x, y) _ALLEGRO_MIN(1, x + y)
- BLEND(r, src, dst)
- BLEND(g, src, dst)
- BLEND(b, src, dst)
- #undef OP
- break;
- case ALLEGRO_SRC_MINUS_DEST:
- #define OP(x, y) _ALLEGRO_MAX(0, x - y)
- BLEND(r, src, dst)
- BLEND(g, src, dst)
- BLEND(b, src, dst)
- #undef OP
- break;
- case ALLEGRO_DEST_MINUS_SRC:
- #define OP(x, y) _ALLEGRO_MAX(0, y - x)
- BLEND(r, src, dst)
- BLEND(g, src, dst)
- BLEND(b, src, dst)
- #undef OP
- break;
- }
+ #define BLEND(c, src, dst) \
+ result->c = OP(result->c * src.c, dcol->c * dst.c);
+ switch (op) {
+ case ALLEGRO_ADD:
+ #define OP(x, y) _ALLEGRO_MIN(1, x + y)
+ BLEND(r, src, dst)
+ BLEND(g, src, dst)
+ BLEND(b, src, dst)
+ #undef OP
+ break;
+ case ALLEGRO_SRC_MINUS_DEST:
+ #define OP(x, y) _ALLEGRO_MAX(0, x - y)
+ BLEND(r, src, dst)
+ BLEND(g, src, dst)
+ BLEND(b, src, dst)
+ #undef OP
+ break;
+ case ALLEGRO_DEST_MINUS_SRC:
+ #define OP(x, y) _ALLEGRO_MAX(0, y - x)
+ BLEND(r, src, dst)
+ BLEND(g, src, dst)
+ BLEND(b, src, dst)
+ #undef OP
+ break;
+ }
+ #undef BLEND
- switch (aop) {
- case ALLEGRO_ADD:
- #define OP(x, y) _ALLEGRO_MIN(1, x + y)
- BLEND(a, asrc, adst)
- #undef OP
- break;
- case ALLEGRO_SRC_MINUS_DEST:
- #define OP(x, y) _ALLEGRO_MAX(0, x - y)
- BLEND(a, asrc, adst)
- #undef OP
- break;
- case ALLEGRO_DEST_MINUS_SRC:
- #define OP(x, y) _ALLEGRO_MAX(0, y - x)
- BLEND(a, asrc, adst)
- #undef OP
- break;
- }
+ #define BLEND(c, src, dst) \
+ result->c = OP(result->c * src, dcol->c * dst);
+ switch (aop) {
+ case ALLEGRO_ADD:
+ #define OP(x, y) _ALLEGRO_MIN(1, x + y)
+ BLEND(a, asrc, adst)
+ #undef OP
+ break;
+ case ALLEGRO_SRC_MINUS_DEST:
+ #define OP(x, y) _ALLEGRO_MAX(0, x - y)
+ BLEND(a, asrc, adst)
+ #undef OP
+ break;
+ case ALLEGRO_DEST_MINUS_SRC:
+ #define OP(x, y) _ALLEGRO_MAX(0, y - x)
+ BLEND(a, asrc, adst)
+ #undef OP
+ break;
+ }
+ #undef BLEND
}
#endif
Index: misc/make_scanline_drawers.py
===================================================================
--- misc/make_scanline_drawers.py (revision 14764)
+++ misc/make_scanline_drawers.py (working copy)
@@ -16,12 +16,12 @@
def make_drawer(name):
global texture, grad, solid, shade, opaque, white
- texture = (name.find("_texture_") != -1)
- grad = (name.find("_grad_") != -1)
- solid = (name.find("_solid_") != -1)
- shade = (name.find("_shade") != -1)
- opaque = (name.find("_opaque") != -1)
- white = (name.find("_white") != -1)
+ texture = "_texture_" in name
+ grad = "_grad_" in name
+ solid = "_solid_" in name
+ shade = "_shade" in name
+ opaque = "_opaque" in name
+ white = "_white" in name
if grad and solid:
raise Exception("grad and solid")
@@ -148,7 +148,8 @@
op_alpha='ALLEGRO_ADD',
dst_mode='ALLEGRO_INVERSE_ALPHA',
dst_alpha='ALLEGRO_INVERSE_ALPHA',
- if_format='ALLEGRO_PIXEL_FORMAT_ARGB_8888'
+ if_format='ALLEGRO_PIXEL_FORMAT_ARGB_8888',
+ alpha_only=True
)
print "else"
make_if_blender_loop(
@@ -158,7 +159,8 @@
op_alpha='ALLEGRO_ADD',
dst_mode='ALLEGRO_ONE',
dst_alpha='ALLEGRO_ONE',
- if_format='ALLEGRO_PIXEL_FORMAT_ARGB_8888'
+ if_format='ALLEGRO_PIXEL_FORMAT_ARGB_8888',
+ alpha_only=True
)
print "else"
@@ -193,7 +195,8 @@
dst_alpha='dst_alpha',
src_format='src_format',
dst_format='dst_format',
- if_format=None
+ if_format=None,
+ alpha_only=False
):
print interp("""\
if (op == #{op} &&
@@ -212,7 +215,8 @@
op_alpha=op_alpha,
dst_mode=dst_mode,
dst_alpha=dst_alpha,
- if_format=if_format
+ if_format=if_format,
+ alpha_only=alpha_only
)
print "else"
@@ -222,7 +226,8 @@
src_alpha=src_alpha,
op_alpha=op_alpha,
dst_mode=dst_mode,
- dst_alpha=dst_alpha)
+ dst_alpha=dst_alpha,
+ alpha_only=alpha_only)
print "}"
@@ -237,7 +242,8 @@
dst_format='dst_format',
src_size='src_size',
if_format=None,
- copy_format=False
+ copy_format=False,
+ alpha_only=False
):
if if_format:
@@ -282,7 +288,8 @@
dst_format=dst_format,
src_size=src_size,
copy_format=copy_format,
- tiling=False
+ tiling=False,
+ alpha_only=alpha_only
)
print "} else"
@@ -296,7 +303,8 @@
src_format=src_format,
dst_format=dst_format,
src_size=src_size,
- copy_format=copy_format
+ copy_format=copy_format,
+ alpha_only=alpha_only
)
print "}"
@@ -312,7 +320,8 @@
dst_format='dst_format',
src_size='src_size',
copy_format=False,
- tiling=True
+ tiling=True,
+ alpha_only=True
):
print "{"
@@ -389,12 +398,15 @@
}
""")
elif shade:
+ blend = "_al_blend_inline"
+ if alpha_only:
+ blend = "_al_blend_alpha_inline"
print interp("""\
{
ALLEGRO_COLOR dst_color;
ALLEGRO_COLOR result;
_AL_INLINE_GET_PIXEL(#{dst_format}, dst_data, dst_color, false);
- _al_blend_inline(&src_color, &dst_color,
+ #{blend}(&src_color, &dst_color,
#{op}, #{src_mode}, #{dst_mode},
#{op_alpha}, #{src_alpha}, #{dst_alpha},
&result);