Re: [AD] stretch_blit bug

[ Thread Index | Date Index | More lists.liballeg.org/allegro-developers Archives ]


I was finally able to get the new stretch_blit a hair faster than the
original. I've attached the results of the test program on my system and
the test program itself. Everything appears to work just like the
original, except that I'm not able to test Mode-X right now. I may be able
to check it next month, or if someone has a system capable of Mode-X, I
would appreciate it if they could test it. The Mode-X code is taken directly
from cstretch.c, but with the same modifications as the others. The output
of everything I could test looks very close to the original.

If someone could take a second to check that it works and performs as
it does for me, that would be nice. You will of course have to compile it
with the same optimization flags as Allegro was compiled with, and have
Allegro compiled without assembly, to get meaningful results.
8 bpp
mem->mem stretch_blit took 235 millis
mem->mem my_stretch took 212 millis
mem->vid stretch_blit took 4299 millis
mem->vid my_stretch took 4268 millis
15 bpp
mem->mem stretch_blit took 224 millis
mem->mem my_stretch took 196 millis
mem->vid stretch_blit took 4782 millis
mem->vid my_stretch took 4732 millis
16 bpp
mem->mem stretch_blit took 223 millis
mem->mem my_stretch took 194 millis
mem->vid stretch_blit took 4767 millis
mem->vid my_stretch took 4739 millis
24 bpp
mem->mem stretch_blit took 488 millis
mem->mem my_stretch took 511 millis
mem->vid stretch_blit took 5230 millis
mem->vid my_stretch took 5192 millis
32 bpp
mem->mem stretch_blit took 341 millis
mem->mem my_stretch took 337 millis
mem->vid stretch_blit took 3529 millis
mem->vid my_stretch took 3487 millis
#include <allegro.h>
#include <stdio.h>

#ifdef __linux__
#include <sys/time.h>
#else
#include <allegro.h>
#include <winalleg.h>
#endif

/* Returns a millisecond timestamp: built from gettimeofday() on Linux and
 * taken from timeGetTime() on every other platform. The benchmark only
 * uses the difference between two calls. */
long currentTimeMillis()
{
#ifndef __linux__
	return timeGetTime();
#else
	struct timeval now;
	long millis;

	gettimeofday(&now, 0);

	/* whole seconds scaled up, plus the sub-second part scaled down */
	millis = now.tv_sec * 1000;
	millis += now.tv_usec / 1000;
	return millis;
#endif
}

/* Per-row stretch state shared between _al_stretch_blit(), which fills it
 * in, and the line stretchers, which read it for every destination row. */
static struct {
		int xcstart; /* value the x counter starts each row with */
		int sxinc; /* amount (in bytes) to advance src x for every dst pixel */
		int xcdec; /* amount to decrement the counter by while it is still positive */
		int xcinc; /* amount to increment the counter by once it is <= 0; src also advances one extra pixel then */
		int linesize; /* size in bytes of the visible part of a dst row */
} _al_stretch;

/* Stretcher macros */
/* Body of an unmasked line stretcher. Expects `uintptr_t dptr` and a byte
 * pointer `sptr` in scope and modifies both. For every destination pixel
 * of `size` bytes it copies get((type*)sptr) to dptr with put(), then
 * advances sptr by _al_stretch.sxinc plus one extra pixel whenever the
 * error counter has dropped to zero or below. */
#define DECLARE_STRETCHER(type, size, put, get) \
		do { \
			const uintptr_t row_end = dptr + _al_stretch.linesize; \
			int counter = _al_stretch.xcstart; \
			ASSERT(dptr); \
			ASSERT(sptr); \
			while (dptr < row_end) { \
				put(dptr, get((type*)sptr)); \
				if (counter > 0) { \
					counter -= _al_stretch.xcdec; \
				} \
				else { \
					sptr += size; \
					counter += _al_stretch.xcinc; \
				} \
				dptr += size; \
				sptr += _al_stretch.sxinc; \
			} \
		} while (0)

/* Body of a masked line stretcher. Expects `uintptr_t dptr` and a byte
 * pointer `sptr` in scope and modifies both. Works like the unmasked
 * stretcher, except that a source pixel equal to `mask` is skipped, leaving
 * the destination pixel untouched. The source pixel is read once and the
 * loaded value is reused for both the mask test and the write. */
#define DECLARE_MASKED_STRETCHER(type, size, put, get, mask) \
		int xc = _al_stretch.xcstart; \
		uintptr_t dend = dptr + _al_stretch.linesize; \
		ASSERT(dptr); \
		ASSERT(sptr); \
		for (; dptr < dend; dptr += size, sptr += _al_stretch.sxinc) { \
			int color = get((type*)sptr); \
			if (color != (mask)) \
				put(dptr, color); \
			if (xc <= 0) { \
				sptr += size; \
				xc += _al_stretch.xcinc; \
			} \
			else \
				xc -= _al_stretch.xcdec; \
		}

#ifdef GFX_HAS_VGA
/*
 * Mode-X line stretcher.
 *
 * Makes four passes over the row, one per plane. Each pass writes every
 * fourth destination pixel, so after each write the source position is
 * stepped four destination pixels forward.
 */
static void stretch_linex(uintptr_t dptr, unsigned char *sptr)
{
   int plane_xc = _al_stretch.xcstart;
   int remaining = _al_stretch.linesize;
   int plane;

   ASSERT(dptr);
   ASSERT(sptr);

   for (plane = 0; plane < 4; plane++) {
      unsigned char *src = sptr;
      uintptr_t dst = dptr / 4;
      const uintptr_t dst_end = (dptr + remaining) / 4;
      int xc = plane_xc;
      int step;

      /* NOTE(review): presumably selects the VGA write plane for dptr & 3
       * through the sequencer port -- confirm against the VGA docs. */
      outportw(0x3C4, (0x100 << (dptr & 3)) | 2);

      while (dst < dst_end) {
	 bmp_write8(dst, *src);

	 /* Fractional part of four single-pixel source steps. */
	 for (step = 0; step < 4; step++) {
	    if (xc > 0)
	       xc -= _al_stretch.xcdec;
	    else {
	       src++;
	       xc += _al_stretch.xcinc;
	    }
	 }

	 /* Whole part of the same four steps. */
	 dst++;
	 src += 4 * _al_stretch.sxinc;
      }

      /* Move to the beginning of next plane: one pixel further on in both
       * the destination and (scaled) the source.  */
      if (plane_xc > 0)
	 plane_xc -= _al_stretch.xcdec;
      else {
	 sptr++;
	 plane_xc += _al_stretch.xcinc;
      }
      dptr++;
      sptr += _al_stretch.sxinc;
      remaining--;
   }
}

/*
 * Mode-X masked line stretcher.
 *
 * Same four-pass, one-plane-at-a-time scheme as stretch_linex(), except that
 * source pixels with value 0 are skipped and leave the destination
 * untouched.
 *
 * The four counter steps must run inside the per-pixel loop: they carry the
 * fractional part of the source advance for the four destination pixels
 * that separate consecutive writes within one plane. Running them after the
 * loop (as an earlier revision did) updates a counter that is reset on the
 * next plane, so the source would only ever advance by the integer step.
 */
static void stretch_masked_linex(uintptr_t dptr, unsigned char *sptr)
{
   int plane;
   int dw = _al_stretch.linesize;
   int first_xc = _al_stretch.xcstart;

   ASSERT(dptr);
   ASSERT(sptr);

   for (plane = 0; plane < 4; plane++) {
      int xc = first_xc;
      unsigned char *s = sptr;
      uintptr_t d = dptr / 4;
      uintptr_t dend = (dptr + dw) / 4;
      int step;

      outportw(0x3C4, (0x100 << (dptr & 3)) | 2);
      for (; d < dend; d++, s += 4 * _al_stretch.sxinc) {
	 unsigned long color = *s;
	 if (color != 0)
	    bmp_write8(d, color);

	 /* Fractional part of four single-pixel source steps. */
	 for (step = 0; step < 4; step++) {
	    if (xc <= 0) {
	       s++;
	       xc += _al_stretch.xcinc;
	    }
	    else
	       xc -= _al_stretch.xcdec;
	 }
      }

      /* Move to the beginning of next plane.  */
      if (first_xc <= 0) {
	 sptr++;
	 first_xc += _al_stretch.xcinc;
      }
      else
	 first_xc -= _al_stretch.xcdec;
      dptr++;
      sptr += _al_stretch.sxinc;
      dw--;
   }
}
#endif

#ifdef ALLEGRO_COLOR8
/* Stretches one row of 1-byte pixels from sptr to dptr: every source byte
 * is read with a plain dereference and written with bmp_write8(). The step
 * sizes and row length come from _al_stretch. */
static void stretch_line8(uintptr_t dptr, unsigned char *sptr)
{
   DECLARE_STRETCHER(unsigned char, 1, bmp_write8, *);
}

/* Masked variant of the 1-byte row stretcher: source pixels equal to 0 are
 * not written, everything else goes through bmp_write8(). */
static void stretch_masked_line8(uintptr_t dptr, unsigned char *sptr)
{
   DECLARE_MASKED_STRETCHER(unsigned char, 1, bmp_write8, *, 0);
}
#endif

#ifdef ALLEGRO_COLOR16
/* Stretches one row of 2-byte pixels from sptr to dptr, reading each source
 * pixel as an unsigned short and writing it with bmp_write15(). */
static void stretch_line15(uintptr_t dptr, unsigned char* sptr)
{
	DECLARE_STRETCHER(unsigned short, 2, bmp_write15, *);
}

/* Stretches one row of 2-byte pixels from sptr to dptr, reading each source
 * pixel as an unsigned short and writing it with bmp_write16(). */
static void stretch_line16(uintptr_t dptr, unsigned char* sptr)
{
	DECLARE_STRETCHER(unsigned short, 2, bmp_write16, *);
}

/* Masked variant of the 15 bpp row stretcher: source pixels equal to
 * MASK_COLOR_15 are not written, everything else goes through
 * bmp_write15(). */
static void stretch_masked_line15(uintptr_t dptr, unsigned char* sptr)
{
	DECLARE_MASKED_STRETCHER(unsigned short, 2, bmp_write15, *, MASK_COLOR_15);
}

/* Masked variant of the 16 bpp row stretcher: source pixels equal to
 * MASK_COLOR_16 are not written, everything else goes through
 * bmp_write16(). */
static void stretch_masked_line16(uintptr_t dptr, unsigned char* sptr)
{
	DECLARE_MASKED_STRETCHER(unsigned short, 2, bmp_write16, *, MASK_COLOR_16);
}
#endif

#ifdef ALLEGRO_COLOR24
/* Stretches one row of 3-byte pixels from sptr to dptr, reading each source
 * pixel with READ3BYTES() and writing it with bmp_write24(). */
static void stretch_line24(uintptr_t dptr, unsigned char* sptr)
{
	DECLARE_STRETCHER(unsigned char, 3, bmp_write24, READ3BYTES);
}

/* Masked variant of the 24 bpp row stretcher: source pixels whose
 * READ3BYTES() value equals MASK_COLOR_24 are not written, everything else
 * goes through bmp_write24(). */
static void stretch_masked_line24(uintptr_t dptr, unsigned char* sptr)
{
	DECLARE_MASKED_STRETCHER(unsigned char, 3, bmp_write24, READ3BYTES, MASK_COLOR_24);
}
#endif

#ifdef ALLEGRO_COLOR32
/* Stretches one row of 4-byte pixels from sptr to dptr, reading each source
 * pixel as a uint32_t and writing it with bmp_write32(). */
static void stretch_line32(uintptr_t dptr, unsigned char* sptr)
{
	DECLARE_STRETCHER(uint32_t, 4, bmp_write32, *);
}

/* Masked variant of the 32 bpp row stretcher: source pixels equal to
 * MASK_COLOR_32 are not written, everything else goes through
 * bmp_write32(). */
static void stretch_masked_line32(uintptr_t dptr, unsigned char* sptr)
{
	DECLARE_MASKED_STRETCHER(uint32_t, 4, bmp_write32, *, MASK_COLOR_32);
}
#endif

/* _al_stretch_blit:
 *  Copies the sw x sh rectangle at (sx, sy) of src onto the dw x dh
 *  rectangle at (dx, dy) of dst, scaling it to fit. Both bitmaps must have
 *  the same color depth. When `masked` is non-zero, source pixels equal to
 *  the mask color of that depth are skipped.
 *
 *  The destination rectangle is clipped against dst's clip rectangle when
 *  dst->clip is set. The source rectangle is NOT clipped; rows are read
 *  straight from src->line[], so the caller must pass a source rectangle
 *  that lies inside src.
 *
 *  Nothing is drawn when any of the sizes is <= 0, when the clipped
 *  destination is empty, or when no line stretcher exists for the color
 *  depth / destination type (this last case also trips an ASSERT in debug
 *  builds).
 *
 *  Scaling is done with integer error counters: every destination pixel
 *  advances the source by the whole quotient (sw/dw, sh/dh), and a counter
 *  accumulating the remainder adds one extra source pixel whenever it has
 *  dropped to zero or below.
 */
void _al_stretch_blit(BITMAP *src, BITMAP *dst,
    int sx, int sy, int sw, int sh, int dx, int dy, int dw, int dh,
	int masked)
{
	int y; /* current dst y */
	int yc; /* y counter */
	int sxofs, dxofs; /* start offsets */
	int syinc; /* amount to increment src y each time */
	int ycdec; /* amount to decrement counter by, increase sy when this reaches 0 */
	int ycinc; /* amount to increment counter by when it reaches 0 */
	int size = 0; /* pixel size */
	int dxbeg, dxend; /* clipping information */
	int dybeg, dyend;
	int i;

	/* Stays NULL when no case below matches, so the check after the
	 * switch tests a defined value instead of stack garbage. */
	void (*stretch_line)(uintptr_t, unsigned char*) = NULL;

	ASSERT(src);
	ASSERT(dst);
	ASSERT(bitmap_color_depth(src) == bitmap_color_depth(dst));

	if (sw <= 0 || sh <= 0 || dw <= 0 || dh <= 0) return;

	if (masked) {
		switch (bitmap_color_depth(src)) {
#ifdef ALLEGRO_COLOR8
			case 8:
				if (is_linear_bitmap(dst))
					stretch_line = stretch_masked_line8;
#ifdef GFX_HAS_VGA
				else
					stretch_line = stretch_masked_linex;
#endif
				size = 1;
				break;
#endif
#ifdef ALLEGRO_COLOR16
			case 15:
				stretch_line = stretch_masked_line15;
				size = 2;
				break;
			case 16:
				stretch_line = stretch_masked_line16;
				size = 2;
				break;
#endif
#ifdef ALLEGRO_COLOR24
			case 24:
				stretch_line = stretch_masked_line24;
				size = 3;
				break;
#endif
#ifdef ALLEGRO_COLOR32
			case 32:
				stretch_line = stretch_masked_line32;
				size = 4;
				break;
#endif
			default:
				break;
		}
	}
	else {
		switch (bitmap_color_depth(src)) {
#ifdef ALLEGRO_COLOR8
			case 8:
				if (is_linear_bitmap(dst))
					stretch_line = stretch_line8;
#ifdef GFX_HAS_VGA
				else
					stretch_line = stretch_linex;
#endif
				size = 1;
				break;
#endif
#ifdef ALLEGRO_COLOR16
			case 15:
				stretch_line = stretch_line15;
				size = 2;
				break;
			case 16:
				stretch_line = stretch_line16;
				size = 2;
				break;
#endif
#ifdef ALLEGRO_COLOR24
			case 24:
				stretch_line = stretch_line24;
				size = 3;
				break;
#endif
#ifdef ALLEGRO_COLOR32
			case 32:
				stretch_line = stretch_line32;
				size = 4;
				break;
#endif
			default:
				break;
		}
	}

	ASSERT(stretch_line);

	/* Release builds compile ASSERT away; never call through NULL. */
	if (!stretch_line)
		return;

	if (dst->clip) {
		dybeg = ((dy > dst->ct) ? dy : dst->ct);
		dyend = (((dy + dh) < dst->cb) ? (dy + dh) : dst->cb);
		if (dybeg >= dyend)
			return;

		dxbeg = ((dx > dst->cl) ? dx : dst->cl);
		dxend = (((dx + dw) < dst->cr) ? (dx + dw) : dst->cr);
		if (dxbeg >= dxend)
			return;
	}
	else {
		dxbeg = dx;
		dxend = dx + dw;
		dybeg = dy;
		dyend = dy + dh;
	}

	/* Vertical stepping: whole rows per dst row, plus remainder counter. */
	syinc = sh / dh;
	ycdec = sh - (syinc*dh);
	ycinc = dh - ycdec;
	yc = ycinc;
	sxofs = sx * size;
	dxofs = dx * size;

	/* Horizontal stepping, shared with the line stretchers. sxinc and
	 * linesize are in bytes, the counters are in pixels. */
	_al_stretch.sxinc = sw / dw * size;
	_al_stretch.xcdec = sw - ((sw/dw)*dw);
	_al_stretch.xcinc = dw - _al_stretch.xcdec;
	_al_stretch.linesize = (dxend-dxbeg)*size;

	/* get start state (clip): replay the stepping for every dst column
	 * hidden left of the clip edge, so the first visible pixel starts
	 * with the right source offset and counter value */
	_al_stretch.xcstart = _al_stretch.xcinc;
	for (i = 0; i < dxbeg-dx; i++, sxofs += _al_stretch.sxinc) {
		if (_al_stretch.xcstart <= 0) {
			_al_stretch.xcstart += _al_stretch.xcinc;
			sxofs += size;
		}
		else
			_al_stretch.xcstart -= _al_stretch.xcdec;
	}

	dxofs += i * size;

	/* skip clipped lines */
	for (y = dy; y < dybeg; y++, sy += syinc) {
		if (yc <= 0) {
			sy++;
			yc += ycinc;
		}
		else
			yc -= ycdec;
	}

	/* Stretch it */

	bmp_select(dst);

	for (; y < dyend; y++, sy += syinc) {
		(*stretch_line)(bmp_write_line(dst, y) + dxofs, src->line[sy] + sxofs);
		if (yc <= 0) {
			sy++;
			yc += ycinc;
		}
		else
			yc -= ycdec;
	}

	bmp_unwrite_line(dst);
}

/* Unmasked stretch with the same parameter list the benchmark passes to
 * stretch_blit(): forwards everything to _al_stretch_blit() with masked = 0. */
void my_stretch(BITMAP *src, BITMAP *dst,
    int sx, int sy, int sw, int sh, int dx, int dy, int dw, int dh)
{
	_al_stretch_blit(src, dst, sx, sy, sw, sh, dx, dy, dw, dh, 0);
}

/* Masked stretch: forwards everything to _al_stretch_blit() with
 * masked = 1, so source pixels equal to the mask color are skipped. */
void my_stretch_masked(BITMAP *src, BITMAP *dst,
    int sx, int sy, int sw, int sh, int dx, int dy, int dw, int dh)
{
		_al_stretch_blit(src, dst, sx, sy, sw, sh, dx, dy, dw, dh, 1);
}

/* Benchmark driver.
 *
 * Usage: my_stretch <color_depth>
 *
 * Sets a 640x480 windowed mode at the requested color depth, loads
 * mysha.pcx and times `count` stretches of it to a w x h rectangle at
 * random positions, for four combinations: Allegro's stretch_blit() and
 * my_stretch(), each to a memory bitmap and to the screen. Positions may
 * lie partly outside the destination, so clipping is exercised as well.
 * Prints one timing line per combination; returns 1 on any setup failure.
 */
int main(int argc, char** argv)
{
	const int count = 2500;
	const int w = 200;
	const int h = 200;
	BITMAP* buffer, *sprite;
	long start, end;
	int i;

	if (argc != 2) {
			printf("Usage: my_stretch [color_depth]\n");
			return 1;
	}

	if (allegro_init() != 0)
			return 1;
	install_keyboard();
	set_color_depth(atoi(argv[1]));
	if (set_gfx_mode(GFX_AUTODETECT_WINDOWED, 640, 480, 0, 0))
			return 1;

	printf("%d bpp\n", bitmap_color_depth(screen));

	buffer = create_bitmap(640, 480);
	if (!buffer) {
		printf("Couldn't create the 640x480 buffer bitmap\n");
		return 1;
	}

	sprite = load_bitmap("mysha.pcx", 0);
	if (!sprite) {
		printf("Couldn't load mysha.pcx\n");
		return 1;
	}

	start = currentTimeMillis();

	for (i = 0; i < count; i++) {
		int x = rand() % 640 - w/2;
		int y = rand() % 480 - h/2;
		stretch_blit(sprite, buffer, 0, 0, sprite->w, sprite->h,
				x, y, w, h);
	}

	end = currentTimeMillis();

	/* The timestamps are long, so the conversion must be %ld. */
	printf("mem->mem stretch_blit took %ld millis\n", end - start);

	start = currentTimeMillis();

	for (i = 0; i < count; i++) {
		int x = rand() % 640 - w/2;
		int y = rand() % 480 - h/2;
		my_stretch(sprite, buffer, 0, 0, sprite->w, sprite->h,
				x, y, w, h);
	}

	end = currentTimeMillis();

	printf("mem->mem my_stretch took %ld millis\n", end - start);

	start = currentTimeMillis();

	for (i = 0; i < count; i++) {
		int x = rand() % 640 - w/2;
		int y = rand() % 480 - h/2;
		stretch_blit(sprite, screen, 0, 0, sprite->w, sprite->h,
				x, y, w, h);
	}

	end = currentTimeMillis();

	printf("mem->vid stretch_blit took %ld millis\n", end - start);

	start = currentTimeMillis();

	for (i = 0; i < count; i++) {
		int x = rand() % 640 - w/2;
		int y = rand() % 480 - h/2;
		my_stretch(sprite, screen, 0, 0, sprite->w, sprite->h,
				x, y, w, h);
	}

	end = currentTimeMillis();

	printf("mem->vid my_stretch took %ld millis\n", end - start);

	return 0;
}
END_OF_MAIN()


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/