Mail Archives: djgpp/1997/08/18/18:51:25
vecna AT inlink DOT com ([vecna]) wrote:
>Okay, this is the single most important routine to optimize. It's the
>transparent blitter. It's very important to optimize already, and it
>will get VERY important to optimize in the next version... EVERY CYCLE
>COUNTS in this one.
>tcopysprite(int x, int y, int width, int height, char *spr)
>{ asm("movl %3, %%ecx \n\t"
> "movl %4, %%esi \n\t"
>"tcsl0: \n\t"
> "movl %1, %%eax \n\t"
> "imul $352, %%eax \n\t"
> "addl %0, %%eax \n\t"
> "addl _virscr, %%eax \n\t"
> "movl %%eax, %%edi \n\t"
> "movl %2, %%edx \n\t"
>"drawloop: \n\t"
> "lodsb \n\t"
> "orb %%al, %%al \n\t"
> "jz nodraw \n\t"
> "stosb \n\t"
> "decl %%edx \n\t"
> "orl %%edx, %%edx \n\t"
> "jz endline \n\t"
> "jmp drawloop \n\t"
>"nodraw: \n\t"
> "incl %%edi \n\t"
> "decl %%edx \n\t"
> "orl %%edx, %%edx \n\t"
> "jnz drawloop \n\t"
>"endline: \n\t"
> "incl %1 \n\t"
> "decl %%ecx \n\t"
> "jnz tcsl0 \n\t"
> :
> : "m" (x), "m" (y), "m" (width), "m" (height), "m" (spr)
> : "eax","edx","esi","edi","ecx","cc" );
>}
Ok, a couple of ideas...
I think you should try to avoid that imul in the address calculation,
use shifts and adds instead, 352=256+64+32.
I haven't tested the following, and it may have to be rearranged to
avoid stalls (if you are targeting the Pentium processor):
mov eax,ypos
mov ebx,xpos
shl eax,5 ; ypos*32
add ebx,eax
add eax,eax ;ypos*64
lea eax,[eax*4+eax] ;ypos*320 and we have already added ypos*32 to ebx
add ebx,eax ; now you only have to add the start of the buffer
Now for the transparency...
Have you tried using a mask? If you had a mask you could avoid so many
conditional jumps. If you construct the mask while loading the
sprites, you could set the mask to 255 for transparent pixels and 0
for opaque ones (the transparent pixels in the sprite data itself must
be 0, as they already are in your routine), then you could do the following:
mov esi,maskstart
mov edi,backg
mov eax,spritedata
mov ebx,[esi] ; 4 pixels at a time
mov ecx,[edi]
and ebx,ecx
mov edx,[eax]
or ebx,edx
mov[edi],ebx ;put back to screen
Again, I haven't tried this but you should be able to unroll this so
that you perhaps do a whole line at a time if your sprites are a fixed
size.
-Chris
- Raw text -