// Source: http://www.softkam.ro/index.php?pid=alphablend
// Copyright (C) 2005 SoftKAM. All Rights Reserved.
//
// AlphaBlend is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
// USA.
#ifndef _ALPHA_H_INCLUDED
#define _ALPHA_H_INCLUDED
// Alpha-blend entry points. All three take a destination and a source pixel
// buffer plus a width (w) and height (h) in pixels, and overwrite dst with
// the blend of src onto dst. Pixels are 4 bytes each; byte 3 of a source
// pixel is its alpha (0 - 255). Colour channels are combined as
//   dst = (src_alpha * (src - dst) + dst * 256) / 256

// Plain C implementation. The destination alpha byte is set to the source
// alpha byte.
void AlphaBlt(unsigned char *dst, unsigned char *src, int w, int h);
// Inline-assembly implementation using MMX registers. Unlike AlphaBlt, the
// alpha byte is run through the same blend formula as the colour channels.
void AlphaBltMMX(unsigned char *dst, unsigned char *src, int w, int h);
// Inline-assembly implementation that additionally uses pshufw to broadcast
// the alpha value. Like AlphaBltMMX, the alpha byte is blended, not copied.
void AlphaBltSSE(unsigned char *dst, unsigned char *src, int w, int h);
#endif // _ALPHA_H_INCLUDED
// Copyright (C) 2005 SoftKAM. All Rights Reserved.
//
// AlphaBlend is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
// USA.
#include "alpha.h"
/******************************************************************************/
// Blend the w x h image at src onto the image at dst, in place (plain C
// version).
//
// dst, src: pixel buffers, 4 bytes per pixel, w * h pixels each; byte 3 of a
//           pixel is its alpha (0 - 255), bytes 0..2 are the colour channels.
// w, h:     image size in pixels. If either is zero or negative nothing is
//           written.
//
// Colour channels: dst = (src_alpha * (src - dst) + dst * 256) / 256.
// Alpha channel:   dst alpha is replaced by the source alpha.
void AlphaBlt(unsigned char *dst, unsigned char *src, int w, int h)
{
    int row, col;
    int alpha;

    // Guard against non-positive sizes. Without it two negative dimensions
    // would multiply to a positive pixel count and the loop would write to
    // dst for a nonsensical request.
    if (w <= 0 || h <= 0)
        return;

    // Walk the pixels with advancing pointers instead of indexing up to
    // w * h * 4, so no product is formed that could overflow int.
    for (row = 0; row < h; ++row)
    {
        for (col = 0; col < w; ++col)
        {
            alpha = src[3];

            // alpha * (src - dst) + dst * 256 == dst * (256 - alpha) + src * alpha,
            // which is never negative and never exceeds 255 * 256, so the
            // quotient always fits in a byte.
            dst[0] = (unsigned char)((alpha * (src[0] - dst[0]) + dst[0] * 256) / 256);
            dst[1] = (unsigned char)((alpha * (src[1] - dst[1]) + dst[1] * 256) / 256);
            dst[2] = (unsigned char)((alpha * (src[2] - dst[2]) + dst[2] * 256) / 256);
            dst[3] = (unsigned char)alpha;

            src += 4;
            dst += 4;
        }
    }
}
/******************************************************************************/
// Blend the w x h image at src onto the image at dst, in place, using MMX
// registers plus the pshufw instruction (MSVC 32-bit inline assembly).
//
// dst, src: pixel buffers, 4 bytes per pixel, rows of w pixels stored back to
//           back; byte 3 of a pixel is its alpha (0 - 255).
// w, h:     image size in pixels. If either is zero or negative nothing is
//           written.
//
// Every byte of a pixel, including the alpha byte, is computed as
//   dst = (src_alpha * (src - dst) + dst * 256) / 256
// Note that this differs from AlphaBlt, which copies the source alpha.
void AlphaBltSSE(unsigned char *dst, unsigned char *src, int w, int h)
{
    int rowBytes;   // size of one row in bytes
    int pairCount;  // number of two-pixel groups handled per row by the asm

    // A zero pair count or row count must never reach the assembly loops:
    // they count down to zero and would wrap around instead of stopping.
    if (w <= 0 || h <= 0)
        return;

    rowBytes = w << 2;
    pairCount = w >> 1;

    if (pairCount > 0)
    {
        _asm {
            mov     edi, dst
            mov     esi, src
            mov     edx, h                  // edx = rows remaining
            pxor    mm7, mm7                // mm7 = 0, used to widen bytes to words
        scan_loop:
            mov     ecx, pairCount          // ecx = pixel pairs remaining in this row
            xor     ebx, ebx                // ebx = index of the current pair
        pix_loop:
            movq    mm4, [esi+ebx*8]        // mm4 = two source pixels
            movq    mm5, [edi+ebx*8]        // mm5 = two destination pixels
            // FIRST PIXEL (low 4 bytes)
            movq    mm0, mm4
            movq    mm1, mm5
            punpcklbw mm0, mm7              // mm0 = src pixel as four 16-bit words
            punpcklbw mm1, mm7              // mm1 = dst pixel as four 16-bit words
            pshufw  mm2, mm0, 0ffh          // mm2 = src word 3 (alpha) in all four words
            movq    mm3, mm1
            psubw   mm0, mm1                // mm0 = src - dst
            psllw   mm3, 8                  // mm3 = dst * 256
            pmullw  mm0, mm2                // mm0 = (src - dst) * alpha (low 16 bits)
            paddw   mm0, mm3                // mm0 = (src - dst) * alpha + dst * 256
            psrlw   mm0, 8                  // mm0 = blended pixel, one word per byte
            // SECOND PIXEL (high 4 bytes)
            punpckhbw mm5, mm7              // mm5 = dst pixel as four 16-bit words
            punpckhbw mm4, mm7              // mm4 = src pixel as four 16-bit words
            movq    mm3, mm5
            pshufw  mm2, mm4, 0ffh          // mm2 = src word 3 (alpha) in all four words
            psllw   mm3, 8                  // mm3 = dst * 256
            psubw   mm4, mm5                // mm4 = src - dst
            pmullw  mm4, mm2                // mm4 = (src - dst) * alpha (low 16 bits)
            paddw   mm4, mm3                // mm4 = (src - dst) * alpha + dst * 256
            psrlw   mm4, 8                  // mm4 = blended pixel, one word per byte
            packuswb mm0, mm4               // mm0 = both blended pixels as bytes
            movq    [edi+ebx*8], mm0        // store the pair back to dst
            inc     ebx
            dec     ecx
            jnz     pix_loop
            // advance both pointers to the next row
            mov     ebx, rowBytes
            add     esi, ebx
            add     edi, ebx
            dec     edx
            jnz     scan_loop
            emms                            // leave MMX state so x87 code works afterwards
        }
    }

    // The assembly handles pixels two at a time, so for an odd width the last
    // pixel of every row is blended here with the same formula.
    if (w & 1)
    {
        unsigned char *d = dst + (rowBytes - 4);
        unsigned char *s = src + (rowBytes - 4);
        int row, c, alpha;

        for (row = 0; row < h; ++row)
        {
            alpha = s[3];
            for (c = 0; c < 4; ++c)
                d[c] = (unsigned char)((alpha * (s[c] - d[c]) + d[c] * 256) / 256);
            d += rowBytes;
            s += rowBytes;
        }
    }
}
/******************************************************************************/
// Blend the w x h image at src onto the image at dst, in place, using only
// MMX instructions (MSVC 32-bit inline assembly).
//
// dst, src: pixel buffers, 4 bytes per pixel, rows of w pixels stored back to
//           back; byte 3 of a pixel is its alpha (0 - 255).
// w, h:     image size in pixels. If either is zero or negative nothing is
//           written.
//
// Every byte of a pixel, including the alpha byte, is computed as
//   dst = (src_alpha * (src - dst) + dst * 256) / 256
// Note that this differs from AlphaBlt, which copies the source alpha.
void AlphaBltMMX(unsigned char *dst, unsigned char *src, int w, int h)
{
    int rowBytes;   // size of one row in bytes
    int pairCount;  // number of two-pixel groups handled per row by the asm

    // A zero pair count or row count must never reach the assembly loops:
    // they count down to zero and would wrap around instead of stopping.
    if (w <= 0 || h <= 0)
        return;

    rowBytes = w << 2;
    pairCount = w >> 1;

    if (pairCount > 0)
    {
        _asm {
            mov     edi, dst
            mov     esi, src
            mov     edx, h                  // edx = rows remaining
            pxor    mm7, mm7                // mm7 = 0, used to widen bytes to words
        scan_loop:
            mov     ecx, pairCount          // ecx = pixel pairs remaining in this row
            xor     ebx, ebx                // ebx = index of the current pair
        pix_loop:
            movq    mm4, [esi+ebx*8]        // mm4 = two source pixels
            movq    mm5, [edi+ebx*8]        // mm5 = two destination pixels
            // FIRST PIXEL (low 4 bytes)
            movq    mm0, mm4
            movq    mm1, mm5
            punpcklbw mm0, mm7              // mm0 = src pixel as four 16-bit words
            movzx   eax, byte ptr [esi+ebx*8+3] // eax = alpha of first src pixel
            punpcklbw mm1, mm7              // mm1 = dst pixel as four 16-bit words
            movd    mm2, eax                // mm2 words = 0 0 0 A
            movq    mm3, mm1
            punpcklwd mm2, mm2              // mm2 words = 0 0 A A
            psubw   mm0, mm1                // mm0 = src - dst
            punpckldq mm2, mm2              // mm2 words = A A A A
            psllw   mm3, 8                  // mm3 = dst * 256
            pmullw  mm0, mm2                // mm0 = (src - dst) * alpha (low 16 bits)
            paddw   mm0, mm3                // mm0 = (src - dst) * alpha + dst * 256
            psrlw   mm0, 8                  // mm0 = blended pixel, one word per byte
            // SECOND PIXEL (high 4 bytes)
            punpckhbw mm4, mm7              // mm4 = src pixel as four 16-bit words
            movzx   eax, byte ptr [esi+ebx*8+7] // eax = alpha of second src pixel
            punpckhbw mm5, mm7              // mm5 = dst pixel as four 16-bit words
            movd    mm2, eax                // mm2 words = 0 0 0 A
            movq    mm3, mm5
            punpcklwd mm2, mm2              // mm2 words = 0 0 A A
            psubw   mm4, mm5                // mm4 = src - dst
            punpckldq mm2, mm2              // mm2 words = A A A A
            psllw   mm3, 8                  // mm3 = dst * 256
            pmullw  mm4, mm2                // mm4 = (src - dst) * alpha (low 16 bits)
            paddw   mm4, mm3                // mm4 = (src - dst) * alpha + dst * 256
            psrlw   mm4, 8                  // mm4 = blended pixel, one word per byte
            // One pack puts the first pixel in the low dword and the second in
            // the high dword, the same layout the pixels were loaded in.
            packuswb mm0, mm4
            movq    [edi+ebx*8], mm0        // store the pair back to dst
            inc     ebx
            dec     ecx
            jnz     pix_loop
            // advance both pointers to the next row
            mov     ebx, rowBytes
            add     esi, ebx
            add     edi, ebx
            dec     edx
            jnz     scan_loop
            emms                            // leave MMX state so x87 code works afterwards
        }
    }

    // The assembly handles pixels two at a time, so for an odd width the last
    // pixel of every row is blended here with the same formula.
    if (w & 1)
    {
        unsigned char *d = dst + (rowBytes - 4);
        unsigned char *s = src + (rowBytes - 4);
        int row, c, alpha;

        for (row = 0; row < h; ++row)
        {
            alpha = s[3];
            for (c = 0; c < 4; ++c)
                d[c] = (unsigned char)((alpha * (s[c] - d[c]) + d[c] * 256) / 256);
            d += rowBytes;
            s += rowBytes;
        }
    }
}