; SSE2_Copy16Times dst, src
; Broadcast one byte value into all 16 byte lanes of an XMM register.
;   %1 = destination XMM register (fully overwritten)
;   %2 = 32-bit source operand for movd; only its low word is used, and it
;        must hold 0..255 (e.g. loaded with movzx from a byte). packuswb
;        saturates signed words to unsigned bytes, so 0x0100..0x7FFF would
;        become 0xFF and 0x8000..0xFFFF would become 0x00.
%macro SSE2_Copy16Times 2
movd %1, %2 ; low dword = src, upper 96 bits zeroed
pshuflw %1, %1, 0 ; replicate word 0 into the four low words; high qword unchanged
punpcklqdq %1, %1 ; duplicate the low qword into the high qword -> 8 equal words
packuswb %1, %1 ; words -> bytes with unsigned saturation -> 16 equal bytes
%endmacro
movzx r2d, byte [r0] ; zero-extend the byte at [r0] (left-border pixel) so r2d is in 0..255
SSE2_Copy16Times xmm0, r2d ; args: dst xmm, src r32 -> xmm0 holds that byte in all 16 lanes (r0/r2d: generic register names defined outside this snippet)
假设 [ r0 ] = 0x07
上面将展开如下:
movzx r2d, byte [r0] => r2d = 0x00000007
movd xmm0, r2d => xmm0 = 0x00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 07
pshuflw xmm0, xmm0, 0 => xmm0 = 0x0000 0000 0000 0000 0007 0007 0007 0007
punpcklqdq xmm0, xmm0 => xmm0 = 0x0007 0007 0007 0007 0007 0007 0007 0007
packuswb xmm0, xmm0 => xmm0 = 0x0707 0707 0707 0707 0707 0707 0707 0707