Assembly x64 Intro - SSE2 4x8 Store




;-----------------------------------------------------------------------
; SSE2_Store4x8p  base, a, b, c, d, tmp
;
;   %1      register used as the base of the four store addresses
;   %2, %3  first  pair of xmm registers
;   %4, %5  second pair of xmm registers
;   %6      scratch xmm register (overwritten)
;
; Regroups the 64-bit halves of each register pair and writes four
; 16-byte rows (low qword listed first):
;   [%1+0x00] = { lo64(%2), lo64(%3) }
;   [%1+0x10] = { lo64(%4), lo64(%5) }
;   [%1+0x20] = { hi64(%2), hi64(%3) }
;   [%1+0x30] = { hi64(%4), hi64(%5) }
;
; Clobbers: %2, %3, %4, %6.  %5 is only read.
; Requires: SSE2_XSawp and MOVDQ, both defined outside this macro.
; NOTE(review): MOVDQ is not defined in this listing; if it maps to an
; aligned move (movdqa), %1 must be 16-byte aligned -- confirm.
;-----------------------------------------------------------------------
%macro SSE2_Store4x8p 6
    ; %2 = {lo64(%2), lo64(%3)},  %6 = {hi64(%2), hi64(%3)}
    SSE2_XSawp qdq, %2, %3, %6
    ; %3 was last read on the line above, so it is reused here as the
    ; scratch output: %4 = {lo64(%4), lo64(%5)},  %3 = {hi64(%4), hi64(%5)}
    SSE2_XSawp qdq, %4, %5, %3
    MOVDQ    [%1+0x00], %2      ; row 0: low  halves of first  pair
    MOVDQ    [%1+0x10], %4      ; row 1: low  halves of second pair
    MOVDQ    [%1+0x20], %6      ; row 2: high halves of first  pair
    MOVDQ    [%1+0x30], %3      ; row 3: high halves of second pair
%endmacro



;-----------------------------------------------------------------------
; SSE2_XSawp  suffix, a, b, out_hi        (helper used for transposes)
;
;   %1  suffix pasted onto punpckl / punpckh to form the unpack mnemonic
;       (e.g. qdq -> punpcklqdq / punpckhqdq)
;   %2  in/out: on exit holds punpckl%1 of (%2, %3)  -- low-half interleave
;   %3  in:     second source operand, not written
;   %4  out:    on exit holds punpckh%1 of (original %2, %3)
;               -- high-half interleave
;
; %4 must be a register distinct from %2 and %3: it is overwritten
; before %3 is read, and it must keep the original %2 while %2 itself
; is modified.
;-----------------------------------------------------------------------
%macro SSE2_XSawp 4
    movdqa      %4, %2          ; keep a copy of %2; the next line destroys it
    punpckl%1   %2, %3          ; %2 = interleave of low  halves of %2, %3
    punpckh%1   %4, %3          ; %4 = interleave of high halves of old %2, %3
%endmacro



SSE2_Store4x8p r0, xmm4, xmm2, xmm3, xmm0, xmm5 =>
    SSE2_XSawp qdq, xmm4, xmm2, xmm5 =>
        movdqa     xmm5, xmm4
        punpcklqdq xmm4, xmm2 => (xmm4 低64位 = 原xmm4的低64位, 高64位 = xmm2的低64位)
        punpckhqdq xmm5, xmm2 => (xmm5 低64位 = 原xmm4的高64位, 高64位 = xmm2的高64位)
    SSE2_XSawp qdq, xmm3, xmm0, xmm2 =>
        movdqa     xmm2, xmm3
        punpcklqdq xmm3, xmm0 => (xmm3 低64位 = 原xmm3的低64位, 高64位 = xmm0的低64位)
        punpckhqdq xmm2, xmm0 => (xmm2 低64位 = 原xmm3的高64位, 高64位 = xmm0的高64位)
    movdqa     [r0],      xmm4
    movdqa     [r0 + 16], xmm3
    movdqa     [r0 + 32], xmm5
    movdqa     [r0 + 48], xmm2

   




你可能感兴趣的:(Assembly x64 Intro - SSE2 4x8 Store)