;>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
;*--==--* fasm console float mul example. By G-Spider
;*--==--* Build: fasm.exe MatrixMul.asm MatrixMul.exe
;>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
format PE console ;output format: PE executable, console subsystem
entry start ;execution begins at the label start
;---------------------------------------------
; Code section '.text' (readable, executable)
section '.text' code readable executable
;---------------------------------------------
; start - program entry point
;   1. fc = fa * fb through MatrixMul (1x4 vector times 4x4 matrix)
;   2. prints the four floats of fc with printf
;   3. runs the command string szPause through system()
;   4. returns to whoever called the entry point; eax still holds the
;      value returned by system()
;---------------------------------------------
start:
;-------- compute fc = fa * fb --------
; MatrixMul removes its own three arguments (it ends with retn 12).
        push    fc                      ; arg3: output vector
        push    fb                      ; arg2: 4x4 matrix
        push    fa                      ; arg1: input vector
        call    MatrixMul
;-------- print the result --------
; printf is variadic, so every %f consumes a double: widen the floats first.
        cvtps2pd xmm0,qword [fc+4*0]    ; fc[0],fc[1] -> two doubles
        cvtps2pd xmm1,qword [fc+4*2]    ; fc[2],fc[3] -> two doubles
        sub     esp,16*2                ; room for four doubles
        movups  [esp],xmm0              ; unaligned store: esp is only 4-byte aligned
        movups  [esp+16],xmm1
        push    szFmt
        call    [printf]                ; imported function, caller cleans the stack
        add     esp,16*2+4              ; drop four doubles + format pointer
;-------- pause --------
        push    szPause
        call    [system]
        add     esp,4                   ; drop the command-string argument
;-------- leave the program --------
; esp is back at its entry value, so the return address is on top of the
; stack and a normal ret can consume it (no read from below esp needed).
        ret
;===================================================
; MatrixMul(a14,b44,c14)
;  Purpose:
;    c14 = a14 * b44  (1x4 row vector times 4x4 matrix = 1x4 row vector)
;  Arguments (on the stack, removed by this function on return):
;    a14: pointer to 4 single-precision floats, input
;    b44: pointer to 16 single-precision floats, the four rows stored
;         back to back, input
;    c14: pointer to 4 single-precision floats, output
;  Clobbers:
;    eax, ecx, edx, xmm0-xmm7
;  Note:
;    Every memory access uses movups, so no alignment is required. If the
;    data is guaranteed to be 16-byte aligned, movups may become movaps.
;===================================================
MatrixMul:
; Stack arguments, addressed relative to esp (no frame pointer is set up)
label .a14 dword at esp+4 ;arg1
label .b44 dword at esp+8 ;arg2
label .c14 dword at esp+12 ;arg3
        mov     eax,[.a14]
        mov     edx,[.b44]
        mov     ecx,[.c14]
; Fetch the vector once and make three extra copies, one per element
        movups  xmm0,[eax]              ; xmm0 = a3 a2 a1 a0
        movaps  xmm1,xmm0
        movaps  xmm2,xmm0
        movaps  xmm3,xmm0
; Fetch the four matrix rows
        movups  xmm4,[edx+16*0]         ; row 0
        movups  xmm5,[edx+16*1]         ; row 1
        movups  xmm6,[edx+16*2]         ; row 2
        movups  xmm7,[edx+16*3]         ; row 3
; Broadcast element i to all four lanes and scale row i with it
        shufps  xmm0,xmm0,00h           ; a0 a0 a0 a0
        mulps   xmm0,xmm4               ; a0 * row 0
        shufps  xmm1,xmm1,55h           ; a1 a1 a1 a1
        mulps   xmm1,xmm5               ; a1 * row 1
        shufps  xmm2,xmm2,0AAh          ; a2 a2 a2 a2
        mulps   xmm2,xmm6               ; a2 * row 2
        shufps  xmm3,xmm3,0FFh          ; a3 a3 a3 a3
        mulps   xmm3,xmm7               ; a3 * row 3
; Sum the four scaled rows pairwise
        addps   xmm0,xmm1               ; (a0*row0) + (a1*row1)
        addps   xmm2,xmm3               ; (a2*row2) + (a3*row3)
        addps   xmm0,xmm2
        movups  [ecx],xmm0              ; store the result vector
        retn    3*4                     ; return and pop the three arguments
;--------------- end of MatrixMul --------------------
; Data section (readable, writable)
section '.data' data readable writeable
; fa: input row vector (1x4)
fa      dd 0.1, 0.2, 0.3, 0.4
; fb: input matrix (4x4), one row per line
fb      dd 10.0, 11.0, 12.0, 13.0
        dd 14.0, 15.0, 16.0, 17.0
        dd 18.0, 19.0, 20.0, 21.0
        dd 22.0, 23.0, 24.0, 25.0
; fc: output row vector (1x4), initially all zero
fc      dd 4 dup (0)
; printf format: a heading line, then four floating-point values
szFmt   db 'Result=',0ah
        db '%f %f %f %f',0ah,0
; command string passed to system()
szPause db 'pause',0
;---------------------------------------------
; Import section (readable, writable), laid out by hand
section '.idata' import data readable writeable
; Import directory: one IMAGE_IMPORT_DESCRIPTOR (five dwords) per DLL,
; closed by a descriptor that is all zero.
        dd 0                            ; OriginalFirstThunk (not used)
        dd 0                            ; TimeDateStamp
        dd 0                            ; ForwarderChain
        dd RVA msvcrt_name              ; Name: RVA of the DLL name string
        dd RVA msvcrt_table             ; FirstThunk: RVA of the thunk array
        dd 5 dup (0)                    ; terminating all-zero descriptor
; Thunk array for msvcrt.dll, closed by a zero entry. The code calls
; through these slots with call [printf] / call [system].
msvcrt_table:
printf  dd RVA _printf
system  dd RVA _system
        dd 0
; DLL name
msvcrt_name db 'msvcrt.dll',0
; IMAGE_IMPORT_BY_NAME records: 16-bit hint followed by the function name
_printf dw 0                            ; hint
        db 'printf',0                   ; name
_system dw 0                            ; hint
        db 'system',0                   ; name
;---------------------------------------------
;All end
;Updated version with run-time relocation (delta addressing):
;===================================================
;*--==--* fasm console float mul example. By G-Spider
;*--==--* Build: fasm.exe MatrixMul.asm MatrixMul.exe
;===================================================
format PE console ;output format: PE executable, console subsystem
entry start ;execution begins at the label start
;===================================================
; Code section '.text' (readable, executable)
section '.text' code readable executable
;===================================================
; start - program entry point
;   Every data and import reference is made relative to ebx, which holds
;   the difference between the run-time and the assemble-time address of
;   the code (0 when the image is loaded where it was assembled for).
;   1. fc = fa * fb through MatrixMul (4x4 times 4x4)
;   2. prints fc one row per printf call
;   3. runs the command string szPause through system()
;   4. restores ebx and returns to whoever called the entry point; eax
;      still holds the value returned by system()
;===================================================
start:
        push    ebx                     ; ebx is callee-saved: keep the caller's value
;===== relocation delta =====
        call    @F                      ; pushes the run-time address of the next label
@@:
        pop     ebx                     ; ebx = run-time address of that label
        sub     ebx,@B                  ; ebx = run-time minus assemble-time address
;-------- call 1: fc = fa * fb --------
; MatrixMul removes its own three arguments (it ends with retn 12).
        lea     eax,[ebx+fc]
        push    eax                     ; arg3: output matrix
        lea     eax,[ebx+fb]
        push    eax                     ; arg2: right-hand matrix
        lea     eax,[ebx+fa]
        push    eax                     ; arg1: left-hand matrix
        call    MatrixMul
;-------- call 2 (disabled): transpose fb into fc --------
; lea eax,[ebx+fc]
; push eax
; lea eax,[ebx+fb]
; push eax
; call MatrixTrans
;-------- print the result, one row per iteration --------
; ecx counts pairs of floats (8 bytes each); a row is two pairs, so ecx
; advances by 2 and the four rows are done when it reaches 8.
        xor     ecx,ecx
.print_row:
        push    ecx                     ; ecx is not preserved across printf
; printf is variadic, so every %f consumes a double: widen the floats first.
        cvtps2pd xmm0,qword [ebx+fc+8*ecx]      ; row elements 0,1 -> doubles
        cvtps2pd xmm1,qword [ebx+fc+8*ecx+8]    ; row elements 2,3 -> doubles
        sub     esp,16*2                ; room for four doubles
        movups  [esp],xmm0              ; unaligned store: esp is only 4-byte aligned
        movups  [esp+16],xmm1
        lea     eax,[ebx+szFmt]
        push    eax
        call    dword [ebx+printf]      ; imported function, caller cleans the stack
        add     esp,16*2+4              ; drop four doubles + format pointer
        pop     ecx
        add     ecx,2
        cmp     ecx,8
        jb      .print_row
;-------- pause --------
        lea     eax,[ebx+szPause]
        push    eax
        call    dword [ebx+system]
        add     esp,4                   ; drop the command-string argument
;-------- leave the program --------
        pop     ebx                     ; restore the caller's ebx
        ret                             ; return address is on top of the stack again
;===================================================
; VectMatrixMul(a14,b44,c14)
;  Purpose:
;    c14 = a14 * b44  (1x4 row vector times 4x4 matrix = 1x4 row vector)
;  Arguments (on the stack, removed by this function on return):
;    a14: pointer to 4 single-precision floats, input
;    b44: pointer to 16 single-precision floats, the four rows stored
;         back to back, input
;    c14: pointer to 4 single-precision floats, output
;  Clobbers:
;    eax, ecx, edx, xmm0-xmm7
;  Note:
;    All three pointers must be 16-byte aligned (movaps is used for every
;    memory access).
;===================================================
align 16
VectMatrixMul:
; Stack arguments, addressed relative to esp (the leading dot makes the
; names local to this function)
label .a14 dword at esp+4 ;arg1
label .b44 dword at esp+8 ;arg2
label .c14 dword at esp+12 ;arg3
        mov     eax,[.a14]
        mov     edx,[.b44]
        mov     ecx,[.c14]
; Fetch the vector once and make three extra copies, one per element
        movaps  xmm0,[eax]              ; xmm0 = a3 a2 a1 a0
        movaps  xmm1,xmm0
        movaps  xmm2,xmm0
        movaps  xmm3,xmm0
; Fetch the four matrix rows
        movaps  xmm4,[edx+16*0]         ; row 0
        movaps  xmm5,[edx+16*1]         ; row 1
        movaps  xmm6,[edx+16*2]         ; row 2
        movaps  xmm7,[edx+16*3]         ; row 3
; Broadcast element i to all four lanes and scale row i with it
        shufps  xmm0,xmm0,00h           ; a0 a0 a0 a0
        mulps   xmm0,xmm4               ; a0 * row 0
        shufps  xmm1,xmm1,55h           ; a1 a1 a1 a1
        mulps   xmm1,xmm5               ; a1 * row 1
        shufps  xmm2,xmm2,0AAh          ; a2 a2 a2 a2
        mulps   xmm2,xmm6               ; a2 * row 2
        shufps  xmm3,xmm3,0FFh          ; a3 a3 a3 a3
        mulps   xmm3,xmm7               ; a3 * row 3
; Sum the four scaled rows pairwise
        addps   xmm0,xmm1               ; (a0*row0) + (a1*row1)
        addps   xmm2,xmm3               ; (a2*row2) + (a3*row3)
        addps   xmm0,xmm2
        movaps  [ecx],xmm0              ; store the result vector
        retn    3*4                     ; return and pop the three arguments
;===================================================
; MatrixTrans(a44,b44)
;  Purpose:
;    b44 = transpose of a44
;  Arguments (on the stack, removed by this function on return):
;    a44: pointer to a 4x4 single-precision matrix, input
;    b44: pointer to a 4x4 single-precision matrix, output
;  Clobbers:
;    ecx, edx, xmm0-xmm7
;  Notes:
;    Both pointers must be 16-byte aligned (movaps is used for every
;    memory access).
;    All four input rows are loaded before the first store, so a44 and
;    b44 may point to the same matrix.
;    Register copies use movaps rather than movdqa: the data is
;    floating point, the result is identical and the encoding is shorter.
;  Lane comments list elements from the highest lane down to the lowest;
;  x, y, z, w are input rows 0..3.
;===================================================
align 16
MatrixTrans:
label .a44 dword at esp+4;arg1
label .b44 dword at esp+8;arg2
        mov     ecx,[.a44]              ; ecx = source matrix
        mov     edx,[.b44]              ; edx = destination matrix
        movaps  xmm0,[ecx]              ; x3 x2 x1 x0
        movaps  xmm1,[ecx+16]           ; y3 y2 y1 y0
        movaps  xmm2,[ecx+32]           ; z3 z2 z1 z0
        movaps  xmm3,[ecx+48]           ; w3 w2 w1 w0
        movaps  xmm5,xmm0               ; keep x for the high-half pass
        movaps  xmm7,xmm2               ; keep z for the high-half pass
; Output rows 0 and 1 come from the low halves of the input rows
        unpcklps xmm0,xmm1              ; y1 x1 y0 x0
        unpcklps xmm2,xmm3              ; w1 z1 w0 z0
        movaps  xmm4,xmm0               ; copy: movlhps below overwrites y1 x1
        movlhps xmm0,xmm2               ; w0 z0 y0 x0
        movhlps xmm2,xmm4               ; w1 z1 y1 x1
; Output rows 2 and 3 come from the high halves of the input rows
        unpckhps xmm5,xmm1              ; y3 x3 y2 x2
        unpckhps xmm7,xmm3              ; w3 z3 w2 z2
        movaps  xmm6,xmm5               ; copy: movlhps below overwrites y3 x3
        movlhps xmm5,xmm7               ; w2 z2 y2 x2
        movhlps xmm7,xmm6               ; w3 z3 y3 x3
; Store the transposed rows
        movaps  [edx+0*16],xmm0         ; w0 z0 y0 x0
        movaps  [edx+1*16],xmm2         ; w1 z1 y1 x1
        movaps  [edx+2*16],xmm5         ; w2 z2 y2 x2
        movaps  [edx+3*16],xmm7         ; w3 z3 y3 x3
        retn    4*2                     ; return and pop the two arguments
;===================================================
; MatrixMul(a44,b44,c44)
;  Purpose:
;    c44 = a44 * b44  (4x4 matrix times 4x4 matrix = 4x4 matrix),
;    computed one row at a time: row i of c44 = (row i of a44) * b44
;  Arguments (on the stack, removed by this function on return):
;    a44: pointer to a 4x4 single-precision matrix, input
;    b44: pointer to a 4x4 single-precision matrix, input
;    c44: pointer to a 4x4 single-precision matrix, output
;  Preserves:
;    ebx, esi, edi, ebp
;  Note:
;    All three pointers must be 16-byte aligned (required by
;    VectMatrixMul, which does the per-row work).
;===================================================
align 16
MatrixMul:
; Stack arguments, addressed through the frame pointer
label .a44 dword at ebp+8 ;arg1
label .b44 dword at ebp+12 ;arg2
label .c44 dword at ebp+16 ;arg3
        push    ebp
        mov     ebp,esp
; Save the registers this function uses across the calls below
        push    ebx
        push    esi
        push    edi
        mov     esi,[.a44]              ; esi = current row of a44
        mov     edi,[.c44]              ; edi = current row of c44
        lea     ebx,[esi+4*16]          ; ebx = address just past the last row of a44
.next_row:
; VectMatrixMul(row of a44, b44, row of c44); it pops its own arguments
        push    edi
        push    [.b44]
        push    esi
        call    VectMatrixMul
        add     edi,16                  ; next output row
        add     esi,16                  ; next input row
        cmp     esi,ebx
        jne     .next_row
; Restore the saved registers and drop the frame
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        retn    4*3                     ; return and pop the three arguments
;===================================================
; Data section (readable, writable)
section '.data' data readable writeable
; The matrix routines access fa, fb and fc with movaps, which faults on
; an address that is not a multiple of 16. The alignment is therefore
; requested explicitly instead of relying on the current layout.
; fa: left-hand matrix (4x4), one row per line
align 16
fa      dd 0.1,0.2,0.3,0.4
        dd 0.5,0.6,0.7,0.8
        dd 0.9,1.0,1.1,1.2
        dd 1.3,1.4,1.5,1.6
; fb: right-hand matrix (4x4), one row per line
align 16
fb      dd 10.0,11.0,12.0,13.0
        dd 14.0,15.0,16.0,17.0
        dd 18.0,19.0,20.0,21.0
        dd 22.0,23.0,24.0,25.0
; fc: result matrix (4x4), 16 reserved dwords
align 16
fc      rd 16
; printf format for one row of four values
szFmt db '%12.4f %12.4f %12.4f %12.4f',0ah,0
; command string passed to system()
szPause db 'pause',0
;===================================================
; Import section (readable, writable), laid out by hand
section '.idata' import data readable writeable
; Import directory: one IMAGE_IMPORT_DESCRIPTOR (five dwords) per DLL,
; closed by a descriptor that is all zero.
        dd 0                            ; OriginalFirstThunk (not used)
        dd 0                            ; TimeDateStamp
        dd 0                            ; ForwarderChain
        dd RVA msvcrt_name              ; Name: RVA of the DLL name string
        dd RVA msvcrt_table             ; FirstThunk: RVA of the thunk array
        dd 5 dup (0)                    ; terminating all-zero descriptor
; Thunk array for msvcrt.dll, closed by a zero entry. The code calls
; through these slots with call dword [ebx+printf] / [ebx+system].
msvcrt_table:
printf  dd RVA _printf
system  dd RVA _system
        dd 0
; DLL name
msvcrt_name db 'msvcrt.dll',0
; IMAGE_IMPORT_BY_NAME records: 16-bit hint followed by the function name
_printf dw 0                            ; hint
        db 'printf',0                   ; name
_system dw 0                            ; hint
        db 'system',0                   ; name
;---------------------------------------------
;All end