;****************************************************************************************************
; void WelsCPUId( int32_t uiIndex, int32_t *pFeatureA, int32_t *pFeatureB, int32_t *pFeatureC, int32_t *pFeatureD )
;****************************************************************************************************
%ifdef WIN64
;----------------------------------------------------------------------------
; WelsCPUId, Microsoft x64 variant.
;   In:  ecx        = uiIndex   (copied to eax as the cpuid input)
;        rdx        = pFeatureA (receives eax)
;        r8         = pFeatureB (receives ebx)
;        r9         = pFeatureC (*pFeatureC is loaded into ecx before cpuid
;                                and receives ecx afterwards)
;        [rsp + 40] = pFeatureD at entry (receives edx)
;   Out: nothing in registers; results are written through the pointers.
;   Clobbers: rax, rcx, rdx, r10, r11. rbx is saved and restored.
;----------------------------------------------------------------------------
WELS_EXTERN WelsCPUId
    push    rbx                     ; cpuid overwrites ebx; restored before ret
    mov     r10, rdx                ; keep pFeatureA, cpuid overwrites edx
    mov     eax, ecx                ; eax = uiIndex
    mov     ecx, [r9]               ; ecx = *pFeatureC (input value)
    cpuid
    mov     [r9], ecx               ; *pFeatureC = ecx
    mov     [r8], ebx               ; *pFeatureB = ebx
    mov     r11, [rsp + 1*8 + 40]   ; pFeatureD: fifth argument, one push below entry rsp
    mov     [r11], edx              ; *pFeatureD = edx
    mov     [r10], eax              ; *pFeatureA = eax
    pop     rbx
    ret
%elifdef UNIX64
;----------------------------------------------------------------------------
; WelsCPUId, System V AMD64 variant.
;   In:  edi = uiIndex   (copied to eax as the cpuid input)
;        rsi = pFeatureA (receives eax)
;        rdx = pFeatureB (receives ebx)
;        rcx = pFeatureC (*pFeatureC is loaded into ecx before cpuid
;                         and receives ecx afterwards)
;        r8  = pFeatureD (receives edx)
;   Out: nothing in registers; results are written through the pointers.
;   Clobbers: rax, rcx, rdx, r10, r11. rbx is saved and restored.
;----------------------------------------------------------------------------
WELS_EXTERN WelsCPUId
    push    rbx                     ; cpuid overwrites ebx; restored before ret
    mov     r10, rdx                ; keep pFeatureB, cpuid overwrites edx
    mov     r11, rcx                ; keep pFeatureC, cpuid overwrites ecx
    mov     eax, edi                ; eax = uiIndex
    mov     ecx, [r11]              ; ecx = *pFeatureC (input value)
    cpuid
    mov     [r8], edx               ; *pFeatureD = edx
    mov     [r11], ecx              ; *pFeatureC = ecx
    mov     [r10], ebx              ; *pFeatureB = ebx
    mov     [rsi], eax              ; *pFeatureA = eax
    pop     rbx
    ret
%elifdef X86_32
;----------------------------------------------------------------------------
; WelsCPUId, 32-bit x86 variant. All arguments are read from the stack.
; After the two pushes below, argument k (1-based) is at [esp + 2*4 + 4*k]:
;        k = 1  uiIndex   (loaded into eax as the cpuid input)
;        k = 2  pFeatureA (receives eax)
;        k = 3  pFeatureB (receives ebx)
;        k = 4  pFeatureC (*pFeatureC is loaded into ecx before cpuid
;                          and receives ecx afterwards)
;        k = 5  pFeatureD (receives edx)
;   Clobbers: eax, ecx, edx. ebx and esi are saved and restored.
;----------------------------------------------------------------------------
WELS_EXTERN WelsCPUId
    push    ebx                     ; cpuid overwrites ebx; restored before ret
    push    esi                     ; esi is the pointer scratch register

    mov     esi, [esp + 2*4 + 16]   ; esi = pFeatureC
    mov     ecx, [esi]              ; ecx = *pFeatureC (input value)
    mov     eax, [esp + 2*4 + 4]    ; eax = uiIndex
    cpuid

    ; Write the four result registers back through the caller's pointers.
    mov     esi, [esp + 2*4 + 8]
    mov     [esi], eax              ; *pFeatureA = eax
    mov     esi, [esp + 2*4 + 12]
    mov     [esi], ebx              ; *pFeatureB = ebx
    mov     esi, [esp + 2*4 + 16]
    mov     [esi], ecx              ; *pFeatureC = ecx
    mov     esi, [esp + 2*4 + 20]
    mov     [esi], edx              ; *pFeatureD = edx

    pop     esi
    pop     ebx
    ret
%endif
When writing code for 64-bit Linux that integrates with a C library, you must follow the calling conventions explained in the AMD64 ABI Reference. You can also get this information from Wikipedia. The most important points are:
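- Integer and pointer parameters are passed, from left to right, in rdi, rsi, rdx, rcx, r8, r9.
- Floating-point parameters are passed in xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7.
- Additional parameters are pushed on the stack, right to left. When the called function gets control, the return address is at [rsp], the first memory parameter is at [rsp+8], etc.
- The stack pointer rsp must be aligned to a 16-byte boundary before making a call. Fine, but the process of making a call pushes the return address (8 bytes) on the stack, so when a function gets control, rsp is not aligned. You have to make that extra space yourself, by pushing something or subtracting 8 from rsp.
- The only registers that the called function is required to preserve (the callee-saved registers) are rbp, rbx, r12, r13, r14, r15. All others are free to be changed by the called function.
- Integers are returned in rax or rdx:rax, and floating point values are returned in xmm0 or xmm1:xmm0.

3. Register definitions in asm_inc.asm of openh264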
%ifdef WIN64 ; Windows x64 ;************************************
DEFAULT REL
BITS 64

; Argument locations for the Microsoft x64 calling convention.
; arg1-arg4 are registers. arg5 and up are stack slots: 8 bytes of return
; address plus 32 bytes of shadow space sit between rsp at entry and arg5,
; hence the constant 40. push_num is not defined in this section; the code
; that uses argN must define it, and the formula treats it as the number of
; 8-byte values pushed since function entry.
%define arg1 rcx
%define arg2 rdx
%define arg3 r8
%define arg4 r9
%define arg5 [rsp + push_num*8 + 40]
%define arg6 [rsp + push_num*8 + 48]
%define arg7 [rsp + push_num*8 + 56]
%define arg8 [rsp + push_num*8 + 64]
%define arg9 [rsp + push_num*8 + 72]
%define arg10 [rsp + push_num*8 + 80]
%define arg11 [rsp + push_num*8 + 88]
%define arg12 [rsp + push_num*8 + 96]

; ABI-neutral register aliases, 64-bit views.
; r0-r3 name the four argument registers; r7 is the stack pointer.
%define r0 rcx
%define r1 rdx
%define r2 r8
%define r3 r9
%define r4 rax
%define r5 r10
%define r6 r11
%define r7 rsp

; 32-bit views of r0-r6.
%define r0d ecx
%define r1d edx
%define r2d r8d
%define r3d r9d
%define r4d eax
%define r5d r10d
%define r6d r11d

; 16-bit views (only the aliases listed here are provided).
%define r0w cx
%define r1w dx
%define r2w r8w
%define r3w r9w
%define r6w r11w

; 8-bit views of r0-r3.
; The low bytes of r8/r9 are spelled r8b/r9b: that is the form the NASM
; manual documents, whereas the Intel-manual spelling r8l/r9l is not
; accepted by every assembler.
%define r0b cl
%define r1b dl
%define r2b r8b
%define r3b r9b

; Flag save/restore mnemonics for 64-bit mode.
%define PUSHRFLAGS pushfq
%define POPRFLAGS popfq

; Return-value register, pointer-sized and 32-bit views.
%define retrq rax
%define retrd eax
%elifdef UNIX64 ; Unix x64 ;************************************
DEFAULT REL
BITS 64

%ifidn __OUTPUT_FORMAT__,elf64
SECTION .note.GNU-stack noalloc noexec nowrite progbits ; Mark the stack as non-executable
%endif

; ---- Argument locations ---------------------------------------------------
; arg1-arg6 are registers. arg7 and up are stack slots located above the
; 8-byte return address. push_num is not defined in this section; the code
; that uses argN must define it, and the formula treats it as the number of
; 8-byte values pushed since function entry.
%define arg1    rdi
%define arg2    rsi
%define arg3    rdx
%define arg4    rcx
%define arg5    r8
%define arg6    r9
%define arg7    [rsp + push_num*8 + 8]
%define arg8    [rsp + push_num*8 + 16]
%define arg9    [rsp + push_num*8 + 24]
%define arg10   [rsp + push_num*8 + 32]
%define arg11   [rsp + push_num*8 + 40]
%define arg12   [rsp + push_num*8 + 48]

; ---- ABI-neutral register aliases, grouped per register --------------------
; Suffixes: none = 64-bit, d = 32-bit, w = 16-bit, b = 8-bit.
; Not every width is provided for every register.
%define r0      rdi
%define r0d     edi
%define r0w     di
%define r0b     dil

%define r1      rsi
%define r1d     esi
%define r1w     si
%define r1b     sil

%define r2      rdx
%define r2d     edx
%define r2w     dx
%define r2b     dl

%define r3      rcx
%define r3d     ecx
%define r3w     cx
%define r3b     cl

%define r4      r8
%define r4d     r8d

%define r5      r9
%define r5d     r9d

%define r6      r10
%define r6d     r10d
%define r6w     r10w

; r7 is the stack pointer.
%define r7      rsp

; ---- Flag save/restore mnemonics for 64-bit mode ---------------------------
%define PUSHRFLAGS  pushfq
%define POPRFLAGS   popfq

; ---- Return-value register, pointer-sized and 32-bit views -----------------
%define retrq   rax
%define retrd   eax
%elifdef X86_32 ; X86_32 ;************************************
BITS 32

; Mark the stack as non-executable for 32-bit ELF objects.
; __OUTPUT_FORMAT__ carries the format name given with -f, so the 32-bit ELF
; format can show up as either "elf" or "elf32"; both spellings are handled
; so the note is not silently dropped.
%ifidn __OUTPUT_FORMAT__,elf
SECTION .note.GNU-stack noalloc noexec nowrite progbits ; Mark the stack as non-executable
%elifidn __OUTPUT_FORMAT__,elf32
SECTION .note.GNU-stack noalloc noexec nowrite progbits ; Mark the stack as non-executable
%endif

; Argument locations: every argument is a 4-byte stack slot above the 4-byte
; return address. push_num is not defined in this section; the code that
; uses argN must define it, and the formula treats it as the number of
; 4-byte values pushed since function entry.
%define arg1 [esp + push_num*4 + 4]
%define arg2 [esp + push_num*4 + 8]
%define arg3 [esp + push_num*4 + 12]
%define arg4 [esp + push_num*4 + 16]
%define arg5 [esp + push_num*4 + 20]
%define arg6 [esp + push_num*4 + 24]
%define arg7 [esp + push_num*4 + 28]
%define arg8 [esp + push_num*4 + 32]
%define arg9 [esp + push_num*4 + 36]
%define arg10 [esp + push_num*4 + 40]
%define arg11 [esp + push_num*4 + 44]
%define arg12 [esp + push_num*4 + 48]

; ABI-neutral register aliases. In 32-bit mode the full-width names (r0-r7)
; and the d-suffixed names map to the same 32-bit registers.
%define r0 eax
%define r1 ecx
%define r2 edx
%define r3 ebx
%define r4 esi
%define r5 edi
%define r6 ebp
%define r7 esp
%define r0d eax
%define r1d ecx
%define r2d edx
%define r3d ebx
%define r4d esi
%define r5d edi
%define r6d ebp

; 16-bit views (only the aliases listed here are provided).
%define r0w ax
%define r1w cx
%define r2w dx
%define r3w bx
%define r6w bp

; 8-bit views of r0-r3.
%define r0b al
%define r1b cl
%define r2b dl
%define r3b bl

; Flag save/restore mnemonics for 32-bit mode.
%define PUSHRFLAGS pushfd
%define POPRFLAGS popfd

; Return-value register. 32-bit mode does not support 64-bit registers, so
; retrq maps to eax as well.
%define retrq eax
%define retrd eax
%endif