atlstdthunk.h
// This is a part of the Active Template Library. // Copyright (C) Microsoft Corporation // All rights reserved. // // This source code is only intended as a supplement to the // Active Template Library Reference and related // electronic documentation provided with the library. // See these sources for detailed information regarding the // Active Template Library product. #ifndef __ATLSTDTHUNK_H__ #define __ATLSTDTHUNK_H__ #pragma once #pragma push_macro("malloc") #undef malloc #pragma push_macro("realloc") #undef realloc #pragma push_macro("free") #undef free #pragma push_macro("new") #undef new #pragma push_macro("HeapAlloc") #undef HeapAlloc #pragma push_macro("HeapFree") #undef HeapFree #pragma push_macro("GetProcessHeap") #undef GetProcessHeap namespace ATL { ///////////////////////////////////////////////////////////////////////////// // Thunks for __stdcall member functions #if defined(_M_IX86) PVOID __stdcall __AllocStdCallThunk(VOID); VOID __stdcall __FreeStdCallThunk(PVOID); #pragma pack(push,1) struct _stdcallthunk { DWORD m_mov; // mov dword ptr [esp+0x4], pThis (esp+0x4 is hWnd) DWORD m_this; // BYTE m_jmp; // jmp WndProc DWORD m_relproc; // relative jmp BOOL Init(DWORD_PTR proc, void* pThis) { m_mov = 0x042444C7; //C7 44 24 0C m_this = PtrToUlong(pThis); m_jmp = 0xe9; m_relproc = DWORD((INT_PTR)proc - ((INT_PTR)this+sizeof(_stdcallthunk))); // write block from data cache and // flush from instruction cache FlushInstructionCache(GetCurrentProcess(), this, sizeof(_stdcallthunk)); return TRUE; } //some thunks will dynamically allocate the memory for the code void* GetCodeAddress() { return this; } void* operator new(size_t) { return __AllocStdCallThunk(); } void operator delete(void* pThunk) { __FreeStdCallThunk(pThunk); } }; #pragma pack(pop) #elif defined(_M_AMD64) PVOID __AllocStdCallThunk(VOID); VOID __FreeStdCallThunk(PVOID); #pragma pack(push,2) struct _stdcallthunk { USHORT RcxMov; // mov rcx, pThis ULONG64 RcxImm; // USHORT RaxMov; // mov rax, 
target ULONG64 RaxImm; // USHORT RaxJmp; // jmp target BOOL Init(DWORD_PTR proc, void *pThis) { RcxMov = 0xb948; // mov rcx, pThis RcxImm = (ULONG64)pThis; // RaxMov = 0xb848; // mov rax, target RaxImm = (ULONG64)proc; // RaxJmp = 0xe0ff; // jmp rax FlushInstructionCache(GetCurrentProcess(), this, sizeof(_stdcallthunk)); return TRUE; } //some thunks will dynamically allocate the memory for the code void* GetCodeAddress() { return this; } void* operator new(size_t) { return __AllocStdCallThunk(); } void operator delete(void* pThunk) { __FreeStdCallThunk(pThunk); } }; #pragma pack(pop) #elif defined (_M_ALPHA) // For ALPHA we will stick the this pointer into a0, which is where // the HWND is. However, we don't actually need the HWND so this is OK. #pragma pack(push,4) struct _stdcallthunk //this should come out to 20 bytes { DWORD ldah_at; // ldah at, HIWORD(func) DWORD ldah_a0; // ldah a0, HIWORD(this) DWORD lda_at; // lda at, LOWORD(func)(at) DWORD lda_a0; // lda a0, LOWORD(this)(a0) DWORD jmp; // jmp zero,(at),0 BOOL Init(DWORD_PTR proc, void* pThis) { ldah_at = (0x279f0000 | HIWORD(proc)) + (LOWORD(proc)>>15); ldah_a0 = (0x261f0000 | HIWORD(pThis)) + (LOWORD(pThis)>>15); lda_at = 0x239c0000 | LOWORD(proc); lda_a0 = 0x22100000 | LOWORD(pThis); jmp = 0x6bfc0000; // write block from data cache and // flush from instruction cache FlushInstructionCache(GetCurrentProcess(), this, sizeof(_stdcallthunk)); return TRUE; } void* GetCodeAddress() { return this; } }; #pragma pack(pop) #elif defined(_SH3_) #pragma pack(push,4) struct _stdcallthunk // this should come out to 16 bytes { WORD m_mov_r0; // mov.l pFunc,r0 WORD m_mov_r1; // mov.l pThis,r1 WORD m_jmp; // jmp @r0 WORD m_nop; // nop DWORD m_pFunc; DWORD m_pThis; BOOL Init(DWORD_PTR proc, void* pThis) { m_mov_r0 = 0xd001; m_mov_r1 = 0xd402; m_jmp = 0x402b; m_nop = 0x0009; m_pFunc = (DWORD)proc; m_pThis = (DWORD)pThis; // write block from data cache and // flush from instruction cache 
FlushInstructionCache(GetCurrentProcess(), this, sizeof(_stdcallthunk)); return TRUE; } void* GetCodeAddress() { return this; } }; #pragma pack(pop) #elif defined(_MIPS_) #pragma pack(push,4) struct _stdcallthunk { WORD m_pFuncHi; WORD m_lui_t0; // lui t0,PFUNC_HIGH WORD m_pFuncLo; WORD m_ori_t0; // ori t0,t0,PFUNC_LOW WORD m_pThisHi; WORD m_lui_a0; // lui a0,PTHIS_HIGH DWORD m_jr_t0; // jr t0 WORD m_pThisLo; WORD m_ori_a0; // ori a0,PTHIS_LOW BOOL Init(DWORD_PTR proc, void* pThis) { m_pFuncHi = HIWORD(proc); m_lui_t0 = 0x3c08; m_pFuncLo = LOWORD(proc); m_ori_t0 = 0x3508; m_pThisHi = HIWORD(pThis); m_lui_a0 = 0x3c04; m_jr_t0 = 0x01000008; m_pThisLo = LOWORD(pThis); m_ori_a0 = 0x3484; // write block from data cache and // flush from instruction cache FlushInstructionCache(GetCurrentProcess(), this, sizeof(_stdcallthunk)); return TRUE; } void* GetCodeAddress() { return this; } }; #pragma pack(pop) #elif defined(_ARM_) #pragma pack(push,4) struct _stdcallthunk // this should come out to 16 bytes { DWORD m_mov_r0; // mov r0, pThis DWORD m_mov_pc; // mov pc, pFunc DWORD m_pThis; DWORD m_pFunc; BOOL Init(DWORD_PTR proc, void* pThis) { m_mov_r0 = 0xE59F0000; m_mov_pc = 0xE59FF000; m_pThis = (DWORD)pThis; m_pFunc = (DWORD)proc; // write block from data cache and // flush from instruction cache FlushInstructionCache(GetCurrentProcess(), this, sizeof(_stdcallthunk)); return TRUE; } void* GetCodeAddress() { return this; } }; #pragma pack(pop) #elif defined(_M_IA64) #pragma pack(push,8) extern "C" void _StdCallThunkProcProc(void); struct _FuncDesc { void* pfn; void* gp; }; struct _stdcallthunk { _FuncDesc m_funcdesc; void* m_pFunc; void* m_pThis; BOOL Init(DWORD_PTR proc, void* pThis) { m_funcdesc.pfn = ((_FuncDesc*)(&_StdCallThunkProcProc))->pfn; // Pointer to actual beginning of StdCallThunkProc m_funcdesc.gp = &m_pFunc; m_pFunc = reinterpret_cast< void* >( proc ); m_pThis = pThis; ::FlushInstructionCache( GetCurrentProcess(), this, sizeof( _stdcallthunk ) ); return TRUE; } 
void* GetCodeAddress() { return( &m_funcdesc ); } }; #pragma pack(pop) //IA64 thunks do not currently use the atlhunk.cpp allocator. #else #error Only ARM, ALPHA, SH3, MIPS, IA64, AMD64 and X86 supported #endif #if defined(_M_IX86) || defined (_M_AMD64) #pragma pack(push,8) class CDynamicStdCallThunk { public: _stdcallthunk *pThunk; CDynamicStdCallThunk() { pThunk = NULL; } ~CDynamicStdCallThunk() { if (pThunk) { delete pThunk; } } BOOL Init(DWORD_PTR proc, void *pThis) { if (pThunk == NULL) { pThunk = new _stdcallthunk; if (pThunk == NULL) { return FALSE; } } return pThunk->Init(proc, pThis); } void* GetCodeAddress() { return pThunk->GetCodeAddress(); } }; #pragma pack(pop) typedef CDynamicStdCallThunk CStdCallThunk; #else typedef _stdcallthunk CStdCallThunk; #endif // _M_IX86 || _M_AMD64 } // namespace ATL #pragma pop_macro("GetProcessHeap") #pragma pop_macro("HeapAlloc") #pragma pop_macro("HeapFree") #pragma pop_macro("new") #pragma pop_macro("free") #pragma pop_macro("realloc") #pragma pop_macro("malloc") #endif // __ATLSTDTHUNK_H__
atlthunk.cpp
/*++

Copyright (c) 1989  Microsoft Corporation

Module Name:

    thunkpool.cpp

Abstract:

    This module contains the support routines for managing a pool of ATL
    thunk structures.

    An ATL thunk contains object code that is built on the fly.  Normally
    ATL allocates these structures from standard usermode heap.  On platforms
    supporting "no-execute" operation, however, heap is protected no-execute
    so this isn't an option.

    The code here manages a separate "heap" of thunk structures that are
    allocated from execute-enabled page allocations.

Author:

    Forrest Foltz (forrestf) 16-May-2002

Environment:

    User mode only.

Revision History:

--*/

#include <windows.h>
#include "atlstdthunk.h"

//
// Everything below (inline assembly, the fs-relative TEB access, the PEB
// offset used by the pool initialization) is x86-specific.  Reject other
// targets here, before any of that code is seen by the compiler, so the
// build fails with this message rather than with assembler errors.
//

#if !defined(_X86_)
#error Unsupported platform
#endif

extern "C"
{

//
// Minimal local copies of the thread-environment structures.  Only the
// leading fields, up to ProcessEnvironmentBlock, are declared.
//

typedef struct _CLIENT_ID {
    HANDLE UniqueProcess;
    HANDLE UniqueThread;
} CLIENT_ID;
typedef CLIENT_ID *PCLIENT_ID;

struct _PEB;
typedef struct _PEB * PPEB;

typedef struct _TEB {
    NT_TIB NtTib;
    PVOID  EnvironmentPointer;
    CLIENT_ID ClientId;
    PVOID ActiveRpcHandle;
    PVOID ThreadLocalStoragePointer;
    PPEB ProcessEnvironmentBlock;
    /* .... Don't need any thing below this*/
} TEB, *PTEB;

//
// Returns the current thread's TEB by loading the pointer stored at
// fs:[0x18].  There is deliberately no return statement: the value is left
// in eax by the inline assembly.
//

_inline struct _TEB * Atl_NtCurrentTeb( void ) { __asm mov eax, fs:[0x18] }

}

#if !defined(PAGE_SIZE)
#define PAGE_SIZE 4096
#endif

#if !defined(DECLSPEC_NOINLINE)
#define DECLSPEC_NOINLINE __declspec(noinline)
#endif

//
// Number of thunk entries carved out of one executable page.
//

#define ATL_THUNKS_PER_PAGE (PAGE_SIZE / sizeof(ATL_THUNK_ENTRY))

//
// Local function prototypes and typedefs
//

BOOL
static
__InitializeThunkPool (
    VOID
    );

typedef
PSINGLE_LIST_ENTRY
(__stdcall *PINTERLOCKED_PUSH_ENTRY_SLIST) (
    PSLIST_HEADER ListHead,
    PSINGLE_LIST_ENTRY ListEntry
    );

typedef
PSINGLE_LIST_ENTRY
(__stdcall *PINTERLOCKED_POP_ENTRY_SLIST) (
    PSLIST_HEADER ListHead
    );

//
// An ATL thunk structure, used to manage free thunks in the pool.
// While an entry is on the free list its storage holds the list link;
// once handed out, the same storage holds the thunk code.
//

typedef union _ATL_THUNK_ENTRY {
    SLIST_ENTRY SListEntry;
    struct ATL::_stdcallthunk Thunk;
} ATL_THUNK_ENTRY, *PATL_THUNK_ENTRY;

//
// Pointer to the process-wide ATL thunk slist.
// PSLIST_HEADER __AtlThunkPool = NULL; // // Special value for __AtlThunkPool indicating that the standard // heap should be used for thunk allocation. // #define ATLTHUNK_USE_HEAP_VALUE (PSLIST_HEADER)UlongToPtr(1) #define ATLTHUNK_USE_HEAP() (__AtlThunkPool == ATLTHUNK_USE_HEAP_VALUE) PINTERLOCKED_PUSH_ENTRY_SLIST __AtlInterlockedPushEntrySList = NULL; PINTERLOCKED_POP_ENTRY_SLIST __AtlInterlockedPopEntrySList = NULL; PVOID __AllocStdCallThunk_cmn ( VOID ) /*++ Routine Description: This function is called by ATL to allocate a thunk structure from executable memory. Arguments: None. Return Value: Returns a pointer to a thunk structure on success. Raises an exception on failure. --*/ { PATL_THUNK_ENTRY lastThunkEntry; PATL_THUNK_ENTRY thunkEntry; PVOID thunkPage; // // Perform initialization if this is the first time through. // if (__AtlThunkPool == NULL) { if (__InitializeThunkPool() == FALSE) { goto outOfMemory; } } if (ATLTHUNK_USE_HEAP()) { // // On a non-NX capable platform, use the standard heap. // thunkEntry = (PATL_THUNK_ENTRY)HeapAlloc(GetProcessHeap(), 0, sizeof(ATL::_stdcallthunk)); if (thunkEntry == NULL) { goto outOfMemory; } return thunkEntry; } // // Attempt to pop a thunk structure from the list and return it // thunkEntry = (PATL_THUNK_ENTRY)__AtlInterlockedPopEntrySList(__AtlThunkPool); if (thunkEntry != NULL) { return &thunkEntry->Thunk; } // // The thunk list was empty. Allocate a new page of executable // memory. // thunkPage = (PATL_THUNK_ENTRY)VirtualAlloc(NULL, PAGE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE); if (thunkPage == NULL) { goto outOfMemory; } // // See if another thread has replenished the pool while we were off // allocating memory. This does not close the window but makes it much // smaller. // // The volatile reference moves the overhead of making the page present // outside of the window. 
// *(DWORD volatile *)thunkPage; thunkEntry = (PATL_THUNK_ENTRY)__AtlInterlockedPopEntrySList(__AtlThunkPool); if (thunkEntry != NULL) { // // The pool has been replenished. Free the page and use the thunk // entry that we just received. // VirtualFree(thunkPage,0,MEM_RELEASE); return thunkEntry; } // // Create an array of thunk structures on the page and insert all but // the last into the free thunk list. // // The last is kept out of the list and represents the thunk allocation. // thunkEntry = (PATL_THUNK_ENTRY)thunkPage; lastThunkEntry = thunkEntry + ATL_THUNKS_PER_PAGE - 1; do { __AtlInterlockedPushEntrySList(__AtlThunkPool,&thunkEntry->SListEntry); thunkEntry += 1; } while (thunkEntry < lastThunkEntry); return thunkEntry; outOfMemory: return NULL; } VOID __FreeStdCallThunk_cmn ( IN PVOID Thunk ) /*++ Routine Description: This function is called by ATL to release a thunk structure back to the process-wide free thunk pool. Arguments: Thunk - supplies a pointer to a thunk structure that was allocated with __AllocStdCallThunk(). Return Value: None. --*/ { PATL_THUNK_ENTRY thunkEntry; if (ATLTHUNK_USE_HEAP()) { // // On a non-NX capable platform, use the standard heap. // HeapFree(GetProcessHeap(),0,Thunk); } else { // // Simply push the free thunk structure back onto the pool // thunkEntry = (PATL_THUNK_ENTRY)Thunk; __AtlInterlockedPushEntrySList(__AtlThunkPool,&thunkEntry->SListEntry); } } BOOL static DECLSPEC_NOINLINE __InitializeThunkPool ( VOID ) /*++ Routine Description: This function is called on the first invocation of __AllocStdCallThunk(). It retrieves a pointer to the process-wide thunk pool SLIST_HEADER, if one already exists, otherwise this routine supplies an initialized SLIST_HEADER. Arguments: None. Return Value: Returns TRUE if initialization succeeded, FALSE otherwise. 
--*/ { #define PEB_POINTER_OFFSET 0x34 PSLIST_HEADER *atlThunkPoolPtr; PSLIST_HEADER atlThunkPool; // // On Win64, a per-process ATL thunk "heap" (anchored in the PEB) is always // mantained as an SLIST. // // On X86, such a heap is conditional. If the OS is < 5.1 (Windows XP) then // thunks are allocated/freed from/to the heap, otherwise they are mantained // as they would be on Win64. // // Two reasons for this: // // - We can't guarantee that the SLIST slot in the PEB is available downlevel // - Downlevel OSs may not offer the SLIST functionality // HMODULE kernel32Module; BOOL result; result = IsProcessorFeaturePresent( 12 /*PF_NX_ENABLED*/ ); if (result == FALSE) { // // NX execution is not happening on this machine. // // Indicate that the regular heap should be used by setting // __AtlThunkPool to a special value. // __AtlThunkPool = ATLTHUNK_USE_HEAP_VALUE; return TRUE; } // // We are running on Windows NT5.1 or later. Get the kernel32 pointers to // InterlockedPushEntrySList and InterlockedPopEntrySList. They can't be // simply imported as this library may run in environments without those // routines. // kernel32Module = LoadLibrary( "kernel32.dll" ); if (kernel32Module != NULL) { __AtlInterlockedPushEntrySList = (PINTERLOCKED_PUSH_ENTRY_SLIST) GetProcAddress( kernel32Module, "InterlockedPushEntrySList" ); __AtlInterlockedPopEntrySList = (PINTERLOCKED_POP_ENTRY_SLIST) GetProcAddress( kernel32Module, "InterlockedPopEntrySList" ); } if (__AtlInterlockedPushEntrySList == NULL || __AtlInterlockedPopEntrySList == NULL) { // // If either address could not be retrieved then fail the // initialization. // return FALSE; } atlThunkPoolPtr = (PSLIST_HEADER *)((PCHAR)(Atl_NtCurrentTeb()->ProcessEnvironmentBlock) + PEB_POINTER_OFFSET); atlThunkPool = *atlThunkPoolPtr; if (atlThunkPool == NULL) { // // The pool list has not yet been initialized. Try to use ours. // // Normally we would simply call InitializeSListHead() to initialize // the SLIST_HEADER. 
However, this creates linkage that conflicts with // modules (such as duser) which also link to ntslist.lib. // // So to avoid that, the SLIST_HEADER is initialized manually. This // code is platform-specific. // atlThunkPool = (PSLIST_HEADER)HeapAlloc( GetProcessHeap(), 0, sizeof(SLIST_HEADER) ); if (atlThunkPool == NULL) { return FALSE; } //InitializeSListHead(atlThunkPool); atlThunkPool->Alignment = 0; if (InterlockedCompareExchangePointer( (PVOID *)atlThunkPoolPtr, atlThunkPool, NULL ) != NULL) { // // Another thread was initializing as well, and won the race. // Free our slist header and use the one that is now there. // HeapFree( GetProcessHeap(), 0, atlThunkPool ); } atlThunkPool = *atlThunkPoolPtr; } __AtlThunkPool = atlThunkPool; return TRUE; } // // Now create the actual routines, one pair within an ATL namespace and one // without. // PVOID __stdcall __AllocStdCallThunk ( VOID ) { return __AllocStdCallThunk_cmn(); } VOID __stdcall __FreeStdCallThunk ( IN PVOID Thunk ) { __FreeStdCallThunk_cmn(Thunk); } namespace ATL { PVOID __stdcall __AllocStdCallThunk ( VOID ) { return __AllocStdCallThunk_cmn(); } VOID __stdcall __FreeStdCallThunk ( IN PVOID Thunk ) { __FreeStdCallThunk_cmn(Thunk); } } // namespace ATL