1. The performance cost of __finally without an abnormal exit.
/* Example 1: the __try body runs to completion (no goto/return inside it),
   so the __finally block is entered by normal fall-through. */
int main()
{
int i;
__try
{
i=1;
printf("__try...\n");
}
__finally
{
/* Reached after the __try body above finishes normally. */
printf("__finally...\n");
}
return i;
}
#1. Before entering the __try block, the compiler sets trylevel to 0:
__try
00401023 mov dword ptr [ebp-4],0
{
#2. The compiler wraps all the code in __finally into a separate code block that ends with a "ret" (just like a sub-function):
__finally
{
printf("__finally...\n");
0040105E push offset string "__finally...\n" (4060FCh)
00401063 call printf (40106Ch)
00401068 add esp,4
$L55563:
0040106B ret
#3. Before leaving the __try block, the compiler first resets the "trylevel" field to -1, then calls the __finally block as a sub-function:
0040103E mov dword ptr [ebp-4],0FFFFFFFFh
00401045 call $L55553 (40105Eh)
}
return i;
0040104A mov eax,dword ptr [i]
}
So, as we can see, apart from an extra "call" instruction and a "ret" instruction (plus the two stores to trylevel), there is no extra overhead.
2. The performance cost of __finally WITH an abnormal exit.
/* Example 2: the __try block is left abnormally via goto, so the __finally
   block has to run before control reaches the returnval label. */
int test()
{
int temp=5;
__try
{
/* Abnormal exit: jumps out of the __try block to the label below. */
goto returnval;
/* Never executed; placed after the goto. */
int i=10;
}
__finally
{
temp=6;
}
/* Skipped by the goto above. */
temp=7;
returnval:
return temp;
}
Everything is the same as in the first scenario. However, in order to execute the __finally block before an abnormal exit, the compiler inserts a call to __local_unwind2 at every statement that leaves the __try block abnormally, such as goto, return, longjmp, etc.
__try
00411A5F mov dword ptr [ebp-4],0
00411A66 push 0FFFFFFFFh
{
goto returnval;
00411A68 lea eax,[ebp-10h]
00411A6B push eax
00411A6C call @ILT+390(__local_unwind2) (41118Bh)
00411A71 add esp,8
00411A74 jmp returnval (411A85h)
__local_unwind2 calls every pending __finally block of the enclosing __try levels, from the innermost level outward.
OK, let's reverse-engineer __local_unwind2 to see what it does internally:
/*compiler pushes 2 parameters for __local_unwind2 with the value below:
P1=ebp-10h= _exception_registration
P2=-1*/
__local_unwind2(_exception_registration, p2)
{
t1=_exception_registration;
t2=-2;
//setup a raw OS level SEH handler for any exception generated in __finally block
push offset __unwind_handler (4010A8h)
push dword ptr fs:[00000000]
fs:[00000000]=esp
do
{
SCOPETABLE *pscopetable=_exception_registration->scopetable;
trylevel=_exception_registration->trylevel;
if(trylevel==-1)
{
break;
}
if(trylevel==p2)
{
break;
}
pre_trylevel=pscopetable[trylevel].previousTryLevel;
t2=pre_trylevel;
_exception_registration->trylevel=pre_trylevel; //prepare for next loop
if(pscopetable[trylevel].lpfnFilter!=0)
{
continue;
}
_NLG_Notify(0x101, pscopetable[trylevel].lpfnHandler);
pscopetable[trylevel].lpfnHandler();
}while(1)
//destroy the raw SEH
pop dword ptr fs:[0]
add esp,0Ch
ret
}
As we can see, __local_unwind2 walks the scopetable entries of every __try block that encloses the call to __local_unwind2 (each scopetable entry stands for one __try). For each entry that has no filter, i.e. each __try with an associated __finally, it calls the __finally handler. This is what local unwind is all about, and this loop is what Jeffrey Richter refers to as the "performance hit".
3. Caution scenarios:
#1. What does the following function return?
/* Example 3: returns from inside the __try block while the __finally block
   assigns to the returned variable afterwards. */
int test()
{
int i;
__try
{
i=1;
printf("__try...\n");
/* Abnormal exit: returning from inside __try; the __finally block below
   still runs before the function actually returns. */
return i;
}
__finally
{
printf("__finally...\n");
/* Assigned while unwinding out of the return statement above. */
i=2;
}
/* Not reached when the __try block exits through the return above. */
i=3;
return i;
}
The answer is 1 instead of 2!!!