1)平台崩溃分析
平台最近一次dump的分析结果如下:
PROCESS_NAME: hotfoxd.exe
ERROR_CODE: (NTSTATUS) 0x80000003 - {EXCEPTION}  Breakpoint  A breakpoint has been reached.
EXCEPTION_CODE: (HRESULT) 0x80000003 (2147483651) - <Unable to get error code text>
EXCEPTION_PARAMETER1: 00000000
EXCEPTION_PARAMETER2: 880b0bd0
EXCEPTION_PARAMETER3: ffffffff
FAULTING_THREAD: 00000d14
BUGCHECK_STR: APPLICATION_FAULT_FILL_PATTERN_ffffffff
PRIMARY_PROBLEM_CLASS: FILL_PATTERN_ffffffff
DEFAULT_BUCKET_ID: FILL_PATTERN_ffffffff
LAST_CONTROL_TRANSFER: from 10211863 to 10211920
STACK_TEXT:
2820f204 10211863 00000007 00000001 00000000 MSVCRTD!_heap_alloc_dbg+0x60
2820f220 10211836 00000007 00000001 00000001 MSVCRTD!_nh_malloc_dbg+0x23
2820f23c 1020e1be 00000007 00000001 2820f258 MSVCRTD!_nh_malloc+0x16
2820f24c 104b7a69 00000007 2820f26c 104ad0d2 MSVCRTD!operator new+0xe
2820f258 104ad0d2 00000007 00000000 2820f9bc MSVCP60D!std::_Allocate+0x19
2820f26c 1048ba3a 00000007 00000000 1020db67 MSVCP60D!std::allocator<char>::allocate+0x12
2820f8f4 1048bccc 00000005 21811380 2820f9bc MSVCP60D!std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Copy+0x7a
2820f908 1048a005 00000005 00000001 2820f9bc MSVCP60D!std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Grow+0x10c
2820f91c 1048a060 02549111 00000005 2820f9bc MSVCP60D!std::basic_string<char,std::char_traits<char>,std::allocator<char> >::assign+0x15
2820f930 10489ad7 02549111 2820f9bc 2820f9fc MSVCP60D!std::basic_string<char,std::char_traits<char>,std::allocator<char> >::assign+0x20
2820f940 00415fcd 02549111 2820f9b8 2820fa64 MSVCP60D!std::basic_string<char,std::char_traits<char>,std::allocator<char> >::basic_string<char,std::char_traits<char>,std::allocator<char> >+0x27
2820f9fc 070c1b82 02549111 ffffffff 024e0a01 hotfoxd!HTX_DBPool::GetDbConnection+0x9d [e:/src/server/hotfox/dbpool.cpp @ 94]
2820fa64 070c1c79 02549111 ffffffff cccccc01 rto!CBasePlugInModule::GetDbConnection+0x52 [e:/src/server/source/source/baseplugin.cpp @ 210]
2820fac4 070a6e55 02549111 ffffffff 00000001 rto!GetDbConnection+0x29 [e:/src/server/source/source/baseplugin.cpp @ 225]
2820fb28 070b79de 02549111 00000002 2820fe18 rto!CDBCGETTER2::GetDBC+0x35 [e:/src/server/source/include/baseplugin.h @ 295]
2820fd90 070a3099 2820fdf0 2820fe84 2820fe2c rto!CSEMQ::GetData+0x6e [e:/src/server/common/semq.cpp @ 692]
2820fe24 070b6dec 2820fef4 2820fe90 21811380 rto!CSepSEMQ::GetData+0x119 [e:/src/server/sevs/rto/psemq.cpp @ 106]
2820fe84 0041a3df 025483c0 00000000 00000000 rto!GetDataProc+0x4c [e:/src/server/common/semq.cpp @ 583]
2820fef4 01635ab7 21814ec0 00000000 00000000 hotfoxd!exec_task_func+0x5f [e:/src/server/hotfox/deamontask.cpp @ 30]
WARNING: Stack unwind information not available. Following frames may be wrong.
2820ff44 016359e4 21815af0 21817af0 860ab628 aced+0xa5ab7
2820ff74 015aa836 21817af0 2820ffb8 2820ffb8 aced+0xa59e4
2820ff84 1020c323 21817af0 00000000 00000000 aced+0x1a836
2820ffb8 7c824829 21811380 00000000 00000000 MSVCRTD!_beginthreadex+0x133
2820ffec 00000000 1020c2b0 21811380 00000000 kernel32!BaseThreadStart+0x34
问题仍然是在VC运行时库的operator new中崩溃。
经查,代码里面存在operator new运算符的重载代码,如下所示:
#ifdef _DEBUG
#define _CRTDBG_MAP_ALLOC
#include <crtdbg.h>
// Sets the CRT debug-heap flags for the process: debug allocation tracking
// (_CRTDBG_ALLOC_MEM_DF) plus the leak check flag (_CRTDBG_LEAK_CHECK_DF).
// Takes no arguments and returns nothing; the previous flag value returned by
// _CrtSetDbgFlag is discarded.
void _RegDebugNew( void )
{
// Earlier variant, left disabled:
// _CrtSetDbgFlag( _CRTDBG_REPORT_FLAG | _CRTDBG_LEAK_CHECK_DF );
_CrtSetDbgFlag( _CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
}
// Placement form of operator new that carries the caller's source location.
//   nSize        - number of bytes requested.
//   lpszFileName - source file name passed through to the debug heap.
//   nLine        - source line number passed through to the debug heap.
// Returns whatever _malloc_dbg returns; the block is allocated with type
// _NORMAL_BLOCK.
void* __cdecl operator new( size_t nSize, const char* lpszFileName, int nLine )
{
return _malloc_dbg( nSize, _NORMAL_BLOCK, lpszFileName, nLine );
}
// Placement form of operator delete matching the (size, file, line) operator
// new. The file name and line arguments are unnamed and ignored.
//   p - pointer to release through the debug heap.
// NOTE(review): this frees the block as _CLIENT_BLOCK, whereas the matching
// operator new allocates it as _NORMAL_BLOCK. The debug heap presumably
// validates the block type on free - confirm against the CRT documentation.
void __cdecl operator delete( void* p, const char* , int )
{
_free_dbg( p, _CLIENT_BLOCK );
}
#endif
解决办法:直接去掉上述operator new/operator delete的重载代码。
2)线程com初始化问题
线程在开始处通过CAutoComInit类初始化和反初始化com环境。
// Scoped helper that initializes COM when constructed and uninitializes it
// when destroyed.
// NOTE: the result of CoInitializeEx is not stored or checked, so the
// destructor calls CoUninitialize even when the initialization failed.
class CAutoComInit
{
public:
// Requests the multithreaded concurrency model for the calling thread;
// the returned HRESULT is discarded.
CAutoComInit()
{
::CoInitializeEx(NULL,COINIT_MULTITHREADED);
}
// Unconditionally calls CoUninitialize.
~CAutoComInit()
{
::CoUninitialize();
}
};
但是,线程在执行过程中可能在别处再次执行COM初始化。例如:A、B两个函数都通过CAutoComInit做了COM初始化,A是线程函数入口,A又调用了B。
这样就存在一个问题:
CoInitializeEx函数存在失败的情况,msdn的描述如下:
This function supports the standard return values E_INVALIDARG, E_OUTOFMEMORY, and E_UNEXPECTED, as well as the following:
S_OK
The COM library was initialized successfully on the calling thread.
S_FALSE
The COM library is already initialized on the calling thread.
RPC_E_CHANGED_MODE
A previous call to CoInitializeEx specified a different concurrency model for the calling thread, or the thread that called CoInitializeEx currently belongs to the neutral threaded apartment.
只有当CoInitializeEx返回S_OK或S_FALSE时,才需要调用CoUninitialize进行反初始化。
如果A调用B,而B调用CoInitializeEx失败(返回RPC_E_CHANGED_MODE、E_INVALIDARG、E_OUTOFMEMORY或E_UNEXPECTED),B返回时却仍然调用了CoUninitialize,
那么这次CoUninitialize抵消的是A的初始化,导致A所在线程此后处于COM环境未初始化的状态。
解决办法:检查初始化的返回值,只有当S_OK/S_FALSE时,才执行反初始化。代码如下:
class CAutoComInit
{
public:
CAutoComInit()
{
com_status_ = ::CoInitializeEx(NULL,COINIT_MULTITHREADED);
}
~CAutoComInit()
{
if( com_status_ == S_OK || com_status_ == S_FALSE )
::CoUninitialize();
}
protected:
HRESULT com_status_;
};
3)增加线程保护机制
从ACE_Thread_Manager派生一个HTX_Thread_Manager类,该类实现线程按名字跟踪、线程保护等措施。
代码如下所示:
///<========HTX_Thread_Manager.h===================
#ifndef H_GRC_THREAD_MANAGER_H
#define H_GRC_THREAD_MANAGER_H
#include <ace/Thread_Manager.h>
#include <ace/Singleton.h>
#include <string>
#include "lssdk.h"
using namespace std;
/// Thread manager derived from ACE_Thread_Manager that adds a spawn()
/// overload taking a thread name as its first argument.
class LS_API HTX_Thread_Manager : public ACE_Thread_Manager
{
public:
    HTX_Thread_Manager();
    virtual ~HTX_Thread_Manager();

    /// Starts a thread that is identified by @a name.
    ///
    /// @param name        Thread name.
    /// @param func        Function the new thread runs.
    /// @param args        Opaque argument passed to @a func.
    /// @param flags       Thread creation flags.
    /// @param t_id        Optional out-parameter for the thread id.
    /// @param t_handle    Optional out-parameter for the thread handle.
    /// @param priority    Thread priority.
    /// @param grp_id      Thread group id.
    /// @param stack       Optional caller-supplied stack.
    /// @param stack_size  Size of the stack.
    /// @return The value returned by ACE_Thread_Manager::spawn.
    int spawn (const char*    name,
               ACE_THR_FUNC   func,
               void*          args       = 0,
               long           flags      = THR_NEW_LWP | THR_JOINABLE,
               ACE_thread_t*  t_id       = 0,
               ACE_hthread_t* t_handle   = 0,
               long           priority   = ACE_DEFAULT_THREAD_PRIORITY,
               int            grp_id     = -1,
               void*          stack      = 0,
               size_t         stack_size = 0);
};
/// Singleton wrapper type for HTX_Thread_Manager, instantiating ACE_Singleton
/// with ACE_Mutex as its lock type.
typedef ACE_Singleton<HTX_Thread_Manager,ACE_Mutex> HTX_Thread_Manager_Singleton;
#endif
///<========HTX_Thread_Manager.cpp=================
#include "HTX_Thread_Manager.h"
#include "auto_cominit.h"
#ifndef PLUGIN_DLL_EXPORTS
#include "log.h"
#else
#include "i_log.h"
extern INative_Logger_Base* g_logger;
#endif
/// Bundle of everything the thread entry routine needs: the thread's name,
/// the user function to run and that function's argument.
///
/// HTX_Thread_Manager::spawn allocates one instance per thread with new;
/// HTX_Thread_Manager_Main_Proc deletes it when the thread finishes, and
/// spawn deletes it itself when the underlying spawn call reports -1.
class HTX_Thread_Proc_Param{
public:
    /// @param name  Thread name; a null pointer is stored as an empty string,
    ///              because constructing a std::string from a null pointer is
    ///              undefined behaviour.
    /// @param func  User thread function.
    /// @param args  Opaque argument forwarded to @a func.
    HTX_Thread_Proc_Param(const char* name,ACE_THR_FUNC func,void *args)
        :name_(name ? name : ""),func_(func),args_(args){
    }
    std::string name_;   ///< Thread name used in the log messages.
    ACE_THR_FUNC func_;  ///< User function executed by the thread.
    void *args_;         ///< Argument passed to func_.
};
/// Thread entry routine that HTX_Thread_Manager::spawn hands to
/// ACE_Thread_Manager::spawn in place of the user function.
///
/// It initializes COM for the thread, logs the thread start, runs the user
/// function inside a catch-all handler, logs a normal or abnormal exit and
/// finally releases the parameter object.
///
/// @param arg  Pointer to the HTX_Thread_Proc_Param allocated by spawn; this
///             routine takes ownership and deletes it.
/// @return The user function's return value, or -1 when the user function
///         threw or @a arg was null.
ACE_THR_FUNC_RETURN HTX_Thread_Manager_Main_Proc(void *arg){
    ///<1. Initialize the COM environment for this thread; it is released
    ///    when AutoComInit goes out of scope at function exit.
    CAutoComInit AutoComInit;

    HTX_Thread_Proc_Param* param = static_cast<HTX_Thread_Proc_Param*>(arg);
    if( param == 0 )
        return -1;

    // Remains valid until param is deleted at the end of this function.
    const char* thread_name = param->name_.c_str();

#ifndef PLUGIN_DLL_EXPORTS
    HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"线程[%s]启动...\n",thread_name);
#else
    g_logger->log(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"线程[%s]启动...\n",thread_name);
#endif

    ///<2. Run the user function; any C++ exception is contained here so it
    ///    cannot escape the thread entry point.
    int ret = -1;
    bool aborted = false;
    try{
        ret = param->func_(param->args_);
    }catch(...){
        aborted = true;
        ret = -1;
    }

    ///<3. Report how the thread ended.
    if( aborted ){
#ifndef PLUGIN_DLL_EXPORTS
        HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"线程[%s]异常退出.\n",thread_name);
#else
        g_logger->log(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"线程[%s]异常退出.\n",thread_name);
#endif
    }else{
#ifndef PLUGIN_DLL_EXPORTS
        HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"线程[%s]退出.\n",thread_name);
#else
        g_logger->log(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"线程[%s]退出.\n",thread_name);
#endif
    }

    ///<4. Single release point for the parameter object.
    delete param;
    return ret;
}
/// Default-constructs the ACE_Thread_Manager base; the body is empty.
HTX_Thread_Manager::HTX_Thread_Manager()
    : ACE_Thread_Manager()
{
}

/// Empty destructor body.
HTX_Thread_Manager::~HTX_Thread_Manager()
{
}
/// Starts a named thread.
///
/// The user function and its argument are packed, together with the name,
/// into a heap-allocated HTX_Thread_Proc_Param, and the thread is started on
/// HTX_Thread_Manager_Main_Proc through ACE_Thread_Manager::spawn. On success
/// the new thread owns the parameter object; when the base spawn returns -1
/// the object is released here.
///
/// @param name  Thread name; a null pointer is treated as an empty name.
/// @param func  User thread function; a null pointer is rejected with -1
///              instead of being called on the new thread.
/// @param args  Opaque argument forwarded to @a func.
/// @return The value returned by ACE_Thread_Manager::spawn, or -1 when
///         @a func is null or the parameter object could not be allocated.
/// The remaining parameters are forwarded unchanged to
/// ACE_Thread_Manager::spawn.
int HTX_Thread_Manager::spawn (const char* name,
                               ACE_THR_FUNC func,
                               void *args,
                               long flags,
                               ACE_thread_t * t_id,
                               ACE_hthread_t *t_handle,
                               long priority,
                               int grp_id,
                               void *stack,
                               size_t stack_size){
    if( func == 0 )
        return -1;

    HTX_Thread_Proc_Param* param =
        new HTX_Thread_Proc_Param(name ? name : "",func,args);
    // NOTE(review): presumably only reachable on pre-standard runtimes where
    // a failed plain new yields a null pointer instead of throwing; harmless
    // elsewhere.
    if( param == 0 )
        return -1;

    const int ret = ACE_Thread_Manager::spawn(HTX_Thread_Manager_Main_Proc,
                                              param,
                                              flags,
                                              t_id,
                                              t_handle,
                                              priority,
                                              grp_id,
                                              stack,
                                              stack_size);
    // No thread was started, so nobody else will free the parameter object.
    if( ret == -1 )
        delete param;
    return ret;
}
平台作出以上修改后,于9月27日晚9点左右重启。