平台崩溃之operator new异常(二十三)-2010-9-27

1)平台崩溃分析
平台最近一次dump的分析结果如下:
PROCESS_NAME:  hotfoxd.exe
ERROR_CODE: (NTSTATUS) 0x80000003 - {EXCEPTION}  Breakpoint  A breakpoint has been reached.
EXCEPTION_CODE: (HRESULT) 0x80000003 (2147483651) - <Unable to get error code text>
EXCEPTION_PARAMETER1:  00000000
EXCEPTION_PARAMETER2:  880b0bd0
EXCEPTION_PARAMETER3:  ffffffff
FAULTING_THREAD:  00000d14
BUGCHECK_STR:  APPLICATION_FAULT_FILL_PATTERN_ffffffff
PRIMARY_PROBLEM_CLASS:  FILL_PATTERN_ffffffff
DEFAULT_BUCKET_ID:  FILL_PATTERN_ffffffff
LAST_CONTROL_TRANSFER:  from 10211863 to 10211920

STACK_TEXT: 
2820f204 10211863 00000007 00000001 00000000 MSVCRTD!_heap_alloc_dbg+0x60
2820f220 10211836 00000007 00000001 00000001 MSVCRTD!_nh_malloc_dbg+0x23
2820f23c 1020e1be 00000007 00000001 2820f258 MSVCRTD!_nh_malloc+0x16
2820f24c 104b7a69 00000007 2820f26c 104ad0d2 MSVCRTD!operator new+0xe
2820f258 104ad0d2 00000007 00000000 2820f9bc MSVCP60D!std::_Allocate+0x19
2820f26c 1048ba3a 00000007 00000000 1020db67 MSVCP60D!std::allocator<char>::allocate+0x12
2820f8f4 1048bccc 00000005 21811380 2820f9bc MSVCP60D!std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Copy+0x7a
2820f908 1048a005 00000005 00000001 2820f9bc MSVCP60D!std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Grow+0x10c
2820f91c 1048a060 02549111 00000005 2820f9bc MSVCP60D!std::basic_string<char,std::char_traits<char>,std::allocator<char> >::assign+0x15
2820f930 10489ad7 02549111 2820f9bc 2820f9fc MSVCP60D!std::basic_string<char,std::char_traits<char>,std::allocator<char> >::assign+0x20
2820f940 00415fcd 02549111 2820f9b8 2820fa64 MSVCP60D!std::basic_string<char,std::char_traits<char>,std::allocator<char> >::basic_string<char,std::char_traits<char>,std::allocator<char> >+0x27
2820f9fc 070c1b82 02549111 ffffffff 024e0a01 hotfoxd!HTX_DBPool::GetDbConnection+0x9d [e:/src/server/hotfox/dbpool.cpp @ 94]
2820fa64 070c1c79 02549111 ffffffff cccccc01 rto!CBasePlugInModule::GetDbConnection+0x52 [e:/src/server/source/source/baseplugin.cpp @ 210]
2820fac4 070a6e55 02549111 ffffffff 00000001 rto!GetDbConnection+0x29 [e:/src/server/source/source/baseplugin.cpp @ 225]
2820fb28 070b79de 02549111 00000002 2820fe18 rto!CDBCGETTER2::GetDBC+0x35 [e:/src/server/source/include/baseplugin.h @ 295]
2820fd90 070a3099 2820fdf0 2820fe84 2820fe2c rto!CSEMQ::GetData+0x6e [e:/src/server/common/semq.cpp @ 692]
2820fe24 070b6dec 2820fef4 2820fe90 21811380 rto!CSepSEMQ::GetData+0x119 [e:/src/server/sevs/rto/psemq.cpp @ 106]
2820fe84 0041a3df 025483c0 00000000 00000000 rto!GetDataProc+0x4c [e:/src/server/common/semq.cpp @ 583]
2820fef4 01635ab7 21814ec0 00000000 00000000 hotfoxd!exec_task_func+0x5f [e:/src/server/hotfox/deamontask.cpp @ 30]
WARNING: Stack unwind information not available. Following frames may be wrong.
2820ff44 016359e4 21815af0 21817af0 860ab628 aced+0xa5ab7
2820ff74 015aa836 21817af0 2820ffb8 2820ffb8 aced+0xa59e4
2820ff84 1020c323 21817af0 00000000 00000000 aced+0x1a836
2820ffb8 7c824829 21811380 00000000 00000000 MSVCRTD!_beginthreadex+0x133
2820ffec 00000000 1020c2b0 21811380 00000000 kernel32!BaseThreadStart+0x34

问题仍然出在VC运行时库的operator new上:栈顶是调试堆分配函数_heap_alloc_dbg,异常码0x80000003为断点异常。
经查,代码里面存在operator new运算符的重载代码,如下所示:
#ifdef _DEBUG
#define _CRTDBG_MAP_ALLOC
#include <crtdbg.h>

// Configures the debug CRT heap by passing
// _CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF to _CrtSetDbgFlag.
// Per the CRT documentation these flags turn on debug-heap allocation
// tracking and an automatic leak check at program exit.
// The value is passed as the complete new flag set; it is not OR-ed with
// whatever flags were previously in effect.
void _RegDebugNew( void )
{
//    _CrtSetDbgFlag( _CRTDBG_REPORT_FLAG | _CRTDBG_LEAK_CHECK_DF );
 _CrtSetDbgFlag( _CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
}

// Placement form of operator new that records the allocating source
// location in the debug heap.
//   nSize        - number of bytes requested by the new-expression.
//   lpszFileName - source file name stored with the block.
//   nLine        - source line number stored with the block.
// Returns whatever _malloc_dbg returns for a _NORMAL_BLOCK allocation.
// NOTE(review): the result is returned unchecked, so a failed allocation
// would hand NULL back to the new-expression - confirm this is intended.
void* __cdecl operator new( size_t nSize, const char* lpszFileName, int nLine )
{
 return _malloc_dbg( nSize, _NORMAL_BLOCK, lpszFileName, nLine );
}

// Placement operator delete with the same extra (file, line) parameters as
// a placement operator new; both extra arguments are ignored here.
// By the language rules this form is only called automatically when a
// constructor throws inside the corresponding placement new-expression.
// NOTE(review): the block is released as _CLIENT_BLOCK. If it was allocated
// as _NORMAL_BLOCK, the debug CRT's block-type check in _free_dbg may
// assert - confirm against the CRT documentation.
void __cdecl operator delete( void* p, const char* , int )
{
    _free_dbg( p, _CLIENT_BLOCK );
}
#endif

解决办法:直接去掉operator new重载代码


2)线程com初始化问题
线程在开始处通过CAutoComInit类初始化和反初始化com环境。
// Scoped COM initializer: the constructor calls CoInitializeEx with
// COINIT_MULTITHREADED and the destructor calls CoUninitialize.
// The HRESULT returned by CoInitializeEx is discarded, so the destructor
// calls CoUninitialize unconditionally - even when initialization failed.
class CAutoComInit
{
public:
 CAutoComInit()
 {
  // Return value is not stored or checked.
  ::CoInitializeEx(NULL,COINIT_MULTITHREADED);
 }
 ~CAutoComInit()
 {
  // Runs regardless of whether the constructor's CoInitializeEx succeeded.
  ::CoUninitialize();
 }
};
但是,线程中可能在别处又执行了一次COM初始化。例如:A、B两个函数都通过CAutoComInit初始化COM,A是线程入口函数,并且A调用了B。
这样就存在一个问题:
CoInitializeEx函数存在失败的情况,msdn的描述如下:
This function supports the standard return values E_INVALIDARG, E_OUTOFMEMORY, and E_UNEXPECTED, as well as the following:
S_OK
The COM library was initialized successfully on the calling thread.
S_FALSE
The COM library is already initialized on the calling thread.
RPC_E_CHANGED_MODE
A previous call to CoInitializeEx specified a different concurrency model for the calling thread, or the thread that called CoInitializeEx currently belongs to the neutral threaded apartment.

只有当返回S_OK和S_FALSE的时候才需要调用CoUninitialize反初始化。
如果A调用B,而B中的CoInitializeEx调用失败(返回E_INVALIDARG/E_OUTOFMEMORY/E_UNEXPECTED),B返回时却仍然调用了CoUninitialize,那么这次CoUninitialize抵消的是A所做的初始化,导致A的后续代码运行在COM环境已被反初始化的线程上。

解决办法:检查初始化的返回值,只有当S_OK/S_FALSE时,才执行反初始化。代码如下:
class CAutoComInit
{
public:
 CAutoComInit()
 {
  com_status_ = ::CoInitializeEx(NULL,COINIT_MULTITHREADED);
 }
 ~CAutoComInit()
 {
  if( com_status_ == S_OK || com_status_ == S_FALSE )
   ::CoUninitialize();
 }
protected:
 HRESULT com_status_;
};


3)增加线程保护机制
从ACE_Thread_Manager派生一个HTX_Thread_Manager类,该类实现线程按名字跟踪、线程保护等措施。
代码如下所示:
///<========HTX_Thread_Manager.h===================
#ifndef H_GRC_THREAD_MANAGER_H
#define H_GRC_THREAD_MANAGER_H
#include <ace/Thread_Manager.h>
#include <ace/Singleton.h>
#include <string>
#include "lssdk.h"
using namespace std;

/// Thread manager derived from ACE_Thread_Manager that adds a spawn()
/// overload taking a thread name as its first argument.
class LS_API HTX_Thread_Manager : public ACE_Thread_Manager
{
public:
    HTX_Thread_Manager();
    virtual ~HTX_Thread_Manager();

    /// Spawns a thread identified by a name.
    ///
    /// @param name        Name associated with the new thread.
    /// @param func        Thread entry function.
    /// @param args        Opaque argument for func (default 0).
    /// @param flags       Thread creation flags
    ///                    (default THR_NEW_LWP | THR_JOINABLE).
    /// @param t_handle    Optional thread handle pointer (default 0).
    /// @param priority    Thread priority
    ///                    (default ACE_DEFAULT_THREAD_PRIORITY).
    /// @param grp_id      Thread group id (default -1).
    /// @param stack       Optional stack pointer (default 0).
    /// @param stack_size  Optional stack size (default 0).
    /// The fifth, unnamed parameter is an optional ACE_thread_t pointer
    /// (default 0).
    /// @return int status code; presumably the result of the underlying
    ///         ACE_Thread_Manager::spawn - confirm in the implementation.
    int spawn(const char* name,
              ACE_THR_FUNC func,
              void* args = 0,
              long flags = THR_NEW_LWP | THR_JOINABLE,
              ACE_thread_t* = 0,
              ACE_hthread_t* t_handle = 0,
              long priority = ACE_DEFAULT_THREAD_PRIORITY,
              int grp_id = -1,
              void* stack = 0,
              size_t stack_size = 0);
};
// Singleton wrapper type for HTX_Thread_Manager: ACE_Singleton instantiated
// with HTX_Thread_Manager as the managed type and ACE_Mutex as the lock type.
typedef ACE_Singleton<HTX_Thread_Manager,ACE_Mutex> HTX_Thread_Manager_Singleton;

#endif

///<========HTX_Thread_Manager.cpp=================

#include "HTX_Thread_Manager.h"
#include "auto_cominit.h"
#ifndef PLUGIN_DLL_EXPORTS
#include "log.h"
#else
#include "i_log.h"
extern INative_Logger_Base* g_logger;
#endif

class HTX_Thread_Proc_Param{
public:
 HTX_Thread_Proc_Param(const char* name,ACE_THR_FUNC func,void *args)
 :name_(name),func_(func),args_(args){
 }
 string name_;
 ACE_THR_FUNC func_;
 void *args_;
};

ACE_THR_FUNC_RETURN HTX_Thread_Manager_Main_Proc(void *arg){
 ///<1.初始化线程COM环境
 CAutoComInit AutoComInit;
 HTX_Thread_Proc_Param* param = (HTX_Thread_Proc_Param*)arg;
#ifndef PLUGIN_DLL_EXPORTS
 HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"线程[%s]启动.../n",param->name_.c_str());
#else
 g_logger->log(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"线程[%s]启动.../n",param->name_.c_str());
#endif
 int ret = -1;
 try{
  ret = param->func_(param->args_);
 }catch(...){
  #ifndef PLUGIN_DLL_EXPORTS
   HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"线程[%s]异常退出./n",param->name_.c_str()); 
  #else
   g_logger->log(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"线程[%s]异常退出./n",param->name_.c_str()); 
  #endif 
  delete param;
  return -1;
 }
#ifndef PLUGIN_DLL_EXPORTS
 HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"线程[%s]退出./n",param->name_.c_str());
#else
 g_logger->log(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"线程[%s]退出./n",param->name_.c_str());
#endif
 delete param;
 return ret;
}

// Default constructor: only invokes the ACE_Thread_Manager default
// constructor; the class adds no state of its own here.
HTX_Thread_Manager::HTX_Thread_Manager()
    : ACE_Thread_Manager()
{
}

// Destructor: empty body, performs no work of its own.
HTX_Thread_Manager::~HTX_Thread_Manager()
{
}
 
int HTX_Thread_Manager::spawn (const char* name,
     ACE_THR_FUNC func,
     void *args,
     long flags,
     ACE_thread_t * t_id,
     ACE_hthread_t *t_handle,
     long priority,
     int grp_id,
     void *stack,
     size_t stack_size){
 HTX_Thread_Proc_Param* param = new HTX_Thread_Proc_Param(name,func,args);
 int ret = ACE_Thread_Manager::spawn(HTX_Thread_Manager_Main_Proc,param,flags,t_id,t_handle,priority,grp_id,stack,stack_size);
 if( ret == -1 )
  delete param;
 return ret;

}

平台作出以上修改后,于9月27日晚9点左右重启。

你可能感兴趣的:(thread,exception,manager,basic,平台,2010)