如果你不投递(POST)Overlapped I/O,那么I/O Completion Ports 只能为你提供一个Queue.
CreateIoCompletionPort的NumberOfConcurrentThreads:
1.只有当第二个参数ExistingCompletionPort为NULL时它才有效,它是个max threads limits.
2.大家有谁把它设置为超出cpu个数的值,当然不只是cpu个数的2倍,而是下面的MAX_THREADS 100甚至更大。
对于这个值的设定,msdn并没有说非得设成cpu个数的2倍,而且也没有把减少线程之间上下文交换这些影响扯到这里来。I/O Completion Ports MSDN:"If your transaction required a lengthy computation, a larger concurrency value will allow more threads to run. Each completion packet may take longer to finish, but more completion packets will be processed at the same time. "。
对于struct OVERLAPPED,我们常会如下扩展,
typedef struct {
WSAOVERLAPPED overlapped; //must be first member? 是的,必须是第一个。如果你不肯定,你可以试试。
SOCKET client_s;
SOCKADDR_IN client_addr;
WORD optCode;//1--read,2--send. 有人常会定义这个数据成员,但也有人不用,争议在send/WSASend,此时的同步和异步是否有必要? 至少我下面的server更本就没用它。
char buf[MAX_BUF_SIZE];
WSABUF wsaBuf;//inited ? 这个不要忘了!
DWORD numberOfBytesTransferred;
DWORD flags;
}QSSOverlapped;//for per connection
我下面的server框架的基本思想是:
One connection VS one thread in worker thread pool ,worker thread performs completionWorkerRoutine.
A Acceptor thread 专门用来accept socket,关联至IOCP,并WSARecv:post Recv Completion Packet to IOCP.
在completionWorkerRoutine中有以下的职责:
1.handle request,当忙时增加completionWorkerThread数量但不超过maxThreads,post Recv Completion Packet to IOCP.
2.timeout时检查是否空闲和当前completionWorkerThread数量,当空闲时保持或减少至minThreads数量.
3.对所有Accepted-socket管理生命周期,这里利用系统的keepalive probes,若想实现业务层"心跳探测"只需将QSS_SIO_KEEPALIVE_VALS_TIMEOUT 改回系统默认的2小时.
下面结合源代码,浅析一下IOCP:
socketserver.h
#ifndef __Q_SOCKET_SERVER__
#define __Q_SOCKET_SERVER__
#include <winsock2.h>
#include <mstcpip.h>
#define QSS_SIO_KEEPALIVE_VALS_TIMEOUT 30*60*1000
#define QSS_SIO_KEEPALIVE_VALS_INTERVAL 5*1000
#define MAX_THREADS 100
#define MAX_THREADS_MIN 10
#define MIN_WORKER_WAIT_TIMEOUT 20*1000
#define MAX_WORKER_WAIT_TIMEOUT 60*MIN_WORKER_WAIT_TIMEOUT
#define MAX_BUF_SIZE 1024
/*当Accepted socket和socket关闭或发生异常时回调CSocketLifecycleCallback*/
typedef void (*CSocketLifecycleCallback)(SOCKET cs,int lifecycle);//lifecycle:0:OnAccepted,-1:OnClose//注意OnClose此时的socket未必可用,可 能已经被非正常关闭或其他异常.
/*协议处理回调*/
typedef int (*InternalProtocolHandler)(LPWSAOVERLAPPED overlapped);//return -1:SOCKET_ERROR
typedef struct Q_SOCKET_SERVER SocketServer;
DWORD initializeSocketServer(SocketServer ** ssp,WORD passive,WORD port,CSocketLifecycleCallback cslifecb,InternalProtocolHandler protoHandler,WORD minThreads,WORD maxThreads,long workerWaitTimeout);
DWORD startSocketServer(SocketServer *ss);
DWORD shutdownSocketServer(SocketServer *ss);
#endif
qsocketserver.c 简称 qss,相应的OVERLAPPED简称qssOl.
#include "socketserver.h"
#include "stdio.h"
typedef struct {
WORD passive;//daemon
WORD port;
WORD minThreads;
WORD maxThreads;
volatile long lifecycleStatus;//0-created,1-starting, 2-running,3-stopping,4-exitKeyPosted,5-stopped
long workerWaitTimeout;//wait timeout
CRITICAL_SECTION QSS_LOCK;
volatile long workerCounter;
volatile long currentBusyWorkers;
volatile long CSocketsCounter;//Accepted-socket引用计数
CSocketLifecycleCallback cslifecb;
InternalProtocolHandler protoHandler;
WORD wsaVersion;//=MAKEWORD(2,0);
WSADATA wsData;
SOCKET server_s;
SOCKADDR_IN serv_addr;
HANDLE iocpHandle;
}QSocketServer;
typedef struct {
WSAOVERLAPPED overlapped;
SOCKET client_s;
SOCKADDR_IN client_addr;
WORD optCode;
char buf[MAX_BUF_SIZE];
WSABUF wsaBuf;
DWORD numberOfBytesTransferred;
DWORD flags;
}QSSOverlapped;
DWORD acceptorRoutine(LPVOID);
DWORD completionWorkerRoutine(LPVOID);
static void adjustQSSWorkerLimits(QSocketServer *qss){
/*adjust size and timeout.*/
/*if(qss->maxThreads <= 0) {
qss->maxThreads = MAX_THREADS;
} else if (qss->maxThreads < MAX_THREADS_MIN) {
qss->maxThreads = MAX_THREADS_MIN;
}
if(qss->minThreads > qss->maxThreads) {
qss->minThreads = qss->maxThreads;
}
if(qss->minThreads <= 0) {
if(1 == qss->maxThreads) {
qss->minThreads = 1;
} else {
qss->minThreads = qss->maxThreads/2;
}
}
if(qss->workerWaitTimeout<MIN_WORKER_WAIT_TIMEOUT)
qss->workerWaitTimeout=MIN_WORKER_WAIT_TIMEOUT;
if(qss->workerWaitTimeout>MAX_WORKER_WAIT_TIMEOUT)
qss->workerWaitTimeout=MAX_WORKER_WAIT_TIMEOUT; */
}
typedef struct{
QSocketServer * qss;
HANDLE th;
}QSSWORKER_PARAM;
static WORD addQSSWorker(QSocketServer *qss,WORD addCounter){
WORD res=0;
if(qss->workerCounter<qss->minThreads||(qss->currentBusyWorkers==qss->workerCounter&&qss->workerCounter<qss->maxThreads)){
DWORD threadId;
QSSWORKER_PARAM * pParam=NULL;
int i=0;
EnterCriticalSection(&qss->QSS_LOCK);
if(qss->workerCounter+addCounter<=qss->maxThreads)
for(;i<addCounter;i++)
{
pParam=malloc(sizeof(QSSWORKER_PARAM));
if(pParam){
pParam->th=CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)completionWorkerRoutine,pParam,CREATE_SUSPENDED,&threadId);
pParam->qss=qss;
ResumeThread(pParam->th);
qss->workerCounter++,res++;
}
}
LeaveCriticalSection(&qss->QSS_LOCK);
}
return res;
}
static void SOlogger(const char * msg,SOCKET s,int clearup){
perror(msg);
if(s>0)
closesocket(s);
if(clearup)
WSACleanup();
}
static int _InternalEchoProtocolHandler(LPWSAOVERLAPPED overlapped){
QSSOverlapped *qssOl=(QSSOverlapped *)overlapped;
printf("numOfT:%d,WSARecvd:%s,\n",qssOl->numberOfBytesTransferred,qssOl->buf);
//Sleep(500);
return send(qssOl->client_s,qssOl->buf,qssOl->numberOfBytesTransferred,0);
}
DWORD initializeSocketServer(SocketServer ** ssp,WORD passive,WORD port,CSocketLifecycleCallback cslifecb,InternalProtocolHandler protoHandler,WORD minThreads,WORD maxThreads,long workerWaitTimeout){
QSocketServer * qss=malloc(sizeof(QSocketServer));
qss->passive=passive>0?1:0;
qss->port=port;
qss->minThreads=minThreads;
qss->maxThreads=maxThreads;
qss->workerWaitTimeout=workerWaitTimeout;
qss->wsaVersion=MAKEWORD(2,0);
qss->lifecycleStatus=0;
InitializeCriticalSection(&qss->QSS_LOCK);
qss->workerCounter=0;
qss->currentBusyWorkers=0;
qss->CSocketsCounter=0;
qss->cslifecb=cslifecb,qss->protoHandler=protoHandler;
if(!qss->protoHandler)
qss->protoHandler=_InternalEchoProtocolHandler;
adjustQSSWorkerLimits(qss);
*ssp=(SocketServer *)qss;
return 1;
}
DWORD startSocketServer(SocketServer *ss){
QSocketServer * qss=(QSocketServer *)ss;
if(qss==NULL||InterlockedCompareExchange(&qss->lifecycleStatus,1,0))
return 0;
qss->serv_addr.sin_family=AF_INET;
qss->serv_addr.sin_port=htons(qss->port);
qss->serv_addr.sin_addr.s_addr=INADDR_ANY;//inet_addr("127.0.0.1");
if(WSAStartup(qss->wsaVersion,&qss->wsData)){
/*这里还有个插曲就是这个WSAStartup被调用的时候,它居然会启动一条额外的线程,当然稍后这条线程会自动退出的.不知WSAClearup又会如何?......*/
SOlogger("WSAStartup failed.\n",0,0);
return 0;
}
qss->server_s=socket(AF_INET,SOCK_STREAM,IPPROTO_IP);
if(qss->server_s==INVALID_SOCKET){
SOlogger("socket failed.\n",0,1);
return 0;
}
if(bind(qss->server_s,(LPSOCKADDR)&qss->serv_addr,sizeof(SOCKADDR_IN))==SOCKET_ERROR){
SOlogger("bind failed.\n",qss->server_s,1);
return 0;
}
if(listen(qss->server_s,SOMAXCONN)==SOCKET_ERROR)/*这里来谈谈backlog, 很多人不知道设成何值,我见到过1,5,50,100的,有人说设定的越大越耗资源,的确,这里设成SOMAXCONN不代表windows会真的使用 SOMAXCONN,而是" If set to SOMAXCONN, the underlying service provider responsible for socket s will set the backlog to a maximum reasonable value. ",同时在现实环境中,不同操作系统支持TCP缓冲队列有所不同,所以还不如让操作系统来决定它的值。像Apache这种服务器:
#ifndef DEFAULT_LISTENBACKLOG
#define DEFAULT_LISTENBACKLOG 511
#endif
*/
{
SOlogger("listen failed.\n",qss->server_s,1);
return 0;
}
qss->iocpHandle=CreateIoCompletionPort(INVALID_HANDLE_VALUE,NULL,0,/*NumberOfConcurrentThreads-->*/qss->maxThreads);
//initialize worker for completion routine.
addQSSWorker(qss,qss->minThreads);
qss->lifecycleStatus=2;
{
QSSWORKER_PARAM * pParam=malloc(sizeof(QSSWORKER_PARAM));
pParam->qss=qss;
pParam->th=NULL;
if(qss->passive){
DWORD threadId;
pParam->th=CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)acceptorRoutine,pParam,0,&threadId);
}else
return acceptorRoutine(pParam);
}
return 1;
}
DWORD shutdownSocketServer(SocketServer *ss){
QSocketServer * qss=(QSocketServer *)ss;
if(qss==NULL||InterlockedCompareExchange(&qss->lifecycleStatus,3,2)!=2)
return 0;
closesocket(qss->server_s/*listen-socket*/);//..other accepted-sockets associated with the listen-socket will not be closed,except WSACleanup is called..
if(qss->CSocketsCounter==0)
qss->lifecycleStatus=4,PostQueuedCompletionStatus(qss->iocpHandle,0,-1,NULL);
WSACleanup();
return 1;
}
DWORD acceptorRoutine(LPVOID ss){
QSSWORKER_PARAM * pParam=(QSSWORKER_PARAM *)ss;
QSocketServer * qss=pParam->qss;
HANDLE curThread=pParam->th;
QSSOverlapped *qssOl=NULL;
SOCKADDR_IN client_addr;
int client_addr_leng=sizeof(SOCKADDR_IN);
SOCKET cs;
free(pParam);
while(1){
printf("accept starting.....\n");
cs/*Accepted-socket*/=accept(qss->server_s,(LPSOCKADDR)&client_addr,&client_addr_leng);
if(cs==INVALID_SOCKET)
{
printf("accept failed:%d\n",GetLastError());
break;
}else{//SO_KEEPALIVE,SIO_KEEPALIVE_VALS 这里是利用系统的"心跳探测",keepalive probes.linux:setsockopt,SOL_TCP:TCP_KEEPIDLE,TCP_KEEPINTVL,TCP_KEEPCNT
struct tcp_keepalive alive,aliveOut;
int so_keepalive_opt=1;
DWORD outDW;
if(!setsockopt(cs,SOL_SOCKET,SO_KEEPALIVE,(char *)&so_keepalive_opt,sizeof(so_keepalive_opt))){
alive.onoff=TRUE;
alive.keepalivetime=QSS_SIO_KEEPALIVE_VALS_TIMEOUT;
alive.keepaliveinterval=QSS_SIO_KEEPALIVE_VALS_INTERVAL;
if(WSAIoctl(cs,SIO_KEEPALIVE_VALS,&alive,sizeof(alive),&aliveOut,sizeof(aliveOut),&outDW,NULL,NULL)==SOCKET_ERROR){
printf("WSAIoctl SIO_KEEPALIVE_VALS failed:%d\n",GetLastError());
break;
}
}else{
printf("setsockopt SO_KEEPALIVE failed:%d\n",GetLastError());
break;
}
}
CreateIoCompletionPort((HANDLE)cs,qss->iocpHandle,cs,0);
if(qssOl==NULL){
qssOl=malloc(sizeof(QSSOverlapped));
}
qssOl->client_s=cs;
qssOl->wsaBuf.len=MAX_BUF_SIZE,qssOl->wsaBuf.buf=qssOl->buf,qssOl->numberOfBytesTransferred=0,qssOl->flags=0;//initialize WSABuf.
memset(&qssOl->overlapped,0,sizeof(WSAOVERLAPPED));
{
DWORD lastErr=GetLastError();
int ret=0;
SetLastError(0);
ret=WSARecv(cs,&qssOl->wsaBuf,1,&qssOl->numberOfBytesTransferred,&qssOl->flags,&qssOl->overlapped,NULL);
if(ret==0||(ret==SOCKET_ERROR&&GetLastError()==WSA_IO_PENDING)){
InterlockedIncrement(&qss->CSocketsCounter);//Accepted-socket计数递增.
if(qss->cslifecb)
qss->cslifecb(cs,0);
qssOl=NULL;
}
if(!GetLastError())
SetLastError(lastErr);
}
printf("accept flags:%d ,cs:%d.\n",GetLastError(),cs);
}//end while.
if(qssOl)
free(qssOl);
if(qss)
shutdownSocketServer((SocketServer *)qss);
if(curThread)
CloseHandle(curThread);
return 1;
}
static int postRecvCompletionPacket(QSSOverlapped * qssOl,int SOErrOccurredCode){
int SOErrOccurred=0;
DWORD lastErr=GetLastError();
SetLastError(0);
//SOCKET_ERROR:-1,WSA_IO_PENDING:997
if(WSARecv(qssOl->client_s,&qssOl->wsaBuf,1,&qssOl->numberOfBytesTransferred,&qssOl->flags,&qssOl->overlapped,NULL)==SOCKET_ERROR
&&GetLastError()!=WSA_IO_PENDING)//this case lastError maybe 64, 10054
{
SOErrOccurred=SOErrOccurredCode;
}
if(!GetLastError())
SetLastError(lastErr);
if(SOErrOccurred)
printf("worker[%d] postRecvCompletionPacket SOErrOccurred=%d,preErr:%d,postedErr:%d\n",GetCurrentThreadId(),SOErrOccurred,lastErr,GetLastError());
return SOErrOccurred;
}
DWORD completionWorkerRoutine(LPVOID ss){
QSSWORKER_PARAM * pParam=(QSSWORKER_PARAM *)ss;
QSocketServer * qss=pParam->qss;
HANDLE curThread=pParam->th;
QSSOverlapped * qssOl=NULL;
DWORD numberOfBytesTransferred=0;
ULONG_PTR completionKey=0;
int postRes=0,handleCode=0,exitCode=0,SOErrOccurred=0;
free(pParam);
while(!exitCode){
SetLastError(0);
if(GetQueuedCompletionStatus(qss->iocpHandle,&numberOfBytesTransferred,&completionKey,(LPOVERLAPPED *)&qssOl,qss->workerWaitTimeout)){
if(completionKey==-1&&qss->lifecycleStatus>=4)
{
printf("worker[%d] completionKey -1:%d \n",GetCurrentThreadId(),GetLastError());
if(qss->workerCounter>1)
PostQueuedCompletionStatus(qss->iocpHandle,0,-1,NULL);
exitCode=1;
break;
}
if(numberOfBytesTransferred>0){
InterlockedIncrement(&qss->currentBusyWorkers);
addQSSWorker(qss,1);
handleCode=qss->protoHandler((LPWSAOVERLAPPED)qssOl);
InterlockedDecrement(&qss->currentBusyWorkers);
if(handleCode>=0){
SOErrOccurred=postRecvCompletionPacket(qssOl,1);
}else
SOErrOccurred=2;
}else{
printf("worker[%d] numberOfBytesTransferred==0 ***** closesocket servS or cs *****,%d,%d ,ol is:%d\n",GetCurrentThreadId(),GetLastError(),completionKey,qssOl==NULL?0:1);
SOErrOccurred=3;
}
}else{ //GetQueuedCompletionStatus rtn FALSE, lastError 64 ,995[timeout worker thread exit.] ,WAIT_TIMEOUT:258
if(qssOl){
SOErrOccurred=postRecvCompletionPacket(qssOl,4);
}else {
printf("worker[%d] GetQueuedCompletionStatus F:%d \n",GetCurrentThreadId(),GetLastError());
if(GetLastError()!=WAIT_TIMEOUT){
exitCode=2;
}else{//wait timeout
if(qss->lifecycleStatus!=4&&qss->currentBusyWorkers==0&&qss->workerCounter>qss->minThreads){
EnterCriticalSection(&qss->QSS_LOCK);
if(qss->lifecycleStatus!=4&&qss->currentBusyWorkers==0&&qss->workerCounter>qss->minThreads){
qss->workerCounter--;//until qss->workerCounter decrease to qss->minThreads
exitCode=3;
}
LeaveCriticalSection(&qss->QSS_LOCK);
}
}
}
}//end GetQueuedCompletionStatus.
if(SOErrOccurred){
if(qss->cslifecb)
qss->cslifecb(qssOl->client_s,-1);
/*if(qssOl)*/{
closesocket(qssOl->client_s);
free(qssOl);
}
if(InterlockedDecrement(&qss->CSocketsCounter)==0&&qss->lifecycleStatus>=3){
//for qss workerSize,PostQueuedCompletionStatus -1
qss->lifecycleStatus=4,PostQueuedCompletionStatus(qss->iocpHandle,0,-1,NULL);
exitCode=4;
}
}
qssOl=NULL,numberOfBytesTransferred=0,completionKey=0,SOErrOccurred=0;//for net while.
}//end while.
//last to do
if(exitCode!=3){
int clearup=0;
EnterCriticalSection(&qss->QSS_LOCK);
if(!--qss->workerCounter&&qss->lifecycleStatus>=4){//clearup QSS
clearup=1;
}
LeaveCriticalSection(&qss->QSS_LOCK);
if(clearup){
DeleteCriticalSection(&qss->QSS_LOCK);
CloseHandle(qss->iocpHandle);
free(qss);
}
}
CloseHandle(curThread);
return 1;
}
------------------------------------------------------------------------------------------------------------------------
对于IOCP的LastError的辨别和处理是个难点,所以请注意我的completionWorkerRoutine的while结构,
结构如下:
while(!exitCode){
if(completionKey==-1){...break;}
if(GetQueuedCompletionStatus){/*在这个if体中只要你投递的OVERLAPPED is not NULL,那么这里你得到的就是它.*/
if(numberOfBytesTransferred>0){
/*在这里handle request,记得要继续投递你的OVERLAPPED哦! */
}else{
/*这里可能客户端或服务端closesocket(the socket),但是OVERLAPPED is not NULL,只要你投递的不为NULL!*/
}
}else{/*在这里的if体中,虽然GetQueuedCompletionStatus return FALSE,但是不代表OVERLAPPED一定为NULL.特别是OVERLAPPED is not NULL的情况下,不要以为LastError发生了,就代表当前的socket无用或发生致命的异常,比如发生lastError:995这种情况下此时的socket有可能是一切正常的可用的,你不应该关闭它.*/
if(OVERLAPPED is not NULL){
/*这种情况下,请不管37,21继续投递吧!在投递后再检测错误.*/
}else{
}
}
if(socket error occured){
}
prepare for next while.
}
行文仓促,难免有错误或不足之处,希望大家踊跃指正评论,谢谢!
这个模型在性能上还是有改进的空间哦!