原文地址: http://blog.csdn.net/wangjia184/article/details/17919667
突然来了这样一个任务:当用户从欧洲以外的国家访问时,采用CDNetworks;但该CDN流量费过于昂贵,因此当用户从欧洲访问时,要求采用欧洲本地的另一个CDN。为了避免改动太多现有代码,最简单的方式就是通过HttpModule,在IIS服务器返回HTTP响应前,查找响应内容中的资源URL域名并进行替换。
这个问题初看上去很简单,但实际上有很多麻烦。
原文地址: http://blog.csdn.net/wangjia184/article/details/17919667
服务器在返回的时候是将响应分隔成了多个chunk,那么有可能目标字符串被分隔到了2个chunk中。
例如,//static.xxxxxxxx.com是查找的目标字符串,可能分布成下图的情况,这给查找带来了很大的不便。
因为这个处理过程会针对所有响应触发,因此应该采用最高效的方式实现,否则对性能的影响很严重。
在Rick Strahl的《Capturing and Transforming ASP.NET Output with Response.Filter》一文中,为了解决第一个问题,他采用托管的HttpModule, 在将多个chunk合并到一起后再进行处理。这样虽然解决了问题,但是对性能造成很大的不利影响。实际上如果某个chunk中没有出现目标字符串的情况下可以完全不用进行处理。
其次,对于字符串的查找可以使用更高效的Boyer-Moore算法。
最后,我决定使用非托管代码实现,这样的话效率是最高的。
从IIS7开始,非托管Module从以前的ISAPI变成了C++ Module, 首先下载
// Registers this module's factory for the RQ_SEND_RESPONSE notification.
//
// Parameters:
//   dwServerVersion - not used by this function.
//   pModuleInfo     - supplies the module id and receives the notification registration.
//   pHttpServer     - stored in g_pHttpServer for later use.
//
// Returns:
//   HRESULT_FROM_WIN32( ERROR_INVALID_PARAMETER ) when either pointer is NULL,
//   HRESULT_FROM_WIN32( ERROR_NOT_ENOUGH_MEMORY ) when the factory pointer is NULL,
//   otherwise whatever SetRequestNotifications returns.
HRESULT
__stdcall
RegisterModule(
    DWORD                         dwServerVersion,
    IHttpModuleRegistrationInfo * pModuleInfo,
    IHttpServer *                 pHttpServer
)
{
    if ( pModuleInfo == NULL || pHttpServer == NULL )
    {
        return HRESULT_FROM_WIN32( ERROR_INVALID_PARAMETER );
    }

    // step 1: remember the IHttpServer and the module context id for future use
    g_pModuleContext = pModuleInfo->GetId();
    g_pHttpServer    = pHttpServer;

    // step 2: create the module factory
    // NOTE(review): plain `new` normally reports failure by throwing rather than
    // returning NULL, so this check is probably unreachable - confirm the build settings.
    CPostProcessHttpModuleFactory * pNewFactory = new CPostProcessHttpModuleFactory();
    if ( pNewFactory == NULL )
    {
        return HRESULT_FROM_WIN32( ERROR_NOT_ENOUGH_MEMORY );
    }

    // step 3: register for server events
    // TODO: register for more server events here
    const HRESULT hrRegister = pModuleInfo->SetRequestNotifications(
        pNewFactory,        /* module factory */
        RQ_SEND_RESPONSE    /* server event mask */,
        0                   /* server post event mask */ );

    // The factory is only deleted here when registration failed; on success it is
    // deliberately left alive.
    if ( FAILED( hrRegister ) )
    {
        delete pNewFactory;
    }

    return hrRegister;
}
然后在CHttpModule的派生类中,重载OnSendResponse方法
// HTTP module that hooks the send-response notification.
class CPostProcessHttpModule : public CHttpModule
{
public:
    // Called for the RQ_SEND_RESPONSE notification this module registers for.
    REQUEST_NOTIFICATION_STATUS
    OnSendResponse(
        IN IHttpContext *          pHttpContext,
        IN ISendResponseProvider * pProvider
    );

private:
    // Tests whether szText begins with szPrefix, looking at no more than
    // nMaxLength characters.
    BOOL StringStartsWith( LPCSTR szText, LPCSTR szPrefix, int nMaxLength = 1024000 );
};

// Skeleton handler: touches nothing and lets the pipeline continue.
REQUEST_NOTIFICATION_STATUS
CPostProcessHttpModule::OnSendResponse(
    IN IHttpContext *          pHttpContext,
    IN ISendResponseProvider * pProvider
)
{
    // Both parameters are unused in this skeleton.
    UNREFERENCED_PARAMETER( pHttpContext );
    UNREFERENCED_PARAMETER( pProvider );

    return RQ_NOTIFICATION_CONTINUE;
}
工程到这里就搭建完成了
%systemroot%\system32\inetsrv\APPCMD.EXE install module /name:HtmlPostProcessModule /image:G:\IISPostProcessModule\bin\PostProcessModule_x64.dll /add:false
其中 /image: 为dll的绝对路径。
在Modules中,点击Configure Native Modules, 在弹出窗口中,勾上刚安装的module,这样就可以了。
如果要卸载,可以使用如下命令
%systemroot%\system32\inetsrv\APPCMD.EXE uninstall module HtmlPostProcessModule
其中HtmlPostProcessModule是安装的时候给的module name。
当服务器返回请求的时候,OnSendResponse函数会被调用。通过下面的代码可以遍历所有的chunk
// Walks every entity chunk of the outgoing response and singles out the
// in-memory ones (the only kind whose bytes can be inspected directly here).
//
// Parameters:
//   pHttpContext - request context; the response object is obtained from it.
//   pProvider    - not used by this skeleton.
//
// Returns RQ_NOTIFICATION_CONTINUE in every case.
REQUEST_NOTIFICATION_STATUS
CPostProcessHttpModule::OnSendResponse(
    IN IHttpContext *          pHttpContext,
    IN ISendResponseProvider * pProvider
)
{
    UNREFERENCED_PARAMETER( pProvider );

    // Validate each pointer before it is dereferenced.
    if( pHttpContext == NULL )
        return RQ_NOTIFICATION_CONTINUE;

    IHttpResponse * pHttpResponse = pHttpContext->GetResponse();
    if( pHttpResponse == NULL )
        return RQ_NOTIFICATION_CONTINUE;

    HTTP_RESPONSE * pResponseStruct = pHttpResponse->GetRawHttpResponse();
    if( pResponseStruct == NULL )
        return RQ_NOTIFICATION_CONTINUE;

    for( int i = 0; i < pResponseStruct->EntityChunkCount; i++ )
    {
        // pEntityChunks is an array of chunk structures; work through a pointer
        // so that later modifications land in the response itself.
        PHTTP_DATA_CHUNK pChunk = &(pResponseStruct->pEntityChunks[i]);
        if( pChunk->DataChunkType == HttpDataChunkFromMemory )
        {
            // TODO : process the in-memory chunk
        }
    }

    return RQ_NOTIFICATION_CONTINUE;
}
// Values compared against the DataChunkType field of a data chunk.
// HttpDataChunkFromMemory is the one the module looks for before reading
// the chunk's FromMemory buffer.
typedef enum _HTTP_DATA_CHUNK_TYPE
{
    HttpDataChunkFromMemory          = 0,
    HttpDataChunkFromFileHandle      = 1,
    HttpDataChunkFromFragmentCache   = 2,
    HttpDataChunkFromFragmentCacheEx = 3,
    HttpDataChunkMaximum             = 4   // presumably a count/sentinel rather than a real chunk kind - confirm
} HTTP_DATA_CHUNK_TYPE;

typedef HTTP_DATA_CHUNK_TYPE * PHTTP_DATA_CHUNK_TYPE;
对目标字符串采用最高效的BM算法查找,boost准标准库中已经有现成的实现了,直接用即可。
#include <boost/algorithm/searching/boyer_moore.hpp>

// The search target. A string literal is constant data, so it is held
// through a pointer-to-const.
const char * szPattern = "//static.xxxxxxxx.com";
const int PATTERN_SIZE = (int)strlen(szPattern);

// The searcher is built once from the pattern and can then be applied to any
// number of buffers.
// NOTE(review): this code expects the searcher to return a single iterator;
// some Boost releases return a pair of iterators instead - confirm against
// the Boost version in use.
boost::algorithm::boyer_moore<const char*> bm( szPattern, szPattern + PATTERN_SIZE );

// [pStart, pEnd) is the byte range of the current in-memory chunk.
char * pStart = (char *)pChunk->FromMemory.pBuffer;
char * pEnd = pStart + pChunk->FromMemory.BufferLength;

// Find every occurrence of the pattern inside this chunk.
char * pMatch = pStart;
for(;;){
    pMatch = bm( pMatch, pEnd);
    if( !pMatch || pMatch >= pEnd )
        break;  // no further occurrence

    // TO DO:
    // pMatch is the address where the pattern was found

    // Continue searching right after this occurrence.
    pMatch += PATTERN_SIZE;
    if( pMatch >= pEnd )
        break;
}
// detect if there could be uncompleted partner at the end of this chunk int nChunkRemaingChars = 0; if( i < pResponseStruct->EntityChunkCount - 1 ){ int j = PATTERN_SIZE - 1; for( ; j > 0; j--){ char * pFirst = &pStart[pChunk->FromMemory.BufferLength - j]; if( StringStartsWith( pFirst, szPattern, j) ){ nChunkRemaingChars = j; dwNewSize -= nChunkRemaingChars; // the end part is moved to next chunk to process break; } } }
LPBYTE pBuffer = (LPBYTE)pHttpContext->AllocateRequestMemory(dwNewSize);
// TODO : modify the new chunk

// Set back the new chunk point
pChunk->FromMemory.pBuffer = pBuffer;
pChunk->FromMemory.BufferLength = dwNewSize;
这里没有使用WriteEntityChunks方法来写入新的chunk,而是直接修改原有的chunk。MSDN上说使用该方法时一个chunk的大小最大只能为65534;而通过直接修改chunk的方法,我测试过,一次写入650K都不成问题。
// Returns TRUE when szText begins with szPrefix.
//
// At most nMaxLength characters are compared, so a caller can test only the
// first nMaxLength characters of szPrefix against a buffer that is not
// NUL-terminated (the buffer must then hold at least nMaxLength readable bytes).
// Comparison also stops, successfully, at the terminating NUL of szPrefix.
BOOL CPostProcessHttpModule::StringStartsWith(LPCSTR szText, LPCSTR szPrefix, int nMaxLength /* = 1024000 */)
{
    for( int i = 0; i < nMaxLength; i++){
        if( szPrefix[i] == 0 )
            return TRUE;
        if( szText[i] != szPrefix[i] )
            return FALSE;
    }
    return TRUE;
}

// Replaces every occurrence of "//static.xxxxxxxx.com" with "//cdn.xxxxxxxx.com"
// in the in-memory entity chunks of text/html and application/json responses.
//
// Chunks are processed one at a time. Because an occurrence may straddle a
// chunk boundary, a chunk tail that looks like the beginning of the pattern is
// cut off ("held back") and resolved together with the start of the next
// chunk. Chunks that need no change are left untouched; changed chunks get a
// new buffer from AllocateRequestMemory and the chunk descriptor is repointed.
//
// Parameters:
//   pHttpContext - request context; supplies the response and the allocator.
//   pProvider    - used only to report an allocation failure.
//
// Returns RQ_NOTIFICATION_CONTINUE normally, or RQ_NOTIFICATION_FINISH_REQUEST
// after setting an out-of-memory error status when a replacement buffer cannot
// be allocated (continuing would silently drop held-back bytes).
//
// Known limitations:
//   - an occurrence spread over three or more chunks is not replaced;
//   - NOTE(review): the body length can change but no header is adjusted
//     here - confirm how Content-Length is produced for these responses.
REQUEST_NOTIFICATION_STATUS CPostProcessHttpModule::OnSendResponse( IN IHttpContext * pHttpContext, IN ISendResponseProvider * pProvider )
{
    // Validate each pointer before it is dereferenced.
    if( pHttpContext == NULL )
        return RQ_NOTIFICATION_CONTINUE;

    IHttpResponse * pHttpResponse = pHttpContext->GetResponse();
    if( pHttpResponse == NULL )
        return RQ_NOTIFICATION_CONTINUE;

    HTTP_RESPONSE * pResponseStruct = pHttpResponse->GetRawHttpResponse();
    if( pResponseStruct == NULL )
        return RQ_NOTIFICATION_CONTINUE;

    // Only HTML and JSON responses are rewritten.
    USHORT cchContentType = 0;
    PCSTR pszContentType = pHttpResponse->GetHeader( HttpHeaderContentType, &cchContentType);
    if( pszContentType == NULL )
        return RQ_NOTIFICATION_CONTINUE;
    if( !StringStartsWith( pszContentType, "application/json")
        && !StringStartsWith( pszContentType, "text/html") )
        return RQ_NOTIFICATION_CONTINUE;

    const char * szPattern = "//static.xxxxxxxx.com";
    const char * szReplace = "//cdn.xxxxxxxx.com";
    const int PATTERN_SIZE = (int)strlen(szPattern);
    const int REPLACE_SIZE = (int)strlen(szReplace);

    // NOTE(review): this code expects the searcher to return a single iterator;
    // some Boost releases return a pair of iterators instead - confirm against
    // the Boost version in use.
    boost::algorithm::boyer_moore<const char*> bm( szPattern, szPattern + PATTERN_SIZE );

    // Number of bytes held back from the end of the previous chunk. They are
    // always equal to the first nLastChunkChars bytes of szPattern.
    int nLastChunkChars = 0;

    std::vector<int> lstAppearance;   // match offsets in the current chunk (reused per chunk)
    const int nChunkCount = pResponseStruct->EntityChunkCount;

    for( int i = 0; i < nChunkCount; i++)
    {
        PHTTP_DATA_CHUNK pChunk = &(pResponseStruct->pEntityChunks[i]);

        // Bytes are only held back when the next chunk is a non-empty in-memory
        // chunk (see step 3), so nothing is pending whenever a chunk is skipped.
        if( pChunk->DataChunkType != HttpDataChunkFromMemory || pChunk->FromMemory.BufferLength == 0 )
            continue;

        char * pStart = (char *)pChunk->FromMemory.pBuffer;
        const int nLength = (int)pChunk->FromMemory.BufferLength;
        char * pEnd = pStart + nLength;

        // ---- step 1: resolve the bytes held back from the previous chunk ----
        // Try every position s inside the held-back bytes as the start of an
        // occurrence that is completed by the beginning of this chunk.
        BOOL bHeadMatch = FALSE;
        int nHeadKeep = nLastChunkChars;  // held-back bytes to emit verbatim
        int nHeadConsumed = 0;            // leading bytes of this chunk used up by the head match
        for( int s = 0; s < nLastChunkChars; s++){
            const int nOverlap = nLastChunkChars - s;     // held-back bytes belonging to the candidate
            const int nNeeded = PATTERN_SIZE - nOverlap;  // bytes this chunk has to contribute
            if( nNeeded > nLength )
                break;  // chunk too short; later candidates need even more bytes

            // held[s..) must itself be a prefix of the pattern, and the chunk
            // must continue with the remainder of the pattern.
            if( StringStartsWith( szPattern + s, szPattern, nOverlap)
                && StringStartsWith( pStart, szPattern + nOverlap, nNeeded) ){
                bHeadMatch = TRUE;
                nHeadKeep = s;
                nHeadConsumed = nNeeded;
                break;
            }
        }

        // ---- step 2: find all occurrences fully inside this chunk ----
        lstAppearance.clear();
        char * pMatch = pStart + nHeadConsumed;   // skip what the head match used up
        while( pMatch < pEnd ){
            pMatch = bm( pMatch, pEnd);
            if( !pMatch || pMatch >= pEnd )
                break;
            lstAppearance.push_back( (int)(pMatch - pStart) );
            pMatch += PATTERN_SIZE;
        }
        const int nMatchedEnd = lstAppearance.empty()
            ? nHeadConsumed
            : lstAppearance.back() + PATTERN_SIZE;

        // ---- step 3: hold back a tail that may start an occurrence ----
        // Only done when the following chunk is a non-empty in-memory chunk;
        // otherwise the tail could not be re-emitted at the right place.
        int nChunkRemaingChars = 0;
        const BOOL bNextIsMemory = ( i + 1 < nChunkCount )
            && pResponseStruct->pEntityChunks[i + 1].DataChunkType == HttpDataChunkFromMemory
            && pResponseStruct->pEntityChunks[i + 1].FromMemory.BufferLength > 0;
        if( bNextIsMemory ){
            // The candidate tail must lie after everything already matched and
            // must not be longer than the chunk itself.
            int j = PATTERN_SIZE - 1;
            if( j > nLength - nMatchedEnd )
                j = nLength - nMatchedEnd;
            for( ; j > 0; j--){
                if( StringStartsWith( pEnd - j, szPattern, j) ){
                    nChunkRemaingChars = j;
                    break;
                }
            }
        }

        // ---- step 4: size of the rewritten chunk ----
        int nNewSize = nHeadKeep
                     + ( bHeadMatch ? REPLACE_SIZE : 0 )
                     + ( nLength - nHeadConsumed )
                     + (int)lstAppearance.size() * ( REPLACE_SIZE - PATTERN_SIZE )
                     - nChunkRemaingChars;
        if( nNewSize == 0 && nChunkRemaingChars > 0 ){
            // Holding back the tail would leave an empty chunk; emit the bytes
            // here instead (a boundary-straddling occurrence is then missed).
            nNewSize = nChunkRemaingChars;
            nChunkRemaingChars = 0;
        }

        const BOOL bRequireModification =
            nLastChunkChars > 0 || !lstAppearance.empty() || nChunkRemaingChars > 0;
        if( !bRequireModification )
            continue;   // nothing pending before or after this chunk

        // ---- step 5: build the rewritten chunk ----
        LPBYTE pBuffer = (LPBYTE)pHttpContext->AllocateRequestMemory( (DWORD)nNewSize );
        ATLASSERT(pBuffer);
        if( pBuffer == NULL ){
            // Bytes held back from the previous chunk are already removed from
            // the response; fail the request rather than send a corrupted body.
            if( pProvider != NULL )
                pProvider->SetErrorStatus( HRESULT_FROM_WIN32( ERROR_NOT_ENOUGH_MEMORY ) );
            return RQ_NOTIFICATION_FINISH_REQUEST;
        }

        int nOffset = 0;               // bytes written to the new buffer so far
        int nLastEnd = nHeadConsumed;  // read position in the original buffer

        if( nHeadKeep > 0 ){
            // Held-back bytes that did not turn out to start an occurrence.
            memcpy_s( pBuffer, nNewSize, szPattern, nHeadKeep);
            nOffset = nHeadKeep;
        }
        if( bHeadMatch ){
            // Replacement for the occurrence that straddled the chunk boundary.
            memcpy_s( pBuffer + nOffset, nNewSize - nOffset, szReplace, REPLACE_SIZE);
            nOffset += REPLACE_SIZE;
        }

        std::vector<int>::iterator iter;
        for( iter = lstAppearance.begin(); iter != lstAppearance.end(); ++iter)
        {
            const int nPos = *iter;
            if( nPos > nLastEnd ){
                // Unchanged bytes between the previous occurrence and this one.
                memcpy_s( pBuffer + nOffset, nNewSize - nOffset, pStart + nLastEnd, nPos - nLastEnd);
                nOffset += nPos - nLastEnd;
            }
            memcpy_s( pBuffer + nOffset, nNewSize - nOffset, szReplace, REPLACE_SIZE);
            nOffset += REPLACE_SIZE;
            nLastEnd = nPos + PATTERN_SIZE;
        }

        // Unchanged bytes after the last occurrence, excluding the held-back tail.
        const int nTailCopy = nLength - nChunkRemaingChars - nLastEnd;
        if( nTailCopy > 0 ){
            memcpy_s( pBuffer + nOffset, nNewSize - nOffset, pStart + nLastEnd, nTailCopy);
            nOffset += nTailCopy;
        }
        ATLASSERT( nOffset == nNewSize );

        // Repoint the chunk at the rewritten data. WriteEntityChunks is
        // deliberately not used; the chunk descriptor is modified in place.
        pChunk->FromMemory.pBuffer = pBuffer;
        pChunk->FromMemory.BufferLength = (ULONG)nNewSize;

        nLastChunkChars = nChunkRemaingChars;
    }// for each entity chunk

    return RQ_NOTIFICATION_CONTINUE;
}