标 题: 获取电驴首页推荐信息和指定栏目信息
作 者: itdef
链 接: http://www.cnblogs.com/itdef/p/4081963.html
欢迎转帖 请保持文本完整并注明出处
/*******************************************************************************
 * @file
 * @author  def< qq group: 324164944 >
 * @blog    http://www.cnblogs.com/itdef/
 * @brief   Downloads the VeryCD home page and a list of entry pages, then
 *          prints the home-page recommendations and each entry's title text.
 ******************************************************************************/

#include "stdafx.h"

#include <afxinet.h>
#include <atlsimpstr.h>

#include <cstring>
#include <fstream>
#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

#ifdef _DEBUG
#define new DEBUG_NEW
#endif

// The file-name / marker parameters are `const char*` so that string literals
// can be passed legally; callers holding a `char*` keep working unchanged.
int GetHttpFileData(CString strUrl, const char* DownloadHtmFileName);
int ParseHomePageDownloadFile(const char* szfileName);
int UTF8Str2GBK(const string& strUTF8, string& strGBK);
void GetHomePageRecommend(const char* szName, const string& strGbk);

// The one and only application object.
CWinApp theApp;

/// Scratch file every downloaded page is written to before it is parsed.
static const char kDownloadFileName[] = "HtmDownloadFile";

/// Line printed between the reports of two pages.
static const char kSeparator[] = "****************************************************";

/**
 * @brief Reads a whole file and converts its content with UTF8Str2GBK().
 *
 * @param szfileName  Path of the file to read; NULL is rejected.
 * @param strGbk      Receives the converted text.
 * @return 0 on success; -1 if the name is NULL, the file cannot be opened,
 *         the conversion fails or the converted text is empty.
 */
static int LoadFileAsGbk(const char* szfileName, string& strGbk)
{
    if (NULL == szfileName)
    {
        return -1;
    }

    ifstream fs(szfileName);
    if (!fs.is_open())
    {
        cerr << "open file error: " << szfileName << endl;
        return -1;
    }

    // Pull the complete file content into one string via a string stream.
    stringstream ss;
    ss << fs.rdbuf();
    fs.close();
    const string strUtf8 = ss.str();

    if (0 != UTF8Str2GBK(strUtf8, strGbk) || strGbk.empty())
    {
        cerr << "transfer utf8 to gbk error" << endl;
        return -1;
    }
    return 0;
}

/**
 * @brief Prints the text between the first @p strOpen and the next @p strClose.
 *
 * Nothing is printed when either delimiter is missing.
 *
 * @param strText   Text to search.
 * @param strOpen   Opening delimiter (not printed).
 * @param strClose  Closing delimiter (not printed).
 */
static void PrintDelimitedText(const string& strText,
                               const string& strOpen,
                               const string& strClose)
{
    const string::size_type nOpen = strText.find(strOpen);
    if (string::npos == nOpen)
    {
        return;
    }

    // The payload starts right after the opening delimiter.
    const string::size_type nBegin = nOpen + strOpen.size();
    const string::size_type nClose = strText.find(strClose, nBegin);
    if (string::npos == nClose)
    {
        return;
    }

    cout << strText.substr(nBegin, nClose - nBegin) << endl;
}

/**
 * @brief Parses a downloaded entry page and prints its title information.
 *
 * Prints the content of the first `<title>` element and of the first
 * `<div class="cv-title">` element, each on its own line, when present.
 *
 * @param szfileName  Path of the downloaded page.
 * @return 0 on success, -1 if the file could not be loaded or converted.
 */
int ParseUpdateFile(const char* szfileName)
{
    string strGbk;
    if (0 != LoadFileAsGbk(szfileName, strGbk))
    {
        return -1;
    }

    PrintDelimitedText(strGbk, "<title>", "</title>");
    PrintDelimitedText(strGbk, "<div class=\"cv-title\">", "</div>");
    return 0;
}

/**
 * @brief Downloads one entry page and prints its title information.
 *
 * @param szHtmAddress  URL of the entry page.
 */
void ShowUpdateInfo(const char* szHtmAddress)
{
    if (0 != GetHttpFileData(szHtmAddress, kDownloadFileName))
    {
        cerr << "GetHttpFileData error once" << endl;
        // The scratch file is incomplete after a failed download; do not parse it.
        return;
    }

    if (0 != ParseUpdateFile(kDownloadFileName))
    {
        cerr << "ParseUpdateFile error once" << endl;
    }
}

/**
 * @brief Downloads the home page and prints its recommendation lists.
 *
 * @param szHomePageAddress  URL of the home page.
 */
void ShowHomePageElement(const char* szHomePageAddress)
{
    if (0 != GetHttpFileData(szHomePageAddress, kDownloadFileName))
    {
        cerr << "GetHttpFileData error once" << endl;
        // The scratch file is incomplete after a failed download; do not parse it.
        return;
    }

    if (0 != ParseHomePageDownloadFile(kDownloadFileName))
    {
        cerr << "ParseHomePageDownloadFile error once" << endl;
    }
}

/**
 * @brief Program entry point: reports the home page, then every listed entry.
 *
 * @return 0 on success, 1 if MFC could not be initialised.
 */
int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
{
    int nRetCode = 0;

    // Initialise MFC and report an error on failure.
    if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
    {
        // TODO: change the error code to suit your needs.
        _tprintf(_T("错误: MFC 初始化失败\n"));
        nRetCode = 1;
    }
    else
    {
        // Entry pages whose title information is printed after the home page.
        static const char* const kEntryUrls[] =
        {
            "http://www.verycd.com/entries/790244/",
            "http://www.verycd.com/entries/519062/",
            "http://www.verycd.com/entries/780306/",
            "http://www.verycd.com/entries/522227/",
            "http://www.verycd.com/entries/507338/",
            "http://www.verycd.com/entries/515005/",
            "http://www.verycd.com/entries/794197/",
            "http://www.verycd.com/entries/511135/",
        };
        const size_t nEntryCount = sizeof(kEntryUrls) / sizeof(kEntryUrls[0]);

        ShowHomePageElement("http://www.verycd.com/");
        cout << kSeparator << endl;

        for (size_t i = 0; i < nEntryCount; ++i)
        {
            ShowUpdateInfo(kEntryUrls[i]);
            cout << kSeparator << endl;
        }
    }

    system("pause");
    return nRetCode;
}

/**
 * @brief Converts UTF-8 text to the system ANSI code page.
 *
 * The conversion goes through UTF-16 (MultiByteToWideChar with CP_UTF8, then
 * WideCharToMultiByte with CP_ACP). Input is read as a NUL-terminated string,
 * so anything after an embedded NUL is ignored.
 * NOTE(review): the result is GBK only when the ANSI code page of the machine
 * is 936 - confirm on the target system.
 *
 * @param strUTF8  Text to convert.
 * @param strGBK   Receives the converted text; left untouched on failure.
 * @return 0 on success, -1 if either conversion step fails.
 */
int UTF8Str2GBK(const string& strUTF8, string& strGBK)
{
    // Step 1: UTF-8 -> UTF-16. With a source length of -1 the returned size
    // includes the terminating NUL, so a valid result is never 0.
    const int nWideLen = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0);
    if (nWideLen <= 0)
    {
        return -1;
    }
    vector<WCHAR> wideBuf(static_cast<size_t>(nWideLen));
    if (0 == MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, &wideBuf[0], nWideLen))
    {
        return -1;
    }

    // Step 2: UTF-16 -> ANSI code page. The output buffer must be `char`, not
    // `TCHAR`, because WideCharToMultiByte always writes narrow characters.
    const int nAnsiLen = WideCharToMultiByte(CP_ACP, 0, &wideBuf[0], -1, NULL, 0, NULL, NULL);
    if (nAnsiLen <= 0)
    {
        return -1;
    }
    vector<char> ansiBuf(static_cast<size_t>(nAnsiLen));
    if (0 == WideCharToMultiByte(CP_ACP, 0, &wideBuf[0], -1, &ansiBuf[0], nAnsiLen, NULL, NULL))
    {
        return -1;
    }

    strGBK.assign(&ansiBuf[0]);
    return 0;
}

/**
 * @brief Parses the downloaded home page and prints both recommendation lists.
 *
 * @param szfileName  Path of the downloaded home page.
 * @return 0 on success, -1 if the file could not be loaded or converted.
 */
int ParseHomePageDownloadFile(const char* szfileName)
{
    string strGbk;
    if (0 != LoadFileAsGbk(szfileName, strGbk))
    {
        return -1;
    }

    cout << "首页大推" << endl;
    GetHomePageRecommend("VeryCD.TrackEvent('base','首页大推',", strGbk);

    cout << "首页小推" << endl;
    GetHomePageRecommend("VeryCD.TrackEvent('base','首页小推',", strGbk);

    return 0;
}

/**
 * @brief Prints every distinct keyword that follows @p szName in @p strGbk.
 *
 * For each occurrence of the marker, the keyword is the text between the
 * character right after the marker (expected to be an opening quote) and the
 * next "')". Keywords are collected in a set, so duplicates are printed once
 * and the output is sorted.
 *
 * @param szName  Marker text that precedes each keyword; NULL prints nothing.
 * @param strGbk  Page text to search.
 */
void GetHomePageRecommend(const char* szName, const string& strGbk)
{
    if (NULL == szName)
    {
        return;
    }

    // Skip the marker itself plus the single quote that opens the keyword.
    const string::size_type nSkip = strlen(szName) + 1;
    static const char szTail[] = "')";

    set<string> setKeyWord;
    string::size_type nStart = strGbk.find(szName);
    while (string::npos != nStart)
    {
        const string::size_type nEnd = strGbk.find(szTail, nStart + 1);
        if (string::npos == nEnd)
        {
            break;
        }

        // Only keep non-empty keywords.
        if (nEnd > nStart + nSkip)
        {
            setKeyWord.insert(strGbk.substr(nStart + nSkip, nEnd - nStart - nSkip));
        }

        nStart = strGbk.find(szName, nEnd + 1);
    }

    for (set<string>::const_iterator it = setKeyWord.begin(); it != setKeyWord.end(); ++it)
    {
        cout << "电驴首页推荐 " << *it << endl;
    }
}

/**
 * @brief Downloads a URL and writes the received text to a local file.
 *
 * The response is read with ReadString() and each returned string is written
 * as-is, so the file contains the lines without the separators ReadString()
 * removed.
 *
 * @param strUrl                 URL to download.
 * @param szDownloadHtmFileName  Path of the file to (re)create; NULL is rejected.
 * @return 0 on success; -1 if the file name is NULL, the file cannot be
 *         created, the URL cannot be opened or an Internet exception is raised.
 */
int GetHttpFileData(CString strUrl, const char* szDownloadHtmFileName)
{
    if (NULL == szDownloadHtmFileName)
    {
        cerr << "DownloadHtmFileName is NULL" << endl;
        return -1;
    }

    // is_open() is the reliable check here: bad() stays false when the open fails.
    ofstream of(szDownloadHtmFileName);
    if (!of.is_open())
    {
        cerr << "of create file error" << endl;
        return -1;
    }

    CInternetSession Session("Internet Explorer", 0);
    CStdioFile* pHttpFile = NULL;
    int iRet = -1;

    try
    {
        pHttpFile = Session.OpenURL(strUrl);
        if (NULL == pHttpFile)
        {
            cerr << __FUNCTION__ << " OpenURL returned NULL" << endl;
        }
        else
        {
            CString strClip;
            while (pHttpFile->ReadString(strClip))
            {
                of << strClip;
            }
            iRet = 0;
        }
    }
    catch (CInternetException* pEx)
    {
        TCHAR pszError[64];
        pEx->GetErrorMessage(pszError, 64);
        cerr << __FUNCTION__ << pszError << endl;
        // MFC exceptions caught by pointer must be released by the handler.
        pEx->Delete();
        iRet = -1;
    }

    // The object returned by OpenURL is owned by the caller.
    if (NULL != pHttpFile)
    {
        pHttpFile->Close();
        delete pHttpFile;
    }

    Session.Close();
    of.close();
    return iRet;
}