最近写了一个抓取网页并提取页面信息的小工具(针对需要登录的网站)
一
登录的标识: UID 和 Cookie
获取后把 UID 和 Cookie 写入 ini 文件,以备后续使用
二
获取网页数据的一些基本方法(使用 MFC 的 WinInet 封装类,依次为:建立连接、打开请求、发送请求、读取响应)
(1) (CHttpConnection* )m_pConnection=(CInternet *Session) m_Sessions.GetHttpConnection(_T(m_HostAdderss),m_Port);
(2)m_pConnection->OpenRequest(m_Method,m_GetPath+m_FileName,m_Referer,1,NULL,m_HttpVersion,
INTERNET_FLAG_EXISTING_CONNECT );
(3)pot = (CHttpFile* )m_pFile->SendRequest(
_T(Headers),(LPVOID)(LPCTSTR)_T(this->m_SendMeg),m_SendMeg.GetLength());
(4)m_pFile->ReadString(tempString)
三
分析页面数据的一个片段(命名很混乱 ^_^)
filedata.Find(BZ);//BZ:关键字
filedata = filedata.Mid(pot);
count = filedata.Replace(BZ,BZ);
filedata = filedata.Mid(18);
for(int i=0;i
pot = filedata.Find(BZ);//分成几个联赛
if( pot == -1)
{
pot = filedata.GetLength();
lsdata_ls = filedata.Left(pot);
}
else
{
lsdata_ls = filedata.Left(pot);
filedata = filedata.Mid(pot+18);
}
//每个联赛再分
hj =0;hjj =0;
Newdata += "'";
pot = lsdata_ls.Find(">");
lsdata_ls = lsdata_ls.Mid(pot);
pot2 = lsdata_ls.Find("<");
Newdata += lsdata_ls.Mid(1,pot2-1);
Newdata += "',";
lsdata_ls = lsdata_ls.Mid(pot2+20);
kk = lsdata_ls;
while(kk.GetLength()>10)
{
gameid = "'";//得到gameid
pot = kk.Find("javascript:DoVote");
if(pot==-1)break;
kk =kk.Mid(pot);
gameid = kk.Mid(23,7)+"',";
pot = m_game.Replace(gameid,gameid);
if(pot==0)
m_game +=gameid;
kk = kk.Mid(30);
}
while(lsdata_ls.GetLength()>10)
{
pot= lsdata_ls.Find("");
lsdata_cc = lsdata_ls.Left(pot);
lsdata_ls = lsdata_ls.Mid(pot+6);
while(lsdata_cc.GetLength()>10)
{
if(lsdata_cc.GetLength()<300)//和局
{
hj++;
while(1)
{
pot = lsdata_cc.Find(">");
lsdata_cc = lsdata_cc.Mid(pot);
pot2 = lsdata_cc.Find("<");
if(pot2 == 1)
{
lsdata_cc = lsdata_cc.Mid(pot2);
continue;
}
Newdata += "'";
Newdata += lsdata_cc.Mid(1,pot2-1);
lsdata_cc = lsdata_cc.Mid(pot2+1);
if(lsdata_cc.GetLength()<6){
Newdata+="/n";break;}
Newdata += "',";
}
}
else
{ hjj++;
if( (hjj>1)&&(hj==0) )
Newdata +="/n";
pot = lsdata_cc.Find("