1、利用 HttpWatch 找到网站的登录入口及其提交参数(详情略,请自行搜索 HttpWatch 的使用方法)
2、利用 HttpWebRequest 将 51.la(51拉)的查看密码 POST 到网站登录入口,登录成功后再请求想要抓取的页面并读取其内容
<!--<br /><br />Code highlighting produced by Actipro CodeHighlighter (freeware)<br />http://www.CodeHighlighter.com/<br /><br />-->
/// <summary>
/// Logs in to 51.la with a site's view password and downloads the
/// search-engine report page (<c>3_SE.asp</c>) for the given date range.
/// </summary>
/// <param name="strStaticId">Statistics ID; sent as the <c>id</c> form field and query parameter.</param>
/// <param name="LookPass">View password; sent as the <c>lookpass</c> form field.</param>
/// <param name="strBeginDate">Start of the range; sent as the <c>d1</c> query parameter.</param>
/// <param name="strEndDate">End of the range; sent as the <c>d2</c> query parameter.</param>
/// <returns>The body of the report page, decoded with <c>Encoding.Default</c>.</returns>
public string FiveLaSeo(string strStaticId, string LookPass, string strBeginDate, string strEndDate)
{
    // One container is shared by both requests so that whatever cookies the
    // login response sets are sent along with the report request.
    CookieContainer cookies = new CookieContainer();

    // Values are percent-encoded so characters such as '&', '=' or '+' in the
    // ID or password cannot corrupt the form body.
    string postData = "id=" + EscapeUrlValue(strStaticId)
        + "&lookpass=" + EscapeUrlValue(LookPass)
        + "&t=chalogin";
    byte[] data = Encoding.ASCII.GetBytes(postData);

    // Step 1: POST the login form.
    HttpWebRequest loginRequest = (HttpWebRequest)WebRequest.Create("http://www.51.la/report/0_help.asp");
    loginRequest.Method = "POST";
    loginRequest.ContentType = "application/x-www-form-urlencoded";
    loginRequest.ContentLength = data.Length;
    loginRequest.CookieContainer = cookies;

    using (Stream requestStream = loginRequest.GetRequestStream())
    {
        requestStream.Write(data, 0, data.Length);
    }

    using (HttpWebResponse loginResponse = (HttpWebResponse)loginRequest.GetResponse())
    using (StreamReader loginReader = new StreamReader(loginResponse.GetResponseStream(), Encoding.Default))
    {
        // The login page body is not used; it is read to the end and discarded.
        loginReader.ReadToEnd();
    }

    // Step 2: GET the search-engine report with the same cookie container.
    string reportUrl = "http://www.51.la/report/3_SE.asp?id=" + EscapeUrlValue(strStaticId)
        + "&d1=" + EscapeUrlValue(strBeginDate)
        + "&d2=" + EscapeUrlValue(strEndDate);

    HttpWebRequest reportRequest = (HttpWebRequest)WebRequest.Create(reportUrl);
    reportRequest.Method = "GET";
    reportRequest.KeepAlive = false;
    reportRequest.CookieContainer = cookies;

    // NOTE(review): Encoding.Default depends on the machine's configuration;
    // confirm it matches the charset the site actually serves.
    using (HttpWebResponse reportResponse = (HttpWebResponse)reportRequest.GetResponse())
    using (StreamReader reportReader = new StreamReader(reportResponse.GetResponseStream(), Encoding.Default))
    {
        return reportReader.ReadToEnd();
    }
}

/// <summary>
/// Percent-encodes a value for use in a query string or form body.
/// A null value is treated as an empty string.
/// </summary>
private static string EscapeUrlValue(string value)
{
    return System.Uri.EscapeDataString(value ?? "");
}
3、将抓取的页面进行正则表达式匹配,取出自己所需要的数据(这里我需要搜索引擎流量)
<!--<br /><br />Code highlighting produced by Actipro CodeHighlighter (freeware)<br />http://www.CodeHighlighter.com/<br /><br />-->
/// <summary>
/// Fetches the 51.la search-engine report via <c>FiveLa.FiveLaSeo</c> and
/// extracts the text found between "来自搜索引擎的访问量 (" and "IP ) 占总访问量",
/// i.e. the IP figure for visits coming from search engines.
/// </summary>
/// <param name="strStaticId">Statistics ID, passed through to <c>FiveLaSeo</c>.</param>
/// <param name="LookPass">View password, passed through to <c>FiveLaSeo</c>.</param>
/// <param name="strBeginDate">Start of the range, passed through to <c>FiveLaSeo</c>.</param>
/// <param name="strEndDate">End of the range, passed through to <c>FiveLaSeo</c>.</param>
/// <returns>
/// The trimmed text captured from the page, or "0" when the page does not
/// contain the expected phrase or the captured text is empty.
/// </returns>
public string FivelaSeoPv(string strStaticId, string LookPass, string strBeginDate, string strEndDate)
{
    Tool.FiveLa fl = new FiveLa();
    string html = fl.FiveLaSeo(strStaticId, LookPass, strBeginDate, strEndDate);

    // *? is lazy, so the capture stops at the first " IP ) 占总访问量" that follows.
    // Capturing the value directly (instead of stripping the surrounding text
    // with case-sensitive Replace calls) stays correct under IgnoreCase and
    // cannot remove characters from inside the value itself.
    string pattern = @"来自搜索引擎的访问量 \( ([\s\S]*?) IP \) 占总访问量";
    Match match = Regex.Match(html, pattern, RegexOptions.IgnoreCase);

    string number = match.Success ? match.Groups[1].Value.Trim() : "";
    if (number == "")
    {
        number = "0";
    }
    return number;
}