转自:http://www.sufeinet.com/thread-3-1-1.html
C#HttpHelper官方产品发布与源码下载---苏飞版
导读部分
-------------------------------------------------------------------------------------------------------------
C#基类库苏飞版--系列教程导航
http://www.sufeinet.com/thread-655-1-1.html
本站是C#HttpHelper类唯一官方网站,唯一更新网站,希望大家收藏关注。
介绍
C#HttpHelper实现了C#HttpWebRequest抓取时无视编码,无视证书,无视Cookie,并且实现了代理功能。使用它您可以进行Get和Post请求,可以很方便地设置Cookie、证书和代理;编码问题您不用管,因为类会自动为您识别网页的编码。
这个类是我以前写百度,Google,Soso,Sogou等网络蜘蛛时使用的,经过上千万个网站的测试,上万个网站抓取的例子总结出来的,中间的方法也是我实验了很久之后才确定的方案,所以大家可以放心使用。
我不敢说100%,但是应该是99%的网站都没有问题,都可以无视编码,证书,和Cookie,如果你确实发现哪个网站在使用本类过程中有问题,出现乱码,或者是获取不了,不能带Cookie,不能带证书等问题,我非常乐意您能联系我,提出您的问题,您也可以直接回复本帖子,我会第一时间进行解答。 反馈建议
邮件订阅更新(通过邮件订阅,当类有更新时会发送更新通知)
使用SVN获取:http://www.sufeinet.com/thread-7297-1-1.html
Httphelper代码在线预览
1.HttpHelper类下载
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
|
// P/Invoke declaration for InternetGetCookieEx, imported from wininet.dll.
// Original note: used to read the cookie value of the current WebBrowser
// session after login.
// The caller in this file passes the URL, a null cookie name, a StringBuilder
// that receives the cookie text, and a by-ref size that it re-reads after a
// failed call in order to allocate a larger buffer.
// The bool return value is treated by the caller as success/failure.
// SetLastError = true asks the runtime to preserve the Win32 error code of the call.
// NOTE(review): lpReserved is declared as object and the caller passes null;
// this parameter is commonly declared as IntPtr - confirm the marshalling
// against the native signature.
[DllImport(
"wininet.dll"
, CharSet = CharSet.Auto, SetLastError =
true
)]
static
extern
bool
InternetGetCookieEx(
string
pchURL,
string
pchCookieName, StringBuilder pchCookieData,
ref
int
pcchCookieData,
int
dwFlags,
object
lpReserved);
/// <summary>
/// Reads the cookie string for <paramref name="url"/> through InternetGetCookieEx.
/// Original note: the cookies can only be read after the login has happened.
/// </summary>
/// <param name="url">URL whose cookies are requested.</param>
/// <returns>
/// The cookie text, or null when the size reported after a failed first call is
/// negative or when the retry with the reported size fails as well.
/// </returns>
private static string GetCookieString(string url)
{
    // Flag value passed as dwFlags (INTERNET_COOKIE_HTTPONLY in the WinINet headers).
    const int InternetCookieHttpOnly = 0x00002000;

    // First attempt with a 256-character buffer.
    int requiredSize = 256;
    StringBuilder buffer = new StringBuilder(requiredSize);
    if (InternetGetCookieEx(url, null, buffer, ref requiredSize, InternetCookieHttpOnly, null))
    {
        return buffer.ToString();
    }

    // The failed call may have updated requiredSize; a negative value cannot be used.
    if (requiredSize < 0)
    {
        return null;
    }

    // Retry once with a buffer sized from the value reported by the first call.
    buffer = new StringBuilder(requiredSize);
    bool succeeded = InternetGetCookieEx(url, null, buffer, ref requiredSize, InternetCookieHttpOnly, null);
    if (!succeeded)
    {
        return null;
    }

    return buffer.ToString();
}
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
|
/// <summary>
/// Converts an HTML fragment to plain text: paragraph boundaries become a blank
/// line, &lt;br&gt; tags become a newline, double quotes become two single
/// quotes, and every remaining tag is removed via StripHtmlXmlTags.
/// </summary>
/// <param name="stringToStrip">The HTML content to convert.</param>
/// <returns>The content with the replacements applied and all tags removed.</returns>
public static string StripHTML(string stringToStrip)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled;

    // "</p> <p>" (optional whitespace inside and between the tags) -> blank line.
    stringToStrip = Regex.Replace(stringToStrip, "</p(?:\\s*)>(?:\\s*)<p(?:\\s*)>", "\n\n", options);

    // The published snippet had this pattern swallowed by HTML rendering, leaving
    // string literals split across lines that do not compile. Reconstructed so that
    // <br>, <br/> and <br /> each become a single newline.
    stringToStrip = Regex.Replace(stringToStrip, "<br(?:\\s*)/?>", "\n", options);

    // Double quote -> two single quotes.
    stringToStrip = Regex.Replace(stringToStrip, "\"", "''", options);

    // Finally drop every remaining tag.
    return StripHtmlXmlTags(stringToStrip);
}
/// <summary>
/// Removes every substring of the form "&lt;...&gt;" (a '&lt;', one or more
/// characters other than '&gt;', then '&gt;') from <paramref name="content"/>.
/// </summary>
/// <param name="content">Text that may contain HTML/XML tags.</param>
/// <returns>The text with all such tags replaced by the empty string.</returns>
private static string StripHtmlXmlTags(string content)
{
    const string tagPattern = "<[^>]+>";
    const string replacement = "";
    RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Compiled;

    string withoutTags = Regex.Replace(content, tagPattern, replacement, options);
    return withoutTags;
}
|
01
02
03
04
05
06
07
08
|
/// <summary>
/// URL-decodes <paramref name="text"/> with HttpUtility.UrlDecode using
/// Encoding.Default (the concrete encoding depends on the runtime/platform).
/// </summary>
/// <param name="text">The URL-encoded text.</param>
/// <returns>The decoded text.</returns>
public static string URLDecode(string text)
{
    Encoding defaultEncoding = Encoding.Default;
    string decoded = HttpUtility.UrlDecode(text, defaultEncoding);
    return decoded;
}
/// <summary>
/// URL-encodes <paramref name="text"/> with HttpUtility.UrlEncode using
/// Encoding.Default (the concrete encoding depends on the runtime/platform).
/// </summary>
/// <param name="text">The text to encode.</param>
/// <returns>The URL-encoded text.</returns>
public static string URLEncode(string text)
{
    Encoding defaultEncoding = Encoding.Default;
    string encoded = HttpUtility.UrlEncode(text, defaultEncoding);
    return encoded;
}
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
|
// Usage example that sets every HttpItem option and then issues the request.
var http = new HttpHelper();
var item = new HttpItem
{
    // Target URL; required.
    URL = "http://www.sufeinet.com",
    // Encoding (utf-8, gb2312, gbk); optional - the class detects it automatically by default.
    // Alternative: Encoding = Encoding.Default,
    Encoding = null,
    // HTTP method; optional, defaults to Get.
    Method = "get",
    // Connection timeout; optional, default 100000.
    Timeout = 100000,
    // Timeout for writing the POST data; optional, default 30000.
    ReadWriteTimeout = 30000,
    // Whether the returned HTML is converted to lower case; optional
    // (the original note says lower-casing is the default).
    IsToLower = false,
    // Cookie as a string; optional.
    Cookie = "",
    // Browser type, version and operating system; optional, has a default.
    UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
    // Optional, has a default.
    Accept = "text/html, application/xhtml+xml, */*",
    // Content type (the original note calls it the "return type"); optional, has a default.
    ContentType = "text/html",
    // Referring URL; optional.
    Referer = "http://www.sufeinet.com",
    // Whether to follow 301 redirects; optional.
    Allowautoredirect = true,
    // Absolute path of the certificate; optional - omit when no certificate is needed.
    CerPath = "d:\\123.cer",
    // Maximum number of connections; optional, default 1024.
    Connectionlimit = 1024,
    // POST data; optional, not needed for GET.
    Postdata = "C:\\PERKYSU_20121129150608_ScrubLog.txt",
    // Defaults to String input; PostDataType.Byte can be used to pass byte data instead.
    PostDataType = PostDataType.FilePath,
    // Proxy server IP; the port can be appended after ':'. Optional - the three
    // proxy settings can be left unset when no proxy is needed.
    ProxyIp = "192.168.1.105:8015",
    // Proxy server password; optional.
    ProxyPwd = "123456",
    // Proxy server account name; optional.
    ProxyUserName = "administrator",
    // Type of the returned data: Byte or String.
    ResultType = ResultType.Byte,
    // Set this property when PostDataType is Byte.
    PostdataByte = System.Text.Encoding.Default.GetBytes("测试一下"),
    // A cookie collection can also be passed in directly.
    CookieCollection = new System.Net.CookieCollection(),
};

// Extra request headers.
item.Header.Add("测试Key1", "测试Value1");
item.Header.Add("测试Key2", "测试Value2");

// Send the request and fetch the HTML.
HttpResult result = http.GetHtml(item);

// Cookie returned by the server.
string cookie = result.Cookie;

// Returned HTML content.
string html = result.Html;

if (result.StatusCode == System.Net.HttpStatusCode.OK)
{
    // The request succeeded; see the HttpStatusCode enum for the other values.
}

// Textual description of the status code.
string statusCodeDescription = result.StatusDescription;

// Convert the returned bytes into an image.
Image img = byteArrayToImage(result.ResultByte);
}
/// <summary>
/// Builds an image from a byte array.
/// </summary>
/// <param name="Bytes">The bytes of the encoded image.</param>
/// <returns>The image produced by Image.FromStream.</returns>
private Image byteArrayToImage(byte[] Bytes)
{
    // As in the original, the stream is not disposed here; Image.FromStream
    // expects the stream to stay open for the lifetime of the returned Image.
    return Image.FromStream(new MemoryStream(Bytes));
}
} // unmatched closing brace kept from the original excerpt (its opening scope is not shown)
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
|
// Usage example: one GET request followed by a POST request on the same helper.
var http = new HttpHelper();
var item = new HttpItem
{
    // Test URL; required.
    URL = "http://www.sufeinet.com",
    // HTTP method; optional, defaults to Get.
    Method = "get",
};

// Fetch the HTML.
HttpResult result = http.GetHtml(item);

item = new HttpItem
{
    // Test URL; required.
    URL = "http://tool.sufeinet.com",
    // Encoding (utf-8, gb2312, gbk); optional - the class detects it automatically by default.
    // Alternative: Encoding = Encoding.Default,
    Encoding = null,
    // HTTP method; optional, defaults to Get.
    Method = "post",
    Postdata = "user=123123&pwd=1231313"
};

// Fetch the new HTML.
result = http.GetHtml(item);
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
|
// Usage example: GET request with custom headers, then reading the result fields.
var http = new HttpHelper();
var item = new HttpItem
{
    // Target URL; required.
    URL = "http://www.sufeinet.com",
    // Encoding (utf-8, gb2312, gbk); optional - the class detects it automatically by default.
    // Alternative: Encoding = Encoding.Default,
    Encoding = null,
    // HTTP method; optional, defaults to Get.
    Method = "get",
};

// Extra request headers.
item.Header.Add("测试Key1", "测试Value1");
item.Header.Add("测试Key2", "测试Value2");

// Fetch the HTML.
HttpResult result = http.GetHtml(item);

// Cookie returned by the server.
string cookie = result.Cookie;

// Returned HTML content.
string html = result.Html;

if (result.StatusCode == System.Net.HttpStatusCode.OK)
{
    // The request succeeded; see the HttpStatusCode enum for the other values.
}

// Textual description of the status code.
string statusCodeDescription = result.StatusDescription;
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
// Usage example: request the result as bytes and turn it into an image.
var http = new HttpHelper();
var item = new HttpItem
{
    // Target URL; required.
    URL = "http://www.sufeinet.com",
    // Encoding (utf-8, gb2312, gbk); optional - the class detects it automatically by default.
    // Alternative: Encoding = Encoding.Default,
    Encoding = null,
    ResultType = ResultType.Byte
};

// Send the request.
HttpResult result = http.GetHtml(item);

if (result.StatusCode == System.Net.HttpStatusCode.OK)
{
    // The request succeeded; see the HttpStatusCode enum for the other values.
}

// Textual description of the status code.
string statusCodeDescription = result.StatusDescription;

// Convert the returned bytes into an image.
Image img = byteArrayToImage(result.ResultByte);
}
/// <summary>
/// Builds an image from a byte array.
/// </summary>
/// <param name="Bytes">The bytes of the encoded image.</param>
/// <returns>The image produced by Image.FromStream.</returns>
private Image byteArrayToImage(byte[] Bytes)
{
    // As in the original, the stream is not disposed here; Image.FromStream
    // expects the stream to stay open for the lifetime of the returned Image.
    return Image.FromStream(new MemoryStream(Bytes));
}
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
|
// Usage example: pass the cookie string of the first response to a second request.
var http = new HttpHelper();
var item = new HttpItem
{
    // Test URL; required.
    URL = "http://www.sufeinet.com",
    // Encoding (utf-8, gb2312, gbk); optional - the class detects it automatically by default.
    // Alternative: Encoding = Encoding.Default,
    Encoding = null,
    // HTTP method; optional, defaults to Get.
    Method = "get",
};

// Fetch the HTML.
HttpResult result = http.GetHtml(item);

item = new HttpItem
{
    // Test URL; required.
    URL = "http://tool.sufeinet.com",
    // Encoding (utf-8, gb2312, gbk); optional - the class detects it automatically by default.
    // Alternative: Encoding = Encoding.Default,
    Encoding = null,
    // HTTP method; optional, defaults to Get.
    Method = "get",
    // Cookie string taken from the previous response.
    Cookie = result.Cookie,
};

// Fetch the new HTML.
result = http.GetHtml(item);
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
|
// Usage example: pass the CookieCollection of the first response to a second request.
var http = new HttpHelper();
var item = new HttpItem
{
    // Test URL; required.
    URL = "http://www.sufeinet.com",
    // Encoding (utf-8, gb2312, gbk); optional - the class detects it automatically by default.
    // Alternative: Encoding = Encoding.Default,
    Encoding = null,
    // HTTP method; optional, defaults to Get.
    Method = "get",
    // Ask for the cookies as a CookieCollection.
    ResultCookieType = ResultCookieType.CookieCollection
};

// Fetch the HTML.
HttpResult result = http.GetHtml(item);

item = new HttpItem
{
    // Test URL; required.
    URL = "http://tool.sufeinet.com",
    // Encoding (utf-8, gb2312, gbk); optional - the class detects it automatically by default.
    // Alternative: Encoding = Encoding.Default,
    Encoding = null,
    // HTTP method; optional, defaults to Get.
    Method = "get",
    // Cookie collection taken from the previous response.
    CookieCollection = result.CookieCollection,
    ResultCookieType = ResultCookieType.CookieCollection
};

// Fetch the new HTML.
result = http.GetHtml(item);
|