// I have recently been working on a web-crawler project; the code below is adapted from material found online.
/// <summary>
/// HTTP helper class for a crawler: downloads pages with GET or POST,
/// optionally sending cookies and returning the response headers.
/// </summary>
public class SpiderHelper
{
    // User-Agent header sent by the cookie-aware GET and POST helpers.
    private const string BrowserUserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon; .NET CLR 1.1.4322)";

    // Chunk size used by ReadFully when draining a stream.
    private const int CopyBufferSize = 4096;

    /// <summary>
    /// Downloads the resource at <paramref name="URL"/> and decodes it as UTF-8 text.
    /// </summary>
    /// <param name="URL">Address to request.</param>
    /// <returns>The response body as a string.</returns>
    public string GetHtml(string URL)
    {
        WebRequest request = WebRequest.Create(URL);
        request.Credentials = CredentialCache.DefaultCredentials;

        // The using block releases the response even if reading fails, and
        // exceptions propagate with their original stack trace.
        using (WebResponse response = request.GetResponse())
        {
            return ReadBody(response, Encoding.UTF8);
        }
    }

    /// <summary>
    /// Downloads the resource at <paramref name="URL"/> and also reports the
    /// cookie the server asked the client to store.
    /// </summary>
    /// <param name="URL">Address to request.</param>
    /// <param name="cookie">
    /// Receives the value of the response's <c>Set-Cookie</c> header, or
    /// <c>null</c> when the response carries no such header.
    /// </param>
    /// <returns>The response body decoded as UTF-8.</returns>
    public string GetHtml(string URL, out string cookie)
    {
        WebRequest request = WebRequest.Create(URL);
        request.Credentials = CredentialCache.DefaultCredentials;

        using (WebResponse response = request.GetResponse())
        {
            // Set-Cookie is a response header, so it is read from the
            // response (and before the response is disposed).
            cookie = response.Headers.Get("Set-Cookie");
            return ReadBody(response, Encoding.UTF8);
        }
    }

    /// <summary>
    /// Posts a fixed payload to a fixed local endpoint and returns the reply.
    /// </summary>
    /// <returns>The response body decoded as UTF-8.</returns>
    public string GetWeb()
    {
        string param = "hl=zh-CN&newwindow=1";
        byte[] payload = Encoding.ASCII.GetBytes(param);

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://localhost:30237/D_PCS_Select.ashx");
        request.Method = "POST";
        // NOTE(review): the payload is key=value pairs but is declared as
        // JSON; left unchanged because the endpoint is not visible here.
        request.ContentType = "application/json; charset=utf-8";
        request.ContentLength = payload.Length;

        using (Stream body = request.GetRequestStream())
        {
            body.Write(payload, 0, payload.Length);
        }

        using (WebResponse response = request.GetResponse())
        {
            // Process the received page content here if needed.
            return ReadBody(response, Encoding.UTF8);
        }
    }

    /// <summary>
    /// Posts <paramref name="indata"/> to <paramref name="url"/>, keeps any
    /// cookies the server sets, then requests the same URL again with those
    /// cookies attached and returns the second response.
    /// </summary>
    /// <param name="url">Address used for both requests.</param>
    /// <param name="indata">Body of the first (POST) request; null is sent as an empty body.</param>
    /// <returns>The body of the second response decoded as UTF-8.</returns>
    public string GetCookie(string url, string indata)
    {
        // One container shared by both requests carries cookies from the
        // first response into the second request.
        CookieContainer cookies = new CookieContainer();

        // Encode once so Content-Length is the exact number of bytes written.
        byte[] payload = Encoding.UTF8.GetBytes(indata ?? string.Empty);

        HttpWebRequest firstRequest = (HttpWebRequest)WebRequest.Create(url);
        firstRequest.ContentType = "application/json; charset=utf-8";
        firstRequest.Method = "POST";
        firstRequest.ContentLength = payload.Length;
        firstRequest.CookieContainer = cookies;

        using (Stream body = firstRequest.GetRequestStream())
        {
            body.Write(payload, 0, payload.Length);
        }

        using (WebResponse firstResponse = firstRequest.GetResponse())
        {
            // The first reply is decoded as gb2312 and echoed to the console.
            string firstPage = ReadBody(firstResponse, Encoding.GetEncoding("gb2312"));
            Console.WriteLine(firstPage);
        }

        // Second request: default method, same cookie container.
        HttpWebRequest secondRequest = (HttpWebRequest)WebRequest.Create(url);
        secondRequest.CookieContainer = cookies;

        using (WebResponse secondResponse = secondRequest.GetResponse())
        {
            return ReadBody(secondResponse, Encoding.UTF8);
        }
    }

    /// <summary>
    /// POST overload with the server address as the last argument; forwards
    /// to the overload that takes <paramref name="server"/> first.
    /// </summary>
    /// <param name="URL">Address to post to.</param>
    /// <param name="postData">Request body as text.</param>
    /// <param name="cookie">Cookie header to send; may be null or empty.</param>
    /// <param name="header">Receives the response headers as text.</param>
    /// <param name="server">Server address used as referer and cookie scope.</param>
    /// <returns>The response body decoded as UTF-8.</returns>
    public string GetHtml(string URL, string postData, string cookie, out string header, string server)
    {
        return GetHtml(server, URL, postData, cookie, out header);
    }

    /// <summary>
    /// Posts <paramref name="postData"/>, encoded as gb2312, to <paramref name="URL"/>.
    /// </summary>
    /// <param name="server">Server address used as referer and cookie scope.</param>
    /// <param name="URL">Address to post to.</param>
    /// <param name="postData">Request body as text.</param>
    /// <param name="cookie">Cookie header to send; may be null or empty.</param>
    /// <param name="header">Receives the response headers as text.</param>
    /// <returns>The response body decoded as UTF-8.</returns>
    public string GetHtml(string server, string URL, string postData, string cookie, out string header)
    {
        byte[] byteRequest = Encoding.GetEncoding("gb2312").GetBytes(postData);
        return GetHtml(server, URL, byteRequest, cookie, out header);
    }

    /// <summary>
    /// Posts raw bytes to <paramref name="URL"/> and decodes the reply as UTF-8 text.
    /// </summary>
    /// <param name="server">Server address used as referer and cookie scope.</param>
    /// <param name="URL">Address to post to.</param>
    /// <param name="byteRequest">Request body.</param>
    /// <param name="cookie">Cookie header to send; may be null or empty.</param>
    /// <param name="header">Receives the response headers as text.</param>
    /// <returns>The response body decoded as UTF-8.</returns>
    public string GetHtml(string server, string URL, byte[] byteRequest, string cookie, out string header)
    {
        byte[] responseBytes = GetHtmlByBytes(server, URL, byteRequest, cookie, out header);

        // A StreamReader (rather than Encoding.GetString) is kept so a leading
        // byte-order mark is handled the same way as before.
        using (MemoryStream buffer = new MemoryStream(responseBytes))
        using (StreamReader reader = new StreamReader(buffer, Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// POST mode: sends <paramref name="byteRequest"/> as a form-urlencoded
    /// body and returns the raw response bytes.
    /// </summary>
    /// <param name="server">Server address used as referer and cookie scope.</param>
    /// <param name="URL">Address to post to.</param>
    /// <param name="byteRequest">Request body.</param>
    /// <param name="cookie">Cookie header to send; may be null or empty.</param>
    /// <param name="header">Receives the response headers as text.</param>
    /// <returns>The complete response body.</returns>
    public byte[] GetHtmlByBytes(string server, string URL, byte[] byteRequest, string cookie, out string header)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
        request.CookieContainer = BuildCookieContainer(server, cookie);
        request.ContentType = "application/x-www-form-urlencoded";
        request.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
        request.Referer = server;
        request.UserAgent = BrowserUserAgent;
        request.Method = "POST";
        request.ContentLength = byteRequest.Length;

        using (Stream body = request.GetRequestStream())
        {
            body.Write(byteRequest, 0, byteRequest.Length);
        }

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        {
            header = response.Headers.ToString();

            // The body is read until end of stream instead of trusting the
            // reported content length, which may not be a usable size.
            return ReadFully(responseStream);
        }
    }

    /// <summary>
    /// Reads <paramref name="stream"/> from its current position to the end.
    /// </summary>
    /// <param name="stream">Stream to drain; it is not closed by this method.</param>
    /// <returns>All bytes read; an empty array when the stream has no more data.</returns>
    public byte[] ReadFully(Stream stream)
    {
        byte[] chunk = new byte[CopyBufferSize];
        using (MemoryStream collected = new MemoryStream())
        {
            int read;
            // Read returns 0 (or less) once the end of the stream is reached.
            while ((read = stream.Read(chunk, 0, chunk.Length)) > 0)
            {
                collected.Write(chunk, 0, read);
            }

            return collected.ToArray();
        }
    }

    /// <summary>
    /// GET mode: forwards to the five-argument overload with an empty <c>val</c>.
    /// </summary>
    /// <param name="URL">Address to request.</param>
    /// <param name="cookie">Cookie header to send; may be null or empty.</param>
    /// <param name="header">Receives the response headers as text.</param>
    /// <param name="server">Server address used as referer and cookie scope.</param>
    /// <returns>The response body decoded as UTF-8.</returns>
    public string GetHtml(string URL, string cookie, out string header, string server)
    {
        return GetHtml(URL, cookie, out header, server, "");
    }

    /// <summary>
    /// GET mode: requests <paramref name="URL"/> with the given cookies and
    /// returns the body as UTF-8 text.
    /// </summary>
    /// <param name="URL">Address to request.</param>
    /// <param name="cookie">Cookie header to send; may be null or empty.</param>
    /// <param name="header">Receives the response headers as text.</param>
    /// <param name="server">Server address used as referer and cookie scope.</param>
    /// <param name="val">Not used by this method; kept so existing callers compile.</param>
    /// <returns>The response body decoded as UTF-8.</returns>
    public string GetHtml(string URL, string cookie, out string header, string server, string val)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
        request.Accept = "*/*";
        request.Referer = server;
        request.CookieContainer = BuildCookieContainer(server, cookie);
        request.UserAgent = BrowserUserAgent;
        request.Method = "GET";

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            header = response.Headers.ToString();
            return ReadBody(response, Encoding.UTF8);
        }
    }

    // Reads the whole response body as text, disposing the stream and reader.
    private static string ReadBody(WebResponse response, Encoding encoding)
    {
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, encoding))
        {
            return reader.ReadToEnd();
        }
    }

    // Builds a cookie container scoped to the server address; a null or empty
    // cookie string yields an empty container instead of an exception.
    private static CookieContainer BuildCookieContainer(string server, string cookie)
    {
        CookieContainer container = new CookieContainer();
        if (!string.IsNullOrEmpty(cookie))
        {
            container.SetCookies(new Uri(server), cookie);
        }

        return container;
    }
}