网站页面的抓取

 1  //抓取网页上的全部内容

 2         //webrequest类获取网页源代码

 /// <summary>
 /// Handles the fetch button click: downloads the page whose address was typed into
 /// <c>txt_url</c> with <see cref="WebRequest"/> and shows the markup in <c>divShow</c>.
 /// When nothing could be downloaded, a "content is empty" message is written to the response.
 /// </summary>
 /// <param name="sender">The control that raised the event (not used).</param>
 /// <param name="e">Event data (not used).</param>
 protected void btn_click_Click(object sender, EventArgs e)
 {
     string strMsg = string.Empty;

     // Accept absolute http/https addresses only. WebRequest.Create also serves file:// URIs,
     // which would let a visitor read files from the web server itself.
     Uri target;
     bool isWebAddress = Uri.TryCreate(txt_url.Text.Trim(), UriKind.Absolute, out target)
         && (target.Scheme == Uri.UriSchemeHttp || target.Scheme == Uri.UriSchemeHttps);

     if (isWebAddress)
     {
         try
         {
             // For http/https URIs WebRequest.Create returns an HttpWebRequest.
             WebRequest request = WebRequest.Create(target);

             // The using blocks release the connection and the reader even when reading throws.
             using (WebResponse response = request.GetResponse())
             using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
             {
                 // The body is decoded as UTF-8; pages served in another charset
                 // (e.g. GB2312) will come out garbled.
                 strMsg = reader.ReadToEnd();
             }
         }
         catch (WebException ex)
         {
             // Best effort: record the failure and fall through to the "empty" message below.
             System.Diagnostics.Trace.TraceWarning("Fetching {0} failed: {1}", target, ex.Message);
         }
         catch (IOException ex)
         {
             // The connection broke while the body was being read.
             System.Diagnostics.Trace.TraceWarning("Reading {0} failed: {1}", target, ex.Message);
         }
     }

     if (!string.IsNullOrEmpty(strMsg))
     {
         // NOTE(review): the remote markup is injected unescaped, so any script it contains
         // runs inside this page. Encode or sandbox it if the URL is not trusted.
         divShow.InnerHtml = strMsg;
     }
     else
     {
         Response.Write("获取内容为空");
     }
 }
WebRequest类获取网页源代码
 // Download the page named in txt_url with WebClient and display its markup in divShow.
 // The using blocks release the client, the stream and the reader even if the download throws.
 using (WebClient wc = new WebClient())
 {
     // Authenticate requests with the default credentials of the current security context.
     // NOTE(review): these credentials are offered to whatever host the user typed in;
     // confirm that is intended before pointing this at untrusted URLs.
     wc.Credentials = CredentialCache.DefaultCredentials;

     // Option 1 (alternative, disabled): download the whole page as a byte array and decode it.
     //byte[] pageData = wc.DownloadData(txt_url.Text.Trim());
     //divShow.InnerHtml = Encoding.UTF8.GetString(pageData);

     // Option 2: open the URL as a stream and read it as UTF-8 text.
     using (Stream resStream = wc.OpenRead(txt_url.Text.Trim()))
     using (StreamReader sr = new StreamReader(resStream, Encoding.UTF8))
     {
         divShow.InnerHtml = sr.ReadToEnd();
     }
 }
WebClient获取页面的两种方法
 1  // Fetch the page content and save it as a *.txt file (disabled sample, kept commented out).
    // NOTE(review): FileMode.OpenOrCreate opens an existing file without truncating it, so a
    // download shorter than the previous one would leave stale bytes at the end of the file;
    // FileMode.Create looks like the intended mode - confirm before enabling.
    // NOTE(review): writer, reader and response are closed only on the success path, and the
    // empty catch leaves strMsg empty, so a failure prints nothing at all.

 2         //string strMsg = string.Empty;

 3         //try

 4         //{

 5         //    WebRequest request = WebRequest.Create(txt_url.Text);

 6         //    WebResponse response = request.GetResponse();

 7         //    Stream reader = response.GetResponseStream();

 8 

 9         //    // The target can be changed to whatever file the content should be saved as

10         //    FileStream writer = new FileStream("D://logo.txt", FileMode.OpenOrCreate, FileAccess.Write);

11         //    byte[] buff = new byte[512];

12         //    int c = 0; // number of bytes actually read

13         //    while ((c = reader.Read(buff, 0, buff.Length)) > 0)

14         //    {

15         //        writer.Write(buff, 0, c);

16         //    }

17         //    writer.Close();

18         //    writer.Dispose();

19 

20         //    reader.Close();

21         //    reader.Dispose();

22         //    response.Close();

23 

24         //    strMsg = "保存成功";

25         //}

26         //catch

27         //{ }

28         //Response.Write(strMsg); 
将获取页面内容,并转化为*.txt保存

网页抓取类

http://www.soaspx.com/dotnet/asp.net/tech/tech_20110913_8052.html

http://blog.csdn.net/jiang1984j/article/details/5793239

http://www.cftea.com/c/2007/08/H89S2ILKP2SPAKT7.asp

你可能感兴趣的:(网站)