利用HttpRequest登录到某个网站,然后获取网站信息的程序示例

利用HttpRequest登录到某个网站,然后获取网站信息的程序示例


问题:有的网站的相关内容必须要在登录后才可以查看,其登录信息保存在session变量之中。这样,使用asphttp等组件就难以正确得到所要的信息。

解决:使用asp.net中的httprequest和httpresponse来实现。

要点:
1。 通过附加一个cookiecontainer到httprequest对象中,可以得到登录后返回的代表SESSION ID的COOKIE。 见Login方法
2。 将此COOKIE包含在一个cookiecontainer中并附加到另一个HTTPREQUEST请求中,则可以实现SESSION的还原。见getPage方法

源程序如下:

getHttpInfo.aspx:
<%@ Page language="c#" Codebehind="getHttpInfo.aspx.cs" AutoEventWireup="false" Inherits="PdfTest.getHttpInfo" %>



WebForm1












getHttpInfo.aspx.cs:
using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
//using System.Data.OleDb;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.Net;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.Data.Odbc;

namespace PdfTest
{
///
/// Summary description for WebForm1.
///

public class getHttpInfo : System.Web.UI.Page
{
protected static string cookieheader;


private void Page_Load(object sender, System.EventArgs e)
{
// Put user code to initialize the page here

string strResult;

if (HttpContext.Current.Application["cookieheader"] != null)
{
cookieheader = (string)HttpContext.Current.Application["cookieheader"];
}
else
{
//Login into the website and keep the cookie for the session in the application variable
string strLogin = Login("http://www.thesiteyouwanttovisit/theloginpage.asp", "Action=&USERID=&Password=") ;
}


strResult = getPage("http://www.thesiteyouwanttovisit/theloginpage.asp", "Action=&data=") ;


//Write the result to htm file
FileStream htmFile = new FileStream("c:/save.htm", FileMode.OpenOrCreate);
StreamWriter sw = new StreamWriter(htmFile);
sw.Write(strResult);
sw.Close();
htmFile.Close();

// output the result
Response.Write(strResult);
}


public static string Login(String url, String paramList)
{
HttpWebResponse res = null;
string strResult="";

try
{

HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
req.Method = "POST";
req.ContentType = "application/x-www-form-urlencoded";
req.AllowAutoRedirect = false;
CookieContainer cookieCon = new CookieContainer();
req.CookieContainer = cookieCon;

StringBuilder UrlEncoded = new StringBuilder();
Char[] reserved = {'?', '=', '&'};
byte[] SomeBytes = null;

if (paramList != null)
{
int i=0, j;
while(i{
j=paramList.IndexOfAny(reserved, i);
if (j==-1)
{
UrlEncoded.Append(HttpUtility.UrlEncode(paramList.Substring(i, paramList.Length-i)));
break;
}
UrlEncoded.Append(HttpUtility.UrlEncode(paramList.Substring(i, j-i)));
UrlEncoded.Append(paramList.Substring(j,1));
i = j+1;
}
SomeBytes = Encoding.UTF8.GetBytes(UrlEncoded.ToString());
req.ContentLength = SomeBytes.Length;
Stream newStream = req.GetRequestStream();
newStream.Write(SomeBytes, 0, SomeBytes.Length);
newStream.Close();
}
else
{
req.ContentLength = 0;
}


res = (HttpWebResponse)req.GetResponse();
cookieheader = req.CookieContainer.GetCookieHeader(new Uri(url));
HttpContext.Current.Application.Lock();
HttpContext.Current.Application["cookieheader"] = cookieheader;
HttpContext.Current.Application.UnLock();

Stream ReceiveStream = res.GetResponseStream();
Encoding encode = System.Text.Encoding.GetEncoding("utf-8");
StreamReader sr = new StreamReader( ReceiveStream, encode );
Char[] read = new Char[256];
int count = sr.Read( read, 0, 256 );
while (count > 0)
{
String str = new String(read, 0, count);
strResult += str;
count = sr.Read(read, 0, 256);
}
}
catch(Exception e)
{
strResult = e.ToString();
}
finally
{
if ( res != null )
{
res.Close();
}
}

return strResult;
}


public static string getPage(String url, String paramList)
{
HttpWebResponse res = null;
string strResult = "";

try
{

HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
req.Method = "POST";
req.KeepAlive = true;
req.ContentType = "application/x-www-form-urlencoded";
CookieContainer cookieCon = new CookieContainer();
req.CookieContainer = cookieCon;
req.CookieContainer.SetCookies(new Uri(url),cookieheader);
StringBuilder UrlEncoded = new StringBuilder();
Char[] reserved = {'?', '=', '&'};
byte[] SomeBytes = null;

if (paramList != null)
{
int i=0, j;
while(i{
j=paramList.IndexOfAny(reserved, i);
if (j==-1)
{
UrlEncoded.Append(HttpUtility.UrlEncode(paramList.Substring(i, paramList.Length-i)));
break;
}
UrlEncoded.Append(HttpUtility.UrlEncode(paramList.Substring(i, j-i)));
UrlEncoded.Append(paramList.Substring(j,1));
i = j+1;
}
SomeBytes = Encoding.UTF8.GetBytes(UrlEncoded.ToString());
req.ContentLength = SomeBytes.Length;
Stream newStream = req.GetRequestStream();
newStream.Write(SomeBytes, 0, SomeBytes.Length);
newStream.Close();
}
else
{
req.ContentLength = 0;
}


res = (HttpWebResponse)req.GetResponse();
Stream ReceiveStream = res.GetResponseStream();
Encoding encode = System.Text.Encoding.GetEncoding("utf-8");
StreamReader sr = new StreamReader( ReceiveStream, encode );
Char[] read = new Char[256];
int count = sr.Read( read, 0, 256 );
while (count > 0)
{
String str = new String(read, 0, count);
strResult += str;
count = sr.Read(read, 0, 256);
}
}
catch(Exception e)
{
strResult = e.ToString();
}
finally
{
if ( res != null )
{
res.Close();
}
}

return strResult;
}


#region Web Form Designer generated code
override protected void OnInit(EventArgs e)
{
//
// CODEGEN: This call is required by the ASP.NET Web Form Designer.
//
InitializeComponent();
base.OnInit(e);
}

///
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
///

private void InitializeComponent()
{
this.Load += new System.EventHandler(this.Page_Load);

}
#endregion



}
}

你可能感兴趣的:(数据挖掘)