C# HttpWebRequest 抓取网页 自动编码

 /// <summary>
 /// Downloads the page at <paramref name="_url"/>. When login details are supplied and no
 /// cookies are cached yet for the target host, <paramref name="_post"/> is first posted to
 /// <paramref name="_loginurl"/> and the cookies from that response are sent with the page
 /// request and cached in <c>WebPage.webcookies</c> under the page's host name.
 /// </summary>
 /// <remarks>
 /// The constructor never reports a download failure by throwing: when the login request
 /// fails it falls back to <c>Init(_url)</c>, and when the page request fails it writes the
 /// error to the console and sets <c>m_good</c> to <c>false</c>.
 /// </remarks>
 /// <param name="_url">
 /// Address of the page. It is unescaped first, then every run of characters above U+00FF
 /// is percent-encoded as GB2312.
 /// </param>
 /// <param name="_loginurl">Address the login form is posted to; null or blank skips the login.</param>
 /// <param name="_post">Form-encoded login body; null or blank skips the login.</param>
 public WebPage(string _url, string _loginurl, string _post)
    {
        const string userAgent = "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)";
        const long maxContentLength = 1 << 22; // 4 MiB; larger bodies are not downloaded

        try
        {
            _url = Uri.UnescapeDataString(_url);
        }
        catch
        {
            // Best effort: if the URL cannot be unescaped, continue with it unchanged.
        }

        // Percent-encode EVERY run of characters above U+00FF, not only the first one found.
        // The encoding is resolved inside the callback so it is only looked up when needed.
        _url = Regex.Replace(_url, "[^\x00-\xff]+", delegate(Match run)
        {
            return System.Web.HttpUtility.UrlEncode(run.Value, Encoding.GetEncoding("GB2312"));
        });

        // Null login details are treated the same as blank ones.
        bool skipLogin = _loginurl == null || _post == null
            || _loginurl.Trim() == "" || _post.Trim() == "";
        if (!skipLogin)
        {
            string host = new Uri(_url).Host;
            // Writers take this lock before storing a container, so readers must take it too.
            lock (WebPage.webcookies)
            {
                skipLogin = WebPage.webcookies.ContainsKey(host);
            }
        }

        if (skipLogin)
        {
            Init(_url);
            return;
        }

        #region Log in
        m_post = _post;
        m_loginurl = _loginurl;
        byte[] bytes = Encoding.Default.GetBytes(_post);
        // Receives the cookies issued by the login response.
        CookieContainer myCookieContainer = new CookieContainer();
        try
        {
            HttpWebRequest loginRequest = (HttpWebRequest)WebRequest.Create(_loginurl);
            loginRequest.ContentType = "application/x-www-form-urlencoded";
            // Redirects are not followed, so the response read below is the login response itself.
            loginRequest.AllowAutoRedirect = false;
            loginRequest.UserAgent = userAgent;
            loginRequest.Timeout = 60000;
            loginRequest.KeepAlive = true;
            loginRequest.ContentLength = bytes.Length;
            loginRequest.Method = "POST";
            loginRequest.CookieContainer = myCookieContainer;

            // 'using' releases the stream and the response even when the request fails midway.
            using (Stream requestStream = loginRequest.GetRequestStream())
            {
                requestStream.Write(bytes, 0, bytes.Length);
            }

            using (HttpWebResponse loginResponse = (HttpWebResponse)loginRequest.GetResponse())
            {
                foreach (Cookie ck in loginResponse.Cookies)
                {
                    myCookieContainer.Add(ck);
                }
            }
        }
        catch
        {
            // Login failed: fall back to the plain download path.
            Init(_url);
            return;
        }
        #endregion

        #region Fetch the page with the login cookies
        try
        {
            m_uri = new Uri(_url);
            m_links = new List<Link>();
            m_html = "";
            m_outstr = "";
            m_title = "";
            m_good = true;

            // Archive / binary downloads are rejected without issuing a request.
            if (_url.EndsWith(".rar", StringComparison.Ordinal)
                || _url.EndsWith(".dat", StringComparison.Ordinal)
                || _url.EndsWith(".msi", StringComparison.Ordinal))
            {
                m_good = false;
                return;
            }

            HttpWebRequest rqst = (HttpWebRequest)WebRequest.Create(m_uri);
            rqst.AllowAutoRedirect = true;
            rqst.MaximumAutomaticRedirections = 3;
            rqst.UserAgent = userAgent;
            rqst.KeepAlive = true;
            rqst.Timeout = 30000;
            rqst.CookieContainer = myCookieContainer;

            // Cache the cookies so later pages on the same host skip the login step.
            lock (WebPage.webcookies)
            {
                WebPage.webcookies[m_uri.Host] = myCookieContainer;
            }

            using (HttpWebResponse rsps = (HttpWebResponse)rqst.GetResponse())
            {
                string contentType = rsps.ContentType;

                // Only text/* bodies of at most maxContentLength bytes are read.
                if (!contentType.StartsWith("text/", StringComparison.OrdinalIgnoreCase)
                    || rsps.ContentLength > maxContentLength)
                {
                    m_good = false;
                    return;
                }

                // Decode with the charset named in Content-Type, else the system default.
                Encoding cding = Encoding.Default;
                int ix = contentType.IndexOf("charset=", StringComparison.OrdinalIgnoreCase);
                if (ix != -1)
                {
                    string charset = contentType.Substring(ix + "charset=".Length);

                    // Drop any parameters that follow the charset, plus optional quotes.
                    int end = charset.IndexOf(';');
                    if (end != -1)
                    {
                        charset = charset.Substring(0, end);
                    }
                    charset = charset.Trim().Trim('"', '\'');

                    try
                    {
                        cding = Encoding.GetEncoding(charset);
                    }
                    catch
                    {
                        // Unknown or malformed charset name: keep the system default.
                        cding = Encoding.Default;
                    }
                }

                using (StreamReader reader = new StreamReader(rsps.GetResponseStream(), cding))
                {
                    m_html = reader.ReadToEnd();
                    m_pagesize = m_html.Length;
                    // Record the final address, which differs from the request after a redirect.
                    m_uri = rsps.ResponseUri;
                }
            }
        }
        catch (Exception ex)
        {
            // Plain concatenation tolerates a null m_uri, so the handler itself cannot throw.
            Console.WriteLine(ex.Message + m_uri);
            m_good = false;
        }
        #endregion
    }

    #endregion

你可能感兴趣的:(c# HttpWebRequest 抓取网页 自动编码)