C#代码实现百度搜索页面网页遍历

using System;

using System.Collections.Generic;

using System.ComponentModel;

using System.Data;

using System.Drawing;

using System.Text;

using System.Windows.Forms;

using mshtml;

using System.Threading;

using mshtml;



namespace WindowsFormsApplication3

{

    /// <summary>
    /// Form that runs a Baidu search inside the embedded <c>webBrowser1</c> control,
    /// shows the result URLs found on each result page, and then clicks the
    /// "next page" link so the following result page is processed as well.
    /// </summary>
    public partial class Form1 : Form
    {
        // Fixed part of the search URL; the escaped keyword is appended to it.
        private const string SearchUrlPrefix = "http://www.baidu.com/s?wd=";

        // Exact inner text an element must have to be treated as the "next page" link.
        private const string NextPageLinkText = "下一页>";

        // Marker in a result element's HTML; everything before it is shown as the URL.
        private const string UrlTerminator = "&nbsp";

        /// <summary>
        /// Creates the form and initializes the designer-generated controls.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Opens the search result page for the keyword typed into <c>textBox1</c>
        /// inside the embedded browser control.
        /// </summary>
        public void baidu()
        {
            // Escape the keyword so that characters such as '&', '#', '+', '%' or
            // spaces stay inside the wd parameter instead of breaking the URL.
            string keyword = Uri.EscapeDataString(textBox1.Text.Trim());
            webBrowser1.Navigate(SearchUrlPrefix + keyword);
        }

        /// <summary>
        /// Starts the search when button1 is clicked.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            baidu();
        }

        /// <summary>
        /// Keeps links that would open a new window inside the embedded control:
        /// the new window is cancelled and the control navigates to the address
        /// currently shown in its status text instead.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data; <c>Cancel</c> is always set to true.</param>
        private void webBrowser1_NewWindow(object sender, CancelEventArgs e)
        {
            // Always suppress the external window, even when there is nothing to navigate to.
            e.Cancel = true;

            // The status text is only usable when it really is an absolute URL.
            // It can also be empty or a plain status message, and navigating to
            // that would replace the current page with a blank or error page.
            string target = webBrowser1.StatusText;
            Uri parsedTarget;
            if (Uri.TryCreate(target, UriKind.Absolute, out parsedTarget))
            {
                webBrowser1.Navigate(target);
            }
        }

        /// <summary>
        /// Walks every element of the loaded page and simulates a click on the first
        /// one whose inner text equals <see cref="NextPageLinkText"/>.
        /// Does nothing when no document is loaded.
        /// </summary>
        public void bianli()
        {
            IHTMLDocument2 doc = GetDomDocument();
            if (doc == null)
            {
                return;
            }

            foreach (IHTMLElement ele in doc.all)
            {
                if (ele.innerText == NextPageLinkText)
                {
                    ele.click();
                    break;
                }
            }
        }

        /// <summary>
        /// Runs the result scraper once the page has finished loading. The scraper is
        /// only started when the completed URL equals the URL of the control's current
        /// document, so completions reported for any other URL are ignored.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data carrying the URL that finished loading.</param>
        private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            HtmlDocument current = webBrowser1.Document;
            if (current != null && e.Url == current.Url)
            {
                bianliwangye();
            }
        }

        /// <summary>
        /// Runs the result scraper manually when button2 is clicked.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            bianliwangye();
        }

        /// <summary>
        /// Enumerates the elements of the loaded page, shows the URL part of every
        /// element whose class name is "g" or "c-showurl" in a message box, and then
        /// calls <see cref="bianli"/> to move on to the next result page.
        /// Does nothing when no document is loaded.
        /// </summary>
        public void bianliwangye()
        {
            IHTMLDocument2 document = GetDomDocument();
            if (document == null)
            {
                return;
            }

            foreach (IHTMLElement h in document.all)
            {
                // Read the COM property once instead of once per comparison.
                string cssClass = h.className;
                if (cssClass != "g" && cssClass != "c-showurl")
                {
                    continue;
                }

                // innerHTML may be null; skip such elements instead of crashing.
                string html = h.innerHTML;
                if (html == null)
                {
                    continue;
                }

                // Single ordinal search; the text before the marker is the URL.
                int end = html.IndexOf(UrlTerminator, StringComparison.Ordinal);
                if (end >= 0)
                {
                    MessageBox.Show(html.Substring(0, end));
                }
            }

            // All matching elements of this page have been shown; go to the next page.
            bianli();
        }

        /// <summary>
        /// Returns the underlying DOM document of <c>webBrowser1</c>, or null when no
        /// document is loaded or it cannot be viewed as an <c>IHTMLDocument2</c>.
        /// </summary>
        private IHTMLDocument2 GetDomDocument()
        {
            HtmlDocument managedDocument = webBrowser1.Document;
            if (managedDocument == null)
            {
                return null;
            }

            return managedDocument.DomDocument as IHTMLDocument2;
        }
    }

}



复制代码

 

你可能感兴趣的:(C#)