C#爬取武汉大学计算机学院主页的例子

using System;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace HttpGet
{
    /// <summary>
    /// Console sample that downloads the home page at <see cref="BaseUrl"/>,
    /// extracts every <c>href</c> attribute matched by <see cref="HrefPattern"/>,
    /// and prints each one as an absolute, double-quoted <c>href="..."</c> line.
    /// </summary>
    class Class1
    {
        /// <summary>Page that is downloaded; also the base for resolving relative links.</summary>
        private const string BaseUrl = "http://cs.whu.edu.cn";

        /// <summary>
        /// Matches <c>href='...'</c> / <c>href="..."</c> attributes whose value is an
        /// <c>http://</c> URL, a <c>./</c>- or <c>/</c>-prefixed path, or a bare relative
        /// path, optionally followed by a trailing slash or a simple query string.
        /// </summary>
        private const string HrefPattern = "href=[\\\"\\\'](http:\\/\\/|\\.\\/|\\/)?\\w+(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?[\\\"\\\']";

        /// <summary>
        /// Downloads the page, prints one normalized <c>href</c> per line (CRLF-terminated),
        /// then waits for Enter so the console window stays open.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        [STAThread]
        static void Main(string[] args)
        {
            string content;

            // WebClient is IDisposable; release it as soon as the bytes are in memory.
            using (WebClient client = new WebClient())
            {
                byte[] page = client.DownloadData(BaseUrl);
                // The page is decoded as UTF-8 unconditionally; the response charset is not inspected.
                content = Encoding.UTF8.GetString(page);
            }

            Uri baseUri = new Uri(BaseUrl);
            Regex re = new Regex(HrefPattern);
            foreach (Match match in re.Matches(content))
            {
                Console.Write(ToAbsoluteHref(match.Value, baseUri) + "\r\n");
            }

            Console.ReadLine();
        }

        /// <summary>
        /// Converts a matched <c>href</c> attribute into <c>href="absolute-url"</c>.
        /// </summary>
        /// <param name="hrefAttribute">
        /// Text produced by <see cref="HrefPattern"/>: <c>href=</c>, an opening quote,
        /// the URL, and a closing quote.
        /// </param>
        /// <param name="baseUri">Base address used to resolve relative URLs.</param>
        /// <returns>The attribute with a double-quoted absolute URL.</returns>
        private static string ToAbsoluteHref(string hrefAttribute, Uri baseUri)
        {
            // Drop the 6-character prefix (href= plus the opening quote) and the closing quote.
            // The pattern guarantees at least one URL character, so the length is never negative.
            string url = hrefAttribute.Substring(6, hrefAttribute.Length - 7);

            // Absolute links are kept verbatim. Everything else ("./x", "/x", "x.html") is
            // resolved against the base so the host and path are joined with the right slash,
            // regardless of which quote character the page used.
            if (!url.StartsWith("http://", StringComparison.Ordinal))
            {
                Uri resolved;
                if (Uri.TryCreate(baseUri, url, out resolved))
                {
                    url = resolved.AbsoluteUri;
                }
            }

            return "href=\"" + url + "\"";
        }
    }
}

C#爬取武汉大学计算机学院主页的例子_第1张图片

你可能感兴趣的:(C#爬取武汉大学计算机学院主页的例子)