超简单NSoup版CSDN博客爬虫

自己摸索,.NET程序员也有春天

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using Microsoft.SqlServer.Management.Common;
using Microsoft.SqlServer.Management.Smo;
using NSoup.Select;

namespace NsoupDemo
{
    /// <summary>
    /// Console program that downloads the first article-list pages of a CSDN blog,
    /// extracts title, description and view count of every listed article with NSoup,
    /// and appends them to a text file.
    /// </summary>
    class Program
    {
        // Base URL of the paginated article list; the page number is appended to it.
        private const string ListUrlBase = "http://blog.csdn.net/WuLex/article/list/";

        // File that receives the scraped article information.
        private const string OutputPath = @"D:\Info.txt";

        /// <summary>
        /// Entry point: scrapes list pages 1 and 2 and appends one record per article
        /// to <c>D:\Info.txt</c>, then waits for a line on standard input.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // WebClient is IDisposable; the using block releases it even if a download throws.
            using (WebClient webClient = new WebClient())
            {
                for (int i = 1; i < 3; i++)
                {
                    // Headers are (re)applied before every request: the .NET Framework
                    // WebClient removes well-known headers such as Referer, User-Agent and
                    // Host from its collection once they have been copied to a request,
                    // so setting them only once would leave later pages without them.
                    // The indexer (Set) is used instead of Add so that a runtime which
                    // keeps the headers does not end up with duplicated values.
                    webClient.Headers["Host"] = "blog.csdn.net";
                    webClient.Headers["Referer"] = "http://blog.csdn.net/WuLex/article/list";
                    webClient.Headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36";

                    byte[] pageBytes = webClient.DownloadData(ListUrlBase + i);
                    string htmlString = Encoding.UTF8.GetString(pageBytes);

                    NSoup.Nodes.Document doc = NSoup.NSoupClient.Parse(htmlString);
                    Elements elements = doc.GetElementsByClass("list_item");
                    foreach (var ele in elements)
                    {
                        string title = ele.GetElementsByClass("link_title").Text;
                        string descripe = ele.GetElementsByClass("article_description").Text;
                        string views = ele.GetElementsByClass("link_view").Text;
                        Write(OutputPath, "\r\n" + title + "\r\n" + descripe + "\r\n" + views + "\r\n");
                    }
                }
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }

        /// <summary>
        /// Appends <paramref name="content"/> to the file at <paramref name="path"/>,
        /// creating the file if it does not exist.
        /// </summary>
        /// <param name="path">Path of the file to append to.</param>
        /// <param name="content">Text to append.</param>
        public static void Write(string path, string content)
        {
            // The using blocks flush and close both streams even when the write throws,
            // so the file handle is never leaked.
            using (FileStream fs = new FileStream(path, FileMode.Append))
            using (StreamWriter sw = new StreamWriter(fs))
            {
                sw.Write(content);
            }
        }

    }
}
结果图:

超简单NSoup版CSDN博客爬虫_第1张图片

你可能感兴趣的:(.net,网络,爬虫,CSDN博客,NSoup)