node+request+cheerio+iconv-lite爬虫

http://www.99css.com/nodejs-request-chinese-encoding/

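步骤:
1. 用 request 获取页面的 HTML 源码(设置 encoding: null,让响应体以原始 Buffer 返回)
2. 用 iconv-lite 按 GBK 将 Buffer 解码为字符串
3. 用 cheerio 以类似 jQuery 选择器的方式获取 DOM 节点内容
4. 用 fs.writeFile 将提取到的正文写入本地文件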

var fs = require("fs");
var request = require("request");// 请求
var cheerio = require("cheerio");//cheerio 解析 HTML
var iconv = require('iconv-lite');//Node.js 抓取非 utf-8 的中文网页时会出现乱码问题


/**
 * Minimal crawler object: downloads one page, decodes it as GBK, extracts the
 * <h1> and #content HTML with cheerio, and writes the content to ./a.txt.
 *
 * Uses the file-level `request`, `iconv`, `cheerio` and `fs` modules.
 */
function objFn() {
    var _this = this;

    /**
     * Fetch a page under http://www.31xs.net/0/102/ and hand its title and
     * content HTML to contentEach(). Failures are logged, not thrown.
     *
     * @param {string} url - Page file name appended to the base URL,
     *                       e.g. '4954112.html'.
     */
    this.getContent = function(url) {
        var pageUrl = "http://www.31xs.net/0/102/" + url;
        request({
            url: pageUrl,
            method: "GET",
            // gzip:true,
            encoding: null // return the body as a raw Buffer so iconv can decode it
        }, function(error, response, body) {
            if (error) {
                // Previously ignored silently; surface network failures.
                console.error("Request failed for " + pageUrl + ":", error);
                return;
            }
            if (!response || response.statusCode !== 200) {
                // Avoid saving an error page as if it were chapter content.
                console.error("Unexpected HTTP status for " + pageUrl + ":",
                    response && response.statusCode);
                return;
            }

            var html = iconv.decode(body, "GBK"); // page is decoded as GBK
            // decodeEntities: false keeps characters as-is in .html() output
            // instead of entity-escaping them.
            var $ = cheerio.load(html, {decodeEntities: false});

            _this.contentEach($("h1").html(), $("#content").html());
        });
    };

    /**
     * Write the extracted content to ./a.txt (overwriting it).
     *
     * @param {?string} title   - Page title HTML; currently only used in log messages.
     * @param {?string} content - Content HTML to save. When null/undefined
     *                            (element not found) nothing is written.
     */
    this.contentEach = function(title, content) {
        if (content === null || content === undefined) {
            // fs.writeFile throws synchronously on null data; bail out instead.
            console.error("No content to write for page:", title);
            return;
        }
        fs.writeFile('./a.txt', content, function(err) {
            if (err) {
                // Only report real failures; the original logged `null` on success.
                console.error(err);
            }
        });
    };
}
// Script entry point: build the crawler and fetch a single chapter page.
var obj = new objFn();
obj.getContent('4954112.html');

你可能感兴趣的:(node+request+cheerio+iconv-lite爬虫)