// Node.js


var http = require('http');
var url=require('url');
var request = require('request');
var fs = require('fs');
var jsdom = require('jsdom');

/**
 * Fetch a fixed Baidu search-results page, load it into jsdom, inject jQuery
 * and append the string '1' to result.txt once for every <table> found.
 *
 * @param {*} nPage - Page label; only used in the "do page" log line.
 * @param {Object} [opt] - Only `opt.path` is read, and only for the
 *     request-error log message. May be omitted.
 * @param {Function} fnSpiderData - Accepted for interface compatibility.
 *     NOTE(review): it is currently never invoked; confirm whether callers
 *     expect to receive the page body.
 */
function spiderUrl(nPage, opt, fnSpiderData)
{
    // http.get sends the request immediately (it calls req.end() itself),
    // so no explicit req.end() is needed afterwards.
    var req = http.get( url.parse('http://www.baidu.com/s?tn=baiduhome_pg&ie=utf-8&bs=%E7%BD%91%E9%A1%B5%E4%B8%AD%E9%80%89%E4%B8%AD+%E4%BA%AE%E5%BA%A6&f=8&rsv_bp=1&rsv_spt=1&wd=%E7%BD%91%E9%A1%B5%E4%B8%AD%E9%80%89%E4%B8%AD+%E9%80%89%E6%8B%A9%E9%A2%9C%E8%89%B2&rsv_sug3=5&rsv_sug=1&rsv_sug1=4&rsv_sug4=78&inputT=4594'), function(res)
    {
        var g_data="";

        // Decode at the stream level: concatenating raw Buffer chunks would
        // corrupt any multi-byte character that straddles a chunk boundary.
        res.setEncoding('utf8');

        res.on('data', function (chunk)
        {
            g_data+=chunk;
        });

        res.on('end', function()
        {
            console.log("do page " + nPage);

            // Legacy jsdom API: build a document from the HTML and get its window.
            var document = jsdom.jsdom(g_data);
            var script = document.createElement("script");
            var window = document.createWindow();
            script.src = 'http://code.jquery.com/jquery-1.4.2.js';

            // jQuery's selectors and each() are only usable once the
            // injected script has finished loading.
            script.onload = function() {
                window.jQuery('table').each(function(e){
                    fs.appendFile('result.txt', '1',function(r){
                        if(r){
                            console.log('error');
                            // Do not report success for a failed write.
                            return;
                        }
                        console.log('appendFile success');
                    });
                });
            };
            document.head.appendChild(script);
        });
    });

    req.on('error', function(e)
    {
        // Tolerate a missing opt so the error handler itself cannot throw.
        var path = opt ? opt.path : undefined;
        console.log('problem with request ' + path + ' : ' + e.message);
    });
}
// Start the crawl for page 1 with empty options; the supplied callback
// prints whatever page data it is handed.
spiderUrl(1, {}, function (data, page) {
    console.log(data);
});

 

//抓博客园的东东

var http = require('http');
var fs = require('fs');
var url = require('url');
var jsdom = require('jsdom');

/**
 * Download the resource at `u` over HTTP and pass its whole body to `cb`.
 *
 * @param {string} u - URL to fetch; it is parsed with url.parse before the request.
 * @param {Function} cb - Called once with the body as a single string when
 *     the response ends. It is not called if the request fails; the failure
 *     is logged instead.
 */
function spider(u,cb){
    var req = http.get( url.parse(u), function(res){
        var d = '';
        // Decode at the stream level so a multi-byte character split across
        // two chunks is not corrupted by per-chunk string conversion.
        res.setEncoding('utf8');
        res.on('data',function(chunk){
            d += chunk;
        });
        res.on('end',function(){
            console.log('spider_end && do cb');
            cb(d);
        });
    });
    // Without an 'error' listener a failed request (DNS failure, refused
    // connection, ...) surfaces as an uncaught exception.
    req.on('error', function(e){
        console.log('problem with request ' + u + ' : ' + e.message);
    });
}
// Fetch the cnblogs category page, load it into jsdom, inject jQuery and save
// the inner HTML of every '.post_item_summary' element to blogscn.html.
spider('http://www.cnblogs.com/cate/108703/',function(data){
    var document = jsdom.jsdom( data );

    var window = document.createWindow();
    var script = document.createElement('script');

    script.src = 'http://code.jquery.com/jquery-1.4.2.js';
    // jQuery's selectors and each() are only usable once the injected
    // script has finished loading.
    script.onload = function(){
        var summaries = [];
        window.jQuery('.post_item_summary').each(function(i,e){
            summaries.push(e.innerHTML + '<br>');
        });

        // Nothing matched: do not touch the output file.
        if(summaries.length === 0){
            return;
        }

        // Write everything in one call. Issuing one fs.appendFile per element
        // starts them all concurrently, and Node gives no ordering guarantee
        // between concurrent callback-based fs operations, so the summaries
        // could land in the file out of page order.
        fs.appendFile('blogscn.html', summaries.join(''), function(err){
            if(err)throw err;
            console.log('done');
        });
    };

    document.head.appendChild( script );
});

 

//直接 npm install jquery 然后使用下面的命令,更快了

var $ = require('jquery');
var fs = require('fs');
// Fetch the qiushibaike "8hr" page and save the text of each '.block' element's
// '.content' child via writeToFile.
$.get('http://www.qiushibaike.com/8hr',function(html){
    var contents = [];
    $(html).find('.block').each(function(i,e){
        contents.push( $(e).find('.content').text() );
    });

    // Hand the text over in a single call, in page order. Calling writeToFile
    // once per element starts concurrent appends to the same file, and Node
    // gives no ordering guarantee between them.
    if(contents.length > 0){
        writeToFile( contents.join('') );
    }
});
/**
 * Append `data` to qiubai.txt.
 *
 * Logs 'done' after a successful append; if the append fails, the error is
 * thrown from inside the completion callback.
 *
 * @param {string|Buffer} data - Content to append.
 */
function writeToFile(data){
    var onAppended = function(err){
        if(err){
            throw err;
        }
        console.log('done');
    };

    fs.appendFile('qiubai.txt', data, onAppended);
}

 

// 你可能感兴趣的:(nodejs)