]
var http = require('http');
var url = require('url');
var request = require('request');
var fs = require('fs');
var jsdom = require('jsdom');

/**
 * Fetch a (currently hard-coded) Baidu search results page over HTTP, load it
 * into jsdom, inject jQuery, and append the character '1' to `result.txt`
 * once for every <table> element found in the page.
 *
 * @param {*} nPage Page identifier; only used in the "do page" progress log.
 * @param {Object} opt Options; only `opt.path` is read, and only to label the
 *     request-error log message.
 * @param {Function} fnSpiderData Callback intended to receive
 *     `(pageHtml, nPage)`. NOTE(review): it is currently never invoked; the
 *     call is disabled below, as it was in the original code.
 */
function spiderUrl(nPage, opt, fnSpiderData) {
  // Fetch the page content with http.get. http.get() ends the request
  // automatically, so no explicit req.end() is needed.
  var req = http.get(
    url.parse('http://www.baidu.com/s?tn=baiduhome_pg&ie=utf-8&bs=%E7%BD%91%E9%A1%B5%E4%B8%AD%E9%80%89%E4%B8%AD+%E4%BA%AE%E5%BA%A6&f=8&rsv_bp=1&rsv_spt=1&wd=%E7%BD%91%E9%A1%B5%E4%B8%AD%E9%80%89%E4%B8%AD+%E9%80%89%E6%8B%A9%E9%A2%9C%E8%89%B2&rsv_sug3=5&rsv_sug=1&rsv_sug1=4&rsv_sug4=78&inputT=4594'),
    function (res) {
      var g_data = '';

      // Decode as UTF-8 at the stream level so that a multi-byte character
      // split across two chunks is not corrupted by string concatenation.
      res.setEncoding('utf8');

      res.on('data', function (chunk) {
        g_data += chunk;
      });

      res.on('end', function () {
        console.log("do page " + nPage);

        // NOTE(review): jsdom.jsdom() / document.createWindow() is the legacy
        // jsdom API; confirm it matches the installed jsdom version.
        var document = jsdom.jsdom(g_data);
        var script = document.createElement("script");
        var window = document.createWindow();
        script.src = 'http://code.jquery.com/jquery-1.4.2.js';

        // Once the injected jQuery script has loaded, its selectors and
        // .each() can be used against the fetched page.
        script.onload = function () {
          window.jQuery('table').each(function () {
            // Results are saved to a local file; pick any file name you like.
            fs.appendFile('result.txt', '1', function (r) {
              if (r) {
                console.log('error');
                // Do not also report success when the append failed.
                return;
              }
              console.log('appendFile success');
            });
          });
        };
        document.head.appendChild(script);
        // Disabled in the original: fnSpiderData(g_data, nPage);
      });
    }
  );

  req.on('error', function (e) {
    // Tolerate a missing `opt` so the error handler itself cannot throw.
    var path = opt ? opt.path : undefined;
    console.log('problem with request ' + path + ' : ' + e.message);
  });
}

spiderUrl(1, {}, function (data, page) { console.log(data); });
// Scrape post summaries from cnblogs.com (博客园)
var http = require('http');
var fs = require('fs');
var url = require('url');
var jsdom = require('jsdom');

/**
 * Download the resource at `u` with an HTTP GET and hand the complete
 * response body to `cb` as a single string.
 *
 * @param {string} u URL to fetch (parsed with url.parse).
 * @param {function(string)} cb Called once with the full body when the
 *     response ends. It is not called if the request itself fails; in that
 *     case the failure is logged instead.
 */
function spider(u, cb) {
  var req = http.get(url.parse(u), function (res) {
    var d = '';

    // Decode as UTF-8 at the stream level so that a multi-byte character
    // split across two chunks is not corrupted by string concatenation.
    res.setEncoding('utf8');

    res.on('data', function (chunk) {
      d += chunk;
    });

    res.on('end', function () {
      console.log('spider_end && do cb');
      cb(d);
    });
  });

  // Without a listener, a connection failure would surface as an uncaught
  // 'error' event and terminate the process.
  req.on('error', function (e) {
    console.log('problem with request ' + u + ' : ' + e.message);
  });
}

spider('http://www.cnblogs.com/cate/108703/', function (data) {
  // NOTE(review): jsdom.jsdom() / document.createWindow() is the legacy jsdom
  // API; confirm it matches the installed jsdom version.
  var document = jsdom.jsdom(data);
  var window = document.createWindow();
  var script = document.createElement('script');
  script.src = 'http://code.jquery.com/jquery-1.4.2.js';

  // Once the injected jQuery script has loaded, its selectors and .each()
  // can be used against the fetched page.
  script.onload = function () {
    window.jQuery('.post_item_summary').each(function (i, e) {
      // Append each post summary's markup to the output file, one per line
      // break.
      fs.appendFile('blogscn.html', e.innerHTML + '<br>', function (err) {
        if (err) throw err;
        console.log('done');
      });
    });
  };
  document.head.appendChild(script);
});
// Alternatively, just run `npm install jquery` and use the code below; it is faster.
var $ = require('jquery');
var fs = require('fs');

/**
 * Append `data` to qiubai.txt.
 *
 * Rethrows the error reported by fs.appendFile, if any; otherwise logs
 * 'done' once the append has completed.
 *
 * @param {string} data Text to append to the file.
 */
function writeToFile(data) {
  fs.appendFile('qiubai.txt', data, function onAppended(err) {
    if (err) {
      throw err;
    }
    console.log('done');
  });
}

// Fetch the page, then write the text of the `.content` element inside every
// `.block` element to the output file.
$.get('http://www.qiushibaike.com/8hr', function onPageLoaded(html) {
  var blocks = $(html).find('.block');

  blocks.each(function (index, element) {
    var contentText = $(element).find('.content').text();
    writeToFile(contentText);
  });
});