初始化:
1.确保已安装 Node.js
2.新建一个文件夹
3.在该文件夹中初始化node应用
npm init
安装依赖:
superagent 是一个轻量级、渐进式的请求库,内部依赖 nodejs 原生的请求 api,适用于 nodejs 环境
cheerio 是 nodejs 的页面抓取模块,是专为服务器端定制的、快速、灵活、精简的 jQuery 核心实现,适合各种 Web 爬虫程序,相当于 node.js 版的 jQuery。
npm i express cheerio superagent -D
代码展示:
1.首先引入模块
const express = require("express")
const app = express()
const fs = require("fs")
const superagent = require("superagent")
const cheerio = require("cheerio")
2.声明要爬的网站URL
// Base URL of the listing pages to crawl. The crawler appends the page number
// directly after the trailing slash when it builds each request URL.
const lagouURL = "https://www.lagou.com/guangzhou-zhaopin/webqianduan/";
// Suffix appended after the page number: a slash followed by the query string.
// NOTE(review): `sid` looks like a session id copied from a browser session and
// may need refreshing — confirm before relying on it.
const code = "/?filterOption=3&sid=b87c46399fd24f618b97b395f945ab1b";
3.请求数据
// Request one listing page. On success, parse it with getHotData and append
// the parsed records to ListData; on failure, only log a message.
// (`url`, `i` and `ListData` come from the surrounding crawler code.)
superagent.get(url).end((err, res) => {
  if (!err) {
    const pageRecords = getHotData(res, i)
    ListData = ListData.concat(pageRecords)
    return
  }
  console.log("获取失败")
})
4.分析数据
/**
 * Extract the job postings from one fetched listing page.
 *
 * @param {{text: string}} res - Response object; its `text` is loaded into cheerio as HTML.
 * @param {number} i - Page number used to derive each record's `id`
 *   (the formula `(i - 1) * 15 + index + 1` assumes 15 postings per page).
 * @returns {Array<Object>} One record per `.con_list_item` element, in document order.
 */
let getHotData = (res, i) => {
  const $ = cheerio.load(res.text)
  const jobs = []

  $("#s_position_list ul>.con_list_item").each((index, element) => {
    // All lookups are made relative to the direct children of the list item.
    const $parts = $(element).children()
    const textOf = (selector) => $parts.find(selector).text()
    const attrOf = (selector, attribute) => $parts.find(selector).attr(attribute)

    jobs.push({
      id: (i - 1) * 15 + index + 1,
      position: textOf(".list_item_top .position .p_top a h3"),
      region: textOf(".list_item_top .position .p_top a .add"),
      title: textOf(".list_item_top .position .p_top a .format-time"),
      link: attrOf(".list_item_top .position .p_top a", "href"),
      money: textOf(".list_item_top .position .p_bot .li_b_l"),
      education: textOf(".list_item_top .position .p_bot .li_b_l span"),
      company_name: textOf(".list_item_top .company .company_name a"),
      com_link: attrOf(".list_item_top .com_logo a", "href"),
      com_logo: attrOf(".list_item_top .com_logo a img", "src"),
      industry: textOf(".list_item_top .company .industry"),
      skill: textOf(".list_item_bot .li_b_l span"),
      guarantee: textOf(".list_item_bot .li_b_r"),
    })
  })

  return jobs
}
5.保存数据
// Persist the collected records as JSON next to this script.
// fs.writeFileSync is synchronous and takes no callback: a failure is reported
// by throwing, so it has to be handled with try/catch.
try {
  fs.writeFileSync(`${__dirname}/data.json`, JSON.stringify(ListData), 'utf-8')
} catch (err) {
  console.log(err)
}
6.完整代码
const express = require("express")
const app = express()
const fs = require("fs")
const superagent = require("superagent")
const cheerio = require("cheerio")
// Base URL of the listing pages to crawl. The page number is appended directly
// after the trailing slash when the request URL is built (lagouURL + i + code).
const lagouURL = "https://www.lagou.com/guangzhou-zhaopin/webqianduan/";
// Suffix appended after the page number: a slash followed by the query string.
// NOTE(review): `sid` looks like a session id copied from a browser session and
// may need refreshing — confirm before relying on it.
const code = "/?filterOption=3&sid=b87c46399fd24f618b97b395f945ab1b";
/**
 * GET / — crawl the listing pages, save the parsed jobs to data.json and
 * return them to the caller.
 *
 * A timer fires once per second. Ticks 1..29 each request one page; the tick
 * on which the counter reaches 30 stops the timer, writes the collected
 * records (if any) to `data.json` next to this script, and answers the HTTP
 * request with the collected array.
 *
 * NOTE: responses that arrive after the final tick are not included in the
 * file or in the HTTP response.
 */
app.get("/", (req, reply) => {
  let i = 0
  let ListData = []

  const timer = setInterval(() => {
    i++

    if (i >= 30) {
      clearInterval(timer)
      if (ListData.length > 0) {
        // writeFileSync takes no callback; failures are thrown, so catch them
        // here instead of letting them crash the process from the timer.
        try {
          fs.writeFileSync(`${__dirname}/data.json`, JSON.stringify(ListData), 'utf-8')
        } catch (err) {
          console.log(err)
        }
      }
      // Always answer the request so the client is not left hanging.
      reply.send(ListData)
      return
    }

    // Freeze the page number for this request: `i` keeps advancing every
    // second, so reading it inside the response callback could yield the
    // number of a later page and produce wrong ids.
    const page = i
    const url = lagouURL + page + code
    console.log(url, page)

    superagent.get(url).end((err, res) => {
      if (err) {
        console.log("获取失败", url, err.message)
        return
      }
      ListData = ListData.concat(getHotData(res, page))
    })
  }, 1000)
})
/**
 * Extract the job postings from one fetched listing page.
 *
 * @param {{text: string}} res - Response object; its `text` is loaded into cheerio as HTML.
 * @param {number} i - Page number used to derive each record's `id`
 *   (the formula `(i - 1) * 15 + index + 1` assumes 15 postings per page).
 * @returns {Array<Object>} One record per `.con_list_item` element, in document order.
 */
let getHotData = (res, i) => {
  const $ = cheerio.load(res.text)
  const jobs = []

  $("#s_position_list ul>.con_list_item").each((index, element) => {
    // All lookups are made relative to the direct children of the list item.
    const $parts = $(element).children()
    const textOf = (selector) => $parts.find(selector).text()
    const attrOf = (selector, attribute) => $parts.find(selector).attr(attribute)

    jobs.push({
      id: (i - 1) * 15 + index + 1,
      position: textOf(".list_item_top .position .p_top a h3"),
      region: textOf(".list_item_top .position .p_top a .add"),
      title: textOf(".list_item_top .position .p_top a .format-time"),
      link: attrOf(".list_item_top .position .p_top a", "href"),
      money: textOf(".list_item_top .position .p_bot .li_b_l"),
      education: textOf(".list_item_top .position .p_bot .li_b_l span"),
      company_name: textOf(".list_item_top .company .company_name a"),
      com_link: attrOf(".list_item_top .com_logo a", "href"),
      com_logo: attrOf(".list_item_top .com_logo a img", "src"),
      industry: textOf(".list_item_top .company .industry"),
      skill: textOf(".list_item_bot .li_b_l span"),
      guarantee: textOf(".list_item_bot .li_b_r"),
    })
  })

  return jobs
}
// Start the HTTP server on port 3000 and log a confirmation once it is listening.
app.listen(3000, () => {
  console.log("启动成功")
})