接上一篇。本人是做 Java 的,但为了项目研究了一段时间的 Node.js 和 puppeteer:用一个 HTTP 服务对外提供爬虫服务,爬虫服务内部调用 puppeteer;遇到验证码时,可以先调用验证码服务,然后继续执行!
话不多说,直接上代码更清晰(踩过无数的坑,也都爬出来了):
//引入包
const express = require('express');
const puppeteer = require('puppeteer');
const bodyParser = require('body-parser');
const assert = require('assert');
const request = require("request-promise");
// Create the Express application and register both body parsers in a single
// call, so handlers can read URL-encoded form bodies and JSON bodies.
const app = express();
app.use(
    bodyParser.urlencoded({extended: true}),
    bodyParser.json(),
);
// HTTP server port: the PORT environment variable when it is set and non-empty,
// otherwise 8080.
const port = process.env.PORT ? process.env.PORT : 8080;
//=====图形验证相关start=====
/**
 * Ask the browser, through the DevTools protocol command
 * `Page.getResourceContent`, for the content of the resource at `url` in the
 * page's main frame.
 *
 * NOTE(review): `page._client` and the frame's `_id` are underscore-prefixed
 * Puppeteer members, presumably private API — confirm they exist in the
 * installed Puppeteer version.
 *
 * @param {object} page - Puppeteer page whose main frame references the resource.
 * @param {string} url - URL of the resource (here: the captcha image).
 * @returns {Promise<string>} The resource content, base64 encoded.
 * @throws {AssertionError} If the protocol reports the content is not base64 encoded.
 */
async function getResourceContent(page, url) {
    const frameId = String(page.mainFrame()._id);
    const {content, base64Encoded} = await page._client.send(
        'Page.getResourceContent',
        {frameId, url},
    );
    // Callers forward the content as base64 image data, so anything else is an error.
    assert.strictEqual(
        base64Encoded,
        true,
        `expected base64-encoded content for resource ${url}`,
    );
    return content;
}
//请求校验验证码
/**
 * POST `options` to the captcha-recognition service and resolve with the
 * response body.
 *
 * The returned promise always settles: it rejects on a transport error, on a
 * status code other than 200, and when the body is missing or the literal
 * string 'null'. Callers that await it inside try/catch therefore get a clear
 * error instead of waiting forever.
 *
 * @param {object} options - Options passed straight to `request.post`
 *     (url, form, headers, ...).
 * @returns {Promise<*>} Resolves with the response body as delivered by `request`.
 * @throws {Error} (as a rejection) when the service cannot produce a result.
 */
function post(options) {
    return new Promise((resolve, reject) => {
        request.post(options, (err, response, body) => {
            if (err) {
                reject(err);
                return;
            }
            const status = response ? response.statusCode : undefined;
            if (status !== 200) {
                reject(new Error(`captcha service responded with status ${status}`));
                return;
            }
            if (body == null || body === 'null') {
                reject(new Error('captcha service returned no result'));
                return;
            }
            resolve(body);
        });
    });
}
//=====图形验证相关end=======
// REQUEST FOR OUR API
// =============================================================================
const router = express.Router(); // get an instance of the express Router

/**
 * GET /login?mobile=<phone number>
 *
 * Drives the Alipay "reset query password" form in a fresh browser:
 *   1. opens the page through the authenticated proxy,
 *   2. reads the captcha image and sends it to the captcha service via `post`,
 *   3. types the phone number and the captcha answer, then submits the form,
 *   4. responds with the form's error text (`message`) and the page HTML (`document`).
 *
 * Exactly one response is sent per request. Any failure is logged and answered
 * with the fixed failure payload, and the browser is closed on every path.
 *
 * NOTE(review): `headless: false` is kept from the original; it presumably
 * requires a display on the host — confirm for server deployments.
 */
router.route('/login').get(async (req, res) => {
    // Proxy tunnel credentials.
    const proxyUrl = 'http://example:9020';
    const username = '123123123';
    const password = '123123123';

    const captchaImgSelector = '#J-findPwdForm > div:nth-child(3) > img';
    const submitSelector = '#J-findPwdForm > div.ui-form-item.ui-form-item-last > div > input';
    const errorTextSelector = '#J-findPwdForm > div.ui-form-item.ui-form-item-error > div';

    // Response body used whenever the flow cannot be completed.
    const failurePayload = {
        message: "请求异常,请检查",
        document: "请求异常,无内容"
    };

    // Phone number passed as a GET parameter. Typing a non-string would fail
    // anyway, so reject it before paying for a browser launch.
    const mobile = req.query.mobile;
    if (typeof mobile !== 'string') {
        console.log('login: missing or invalid "mobile" query parameter');
        res.send(failurePayload);
        return;
    }

    let browser;
    let payload;
    try {
        // Launch inside the try block so a failed launch is also answered.
        browser = await puppeteer.launch({
            ignoreHTTPSErrors: true,
            headless: false,
            args: ['--no-sandbox', '--disable-setuid-sandbox', `--proxy-server=${proxyUrl}`],
        });
        const page = await browser.newPage();
        await page.setJavaScriptEnabled(true);
        await page.authenticate({username, password});

        // Alipay
        await page.goto('https://accounts.alipay.com/console/querypwd/logonIdInputReset.htm?site=1&page_type=fullpage&scene_code=resetQueryPwd');
        await page.waitForSelector(captchaImgSelector);
        const url = await page.$eval(captchaImgSelector, (img) => img.src);
        const content = await getResourceContent(page, url);

        // Ask the captcha service to recognise the image.
        const options = {
            url: 'http://example/api',
            form: {
                img: content,
                isPath: "0",
            }
        };
        const result = await post(options);

        // Fill in the form; the per-key delay makes the typing look more human.
        await page.click('#J-accName');
        await page.type('#J-accName', mobile, {delay: 200});
        await page.click('#J-checkcode');
        await page.type('#J-checkcode', result.toString(), {delay: 200});
        await page.click(submitSelector);

        // Give the page time to render the validation result.
        await delay(1000);
        // Text rendered by the page's own JavaScript. Throws when the error
        // element is absent, which is answered with the failure payload.
        const message = await page.$eval(errorTextSelector, (el) => el.textContent);
        const pagebody = await page.content(); // full page source

        await page.deleteCookie();
        await page.close(); // close the tab; the browser is closed below
        payload = {
            message: message,
            document: pagebody
        };
    } catch (e) {
        console.log(e);
        payload = failurePayload;
    } finally {
        // Always close the browser; a failing close must not block the response.
        if (browser) {
            await browser.close().catch((closeErr) => console.log(closeErr));
        }
    }
    res.send(payload);
});
//POST请求示例 先注释掉
// router.post('/submit', (req, res) => {
// (async () => {
// await page.type('#username', req.body.loginName);
// await page.type('#password', req.body.loginPwd);
// await page.type('#vcode', req.body.loginCode);
// await delay(1000);
//
// var result = await page.evaluate(() => document.querySelector(".bootbox-body"));
// if (result == null) {
// res.send({
// status: 'success'
// });
// await page.click('#loginBtn1');
// }
// })();
//
// });
//POST请求注释完毕==
/**
 * Pause helper: returns a promise that is fulfilled (with no value) once
 * `timeout` milliseconds have been scheduled through `setTimeout`.
 *
 * @param {number} timeout - Time to wait, in milliseconds.
 * @returns {Promise<undefined>}
 */
function delay(timeout) {
    const armTimer = (wake) => {
        setTimeout(wake, timeout);
    };
    return new Promise(armTimer);
}
// test route to make sure everything is working (accessed at GET
// http://localhost:8080/api)
// Health-check route: logs the greeting and returns it as JSON.
router.get('/', (req, res) => {
    const greeting = 'hi, welcome to http服务!';
    console.log(greeting);
    res.json({message: greeting});
});
// more roues for our API will happen here
// REGISTER OUR ROUTES ---------------------------------------------------------
// all of our routes will be prefixed with /api
// Mount the router: every route defined on it is served under /api.
app.use('/api', router);
// START THE SERVER
// =============================================================================
// Log from the listen callback so the message appears only once the server is
// actually accepting connections, not when binding the port failed.
app.listen(port, () => {
    console.log(`Magic happens on port ${port}`);
});
这是对支付宝“忘记密码”页面的一个测试,只是随便举的例子,换成什么网站都可以。例如把代码保存为 test.js,执行 node test.js 即可。
访问 http://localhost:8080/api 会显示 hi, welcome to http服务!
访问 http://localhost:8080/api/login?mobile=13888888888 就会完成一次完整的访问。当然,代码中破解验证码的服务需要你自己搭建。
本人用的 IDE 是 WebStorm,可以 debug,比较方便。研究不多,如有错误还请见谅。
有问题可以一起讨论。代码应该很清楚了,不必多做赘述。部署和启动方式在上一篇已经写过了!~