Puppeteer搭配Node.js提供有验证码的登录的爬虫服务 可加代理 http接口调用

接上一篇。本人是做Java开发的,但是为了项目研究了一段时间的Node.js和Puppeteer:用HTTP服务对外提供爬虫服务,这个爬虫服务底层调用的是Puppeteer;遇到有验证码的页面,可以先调用验证码识别服务,拿到结果后再继续执行!

话不多说 直接上代码更加清晰(无数的踩坑脱坑):

//引入包
const express = require('express');
const puppeteer = require('puppeteer');
const bodyParser = require('body-parser');
const assert = require('assert');
const request = require("request-promise");

// Express application plus the port it will listen on (PORT env var, else 8080).
const app = express();
const port = process.env.PORT || 8080; // set HTTP server port

// Body parsers: URL-encoded forms first, then JSON payloads.
const formBodyParser = bodyParser.urlencoded({extended: true});
const jsonBodyParser = bodyParser.json();
app.use(formBodyParser);
app.use(jsonBodyParser);

//=====图形验证相关start=====

/**
 * Fetch the content of an already-loaded page resource through the DevTools
 * protocol command `Page.getResourceContent`.
 *
 * @param {object} page - Puppeteer page; its `_client` session and main-frame `_id` are used.
 * @param {string} url - URL of the resource (here: the captcha image).
 * @returns {Promise<string>} The `content` field of the protocol reply.
 * @throws {AssertionError} When the reply is not flagged as base64-encoded.
 */
async function getResourceContent(page, url) {
    const session = page._client;
    const frameId = String(page.mainFrame()._id);
    const reply = await session.send('Page.getResourceContent', {frameId: frameId, url: url});
    // The caller forwards the content as-is, so insist on the base64 form.
    assert.equal(reply.base64Encoded, true);
    return reply.content;
}

//请求校验验证码
/**
 * POST to the captcha-solving service and return its answer.
 *
 * Unlike a bare callback wrapper, this always settles: transport errors,
 * non-200 replies and an empty / 'null' body reject, so the caller's
 * try/catch can clean up instead of waiting forever.
 *
 * @param {object} options - request-promise options (url, form, headers, ...).
 * @returns {Promise<*>} The response body returned by the service.
 * @throws {Error} On transport failure, a non-200 status, or an empty / 'null' body.
 */
async function post(options) {
    // Ask for the full response and disable the automatic non-2xx rejection
    // so the status code can be checked (and reported) explicitly here.
    const requestOptions = Object.assign({}, options, {
        simple: false,
        resolveWithFullResponse: true,
    });
    const response = await request.post(requestOptions);

    if (response.statusCode !== 200) {
        throw new Error(`captcha service responded with HTTP ${response.statusCode}`);
    }

    const body = response.body;
    // The service answers with the literal string 'null' when it has no result.
    if (body == null || body === 'null') {
        throw new Error('captcha service returned no result');
    }
    return body;
}

//=====图形验证相关end=======
// REQUEST FOR OUR API
// =============================================================================
const router = express.Router(); // get an instance of the express Router

/**
 * GET /login?mobile=<phone number>   (mounted under /api)
 *
 * Launches a browser behind the proxy, opens the Alipay "reset query
 * password" form, downloads the captcha image, has it solved by the external
 * captcha service via post(), types the phone number and the solved code,
 * submits the form and replies with {message, document}:
 *   - message:  text of the form's error element, or null when the page shows none
 *   - document: rendered page source
 *
 * On any failure the fixed placeholder payload is returned and the error is
 * logged. The browser is always closed, and exactly one response is sent.
 */
router.route('/login').get(async (req, res) => {
    // Proxy tunnel endpoint and credentials.
    const proxyUrl = 'http://example:9020';
    const username = '123123123';
    const password = '123123123';

    const captchaImgSelector = '#J-findPwdForm > div:nth-child(3) > img';
    const errorTextSelector = '#J-findPwdForm > div.ui-form-item.ui-form-item-error > div';

    // Reply used whenever the flow cannot be completed; replaced on success.
    let payload = {
        message: "请求异常,请检查",
        document: "请求异常,无内容"
    };
    let browser = null;

    try {
        // Phone number comes from the query string; reject it before paying
        // for a browser launch and a captcha solve.
        const mobile = req.query.mobile;
        if (typeof mobile !== 'string' || mobile === '') {
            throw new Error('missing "mobile" query parameter');
        }

        browser = await puppeteer.launch({
            ignoreHTTPSErrors: true,
            headless: false,
            args: ['--no-sandbox', '--disable-setuid-sandbox', `--proxy-server=${proxyUrl}`],
        });

        const page = await browser.newPage();
        await page.setJavaScriptEnabled(true);
        await page.authenticate({username, password});

        // Alipay
        await page.goto('https://accounts.alipay.com/console/querypwd/logonIdInputReset.htm?site=1&page_type=fullpage&scene_code=resetQueryPwd');
        await page.waitForSelector(captchaImgSelector);
        const url = await page.$eval(captchaImgSelector, i => i.src);
        const content = await getResourceContent(page, url);

        // Hand the captcha image to the solving service.
        const options = {
            url: 'http://example/api',
            form: {
                img: content,
                isPath: "0",
            }
        };
        const result = await post(options);
        if (result == null) {
            throw new Error('captcha service returned no result');
        }

        // Fill in the form; the per-key delay makes the typing look more human.
        await page.click('#J-accName');
        await page.type('#J-accName', mobile, {delay: 200});
        await page.click('#J-checkcode');
        await page.type('#J-checkcode', result.toString(), {delay: 200});
        await page.click('#J-findPwdForm > div.ui-form-item.ui-form-item-last > div > input');

        // Give the page time to render the outcome of the submission.
        await delay(1000);

        // Read the JS-rendered error text; the element is absent when the
        // page reports no error, which must not be treated as a failure.
        const message = await page.evaluate((selector) => {
            const node = document.querySelector(selector);
            return node === null ? null : node.textContent;
        }, errorTextSelector);

        const pagebody = await page.content(); // rendered page source
        await page.deleteCookie();

        payload = {
            message: message,
            document: pagebody
        };
    } catch (e) {
        console.log(e);
    } finally {
        // Always close the browser (this also closes its tabs), even when
        // launch succeeded but a later step threw.
        if (browser !== null) {
            try {
                await browser.close();
            } catch (closeError) {
                console.log(closeError);
            }
        }
    }

    res.send(payload);
});

//POST请求示例 先注释掉
// router.post('/submit', (req, res) => {
//     (async () => {
//         await page.type('#username', req.body.loginName);
//         await page.type('#password', req.body.loginPwd);
//         await page.type('#vcode', req.body.loginCode);
//         await delay(1000);
//
//         var result = await page.evaluate(() => document.querySelector(".bootbox-body"));
//         if (result == null) {
//             res.send({
//                 status: 'success'
//             });
//             await page.click('#loginBtn1');
//         }
//     })();
//
// });
//POST请求注释完毕==

/**
 * Promise-based sleep.
 *
 * @param {number} timeout - Milliseconds to wait.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function delay(timeout) {
    const startTimer = (wake) => {
        setTimeout(wake, timeout);
    };
    return new Promise(startTimer);
}

// test route to make sure everything is working (accessed at GET
// http://localhost:8080/api)
// Health-check route: logs a greeting and echoes it back as JSON.
router.get('/', (req, res) => {
    const greeting = 'hi, welcome to  http服务!';
    console.log(greeting);
    res.json({message: greeting});
});

// more routes for our API will happen here

// REGISTER OUR ROUTES ---------------------------------------------------------
// all of our routes will be prefixed with /api
app.use('/api', router);

// START THE SERVER
// =============================================================================
app.listen(port, () => {
    // Log from the listen callback so the banner only appears once the port
    // is actually bound, not when binding fails (e.g. port already in use).
    console.log(`Magic happens on port ${port}`);
});

这是对支付宝忘记密码接口的测试,只是随便举的一个例子,换成什么网站都可以。例如把代码保存为 test.js,执行 node test.js 即可。

访问 http://localhost:8080/api 会显示 hi,welcome to http服务

访问 http://localhost:8080/api/login?mobile=13888888888 就会完成一次完整的访问 当然 代码中的破解验证码服务是需要你自己搭建的

本人用的IDE是WebStorm 可以debug 比较方便 研究不多 如有错误还请见谅 

有问题一起讨论。代码应该很清楚了,不必多多累赘。部署启动在上篇已经写了!~

你可能感兴趣的:(Node,Puppeteer,爬虫)