puppeteer爬虫示例,京东拍卖

官方地址

https://github.com/puppeteer/puppeteer

// const puppeteer = require('puppeteer');
import puppeteer from 'puppeteer';

/**
 * Crawls the JD auction listing page by page.
 *
 * For every listing page it saves a screenshot, collects the item links,
 * then opens each item in a second tab to screenshot it and print its title
 * and description text. Crawling stops when no "next page" link is present.
 * The browser is always closed, even when a step throws.
 */
(async () => {
    const LIST_URL = 'https://auction.jd.com/sifa_list.html?childrenCateId=13809';
    const ITEM_LINK_SELECTOR = "body > div.auction-content.auction-content-v2 > div > div.goods-list > ul > li > a";
    const NEXT_PAGE_SELECTOR = "a.ui-pager-next";
    // page.waitFor() was deprecated and later removed from Puppeteer, so use a plain timer.
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    const browser = await puppeteer.launch({
        defaultViewport: {
            width: 1920,
            height: 4000,
        },
    });

    try {
        const page = await browser.newPage();
        const detailPage = await browser.newPage();
        await page.goto(LIST_URL);

        for (let pageNumber = 1; ; pageNumber++) {
            await page.screenshot({path: 'example.png'});
            const urls = await page.$$eval(ITEM_LINK_SELECTOR, (links) => links.map((link) => link.href));
            console.log(urls);
            await page.screenshot({path: `list${pageNumber}_example.png`});

            for (const url of urls) {
                try {
                    await detailPage.goto(url);
                    // The last path segment of the item URL becomes the screenshot file name prefix.
                    const segments = url.split("/");
                    await detailPage.screenshot({path: `${segments[segments.length - 1]}example.png`});
                    console.log(await detailPage.title());
                    const text = await detailPage.$eval('div.list.description', (node) => node.textContent);
                    console.log(text);
                } catch (err) {
                    // One broken item page (timeout, missing description) must not abort the whole crawl.
                    console.error(`Failed to process ${url}:`, err);
                }
            }

            // page.$ resolves to null when the selector matches nothing: that is the last page.
            // NOTE(review): if the site keeps a disabled "next" link on the last page, add a check for that too.
            const nextButton = await page.$(NEXT_PAGE_SELECTOR);
            if (nextButton === null) {
                break;
            }
            await nextButton.click();
            // Give the list time to re-render before reading it again.
            await sleep(1000);
        }
    } finally {
        await browser.close();
    }
})().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

备注: 如果要使用 ES 模块(import 语法),需要将文件后缀由 .js 改成 .mjs,或者在 package.json 中设置 "type": "module"。


// launch.js
const puppeteer=require('puppeteer');
const fs=require('fs');
// Launch options: headless is disabled, so the browser runs with a visible window.
const launchConfig={
    headless:false
};
// Start a browser and save its DevTools WebSocket endpoint to wsa.txt next to
// this script, so that a separate script can attach to the same browser later.
puppeteer.launch(launchConfig).then(browser=>{
    const wsEPAddress=browser.wsEndpoint();
    // Buffer.from replaces the deprecated `new Buffer(string)` constructor.
    const w_data=Buffer.from(wsEPAddress);
    fs.writeFile(__dirname + '/wsa.txt', w_data, {flag: 'w+'}, function (err) {
        if(err) {
            console.error(err);
        } else {
            console.log('写入成功');
        }
    });
}).catch(err=>{
    // A failed launch would otherwise surface only as an unhandled rejection.
    console.error(err);
    process.exitCode=1;
});
复制代码

// action.js
const puppeteer=require('puppeteer');
const fs=require('fs');
/**
 * Reads the browser WebSocket endpoint stored in wsa.txt next to this script.
 *
 * @returns {Promise<string>} Resolves with the file content, with surrounding
 *     whitespace removed. Rejects with the fs error when the file cannot be
 *     read, so an awaiting caller fails instead of hanging forever.
 */
const getWSAddress=()=>new Promise((resolve, reject) => {
    fs.readFile(__dirname + '/wsa.txt', {flag: 'r+', encoding: 'utf8'}, function (err, data) {
        if(err) {
            reject(err);
            return;
        }
        // A stray trailing newline (e.g. from manual editing) would make the endpoint URL invalid.
        const endpoint=data.trim();
        console.log(endpoint);
        resolve(endpoint);
    });
});
// Attach to the already-running browser whose WebSocket endpoint is returned
// by getWSAddress(), open a new tab, and run the script body against it.
(async ()=>{
    const wsa=await getWSAddress();
    const browserConfig={
        browserWSEndpoint :wsa
    };
    const browser= await puppeteer.connect(browserConfig);
    try {
        const page=await browser.newPage();
        // TODO: your script goes here
    } finally {
        // Detach from the browser without closing it; otherwise the open
        // connection keeps this Node process alive after the script is done.
        await browser.disconnect();
    }
})().catch(err=>{
    console.error(err);
    // Report the failure to the caller: a bare process.exit() exits with code 0.
    process.exit(1);
});

c# 类库

https://github.com/hardkoded/puppeteer-sharp

解决 try/catch 问题

/**
 * Demo task that never resolves: it logs an increasing counter once per
 * second, and deliberately fails once the counter has passed 10.
 *
 * The failure is produced by calling the undefined function `aa`, which
 * throws a ReferenceError inside the promise executor. That rejects the
 * awaited promise and therefore the promise returned by this function.
 */
let asyncFunc = async () => {
    let counter = 0;

    // One timer tick wrapped in a promise; resolves with the incremented counter.
    const tick = () => new Promise((resolve) => {
        if (counter > 10) {
            aa();
        }
        setTimeout(() => {
            counter += 1;
            resolve(counter);
        }, 1000);
    });

    for (;;) {
        const value = await tick();
        console.log(value);
    }
};

/**
 * Starts asyncFunc and restarts it whenever it rejects.
 *
 * A fulfilled result is logged. A rejection is logged and followed by a new
 * call to run(), so the task is started again from scratch.
 */
let run = () => {
    const logResult = (resp) => {
        console.log(resp);
    };
    const logAndRestart = (e) => {
        console.log(e)
        run();
    };
    asyncFunc().then(logResult).catch(logAndRestart);
};
run();

你可能感兴趣的:(puppeteer爬虫示例,京东拍卖)