使用 Puppeteer 实现 Node.js 爬虫

image.png
image.png
const puppeteer = require('puppeteer');
const url = 'https://movie.douban.com/explore#!type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start=0';


// Delay helper: returns a promise that settles (with no value) once
// `time` milliseconds have elapsed.
let sleep = (time) => {
    return new Promise((done) => {
        setTimeout(() => done(), time);
    });
};
/**
 * Entry point: launches a headless browser, opens `url`, clicks the
 * ".more" button to load additional entries, then extracts
 * { id, title, imgUrl, rate } for every anchor under ".list" and logs
 * the resulting array.
 *
 * The browser is always closed, even when navigation or scraping fails,
 * and any failure is reported with a non-zero exit code instead of being
 * left as an unhandled promise rejection.
 */
(async () => {
    // How many times the ".more" button is clicked before scraping.
    const LOAD_MORE_CLICKS = 1;
    // Pause (ms) used to let the page settle between interactions.
    const SETTLE_DELAY_MS = 3000;

    console.log("开始");

    // Launch the browser.
    // NOTE(review): '--no-sandbox' disables Chromium's sandbox; confirm it
    // is really required in the environment this script runs in.
    const browser = await puppeteer.launch({
        args: ['--no-sandbox'],
        dumpio: false
    });

    try {
        // Open a new tab and navigate to the target URL.
        const page = await browser.newPage();
        await page.goto(url, {
            waitUntil: 'networkidle2'
        });

        // Give client-side rendering some extra time.
        await sleep(SETTLE_DELAY_MS);

        // Wait until the ".more" element is present in the DOM.
        await page.waitForSelector('.more');

        for (let i = 0; i < LOAD_MORE_CLICKS; i++) {
            await sleep(SETTLE_DELAY_MS);
            await page.click('.more');
        }

        // This callback is serialized and executed inside the page, so it
        // must be self-contained and can only use what the page provides.
        const result = await page.evaluate(() => {
            // Relies on the jQuery instance exposed by the page itself.
            const $ = window.$;
            if (typeof $ !== 'function') {
                throw new Error('window.$ (jQuery) is not available on the page');
            }

            const links = [];

            $('.list a').each(function(_idx, item) {
                const $item = $(item);
                const $img = $item.find('img');
                const src = $img.attr('src');

                links.push({
                    id: $item.find('.cover-wp').data('id'),
                    title: $img.attr('alt'),
                    // Swap the small-ratio cover for the large one; an anchor
                    // without an image src is kept rather than aborting the
                    // whole scrape.
                    imgUrl: typeof src === 'string' ? src.replace('s_ratio', 'l_ratio') : src,
                    rate: Number($item.find('strong').text())
                });
            });

            return links;
        });

        console.log(result);
    } finally {
        // Always release the browser process, including on failure.
        await browser.close();
    }
})().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

你可能感兴趣的:(puppeteer实现node爬虫)