// puppeteer爬取豌豆荚数据

//数据仅测试用,如有侵权请联系本人处理
const puppeteer = require('puppeteer');

 /**
  * Pause helper for async code.
  *
  * @param {number} time - Delay in milliseconds, passed straight to setTimeout.
  * @returns {Promise<undefined>} Promise that resolves with no value once the
  *     timer fires.
  */
 const sleep = function (time) {
     return new Promise(function (wake) {
         setTimeout(wake, time);
     });
 };

// Category listing page on wandoujia.com that this script scrapes.
const url = `https://www.wandoujia.com/category/5029`;

/*
 * Script entry point.
 *
 * Opens the category page in a visible Chrome window, clicks the
 * ".load-more" button up to LOAD_MORE_CLICKS times, then collects
 * { rate, name, url } for every <li> under #j-tag-list and prints the
 * resulting array.
 *
 * The browser is always closed, even when navigation or scraping fails, and
 * any failure is reported on stderr with a non-zero exit code.
 */
(async () => {
    // Maximum number of extra result pages to request.
    const LOAD_MORE_CLICKS = 10;
    // Time given to the page to render after navigation and after each click.
    const SETTLE_DELAY_MS = 3000;

    console.log('Start visit');

    // Launch a browser instance.
    const browser = await puppeteer.launch({
        args: ['--no-sandbox'],
        dumpio: false,
        executablePath: 'D:/chrome-win/chrome.exe',
        headless: false,
    });

    let result;
    try {
        const page = await browser.newPage();

        await page.goto(url, {
            // Treat the page as loaded once the network has gone (nearly) idle.
            waitUntil: 'networkidle2',
        });
        await sleep(SETTLE_DELAY_MS);

        for (let i = 0; i < LOAD_MORE_CLICKS; i++) {
            try {
                await page.click('.load-more');
            } catch (err) {
                // page.click rejects when the button is missing or not
                // clickable (e.g. no more pages). Keep what is already loaded
                // instead of aborting the whole run.
                console.warn(`Stopped after ${i} "load more" click(s): ${err.message}`);
                break;
            }
            // Wait AFTER each click so the final batch is in the DOM before
            // the scrape below runs.
            await sleep(SETTLE_DELAY_MS);
        }

        result = await page.evaluate(() => {
            // This callback runs inside the page, so it uses the page's own
            // jQuery rather than anything from the Node side.
            const $ = window.$;
            if (typeof $ !== 'function') {
                throw new Error('jQuery (window.$) is not available on the page');
            }

            return $('#j-tag-list li')
                .map((index, item) => {
                    const entry = $(item);
                    return {
                        rate: entry.find('span').text(),
                        name: entry.find('img').attr('alt'),
                        url: entry.find('a').attr('href'),
                    };
                })
                .get();
        });
    } finally {
        // Always release the Chrome process, including on failure.
        await browser.close();
    }

    console.log(result);
})().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

 

// 你可能感兴趣的:(python)