puppeteer爬取案例

爬取内容:bilibili 热门音乐

const puppeteer = require('puppeteer');

/**
 * Launches a non-headless Chromium through Puppeteer, opens the bilibili page
 * below, waits for the video links to render, then prints an array of
 * `{ title, href }` objects (one per matched anchor) to the console.
 *
 * The browser is always closed when the run finishes or fails, and any error
 * is reported with a non-zero exit code instead of an unhandled rejection.
 */
(async () => {
    // Anchor elements that hold each video's title text and link.
    const VIDEO_LINK_SELECTOR = '#videolist_box > div.vd-list-cnt > ul > li > div > div.r > a';
    const TARGET_URL = 'https://www.bilibili.com/v/music/original/?spm_id_from=333.5.b_6d757369635f6f726967696e616c.59#/all/click/0/1/?open=hot';

    const args = [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-infobars',
        '--window-position=0,0',
        // Spelling fixed: the original "certifcate" switches were not recognised.
        '--ignore-certificate-errors',
        '--ignore-certificate-errors-spki-list',
        // No inner quotes: arguments are passed without a shell, so quotes
        // would become part of the User-Agent value itself.
        '--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3312.0 Safari/537.36'
    ];

    const options = {
        args,
        headless: false,
        ignoreHTTPSErrors: true,
        userDataDir: './tmp2',
        defaultViewport: { width: 1280, height: 800 }
    };

    const browser = await puppeteer.launch(options);
    try {
        const browserWSEndpoint = browser.wsEndpoint();
        console.log(browserWSEndpoint, 'site');

        const page = await browser.newPage();
        await page.goto(TARGET_URL);

        // The list is rendered client-side, so wait until at least one link exists.
        await page.waitForSelector(VIDEO_LINK_SELECTOR);

        // Runs inside the page: reduce every matched anchor to plain data.
        const videos = await page.$$eval(
            VIDEO_LINK_SELECTOR,
            (links) => links.map((link) => ({ title: link.innerText, href: link.href }))
        );

        console.log(videos);
    } finally {
        // Release the Chromium process even if navigation or scraping failed.
        await browser.close();
    }
})().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});


// Prints a banner using four `%c` directives; each directive consumes the next
// argument as a CSS string (CSS styling is a browser-console feature).
{
  const bannerFormat = `%c %c\n之夏\n%c larry %c`;

  // Style for the first `%c`: a padded, transparent-text area with a background image.
  const imageStyle = ` padding: 35px 200px;
    background-image: url(https://kaimo313.gitee.io/blogs/console.jpg);
    background-size: contain;
    background-repeat: no-repeat;
    color: transparent;`;

  // Style for the second `%c`, which precedes the "之夏" text.
  const nameStyle = 'color: #3eaf7c; font-size: 16px;margin-bottom: 10px;';

  // Styles for the third and fourth `%c`, around the " larry " text.
  const badgeLeftStyle = 'background: #35495e; padding: 4px; border-radius: 3px 0 0 3px; color: #fff';
  const badgeRightStyle = 'background: #41b883; padding: 4px; border-radius: 0 3px 3px 0; color: #fff';

  console.log(bannerFormat, imageStyle, nameStyle, badgeLeftStyle, badgeRightStyle);
}

puppeteer爬取案例_第1张图片

有问题可以留言提问。个人觉得 Puppeteer 用起来比 Selenium 更符合习惯;而且 Selenium 太成熟了,很多网站都针对 Selenium 做了反爬机制。最好两者都会:对付一些垃圾网站,用 Selenium 不要太爽。

你可能感兴趣的:(nodejs,js)