js 爬虫初战

const axios = require('axios')
const cheerio = require('cheerio')
const download = require('download')
/**
 * Pause for a given duration.
 *
 * @param {number} time - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<undefined>} Promise that fulfills (with no value) once the timer fires.
 */
function sleep (time) {
  return new Promise((resolve) => {
    setTimeout(resolve, time)
  })
}
/**
 * Crawl one listing page, download every image found on it, then move on to
 * the next page until three pages in total have been processed.
 *
 * @param {number} [skip=0] - How many pages have already been processed in
 *   this run; the crawl stops once this reaches 2.
 * @param {number} [number=2] - Page number interpolated into the listing URL
 *   (`update_${number}.html`).
 * @returns {Promise<void>} Fulfills after this page and all following pages
 *   have been processed.
 */
async function load (skip = 0, number = 2) {
  const baseUrl = `https://www.xxxxx/update_${number}.html`

  // Default to an empty list so a failed request or parse simply yields
  // nothing to download instead of handing `undefined` to downloadFile.
  let imgUrlList = []
  try {
    const res = await axios.get(baseUrl)
    const $ = cheerio.load(res.data)
    const found = []
    $('.wrapper .w790 .touxiangbox ul li a img').each((i, ele) => {
      found.push({
        title: $(ele).attr('alt'),
        url: $(ele).attr('src')
      })
    })
    imgUrlList = found
  } catch (err) {
    console.log(err)
  }

  await downloadFile(imgUrlList)
  // Wait between pages so the target site is not hit back-to-back.
  await sleep(3000)

  if (skip < 2) {
    // Await the next page so errors propagate to the caller and the returned
    // promise only settles when the whole crawl is finished.
    await load(skip + 1, number + 1)
  } else {
    console.log('下载完成')
  }
}

// HTTP request headers sent with every image download; the User-Agent
// string identifies the client as desktop Chrome on macOS.
let headers = {
  'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
}

/**
 * Download a list of images one after another into the `微信头像` directory
 * next to this script.
 *
 * Each image is saved as `<title>.gif`. A failure on one image is logged and
 * the remaining images are still attempted.
 *
 * @param {Array<{title: string, url: string}>} data - Images to download;
 *   anything that is not a non-empty array is treated as "nothing to do".
 * @returns {Promise<void>} Fulfills once every entry has been attempted.
 */
async function downloadFile (data) {
  // Nothing to download (also covers undefined/null from a failed page fetch).
  if (!Array.isArray(data) || data.length === 0) {
    return
  }

  // Path at which images will get downloaded
  const filePath = `${__dirname}/微信头像`

  for (const item of data) {
    // An <img> without a src attribute yields no URL; there is nothing to fetch.
    if (!item || !item.url) {
      console.log('Skip item without url')
      continue
    }

    try {
      await download(item.url, filePath, {
        filename: item.title + '.gif',
        headers
      })
      console.log(`Download ${item.title} Completed`)
    } catch (err) {
      // Keep going: one broken image must not abort the rest of the batch.
      console.log(`Download ${item.title} Failed`, err)
    }
  }
}

// Entry point: start the crawl. Attach a rejection handler so a failure is
// reported and reflected in the exit code rather than left as an unhandled
// promise rejection.
load().catch((err) => {
  console.log(err)
  process.exitCode = 1
})

你可能感兴趣的:(js 爬虫初战)