# 第二讲作业

# 酷狗歌曲榜单TOP500

import requests

# Request headers passed to requests.get; only the User-Agent is set, using a
# desktop Chrome identification string.
headers = {}
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36'

from lxml import etree

import xlwt


def get_info(url):
    """Download one chart page and print rank, singer, song and duration per entry.

    Every ``<li>`` under the ``pc_temp_songlist`` container is handled as one
    chart entry. An entry that lacks any of the expected nodes is skipped, so
    one malformed row no longer aborts the whole page with ``IndexError``.

    Args:
        url: Address of the chart page to fetch.

    Returns:
        None. Results are written to standard output.
    """
    # A timeout keeps a stalled connection from hanging the crawl indefinitely.
    res = requests.get(url, headers=headers, timeout=10)
    html = etree.HTML(res.text)
    if html is None:
        # etree.HTML yields None for an empty document; nothing to report.
        return

    # The trailing space is part of the class value being matched
    # (presumably mirrors the page markup -- keep it as is).
    for info in html.xpath('//div[@class="pc_temp_songlist "]/ul/li'):
        rank_nodes = info.xpath('span[3]')
        titles = info.xpath('a/text()')
        durations = info.xpath('span[4]/span/text()')
        if not (rank_nodes and titles and durations):
            continue

        # string(.) flattens the rank cell, including text in nested tags.
        rank = rank_nodes[0].xpath('string(.)').strip()
        singer, song = _split_title(titles[0])
        duration = durations[0].strip()
        print(rank, singer, song, duration)


def _split_title(title):
    """Split a link title into a ``(singer, song)`` pair of stripped strings."""
    # Titles are assumed to look like "singer - song". Prefer the spaced
    # separator so hyphens inside a name stay intact, and split only once so
    # hyphens in the song title are preserved. Without any separator the whole
    # title is returned as the singer and the song is empty.
    separator = ' - ' if ' - ' in title else '-'
    singer, _, song = title.partition(separator)
    return singer.strip(), song.strip()

if __name__ == '__main__':
    # Walk chart pages 1 through 23 in order, printing each page's entries.
    for page in range(1, 24):
        get_info('https://www.kugou.com/yy/rank/home/{}-8888.html'.format(str(page)))

# 糗事百科24小时

import requests

# Headers sent with each page request; the User-Agent identifies the client as
# a desktop Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/49.0.2623.87 Safari/537.36'
    ),
}

from lxml import etree

def get_info(url):
    """Download one listing page and print the extracted fields of each post.

    Every ``<div>`` directly under the element with id ``content-left`` is
    handled as one post. For each post four XPath queries are run and their
    results printed as lists of text nodes; a list is empty when the
    corresponding node is absent.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        None. Results are written to standard output.
    """
    # NOTE: the original ``def`` line was indented at module level with its
    # body at the same column, which is an IndentationError; the function is
    # now a regular top-level definition.
    # A timeout keeps a stalled connection from hanging the crawl indefinitely.
    res = requests.get(url, headers=headers, timeout=10)
    html = etree.HTML(res.text)
    if html is None:
        # etree.HTML yields None for an empty document; nothing to report.
        return

    for info in html.xpath('//div[@id="content-left"]/div'):
        # Field meanings follow the variable names (author name, laugh count,
        # post text, comment count) -- TODO confirm against the page markup.
        name = info.xpath('div[1]/a[2]/h2/text()')
        laugh = info.xpath('div[2]/span[1]/i/text()')
        content = info.xpath('a[1]/div/span/text()')
        comment = info.xpath('div[2]/span[2]/a/i/text()')
        print(name, content, laugh, comment)


if __name__ == '__main__':
    # Visit listing pages 1 through 13 in order, printing each page's posts.
    template = 'https://www.qiushibaike.com/hot/page/{}/'
    for url in [template.format(str(number)) for number in range(1, 14)]:
        get_info(url)

# 你可能感兴趣的:(第二讲作业)