作业2：用 requests + lxml 爬取糗事百科热门页面的段子信息，并用 xlwt 保存到 Excel。

import requests

from lxml import etree

import xlwt

# Module-level accumulator: one [title, laughs, comments, id] row per scraped
# item, filled by get_info() and written to the spreadsheet in __main__.
all_info_list=[]

def get_info(url):
    """Scrape one Qiushibaike listing page and append rows to all_info_list.

    Each appended row is [title, laugh count, comment count, author id] as
    plain strings (empty string when a field is missing on the page).

    :param url: listing-page URL, e.g. https://www.qiushibaike.com/8hr/page/1/
    :raises requests.HTTPError: when the page returns an error status.
    """
    res = requests.get(url)
    # Fail fast on HTTP errors instead of silently parsing an error page.
    res.raise_for_status()
    html = etree.HTML(res.text)
    infos = html.xpath('//div[@class="recommend-article"]/ul/li/div[@class="recmd-right"]')

    def _first(nodes):
        # xpath() returns a list of text nodes; the original code appended
        # that list directly, which xlwt's sheet.write() cannot serialize.
        # Take the first text node, or '' when the field is absent.
        return nodes[0] if nodes else ''

    for info in infos:
        title = _first(info.xpath('a/text()'))
        laughs = _first(info.xpath('div/div/span[1]/text()'))
        comments = _first(info.xpath('div/div/span[4]/text()'))
        # Named 'author' to avoid shadowing the builtin id().
        author = _first(info.xpath('div/a/span/text()'))
        all_info_list.append([title, laughs, comments, author])

if __name__ == '__main__':
    # Workbook for the scraped rows; encoding covers the Chinese headers.
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('Sheet1')

    # Header row (row 0).
    header = ['题目', '好笑数', '评论数', '作者']
    for col, caption in enumerate(header):
        sheet.write(0, col, caption)

    # Pages 1-13 of the "hot" listing; get_info() fills all_info_list.
    urls = ['https://www.qiushibaike.com/8hr/page/{}/'.format(str(i)) for i in range(1, 14)]
    for url in urls:
        get_info(url)

    # Data rows start at row 1. 'row_data' instead of 'list' so the
    # builtin list() is not shadowed.
    for row, row_data in enumerate(all_info_list, start=1):
        for col, cell in enumerate(row_data):
            sheet.write(row, col, cell)

    book.save('C:/Users/madin/Desktop/糗事百科.xls')

你可能感兴趣的:(2作业)