# homework2 — scrape qiushibaike.com text pages into an Excel sheet

import requests
import xlwt
from lxml import etree

all_info_list=[]

info_list=[]

c=[]

all_id_list=[]

all_content_list=[]

all_laugh_list=[]

all_comment_list=[]

#url='https://www.qiushibaike.com/text/page/1.html'

def get_info(url):

res = requests.get(url)

html = etree.HTML(res.text)

id = html.xpath('//div[@class="author clearfix"]//h2/text()')

#id = html.xpath('//div[@class="col1"]/div/div/a/h2/text()')

    for id1in id:

all_id_list.append(id1)

# id= html.xpath('//div[@class="col1"]/div/div/span/h2/text()')

    content=html.xpath('//div[@class="content"]//span')

for nrin content:

if nr.attrib =='':

all_content_list.append(nr.text)

laugh=html.xpath('//div[@class="stats"]/span/i/text()')

for laugh1in laugh:

all_laugh_list.append(laugh1)

comment=html.xpath('//span[@class="stats-comments"]/a/i/text()')

# print(comment)

    for comment1in comment:

all_comment_list.append(comment1)

# all_id_list.append(id)

# all_content_list.append(c)

# all_laugh_list.append(laugh)

# all_comment_list.append(comment)

# print(all_laugh_list)

if __name__ =='__main__':

book = xlwt.Workbook(encoding='utf-8')

sheet = book.add_sheet('Sheet1')

header = ['id','content','laugh','comment']

for tin range(len(header)):

sheet.write(0, t, header[t])

urls = ['https://www.qiushibaike.com/text/page/{}'.format(str(i))for iin range(1,14)]

for urlin urls:

get_info(url)

# 写用户名列

    i =1

    j =0

    for datain all_id_list:

sheet.write(i, j, data)

i +=1

    # 写content列

    i =1

    j =1

    # for data in content:

    for datain all_content_list:

sheet.write(i, j, data)

i +=1

    # 写laugh列

    i =1

    j =2

    for datain all_laugh_list:

sheet.write(i, j, data)

i +=1

    # 写comment列

    i =1

    j =3

    for datain all_comment_list:

sheet.write(i, j, data)

i +=1

    book.save('C:/Users/madin/Desktop/pytest.xls')

# (blog footer, not code) 你可能感兴趣的:(homework2)