第三讲的作业

# This program scrapes the Sina -> novels -> boys' novels popularity ranking: every book title, genre, author and rank

# The scraped data is saved to test.xls

import requests
import xlwt
from lxml import etree

# Accumulator for every scraped row; get_info() appends here, the main
# block writes it out to the spreadsheet.
all_info_list = []


def get_info(url_1):
    """Scrape one ranking page and collect its rows into all_info_list.

    Fetches *url_1*, parses the ranking table (class ``child-t-tab``) and
    appends one ``[rank, title, genre, author]`` list per table row to the
    module-level ``all_info_list``.
    """
    res = requests.get(url_1)
    html = etree.HTML(res.text)
    # Each <tr>: td[1]=rank, td[2]/a=book title, td[3]/a=genre, td[4]=author.
    # string(.) flattens nested markup inside the cell into plain text.
    for info in html.xpath('//table[@class="child-t-tab"]/tbody/tr'):
        rank = info.xpath('td[1]')[0].xpath('string(.)').strip()
        novel_book = info.xpath('td[2]/a')[0].xpath('string(.)').strip()
        sort = info.xpath('td[3]/a')[0].xpath('string(.)').strip()
        author = info.xpath('td[4]')[0].xpath('string(.)').strip()
        all_info_list.append([rank, novel_book, sort, author])

if __name__ == '__main__':
    # Spreadsheet setup: row 0 is the header, one data row per scraped book.
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('Sheet1')
    header = ['rank', 'novel_book', 'sort', 'author']
    for col, title in enumerate(header):
        sheet.write(0, col, title)

    # Pages 1-4 of the boys' novels click-ranking; each call appends rows
    # to the module-level all_info_list.
    urls = ['http://vip.book.sina.com.cn/weibobook/ranklist.php'
            '?channel=boy&type=click&pos=20097&vt=4&page={}'.format(i)
            for i in range(1, 5)]
    for url in urls:
        get_info(url)

    # Data rows start at 1 because row 0 holds the header.
    for row, record in enumerate(all_info_list, start=1):
        for col, data in enumerate(record):
            sheet.write(row, col, data)

    book.save('C:/Users/madin/Desktop/test.xls')


你可能感兴趣的:(第三讲的作业)