# Assignment 2: scraping web page data

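# This program scrapes the song and singer information for the top 200
# entries of the new-songs chart on the Kuwo Music website.

# The scraped information is saved to an .xls file.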

import requests

from lxml import etree

import xlwt

# Chart page to scrape.
url = 'http://www.kuwo.cn/bang/index'

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() aborts instead of parsing an HTTP error page.
res = requests.get(url, timeout=10)
res.raise_for_status()

# NOTE(review): res.text is decoded with the encoding requests infers from
# the response; confirm that Chinese titles come out correctly.
html = etree.HTML(res.text)

# Every <li> under <ul class="listMusic"> is treated as one chart entry.
info_s = html.xpath('//ul[@class="listMusic"]/li')

# One [rank, song, singer] list per chart entry, in page order.
all_info_list = []

for info in info_s:
    # For each field, take the first matching node and flatten all of its
    # nested text into a single stripped string.  An entry that lacks one of
    # these nodes raises IndexError, as in the original script.
    rank, song, singer = (
        info.xpath(path)[0].xpath('string(.)').strip()
        for path in ('div[1]/p', 'div[2]/a', 'div[3]/a')
    )

    # Uncomment to print rank, song and singer while scraping.
    # print(rank, song, singer)

    all_info_list.append([rank, song, singer])

# Write the scraped rows into a single-sheet .xls workbook.
book = xlwt.Workbook(encoding='utf-8')

sheet = book.add_sheet('Sheet1')

header = ['rank', 'song', 'singer']

# Row 0 holds the column titles.
for t, title in enumerate(header):
    sheet.write(0, t, title)

# Data rows start at row 1, directly below the header row.
for i, row in enumerate(all_info_list, start=1):
    for j, data in enumerate(row):
        sheet.write(i, j, data)

# NOTE: the output path is hard-coded to one user's Windows desktop; adjust
# it before running on another machine.
book.save('C:/Users/madin/Desktop/test.xls')

# You may also be interested in: (Assignment 2: scraping web page data)