简单的练手实例:爬取酷我音乐排行榜前200并写入Excel

这个实例用正则表达式最为简单

import re
import requests
import xlwt

# Request headers sent with every page fetch; the User-Agent is a desktop
# Chrome string so the request looks like an ordinary browser visit.
_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'
)
key_value = dict([('User-Agent', _USER_AGENT)])

def get_html(url, timeout=10):
    """Download a page and return its text, or None if the request fails.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests may block forever on an unresponsive host.

    Returns:
        The response body decoded with the encoding detected from its
        content, or None when the request fails or the server answers with
        an HTTP error status.
    """
    try:
        response = requests.get(url, headers=key_value, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are treated as "no page"; anything else
        # (KeyboardInterrupt, programming errors) is allowed to propagate.
        return None
    # Use the encoding sniffed from the body rather than the header value.
    response.encoding = response.apparent_encoding
    return response.text
        
# NOTE(review): the three patterns below are reproduced exactly as they appear
# in this copy of the script. They look truncated -- the HTML tags that
# anchored them appear to have been stripped when the article was extracted
# (the singer pattern is empty) -- so they need to be restored against the
# live page markup before the extracted rows are meaningful. TODO confirm.
RANK_PATTERN = '\n\n(.*?)\n\n'
TITLE_PATTERN = '2016" target="_blank">(.*?)'
SINGER_PATTERN = ''


def get_info(html, mylist):
    """Append one [rank, title, singer] row per chart entry to mylist.

    Regular expressions are used because XPath and CSS selectors proved
    awkward on this page's structure.

    Args:
        html: Page source as a string, or None when the download failed.
        mylist: List that receives the rows; it is modified in place.

    Returns:
        None. When html is None nothing is appended.
    """
    if html is None:
        # get_html() signals failure with None; re.findall would raise
        # TypeError on it, so there is simply nothing to extract.
        return
    ranks = re.findall(RANK_PATTERN, html, re.S)
    titles = re.findall(TITLE_PATTERN, html, re.S)
    singers = re.findall(SINGER_PATTERN, html, re.S)
    # zip stops at the shortest of the three result lists.
    mylist.extend(
        [rank, title, singer]
        for rank, title, singer in zip(ranks, titles, singers)
    )


def getExel(mylist):
    """Write the collected rows to an .xls workbook on drive D:.

    Args:
        mylist: Iterable of rows, each an iterable of cell values written
            left to right.
    """
    header = ['排名', '歌名', '歌手']
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('Sheet1')
    for col, caption in enumerate(header):
        sheet.write(0, col, caption)
    # Data rows start at index 1 because row 0 holds the header.
    for row_idx, row in enumerate(mylist, start=1):
        for col, value in enumerate(row):
            sheet.write(row_idx, col, value)
    book.save('D:/酷我音乐排行榜前200.xls')


if __name__ == '__main__':
    mylist = []
    url = 'http://www.kuwo.cn/bang/index'
    html = get_html(url)
    get_info(html, mylist)
    getExel(mylist)

每天坚持。。
Result!

简单的练手实例:爬取酷我音乐排行榜前200并写入Excel_第1张图片
热爱生活,热爱编程。

你可能感兴趣的:(爬虫)