用python爬取糗事百科搞笑段子

以前一直想学爬虫,这两天闲来无事动手试了试,终于有了些眉目

环境是jupyter和spyder

用的是 Python 3.5

先上代码

 # -*-encoding:utf-8 -*-
import requests
import xlwt
from bs4 import BeautifulSoup
def get_data(url, timeout=10):
    """Download one page and collect the author and text of each post.

    Matches are appended to the module-level lists ``author`` and
    ``content``. This function does not create those lists; they must
    already exist at module scope when it is called.

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument.

    Returns:
        The tuple ``(author, content)`` -- the same module-level lists,
        so it also contains entries appended by earlier calls.

    Raises:
        requests.exceptions.Timeout: If the request exceeds ``timeout``.
    """
    # Without an explicit timeout, requests waits indefinitely on a server
    # that stops responding, which would hang the whole script.
    res = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(res.text, 'lxml')
    cont = soup.select('.content')
    auth = soup.select('.author h2')
    # zip pairs the two result lists by position and stops at the shorter.
    for con, aut in zip(cont, auth):
        if con and aut:
            # Each content entry is stored as the list of lines in the post.
            content.append(con.text.split('\n'))
            author.append(aut.text)
    return author, content
def write_excel(book,sheet_name,author,content):
    """Write authors and contents into a new sheet and save the workbook.

    Row 0 holds the two column headers. Row ``n`` (n >= 1) holds
    ``author[n - 1]`` in column 0 and ``content[n - 1]`` in column 1.

    Args:
        book: Workbook object providing ``add_sheet`` and ``save``.
        sheet_name: Name given to the newly added sheet.
        author: Sequence of values for column 0; its length sets the
            number of data rows.
        content: Sequence of values for column 1, indexed in step with
            ``author``.
    """
    worksheet = book.add_sheet(sheet_name)

    # Header row.
    for col, title in enumerate(('作者', '内容')):
        worksheet.write(0, col, title)

    # Data rows start at 1, directly below the header.
    for row, name in enumerate(author, start=1):
        worksheet.write(row, 0, name)
        worksheet.write(row, 1, content[row - 1])

    book.save('./qiushi.xls')
if __name__ == "__main__":
    # Script entry point: scrape pages 1-4, then dump everything to one sheet.

    # get_data appends to these module-level lists, so they must be
    # defined here before the first call.
    author = []
    content = []

    sheet_name = '糗事'
    web_url = [
        "http://www.qiushibaike.com/8hr/page/{}/?s=4943272".format(page)
        for page in range(1, 5)
    ]
    book = xlwt.Workbook(encoding='utf-8')

    for url in web_url:
        print(url)
        get_data(url)

    write_excel(book, sheet_name, author, content)
不得不说,BeautifulSoup确实很强大,省去了手写正则表达式的好多麻烦

你可能感兴趣的:(用python爬取糗事百科搞笑段子)