简单使用re模块爬取糗事百科文字

# 爬取糗事百科文字
import re
import requests

def _extract_jokes(html):
    """Return the joke texts found in *html*, with line-break tags removed.

    NOTE(review): the HTML tags inside both regex patterns were lost when
    this snippet was copied from a rendered web page (only a line break
    and the bare ``.*?(.*?)`` groups survived). The tags below are a
    reconstruction -- confirm them against the live page markup.
    """
    # re.S == re.DOTALL: '.' must also match newlines because the text
    # between the opening tag and the captured group spans several lines.
    contents = re.findall(
        r'<div class="content">.*?<span>(.*?)</span>', html, re.S
    )
    # Drop the inline line-break tags, then trim surrounding spaces and
    # newlines from each captured fragment.
    return [re.sub(r'<br\s*/?>', '', content).strip() for content in contents]


def data_capture(url, timeout=10):
    """Download *url* and print every joke text extracted from the page.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.
    """
    headers = {
        # Send a browser-like User-Agent with the request.
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    for joke in _extract_jokes(response.text):
        print(joke)


def spider():
    """Build the paginated listing URLs and scrape them with data_capture."""
    urls = 'https://www.qiushibaike.com/text/page/{}/'
    for i in range(1, 6):
        data_capture(urls.format(i))
        # NOTE(review): this break stops after the first iteration, so only
        # page 1 is fetched; remove it to crawl pages 1-5.
        break


if __name__ == '__main__':
    spider()

你可能感兴趣的:(简单使用re模块爬取糗事百科文字)