1、环境准备
pip install requests
(re 是 Python 标准库自带的模块,无需通过 pip 安装)
pip install openpyxl
2、源代码
import requests
import re
import openpyxl
# Page to scrape and the HTTP headers sent with the request.
baseurl = 'https://zhuanlan.zhihu.com/p/357510629'
headers = {
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0',
    'Upgrade-Insecure-Requests': '1'
}


def extract_field(text, label):
    """Return every value that follows ``label`` in ``text``.

    A value is the shortest non-empty run of characters found between
    ``"<label> | "`` and the next ``" | "``.

    The pipe characters are escaped on purpose: an unescaped ``|`` is regex
    alternation, which would make the pattern match the label alone or any
    single space and flood the result with empty strings.

    NOTE(review): the literal " | " delimiter is taken from the pattern as
    written in this script; confirm it matches the real markup of the page
    (the original pattern may have contained HTML tags).

    Args:
        text: The text to search.
        label: The field label that precedes the value, e.g. "事件".

    Returns:
        A list of captured values in order of appearance; empty if none.
    """
    return re.findall(re.escape(label) + r" \| (.+?) \| ", text)


def main():
    """Download the page, extract the four fields and save them to Excel."""
    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status stops us from parsing an error page.
    response = requests.get(baseurl, headers=headers, timeout=30)
    response.raise_for_status()
    content = response.content.decode('utf-8')

    event_name = extract_field(content, "事件")
    print(event_name)
    start_time = extract_field(content, "时间")
    print(start_time)
    area_name = extract_field(content, "地点")
    print(area_name)
    introduction = extract_field(content, "简介")
    print(introduction)

    columns = (event_name, start_time, area_name, introduction)
    if len({len(column) for column in columns}) > 1:
        # Indexing all four lists by len(event_name) would raise IndexError
        # here; zip() below writes only the rows that are complete instead.
        print("warning: extracted columns differ in length:",
              [len(column) for column in columns])

    wb = openpyxl.Workbook()
    ws = wb.active
    ws.append(['事件名称', '时间', '地点名称', '事件简介'])
    for row in zip(*columns):
        ws.append(list(row))
    wb.save('数据.xlsx')


if __name__ == "__main__":
    main()