一、爬虫代码
import requests
from bs4 import BeautifulSoup
import pandas as pd
url = "http://www.tianqihoubao.com/lishi/changsha/month/202103.html"
# Browser-like request headers. To find them: open the browser developer
# tools, go to the Network tab, reload the page (Ctrl+R), click the first
# request, open "Headers" and scroll to the bottom.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/89.0.4389.114 Safari/537.36 Edg/89.0.774.76'}


def parse_row(row_text):
    """Turn the text of one table row into a (date, condition, temperature) tuple.

    The row text is split on whitespace. The first token is the date, whose
    '年'/'月' markers are replaced by '/' and whose '日' marker is dropped.
    Tokens 1-2 are joined into the weather condition and tokens 3-5 into the
    temperature range.

    Args:
        row_text: The text content of a single ``<tr>`` element.

    Returns:
        A ``(date, condition, temperature)`` tuple of strings, or ``None``
        when the row contains no text at all.
    """
    tokens = row_text.split()
    if not tokens:
        # A blank row carries no data; skip it instead of raising IndexError.
        return None
    date = tokens[0].replace('年', '/').replace('月', '/').replace('日', '')
    condition = ''.join(tokens[1:3])
    temperature = ''.join(tokens[3:6])
    return date, condition, temperature


def fetch_row_texts(page_url, headers, timeout=10):
    """Download the page and return the text of every table row except the first.

    Args:
        page_url: Address of the page to download.
        headers: HTTP request headers to send.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with the text of each ``<tr>`` element after the first one
        (the first row is skipped as the table header).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # Send a GET request; the timeout keeps the script from hanging forever.
    response = requests.get(url=page_url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    # Parse with BeautifulSoup using the lxml parser (pip install lxml).
    soup = BeautifulSoup(response.text, 'lxml')
    return [row.text for row in soup.find_all('tr')[1:]]


def main():
    """Scrape the weather table and save it as a CSV file."""
    parsed = (parse_row(text) for text in fetch_row_texts(url, header))
    records = [record for record in parsed if record is not None]
    # Save the data: one column each for date, weather condition, temperature.
    _data = pd.DataFrame(records, columns=['日期', '天气状况', '温度'])
    _data.to_csv('长沙2021.03天气记录.csv', index=False, encoding='utf-8')


if __name__ == "__main__":
    main()
二、爬取内容
日期,天气状况,温度
2021/03/01,小雨/多云,8℃/4℃
2021/03/02,多云/小雨,12℃/6℃
2021/03/03,小雨/小雨,10℃/8℃
2021/03/04,多云/小雨,17℃/6℃
2021/03/05,小雨/小雨,13℃/8℃
2021/03/06,小雨/小雨,14℃/7℃
2021/03/07,阴/小雨,11℃/5℃
2021/03/08,中雨/多云,12℃/7℃
2021/03/09,小雨/小雨,11℃/8℃
2021/03/10,中雨/小雨,11℃/8℃
2021/03/11,阴/多云,15℃/9℃
2021/03/12,多云/小雨,17℃/9℃
2021/03/13,阴/多云,18℃/11℃
2021/03/14,多云/晴,21℃/13℃
2021/03/15,小雨/中雨,25℃/17℃
2021/03/16,小雨/中雨,23℃/16℃
2021/03/17,小雨/小雨,15℃/13℃
2021/03/18,阴/小雨,17℃/11℃
2021/03/19,小雨/小雨,11℃/8℃
2021/03/20,小雨/小雨,10℃/8℃
2021/03/21,阴/阴,13℃/8℃
2021/03/22,晴/多云,18℃/6℃
2021/03/23,阴/多云,18℃/8℃
2021/03/24,晴/晴,23℃/10℃
2021/03/25,晴/晴,27℃/14℃
2021/03/26,阴/小雨,27℃/17℃
2021/03/27,小雨/多云,17℃/12℃
2021/03/28,多云/多云,27℃/13℃
2021/03/29,小雨/小雨,30℃/19℃
2021/03/30,小雨/小雨,23℃/16℃
2021/03/31,小雨/中雨,20℃/13℃