爬取天气数据+热力图

第一次尝试:用之前学过的 requests 和 BeautifulSoup 爬取天气网站的历史数据。

import requests
from bs4 import BeautifulSoup
import bs4

def getHTMLText(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are treated as "no page"; programming
        # errors and Ctrl-C are no longer swallowed.
        return ""
    # Use the encoding guessed from the body rather than the one announced
    # in the headers.
    r.encoding = r.apparent_encoding
    return r.text

def fillWList(ist, html):
    """Parse one month page and append its daily rows to *ist*.

    Each appended row is a list of six strings taken from the first six
    <li> cells of a day's <ul>. printWList labels these columns date, high,
    low, weather, wind and force.

    Args:
        ist: List that receives the rows; it is modified in place.
        html: Page source of a monthly history page.

    Pages without the ``tqtongji2`` table (for example an empty download)
    add nothing, and rows with fewer than six cells are skipped.
    """
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('div', 'tqtongji2')
    if table is None:
        return
    # The first <ul> of each month is the table header, so drop it.
    for ul in table.find_all('ul')[1:]:
        lis = ul('li')
        if len(lis) < 6:
            continue
        # get_text() always returns a str, whereas .string is None for a
        # cell with more than one child node.
        ist.append([li.get_text(strip=True) for li in lis[:6]])

def printWList(ist):
    """Print the collected weather rows as a tab-separated table.

    A header line is written first, followed by one line per row of *ist*.
    Only the first six items of every row are used.
    """
    row_format = "{:10}\t{:2}\t{:2}\t{:2}\t{:4}\t{:4}"
    header = ('date', 'high', 'low', 'weather', 'wind', 'force')
    print(row_format.format(*header))
    for row in ist:
        cells = [row[i] for i in range(6)]
        print(row_format.format(*cells))


def main():
    """Scrape the Shanghai pages for 201701 through 201709 and print them.

    A month that cannot be downloaded or parsed is skipped and reported on
    stderr, so one bad page does not abort the whole run.
    """
    import sys

    ist = []
    start_url = 'http://lishi.tianqi.com/shanghai/'

    for month in range(1, 10):
        # Monthly pages are named <year><two-digit month>.html
        url = '{}2017{:02d}.html'.format(start_url, month)
        try:
            html = getHTMLText(url)
            if not html:
                # getHTMLText signals a failed download with an empty string.
                print('skipped {}: download failed'.format(url), file=sys.stderr)
                continue
            fillWList(ist, html)
        except Exception as exc:
            print('skipped {}: {}'.format(url, exc), file=sys.stderr)
    printWList(ist)


if __name__ == '__main__':
    main()

先爬取天气数据,再制作天气热力图。

import datetime

import bs4
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup

# Download one month of Shanghai weather history and reshape the daily highs
# into a (week of month) x (weekday) table for the heat maps.
r = requests.get('http://lishi.tianqi.com/shanghai/201710.html', timeout=10)
r.raise_for_status()  # stop here rather than parse an error page
html = r.text

date = []
high = []
soup = BeautifulSoup(html, 'html.parser')
table = soup.find('div', "tqtongji2")
if table is None:
    raise RuntimeError("weather table 'tqtongji2' not found in the downloaded page")
# The first <ul> is the table header, so skip it.
for ul in table.find_all('ul')[1:]:
    lis = ul('li')
    # First cell is the date, second cell is the daily high.
    date.append(lis[0].get_text(strip=True))
    high.append(lis[1].get_text(strip=True))
if not date:
    raise RuntimeError("the weather table contains no daily rows")

w_list = pd.DataFrame({'date': date, 'high': high})
w_list['date'] = pd.to_datetime(w_list['date'])
w_list['high'] = w_list['high'].astype('int')
# Day of week with Monday=0 ... Sunday=6. This column was never created
# before, although the week numbering and the pivot below both read it.
w_list['weekday'] = w_list['date'].dt.weekday

# Week of month: count days from the first row, shifted by the weekday the
# first row falls on, so a new week starts on every Monday.
# Assumes the rows are consecutive days in ascending order.
first_weekday = int(w_list['weekday'].iloc[0])
weeknum = [(first_weekday + offset) // 7 + 1 for offset in range(len(w_list))]
w_list['weeknum'] = weeknum

# Always produce all seven weekday columns; cells with no data become 0.
df_pivoted = (
    w_list.pivot_table(index='weeknum', columns='weekday', values='high')
    .reindex(columns=range(7))
    .fillna(0)
)
df_pivoted.index.name = None
# Highest week number first.
df_pivoted.sort_index(ascending=False, inplace=True)

用matplotlib制图:
# Heat map of the daily highs drawn with matplotlib.
plt.pcolor(df_pivoted,  # data to plot
           cmap=plt.cm.Blues,  # fill colour map
           edgecolors='white'  # border colour between cells
          )
# Tick positions are shifted by 0.5 so each label sits in the middle of its
# cell.
weekday_labels = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
week_labels = ['first', 'second', 'third', 'fourth', 'fifth', 'sixth']
# Label each row from its own week number (the index of df_pivoted) instead
# of assuming the month always spans six weeks.
row_labels = [week_labels[int(week) - 1] for week in df_pivoted.index]
plt.xticks(np.arange(len(weekday_labels)) + 0.5, weekday_labels, fontsize=20)
plt.yticks(np.arange(len(row_labels)) + 0.5, row_labels, fontsize=20)
plt.title('weather of shanghai', fontsize=30)
plt.show()

用seaborn制图(可增加图例、最高温度):
# Heat map of the daily highs drawn with seaborn, which also shows the value
# in every cell.
ax = sns.heatmap(df_pivoted,  # data to plot
                 cmap=plt.cm.Reds,  # fill colour map
                 linewidths=.1,  # gap between cells
                 annot=True  # print the value inside each cell
                )

# Tick positions are shifted by 0.5 so each label sits in the middle of its
# cell.
weekday_labels = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
week_labels = ['first', 'second', 'third', 'fourth', 'fifth', 'sixth']
# Label each row from its own week number (the index of df_pivoted) instead
# of assuming the month always spans six weeks.
row_labels = [week_labels[int(week) - 1] for week in df_pivoted.index]

plt.xticks(np.arange(len(weekday_labels)) + 0.5, weekday_labels, fontsize=15)
# The weekday labels can be moved to the top with ax.xaxis.tick_top().

# rotation=0 keeps the week labels horizontal.
plt.yticks(np.arange(len(row_labels)) + 0.5, row_labels, fontsize=15, rotation=0)

# Title and (empty) axis labels.
ax.set_title('Shanghai October weather', fontsize=30)
ax.set_xlabel('')
ax.set_ylabel('')

# Show the figure.
plt.show()

爬取天气数据+热力图_第1张图片

你可能感兴趣的:(python爬虫,python数据分析)