少数变量
多元变量
数据分布
通过不同的图表(例如:散点图、柱状图)来观察数据分布
箱线图
上四分位数与下四分位数之间的范围称为四分位间距
上/下边界
$$\text{上/下边界} = \text{上/下四分位数} \pm 1\frac{1}{2} \times \text{四分位间距}$$
即:上边界 = 上四分位数 + 1.5 × 四分位间距,下边界 = 下四分位数 − 1.5 × 四分位间距
导入可视化所需的模块
# pandas reads the CSV data sets used by the charts below.
import pandas as pd
from pyecharts.globals import CurrentConfig, NotebookType
# Tell pyecharts that the charts are displayed in JupyterLab. This is set
# here, before the chart classes are imported further down.
CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB
# `opts` holds the option classes (titles, legends, axes, ...) and
# `ThemeType` the built-in colour themes used by every chart below.
import pyecharts.options as opts
from pyecharts.globals import ThemeType
平行折线图
from pyecharts.charts import Parallel

# Load the Beijing 2018 air-quality records, then keep only the four plotted
# columns as a plain list of rows (one inner list per record).
df_final = pd.read_csv('data/beijing_AQI_2018.csv')
df_final = df_final[['AQI', 'AQI_rank', 'PM', 'Quality_grade']].values.tolist()

# One parallel axis per plotted column, in the same order as the row values.
# The last axis is categorical and lists the quality grades explicitly.
schema = [
    opts.ParallelAxisOpts(dim=0, name="AQI"),
    opts.ParallelAxisOpts(dim=1, name="AQI_rank"),
    opts.ParallelAxisOpts(dim=2, name="PM"),
    opts.ParallelAxisOpts(
        dim=3,
        name="Quality_grade",
        type_="category",
        data=["优", "良", "轻度污染", "中度污染", "重度污染", "严重污染"],
    ),
]

parallel = Parallel(init_opts=opts.InitOpts(theme=ThemeType.DARK))
parallel.add_schema(schema)
# Only the first 50 rows are drawn.
parallel.add("parallel", df_final[:50])
parallel.set_global_opts(title_opts=opts.TitleOpts(title="北京空气质量平行折线图"))

parallel.load_javascript()
parallel.render_notebook()
散点矩阵图
import matplotlib.pyplot as plt
import seaborn as sns

# Prepare the data: the iris data set, read from a local CSV file.
iris = pd.read_csv('data/iris.csv')

# Draw the pairwise relationships with seaborn, coloured by the 'species'
# column, then show the resulting matplotlib figure.
sns.pairplot(data=iris, hue='species')
plt.show()
实验环境
2018北京AQI全年走势图
from pyecharts.charts import Line

# Daily Beijing records for 2018. `df` and `attr` (the Date column as a list)
# are reused by the chart cells that follow, so their names are kept.
df = pd.read_csv('data/beijing_AQI_2018.csv')
attr = df['Date'].values.tolist()
v1 = df['AQI'].values.tolist()

# Decorations for the series: a line at the average and markers at the
# maximum and minimum values.
average_line = opts.MarkLineOpts(data=[opts.MarkLineItem(type_='average')])
extreme_points = opts.MarkPointOpts(
    data=[opts.MarkPointItem(type_='max'), opts.MarkPointItem(type_='min')]
)

line = Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
line.add_xaxis(attr)
line.add_yaxis("AQI值", v1, markline_opts=average_line, markpoint_opts=extreme_points)
# Hide the per-point value labels.
line.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
line.set_global_opts(title_opts=opts.TitleOpts(title='2018年北京AQI全年走势图'))
line.render_notebook()
2018北京PM2.5全年走势图
# Daily PM values from the Beijing frame loaded earlier; the x axis reuses
# the `attr` date list built for the AQI chart.
v1 = df['PM'].values.tolist()

# Decorations for the series: a line at the average and markers at the
# maximum and minimum values.
average_line = opts.MarkLineOpts(data=[opts.MarkLineItem(type_='average')])
extreme_points = opts.MarkPointOpts(
    data=[opts.MarkPointItem(type_='max'), opts.MarkPointItem(type_='min')]
)

line = Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
line.add_xaxis(attr)
line.add_yaxis("PM2.5值", v1, markline_opts=average_line, markpoint_opts=extreme_points)
# Hide the per-point value labels.
line.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
line.set_global_opts(title_opts=opts.TitleOpts(title='2018年北京PM2.5全年走势图'))
line.render_notebook()
2018年北京月均AQI走势图
# numpy is imported here because later cells in this file use `np`.
import numpy as np

# Month of every daily record: the second '/'-separated field of Date.
# It is converted to int so that grouping orders the months 1, 2, ..., 12.
# Grouping on the raw text orders them '1', '10', '11', '12', '2', ...,
# which put the monthly means under the wrong axis labels.
df['month'] = df['Date'].str.split('/').str[1].astype(int)

# Mean AQI per month, aligned to the fixed January-December axis.
month_com = df.groupby('month')['AQI'].mean().reindex(range(1, 13))

attr = ['{}'.format(str(i) + '月') for i in range(1, 13)]
# Truncate each mean to an int; a month with no records becomes None so it
# stays in its own slot instead of shifting the following months.
v1 = [None if pd.isna(mean) else int(mean) for mean in month_com]

line = (
    Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add_xaxis(attr)
    .add_yaxis(
        "AQI月均值",
        v1,
        # Mark the highest and lowest monthly means.
        markpoint_opts=opts.MarkPointOpts(
            data=[opts.MarkPointItem(type_='max'), opts.MarkPointItem(type_='min')]
        ),
    )
    # Hide the per-point value labels.
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    .set_global_opts(title_opts=opts.TitleOpts(title='2018年北京月均AQI走势图'))
)
line.render_notebook()
2018年北京月均PM2.5走势图
# Month of every daily record: the second '/'-separated field of Date.
# It is converted to int so that grouping orders the months 1, 2, ..., 12.
# Grouping on the raw text orders them '1', '10', '11', '12', '2', ...,
# which put the monthly means under the wrong axis labels.
df['month'] = df['Date'].str.split('/').str[1].astype(int)

# Mean PM value per month, aligned to the fixed January-December axis.
month_com = df.groupby('month')['PM'].mean().reindex(range(1, 13))

attr = ['{}'.format(str(i) + '月') for i in range(1, 13)]
# Truncate each mean to an int; a month with no records becomes None so it
# stays in its own slot instead of shifting the following months.
v1 = [None if pd.isna(mean) else int(mean) for mean in month_com]

line = (
    Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add_xaxis(attr)
    .add_yaxis(
        "PM2.5月均值",
        v1,
        # Mark the highest and lowest monthly means.
        markpoint_opts=opts.MarkPointOpts(
            data=[opts.MarkPointItem(type_='max'), opts.MarkPointItem(type_='min')]
        ),
    )
    # Hide the per-point value labels.
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    .set_global_opts(title_opts=opts.TitleOpts(title='2018年北京月均PM2.5走势图'))
)
line.render_notebook()
2018年北京季度AQI箱型图
from pyecharts.charts import Boxplot

# Month text (second '/'-separated field of Date) -> position of its quarter
# in `data`. Any month text not listed here goes to the fourth quarter.
quarter_of_month = {
    '1': 0, '2': 0, '3': 0,
    '4': 1, '5': 1, '6': 1,
    '7': 2, '8': 2, '9': 2,
}

# One list of daily AQI values per quarter.
data = [[], [], [], []]
for date_text, aqi_value in zip(df['Date'], df['AQI']):
    month_text = date_text.split('/')[1]
    data[quarter_of_month.get(month_text, 3)].append(aqi_value)
dom1, dom2, dom3, dom4 = data

boxplot = Boxplot(init_opts=opts.InitOpts(theme=ThemeType.DARK))
boxplot.add_xaxis(['第一季度', '第二季度', '第三季度', '第四季度'])
# prepare_data turns the raw per-quarter values into the box statistics.
boxplot.add_yaxis("", boxplot.prepare_data([dom1, dom2, dom3, dom4]))
boxplot.set_global_opts(title_opts=opts.TitleOpts(title='2018年北京季度AQI箱型图'))
boxplot.render_notebook()
2018年北京季度PM2.5箱型图
# Month text (second '/'-separated field of Date) -> position of its quarter
# in `data`. Any month text not listed here goes to the fourth quarter.
quarter_of_month = {
    '1': 0, '2': 0, '3': 0,
    '4': 1, '5': 1, '6': 1,
    '7': 2, '8': 2, '9': 2,
}

# One list of daily PM values per quarter.
data = [[], [], [], []]
for date_text, pm_value in zip(df['Date'], df['PM']):
    month_text = date_text.split('/')[1]
    data[quarter_of_month.get(month_text, 3)].append(pm_value)
dom1, dom2, dom3, dom4 = data

boxplot = Boxplot(init_opts=opts.InitOpts(theme=ThemeType.DARK))
boxplot.add_xaxis(['第一季度', '第二季度', '第三季度', '第四季度'])
# prepare_data turns the raw per-quarter values into the box statistics.
boxplot.add_yaxis("", boxplot.prepare_data([dom1, dom2, dom3, dom4]))
boxplot.set_global_opts(title_opts=opts.TitleOpts(title='2018年北京季度PM2.5箱型图'))
boxplot.render_notebook()
2018年北京全年空气质量情况
from pyecharts.charts import Pie

# Number of days per air-quality grade, most frequent grade first.
rank_message = df.groupby(['Quality_grade'])
rank_com = rank_message['Quality_grade'].agg(['count'])
rank_com.reset_index(inplace=True)
rank_com_last = rank_com.sort_values('count', ascending=False)
attr = rank_com_last['Quality_grade']
v1 = rank_com_last['count']

pie = (
    Pie(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add(
        "空气质量",
        # [grade, day count] pairs.
        [list(z) for z in zip(attr, v1)],
        # Inner and outer radius, which draws a ring.
        radius=[130, 180],
        tooltip_opts=opts.TooltipOpts(
            textstyle_opts=opts.TextStyleOpts(align='center'),
            # Series name on the first tooltip line, then
            # "grade: count (percent%)" on the second. The separator was a
            # raw line break inside the quotes, which is a syntax error.
            # NOTE(review): restored as the HTML '<br/>' tag on the
            # assumption that the export rendered it away - confirm
            # against the original notebook.
            formatter='{a}' + '<br/>' + '{b}: {c} ({d}%)',
        ),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='2018年北京全年空气质量情况', pos_left='center'),
        legend_opts=opts.LegendOpts(orient='vertical', pos_top='5%', pos_left='2%'),
    )
)
pie.render_notebook()
2018年北京PM2.5指数日历图
import datetime
# NOTE(review): `random` is not used by the code visible in this cell.
import random
from pyecharts.charts import Calendar

dom = df[['Date', 'PM']]

# Build [ISO date string, integer PM value] pairs, one per daily record.
# Date is split on '/' into year, month and day.
list1 = []
for date_text, pm_value in zip(dom['Date'], dom['PM']):
    parts = date_text.split('/')
    day = datetime.date(int(parts[0]), int(parts[1]), int(parts[2]))
    list1.append([day.isoformat(), int(pm_value)])

# Piecewise colour scale spanning the observed PM range, laid out
# horizontally at a fixed position.
pm_scale = opts.VisualMapOpts(
    max_=max(dom['PM']),
    min_=min(dom['PM']),
    orient="horizontal",
    is_piecewise=True,
    pos_top="230px",
    pos_left="100px",
)

calendar = Calendar(init_opts=opts.InitOpts(bg_color='white', height='300px'))
calendar.add("PM2.5", list1, calendar_opts=opts.CalendarOpts(range_="2018"))
calendar.set_global_opts(
    title_opts=opts.TitleOpts(title="2018年北京PM2.5指数日历图"),
    visualmap_opts=pm_scale,
)
calendar.render_notebook()
2018年北上广深AQI全年走势图
# File-name stems of the four cities; later cells reuse this list.
city_name = ['beijing', 'shanghai', 'guangzhou', 'shenzhen']

# One list of 12 monthly mean AQI values per city, in `city_name` order.
cityes_AQI = []
for city in city_name:
    filename = 'data/' + city + '_AQI' + '_2018.csv'
    aqi_data = pd.read_csv(filename)
    # Month of each row: the second '/'-separated field of Date, as an int so
    # grouping orders the months 1, 2, ..., 12. Grouping on the raw text
    # orders them '1', '10', '11', '12', '2', ..., which misaligned the
    # means with the axis labels.
    aqi_data['Month'] = aqi_data['Date'].str.split('/').str[1].astype(int)
    # Mean AQI per month, aligned to the fixed January-December axis.
    month_AQI = aqi_data.groupby('Month')['AQI'].mean().reindex(range(1, 13))
    # Truncate each mean to an int; a month with no records becomes None so
    # it stays in its own slot.
    cityes_AQI.append([None if pd.isna(mean) else int(mean) for mean in month_AQI])

months = ['{}'.format(str(i) + '月') for i in range(1, 13)]
line = (
    Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add_xaxis(months)
    .add_yaxis("北京", cityes_AQI[0])
    .add_yaxis("上海", cityes_AQI[1])
    .add_yaxis("广州", cityes_AQI[2])
    .add_yaxis("深圳", cityes_AQI[3])
    # Hide the per-point value labels.
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    .set_global_opts(
        title_opts=opts.TitleOpts(title='2018年北上广深AQI全年走势图'),
        legend_opts=opts.LegendOpts(pos_top='8%'),
    )
)
line.render_notebook()
2018年北上广深PM2.5全年走势图
# One list of 12 monthly mean PM values per city, in `city_name` order.
cityes_PM = []
for city in city_name:
    filename = 'data/' + city + '_AQI' + '_2018.csv'
    pm_data = pd.read_csv(filename)
    # Month of each row: the second '/'-separated field of Date, as an int so
    # grouping orders the months 1, 2, ..., 12. Grouping on the raw text
    # orders them '1', '10', '11', '12', '2', ..., which misaligned the
    # means with the axis labels.
    pm_data['Month'] = pm_data['Date'].str.split('/').str[1].astype(int)
    # Mean PM value per month, aligned to the fixed January-December axis.
    month_PM = pm_data.groupby('Month')['PM'].mean().reindex(range(1, 13))
    # Truncate each mean to an int; a month with no records becomes None so
    # it stays in its own slot.
    cityes_PM.append([None if pd.isna(mean) else int(mean) for mean in month_PM])

months = ['{}'.format(str(i) + '月') for i in range(1, 13)]
line = (
    Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add_xaxis(months)
    .add_yaxis("北京", cityes_PM[0])
    .add_yaxis("上海", cityes_PM[1])
    .add_yaxis("广州", cityes_PM[2])
    .add_yaxis("深圳", cityes_PM[3])
    # Hide the per-point value labels.
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    .set_global_opts(
        title_opts=opts.TitleOpts(title='2018年北上广深PM2.5全年走势图'),
        legend_opts=opts.LegendOpts(pos_top='8%'),
    )
)
line.render_notebook()
2018年北上广深全年空气质量情况
# Per city, in `city_name` order: the grade names (`attrs`) and the matching
# day counts (`v`), both sorted by count with the most frequent grade first.
v = []
attrs = []
for city in city_name:
    filename = 'data/' + city + '_AQI' + '_2018.csv'
    # NOTE: this rebinds the module-level `df`; after the loop it holds the
    # last city's data.
    df = pd.read_csv(filename)
    grade_counts = df.groupby(['Quality_grade'])['Quality_grade'].agg(['count'])
    grade_counts.reset_index(inplace=True)
    grade_counts = grade_counts.sort_values('count', ascending=False)
    attrs.append(np.array(grade_counts['Quality_grade']))
    v.append(np.array(grade_counts['count']))

# NOTE(review): `months` is not used by the chart below.
months = ['{}'.format(str(i) + '月') for i in range(1, 13)]

# Display name and ring centre of each city, in the same order as `city_name`.
ring_layout = [
    ("北京", ['20%', '30%']),
    ("上海", ['55%', '30%']),
    ("广州", ['20%', '70%']),
    ("深圳", ['55%', '70%']),
]

pie = Pie(init_opts=opts.InitOpts(theme=ThemeType.DARK))
for (city_label, ring_center), grades, counts in zip(ring_layout, attrs, v):
    pie.add(
        city_label,
        # [grade, day count] pairs.
        [list(z) for z in zip(grades.tolist(), counts.tolist())],
        radius=[60, 80],
        center=ring_center,
        # The city name is used as the label text, positioned at the centre.
        label_opts=opts.LabelOpts(formatter=city_label, position="center", font_size='25'),
    )
pie.set_global_opts(
    title_opts=opts.TitleOpts(title='2018年北上广深全年空气质量情况'),
    legend_opts=opts.LegendOpts(type_="scroll", pos_top="20%", pos_left="80%", orient="vertical"),
)
pie.render_notebook()