数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8)

八、多元变量及数据分布可视化

1、多元变量及数据分布常用图形

  • 少数变量

    • 相关性——因果性,相似但不一定相同
  • 多元变量

    • 多个变量之间存在正相关性、负相关性和弱相关性
    • 正相关:线条呈现平行
    • 负相关:线条一直交叉(顶端与底端相连)
    • 弱相关:方向不清晰
  • 数据分布

    • 通过不同的图表(例如:散点图、柱状图)来观察数据分布

    • 箱线图

      image-20200318194231260

      • 上四分位数与下四分位数之间的范围称为四分位间距

      • 上/下边界
        $$\text{上/下边界} = \text{上/下四分位数} \pm 1\frac{1}{2} \times \text{四分位间距}$$

2、多元变量案例

  • 导入可视化所需的模块

    import pandas as pd
    from pyecharts.globals import CurrentConfig, NotebookType
    # Declare JupyterLab as the notebook environment; this assignment is made
    # here, ahead of the chart-class imports that appear in the later snippets.
    CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB
    
    import pyecharts.options as opts
    from pyecharts.globals import ThemeType
    
  • 平行折线图

    from pyecharts.charts import Parallel

    # Load the Beijing 2018 air-quality CSV and keep only the four plotted
    # columns, converted to a plain list of rows.
    plot_columns = ['AQI', 'AQI_rank', 'PM', 'Quality_grade']
    df_final = pd.read_csv('data/beijing_AQI_2018.csv')
    df_final = df_final[plot_columns].values.tolist()

    # One parallel axis per column; the last axis is categorical and lists
    # the quality grades explicitly.
    schema = [
        opts.ParallelAxisOpts(dim=0, name="AQI"),
        opts.ParallelAxisOpts(dim=1, name="AQI_rank"),
        opts.ParallelAxisOpts(dim=2, name="PM"),
        opts.ParallelAxisOpts(
            dim=3,
            name="Quality_grade",
            type_="category",
            data=["优", "良", "轻度污染", "中度污染", "重度污染", "严重污染"],
        ),
    ]

    parallel = Parallel(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    parallel.add_schema(schema)
    # Only the first 50 rows are drawn.
    parallel.add("parallel", df_final[:50])
    parallel.set_global_opts(title_opts=opts.TitleOpts(title="北京空气质量平行折线图"))
    # NOTE(review): the pyecharts notebook docs appear to recommend calling
    # load_javascript() in its own cell before render_notebook() when running
    # in JupyterLab — confirm whether these two calls should be split.
    parallel.load_javascript()
    parallel.render_notebook()
    

    数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8)_第1张图片

  • 散点矩阵图

    import matplotlib.pyplot as plt
    import seaborn as sns

    # Load the iris data set from the local CSV file.
    iris = pd.read_csv('data/iris.csv')
    # Draw the pairwise relationships with Seaborn, coloured by species.
    sns.pairplot(data=iris, hue='species')
    plt.show()
    

    数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8)_第2张图片

3、数据分布案例

  • 直方图
  • 箱线图
  • 多个时间序列图

4、分类数据可视化实验

  • 实验环境

    • python=3.7.6
    • pyecharts=1.7.1
    • jupyterlab=1.2.6
  • 2018北京AQI全年走势图

    from pyecharts.charts import Line

    # Daily records: dates go on the x axis, AQI values on the y axis.
    df = pd.read_csv('data/beijing_AQI_2018.csv')
    attr = df['Date'].values.tolist()
    v1 = df['AQI'].values.tolist()

    # Mark the series average with a line and its max/min with points.
    average_line = opts.MarkLineOpts(data=[opts.MarkLineItem(type_='average')])
    extreme_points = opts.MarkPointOpts(
        data=[opts.MarkPointItem(type_=kind) for kind in ('max', 'min')]
    )

    line = Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    line.add_xaxis(attr)
    line.add_yaxis("AQI值", v1,
                   markline_opts=average_line,
                   markpoint_opts=extreme_points)
    # Hide per-point value labels.
    line.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    line.set_global_opts(title_opts=opts.TitleOpts(title='2018年北京AQI全年走势图'))
    line.render_notebook()
    

    数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8)_第3张图片

  • 2018北京PM2.5全年走势图

    # Reuses `df`, `attr` and `Line` from the daily AQI snippet; only the
    # plotted column changes.
    v1 = df['PM'].values.tolist()

    # Mark the series average with a line and its max/min with points.
    average_line = opts.MarkLineOpts(data=[opts.MarkLineItem(type_='average')])
    extreme_points = opts.MarkPointOpts(
        data=[opts.MarkPointItem(type_=kind) for kind in ('max', 'min')]
    )

    line = Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    line.add_xaxis(attr)
    line.add_yaxis("PM2.5值", v1,
                   markline_opts=average_line,
                   markpoint_opts=extreme_points)
    # Hide per-point value labels.
    line.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    line.set_global_opts(title_opts=opts.TitleOpts(title='2018年北京PM2.5全年走势图'))
    line.render_notebook()
    

    数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8)_第4张图片

  • 2018年北京月均AQI走势图

    import numpy as np

    # Take the month (second '/'-separated field of Date) as an integer so
    # the groupby keys sort numerically (1, 2, ..., 12). String keys would
    # sort lexicographically ('1', '10', '11', '12', '2', ...) and the
    # monthly means would be plotted against the wrong month labels.
    dom = df[['Date', 'AQI']]
    list1 = [int(j.split('/')[1]) for j in dom['Date']]
    df['month'] = list1

    month_message = df.groupby(['month'])
    month_com = month_message['AQI'].agg(['mean'])
    month_com.reset_index(inplace=True)
    month_com_last = month_com.sort_values('month')

    # Build the labels from the months actually present so that labels and
    # values always stay aligned; means are truncated to integers.
    attr = ['{}月'.format(m) for m in month_com_last['month']]
    v1 = np.array(month_com_last['mean'])
    v1 = [int(i) for i in v1]

    line = (
        Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
        .add_xaxis(attr)
        .add_yaxis("AQI月均值", v1,
                   markpoint_opts=opts.MarkPointOpts(data=[opts.MarkPointItem(type_='max'),
                                                          opts.MarkPointItem(type_='min')])
                   )
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(title_opts=opts.TitleOpts(title='2018年北京月均AQI走势图'))
    )
    line.render_notebook()
    

    数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8)_第5张图片

  • 2018年北京月均PM2.5走势图

    # Take the month (second '/'-separated field of Date) as an integer so
    # the groupby keys sort numerically (1, 2, ..., 12). String keys would
    # sort lexicographically ('1', '10', '11', '12', '2', ...) and the
    # monthly means would be plotted against the wrong month labels.
    dom = df[['Date', 'PM']]
    list1 = [int(j.split('/')[1]) for j in dom['Date']]
    df['month'] = list1

    month_message = df.groupby(['month'])
    month_com = month_message['PM'].agg(['mean'])
    month_com.reset_index(inplace=True)
    month_com_last = month_com.sort_values('month')

    # Build the labels from the months actually present so that labels and
    # values always stay aligned; means are truncated to integers.
    attr = ['{}月'.format(m) for m in month_com_last['month']]
    v1 = np.array(month_com_last['mean'])
    v1 = [int(i) for i in v1]

    line = (
        Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
        .add_xaxis(attr)
        .add_yaxis("PM2.5月均值", v1,
                   markpoint_opts=opts.MarkPointOpts(data=[opts.MarkPointItem(type_='max'),
                                                          opts.MarkPointItem(type_='min')])
                   )
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(title_opts=opts.TitleOpts(title='2018年北京月均PM2.5走势图'))
    )
    line.render_notebook()
    

    数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8)_第6张图片

  • 2018年北京季度AQI箱型图

    from pyecharts.charts import Boxplot

    dom = df[['Date', 'AQI']]
    # One bucket of AQI values per quarter.
    data = [[], [], [], []]
    dom1, dom2, dom3, dom4 = data
    for i, j in zip(dom['Date'], dom['AQI']):
        # Parse the month as an integer so both '1' and '01' are accepted;
        # comparing against the strings '1'..'9' would silently put every
        # zero-padded month into the fourth quarter.
        month = int(i.split('/')[1])
        # (month - 1) // 3 maps 1-3 -> 0, 4-6 -> 1, 7-9 -> 2, 10-12 -> 3.
        data[(month - 1) // 3].append(j)

    boxplot = Boxplot(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    boxplot = (
        boxplot.add_xaxis(['第一季度', '第二季度', '第三季度', '第四季度'])
        .add_yaxis("", boxplot.prepare_data([dom1, dom2, dom3, dom4]))
        .set_global_opts(title_opts=opts.TitleOpts(title='2018年北京季度AQI箱型图'))
    )
    boxplot.render_notebook()
    

    数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8)_第7张图片

  • 2018年北京季度PM2.5箱型图

    dom = df[['Date', 'PM']]
    # One bucket of PM values per quarter.
    data = [[], [], [], []]
    dom1, dom2, dom3, dom4 = data
    for i, j in zip(dom['Date'], dom['PM']):
        # Parse the month as an integer so both '1' and '01' are accepted;
        # comparing against the strings '1'..'9' would silently put every
        # zero-padded month into the fourth quarter.
        month = int(i.split('/')[1])
        # (month - 1) // 3 maps 1-3 -> 0, 4-6 -> 1, 7-9 -> 2, 10-12 -> 3.
        data[(month - 1) // 3].append(j)

    boxplot = Boxplot(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    boxplot = (
        boxplot.add_xaxis(['第一季度', '第二季度', '第三季度', '第四季度'])
        .add_yaxis("", boxplot.prepare_data([dom1, dom2, dom3, dom4]))
        .set_global_opts(title_opts=opts.TitleOpts(title='2018年北京季度PM2.5箱型图'))
    )
    boxplot.render_notebook()
    

    数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8)_第8张图片

  • 2018年北京全年空气质量情况

    from pyecharts.charts import Pie

    # Count the number of days per quality grade, most frequent first.
    rank_message = df.groupby(['Quality_grade'])
    rank_com = rank_message['Quality_grade'].agg(['count'])
    rank_com.reset_index(inplace=True)
    rank_com_last = rank_com.sort_values('count', ascending=False)

    attr = rank_com_last['Quality_grade']
    v1 = rank_com_last['count']

    # NOTE(review): the separator between '{a}' and '{b}: {c} ({d}%)' was lost
    # in the published snippet, which left an unterminated string literal.
    # '<br/>' is the line break used in ECharts tooltip templates and is
    # presumably what the author wrote — confirm.
    pie = (
        Pie(init_opts=opts.InitOpts(theme=ThemeType.DARK))
        .add("空气质量", [list(z) for z in zip(attr, v1)], radius=[130, 180],
             tooltip_opts=opts.TooltipOpts(textstyle_opts=opts.TextStyleOpts(align='center'),
                                           formatter='{a}<br/>{b}: {c} ({d}%)'))
        .set_global_opts(title_opts=opts.TitleOpts(title='2018年北京全年空气质量情况', pos_left='center'),
                         legend_opts=opts.LegendOpts(orient='vertical', pos_top='5%', pos_left='2%')
                        )
    )
    pie.render_notebook()

    数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8)_第9张图片

  • 2018年北京PM2.5指数日历图

    import datetime
    import random
    from pyecharts.charts import Calendar

    dom = df[['Date', 'PM']]
    # Convert every 'year/month/day' string into an ISO date string and pair
    # it with the PM value truncated to an integer.
    list1 = []
    for raw_date, raw_pm in zip(dom['Date'], dom['PM']):
        parts = raw_date.split('/')
        day = datetime.date(int(parts[0]), int(parts[1]), int(parts[2]))
        list1.append([day.isoformat(), int(raw_pm)])

    # The colour scale spans the observed PM range and is split into pieces.
    pm_values = dom['PM']
    visual_map = opts.VisualMapOpts(
        max_=max(pm_values),
        min_=min(pm_values),
        orient="horizontal",
        is_piecewise=True,
        pos_top="230px",
        pos_left="100px",
    )

    calendar = Calendar(init_opts=opts.InitOpts(bg_color='white', height='300px'))
    calendar.add("PM2.5", list1, calendar_opts=opts.CalendarOpts(range_="2018"))
    calendar.set_global_opts(
        title_opts=opts.TitleOpts(title="2018年北京PM2.5指数日历图"),
        visualmap_opts=visual_map,
    )
    calendar.render_notebook()
    

    数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8)_第10张图片

  • 2018年北上广深AQI全年走势图

    city_name = ['beijing', 'shanghai', 'guangzhou', 'shenzhen']
    cityes_AQI = []
    for city in city_name:
        filename = 'data/' + city + '_AQI' + '_2018.csv'
        aqi_data = pd.read_csv(filename)

        # Month of every row as an integer: integer keys make groupby order
        # the months 1, 2, ..., 12. String keys would be ordered
        # '1', '10', '11', '12', '2', ... and mis-align with the labels.
        aqi_data['Month'] = [int(d.split('/')[1]) for d in aqi_data['Date']]
        # Monthly mean AQI, explicitly ordered by month number.
        month_AQI_average = aqi_data.groupby('Month')['AQI'].mean().sort_index()
        # Truncate the monthly means to integers for plotting.
        cityes_AQI.append([int(value) for value in month_AQI_average])
    # NOTE(review): the fixed 1-12 labels assume every file covers all twelve
    # months — confirm against the data.
    months = ['{}'.format(str(i) + '月') for i in range(1, 13)]

    line = (
        Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
        .add_xaxis(months)
        .add_yaxis("北京", cityes_AQI[0])
        .add_yaxis("上海", cityes_AQI[1])
        .add_yaxis("广州", cityes_AQI[2])
        .add_yaxis("深圳", cityes_AQI[3])
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(title_opts=opts.TitleOpts(title='2018年北上广深AQI全年走势图'),
                         legend_opts=opts.LegendOpts(pos_top='8%')
                        )
    )
    line.render_notebook()
    

    数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8)_第11张图片

  • 2018年北上广深PM2.5全年走势图

    cityes_PM = []
    for city in city_name:
        filename = 'data/' + city + '_AQI' + '_2018.csv'
        pm_data = pd.read_csv(filename)

        # Month of every row as an integer: integer keys make groupby order
        # the months 1, 2, ..., 12. String keys would be ordered
        # '1', '10', '11', '12', '2', ... and mis-align with the labels.
        pm_data['Month'] = [int(d.split('/')[1]) for d in pm_data['Date']]
        # Monthly mean PM, explicitly ordered by month number.
        month_PM_average = pm_data.groupby('Month')['PM'].mean().sort_index()
        # Truncate the monthly means to integers for plotting.
        cityes_PM.append([int(value) for value in month_PM_average])
    # NOTE(review): the fixed 1-12 labels assume every file covers all twelve
    # months — confirm against the data.
    months = ['{}'.format(str(i) + '月') for i in range(1, 13)]

    line = (
        Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
        .add_xaxis(months)
        .add_yaxis("北京", cityes_PM[0])
        .add_yaxis("上海", cityes_PM[1])
        .add_yaxis("广州", cityes_PM[2])
        .add_yaxis("深圳", cityes_PM[3])
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(title_opts=opts.TitleOpts(title='2018年北上广深PM2.5全年走势图'),
                         legend_opts=opts.LegendOpts(pos_top='8%')
                        )
    )
    line.render_notebook()
    

    数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8)_第12张图片

  • 2018年北上广深全年空气质量情况

    v = []
    attrs = []
    for city in city_name[:4]:
        filename = 'data/' + city + '_AQI' + '_2018.csv'
        df = pd.read_csv(filename)

        # Number of days per quality grade, most frequent grade first.
        grade_counts = df.groupby(['Quality_grade'])['Quality_grade'].agg(['count'])
        grade_counts.reset_index(inplace=True)
        grade_counts = grade_counts.sort_values('count', ascending=False)

        attrs.append(np.array(grade_counts['Quality_grade']))
        v.append(np.array(grade_counts['count']))
    months = ['{}'.format(str(i) + '月') for i in range(1, 13)]

    # One ring per city: (series name / centre label, ring centre position).
    ring_layout = [
        ("北京", ['20%', '30%']),
        ("上海", ['55%', '30%']),
        ("广州", ['20%', '70%']),
        ("深圳", ['55%', '70%']),
    ]

    pie = Pie(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    for idx, (city_label, ring_center) in enumerate(ring_layout):
        pie.add(
            city_label,
            [list(z) for z in zip(attrs[idx].tolist(), v[idx].tolist())],
            radius=[60, 80],
            center=ring_center,
            # The city name is shown as a fixed label in the ring's centre.
            label_opts=opts.LabelOpts(formatter=city_label, position="center", font_size='25'),
        )
    pie.set_global_opts(
        title_opts=opts.TitleOpts(title='2018年北上广深全年空气质量情况'),
        legend_opts=opts.LegendOpts(type_="scroll", pos_top="20%", pos_left="80%", orient="vertical"),
    )
    pie.render_notebook()
    

    数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8)_第13张图片

你可能感兴趣的:(数据可视化(pyecharts 1.7.1)学习笔记——系列笔记(8))