2018年销售汇总表-pandas数据分析

import numpy as np
import pandas as pd
from sparklines import sparklines
df = pd.read_csv('2018年销售汇总表.csv',parse_dates = ['日期'],encoding="gbk")
df.head()# 查看前五条数据
t 姓名 单品 数量 单价 金额 日期
0 740150 Barton LLC B1-20000 39 86.69 3380.91 2018-01-01 07:21:00
1 714466 Trantow-Barrows S2-77896 -1 63.16 -63.16 2018-01-01 10:00:00
2 218895 Kulas Inc B1-69924 23 90.70 2086.10 2018-01-01 13:24:00
3 307599 Kassulke, Ondricka and Metz S1-65481 41 21.05 863.05 2018-01-01 15:05:00
4 412290 Jerde-Hilpert S2-34077 6 83.21 499.26 2018-01-01 23:26:00
df.info()#显示数据基础信息

RangeIndex: 1500 entries, 0 to 1499
Data columns (total 7 columns):
t     1500 non-null int64
姓名    1500 non-null object
单品    1500 non-null object
数量    1500 non-null int64
单价    1500 non-null float64
金额    1500 non-null float64
日期    1500 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(2), int64(2), object(2)
memory usage: 82.2+ KB
# 讲解:按《姓名》分组,聚合《金额》的均值和总和
# 用法:按客人姓名查看消费金额的平均值与总金额
df.groupby("姓名")["金额"].agg(["mean","sum"])
mean sum
姓名
Barton LLC 1334.615854 109438.50
Cronin, Oberbrunner and Spencer 1339.321642 89734.55
Frami, Hills and Schmidt 1438.466528 103569.59
Fritsch, Russel and Anderson 1385.366790 112214.71
Halvorson, Crona and Champlin 1206.971724 70004.36
Herman LLC 1336.532258 82865.00
Jerde-Hilpert 1265.072247 112591.43
Kassulke, Ondricka and Metz 1350.797969 86451.07
Keeling LLC 1363.977027 100934.30
Kiehn-Spinka 1260.870506 99608.77
Koepp Ltd 1264.152927 103660.54
Kuhn-Gusikowski 1247.866849 91094.28
Kulas Inc 1461.191064 137351.96
Pollich LLC 1196.536712 87347.18
Purdy-Kunde 1469.777547 77898.21
Sanford and Sons 1391.872958 98822.98
Stokes LLC 1271.332222 91535.92
Trantow-Barrows 1312.567872 123381.38
White-Trantow 1579.558023 135841.99
Will LLC 1411.318919 104437.60
# 讲解:在上一条命令中,并设置平均值与总金额的显示风格
# 用法:格式化数值,在前面添加美元符号,包括2位小数点
(
    df.groupby("姓名")["金额"]
    .agg(["mean","sum"])
    .style.format("${0:,.2f}")
)
mean sum
姓名
Barton LLC $1,334.62 $109,438.50
Cronin, Oberbrunner and Spencer $1,339.32 $89,734.55
Frami, Hills and Schmidt $1,438.47 $103,569.59
Fritsch, Russel and Anderson $1,385.37 $112,214.71
Halvorson, Crona and Champlin $1,206.97 $70,004.36
Herman LLC $1,336.53 $82,865.00
Jerde-Hilpert $1,265.07 $112,591.43
Kassulke, Ondricka and Metz $1,350.80 $86,451.07
Keeling LLC $1,363.98 $100,934.30
Kiehn-Spinka $1,260.87 $99,608.77
Koepp Ltd $1,264.15 $103,660.54
Kuhn-Gusikowski $1,247.87 $91,094.28
Kulas Inc $1,461.19 $137,351.96
Pollich LLC $1,196.54 $87,347.18
Purdy-Kunde $1,469.78 $77,898.21
Sanford and Sons $1,391.87 $98,822.98
Stokes LLC $1,271.33 $91,535.92
Trantow-Barrows $1,312.57 $123,381.38
White-Trantow $1,579.56 $135,841.99
Will LLC $1,411.32 $104,437.60
# 平均值与总金额显示为不带小数点
(
    df.groupby("姓名")["金额"]
    .agg(["mean","sum"])
    .style.format("${0:,.0f}")
)
mean sum
姓名
Barton LLC $1,335 $109,438
Cronin, Oberbrunner and Spencer $1,339 $89,735
Frami, Hills and Schmidt $1,438 $103,570
Fritsch, Russel and Anderson $1,385 $112,215
Halvorson, Crona and Champlin $1,207 $70,004
Herman LLC $1,337 $82,865
Jerde-Hilpert $1,265 $112,591
Kassulke, Ondricka and Metz $1,351 $86,451
Keeling LLC $1,364 $100,934
Kiehn-Spinka $1,261 $99,609
Koepp Ltd $1,264 $103,661
Kuhn-Gusikowski $1,248 $91,094
Kulas Inc $1,461 $137,352
Pollich LLC $1,197 $87,347
Purdy-Kunde $1,470 $77,898
Sanford and Sons $1,392 $98,823
Stokes LLC $1,271 $91,536
Trantow-Barrows $1,313 $123,381
White-Trantow $1,580 $135,842
Will LLC $1,411 $104,438
# 按月分析销售金额,并显示每月销售额占年度销售总额比例
by=[pd.Grouper(key='日期', freq='M')]#自定义字段的格式
monthly_sales=df.groupby(by)["金额"].agg(["sum"]).reset_index()
monthly_sales["月占比"]=monthly_sales["sum"]/df["金额"].sum()

format_dict = {'sum': '${0:,.0f}', '日期': '{:%Y-%m-%d}', '月占比': '{:.4%}'}
# 1.使用普通格式字典控制每列的格式
(
    monthly_sales
    .style
    .format(format_dict)
    .hide_index()
)

日期 sum 月占比
2018-01-31 $185,362 9.1818%
2018-02-28 $146,212 7.2426%
2018-03-31 $203,921 10.1012%
2018-04-30 $174,574 8.6475%
2018-05-31 $165,419 8.1940%
2018-06-30 $174,089 8.6235%
2018-07-31 $191,662 9.4939%
2018-08-31 $153,779 7.6174%
2018-09-30 $168,443 8.3438%
2018-10-31 $171,495 8.4950%
2018-11-30 $119,961 5.9423%
2018-12-31 $163,867 8.1171%
# 按月分析销售金额,并显示每月销售额占年度销售总额比例
by=[pd.Grouper(key='日期', freq='M')]#自定义字段的格式
monthly_sales=df.groupby(by)["金额"].agg(["sum"]).reset_index()
monthly_sales["月占比"]=monthly_sales["sum"]/df["金额"].sum()

# 2.使用条件格式字典控制每列的格式
format_dict = {'sum': '${0:,.0f}', '日期': '{:%Y-%m-%d}', '月占比': '{:.4%}'}
(
    monthly_sales
    .style
    .format(format_dict)
    .hide_index()
    .highlight_max(color="lightgreen")
    .highlight_min(color="red")
)
日期 sum 月占比
2018-01-31 $185,362 9.1818%
2018-02-28 $146,212 7.2426%
2018-03-31 $203,921 10.1012%
2018-04-30 $174,574 8.6475%
2018-05-31 $165,419 8.1940%
2018-06-30 $174,089 8.6235%
2018-07-31 $191,662 9.4939%
2018-08-31 $153,779 7.6174%
2018-09-30 $168,443 8.3438%
2018-10-31 $171,495 8.4950%
2018-11-30 $119,961 5.9423%
2018-12-31 $163,867 8.1171%
# 按月分析销售金额,并显示每月销售额占年度销售总额比例
by=[pd.Grouper(key='日期', freq='M')]#自定义字段的格式
monthly_sales=df.groupby(by)["金额"].agg(["sum"]).reset_index()
monthly_sales["月占比"]=monthly_sales["sum"]/df["金额"].sum()

# 3.使用渐变色格式字典控制每列的格式
format_dict = {'sum': '${0:,.0f}', '日期': '{:%Y-%m-%d}', '月占比': '{:.4%}'}
(
    monthly_sales
    .style
    .format(format_dict)
    .background_gradient(subset=["sum"],cmap="PRGn")
)
日期 sum 月占比
0 2018-01-31 $185,362 9.1818%
1 2018-02-28 $146,212 7.2426%
2 2018-03-31 $203,921 10.1012%
3 2018-04-30 $174,574 8.6475%
4 2018-05-31 $165,419 8.1940%
5 2018-06-30 $174,089 8.6235%
6 2018-07-31 $191,662 9.4939%
7 2018-08-31 $153,779 7.6174%
8 2018-09-30 $168,443 8.3438%
9 2018-10-31 $171,495 8.4950%
10 2018-11-30 $119,961 5.9423%
11 2018-12-31 $163,867 8.1171%
# 按月分析销售金额,并显示每月销售额占年度销售总额比例
by=[pd.Grouper(key='日期', freq='M')]#自定义字段的格式
monthly_sales=df.groupby(by)["金额"].agg(["sum"]).reset_index()
monthly_sales["月占比"]=monthly_sales["sum"]/df["金额"].sum()

# 4.使用单元格柱形格式字典控制每列的格式
format_dict = {'sum': '${0:,.0f}', '日期': '{:%Y-%m-%d}', '月占比': '{:.4%}'}
(
    monthly_sales
    .style
    .format(format_dict)
    .hide_index()
    .bar(color="#FFA07A",vmin=100_000,subset=["sum"],align="zero")
    .bar(color='lightblue', vmin=0, subset=['月占比'], align='zero')
    .set_caption("2018年销售一览表")
)
2018年销售一览表
日期 sum 月占比
2018-01-31 $185,362 9.1818%
2018-02-28 $146,212 7.2426%
2018-03-31 $203,921 10.1012%
2018-04-30 $174,574 8.6475%
2018-05-31 $165,419 8.1940%
2018-06-30 $174,089 8.6235%
2018-07-31 $191,662 9.4939%
2018-08-31 $153,779 7.6174%
2018-09-30 $168,443 8.3438%
2018-10-31 $171,495 8.4950%
2018-11-30 $119,961 5.9423%
2018-12-31 $163,867 8.1171%
def sparkline_str(x):
    bins=np.histogram(x)[0]#直方图
    s1="".join(sparklines(bins))
    return s1
sparkline_str.__name__ = "走势图"
df.groupby("姓名")["数量","金额"].agg(["mean",sparkline_str])
数量 金额
mean 走势图 mean 走势图
姓名
Barton LLC 24.890244 ▄▄▃▂▃▆▄█▁▄ 1334.615854 █▄▃▆▄▄▁▁▁▁
Cronin, Oberbrunner and Spencer 24.970149 █▄▁▄▄▇▅▁▄▄ 1339.321642 █▅▅▃▃▃▂▂▁▁
Frami, Hills and Schmidt 26.430556 ▄▄▁▂▇█▂▂▅▅ 1438.466528 █▅▄▇▅▃▄▁▁▁
Fritsch, Russel and Anderson 26.074074 ▁▄▇▃▂▂█▃▄▄ 1385.366790 ▇█▃▄▂▂▁▂▁▁
Halvorson, Crona and Champlin 22.137931 ▇▆▆▇█▁▄▂▄▃ 1206.971724 ██▆▅▁▃▂▂▂▂
Herman LLC 24.806452 ▄▃▅▁▆▄▂▆▃█ 1336.532258 █▅▇▄▅▄▁▃▂▂
Jerde-Hilpert 22.460674 ▄▄█▁▂▅▃▂▄▃ 1265.072247 █▄▅▂▁▂▃▂▂▁
Kassulke, Ondricka and Metz 25.734375 ▂▂▁▁▂▂▁▅▄█ 1350.797969 █▆▆▄▄▃▂▁▁▂
Keeling LLC 24.405405 ▁▄▇▃▅█▃▄▃▆ 1363.977027 ▅█▆▃▄▂▂▁▁▁
Kiehn-Spinka 22.227848 ▃▂█▂▃▅▄▁▄▁ 1260.870506 █▇▄▃▃▂▁▂▁▁
Koepp Ltd 21.829268 ▅▇█▆▄▇▅▁▅▇ 1264.152927 █▇▅▂▄▂▂▂▁▁
Kuhn-Gusikowski 22.808219 ▂▄█▄▃▁█▄▂▄ 1247.866849 ▆█▄▃▃▃▃▁▁▁
Kulas Inc 24.095745 ▇▃▇▂▇▁▅▆█▆ 1461.191064 █▅█▂▄▄▄▂▁▁
Pollich LLC 23.383562 █▂▅▂▃▆▁▂▄▇ 1196.536712 █▆▆▃▃▃▂▂▁▁
Purdy-Kunde 27.358491 ▄▅█▁▄▇▅█▇█ 1469.777547 █▅▄▃▄▃▃▂▁▂
Sanford and Sons 24.000000 ▂▂▂▆▁█▂▂▁▅ 1391.872958 ▅▇█▃▃▄▁▃▁▂
Stokes LLC 24.527778 ▄▄▄▁▃▂▄█▄▄ 1271.332222 █▇▆▆▄▂▂▃▁▂
Trantow-Barrows 24.159574 ▅▄▅▂▃▃▄▄▁█ 1312.567872 █▇▅▆▃▂▂▁▁▁
White-Trantow 26.255814 ▃▂▆▆▃██▁██ 1579.558023 ▄█▆▅▄▃▄▂▁▃
Will LLC 24.702703 ▂█▁▄▄▄▂▅▅▄ 1411.318919 ▆█▄▅▄▂▁▂▁▂

你可能感兴趣的:(Pthon,numpy)