import numpy as np
import pandas as pd
from sparklines import sparklines
df = pd.read_csv('2018年销售汇总表.csv',parse_dates = ['日期'],encoding="gbk")
df.head()# 查看前五条数据
t | 姓名 | 单品 | 数量 | 单价 | 金额 | 日期 | |
---|---|---|---|---|---|---|---|
0 | 740150 | Barton LLC | B1-20000 | 39 | 86.69 | 3380.91 | 2018-01-01 07:21:00 |
1 | 714466 | Trantow-Barrows | S2-77896 | -1 | 63.16 | -63.16 | 2018-01-01 10:00:00 |
2 | 218895 | Kulas Inc | B1-69924 | 23 | 90.70 | 2086.10 | 2018-01-01 13:24:00 |
3 | 307599 | Kassulke, Ondricka and Metz | S1-65481 | 41 | 21.05 | 863.05 | 2018-01-01 15:05:00 |
4 | 412290 | Jerde-Hilpert | S2-34077 | 6 | 83.21 | 499.26 | 2018-01-01 23:26:00 |
df.info()#显示数据基础信息
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 7 columns):
t 1500 non-null int64
姓名 1500 non-null object
单品 1500 non-null object
数量 1500 non-null int64
单价 1500 non-null float64
金额 1500 non-null float64
日期 1500 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(2), int64(2), object(2)
memory usage: 82.2+ KB
# 讲解:按《姓名》分组,聚合《金额》的均值和总和
# 用法:按客人姓名查看消费金额的平均值与总金额
df.groupby("姓名")["金额"].agg(["mean","sum"])
mean | sum | |
---|---|---|
姓名 | ||
Barton LLC | 1334.615854 | 109438.50 |
Cronin, Oberbrunner and Spencer | 1339.321642 | 89734.55 |
Frami, Hills and Schmidt | 1438.466528 | 103569.59 |
Fritsch, Russel and Anderson | 1385.366790 | 112214.71 |
Halvorson, Crona and Champlin | 1206.971724 | 70004.36 |
Herman LLC | 1336.532258 | 82865.00 |
Jerde-Hilpert | 1265.072247 | 112591.43 |
Kassulke, Ondricka and Metz | 1350.797969 | 86451.07 |
Keeling LLC | 1363.977027 | 100934.30 |
Kiehn-Spinka | 1260.870506 | 99608.77 |
Koepp Ltd | 1264.152927 | 103660.54 |
Kuhn-Gusikowski | 1247.866849 | 91094.28 |
Kulas Inc | 1461.191064 | 137351.96 |
Pollich LLC | 1196.536712 | 87347.18 |
Purdy-Kunde | 1469.777547 | 77898.21 |
Sanford and Sons | 1391.872958 | 98822.98 |
Stokes LLC | 1271.332222 | 91535.92 |
Trantow-Barrows | 1312.567872 | 123381.38 |
White-Trantow | 1579.558023 | 135841.99 |
Will LLC | 1411.318919 | 104437.60 |
# 讲解:在上一条命令中,并设置平均值与总金额的显示风格
# 用法:格式化数值,在前面添加美元符号,包括2位小数点
(
df.groupby("姓名")["金额"]
.agg(["mean","sum"])
.style.format("${0:,.2f}")
)
Barton LLC
$1,334.62
$109,438.50
Cronin, Oberbrunner and Spencer
$1,339.32
$89,734.55
Frami, Hills and Schmidt
$1,438.47
$103,569.59
Fritsch, Russel and Anderson
$1,385.37
$112,214.71
Halvorson, Crona and Champlin
$1,206.97
$70,004.36
Herman LLC
$1,336.53
$82,865.00
Jerde-Hilpert
$1,265.07
$112,591.43
Kassulke, Ondricka and Metz
$1,350.80
$86,451.07
Keeling LLC
$1,363.98
$100,934.30
Kiehn-Spinka
$1,260.87
$99,608.77
Koepp Ltd
$1,264.15
$103,660.54
Kuhn-Gusikowski
$1,247.87
$91,094.28
Kulas Inc
$1,461.19
$137,351.96
Pollich LLC
$1,196.54
$87,347.18
Purdy-Kunde
$1,469.78
$77,898.21
Sanford and Sons
$1,391.87
$98,822.98
Stokes LLC
$1,271.33
$91,535.92
Trantow-Barrows
$1,312.57
$123,381.38
White-Trantow
$1,579.56
$135,841.99
Will LLC
$1,411.32
$104,437.60
mean | sum | |
---|---|---|
姓名 |
# 平均值与总金额显示为不带小数点
(
df.groupby("姓名")["金额"]
.agg(["mean","sum"])
.style.format("${0:,.0f}")
)
Barton LLC
$1,335
$109,438
Cronin, Oberbrunner and Spencer
$1,339
$89,735
Frami, Hills and Schmidt
$1,438
$103,570
Fritsch, Russel and Anderson
$1,385
$112,215
Halvorson, Crona and Champlin
$1,207
$70,004
Herman LLC
$1,337
$82,865
Jerde-Hilpert
$1,265
$112,591
Kassulke, Ondricka and Metz
$1,351
$86,451
Keeling LLC
$1,364
$100,934
Kiehn-Spinka
$1,261
$99,609
Koepp Ltd
$1,264
$103,661
Kuhn-Gusikowski
$1,248
$91,094
Kulas Inc
$1,461
$137,352
Pollich LLC
$1,197
$87,347
Purdy-Kunde
$1,470
$77,898
Sanford and Sons
$1,392
$98,823
Stokes LLC
$1,271
$91,536
Trantow-Barrows
$1,313
$123,381
White-Trantow
$1,580
$135,842
Will LLC
$1,411
$104,438
mean | sum | |
---|---|---|
姓名 |
# 按月分析销售金额,并显示每月销售额占年度销售总额比例
by=[pd.Grouper(key='日期', freq='M')]#自定义字段的格式
monthly_sales=df.groupby(by)["金额"].agg(["sum"]).reset_index()
monthly_sales["月占比"]=monthly_sales["sum"]/df["金额"].sum()
format_dict = {'sum': '${0:,.0f}', '日期': '{:%Y-%m-%d}', '月占比': '{:.4%}'}
# 1.使用普通格式字典控制每列的格式
(
monthly_sales
.style
.format(format_dict)
.hide_index()
)
2018-01-31
$185,362
9.1818%
2018-02-28
$146,212
7.2426%
2018-03-31
$203,921
10.1012%
2018-04-30
$174,574
8.6475%
2018-05-31
$165,419
8.1940%
2018-06-30
$174,089
8.6235%
2018-07-31
$191,662
9.4939%
2018-08-31
$153,779
7.6174%
2018-09-30
$168,443
8.3438%
2018-10-31
$171,495
8.4950%
2018-11-30
$119,961
5.9423%
2018-12-31
$163,867
8.1171%
日期 | sum | 月占比 |
---|
# 按月分析销售金额,并显示每月销售额占年度销售总额比例
by=[pd.Grouper(key='日期', freq='M')]#自定义字段的格式
monthly_sales=df.groupby(by)["金额"].agg(["sum"]).reset_index()
monthly_sales["月占比"]=monthly_sales["sum"]/df["金额"].sum()
# 2.使用条件格式字典控制每列的格式
format_dict = {'sum': '${0:,.0f}', '日期': '{:%Y-%m-%d}', '月占比': '{:.4%}'}
(
monthly_sales
.style
.format(format_dict)
.hide_index()
.highlight_max(color="lightgreen")
.highlight_min(color="red")
)
2018-01-31
$185,362
9.1818%
2018-02-28
$146,212
7.2426%
2018-03-31
$203,921
10.1012%
2018-04-30
$174,574
8.6475%
2018-05-31
$165,419
8.1940%
2018-06-30
$174,089
8.6235%
2018-07-31
$191,662
9.4939%
2018-08-31
$153,779
7.6174%
2018-09-30
$168,443
8.3438%
2018-10-31
$171,495
8.4950%
2018-11-30
$119,961
5.9423%
2018-12-31
$163,867
8.1171%
日期 | sum | 月占比 |
---|
# 按月分析销售金额,并显示每月销售额占年度销售总额比例
by=[pd.Grouper(key='日期', freq='M')]#自定义字段的格式
monthly_sales=df.groupby(by)["金额"].agg(["sum"]).reset_index()
monthly_sales["月占比"]=monthly_sales["sum"]/df["金额"].sum()
# 3.使用渐变色格式字典控制每列的格式
format_dict = {'sum': '${0:,.0f}', '日期': '{:%Y-%m-%d}', '月占比': '{:.4%}'}
(
monthly_sales
.style
.format(format_dict)
.background_gradient(subset=["sum"],cmap="PRGn")
)
0
2018-01-31
$185,362
9.1818%
1
2018-02-28
$146,212
7.2426%
2
2018-03-31
$203,921
10.1012%
3
2018-04-30
$174,574
8.6475%
4
2018-05-31
$165,419
8.1940%
5
2018-06-30
$174,089
8.6235%
6
2018-07-31
$191,662
9.4939%
7
2018-08-31
$153,779
7.6174%
8
2018-09-30
$168,443
8.3438%
9
2018-10-31
$171,495
8.4950%
10
2018-11-30
$119,961
5.9423%
11
2018-12-31
$163,867
8.1171%
日期 | sum | 月占比 |
---|
# 按月分析销售金额,并显示每月销售额占年度销售总额比例
by=[pd.Grouper(key='日期', freq='M')]#自定义字段的格式
monthly_sales=df.groupby(by)["金额"].agg(["sum"]).reset_index()
monthly_sales["月占比"]=monthly_sales["sum"]/df["金额"].sum()
# 4.使用单元格柱形格式字典控制每列的格式
format_dict = {'sum': '${0:,.0f}', '日期': '{:%Y-%m-%d}', '月占比': '{:.4%}'}
(
monthly_sales
.style
.format(format_dict)
.hide_index()
.bar(color="#FFA07A",vmin=100_000,subset=["sum"],align="zero")
.bar(color='lightblue', vmin=0, subset=['月占比'], align='zero')
.set_caption("2018年销售一览表")
)
2018-01-31
$185,362
9.1818%
2018-02-28
$146,212
7.2426%
2018-03-31
$203,921
10.1012%
2018-04-30
$174,574
8.6475%
2018-05-31
$165,419
8.1940%
2018-06-30
$174,089
8.6235%
2018-07-31
$191,662
9.4939%
2018-08-31
$153,779
7.6174%
2018-09-30
$168,443
8.3438%
2018-10-31
$171,495
8.4950%
2018-11-30
$119,961
5.9423%
2018-12-31
$163,867
8.1171%
日期 | sum | 月占比 |
---|
def sparkline_str(x):
bins=np.histogram(x)[0]#直方图
s1="".join(sparklines(bins))
return s1
sparkline_str.__name__ = "走势图"
df.groupby("姓名")["数量","金额"].agg(["mean",sparkline_str])
数量 | 金额 | |||
---|---|---|---|---|
mean | 走势图 | mean | 走势图 | |
姓名 | ||||
Barton LLC | 24.890244 | ▄▄▃▂▃▆▄█▁▄ | 1334.615854 | █▄▃▆▄▄▁▁▁▁ |
Cronin, Oberbrunner and Spencer | 24.970149 | █▄▁▄▄▇▅▁▄▄ | 1339.321642 | █▅▅▃▃▃▂▂▁▁ |
Frami, Hills and Schmidt | 26.430556 | ▄▄▁▂▇█▂▂▅▅ | 1438.466528 | █▅▄▇▅▃▄▁▁▁ |
Fritsch, Russel and Anderson | 26.074074 | ▁▄▇▃▂▂█▃▄▄ | 1385.366790 | ▇█▃▄▂▂▁▂▁▁ |
Halvorson, Crona and Champlin | 22.137931 | ▇▆▆▇█▁▄▂▄▃ | 1206.971724 | ██▆▅▁▃▂▂▂▂ |
Herman LLC | 24.806452 | ▄▃▅▁▆▄▂▆▃█ | 1336.532258 | █▅▇▄▅▄▁▃▂▂ |
Jerde-Hilpert | 22.460674 | ▄▄█▁▂▅▃▂▄▃ | 1265.072247 | █▄▅▂▁▂▃▂▂▁ |
Kassulke, Ondricka and Metz | 25.734375 | ▂▂▁▁▂▂▁▅▄█ | 1350.797969 | █▆▆▄▄▃▂▁▁▂ |
Keeling LLC | 24.405405 | ▁▄▇▃▅█▃▄▃▆ | 1363.977027 | ▅█▆▃▄▂▂▁▁▁ |
Kiehn-Spinka | 22.227848 | ▃▂█▂▃▅▄▁▄▁ | 1260.870506 | █▇▄▃▃▂▁▂▁▁ |
Koepp Ltd | 21.829268 | ▅▇█▆▄▇▅▁▅▇ | 1264.152927 | █▇▅▂▄▂▂▂▁▁ |
Kuhn-Gusikowski | 22.808219 | ▂▄█▄▃▁█▄▂▄ | 1247.866849 | ▆█▄▃▃▃▃▁▁▁ |
Kulas Inc | 24.095745 | ▇▃▇▂▇▁▅▆█▆ | 1461.191064 | █▅█▂▄▄▄▂▁▁ |
Pollich LLC | 23.383562 | █▂▅▂▃▆▁▂▄▇ | 1196.536712 | █▆▆▃▃▃▂▂▁▁ |
Purdy-Kunde | 27.358491 | ▄▅█▁▄▇▅█▇█ | 1469.777547 | █▅▄▃▄▃▃▂▁▂ |
Sanford and Sons | 24.000000 | ▂▂▂▆▁█▂▂▁▅ | 1391.872958 | ▅▇█▃▃▄▁▃▁▂ |
Stokes LLC | 24.527778 | ▄▄▄▁▃▂▄█▄▄ | 1271.332222 | █▇▆▆▄▂▂▃▁▂ |
Trantow-Barrows | 24.159574 | ▅▄▅▂▃▃▄▄▁█ | 1312.567872 | █▇▅▆▃▂▂▁▁▁ |
White-Trantow | 26.255814 | ▃▂▆▆▃██▁██ | 1579.558023 | ▄█▆▅▄▃▄▂▁▃ |
Will LLC | 24.702703 | ▂█▁▄▄▄▂▅▅▄ | 1411.318919 | ▆█▄▅▄▂▁▂▁▂ |