操作背景:每周都要做周会材料,并且数据都是手工计算,费时费力,于是萌生了使用Python来操作的想法。正好我也在学习Python,这样做既是为了学以致用,更是为了熟悉Python的操作,特别是pandas的操作。
目标:将表1与表2中遂宁市场部下的客户袋装、散装销量分别合并,按客户销量大小排序,并对每个客户分别进行周销量环比。
第一步:导入报表
# 首先我们需要3个库:pandas库、numpy库、openpyxl库。pandas在数据整理方面能力强大,特别是在大数据的整理方面(小数据我们当然可以通过excel来实现);numpy在数据运算方面表现优异;openpyxl库是我们导出到xlsx文件时所需要用到的,不然to_excel的时候可能会报错。
import pandas as pd
import numpy as np
import openpyxl
# Console display settings applied to every DataFrame printed below.
display_settings = (
    ('expand_frame_repr', False),  # keep wide frames on one line instead of wrapping
    ('display.max_columns', 10),   # print at most 10 columns (use None for no limit)
    ('display.max_rows', 90),      # print at most 90 rows (use None for no limit)
)
for option_name, option_value in display_settings:
    pd.set_option(option_name, option_value)
# def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
# index_col=None, names=None, usecols=None, parse_dates=False,
# date_parser=None, na_values=None, thousands=None,
# convert_float=True, converters=None, dtype=None,
# true_values=None, false_values=None, engine=None,
# squeeze=False, **kwds):
# df = pd.read_excel(r'C:\Users\Tcement\Desktop\客户信息库.xlsx',sheet_name='销售网点')
# Load the same worksheet ('袋装') from each plant workbook; the first result
# is bound to df_cq and the second to df_hy.
df_cq, df_hy = [
    pd.read_excel(workbook, sheet_name='袋装')
    for workbook in (r'重庆厂.xlsx', r'华蓥厂.xlsx')
]
第二步:筛选需要的列行
1.清洗数据及数据筛选
# DataFrame.rename(mapper=None, index=None, columns=None, axis=None, copy=True, inplace=False, level=None)
# df_cq = df_cq.rename(str.lower, axis='columns')
# df_cq = df_cq.rename(str.lower, axis='index')
# s1.str.contains('oG', case=True, regex=True)
# s1.str.contains('og', na=False, regex=True)
# s1.str.contains('house|parrot', regex=True)
# Give the raw sheet meaningful column names. The label of column 0 is the
# sheet's title cell; pandas labels the remaining header-less columns
# 'Unnamed: N'. Columns 3..33 hold the daily figures for day 1..31, so that
# part of the mapping is generated instead of typed by hand (the hand-typed
# version contained the key 'Unnamed: 21`', which never matched, leaving the
# 19th-day column unrenamed).
column_names = {
    '重庆厂2019年9月袋装客户销量统计': '市场部',
    'Unnamed: 1': '姓名',
    'Unnamed: 2': '客户名称',
    'Unnamed: 34': '本月销量',
}
for day in range(1, 32):
    column_names['Unnamed: {}'.format(day + 2)] = '{}号'.format(day)
df_cq = df_cq.rename(column_names, axis='columns')

# Forward-fill the department and salesperson columns so that every row
# carries the value of the last non-empty cell above it. Assigning the result
# back avoids the deprecated in-place fill on a selected column.
df_cq['市场部'] = df_cq['市场部'].ffill()
df_cq['姓名'] = df_cq['姓名'].ffill()

# Keep the first week's columns for rows of the 遂宁市场部 department that have
# a customer name, treat missing daily figures as 0 and renumber the rows.
week_days = ['1号', '2号', '3号', '4号', '5号', '6号', '7号']
wanted_rows = (df_cq['市场部'] == '遂宁市场部') & df_cq['客户名称'].notna()
df_cq_temp = (
    df_cq.loc[wanted_rows, ['客户名称'] + week_days]
    .fillna(0)
    .reset_index(drop=True)
)

# Weekly subtotal per row: element-wise addition of the seven day columns.
df_cq_temp['小计'] = sum(df_cq_temp[day] for day in week_days)

# Normalise customer names: drop line breaks first, then remove any text
# enclosed in (), full-width parentheses (U+FF08/U+FF09), [] or {}.
# The previous pattern had an unbalanced ')' and raised re.error, and the
# 're' module was not imported at this point of the script.
customer_names = df_cq_temp['客户名称'].astype(str)
customer_names = customer_names.str.replace(r'[\r\n]', '', regex=True)
df_cq_temp['客户名称'] = customer_names.str.replace(
    r'\(.*?\)|\uff08.*?\uff09|\[.*?\]|\{.*?\}', '', regex=True
)
2.数据合并
# Merge rows that share a customer name by summing their numeric columns
# (the daily figures and the '小计' subtotal).
df_cq = df_cq_temp.groupby('客户名称').sum()
# Show the merged result, largest weekly subtotal first. Previously the
# un-merged df_cq_temp was printed, so the grouping above had no visible effect.
print(df_cq.sort_values(by='小计', ascending=False))
3.成果展示
import pandas as pd
import numpy as np
import openpyxl
import re
# Console display settings applied to every DataFrame printed below.
display_settings = (
    ('expand_frame_repr', False),  # keep wide frames on one line instead of wrapping
    ('display.max_columns', 10),   # print at most 10 columns (use None for no limit)
    ('display.max_rows', 90),      # print at most 90 rows (use None for no limit)
)
for option_name, option_value in display_settings:
    pd.set_option(option_name, option_value)
def read_excel(changbie=r'重庆厂.xlsx', baozhuang='袋装', biaotou=None, zhouqi=None):
    """Return the cleaned daily sales rows of the Suining department.

    Args:
        changbie: Path of the plant workbook, passed to ``pandas.read_excel``.
            An already-loaded ``DataFrame`` is also accepted; it is used
            instead of reading a file (and is not modified), in which case
            ``baozhuang`` is ignored.
        baozhuang: Name of the worksheet to read.
        biaotou: Mapping from the raw column labels to the working column
            names. After renaming, the columns '市场部' and '客户名称' and every
            name listed in ``zhouqi`` must exist. When omitted, the mapping
            for the default workbook/sheet is used: title cell -> '市场部',
            'Unnamed: 1' -> '姓名', 'Unnamed: 2' -> '客户名称',
            'Unnamed: 3'..'Unnamed: 33' -> '1号'..'31号',
            'Unnamed: 34' -> '本月销量'.
        zhouqi: Columns to keep; must contain '客户名称'. Defaults to
            '客户名称' plus '1号'..'7号'.

    Returns:
        A new ``DataFrame`` with the ``zhouqi`` columns, restricted to rows
        whose (forward-filled) '市场部' contains '遂宁' and whose customer name
        is present. Missing values are replaced by 0, customer names have
        line breaks and bracketed annotations removed, and the index is
        renumbered from 0.

    Raises:
        KeyError: If a required column is missing after renaming.
    """
    if biaotou is None:
        # Built per call so no mutable default is shared. The generated keys
        # also fix the former "Unnamed: 21`" typo that left day 19 unrenamed.
        biaotou = {
            '重庆厂2019年9月袋装客户销量统计': '市场部',
            'Unnamed: 1': '姓名',
            'Unnamed: 2': '客户名称',
            'Unnamed: 34': '本月销量',
        }
        for day in range(1, 32):
            biaotou['Unnamed: {}'.format(day + 2)] = '{}号'.format(day)
    if zhouqi is None:
        zhouqi = ['客户名称', '1号', '2号', '3号', '4号', '5号', '6号', '7号']

    if isinstance(changbie, pd.DataFrame):
        raw = changbie
    else:
        raw = pd.read_excel(changbie, sheet_name=baozhuang)

    # rename() returns a new frame, so the caller's data is left untouched.
    df = raw.rename(biaotou, axis='columns')

    # Forward-fill so every row carries the last non-empty value above it.
    df['市场部'] = df['市场部'].ffill()
    if '姓名' in df.columns:
        df['姓名'] = df['姓名'].ffill()

    # na=False: rows whose department is still missing are simply excluded
    # instead of producing a NaN mask, which boolean indexing rejects.
    in_suining = df['市场部'].str.contains('遂宁', na=False)
    has_name = df['客户名称'].notna()
    df_temp = (
        df.loc[in_suining & has_name, list(zhouqi)]
        .fillna(0)
        .reset_index(drop=True)
    )

    # Drop line breaks first, then remove text enclosed in (), full-width
    # parentheses (U+FF08/U+FF09), [] or {}. The previous pattern had an
    # unbalanced ')' and raised re.error.
    names = df_temp['客户名称'].astype(str)
    names = names.str.replace(r'[\r\n]', '', regex=True)
    df_temp['客户名称'] = names.str.replace(
        r'\(.*?\)|\uff08.*?\uff09|\[.*?\]|\{.*?\}', '', regex=True
    )
    return df_temp
# 注意:表文件名字、工作表名或行列内元素中出现空格等情况时,会出现 No sheet named 或者 KeyError 的报错
def _header_map(title):
    """Return the raw-label -> working-name column mapping for one sheet.

    ``title`` is the sheet's title cell, which pandas uses as the label of
    column 0. Columns 'Unnamed: 3'..'Unnamed: 33' hold day 1..31, so they are
    generated rather than typed by hand (the hand-typed mappings contained the
    key "Unnamed: 21`", which never matched the 19th-day column).
    """
    mapping = {
        title: '市场部',
        'Unnamed: 1': '姓名',
        'Unnamed: 2': '客户名称',
        'Unnamed: 34': '本月销量',
    }
    for day in range(1, 32):
        mapping['Unnamed: {}'.format(day + 2)] = '{}号'.format(day)
    return mapping


# (workbook, worksheet, title cell) of each plant's bulk ('散装') sheet.
# The sheet name of the second workbook is written with a leading space, as
# in the original call; it must match the workbook exactly.
# To analyse the bagged sheets instead, use worksheet '袋装' for both workbooks
# with the titles '重庆厂2019年9月袋装客户销量统计' and '华蓥厂9月袋装客户销量统计'.
_SOURCES = [
    (r'重庆厂.xlsx', '散装', '重庆厂2019年9月散装客户销量统计'),
    (r'华蓥厂.xlsx', ' 散装', '华蓥厂9月散装客户销量统计'),
]


def _weekly_totals(first_day):
    """Return per-customer sales for the 7 days starting at ``first_day``.

    The rows of all plants are stacked, merged per customer name, given a
    '小计' (subtotal) column and sorted by that subtotal, largest first.
    """
    days = ['{}号'.format(day) for day in range(first_day, first_day + 7)]
    frames = [
        read_excel(workbook, sheet, _header_map(title), ['客户名称'] + days)
        for workbook, sheet, title in _SOURCES
    ]
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    merged = pd.concat(frames, ignore_index=True).groupby('客户名称').sum()
    # Element-wise addition of the seven day columns.
    merged['小计'] = sum(merged[day] for day in days)
    return merged.sort_values(by='小计', ascending=False)


df_shang = _weekly_totals(1)  # days 1-7
df_beng = _weekly_totals(8)   # days 8-14
# Week-over-week change per customer; the subtraction aligns on customer name.
df_beng["散装结果"] = df_beng["小计"] - df_shang["小计"]
print(df_beng)
注意:出现'>' not supported between instances of 'float' and 'NoneType'多是因为使用for in range语句变量超出范围