对于昨天写的用python实现自动化办公------Excel操作进入了全站热榜,真的很让我意外,但是对于Excel的自动化操作还远远没有结束。python自动化办公之Excel还有很长一段路要走,读者朋友们可以关注我,对于Excel的操作我会进行不定时的更新,也会进一步改善我的表达方式,从而给读者一个美好的阅读体验。
来看看今天学习的内容:
考虑到Excel操作需要真实数据作为支撑,读者,尤其是根据本文进行学习的读者无法直接使用Excel数据表进行学习,所以本文会在列出的python文件中前段代码为生成的xlsx文件的代码,具体操作如下所示,。对于代码的解释,重要的部分已经在代码中进行解释。
Excel功能:实现多表联合,列出学生对应的成绩,如果没有找到相应数据,则将Score设置为0,相关公式:IFNA(VLOOKUP(),0)
python实现方法:
learn_14.py
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
#create random data
# id=[i for i in range(1,40,2)]
# name=[]
# for i in id:
# before_name='Student_'
# before_name+=str(i)
# name.append(before_name)
#
# df1=pd.DataFrame({
# 'Id':pd.Series(id),
# 'name':pd.Series(name)
# })
# df1.set_index('Id',inplace=True)
# print(df1)
# df1.to_excel('./dataframe/learn_14.xlsx')
#vlookup
Student=pd.read_excel('./dataframe/learn_14.xlsx',sheet_name='Student')
Score=pd.read_excel('./dataframe/learn_14.xlsx',sheet_name='Score')
# Student.set_index('Id',inplace=True)
# Score.set_index('Id',inplace=True)
# print(Student)
# print(Score)
#相当于Excel中的vlookup查询Student和Score两张表Id列所对应的分数
# 合并成一张表,how为左边中的表无论如何,都要保持完整
#当数据为NA时,填充为0
#left_on=列名,right_on=列名
table=Student.merge(Score,how='left',on='Id').fillna(0)
table.Score=table.Score.astype(int)#将Score列转换为整数
print(table)
Student['Score']=table['Score']
print(Student)
Student.set_index('Id',inplace=True)
Student.to_excel('./dataframe/learn_14_last.xlsx')
python实现方法:
learn_15.py
```python
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
#create random data
# id=[i for i in range(1,21)]
# name=[]
# for i in id:
# before_name='Student_'
# before_name+=str(i)
# name.append(before_name)
#
# df1=pd.DataFrame({
# 'Id':pd.Series(id),
# 'name':pd.Series(name),
# 'score':pd.Series(range(-40,160,10))
# })
# df1.set_index('Id',inplace=True)
# print(df1)
# df1.to_excel('./dataframe/learn_15.xlsx')
def score_validation(row):
#方法一
# try:
# assert 0<=row.score<=100
# except:
# print(f"#{row.Id}\t student {row.name} has invalid score{row.score}")
#方法二
if not 0<=row.score<=100:
print(f"#{row.Id}\t student {row.name} has invalid score{row.score}")
#数据校验
df=pd.read_excel('./dataframe/learn_15.xlsx')
# print(df)
df.apply(score_validation,axis=1)#校验从上到下进行校验
Excel功能:将full_name列以空格为分隔符分割成两列。
python实现:
learn_16.py
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
#create random data
# id=[i for i in range(1,21)]
# name_select=[chr(i) for i in range(97,123)]
# name=[]
# for i in range(20):
# name_before=''
# for i in range(5):
# name_before+=random.choice(name_select)
# name_before+=' '
# for i in range(5):
# name_before+=random.choice(name_select)
# name.append(name_before.title())
#
#
# df1=pd.DataFrame({
# 'id':pd.Series(id),
# 'full_name':pd.Series(name)
# })
# df1.set_index('id',inplace=True)
# print(df1)
# df1.to_excel('./dataframe/learn_16.xlsx')
#拆分单元格
employees=pd.read_excel('./dataframe/learn_16.xlsx')
# print(df)
df=employees['full_name'].str.split(n=3,expand=True)#默认空格,seres str split
employees['first_name']=df[0].str.upper()#大写
employees['last_name']=df[1]
print(employees)
employees.set_index('id',inplace=True)
employees.to_excel('./dataframe/learn_16_last.xlsx')
Excel功能:实现总分、平均分汇总
python实现:
learn_17.py
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
#create random data
# id=[i for i in range(1,21)]
# name_select=[chr(i) for i in range(97,123)]
# name=[]
# for i in range(20):
# name_before=''
# for i in range(5):
# name_before+=random.choice(name_select)
# name_before+=' '
# for i in range(5):
# name_before+=random.choice(name_select)
# name.append(name_before.title())
#
#
# df1=pd.DataFrame({
# 'id':pd.Series(id),
# 'full_name':pd.Series(name),
# 'test1':pd.Series([random.choice(range(40,100)) for i in range(20)]),
# 'test2':pd.Series([random.choice(range(40,100)) for i in range(20)]),
# 'test3':pd.Series([random.choice(range(40,100)) for i in range(20)])
# })
# df1.set_index('id',inplace=True)
# print(df1)
# df1.to_excel('./dataframe/learn_17.xlsx')
#统计分析
students=pd.read_excel('./dataframe/learn_17.xlsx',index_col='id')
# print(students)
temp=students[['test1','test2','test3']]
print(temp)
result=temp.sum()
print(result)
row_sum=temp.sum(axis=1)#从左到右为1,从上到下为0
row_mean=temp.mean(axis=1)
students['total']=row_sum
students['average']=row_mean
print(students)
col_mean=students[['test1','test2','test3','total','average']].mean()
print(col_mean)
col_mean['full_name']='summary'
print(col_mean)
students=students.append(col_mean,ignore_index=True)
print(students)
students.to_excel('./dataframe/learn_17_last.xlsx')
Excel功能:删除重复的数据(id[1:5]和id[21:25]重复)。
python实现:
learn_18.py
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
#create random data
# id=[i for i in range(1,26)]
# name_select=[chr(i) for i in range(97,123)]
# name=[]
# for i in range(20):
# name_before=''
# for i in range(5):
# name_before+=random.choice(name_select)
# name_before+=' '
# for i in range(5):
# name_before+=random.choice(name_select)
# name.append(name_before.title())
# df1=pd.DataFrame({
# 'id':pd.Series(id),
# 'full_name':pd.Series(name),
# 'test1':pd.Series([random.choice(range(40,100)) for i in range(20)]),
# 'test2':pd.Series([random.choice(range(40,100)) for i in range(20)]),
# 'test3':pd.Series([random.choice(range(40,100)) for i in range(20)])
# })
# print(df1)
# print(df1['full_name'].at[6])
# for i in range(20,25):
# df1['full_name'].at[i]=df1['full_name'].at[i-20]
# df1['test1'].at[i]=df1['test1'].at[i-20]
# df1['test2'].at[i]=df1['test2'].at[i-20]
# df1['test3'].at[i]=df1['test3'].at[i-20]
# df1.set_index('id',inplace=True)
# print(df1)
#
# df1.to_excel('./dataframe/learn_18.xlsx')
#清除重复数据
df=pd.read_excel('./dataframe/learn_18.xlsx')
df.drop_duplicates(subset='full_name',inplace=True,keep='first')#subset设置列名keep为如果有重复的数据保留第一个出现的last/first
print(df)
#查询重复数据
# dupe=df.duplicated(subset='full_name')
# print(dupe)
# print(dupe.any())
# dupe=dupe[dupe]
# print(dupe.index)
# print(df.iloc[dupe.index])
df.set_index('id',inplace=True)
df.to_excel('./dataframe/learn_18_last.xlsx')
# df.to_excel('./dataframe/learn_18_last.xlsx',sheet_name='最终数据')
python实现:
learn_19.py
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
#create random data
# id=[i for i in range(1,26)]
# name_select=[chr(i) for i in range(97,123)]
# name=[]
# for i in range(20):
# name_before=''
# for i in range(5):
# name_before+=random.choice(name_select)
# name_before+=' '
# for i in range(5):
# name_before+=random.choice(name_select)
# name.append(name_before.title())
# df1=pd.DataFrame({
# 'Month':pd.Series(pd.date_range('20210101',periods=30)),
# 'Active_user':pd.Series([random.choice(range(400,1000)) for i in range(30)]),
# 'Page_view':pd.Series([random.choice(range(100,3000)) for i in range(30)]),
# })
# df1.set_index('Month',inplace=True)
# df1.to_excel('./dataframe/learn_19.xlsx')
#旋转表格
pd.options.display.max_columns=999
df=pd.read_excel('./dataframe/learn_19.xlsx',index_col='Month')
table=df.transpose()#旋转
print(table)
table.to_excel('./dataframe/learn_19_last.xlsx')
df.to_csv('./dataframe/learn_19_last.csv')
df.to_csv('./dataframe/learn_19_last.txt')
实现效果:
Excel功能:实现从Excel中获取csv、tsk、txt文件的数据。
python实现:
本代码中需要用的csv和txt文件已经在learn_18中生成。
learn_20.py
student1=pd.read_csv('./dataframe/learn_19_last.csv')#CSV文件和TXT文件都可以通过read_csv读取
print(student1)
student2=pd.read_csv('./dataframe/learn_19_last.txt',sep=',',)
print(student2)
Excel功能:实现Excel的透视表功能(excel文件可以私聊我)
python实现:
learn_21.py
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import numpy as np
pd.options.display.max_columns=999
orders=pd.read_excel('./dataframe/Orders.xlsx')
orders['Year']=pd.DatetimeIndex(orders['Date']).year
print(orders.head(10))
#数据透视表方法一
# pt1=orders.pivot_table(index='Category',columns='Year',values='Total',aggfunc=np.sum)
# print(pt1)
# pt1.to_excel('./dataframe/Orders_toushibiao_1.xlsx')
#数据透视表方法二
groups=orders.groupby(['Category', 'Year'])
sum=groups['Total'].sum()
count=groups['ID'].count()
pt2=pd.DataFrame({
'sum':sum,
'count':count
})
pt2.to_excel('./dataframe/Orders_toushibiao_2.xlsx')
python实现:
learn_22.py
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from scipy.stats import linregress
#create random data
# df1=pd.DataFrame({
# 'Month':pd.Series(pd.date_range('20210101',periods=30)),
# 'Active_user':pd.Series([random.choice(range(400,1000)) for i in range(30)]),
# })
# df1.set_index('Month',inplace=True)
# df1.to_excel('./dataframe/learn_22.xlsx')
#线性回归,数据预测
sales=pd.read_excel('./dataframe/learn_22.xlsx',dtype={'Month':str})
print(sales.index)
slope,intercept,r,p,std_err=linregress(sales.index,sales.Active_user)
exp=sales.index*slope+intercept
plt.plot(sales.index,exp,color='orange')
plt.scatter(sales.index,sales.Active_user)
plt.xticks(sales.index,sales.Month,rotation=90)
plt.title(f'y={slope}*x+{intercept}')
plt.tight_layout()
plt.show()
实现效果:
今天的《用python实现自动化办公------Excel》操作就分享到这儿了,欢迎各位读者能够进行点评和分享。
如果对我的文章感兴趣,请为我点一个赞,如果有python的知识需要了解或探讨,可以加本人微信cuiliang1666457052