import os
import win32com.client as win32
import xlwt
from datetime import date
from docx import Document
from docx.shared import Cm
from docx.enum.text import WD_ALIGN_PARAGRAPH
import re
import tempfile
import xlrd
from xlutils.copy import copy
import warnings
warnings.simplefilter('ignore')
import pandas as pd
import msoffcrypto
import io
def read_excel(path, sheet_name, title, header=0, password=''):
    """Load an Excel workbook into a DataFrame, decrypting it first if needed.

    The file is opened in binary mode and probed with
    ``msoffcrypto.olefile.isOleFile``.  A file that is not an OLE container
    is handed straight to ``pandas.read_excel`` using *sheet_name* and
    *header*.  An OLE container is decrypted in memory with *password* and
    parsed with the fixed column names ``'a'`` .. ``'i'`` (*sheet_name* and
    *header* are not used on that branch).

    Args:
        path: Path of the workbook to read.
        sheet_name: Sheet selector forwarded to pandas (non-OLE branch only).
        title: Label inserted into the log messages.
        header: Header row forwarded to pandas (non-OLE branch only).
        password: Password used to decrypt an OLE container.

    Returns:
        The parsed DataFrame, or ``None`` when anything fails while opening,
        decrypting or parsing the file (the failure is logged).
    """
    # Local import: ``logging`` is not among the file-level imports visible
    # here, and the resulting NameError used to be swallowed by the handler
    # below, making every unencrypted read return None.
    import logging

    decrypted = io.BytesIO()
    try:
        with open(path, 'rb') as f:
            if not msoffcrypto.olefile.isOleFile(f):
                pd_excel = pd.read_excel(f, sheet_name=sheet_name, header=header)
                logging.info("{title}文件读取成功".format(title=title))
            else:
                xlf = msoffcrypto.OfficeFile(f)
                xlf.load_key(password=password)
                xlf.decrypt(decrypted)
                # Rewind so the parser starts at the first decrypted byte.
                decrypted.seek(0)
                print('xlf.decrypt(decrypted)', decrypted)
                pd_excel = pd.read_excel(
                    decrypted,
                    names=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'],
                )
                return pd_excel
    except Exception:
        # Best-effort contract kept (callers receive None), but the failure
        # is now reported together with its traceback instead of being lost.
        logging.exception(
            "Excel文件{title}打开失败,请检查文件路径及权限。".format(title=title)
        )
        return None
    print(pd_excel)
    return pd_excel
def open_workbook(workbook_file_path, password=None):
    """Open a workbook with xlrd, decrypting it first when a password is given.

    Args:
        workbook_file_path: Path of the workbook on disk.
        password: ``None`` to open the file directly; otherwise the password
            used to decrypt the file into a temporary file before parsing.

    Returns:
        The workbook object returned by ``xlrd.open_workbook``.
    """
    if password is None:
        return xlrd.open_workbook(workbook_file_path)

    with open(workbook_file_path, 'rb') as fin, \
            tempfile.TemporaryFile() as tfp:
        encrypted = msoffcrypto.OfficeFile(fin)
        encrypted.load_key(password=password)
        encrypted.decrypt(tfp)
        tfp.seek(0)
        # xlrd's first parameter is a path, not a file object; the decrypted
        # bytes have to be supplied through ``file_contents``.
        return xlrd.open_workbook(file_contents=tfp.read())
# Import-time snapshot of the current date: `a` is today's date as text and
# year/month/day are read from a single datetime so they agree with each other.
a = str(date.today())
import datetime
today = datetime.datetime.today()
year, month, day = today.year, today.month, today.day
def getYesterday():
    """Return the calendar date one day before today as a ``datetime.date``."""
    return datetime.date.today() - datetime.timedelta(days=1)
# Trace the dates in use, then keep yesterday's date and its parts as
# module-level values for the document-filling functions below.
print(getYesterday(), year, month, day)
yesday = getYesterday()
yyear, ymonth, yday = yesday.year, yesday.month, yesday.day
def replaceinrun(replacedstr, replacestr, arun):
    """Replace *replacedstr* with *replacestr* inside one run's text.

    The run is left untouched when its text does not contain *replacedstr*;
    otherwise its ``text`` is reassigned and the new text is printed.
    """
    # NOTE(review): the source's indentation was lost; the print is assumed
    # to belong to the "text was replaced" branch.
    if replacedstr not in arun.text:
        return
    arun.text = arun.text.replace(replacedstr, replacestr)
    print(arun.text)
def reparagraph(replacedstr, replacestr, aparagraph):
    """Apply ``replaceinrun`` to every run of *aparagraph*, in run order."""
    for piece in aparagraph.runs:
        replaceinrun(replacedstr, replacestr, piece)
def han():
    """Fill the ``函模板.docx`` letter template with dates and save a copy.

    Every paragraph is printed with its index, paragraph 2 is printed again,
    and then the date placeholders of paragraphs 2, 5, 7 and 13 are replaced
    run by run: 'ym'/'yd'/'yyyy' receive yesterday's month/day/year and
    'mm'/'dd'/'yyyy' (paragraph 13) receive today's.  The document is saved
    under a name built from yesterday's month and day.
    """
    document = Document(r"函模板.docx")
    paragraphs = document.paragraphs
    for index, paragraph in enumerate(paragraphs):
        print(index, paragraph.text)
    print(paragraphs[2].text)

    # (paragraph index, ordered (placeholder, value) pairs applied to each run)
    yesterday_month_day = (('ym', ymonth), ('yd', yday))
    plan = (
        (2, yesterday_month_day),
        (5, yesterday_month_day),
        (7, yesterday_month_day + (('yyyy', yyear),)),
        (13, (('mm', month), ('dd', day), ('yyyy', year))),
    )
    for paragraph_index, substitutions in plan:
        for run in paragraphs[paragraph_index].runs:
            for placeholder, value in substitutions:
                replaceinrun(placeholder, str(value), run)

    document.save('关于提供'+str(ymonth)+'月'+str(yday)+'日广州市手机用户漫游情况的函.docx')
    # NOTE(review): the source's indentation was lost; this trace is assumed
    # to be the function's last statement rather than a module-level print.
    print(getYesterday(), year, month, day)
def changecell(cell, atext):
    """Set the text of every run in every paragraph of *cell* to ``str(atext)``.

    Paragraphs without runs are left as they are, so a cell that contains no
    run at all is not modified.
    """
    every_run = (
        run
        for paragraph in cell.paragraphs
        for run in paragraph.runs
    )
    for run in every_run:
        run.text = str(atext)
def getfilename(keyw):
    """Locate a spreadsheet below the current working directory by keyword.

    The directory tree rooted at ``os.getcwd()`` is walked and a file matches
    when its name contains both *keyw* and ``'xls'``.  Each match is printed.

    Args:
        keyw: Text that must occur in the file name.

    Returns:
        The joined path (directory + name) of the last match met by the walk.

    Raises:
        FileNotFoundError: If no file matches.  Previously this case ended in
            an unbound ``filename`` instead of a meaningful error.
    """
    # NOTE(review): the source's indentation was lost; the function is assumed
    # to return after the walk finishes (last match), not at the first match.
    filename = None
    for root, _dirs, files in os.walk(os.getcwd()):
        for name in files:
            if keyw in name and 'xls' in name:
                print(name)
                filename = os.path.join(root, name)
                print(filename)
    if filename is None:
        raise FileNotFoundError(
            "no file containing {!r} and 'xls' found under {}".format(
                keyw, os.getcwd()
            )
        )
    return filename
def getdatenum(filename):
    """Return the date text embedded in a report file name.

    The date is taken to start at the first ``'202'`` in *filename* and to
    run up to the file extension.  The extension is removed with
    ``os.path.splitext`` rather than by dropping a fixed five characters,
    so ``.xls`` names no longer lose the last digit of the date.

    Args:
        filename: File name or path, e.g. ``'report2021-03-05.xlsx'``.

    Returns:
        The date text (``'2021-03-05'`` for the example above), or ``''``
        when the name contains no ``'202'``.
    """
    print(filename)
    stem, _extension = os.path.splitext(filename)
    p202 = stem.find('202')
    strdate = stem[p202:] if p202 >= 0 else ''
    print(strdate)
    return strdate
def getdate(filename):
    """Return the month and day embedded in a report file name as 'MM月DD日'.

    The date text starts at the first ``'202'`` in *filename* and ends at
    the file extension (removed with ``os.path.splitext`` so that both
    ``.xls`` and ``.xlsx`` names work).  It is split on ``'-'``; the second
    and third fields are used as month and day.

    Args:
        filename: File name or path, e.g. ``'report2021-03-05.xlsx'``.

    Returns:
        A string such as ``'03月05日'``.

    Raises:
        IndexError: If the date text has fewer than three '-'-separated parts.
    """
    print(filename)
    stem, _extension = os.path.splitext(filename)
    p202 = stem.find('202')
    strdate = stem[p202:] if p202 >= 0 else ''
    print(strdate)
    a = strdate.split('-')
    return a[1] + '月' + a[2] + '日'
def savefile(pd, filename):
    """Tag a DataFrame with the file's date and append it to a CSV file.

    The date text starts at the first ``'202'`` in *filename* and ends at
    the file extension (removed with ``os.path.splitext``, so ``.xls`` names
    are handled as well as ``.xlsx``).  The CSV path is everything in
    *filename* before that ``'202'`` plus ``'.csv'``.

    Args:
        pd: DataFrame to store.  NOTE: inside this function the name shadows
            the pandas module alias, and the frame is modified in place (a
            ``'date'`` column is added or overwritten).
        filename: Path of the source workbook.

    Returns:
        None.
    """
    print(filename)
    stem, _extension = os.path.splitext(filename)
    p202 = filename.find('202')
    strdate = stem[p202:] if p202 >= 0 else ''
    print(strdate)
    savename = filename[0:p202]
    print(savename)
    pd['date'] = strdate
    print(pd)
    # mode='a' appends to an existing CSV; each call writes the header row
    # again, exactly as before.
    pd.to_csv(savename + '.csv', mode='a')
def fujian():
    """Fill the ``附件模板.docx`` template from the 漫出 and 漫入 workbooks.

    In order: print every paragraph, replace the date placeholders of
    paragraphs 3 and 6, read the workbook found by ``getfilename('漫出')``,
    rewrite the figures and names of paragraph 6 and fill tables 0 and 1;
    then do the same for ``getfilename('漫入')`` with paragraph 14 and
    tables 2 and 3.  Finally the document is saved under a name built from
    yesterday's date and both frames are appended to CSV via ``savefile``.

    Column letters ('a'..'i') are the names assigned by ``read_excel``;
    ``iloc`` column 7 is therefore 'h' and column 8 is 'i'.

    NOTE(review): the indentation of this file was lost; the nesting below
    is reconstructed from syntax and should be checked against the original.
    """
    document=Document(r"附件模板.docx")
    paragraphs=document.paragraphs
    # Debug listing of all paragraphs with their index.
    i = 0
    for pa in paragraphs:
        print(i, pa.text)
        i = i+1
    # Paragraph 3 carries today's date placeholders.
    for run in paragraphs[3].runs:
        replaceinrun('mm', str(month), run)
        replaceinrun('dd', str(day), run)
        replaceinrun('yyyy', str(year), run)
    # Paragraph 6 carries yesterday's month/day placeholders.
    for run in paragraphs[6].runs:
        replaceinrun('lm', str(ymonth), run)
        replaceinrun('ld', str(yday), run)
    file_dir=os.getcwd ()
    filenameout = ''
    filenamein = ''
    # This walk only prints matching names: the `filename` it assigns is
    # overwritten by the getfilename() call right after the loop.
    for root,dirs,files in os.walk(file_dir):
        for name in files:
            if name.find('漫出')>-1:
                print(name)
                filename=os.path.join(root,name)
    filename = getfilename('漫出')
    print(filename)
    filenameout = filename
    wb = read_excel(filename, '','',0,'passwrod')
    print(wb)
    pdout=wb
    print(wb[['e','f','h','i']])
    # Rows whose 'f' column is empty.
    # presumably these are the province-level rows — TODO confirm
    province = wb[wb['f'].isna()]
    print(province)
    print(province['h'].sum())
    # Both totals are scaled by 1/10000 before being formatted.
    sumall = province['h'].sum()/10000
    gdoutall = wb.iloc[[0],[7]].values[0][0]/10000
    # The literals '2.8', '17.5' and '14.7' are the sample figures in the
    # template text that get replaced by the computed values.
    reparagraph('2.8', '{:.2f}'.format(sumall-gdoutall), paragraphs[6])
    reparagraph('17.5', '{:.2f}'.format(province['h'].sum()/10000), paragraphs[6])
    reparagraph('14.7', '{:.2f}'.format(gdoutall), paragraphs[6])
    print(province['i'].sum())
    # Names from rows 1..5 of column 5 ('f'), joined with '、'.
    c5 = wb.iloc[[1],[5]].values[0][0]+'、'+wb.iloc[[2],[5]].values[0][0]+'、'+wb.iloc[[3],[5]].values[0][0]+'、'+wb.iloc[[4],[5]].values[0][0]+'、'+wb.iloc[[5],[5]].values[0][0]
    # NOTE(review): 'c' and 's' are single-letter placeholders; every 'c' /
    # 's' occurring in the runs of paragraph 6 is replaced.
    reparagraph('c', c5, paragraphs[6])
    # Names from rows 1..5 of column 4 ('e') of the filtered rows.
    s5 = province.iloc[[1],[4]].values[0][0]+'、'+province.iloc[[2],[4]].values[0][0]+'、'+province.iloc[[3],[4]].values[0][0]+'、'+province.iloc[[4],[4]].values[0][0]+'、'+province.iloc[[5],[4]].values[0][0]
    reparagraph('s', s5, paragraphs[6])
    # Debug output only.
    print(province.iloc[[2],[4]].values[0][0], pd.notnull(province.iloc[[2],[4]]), pd.notnull(province.iloc[[2],[4]]).sum())
    print(province.iloc[[2],[5]].values[0][0], pd.notnull(province.iloc[[2],[5]]), pd.notnull(province.iloc[[2],[5]]).sum())
    print(province.iloc[[2],[4]].values[0][0])
    print(wb.iloc[[2],[5]].values[0][0])
    print(wb.iloc[[2],[7]].values[0][0])
    print(wb.iloc[[2],[8]].values[0][0])
    tables=document.tables
    # Writes row 2 of the data into table row 1; the while-loop below writes
    # table row 1 again from data row 1.
    changecell(tables[0].cell(1,2), int(wb.iloc[[2],[7]].values[0][0]))
    changecell(tables[0].cell(1,3), '{:.2%}'.format(wb.iloc[[2],[8]].values[0][0]))
    # Debug scan of the first 25 rows of column 5.
    for i in range(0,25):
        print(pd.notnull(wb.iloc[[i],[5]]).sum().sum())
        if pd.notnull(wb.iloc[[i],[5]]).sum().sum()>0:
            print(i, wb.iloc[[i],[5]].values[0][0])
    # Table 0: one table row per data row, starting at row 1, for as long as
    # column 5 is non-null.
    i = 1
    while pd.notnull(wb.iloc[[i],[5]]).sum().sum()>0:
        changecell(tables[0].cell(i,1), str(wb.iloc[[i],[5]].values[0][0]))
        changecell(tables[0].cell(i,2), int(wb.iloc[[i],[7]].values[0][0]))
        changecell(tables[0].cell(i,3), '{:.2%}'.format(wb.iloc[[i],[8]].values[0][0]))
        i +=1
        print(i, wb.iloc[[i],[4]].values[0][0])
    if i<21:
        # NOTE(review): `(i, 21)` is a two-element tuple, so only table rows
        # i and 21 are blanked — confirm whether a range was intended.
        for j in (i, 21):
            changecell(tables[0].cell(j,1), '')
            changecell(tables[0].cell(j,2), '')
            changecell(tables[0].cell(j,3), '')
            print(j, wb.iloc[[j],[4]].values[0][0])
    print(province.shape[0])
    prow = province.shape[0]
    # Table 1: rows 1..30 from the filtered rows; rows past the data are blanked.
    for i in range(1,31):
        if (i)< prow:
            changecell(tables[1].cell(i,1), str(province.iloc[[i],[4]].values[0][0]))
            changecell(tables[1].cell(i,2), int(province.iloc[[i],[7]].values[0][0]))
            changecell(tables[1].cell(i,3), '{:.2%}'.format(province.iloc[[i],[8]].values[0][0]))
        else:
            changecell(tables[1].cell(i,1), '')
            changecell(tables[1].cell(i,2), '')
            changecell(tables[1].cell(i,3), '')
    # Second half: the same procedure for the 漫入 workbook, paragraph 14 and
    # tables 2 and 3.
    # NOTE(review): 'm' and 'd' are single-letter placeholders here.
    for run in paragraphs[14].runs:
        replaceinrun('m', str(ymonth), run)
        replaceinrun('d', str(yday), run)
    filename = getfilename('漫入')
    print(filename)
    filenamein = filename
    wb = read_excel(filename, '','',0,'passwrod')
    print(wb)
    pdin = wb
    print(wb[['b','c','h','i']])
    # For this workbook the filter column is 'c' and the name columns are
    # 1 ('b') and 2 ('c').
    province = wb[wb['c'].isna()]
    print(province)
    print(province['h'].sum())
    sumall = province['h'].sum()/10000
    gdoutall = wb.iloc[[0],[7]].values[0][0]/10000
    reparagraph('7.1', '{:.2f}'.format(sumall-gdoutall), paragraphs[14])
    reparagraph('17.5', '{:.2f}'.format(province['h'].sum()/10000), paragraphs[14])
    reparagraph('10.5', '{:.2f}'.format(gdoutall), paragraphs[14])
    print(province['i'].sum())
    print(wb.iloc[[1],[2]].values[0][0], wb.iloc[[1],[3]].values[0][0])
    c5 = wb.iloc[[1],[2]].values[0][0]+'、'+wb.iloc[[2],[2]].values[0][0]+'、'+wb.iloc[[3],[2]].values[0][0]+'、'+wb.iloc[[4],[2]].values[0][0]+'、'+wb.iloc[[5],[2]].values[0][0]
    reparagraph('c', c5, paragraphs[14])
    s5 = province.iloc[[1],[1]].values[0][0]+'、'+province.iloc[[2],[1]].values[0][0]+'、'+province.iloc[[3],[1]].values[0][0]+'、'+province.iloc[[4],[1]].values[0][0]+'、'+province.iloc[[5],[1]].values[0][0]
    reparagraph('s', s5, paragraphs[14])
    i = 1
    while pd.notnull(wb.iloc[[i],[2]]).sum().sum()>0:
        changecell(tables[2].cell(i,1), str(wb.iloc[[i],[2]].values[0][0]))
        changecell(tables[2].cell(i,2), int(wb.iloc[[i],[7]].values[0][0]))
        changecell(tables[2].cell(i,3), '{:.2%}'.format(wb.iloc[[i],[8]].values[0][0]))
        i +=1
    if i<21:
        # NOTE(review): same two-element tuple as in the first half.
        for j in (i, 21):
            changecell(tables[2].cell(j,1), '')
            changecell(tables[2].cell(j,2), '')
            changecell(tables[2].cell(j,3), '')
            print(j, wb.iloc[[j],[1]].values[0][0])
    prow = province.shape[0]
    for i in range(1,31):
        if (i)< prow:
            changecell(tables[3].cell(i,1), str(province.iloc[[i],[1]].values[0][0]))
            changecell(tables[3].cell(i,2), int(province.iloc[[i],[7]].values[0][0]))
            changecell(tables[3].cell(i,3), '{:.2%}'.format(province.iloc[[i],[8]].values[0][0]))
        else:
            changecell(tables[3].cell(i,1), '')
            changecell(tables[3].cell(i,2), '')
            changecell(tables[3].cell(i,3), '')
    document.save('附件1:关于'+str(yyear)+'年'+str(ymonth)+'月'+str(yday)+'日广州市手机用户漫游情况的通报.docx')
    # Append both source frames to their CSV files (savefile adds a 'date' column).
    savefile(pdin,filenamein)
    savefile(pdout,filenameout)
def gdru():
    """Pivot the '广东省漫入省份排行' workbook and export the result to Excel.

    The workbook is read, column 'f' is renamed to '漫入地市', and the 'h'
    values are summed into a table indexed by '漫入地市' with one column per
    'b' value.  A '总计' column (row sums) and a '总计' row (column sums) are
    added, the source frame is appended to CSV through ``savefile``, and the
    pivot is written to '广东省漫入<date>.xlsx' in the working directory.
    """
    filename = getfilename('广东省漫入省份排行')
    print(filename)

    wb = read_excel(filename, '', '', 0, 'passwrod')
    wb.rename(columns={'f': '漫入地市'}, inplace=True)
    print(wb)

    wbp = wb.pivot_table(
        index='漫入地市',
        columns='b',
        values='h',
        aggfunc='sum',
    )
    # Row totals first, so the totals row added next also sums that column.
    wbp['总计'] = wbp.apply(lambda row: row.sum(), axis=1)
    wbp.loc['总计'] = wbp.apply(lambda column: column.sum())
    print(wbp)

    savefile(wb, filename)

    # Date text: from the first '202' up to the last five characters.
    date_start = filename.find('202')
    date_end = len(filename) - 5
    strdate = filename[date_start:date_end]
    print(strdate)
    savename = filename[:date_start]
    print(savename)
    wbp.to_excel('广东省漫入' + strdate + '.xlsx')
import openpyxl
from copy import copy
def copycell(source_cell, target_cell):
    """Copy the value, data type and style attributes of one cell to another.

    ``value`` is assigned first and ``data_type`` second, so the source's
    data type is what the target ends up with.  The style attributes
    (fill, border, font, number_format, protection, alignment) are each
    shallow-copied so the target does not share the source's objects.

    Args:
        source_cell: Cell to read from.
        target_cell: Cell to write to; modified in place.

    Returns:
        None.
    """
    # Local, explicit import: the file binds the bare name ``copy`` twice
    # (``xlutils.copy.copy`` and ``copy.copy``), so the function no longer
    # depends on which of those bindings happens to be the later one.
    from copy import copy as _shallow_copy

    target_cell.value = source_cell.value
    target_cell.data_type = source_cell.data_type
    # ``fill`` used to be copied twice; once is enough.
    for attribute in ('fill', 'border', 'font', 'number_format',
                      'protection', 'alignment'):
        setattr(target_cell, attribute,
                _shallow_copy(getattr(source_cell, attribute)))
def bjsh(table, datestr, rout, rin):
    """Append one row (date, rout, rin) below the last row of *table*.

    Columns 1-3 of the current last row are first cloned into the new row
    with ``copycell``; the three values are then written over the clones.
    """
    last_row = table.max_row
    new_row = last_row + 1
    for column in (1, 2, 3):
        copycell(
            table.cell(row=last_row, column=column),
            table.cell(row=new_row, column=column),
        )
    for column, value in enumerate((datestr, rout, rin), start=1):
        table.cell(row=new_row, column=column).value = value
def gz(table, datestr, rout, rin):
    """Append one row (date, rout, rin, rout - rin) below the last row of *table*.

    The arguments and the current last row number are printed, columns 1-4
    of the last row are cloned into the new row with ``copycell``, and the
    four values are then written over the clones.
    """
    last_row = table.max_row
    print(datestr, rout, rin, last_row)
    new_row = last_row + 1
    for column in (1, 2, 3, 4):
        copycell(
            table.cell(row=last_row, column=column),
            table.cell(row=new_row, column=column),
        )
    for column, value in enumerate((datestr, rout, rin), start=1):
        table.cell(row=new_row, column=column).value = value
    # Fourth column holds the difference of the two figures.
    table.cell(row=new_row, column=4).value = rout - rin
def gdman():
    """Append the latest figures to the '省漫统计' workbook and save a dated copy.

    Reads the 广东省 / 广州市 / 深圳市 漫入 and 漫出 ranking workbooks found
    by ``getfilename``, appends new rows to the sheets '广东', '广东SM TOP5',
    '北京', '上海', '广州(漫入)', '广州(漫出)', '深圳(漫入)', '深圳(漫出)',
    '广州漫入 TOP5' and '广州漫出 TOP5', appends every source frame to CSV
    through ``savefile`` and saves the workbook as '<date>省漫统计.xlsx'.

    Column letters ('a'..'i') are the names assigned by ``read_excel``;
    ``iloc`` column 7 is 'h'.

    NOTE(review): the indentation of this file was lost; the nesting below
    is reconstructed from syntax and should be checked against the original.
    """
    filenamegdin = getfilename('广东省漫入省份排行')
    filenamegdout = getfilename('广东省漫出省份排行')
    wbgdin = read_excel(filenamegdin, '','',0,'passwrod')
    wbgdin.rename(columns={'f':'漫入地市'},inplace=True)
    print(wbgdin)
    wbgdout = read_excel(filenamegdout, '','',0,'passwrod')
    wbgdout.rename(columns={'f':'漫入地市'},inplace=True)
    print(wbgdout)
    # Rows whose 'c' column is empty.
    provincegdin = wbgdin[wbgdin['c'].isna()]
    print(provincegdin)
    # NOTE(review): the 广州/深圳 漫出 frames below are filtered on
    # '漫入地市' (the renamed 'f'), whereas this 漫出 frame is filtered on
    # 'c' — confirm which column is intended.
    provincegdout = wbgdout[wbgdout['c'].isna()]
    print(provincegdout)
    print(provincegdin['h'].sum())
    print(provincegdout['h'].sum())
    # NOTE(review): the workbook saved at the end of this function also has
    # '省漫统计' and 'xls' in its name, so it matches this lookup on a later
    # run if it is left in the working directory.
    filenamesm = getfilename('省漫统计')
    print(filenamesm)
    data = openpyxl.load_workbook(filenamesm)
    # NOTE(review): get_named_ranges / get_sheet_names / get_sheet_by_name
    # appear to be deprecated openpyxl accessors — confirm they exist in the
    # installed openpyxl version.
    print(data.get_named_ranges())
    print(data.get_sheet_names())
    # Sheet '广东': clone the last row's first three cells, then overwrite
    # them with the date and the two 'h' totals.
    table = data.get_sheet_by_name('广东')
    print(table.title)
    nrows = table.max_row
    ncolumns = table.max_column
    copycell(table.cell(row=nrows , column=1), table.cell(row=nrows+1 , column=1))
    copycell(table.cell(row=nrows , column=2), table.cell(row=nrows+1 , column=2))
    copycell(table.cell(row=nrows , column=3), table.cell(row=nrows+1 , column=3))
    table.cell(row=nrows+1 , column=1).value = getdate(filenamegdin)
    table.cell(row=nrows+1 , column=2).value = provincegdout['h'].sum()
    table.cell(row=nrows+1 , column=3).value = provincegdin['h'].sum()
    # Sheet '广东SM TOP5': five 漫入 rows followed by five 漫出 rows, taken
    # from the first five filtered rows of each frame.
    table = data.get_sheet_by_name('广东SM TOP5')
    print(table.title)
    nrows = table.max_row
    datenum = getdatenum(filenamegdin)
    for i in range(1,6):
        table.cell(row=nrows+i , column=1).value = i
        table.cell(row=nrows+i , column=2).value = provincegdin.iloc[[i-1],[1]].values[0][0]
        table.cell(row=nrows+i , column=3).value = provincegdin.iloc[[i-1],[7]].values[0][0]
        table.cell(row=nrows+i , column=4).value = '漫入'
        table.cell(row=nrows+i , column=5).value = datenum
    for i in range(1,6):
        table.cell(row=nrows+5+i , column=1).value = i
        table.cell(row=nrows+5+i , column=2).value = provincegdout.iloc[[i-1],[4]].values[0][0]
        table.cell(row=nrows+5+i , column=3).value = provincegdout.iloc[[i-1],[7]].values[0][0]
        table.cell(row=nrows+5+i , column=4).value = '漫出'
        table.cell(row=nrows+5+i , column=5).value = datenum
    # Sheets '北京' and '上海': one row each, using the first row whose 'e'
    # (漫出 frame) / 'b' (漫入 frame) equals the city name. iloc[[0], ...]
    # raises if no row matches.
    table = data.get_sheet_by_name('北京')
    print(table.title)
    strdate = getdate(filenamegdin)
    city = '北京市'
    bjout = provincegdout[provincegdout['e']==city]
    print(bjout.iloc[[0],[7]].values[0][0],bjout)
    bjin = provincegdin[provincegdin['b']==city]
    print(bjin.iloc[[0],[7]].values[0][0],bjin)
    bjsh(table,strdate,bjout.iloc[[0],[7]].values[0][0],bjin.iloc[[0],[7]].values[0][0])
    table = data.get_sheet_by_name('上海')
    print(table.title)
    city = '上海市'
    bjout = provincegdout[provincegdout['e']==city]
    print(bjout.iloc[[0],[7]].values[0][0],bjout)
    bjin = provincegdin[provincegdin['b']==city]
    print(bjin.iloc[[0],[7]].values[0][0],bjin)
    bjsh(table,strdate,bjout.iloc[[0],[7]].values[0][0],bjin.iloc[[0],[7]].values[0][0])
    # 广州市 workbooks.
    gzfilenamegdin = getfilename('广州市漫入省份排行')
    gzfilenamegdout = getfilename('广州市漫出省份排行')
    gzwbgdin = read_excel(gzfilenamegdin, '','',0,'passwrod')
    gzwbgdin.rename(columns={'f':'漫入地市'},inplace=True)
    print(gzwbgdin)
    gzwbgdout = read_excel(gzfilenamegdout, '','',0,'passwrod')
    gzwbgdout.rename(columns={'f':'漫入地市'},inplace=True)
    print(gzwbgdout)
    gzprovincegdin = gzwbgdin[gzwbgdin['c'].isna()]
    print(gzprovincegdin)
    gzprovincegdout = gzwbgdout[gzwbgdout['漫入地市'].isna()]
    print(gzprovincegdout)
    print(gzprovincegdin['h'].sum())
    print(gzprovincegdout['h'].sum())
    # 深圳市 workbooks.
    szgzfilenamegdin = getfilename('深圳市漫入省份排行')
    szgzfilenamegdout = getfilename('深圳市漫出省份排行')
    szgzwbgdin = read_excel(szgzfilenamegdin, '','',0,'passwrod')
    szgzwbgdin.rename(columns={'f':'漫入地市'},inplace=True)
    print(szgzwbgdin)
    szgzwbgdout = read_excel(szgzfilenamegdout, '','',0,'passwrod')
    szgzwbgdout.rename(columns={'f':'漫入地市'},inplace=True)
    print(szgzwbgdout)
    szgzprovincegdin = szgzwbgdin[szgzwbgdin['c'].isna()]
    print(szgzprovincegdin)
    szgzprovincegdout = szgzwbgdout[szgzwbgdout['漫入地市'].isna()]
    # pinnum / gzin are only used for the debug print below.
    pinnum = gzprovincegdin.iloc[[1],[0]].values[0][0]
    gzin = gzwbgdin[1:int(pinnum-1)]
    print(gzin['h'].sum())
    print(gzprovincegdin)
    # City sheets: gz() receives the 'h' total of the filtered rows and the
    # 'h' value of the first filtered row, and stores their difference in
    # column 4.
    table = data.get_sheet_by_name('广州(漫入)')
    print(table.title)
    gz(table,strdate, gzprovincegdin['h'].sum(),gzprovincegdin.iloc[[0],[7]].values[0][0])
    table = data.get_sheet_by_name('广州(漫出)')
    print(table.title)
    gz(table,strdate, gzprovincegdout['h'].sum(),gzprovincegdout.iloc[[0],[7]].values[0][0])
    table = data.get_sheet_by_name('深圳(漫入)')
    print(table.title)
    gz(table,strdate, szgzprovincegdin['h'].sum(),szgzprovincegdin.iloc[[0],[7]].values[0][0])
    table = data.get_sheet_by_name('深圳(漫出)')
    print(table.title)
    gz(table,strdate, szgzprovincegdout['h'].sum(),szgzprovincegdout.iloc[[0],[7]].values[0][0])
    print(gzprovincegdin)
    # Sheet '广州漫入 TOP5': rows tagged '省外' come from the filtered rows,
    # rows tagged '省内' from the unfiltered frame; positions 1..6 are used
    # (position 0 is skipped).
    # NOTE(review): range(1,7) writes six rows per group although the sheet
    # is named TOP5, and datenum is taken from the 广东省 file name.
    table = data.get_sheet_by_name('广州漫入 TOP5')
    print(table.title)
    nrows = table.max_row
    datenum = getdatenum(filenamegdin)
    for i in range(1,7):
        table.cell(row=nrows+i , column=1).value = i
        table.cell(row=nrows+i , column=2).value = gzprovincegdin.iloc[[i],[1]].values[0][0]
        table.cell(row=nrows+i , column=3).value = gzprovincegdin.iloc[[i],[7]].values[0][0]
        table.cell(row=nrows+i , column=4).value = '省外'
        table.cell(row=nrows+i , column=5).value = datenum
    print(gzwbgdin)
    for i in range(1,7):
        table.cell(row=nrows+6+i , column=1).value = i
        table.cell(row=nrows+6+i , column=2).value = gzwbgdin.iloc[[i],[2]].values[0][0]
        table.cell(row=nrows+6+i , column=3).value = gzwbgdin.iloc[[i],[7]].values[0][0]
        table.cell(row=nrows+6+i , column=4).value = '省内'
        table.cell(row=nrows+6+i , column=5).value = datenum
    # Sheet '广州漫出 TOP5': same layout, names read from columns 4 and 5.
    table = data.get_sheet_by_name('广州漫出 TOP5')
    print(table.title)
    nrows = table.max_row
    datenum = getdatenum(filenamegdin)
    for i in range(1,7):
        table.cell(row=nrows+i , column=1).value = i
        table.cell(row=nrows+i , column=2).value = gzprovincegdout.iloc[[i],[4]].values[0][0]
        table.cell(row=nrows+i , column=3).value = gzprovincegdout.iloc[[i],[7]].values[0][0]
        table.cell(row=nrows+i , column=4).value = '省外'
        table.cell(row=nrows+i , column=5).value = datenum
    print(gzwbgdout)
    for i in range(1,7):
        table.cell(row=nrows+6+i , column=1).value = i
        table.cell(row=nrows+6+i , column=2).value = gzwbgdout.iloc[[i],[5]].values[0][0]
        table.cell(row=nrows+6+i , column=3).value = gzwbgdout.iloc[[i],[7]].values[0][0]
        table.cell(row=nrows+6+i , column=4).value = '省内'
        table.cell(row=nrows+6+i , column=5).value = datenum
    # Append every source frame to its CSV (savefile adds a 'date' column).
    savefile(wbgdin,filenamegdin)
    savefile(wbgdout,filenamegdout)
    savefile(gzwbgdin,gzfilenamegdin)
    savefile(gzwbgdout,gzfilenamegdout)
    savefile(szgzwbgdin,szgzfilenamegdin)
    savefile(szgzwbgdout,szgzfilenamegdout)
    # The result is written to the working directory under a new name; the
    # workbook that was loaded is not overwritten.
    data.save(datenum+'省漫统计.xlsx')
# Runs the statistics update at module level, i.e. every time this file is
# executed or imported.
gdman()
def compute():
file_dir="."
style = xlwt.XFStyle()
xlAPP=win32.Dispatch("Excel.Application")
xlAPP.Visible=False
iter=-1
password= 'passwrod'
for root,dirs,files in os.walk(file_dir):
for name in files:
iter+=1
print(name)
filename=os.path.join(root,name)