每周邮件发送:近两周 top30 的客户周环比
Win7
Python 3.6
Excel
# _*_ coding: utf-8 _*_
'''
// getConfig.py
从配置文件中读取信息
'''
from configparser import ConfigParser
class Conf():
    """Read settings from the INI-style configuration file.

    The file is re-read on every lookup, so edits made while a script is
    running are picked up by the next call.

    Note: ``ConfigParser.read`` silently skips a file it cannot open, so a
    wrong path surfaces as ``configparser.NoSectionError`` on lookup.
    """

    def __init__(self, path=None):
        """Remember which configuration file to read.

        Args:
            path: Optional path of the configuration file.  When omitted the
                original hard-coded location is used.
        """
        self._path = r'H:\SZ_数据\Python\c.s.conf' if path is None else path

    def _load(self):
        """Return a ``ConfigParser`` populated from ``self._path``."""
        fil = ConfigParser()
        fil.read(self._path)
        return fil

    def _options(self, sec, names):
        """Return the values of the options ``names`` in section ``sec`` as a tuple."""
        fil = self._load()
        return tuple(fil.get(sec, name) for name in names)

    def getEmail(self, sec, smt, email, pw):
        """Return ``(smtp server, sender address, password)`` from section ``sec``.

        ``smt``, ``email`` and ``pw`` are the option names to look up.
        """
        return self._options(sec, (smt, email, pw))

    def getToEmail(self, sec, toEmail):
        """Return the value of option ``toEmail`` in section ``sec``."""
        return self._load().get(sec, toEmail)

    def getInfo(self, sec, *options):
        """Return the values of ``options`` in section ``sec`` as a tuple.

        db.getUrl calls this with five option names (account, password, ip,
        port, database) and feeds the tuple straight into a ``%`` format, so
        the values are returned in the order the names were given.
        """
        return self._options(sec, options)
# _*_ coding: utf-8 _*_
'''
// sendEmail.py
Send email
'''
import smtplib
from getConfig import Conf
from email.header import Header
from email.mime.text import MIMEText
from email.utils import parseaddr, formataddr
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
def _format_addr(s):
# 格式化邮件地址
# Header 如是中文必须编码
#
name, addr = parseaddr(s)
return formataddr((Header(name, 'utf-8').encode(), addr))
def sendEmail(subject, message, files=None, to_addr='newIOSys'):
    """Send a plain-text mail, optionally with file attachments.

    Server, sender and password come from the ``mail_baidu`` section of the
    configuration file; the recipient list is the option ``to_addr`` of the
    ``to_addr`` section (a comma-separated string).

    Args:
        subject: Subject line (may contain non-ASCII text).
        message: Plain-text body.
        files: Optional sequence of file paths to attach.  Paths that are not
            existing files are reported and skipped.
        to_addr: Name of the option holding the recipient list.

    A failure inside ``sendmail`` is reported with ``print`` and swallowed;
    connection and login errors propagate to the caller.
    """
    import os

    # login info
    conf = Conf()
    smt, fro, pw = conf.getEmail('mail_baidu', 'sender server', 'email',
                                 'password')
    to = conf.getToEmail('to_addr', to_addr)
    # "a@x.com, b@x.com" must not yield envelope recipients with stray
    # blanks or empty entries.
    recipients = [addr.strip() for addr in to.split(',') if addr.strip()]

    # header of email
    msg = MIMEMultipart()
    msg['From'] = fro
    msg['To'] = to
    msg['Subject'] = Header(subject, 'utf-8').encode()

    # main body
    msg.attach(MIMEText(message, 'plain', 'utf-8'))

    # attachments
    for path in files or ():
        if not os.path.isfile(path):
            print('Attachment not found, skipped: {}'.format(path))
            continue
        with open(path, 'rb') as f:
            part = MIMEApplication(f.read())
        part.add_header('Content-Disposition', 'attachment',
                        filename=os.path.basename(path))
        msg.attach(part)

    # send
    with smtplib.SMTP(smt, 25) as smtp:
        smtp.ehlo()
        smtp.starttls()
        # The session must be re-identified after switching to TLS.
        smtp.ehlo()
        smtp.set_debuglevel(1)
        smtp.login(fro, pw)
        try:
            smtp.sendmail(fro, recipients, msg.as_string())
        except Exception as e:
            print('Failed send: {}'.format(e))
        else:
            print('Success send.')
# _*_ coding: utf-8 _*_
'''
// db.py
'''
from getConfig import Conf
def getUrl(sec, acc, pw, ip, port, db):
    """Build the ``mssql+pymssql`` connection URL from the configuration file.

    All six arguments are forwarded unchanged to ``Conf.getInfo``; its result
    is interpolated into the five ``%s`` slots (user, password, host, port,
    database), so it must be a 5-tuple in that order.

    NOTE(review): ``sec`` is presumably the section name and the remaining
    arguments option names -- confirm against ``Conf.getInfo``.
    """
    settings = Conf().getInfo(sec, acc, pw, ip, port, db)
    return 'mssql+pymssql://%s:%s@%s:%s/%s' % settings
# _*_ coding: utf-8 _*_
'''
// lt_top30Cash.py
'''
import time
import pandas as pd
from db import getUrl
from datetime import date, timedelta
from sqlalchemy import create_engine
# Location of the configuration file.
PATH = r'H:\SZ_数据\Python\c.s.conf'
# Connection URL, built once at import time from the 'SQL Server' section.
URL = getUrl('SQL Server', 'acc', 'pw', 'ip', 'port', 'db')
# NOTE(review): eval() executes whatever expression is typed at the prompt.
# The expected input is a pair such as (20200401,20200430); consider
# ast.literal_eval if the prompt can ever be reached by untrusted users.
ST_DAT, ED_DAT = eval(input('输入:QTD起始日,终止日;如(20200401,20200430):'))


# Time helpers
def now():
    """Return the current value of the performance counter, in seconds."""
    return time.perf_counter()


def dat(n):
    """Return the date ``n`` days before today as 'YYYYMMDD'."""
    return (date.today() - timedelta(n)).strftime('%Y%m%d')


def mDat(n):
    """Return the date ``n`` days before today as 'MM.DD'."""
    return (date.today() - timedelta(n)).strftime('%m.%d')


def qDat(n):
    """Return the quarter (1-4) of the date ``n`` days before today."""
    month = int((date.today() - timedelta(n)).strftime('%m'))
    return (month - 1) // 3 + 1


def yDat(n):
    """Return the four-digit year of the date ``n`` days before today."""
    return (date.today() - timedelta(n)).strftime('%Y')
def getData(sql, url):
    """Run ``sql`` against the database at ``url`` and return its rows.

    Args:
        sql: SQL statement to execute.
        url: SQLAlchemy connection URL.  (Previously ignored in favour of the
            module-level ``URL``; every caller in this file passes ``URL``,
            so honouring the argument does not change their behaviour.)

    Returns:
        A lazy ``map`` yielding each fetched row as a ``list``.  The rows are
        fully fetched before the connection is released, so the iterator can
        be consumed after this function returns.
    """
    engine = create_engine(url)
    try:
        with engine.begin() as conn:
            rows = conn.execute(sql).fetchall()
    finally:
        # One engine is created per call; release its connection pool.
        engine.dispose()
    return map(list, rows)
def week(data):
    """Pivot weekly spending rows into one row per user.

    Args:
        data: Iterable of ``(用户名, 类别, 金额, 周)`` rows.

    Returns:
        DataFrame indexed by 用户名 whose columns are a three-level index
        ``('金额', 类别, 周)``.  Cells use pivot_table's default aggregation
        (mean); combinations with no rows are NaN.
    """
    frame = pd.DataFrame(list(data), columns=('用户名', '类别', '金额', '周'))
    return pd.pivot_table(frame, values=['金额'], index=['用户名'],
                          columns=['类别', '周'])
def getP4P(df):
    """Add per-week P4P totals to the pivot from ``week`` and flatten it, in place.

    For every week label (column level 2) a column ``('金额', 'P4P', week)``
    is added holding the sum over all product categories (column level 1).
    The three-level columns are then flattened to ``类别 + 周`` strings
    (e.g. ``'P4P上周'``) and the index is moved into a regular column.

    Fixes over the previous version:
    * A user with no spending in one category used to get NaN for the whole
      total (NaN + x), which a later ``fillna(0)`` turned into 0.  Missing
      cells now count as 0.
    * Any number of categories is supported instead of exactly three.
    * Flattened names are derived from each column's own labels, so they
      stay correct when some (category, week) combination is absent.

    Args:
        df: DataFrame with three-level columns ``('金额', 类别, 周)``.

    Returns:
        None; ``df`` is modified in place.
    """
    # Week labels: week before last, last week, this week.
    week_labels = df.columns.get_level_values(2).unique()

    # Compute every total before adding columns so the new P4P columns can
    # never be counted as a category themselves.
    totals = {}
    for wk in week_labels:
        in_week = df.columns.get_level_values(2) == wk
        totals[wk] = df.loc[:, in_week].sum(axis=1)  # NaN cells are skipped
    for wk in week_labels:
        df[('金额', 'P4P', wk)] = totals[wk]

    # Flatten ('金额', category, week) -> 'categoryweek'.
    df.columns = [category + wk for _, category, wk in df.columns]
    df.reset_index(inplace=True)
def merge(basicInfo, qtd, data):
    """Join customer info, QTD totals and weekly spending on 用户名.

    Args:
        basicInfo: Iterable of 4-field rows, labelled
            用户名 / 广告主 / 二级行业 / 区域.
        qtd: Iterable of 2-field rows, labelled 用户名 / QTD.
        data: DataFrame with a 用户名 column (the weekly figures).

    Returns:
        DataFrame with one row per ``basicInfo`` row (left joins); every
        missing value is replaced by 0.
    """
    col1 = ['用户名', '广告主', '二级行业', '区域']
    col2 = ['用户名', 'QTD']
    df = pd.merge(pd.DataFrame(list(basicInfo), columns=col1),
                  pd.DataFrame(list(qtd), columns=col2),
                  how='left', on='用户名')
    # Collapse every region starting with "HK" into plain "HK".  regex=True
    # is required: since pandas 2.0 str.replace treats the pattern as a
    # literal string by default, which would silently disable this rule.
    df['区域'] = df['区域'].str.replace(r'^HK.+', 'HK', regex=True)
    # weekly figures
    df = pd.merge(df, data, how='left', on='用户名')
    df.fillna(0, inplace=True)
    return df
def rank(df):
    """Rank rows by two-week P4P spending, in place.

    Adds a ``上周排名`` column (rank by 上上周 + 上周) and leaves the frame
    sorted by 上周 + 本周 with a 1-based index named ``本周排名``.  The
    helper column ``sum`` and every column whose name contains ``上上`` are
    removed afterwards.

    Returns:
        None; ``df`` is modified in place.
    """
    def _order_by_pair(first, second, label):
        # Sort descending by the combined spending and number the rows 1..n.
        df['sum'] = df[first] + df[second]
        df.sort_values('sum', inplace=True, ascending=False)
        df.reset_index(drop=True, inplace=True)
        df.index = list(range(1, len(df) + 1))
        df.index.name = label

    # Previous ranking: keep it as an ordinary column.
    _order_by_pair('P4P上上周', 'P4P上周', '上周排名')
    df.reset_index(inplace=True)

    # Current ranking: stays as the index.
    _order_by_pair('P4P上周', 'P4P本周', '本周排名')

    obsolete = ['sum'] + [name for name in df.columns if '上上' in name]
    df.drop(columns=obsolete, inplace=True)
def ringRatio(df):
    """Add week-over-week growth columns to ``df`` in place.

    ``环比增长`` is 本周 minus 上周; ``环比增长率`` is that difference divided
    by 上周.  No guard is applied when 上周 is 0, so the rate follows pandas
    float-division rules (inf / NaN) in that case.

    Returns:
        None; ``df`` is modified in place.
    """
    current, previous = df['P4P本周'], df['P4P上周']
    growth = current - previous
    df['环比增长'] = growth
    df['环比增长率'] = growth / previous
def fmt(df):
    """Write the top 30 rows to an Excel workbook, format it, and return its path.

    The sheet is written with pandas starting at the third row (two rows are
    left free above the header), then reopened through xlwings to add a
    caption row, borders, column widths, bold headers and number formats.

    Changes over the previous version:
    * The QTD caption used the column index as a day offset for
      ``yDat``/``qDat``; it now uses 1 (yesterday), the same end date used
      for the file name and the this-week caption.
    * The workbook is closed even when formatting fails.
    * Header cells that are not text are skipped instead of raising.

    Args:
        df: Ranked DataFrame; only the first 30 rows are exported.

    Returns:
        Path of the saved workbook.
    """
    import xlwings as xw

    # Output
    path = (r'H:\SZ_数据\Download\Top30Cash(' + mDat(14)
            + '_' + mDat(1) + ').xlsx')
    with pd.ExcelWriter(path) as writer:
        df[:30].to_excel(writer, startrow=2, freeze_panes=(3, 0))

    # Adjust the sheet
    wb = xw.Book(path)
    try:
        sht = wb.sheets[0]
        region = sht['A3'].current_region
        cntRow = region.rows.count
        cntCol = region.columns.count

        # Captions in the row above the header
        sht[0, 0].value = 'P4P'
        for n, v in enumerate(sht[2, :cntCol].value):
            if not isinstance(v, str):
                continue
            if '排名' in v:
                sht[1, n].clear()
            elif '上' in v:
                sht[1, n].value = mDat(14) + '-' + mDat(7)
            elif '本' in v:
                sht[1, n].value = mDat(7) + '-' + mDat(1)
            elif 'QTD' in v:
                sht[1, n].value = yDat(1) + 'Q' + str(qDat(1)) + '现金'
            elif '环比' in v:
                sht[1, n].value = 'P4P现金'

        # Borders: Excel border indexes 7-12 (the four edges plus the inside
        # vertical and horizontal lines).
        bordered = sht[1:cntRow, :cntCol].current_region
        for b in range(7, 13):
            bordered.api.Borders(b).weight = 2
        # Column width
        sht[:, :cntCol].autofit()
        # Bold the caption and header rows
        sht[:3, :cntCol].api.Font.Bold = True
        # Number formats: thousands separator everywhere, percent in the
        # last column
        sht[3:, :cntCol-1].api.NumberFormat = '#,##0'
        sht[3:, cntCol-1].api.NumberFormat = '0.0%'

        wb.save()
    finally:
        wb.close()
    return path
def main():
    """Build the top-30 advertiser cash report and mail it.

    Steps: query customer info, QTD totals and three weeks of spending;
    pivot and total the weekly figures; join everything; aggregate per
    advertiser; rank; compute week-over-week growth; export to Excel and
    send the workbook as a mail attachment.
    """
    # basicInfo
    # NOTE(review): the third selected column is 信誉成长值, but merge()
    # labels it 二级行业 -- confirm which one is intended.
    sql1 = "SELECT 用户名, 广告主, 信誉成长值, 区域 FROM basicInfo"
    # QTD
    # NOTE(review): ST_DAT / ED_DAT come from console input and are
    # interpolated into the statement text.
    sql2 = "SELECT * FROM getCashSUM('%s', '%s')" % (ST_DAT, ED_DAT)
    # Spending of the last three weeks
    # NOTE(review): the SQL function defined later in this file is named
    # getThreeWeekCash -- confirm that getThrWeekCash exists in the database.
    sql3 = '''SELECT * FROM getThrWeekCash(%s)''' % dat(21)

    basicInfo = getData(sql1, URL)
    qtd = getData(sql2, URL)
    data = getData(sql3, URL)

    # week
    w = week(data)
    # calculate P4P (in place)
    getP4P(w)
    # merge
    df = merge(basicInfo, qtd, w)
    # Aggregate per advertiser.  numeric_only=True keeps the long-standing
    # behaviour of dropping the text column 用户名; pandas >= 2.0 would
    # otherwise concatenate the strings into the result.
    df = df.groupby(['广告主', '二级行业', '区域']).sum(numeric_only=True)
    df.reset_index(inplace=True)
    # rank (in place)
    rank(df)
    # week-over-week growth (in place)
    ringRatio(df)
    # format and export
    path = fmt(df)
    # send by mail
    from sendEmail import sendEmail
    sendEmail('Top 30广告主现金', ' 见附件。', [path])
if __name__ == '__main__':
    # Time the whole run and report it in minutes.
    st = now()
    main()
    elapsed_minutes = (now() - st) / 60
    print('Runtime: {:.3f} min'.format(elapsed_minutes))
# S1 在DataFrame中进行数据聚合
# 客户起始消费日不一致,聚合起始点不一致,聚合结果错误
# sql 函数替代
#
df.resample('7D').sum()
-- S2: spending of the three most recent weeks
IF OBJECT_ID('getThreeWeekCash', 'IF') IS NOT NULL
    DROP FUNCTION getThreeWeekCash
GO
-- Inline table-valued function.
-- @st is the first day of the three-week window (e.g. '20200401').
-- Returns one row per (用户名, 类别) and week with the summed 金额; the 周
-- column labels the week: 上上周 = days [0, 7), 上周 = [7, 14),
-- 本周 = [14, 21) counted from @st.
-- UNION ALL is used because the three branches can never produce the same
-- row (each carries a different constant week label and is already unique
-- per GROUP BY key), so UNION's de-duplication pass would be wasted work.
CREATE FUNCTION [dbo].[getThreeWeekCash]
(
    @st varchar(10)
)
RETURNS TABLE
AS
RETURN(
    SELECT 用户名, 类别, sum(金额) AS 金额, '上上周' AS '周'
    FROM 现金
    WHERE 日期 >= @st
      AND 日期 < DATEADD(D, 7, @st)
      AND 类别 IN ('搜索点击', '新产品', '自主投放')
    GROUP BY 用户名, 类别
    UNION ALL
    SELECT 用户名, 类别, sum(金额) AS 金额, '上周' AS '周'
    FROM 现金
    WHERE 日期 >= DATEADD(D, 7, @st)
      AND 日期 < DATEADD(D, 14, @st)
      AND 类别 IN ('搜索点击', '新产品', '自主投放')
    GROUP BY 用户名, 类别
    UNION ALL
    SELECT 用户名, 类别, sum(金额) AS 金额, '本周' AS '周'
    FROM 现金
    WHERE 日期 >= DATEADD(D, 14, @st)
      AND 日期 < DATEADD(D, 21, @st)
      AND 类别 IN ('搜索点击', '新产品', '自主投放')
    GROUP BY 用户名, 类别
)
# pd.DataFrame 中自动迭代(生成器):3.6 环境下不支持,需先转换为 list
# pd.DataFrame(list(tu))
In [35]: tu = (i for i in range(10))
tu
Out[37]: <generator object <genexpr> at 0x0000023319776B48>
pd.DataFrame(tu)
Out[36]:
0
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
ddf
Out[156]:
0
0 ac
1 ad
0 cc
0 acdd
1 aacdf
ddf[0].str.replace(r'^a.+', 'a')
Out[155]:
0 a
1 a
0 cc
0 a
1 a
Name: 0, dtype: object
# 3.7
df.sort_values('sum', inplace=True, ascending=False, ignore_index=True)
# 3.6
df.sort_values('sum', inplace=True, ascending=False)
df.reset_index(drop=True, inplace=True)
-- 不同数据库之间,存储过程不能直接调用 ?
-- [Output].dbo.pr_check 'pr_checkFunc'
--
-- Drops the stored procedure named @obj if it exists.
-- @obj: name of the procedure to drop.
CREATE PROC [dbo].[pr_check]
@obj nvarchar(50)
AS
BEGIN
DECLARE @sql nvarchar(max)
-- The name is concatenated into the statement text: once as a quoted string
-- for OBJECT_ID (type 'P' = stored procedure) and once as the DROP target.
SET @sql = 'IF OBJECT_ID(''' + @obj + ''', ''P'') IS NOT NULL DROP PROC ' + @obj
-- NOTE(review): the statement text contains no @obj placeholder, so the
-- parameter definition and value passed here are not used by it; the name is
-- also not escaped (e.g. with QUOTENAME) -- confirm callers are trusted.
EXEC sp_executesql @sql, N'@obj nvarchar(50)', @obj
END
GO
EXEC pr_check 'pr_checkFunc'
GO
-- Drops the inline table-valued function named @name if it exists.
-- @name: name of the function to drop.
CREATE PROC pr_checkFunc
@name nvarchar(50)
AS
BEGIN
DECLARE @sql nvarchar(max)
-- The name is concatenated into the statement text: once as a quoted string
-- for OBJECT_ID (type 'IF' = inline table-valued function) and once as the
-- DROP target.
SET @sql = 'IF OBJECT_ID(''' + @name + ''', ''IF'') IS NOT NULL DROP FUNCTION ' + @name
-- Echo the generated statement before running it.
PRINT @sql
-- NOTE(review): the statement text contains no @name placeholder, so the
-- parameter definition and value passed here are not used by it; the name is
-- also not escaped (e.g. with QUOTENAME) -- confirm callers are trusted.
EXEC sp_executesql @sql, N'@name nvarchar(50)',@name
END
GO