【综合案例】sqlalchemy + pandas + xlwings + smtplib + configparser

需求

每周邮件发送:近两周 top30 的客户周环比

环境 & 工具

Win7
Python 3.6
Excel

流程

【综合案例】sqlalchemy + pandas + xlwings + smtplib + configparser_第1张图片

代码

  • 访问配置文件
# _*_ coding: utf-8 _*_
'''
// getConfig.py
从配置文件中读取信息
'''

from configparser import ConfigParser

class Conf():
    """Read mail and database settings from an INI-style configuration file.

    Every lookup re-reads the file with ``ConfigParser``, so edits to the
    file are picked up without re-creating the object.
    """

    # Location used when no explicit path is given.
    DEFAULT_PATH = r'H:\SZ_数据\Python\c.s.conf'

    def __init__(self, path=None):
        """Remember which file to read.

        Args:
            path: Optional path of the configuration file; defaults to
                ``DEFAULT_PATH``.
        """
        self._path = self.DEFAULT_PATH if path is None else path

    def _read(self, sec, *options):
        """Return the values of *options* in section *sec* as a tuple."""
        fil = ConfigParser()
        fil.read(self._path)
        return tuple(fil.get(sec, opt) for opt in options)

    def getEmail(self, sec, smt, email, pw):
        """Return ``(server, sender address, password)`` from section *sec*.

        Args:
            sec: Section name.
            smt: Option holding the SMTP server.
            email: Option holding the sender address.
            pw: Option holding the password.
        """
        return self._read(sec, smt, email, pw)

    def getToEmail(self, sec, toEmail):
        """Return the value of option *toEmail* in section *sec*."""
        return self._read(sec, toEmail)[0]

    def getInfo(self, sec, acc, pw, ip, port, db):
        """Return ``(account, password, host, port, database)`` from *sec*.

        ``db.getUrl`` calls this method to fill the five placeholders of its
        connection URL; each argument after *sec* is the name of the option
        to look up, and the values come back in the same order.
        """
        return self._read(sec, acc, pw, ip, port, db)
        

  • 发送邮件
# _*_ coding: utf-8 _*_
'''
// sendEmail.py
Send email

'''

import smtplib
from getConfig import Conf
from email.header import Header
from email.mime.text import MIMEText
from email.utils import parseaddr, formataddr
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication

def _format_addr(s):
    """Return *s* as an address string whose display name is header-encoded.

    The input is split into display name and address; the name is encoded
    as a UTF-8 mail header (required when it contains non-ASCII text such
    as Chinese) and the two parts are joined back together.
    """
    display_name, address = parseaddr(s)
    encoded_name = Header(display_name, 'utf-8').encode()
    return formataddr((encoded_name, address))

def sendEmail(subject, message, files=None, to_addr='newIOSys'):
    """Send a plain-text e-mail, optionally with file attachments.

    Server, sender and password come from the ``mail_baidu`` section of the
    configuration file; the recipient list is the comma-separated value of
    option *to_addr* in the ``to_addr`` section.

    Args:
        subject: Subject line (encoded as a UTF-8 header).
        message: Plain-text body.
        files: Path, or iterable of paths, to attach. Entries that are not
            existing files are reported and skipped.
        to_addr: Name of the option holding the recipient address(es).

    A failure of the final ``sendmail`` call is printed, not raised;
    connection and login errors propagate to the caller.
    """
    import os

    # login info
    conf = Conf()
    smt, fro, pw = conf.getEmail('mail_baidu', 'sender server', 'email',
                                 'password')
    to = conf.getToEmail('to_addr', to_addr)
    # header of email
    msg = MIMEMultipart()
    msg['From'] = fro
    msg['To'] = to
    msg['Subject'] = Header(subject, 'utf-8').encode()
    # Main Body
    msg.attach(MIMEText(message, 'plain', 'utf-8'))
    # attachment
    if files is None:
        files = []
    elif isinstance(files, str):
        # A bare string would otherwise be iterated character by character.
        files = [files]
    for path in files:
        if not os.path.isfile(path):
            print('Attachment skipped (not a file): {}'.format(path))
            continue
        with open(path, 'rb') as f:
            part = MIMEApplication(f.read())
        part.add_header('Content-Disposition', 'attachment',
                        filename=os.path.basename(path))
        msg.attach(part)
    # Strip blanks so "a@x.com, b@x.com" yields two clean addresses.
    recipients = [addr.strip() for addr in to.split(',') if addr.strip()]
    # Send
    with smtplib.SMTP(smt, 25) as smtp:
        smtp.ehlo()
        smtp.starttls()
        smtp.ehlo()
        # Echoes the SMTP dialogue for troubleshooting.
        smtp.set_debuglevel(1)
        smtp.login(fro, pw)
        try:
            smtp.sendmail(fro, recipients, msg.as_string())
        except Exception as e:
            print('Failed send: {}'.format(e))
        else:
            print('Success send.')

  • 构造访问数据库URL
# _*_ coding: utf-8 _*_
'''
// db.py
'''
from getConfig import Conf

def getUrl(sec, acc, pw, ip, port, db):
    """Build the ``mssql+pymssql`` connection URL from the configuration file.

    Args:
        sec: Section of the configuration file to read.
        acc, pw, ip, port, db: Names of the options holding the account,
            password, host, port and database name.

    Returns:
        The URL string ``mssql+pymssql://user:password@host:port/database``.

    Relies on ``Conf.getInfo`` returning the five values in the order above
    -- TODO confirm against getConfig.py.
    """
    from urllib.parse import quote

    conf = Conf()
    user, password, host, portNo, database = conf.getInfo(sec, acc, pw, ip,
                                                          port, db)
    # Percent-encode the credentials: a raw '@', ':' or '/' inside them
    # would be read as a URL delimiter and break the connection string.
    return 'mssql+pymssql://%s:%s@%s:%s/%s' % (
        quote(str(user), safe=''), quote(str(password), safe=''),
        host, portNo, database)
    
  • 主代码
# _*_ coding: utf-8 _*_
'''
// lt_top30Cash.py

'''
import time
import pandas as pd
from db import getUrl
from datetime import date, timedelta
from sqlalchemy import create_engine

# Configuration file location (not referenced in the visible part of this script).
PATH = r'H:\SZ_数据\Python\c.s.conf'
# Connection URL assembled from the 'SQL Server' section of the config file.
URL = getUrl('SQL Server', 'acc', 'pw', 'ip', 'port', 'db')
# NOTE(review): eval() executes whatever is typed at the prompt; consider
# ast.literal_eval if the input can ever come from an untrusted source.
ST_DAT, ED_DAT = eval(input('输入:QTD起始日,终止日;如(20200401,20200430):'))


# Time helpers: every date helper takes n = number of days before today.
def now():
    """Return the current performance-counter reading in seconds."""
    return time.perf_counter()


def dat(n):
    """Return the date n days ago formatted as YYYYMMDD."""
    day = date.today() - timedelta(n)
    return day.strftime('%Y%m%d')


def mDat(n):
    """Return the date n days ago formatted as MM.DD."""
    day = date.today() - timedelta(n)
    return day.strftime('%m.%d')


def qDat(n):
    """Return the quarter (1-4) of the date n days ago."""
    day = date.today() - timedelta(n)
    month = int(day.strftime('%m'))
    return (month - 1) // 3 + 1


def yDat(n):
    """Return the four-digit year of the date n days ago as a string."""
    day = date.today() - timedelta(n)
    return day.strftime('%Y')

def getData(sql, url):
    """Run *sql* against the database at *url* and return its rows.

    Args:
        sql: SQL statement to execute.
        url: SQLAlchemy connection URL.

    Returns:
        An iterator yielding each fetched row as a list. All rows are
        fetched before the connection is released, so the iterator can be
        consumed at any time.
    """
    # Use the url argument (the original ignored it in favour of the
    # module-level URL) and release the pooled connections afterwards.
    engine = create_engine(url)
    try:
        with engine.begin() as conn:
            rows = conn.execute(sql).fetchall()
    finally:
        engine.dispose()
    return map(list, rows)

def week(data):
    """Pivot weekly spending rows into one row per user.

    *data* is an iterable of 4-item rows (用户名, 类别, 金额, 周). The result
    is indexed by 用户名 with three-level columns (金额, 类别, 周); cells are
    aggregated with pivot_table's default aggregation.
    """
    frame = pd.DataFrame(list(data), columns=('用户名', '类别', '金额', '周'))
    return pd.pivot_table(frame, values=['金额'], index=['用户名'],
                          columns=['类别', '周'])

def getP4P(df):
    """Add per-week P4P totals and flatten the column index, in place.

    *df* is the frame produced by ``week``: three-level columns
    (金额, 类别, 周). For every week a column (金额, 'P4P', 周) is added that
    sums all categories of that week. The columns are then renamed to the
    flat string ``类别 + 周`` and the index becomes an ordinary column.

    Returns None; *df* is modified.
    """
    # Snapshot the category columns before any P4P column is appended.
    srcCols = list(df.columns)
    # Weeks present in the data, in order of first appearance.
    datLis = df.columns.get_level_values(2).unique()
    for d in datLis:
        total = None
        for col in srcCols:
            if col[2] != d:
                continue
            # A category with no spend counts as 0 instead of turning the
            # whole total into NaN; the total stays NaN only when every
            # category is missing for that user and week.
            total = df[col] if total is None else total.add(df[col],
                                                            fill_value=0)
        df[('金额', 'P4P', d)] = total
    # Name each column from its own (类别, 周) labels so the result does not
    # depend on column order or on every category having every week.
    df.columns = [col[1] + col[2] for col in df.columns]
    df.reset_index(inplace=True)

def merge(basicInfo, qtd, data):
    """Join customer info, QTD totals and weekly spending on 用户名.

    Args:
        basicInfo: Iterable of 4-item rows (用户名, 广告主, 二级行业, 区域).
        qtd: Iterable of 2-item rows (用户名, QTD).
        data: DataFrame with a 用户名 column holding the weekly figures.

    Returns:
        A DataFrame with one row per basicInfo row (left joins); missing
        values are filled with 0.
    """
    # basicInfo & qtd
    col1 = ['用户名', '广告主', '二级行业', '区域']
    col2 = ['用户名', 'QTD']
    info = pd.DataFrame(list(basicInfo), columns=col1)
    totals = pd.DataFrame(list(qtd), columns=col2)
    df = pd.merge(info, totals, how='left', on='用户名')
    # region -> hk: collapse every region that starts with "HK" into "HK".
    # regex=True is explicit because newer pandas treats the pattern as a
    # literal string by default, which would leave the values untouched.
    df['区域'] = df['区域'].str.replace(r'^HK.+', 'HK', regex=True)
    # week
    df = pd.merge(df, data, how='left', on='用户名')
    df.fillna(0, inplace=True)
    return df
    
def rank(df):
    """Add last-week and this-week rankings to *df*, in place.

    The rows are first ordered by P4P上上周 + P4P上周 and numbered from 1;
    that numbering is kept as the column 上周排名. They are then re-ordered
    by P4P上周 + P4P本周 and numbered again; that numbering becomes the
    index, named 本周排名. Finally the helper column and every column whose
    name contains 上上 are dropped. Returns None.
    """
    def _renumber(label):
        # Order by the helper column, descending, and number rows from 1.
        df.sort_values('sum', inplace=True, ascending=False)
        df.reset_index(drop=True, inplace=True)
        df.index = list(range(1, len(df) + 1))
        df.index.name = label

    # Last week's ranking, moved from the index into a regular column.
    df['sum'] = df['P4P上上周'] + df['P4P上周']
    _renumber('上周排名')
    df.reset_index(inplace=True)
    # This week's ranking stays as the index.
    df['sum'] = df['P4P上周'] + df['P4P本周']
    _renumber('本周排名')
    obsolete = ['sum'] + [col for col in df.columns if '上上' in col]
    df.drop(columns=obsolete, inplace=True)

def ringRatio(df):
    """Add week-over-week growth columns to *df*, in place.

    环比增长 is P4P本周 minus P4P上周; 环比增长率 is that difference divided by
    P4P上周. Returns None.
    """
    previous = df['P4P上周']
    growth = df['P4P本周'] - previous
    df['环比增长'] = growth
    df['环比增长率'] = growth / previous

def fmt(df):
    """Write the first 30 rows of *df* to an Excel file and format it.

    The sheet is written with pandas starting at the third row, then
    reopened through xlwings (which drives Excel) to add the group labels
    in the second row, borders, bold headers, column widths and number
    formats.

    Returns:
        The path of the saved workbook.
    """
    # Output
    path = (r'H:\SZ_数据\Download\Top30Cash(' + mDat(14) + '_' + mDat(1)
            + ').xlsx')
    with pd.ExcelWriter(path) as writer:
        df[:30].to_excel(writer, startrow=2, freeze_panes=(3, 0))
    # Edit the sheet
    import xlwings as xw
    lastWeek = mDat(14) + '-' + mDat(7)
    thisWeek = mDat(7) + '-' + mDat(1)
    # The original passed the column position as the days-ago offset here;
    # use yesterday, the same end date the file name and week labels use.
    qtdLabel = yDat(1) + 'Q' + str(qDat(1)) + '现金'
    wb = xw.Book(path)
    try:
        sht = wb.sheets[0]
        cntRow = sht['A3'].current_region.rows.count
        cntCol = sht['A3'].current_region.columns.count
        # Labels: row 2 gets a group label chosen from the header in row 3.
        sht[0, 0].value = 'P4P'
        for n, v in enumerate(sht[2, :cntCol].value):
            if not isinstance(v, str):
                # Empty or non-text header cell: nothing to match against.
                continue
            if '排名' in v:
                sht[1, n].clear()
            elif '上' in v:
                sht[1, n].value = lastWeek
            elif '本' in v:
                sht[1, n].value = thisWeek
            elif 'QTD' in v:
                sht[1, n].value = qtdLabel
            elif '环比' in v:
                sht[1, n].value = 'P4P现金'
        # Borders: indices 7-12 are Excel's edge and inside border lines.
        for b in range(7, 13):
            sht[1:cntRow, :cntCol].current_region.api.Borders(b).weight = 2
        # Column widths
        sht[:, :cntCol].autofit()
        # Bold header rows
        sht[:3, :cntCol].api.Font.Bold = True
        # Number formats: thousands separator everywhere, percent in the
        # last column.
        sht[3:, :cntCol-1].api.NumberFormat = '#,##0'
        sht[3:, cntCol-1].api.NumberFormat = '0.0%'
        wb.save()
    finally:
        # Close the workbook even if formatting failed.
        wb.close()
    return path
    
def main():
    """Build the Top-30 report and mail it.

    Steps: query the three data sets, pivot the weekly spending, compute
    P4P totals, merge everything per user, aggregate per advertiser, rank,
    compute week-over-week growth, write the Excel file and send it.
    """
    # basicInfo
    # NOTE(review): the third selected column is 信誉成长值 but merge() labels
    # it 二级行业 -- confirm which one is intended.
    sql1 = "SELECT 用户名, 广告主, 信誉成长值, 区域 FROM basicInfo"
    # QTD
    # NOTE(review): values are interpolated into the SQL text; they come
    # from the operator prompt at start-up.
    sql2 = "SELECT * FROM getCashSUM('%s', '%s')" % (ST_DAT, ED_DAT)
    # Spending of the last three weeks
    sql3 = '''SELECT * FROM getThrWeekCash(%s)''' % dat(21)
    basicInfo = getData(sql1, URL)
    qtd = getData(sql2, URL)
    data = getData(sql3, URL)
    # week
    w = week(data)
    # calculate P4P
    getP4P(w)
    # merge
    df = merge(basicInfo, qtd, w)
    # groupby: sum only the numeric columns. Without numeric_only newer
    # pandas also "sums" the 用户名 strings by concatenating them.
    df = df.groupby(['广告主', '二级行业', '区域']).sum(numeric_only=True)
    df.reset_index(inplace=True)
    # rank
    rank(df)
    # ring ratio
    ringRatio(df)
    # fmt
    path = fmt(df)
    # send by e-mail
    from sendEmail import sendEmail
    sendEmail('Top 30广告主现金', '    见附件。', [path])
    
if __name__ == '__main__':
    # Time the whole run and report it in minutes.
    started = now()
    main()
    elapsed = now() - started
    print('Runtime: {:.3f} min'.format(elapsed / 60))
    

Q/A

  • 如何将data.frame分组几周然后求和?
# S1: aggregate inside the DataFrame.
# Customers start spending on different dates, so the aggregation windows
# start at different points and the aggregated result is wrong.
# Replaced by a SQL function instead.
#
df.resample('7D').sum()
-- S2: spending of the last three weeks.
-- Inline table-valued function: for the three consecutive 7-day windows
-- starting at @st it returns one row per (用户名, 类别) with the summed 金额
-- and a label 上上周 / 上周 / 本周 in column 周.
-- NOTE(review): lt_top30Cash.py queries getThrWeekCash -- confirm which
-- name is deployed.
IF OBJECT_ID('getThreeWeekCash', 'IF') IS NOT NULL
	DROP FUNCTION getThreeWeekCash
GO
CREATE FUNCTION [dbo].[getThreeWeekCash]
(
	@st varchar(10)
)
	RETURNS TABLE
AS
	RETURN(
		-- Week before last: [@st, @st + 7 days)
		SELECT 用户名, 类别, sum(金额) AS 金额, '上上周' AS '周'
			FROM 现金
			WHERE 日期 >= @st
				AND 日期 < DATEADD(D, 7, @st)
				AND 类别 IN ('搜索点击', '新产品', '自主投放')
			GROUP BY 用户名, 类别
		-- UNION ALL: the branches carry different 周 labels and are already
		-- grouped, so no duplicates exist and the dedup step is unnecessary.
		UNION ALL
		-- Last week: [@st + 7 days, @st + 14 days)
		SELECT 用户名, 类别, sum(金额) AS 金额, '上周' AS '周'
			FROM 现金
			WHERE 日期 >= DATEADD(D, 7, @st)
				AND 日期 < DATEADD(D, 14, @st)
				AND 类别 IN ('搜索点击', '新产品', '自主投放')
			GROUP BY 用户名, 类别
		UNION ALL
		-- This week: [@st + 14 days, @st + 21 days)
		SELECT 用户名, 类别, sum(金额) AS 金额, '本周' AS '周'
			FROM 现金
			WHERE 日期 >= DATEADD(D, 14, @st)
				AND 日期 < DATEADD(D, 21, @st)
				AND 类别 IN ('搜索点击', '新产品', '自主投放')
			GROUP BY 用户名, 类别
	)
  • 迭代器可以直接在pd.DataFrame中自动迭代
# 3.6 不支持
# pd.DataFrame(list(tu))
In [35]: tu = (i for i in range(10))

tu
Out[37]: <generator object <genexpr> at 0x0000023319776B48>

pd.DataFrame(tu)
Out[36]: 
   0
0  0
1  1
2  2
3  3
4  4
5  5
6  6
7  7
8  8
9  9

  • 正则
ddf
Out[156]: 
       0
0     ac
1     ad
0     cc
0   acdd
1  aacdf
ddf[0].str.replace(r'^a.+', 'a')
Out[155]: 
0     a
1     a
0    cc
0     a
1     a
Name: 0, dtype: object
  • 排序
# 3.7: sort and renumber the index in a single call via ignore_index.
# NOTE(review): ignore_index presumably depends on the installed pandas
# version rather than on the Python version -- confirm.
df.sort_values('sum', inplace=True, ascending=False, ignore_index=True)
# 3.6: sort first, then renumber the index with a separate call.
df.sort_values('sum', inplace=True, ascending=False)
df.reset_index(drop=True, inplace=True)
  • 存储过程
-- Question: can a stored procedure not be called directly across databases?
-- [Output].dbo.pr_check 'pr_checkFunc'
--
-- pr_check: drops the stored procedure named @obj if it exists.
-- The statement is built by string concatenation and run as dynamic SQL.
-- NOTE(review): @obj is spliced into the SQL text unescaped, and the
-- parameter list handed to sp_executesql is not referenced by that text --
-- confirm callers only pass trusted object names.
CREATE PROC [dbo].[pr_check]
	@obj nvarchar(50)
AS
BEGIN
	DECLARE @sql nvarchar(max)
	SET @sql = 'IF OBJECT_ID(''' + @obj + ''', ''P'') IS NOT NULL DROP PROC ' + @obj
	EXEC sp_executesql @sql, N'@obj nvarchar(50)', @obj
END
GO
-- Drop any existing pr_checkFunc before it is created again below.
EXEC pr_check 'pr_checkFunc'
GO
-- pr_checkFunc: drops the inline table-valued function named @name if it
-- exists; the generated statement is printed before it is executed.
-- NOTE(review): @name is spliced into the SQL text unescaped -- confirm
-- callers only pass trusted object names.
CREATE PROC pr_checkFunc
	@name nvarchar(50)
AS
BEGIN
	DECLARE @sql nvarchar(max)
	SET @sql = 'IF OBJECT_ID(''' + @name + ''', ''IF'') IS NOT NULL DROP FUNCTION ' + @name
	PRINT @sql
	EXEC sp_executesql @sql, N'@name nvarchar(50)',@name 
END
GO

你可能感兴趣的:(python基础,SQL)