连接SQL&用SQL语句取数
import pymysql
import pandas as pd
# Open a MySQL connection. Template only: every keyword left blank below
# (host, port, user, password, database) must be filled in before this runs.
date_base=pymysql.connect(host=,
port=,
user=,
password=,
database=,
charset='utf8')
# Run the SQL text (the string literal is a placeholder for the real query)
# over that connection and load the result set into a DataFrame.
date=pd.read_sql("""SQL语句""",date_base)
# Connect to the database through an SSH tunnel.
# Template: IP_B/Port_B/username_B/password_B describe the jump host,
# IP_C/Port_C the database server, and db_user/db_password/db_name/sql
# must be supplied by the caller before this snippet is run.
import pymysql
from sshtunnel import SSHTunnelForwarder

with SSHTunnelForwarder(
        (IP_B, Port_B),  # jump host (bastion) B
        ssh_password=password_B,
        ssh_username=username_B,
        remote_bind_address=(IP_C, Port_C)) as server:  # database server C
    # The tunnel exposes the remote database on a local port, so the client
    # connects to localhost on whatever port the forwarder bound.
    db_connect = pymysql.connect(host='127.0.0.1',
                                 port=server.local_bind_port,
                                 user=db_user,
                                 passwd=db_password,
                                 db=db_name)
    try:
        # Fetch the whole result set as a tuple of row tuples.
        with db_connect.cursor() as cur:
            cur.execute(sql)
            dbRet = cur.fetchall()
    finally:
        # Close the connection before the tunnel itself is torn down.
        db_connect.close()
pandas 排序&去重
import pandas as pd
data=pd.DataFrame()
# NOTE(review): the frame created above is named `data` but the calls below
# use `date` -- confirm which frame is intended.
# Both calls return a new DataFrame; assign the result to keep it.
# Sort by columns1 ascending, then columns2 descending.
date.sort_values(['columns1','columns2'],ascending=[True,False])
# Drop rows duplicated on (columns1, columns2), keeping the last occurrence.
# `keep` is passed by keyword because recent pandas no longer accepts it positionally.
date.drop_duplicates(['columns1','columns2'], keep='last')
pandas group by 方法
# Aggregate with built-in reducers, giving each column its own list of
# reducer names. 'avg' is not a pandas reducer name (the average is 'mean'),
# so the second reducer for columns3 is 'median'.
dataframe.groupby('columns1').agg({'columns2':['max','min'],'columns3':['mean','median']})
# Aggregating with a user-defined function,
# e.g. counting the distinct values in a column.
def count_nunique(S):
    """Return the number of distinct values in ``S``.

    :param S: object exposing ``nunique()`` (e.g. a pandas Series).
    :return: whatever ``S.nunique()`` returns.
    """
    return S.nunique()
# Aggregate each 'columns1' group with the custom count_nunique function.
dataframe.groupby('columns1').agg(count_nunique)
# Sum a column, express it in units of ten thousand, keep two decimals.
def sum_w(S):
    """Return the sum of ``S`` divided by 10000, rounded to two decimals.

    :param S: object exposing ``sum()`` (e.g. a pandas Series).
    :return: the rounded numeric total in units of ten thousand.
    """
    return round(S.sum() / 10000, 2)
# Aggregate each 'columns1' group with the custom sum_w function.
dataframe.groupby('columns1').agg(sum_w)
# Range (max minus min) of monthly hours per department, computed with an
# anonymous function applied to each group.
(
    df[["average_monthly_hours", "department"]]
    .groupby("department")["average_monthly_hours"]
    .apply(lambda hours: hours.max() - hours.min())
)
pandas 透视表及自定义聚合方法的运用
# Pivot table template. The Chinese identifiers are placeholders to replace:
# 原始数据 = source frame, 聚合方式 = aggregation function,
# 空值填充方式 = value used to fill empty cells. The quoted names stand for the
# row keys, the column key and the value column. margins=True adds totals.
data_pt=pd.pivot_table(原始数据,index=['纵坐标1','纵坐标2'],columns='横坐标',values='值',aggfunc=聚合方式,fill_value=空值填充方式,margins=True)
# Custom aggregation function for use as a pivot-table aggfunc.
def sum_w(S):
    """Return the sum of ``S`` in units of ten thousand as text with a 'w' suffix.

    The total is divided by 10000, rounded to two decimals and converted to a
    string, so the result is a label rather than a number.

    :param S: object exposing ``sum()`` (e.g. a pandas Series).
    :return: string such as ``'8.02w'``.
    """
    return str(round(S.sum() / 10000, 2)) + 'w'
# Pivot with a different aggregation per value column: the remaining
# principal goes through sum_w, the order numbers are counted.
pd.pivot_table(
    result,
    index=['催收结果'],
    values=['remain_principal', 'order_no'],
    aggfunc={'remain_principal': sum_w, 'order_no': 'count'},
    fill_value=None,
    margins=True,
)
# Convert the values of a pivot table into percentage strings.
# Work on a copy so the numeric pivot table stays usable.
collect_total_pt11 = collect_total_pt1.copy()
for col_name in collect_total_pt11.columns[2:]:  # the first two columns are skipped
    # NOTE(review): row label 12 is presumably the totals row -- confirm.
    share = collect_total_pt11[col_name] / collect_total_pt11.loc[12, col_name]
    collect_total_pt11[col_name] = share.apply(lambda x: format(x, '.2%'))
日期操作
# Building a date value
from datetime import datetime
date = datetime(year=2019, month=5, day=30)
# Shifting any date by a given number of days
date + pd.to_timedelta("1 day")
#任意日期加上指定月份数
from datetime import datetime
def month_add(input_date, months):
    """Return ``input_date`` shifted by ``months`` calendar months.

    The day of month is kept when the target month has it; otherwise it is
    clamped to the last day of the target month (31 Jan + 1 month gives
    28 or 29 Feb).

    :param input_date: object with ``year``/``month``/``day`` attributes and a
        ``replace`` method (``datetime.date`` or ``datetime.datetime``).
    :param months: whole number of months to add; may be negative.
    :return: result of ``input_date.replace`` with the new year, month and day.
    """
    import calendar

    # Count months linearly so that year roll-over is plain integer
    # arithmetic; divmod avoids the float division of the original.
    month_index = input_date.year * 12 + (input_date.month - 1) + months
    rpyear, month_offset = divmod(month_index, 12)
    rpmonth = month_offset + 1
    # monthrange knows about leap years, so no hand-written table is needed.
    last_day = calendar.monthrange(rpyear, rpmonth)[1]
    rpday = min(input_date.day, last_day)
    return input_date.replace(year=rpyear, month=rpmonth, day=rpday)
#_______________________________时间戳转换____________________________________
def stamp_to_datetime(stamp):
    """Convert a Unix timestamp to a naive ``datetime`` expressed in UTC.

    Example: ``1539100800`` becomes ``datetime(2018, 10, 9, 16, 0, 0)``.
    Sub-second precision is discarded, so the result always has
    ``microsecond == 0``.

    :param stamp: seconds since the Unix epoch.
    :return: naive ``datetime`` holding the corresponding UTC wall-clock time.
    """
    from datetime import timezone

    # fromtimestamp with an explicit UTC zone replaces the deprecated
    # utcfromtimestamp. Dropping tzinfo keeps the naive return value.
    utc_moment = datetime.fromtimestamp(stamp, tz=timezone.utc)
    return utc_moment.replace(tzinfo=None, microsecond=0)
def datetime_to_stamp(date_time):
    """Convert a ``'%Y-%m-%d %H:%M:%S'`` string to a Unix timestamp.

    The text is read as UTC, so ``'2018-10-09 16:00:00'`` gives
    ``1539100800`` on every machine and the function is the inverse of
    ``stamp_to_datetime``. (``time.mktime`` would read the text in the
    machine's local zone and only give that value on a UTC host.)

    :param date_time: date/time text in ``YYYY-MM-DD HH:MM:SS`` form.
    :return: integer seconds since the Unix epoch.
    :raises ValueError: if the text does not match the expected format.
    """
    import calendar
    import time

    time_array = time.strptime(date_time, "%Y-%m-%d %H:%M:%S")
    return calendar.timegm(time_array)
excel操作类
# Write one DataFrame straight to a workbook (the path is a placeholder).
data.to_excel(r'文件路径',index=False)
# Write several DataFrames into different sheets of the same workbook.
# The context manager saves and closes the file on exit; the separate
# ExcelWriter.save() call no longer exists in current pandas.
with pd.ExcelWriter(r"file_path") as excelWriter:
    df.to_excel(excel_writer=excelWriter, sheet_name="1", index=False)
    df.to_excel(excel_writer=excelWriter, sheet_name="2", index=False)
# Example: one workbook in the current working directory with three sheets.
today = datetime.today()
# Previous calendar month, wrapping January back to 12 instead of yielding 0.
previous_month = (today.month - 2) % 12 + 1
# The context manager saves and closes the workbook. The `encoding` keyword
# is dropped because to_excel no longer accepts it in current pandas.
with pd.ExcelWriter(os.getcwd()+'\\催收记录.xlsx') as excelWriter:
    collect_record.to_excel(excel_writer=excelWriter, sheet_name="催记", index=False)
    task_follow_person1.to_excel(excel_writer=excelWriter, sheet_name=str(previous_month)+"月持案情况", index=False)
    task_follow_person2.to_excel(excel_writer=excelWriter, sheet_name=str(today.month)+"持案情况2", index=False)
# Set a column width in an existing workbook.
# The path and sheet name below are placeholders.
from openpyxl import load_workbook
wb = load_workbook(filename='文件路径')
wo=wb['sheetname']
# Width of column A.
wo.column_dimensions['A'].width = 20
# Save back to the same path so the change is persisted.
wb.save('文件路径')
# Write to Excel with custom formatting.
# These names are used below but were never imported in this snippet.
from openpyxl import Workbook
from openpyxl.styles import Border, Font, PatternFill, Side

# Create the workbook.
wb = Workbook()
# Ask openpyxl to guess cell formats.
# NOTE(review): recent openpyxl releases may ignore this attribute -- confirm.
wb.guess_types=True
# Create the sheets: reuse the first sheet, then add two more.
ws1=wb.worksheets[0]
ws2=wb.create_sheet('原始数据')
ws3=wb.create_sheet('保监信息查询原始数据')
# Name the first sheet.
ws1.title='自动化占比'
# Shared border: the same thin black side on all four edges.
left, right, top, bottom = [Side(style='thin',color='000000')]*4
border = Border(left=left, right=right, top=top, bottom=bottom)
# Body font (regular) and header font (bold, white).
font = Font(name='微软雅黑', size=10,bold=False,italic=False,vertAlign=None,underline='none',strike=False,color='FF000000')
font2 = Font(name='微软雅黑', size=10,bold=True,italic=False,vertAlign=None,underline='none',strike=False,color='FFFFFF')
# Solid fill applied to header cells.
fill = PatternFill(fill_type='solid',fgColor="1D7FC0")
# Column-letter lookup table: index 0 is 'A', 25 is 'Z', 26 is 'AA',
# and the last entry is 'ZZ' (702 entries in total).
change_list2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
change_list = list(change_list2)
# Append the two-letter names in spreadsheet order: AA, AB, ..., AZ, BA, ...
change_list.extend(
    first + second for first in change_list2 for second in change_list2
)
def write_in(df,sheet_name,col,row):
    """Write ``df`` (header row plus data rows) into a worksheet.

    The header goes on row ``row`` starting at column number ``col``; data
    rows follow directly beneath. Header cells get the shared ``border``,
    ``font2`` and ``fill``; data cells get ``border`` and ``font``.

    :param df: DataFrame to write. Rows are read by position, so any index
        is accepted.
    :param sheet_name: target worksheet object (despite the name, not a
        string); it is indexed by coordinates such as ``'B3'``.
    :param col: 1-based number of the first column (1 is 'A', 2 is 'B').
    :param row: number of the header row (``col=1, row=1`` starts at A1).
    :raises ValueError: if ``col`` is smaller than 1. Without this check a
        negative list index would silently address the far-right columns.
    """
    if col < 1:
        raise ValueError("col must be >= 1, got %r" % (col,))
    for col_number, col_name in enumerate(df.columns):
        # change_list maps a 0-based column position to its letter(s).
        column_letter = change_list[col_number + col - 1]

        # Header cell: column name plus header styling.
        header_cell = sheet_name[column_letter + str(row)]
        header_cell.value = col_name
        header_cell.border = border
        header_cell.font = font2
        header_cell.fill = fill

        # Data cells. iloc reads by position, so this does not depend on
        # the frame having a 0..n-1 index or unique column names.
        for row_offset in range(len(df)):
            body_cell = sheet_name[column_letter + str(row + row_offset + 1)]
            body_cell.value = df.iloc[row_offset, col_number]
            body_cell.border = border
            body_cell.font = font
def format_table(sheet_name,tformat,col1,col2,row1,row2):
    """Apply a number format to a rectangular range of cells.

    Both upper bounds are exclusive: the cells touched are columns
    ``col1 .. col2-1`` (1-based, so 1 is 'A') and rows ``row1 .. row2-1``.
    NOTE(review): the original comment described the area as col1~col2,
    row1~row2 -- confirm whether the last column and row should be included.

    :param sheet_name: target worksheet object, indexed by coordinates such
        as ``'C10'``.
    :param tformat: number-format string, e.g. ``'0.00%'``.
    :param col1: 1-based number of the first column to format.
    :param col2: 1-based column number at which formatting stops (exclusive).
    :param row1: first row to format.
    :param row2: row at which formatting stops (exclusive).
    """
    for x in range(col1 - 1, col2 - 1):
        column_letter = change_list[x]  # looked up once per column
        for y in range(row1, row2):
            sheet_name[column_letter + str(y)].number_format = tformat
# Lay out the summary sheet: each section is a title cell in column B with
# its table written one row below, starting at column B (col=2).
ws1['B2']='初审处理件数'
write_in(xj_auto_first_type_pt.reset_index(),ws1,2,3)
ws1['B8']='初审处理占比'
write_in(xj_auto_first_type_pt2.reset_index(),ws1,2,9)
ws1['B14']='人工/自动初审后终审处理结果'
write_in(xj_first_status_pt.reset_index(),ws1,2,15)
ws1['B24']='初审处理时效'
write_in(xj_first_efficiency_pt.reset_index(),ws1,2,25)
ws1['B36']='助保贷保监会人工查得数量'
write_in(xj_auto_first_CIRC_rgpt.reset_index(),ws1,2,37)
ws1['B43']='助保贷保监会人工查得占比'
# NOTE(review): unlike the other tables this frame is written without
# reset_index() -- confirm that is intended.
write_in(xj_auto_first_CIRC_rgpt2,ws1,2,44)
ws1['B50']='各产品进件量'
write_in(xj_first_createtype_pt.reset_index(),ws1,2,51)
ws1['B59']='各产品进件占比'
write_in(xj_first_createtype_pt2.reset_index(),ws1,2,60)
# Raw data go to the other two sheets, starting at A1.
write_in(xj_auto_first_total,ws2,1,1)
write_in(xj_auto_first_CIRC,ws3,1,1)
# Show the ratio tables as percentages.
format_table(ws1,'0.00%',3,40,10,12)
format_table(ws1,'0.00%',4,40,45,48)
format_table(ws1,'0.00%',3,40,61,66)
# Narrow margin column A, uniform width for every other column.
ws1.column_dimensions['A'].width = 4
for col in change_list[1:]:
    ws1.column_dimensions[col].width = 11
wb.save(r"C:\Users\admin\Desktop\自动初审监控报表.xlsx")
发送邮件
import smtplib
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
from email.header import Header

# Account and message settings (all values are placeholders).
smtpserver = 'smtp.exmail.qq.com'
username = '用户名'
password='密码'
sender='发送邮箱'
receiver=['接收邮箱列表']
subject = '主题'

msg = MIMEMultipart('mixed')
msg['Subject'] = subject
msg['From'] = sender
# Address lists in mail headers are comma-separated, not semicolon-separated.
msg['To'] = ", ".join(receiver)

# HTML body.
html = """
邮件正文
"""
text_html = MIMEText(html,'html', 'utf-8')
msg.attach(text_html)

# Build the attachment. The file is read inside a context manager so the
# handle is closed. MIMEApplication base64-encodes the bytes and emits a
# single application/octet-stream Content-Type header, where the earlier
# MIMEText-based approach ended up with two Content-Type headers.
with open('文件路径','rb') as attachment_file:
    sendfile = attachment_file.read()
text_att = MIMEApplication(sendfile)
text_att.add_header('Content-Disposition', 'attachment', filename="%s" %Header("文件名称",'utf-8').encode())
msg.attach(text_att)

# Send the message. The context manager issues QUIT even if login or
# sendmail raises. Uncomment set_debuglevel(1) to print the SMTP dialogue.
with smtplib.SMTP(smtpserver) as smtp:
    #smtp.set_debuglevel(1)
    smtp.login(username, password)
    smtp.sendmail(sender, receiver, msg.as_string())
正则表达式
import pandas as pd
import numpy as ny
import re
# Load the SMS export. read_excel takes no `encoding` argument (workbooks
# carry their own encoding), so passing one raises TypeError on current pandas.
message=pd.read_excel(r'C:\Users\admin\Desktop\python\复习\短信2.xlsx')
def get_title(x):
    """Return the text inside the first ``【...】`` tag of ``x``.

    The previous pattern contained the character U+2217 where the regex
    quantifier ``*`` was meant, plus a duplicated fragment, so it could not
    match ordinary text. It is restored to "【, word characters, 】".

    :param x: message text; non-string values (``None``, NaN, numbers) are
        accepted and treated as having no title.
    :return: the tag content without the brackets, or ``None`` when ``x`` is
        not a string or contains no tag.
    """
    if not isinstance(x, str):
        return None
    found = re.search(r'【(\w*)】', x)
    if found is None:
        return None
    return found.group(1)
# Extract the title of every message; get_title can be passed directly.
message['message_title']=message.content.apply(get_title)
# to_excel no longer accepts `encoding` in current pandas, so it is omitted.
message.to_excel(r'C:\Users\admin\Desktop\python\复习\短信解析结果.xlsx',index=False)
# Example 2: match any text that contains the literal host www.wdzj.com.
# A raw string keeps \s and \S as regex escapes, and the dots are escaped so
# they match a literal '.', not any character.
re.findall(r'[\s\S]*www\.wdzj\.com[\s\S]*','//www.wdzj.com/zhuanlan/guancha/17-11397-1.html')
自动执行代码
import schedule
import os
import sys
import time  # needed by the polling loop below; it was never imported here

# Python resolves imports by searching the directories in sys.path, a list
# of path strings, in order. To use functions from a script kept elsewhere,
# its directory has to be added to sys.path first.
sys.path.append(r'C:\Users\chenxudong\Desktop\auto')
import T0
import collect_task
import extension
import collect_record

#T0.do_job()
#collect_task.do_jobcollect()
# Run this job once immediately, before the schedule starts.
extension.job()
#collect_result.job()
#collect_record.job()

# Daily run times, registered in the same order as before.
for run_at in ("09:00", "09:30", "11:30", "13:30", "15:30", "17:30", "19:30", "20:30"):
    schedule.every().day.at(run_at).do(T0.do_job)
for run_at in ("09:59", "14:30"):
    schedule.every().day.at(run_at).do(collect_task.do_jobcollect)
schedule.every().day.at("23:45").do(extension.job)
for run_at in ("13:30", "17:30", "23:50"):
    schedule.every().day.at(run_at).do(collect_record.job)
#schedule.every().day.at("08:58").do(collect_result.job)

# Poll once per second and run whatever is due.
while True:
    schedule.run_pending()
    time.sleep(1)
#schedule介绍
import schedule
import time
def job():
    """Print a fixed progress line; serves as the demo task for the scheduler."""
    print("I'm working...")
schedule.every(10).seconds.do(job)  # run job every 10 seconds
schedule.every(10).minutes.do(job)  # run job every 10 minutes
schedule.every().hour.do(job)  # run job every hour
schedule.every().day.at("10:30").do(job)  # run job every day at 10:30
schedule.every().monday.do(job)  # run job every Monday
schedule.every().wednesday.at("13:15").do(job)  # run job every Wednesday at 13:15
# Poll once per second and run whatever is due.
while True:
    schedule.run_pending()
    time.sleep(1)
圆饼图
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import os
# Font settings so that Chinese labels can be displayed (SimHei) and the
# minus sign is not drawn with the Unicode minus glyph.
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# Draw the pie chart. 变量 and 标签 are placeholders for the values and
# their labels.
plt.pie(变量,labels=标签)
# Legend: fontsize is the text size, title is the legend heading.
plt.legend(fontsize=10,title="剩余本金分布")
plt.title('主题')
# bbox_inches='tight' keeps the whole figure in the saved image;
# os.getcwd() is the current working directory.
plt.savefig(os.getcwd()+"\\图片名称.jpg",bbox_inches = 'tight')
企业微信机器人配置
import json
import requests
import base64
import hashlib
# Read the picture once; the context manager closes the file handle.
with open("图片地址","rb") as img_file:
    img = img_file.read()
# The image message carries both the MD5 of the raw bytes and their base64 text.
md5 = hashlib.md5()
md5.update(img)
img64 = base64.b64encode(img)
url = "微信机器人地址"
header = { "Content-Type":"application/json", "Charset":"utf-8" }
img_data = {
    "msgtype": "image",
    "image": {
        # b64encode returns bytes; decode to text so it can go into JSON.
        "base64": str(img64,'utf-8'),
        'md5': md5.hexdigest()
    }
}
request = requests.post(url,data = json.dumps(img_data),headers=header)
# Markdown message. The previous payload also nested an "image" object whose
# base64 field was str() of a bytes object (the "b'...'" repr) and whose md5
# was the literal placeholder "MD5"; that invalid object is omitted and only
# "content" is sent.
markdown = {"msgtype":"markdown",
    "markdown": {
        "content": """实时新增用户反馈132例,请相关同事注意。\n
>类型:用户反馈 \n
>普通用户反馈:117例 \n
>VIP用户反馈:15例"""
    }
}
request = requests.post(url,data = json.dumps(markdown),headers=header)