python 个人知识库

连接 MySQL 数据库并用 SQL 语句取数

import pymysql
import pandas as pd

# Open a MySQL connection with pymysql. The empty host/port/user/password/
# database values are placeholders and must be filled in before this runs.
date_base=pymysql.connect(host=,
                          port=,
                         user=,
                         password=,
                         database=,
                          charset='utf8')

# Run the SQL text over the open connection and read the result with pandas.
date=pd.read_sql("""SQL语句""",date_base)

# Connect to the database through an SSH tunnel
import pymysql
from sshtunnel import SSHTunnelForwarder
 
with SSHTunnelForwarder(
        (IP_B, Port_B),  # jump host (bastion) B
        ssh_password=password_B,
        ssh_username=username_B,
        remote_bind_address=(IP_C, Port_C)) as server:  # server C that hosts the database
 
    # Open the database connection (host/port/user/passwd/db are placeholders).
    # NOTE(review): with sshtunnel the client normally connects to 127.0.0.1 and
    # server.local_bind_port — confirm when filling in host and port.
    db_connect = pymysql.connect(host='',
                                 port=, 
                                 user=,
                                 passwd=,
                                 db=) 

# Fetch the query result as tuples
# NOTE(review): `sevend_ku` and `sql` are not defined anywhere in the visible
# snippets — presumably an open connection and a SQL string; confirm.
cur = sevend_ku.cursor()
cur.execute(sql)
dbRet = cur.fetchall()

pandas 排序&去重

import pandas as pd

# Placeholder frame; replace it with real data that has these two columns.
data = pd.DataFrame(columns=['columns1', 'columns2'])
# sort_values returns a new frame, so the result has to be assigned back.
# columns1 sorts ascending; ties are ordered by columns2 descending.
data = data.sort_values(['columns1', 'columns2'], ascending=[True, False])
# `keep` must be passed by keyword in pandas 2.x; 'last' keeps the final row
# of every group of duplicates.
data = data.drop_duplicates(['columns1', 'columns2'], keep='last')

pandas group by 方法

# Aggregate with built-in reducers: max/min of columns2, mean/median of columns3.
# The average is spelled 'mean' in pandas; 'avg' is not a known aggregation name.
dataframe.groupby('columns1').agg({'columns2': ['max', 'min'], 'columns3': ['mean', 'median']})

# Aggregating with a user-defined function,
# e.g. counting the distinct values of a group
def count_nunique(S):
    """Return the number of distinct values in S, as reported by S.nunique()."""
    distinct_total = S.nunique()
    return distinct_total

# Group by columns1 and aggregate each group with count_nunique.
dataframe.groupby('columns1').agg(count_nunique)

# Sum the values, express the total in units of 10,000 and keep two decimals:
def sum_w(S):
    """Return S.sum() divided by 10000, rounded to two decimal places."""
    total = S.sum()
    return round(total / 10000, 2)

# Group by columns1 and aggregate each group with sum_w.
dataframe.groupby('columns1').agg(sum_w)

# Range (max minus min) of average_monthly_hours per department,
# computed with an anonymous function after grouping:
(
    df[["average_monthly_hours", "department"]]
    .groupby("department")["average_monthly_hours"]
    .apply(lambda hours: hours.max() - hours.min())
)

pandas 透视表及自定义聚合方法的运用

# Pivot table template: every Chinese identifier below is a placeholder for
# the caller's own frame, aggregation and fill value.
data_pt = pd.pivot_table(
    原始数据,
    index=['纵坐标1', '纵坐标2'],
    columns='横坐标',
    values='值',
    aggfunc=聚合方式,
    fill_value=空值填充方式,
    margins=True,
)

# User-defined aggregation
def sum_w(S):
    """Return S.sum() in units of 10,000, rounded to two decimals, as text ending in 'w'."""
    scaled_total = round(S.sum() / 10000, 2)
    return str(scaled_total) + 'w'
 
# One aggregation per value column: sum_w for remain_principal, a plain count
# for order_no; margins=True adds the totals row.
pd.pivot_table(
    result,
    index=['催收结果'],
    values=['remain_principal', 'order_no'],
    aggfunc={'remain_principal': sum_w, 'order_no': 'count'},
    fill_value=None,
    margins=True,
)

# Turn the pivot-table values into percentages
collect_total_pt11 = collect_total_pt1.copy()
for col_name in collect_total_pt11.columns[2:]:
    # Divide the column by its own value at row label 12, then render every
    # ratio as text with two decimals and a percent sign.
    collect_total_pt11[col_name] = (
        collect_total_pt11[col_name] / collect_total_pt11.loc[12, col_name]
    )
    collect_total_pt11[col_name] = collect_total_pt11[col_name].apply(
        lambda x: f"{x:.2%}"
    )

日期操作

# Entering a date
from datetime import datetime
date = datetime(year=2019, month=5, day=30)

# Adding a given number of days to any date
date + pd.to_timedelta("1 day")

#任意日期加上指定月份数
from datetime import datetime

def month_add(input_date, months):
    """Return input_date shifted by a whole number of calendar months.

    The day of month is kept when the target month has it; otherwise it is
    clamped to the last day of the target month (31 Jan + 1 month gives
    28 Feb, or 29 Feb in a leap year).

    :param input_date: object exposing year/month/day and a replace() method
        that accepts them, e.g. datetime.date or datetime.datetime
    :param months: number of months to add; may be zero or negative
    :return: input_date.replace(...) with the shifted year, month and day
    """
    import calendar

    # Count months on one linear scale so year roll-over is integer arithmetic.
    month_index = input_date.year * 12 + input_date.month - 1 + months
    rpyear, month_offset = divmod(month_index, 12)
    rpmonth = month_offset + 1
    # monthrange() already accounts for leap years.
    last_day = calendar.monthrange(rpyear, rpmonth)[1]
    rpday = min(input_date.day, last_day)
    return input_date.replace(year=rpyear, month=rpmonth, day=rpday)

#_______________________________时间戳转换____________________________________
def stamp_to_datetime(stamp):
    """
    Convert a Unix timestamp (e.g. 1539100800) to a naive datetime holding the
    UTC wall-clock time (2018-10-09 16:00:00), truncated to whole seconds.

    :param stamp: seconds since the Unix epoch (int or float)
    :return: datetime.datetime with tzinfo None and microsecond 0
    """
    from datetime import timezone

    # datetime.utcfromtimestamp() is deprecated since Python 3.12; build an
    # aware UTC value and strip tzinfo so callers still get a naive datetime.
    utc_time = datetime.fromtimestamp(stamp, tz=timezone.utc)
    # Drop sub-second precision so the value matches "%Y-%m-%d %H:%M:%S".
    return utc_time.replace(tzinfo=None, microsecond=0)





def datetime_to_stamp(date_time):
    """
    Convert a "%Y-%m-%d %H:%M:%S" string, read as UTC, to a Unix timestamp:
    2018-10-09 16:00:00 ==> 1539100800

    :param date_time: date/time text such as "2018-10-09 16:00:00"
    :return: integer seconds since the Unix epoch
    :raises ValueError: if date_time does not match the expected format
    """
    # Imported locally: nothing above this function imports these modules.
    import calendar
    import time

    time_array = time.strptime(date_time, "%Y-%m-%d %H:%M:%S")
    # timegm() reads the struct_time as UTC. mktime() would apply the host's
    # local zone, which contradicts the documented example and breaks the
    # round trip with stamp_to_datetime() on any machine not running in UTC.
    return calendar.timegm(time_array)

excel操作类

# Write a DataFrame straight to a file
data.to_excel(r'文件路径',index=False)

# Write several DataFrames into different sheets of one workbook.
# The context manager saves and closes the file on exit; ExcelWriter.save()
# no longer exists in pandas 2.x.
with pd.ExcelWriter(r"file_path") as excelWriter:
    df.to_excel(excel_writer=excelWriter, sheet_name="1", index=False)
    df.to_excel(excel_writer=excelWriter, sheet_name="2", index=False)

# Example
current_month = datetime.today().month
# Wrap around the year boundary: the month before January is 12, not 0.
previous_month = (current_month - 2) % 12 + 1
# DataFrame.to_excel has no `encoding` argument in pandas 2.x, and the writer
# is saved and closed by the context manager.
with pd.ExcelWriter(os.path.join(os.getcwd(), '催收记录.xlsx')) as excelWriter:
    collect_record.to_excel(excel_writer=excelWriter, sheet_name="催记", index=False)
    task_follow_person1.to_excel(excel_writer=excelWriter, sheet_name=str(previous_month)+"月持案情况", index=False)
    task_follow_person2.to_excel(excel_writer=excelWriter, sheet_name=str(current_month)+"持案情况2", index=False)

# Set a column width
from openpyxl import load_workbook
# Open an existing workbook (the path is a placeholder).
wb = load_workbook(filename='文件路径')
# Pick the worksheet by its name.
wo=wb['sheetname']
# Set the width of column A to 20.
wo.column_dimensions['A'].width = 20
# Write the workbook back to disk.
wb.save('文件路径')



# Write to Excel with custom formatting
# These names are used below but were never imported by the snippet.
from openpyxl import Workbook
from openpyxl.styles import Border, Font, PatternFill, Side

# Create the workbook
wb = Workbook()
# NOTE(review): meant to let openpyxl infer cell types automatically; recent
# openpyxl releases appear to have dropped guess_types — confirm it still matters.
wb.guess_types=True

# Create the sheets
# 1. A new workbook already contains its first sheet
ws1=wb.worksheets[0]
ws2=wb.create_sheet('原始数据')
ws3=wb.create_sheet('保监信息查询原始数据')
# Name the first sheet
ws1.title='自动化占比'
# Border, fonts and fill shared by the cells written below
left, right, top, bottom = [Side(style='thin',color='000000')]*4
border = Border(left=left, right=right, top=top, bottom=bottom)
font = Font(name='微软雅黑', size=10,bold=False,italic=False,vertAlign=None,underline='none',strike=False,color='FF000000')
font2 = Font(name='微软雅黑', size=10,bold=True,italic=False,vertAlign=None,underline='none',strike=False,color='FFFFFF')
fill = PatternFill(fill_type='solid',fgColor="1D7FC0")
# Column letters A..Z followed by AA..ZZ; index 0 is 'A', index 26 is 'AA'.
change_list2="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
change_list = list(change_list2) + [
    first + second for first in change_list2 for second in change_list2
]
def write_in(df,sheet_name,col,row):
    """Write df into a worksheet as a bordered table.

    The column names go into row `row` (styled with the module-level border,
    font2 and fill); the data rows follow directly below (styled with border
    and font). The DataFrame index is not written.

    :param df: DataFrame to write
    :param sheet_name: worksheet object to write into; despite its name it is
        indexed by cell reference (e.g. 'B3'), not used as a title
    :param col: 1-based start column (1 is column A, 2 is column B)
    :param row: 1-based row that receives the header
    """
    for col_number, col_name in enumerate(df.columns):
        # change_list maps a 0-based column index to its letter(s).
        column_letter = change_list[col_number + col - 1]

        header_cell = sheet_name[column_letter + str(row)]
        header_cell.value = col_name
        header_cell.border = border
        header_cell.font = font2
        header_cell.fill = fill

        # Read the column by position so any index (not only 0..n-1) works.
        for row_offset, value in enumerate(df.iloc[:, col_number], start=1):
            cell = sheet_name[column_letter + str(row + row_offset)]
            cell.value = value
            cell.border = border
            cell.font = font
def format_table(sheet_name,tformat,col1,col2,row1,row2):
    """Set the number format of a rectangular cell range.

    Both ranges are half-open as written: the columns are
    change_list[col1-1] up to but excluding change_list[col2-1], and the rows
    are row1 up to but excluding row2.

    :param sheet_name: worksheet object indexed by cell reference
    :param tformat: number format string assigned to each cell, e.g. '0.00%'
    """
    for column_index in range(col1 - 1, col2 - 1):
        for row_number in range(row1, row2):
            cell_ref = change_list[column_index] + str(row_number)
            sheet_name[cell_ref].number_format = tformat
# Lay out the report on ws1: each section is a title cell in column B followed
# by a table written by write_in one row below it, starting in column B.
ws1['B2']='初审处理件数'
write_in(xj_auto_first_type_pt.reset_index(),ws1,2,3)
ws1['B8']='初审处理占比'
write_in(xj_auto_first_type_pt2.reset_index(),ws1,2,9)
ws1['B14']='人工/自动初审后终审处理结果'
write_in(xj_first_status_pt.reset_index(),ws1,2,15)
ws1['B24']='初审处理时效'
write_in(xj_first_efficiency_pt.reset_index(),ws1,2,25)
ws1['B36']='助保贷保监会人工查得数量'
write_in(xj_auto_first_CIRC_rgpt.reset_index(),ws1,2,37)
ws1['B43']='助保贷保监会人工查得占比'
write_in(xj_auto_first_CIRC_rgpt2,ws1,2,44)
ws1['B50']='各产品进件量'
write_in(xj_first_createtype_pt.reset_index(),ws1,2,51)
ws1['B59']='各产品进件占比'
write_in(xj_first_createtype_pt2.reset_index(),ws1,2,60)
write_in(xj_auto_first_total,ws2,1,1)# add the raw data
write_in(xj_auto_first_CIRC,ws3,1,1)# add the raw CIRC data
# Apply the percentage number format to three cell ranges on ws1.
format_table(ws1,'0.00%',3,40,10,12)
format_table(ws1,'0.00%',4,40,45,48)
format_table(ws1,'0.00%',3,40,61,66)
# Column A gets width 4; every other column in change_list gets width 11.
ws1.column_dimensions['A'].width = 4
for col in change_list[1:]:
    ws1.column_dimensions[col].width = 11   
# Save the finished workbook.
wb.save(r"C:\Users\admin\Desktop\自动初审监控报表.xlsx")

发送邮件

import smtplib    
from email.mime.multipart import MIMEMultipart    
from email.mime.text import MIMEText    
from email.mime.image import MIMEImage 
from email.header import Header

from email.mime.application import MIMEApplication

# Account settings (all values are placeholders)
smtpserver = 'smtp.exmail.qq.com'
username = '用户名'
password='密码'
sender='发送邮箱'

receiver=['接收邮箱列表']

subject = '主题'
msg = MIMEMultipart('mixed')
msg['Subject'] = subject
msg['From'] = sender
# Address lists in mail headers are separated by commas.
msg['To'] = ",".join(receiver)

# NOTE(review): the HTML markup around the body text appears to have been
# stripped from this snippet — restore the tags if an HTML layout is wanted.
html = """

    
    
    

邮件正文

"""
text_html = MIMEText(html,'html', 'utf-8')
msg.attach(text_html)

# Build the attachment. MIMEApplication is the MIME class for binary payloads:
# it defaults to application/octet-stream and base64-encodes the bytes.
with open('文件路径','rb') as attachment_file:
    sendfile = attachment_file.read()
text_att = MIMEApplication(sendfile)
text_att.add_header('Content-Disposition', 'attachment', filename="%s" %Header("文件名称",'utf-8').encode())
msg.attach(text_att)

# Send the mail; the context manager issues QUIT even if a step fails.
with smtplib.SMTP(smtpserver) as smtp:
    # smtp.set_debuglevel(1) prints the whole dialogue with the SMTP server.
    #smtp.set_debuglevel(1)
    smtp.login(username, password)
    smtp.sendmail(sender, receiver, msg.as_string())

正则表达式

import pandas as pd
import numpy as ny
import re

# Excel workbooks are not decoded through a text codec and pandas.read_excel
# has no `encoding` parameter, so the argument is dropped.
message = pd.read_excel(r'C:\Users\admin\Desktop\python\复习\短信2.xlsx')

def get_title(x):
    """Return the text inside the first 【...】 pair of x, or None.

    None is returned when x is not a string (for example NaN coming from an
    empty cell) or when x contains no 【...】 group made of word characters.

    :param x: message text
    :return: the bracketed title without the brackets, or None
    """
    if not isinstance(x, str):
        return None
    # \w covers letters, digits, underscore and CJK characters; the capture
    # group yields the title without the surrounding brackets.
    match = re.search(r'【(\w*)】', x)
    return match.group(1) if match else None

# Extract the title of every message into a new column.
message['message_title'] = message.content.apply(get_title)

# DataFrame.to_excel no longer accepts `encoding` (removed in pandas 2.0).
message.to_excel(r'C:\Users\admin\Desktop\python\复习\短信解析结果.xlsx', index=False)

 

# Example 2: a raw string keeps \s and \S intact for the regex engine, and the
# escaped dots match a literal '.' only.
re.findall(r'[\s\S]*www\.wdzj\.com[\s\S]*', '//www.wdzj.com/zhuanlan/guancha/17-11397-1.html')

自动执行代码

import schedule
import os

import sys
# Add the folder that holds the job scripts to the module search path so the
# imports below can find them.
sys.path.append(r'C:\Users\chenxudong\Desktop\auto')
'''python import模块时, 是在sys.path里按顺序查找的。
sys.path是一个列表,里面以字符串的形式存储了许多路径。
使用A.py文件中的函数需要先将他的文件路径放到sys.path中'''

# Job modules imported from the folder appended to sys.path above
import T0
import collect_task
import extension
import collect_record

import time  # used by the polling loop; nothing earlier in this script imports it

# One-off runs at start-up; only extension.job() is enabled.
#T0.do_job()
#collect_task.do_jobcollect()
extension.job()
#collect_result.job()
#collect_record.job()

# Daily run times per job, registered in the order listed.
DAILY_JOBS = (
    (T0.do_job, ("09:00", "09:30", "11:30", "13:30", "15:30", "17:30", "19:30", "20:30")),
    (collect_task.do_jobcollect, ("09:59", "14:30")),
    (extension.job, ("23:45",)),
    (collect_record.job, ("13:30", "17:30", "23:50")),
    #(collect_result.job, ("08:58",)),
)
for job_func, run_times in DAILY_JOBS:
    for run_time in run_times:
        schedule.every().day.at(run_time).do(job_func)

# Poll once per second and run whatever is due.
while True:
    schedule.run_pending()
    time.sleep(1)



# Introduction to the schedule package
import schedule
import time
# Demo task that only prints a message.
def job():
    print("I'm working...")
schedule.every(10).seconds.do(job)# run job every 10 seconds
schedule.every(10).minutes.do(job)# run job every 10 minutes
schedule.every().hour.do(job)# run job every hour
schedule.every().day.at("10:30").do(job) # run job every day at 10:30
schedule.every().monday.do(job)# run job every Monday
schedule.every().wednesday.at("13:15").do(job)  # run job every Wednesday at 13:15
while True:
    schedule.run_pending() # run the tasks that are due
    time.sleep(1)

圆饼图

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import os
# Font settings so that Chinese labels render
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Draw the pie chart; 变量 and 标签 are placeholders for the values and labels.
# bbox_inches='tight' keeps the whole figure in the saved image, and
# os.getcwd() is the current working directory.
plt.pie(变量,labels=标签)
# Legend settings: fontsize is the text size, title is the legend title
plt.legend(fontsize=10,title="剩余本金分布")
plt.title('主题')
plt.savefig(os.getcwd()+"\\图片名称.jpg",bbox_inches = 'tight')

企业微信机器人配置

import json  
import requests
import base64
import hashlib
# Read the picture once; the context manager closes the file handle.
with open("图片地址","rb") as image_file:
    img = image_file.read()
# The image message carries the base64 text of the file and the MD5 of the raw bytes.
md5 = hashlib.md5()
md5.update(img)
img64 = base64.b64encode(img)
url = "微信机器人地址"

header = {  "Content-Type":"application/json",  "Charset":"utf-8"  }

# Image message
img_data = {
    "msgtype": "image",
    "image": {
        "base64": str(img64,'utf-8'),
        'md5': md5.hexdigest()
    }
}
request = requests.post(url,data = json.dumps(img_data),headers=header)

# Markdown message
# NOTE(review): the webhook's markdown message presumably only reads "content";
# the nested "image" entry is kept, now with real values — confirm against the
# robot API whether it can be removed.
markdown = {"msgtype":"markdown",
        "markdown": {
            "content": """实时新增用户反馈132例,请相关同事注意。\n
>类型:用户反馈 \n
>普通用户反馈:117例 \n
>VIP用户反馈:15例""",
        "image": {
            # Decode the bytes; str(img64) alone would yield the "b'...'" repr.
            "base64": str(img64,'utf-8'),
            "md5": md5.hexdigest()
            }
        }
    }
request = requests.post(url,data = json.dumps(markdown),headers=header)

 

你可能感兴趣的:(python 个人知识库)