Python绘图与数据处理

# coding: utf-8
import time
import random
import os
import re
import xlwt
import requests
import pymysql
import scipy
import numpy as np
import xlsxwriter
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib as mpl
import seaborn as sns
from sklearn.cluster import KMeans
import math
from ggplot import *




# Every relative path below is resolved against this working directory.
os.chdir('*****')


def _read_headerless_csv(path):
    """Read one CSV export whose first row is data rather than a header."""
    return pd.read_csv(path, header=None)


# One "base" export and one "dzd" export per source, read in source order.
guangdabase = _read_headerless_csv('**.csv')
guangdadzd = _read_headerless_csv('**.csv')
guangfabase = _read_headerless_csv('**.csv')
guangfadzd = _read_headerless_csv('**.csv')
jianhangbase = _read_headerless_csv('**.csv')
jianhangdzd = _read_headerless_csv('**.csv')
pinganbase = _read_headerless_csv('**.csv')
pingandzd = _read_headerless_csv('**.csv')
xingyebase = _read_headerless_csv('**.csv')
xingyedzd = _read_headerless_csv('**.csv')
zhaoshangbase = _read_headerless_csv('***.csv')
zhaoshangdzd = _read_headerless_csv('***.csv')

# Lookup table that is later joined on its 'shfnum' column.
province = pd.read_excel('***xlsx')

# Stack the six "base" exports into one frame with a fresh 0..n-1 index.
_base_frames = [
    guangdabase,
    guangfabase,
    jianhangbase,
    pinganbase,
    xingyebase,
    zhaoshangbase,
]
xybase = pd.concat(_base_frames, ignore_index=True)
xybase.shape

# Same for the six "dzd" exports.
_dzd_frames = [
    guangdadzd,
    guangfadzd,
    jianhangdzd,
    pingandzd,
    xingyedzd,
    zhaoshangdzd,
]
xydzd = pd.concat(_dzd_frames, ignore_index=True)
xydzd.shape




def xybasenames(data1):
    """Attach the 56 "base" column names to *data1* and return it.

    The names are stored as a list on the ``colnames`` attribute of the
    object that was passed in; the object itself is returned so the call
    can be used in an assignment.
    """
    base_column_names = [
        'id', 'UpdateFlag', 'branch', 'ajbh', 'kehu', 'ajlx', 'shfzh',
        'shfzh18', 'shebaoID', 'xm', 'pinyin', 'sex',
        'zhiwu', 'zjqkje', 'zjshje', 'zjzxqke', 'zjzxqkerq', 'zjyhlx',
        'jdsj', 'dqsj', 'zu', 'ywy', 'states', 'period',
        'yjbl', 'fenpeisj', 'urgent', 'lasttime', 'closetime', 'czy',
        'addtime', 'pici', 'inpici', 'shengfen', 'chengshi',
        'remark1', 'remark2', 'remark3', 'lastJzSj', 'kongguan',
        'PromisedDate', 'PromisedJe', 'nextStep', 'hint',
        'dingyueTime', 'fabuTime', 'gaNum', 'Ajsx', 'ajInfo', 'kehuAjBh',
        'ajStop', 'ajLock', 'yxAj', 'isShare', 'zxxddm',
        'picipizhu',
    ]
    setattr(data1, 'colnames', base_column_names)
    return data1






def xydzdnames(data2):
    """Attach the 25 "dzd" column names to *data2* and return it.

    The names are stored as a list on the ``colnames`` attribute of the
    object that was passed in; the same object is returned.
    """
    dzd_column_names = [
        'shfzh18', 'kehu', 'ajlx', 'inpici', 'account', 'cardno',
        'hkrq', 'dzrq', 'hkze', 'hkmx', 'rate', 'huobi',
        'zhrmb', 'qkbj', 'yhlx', 'zxqkje', 'hkbz', 'ywy', 'czy',
        'Lasttime', 'flag', 'IsAggregatedCard', 'ajbh',
        'fabuTime', 'kehuliushuiNum',
    ]
    setattr(data2, 'colnames', dzd_column_names)
    return data2






# Label the combined "base" table: the helper hangs the name list on a
# full-range slice of the frame, and that list then becomes the header.
_named_base = xybasenames(xybase[:])
_named_base.columns = _named_base.colnames
xybase = _named_base

# Same two steps for the combined "dzd" table.
_named_dzd = xydzdnames(xydzd[:])
_named_dzd.columns = _named_dzd.colnames
xydzd = _named_dzd






# Keep only statement rows with hkbz == 1 and a positive hkmx amount.
# NOTE(review): hkbz == 1 presumably flags a confirmed repayment -- confirm.
xydzd = xydzd[xydzd.hkbz == 1]
xydzd = xydzd[xydzd.hkmx > 0]

# Take an explicit copy: columns are added below, and assigning into a frame
# that is still a slice of `xydzd` triggers SettingWithCopyWarning (and is
# silently ineffective under pandas copy-on-write).
xydzddata = xydzd[['ajbh', 'kehu', 'hkrq', 'hkmx', 'shfzh18']].copy()

# The positional decoding below is only valid for 18-character ID strings.
xydzddata = xydzddata[xydzddata["shfzh18"].str.len() == 18].copy()

# Year and month are sliced out of the textual date in 'hkrq'
# (characters 0-3 and 5-6).
xydzddata['year'] = xydzddata['hkrq'].str.slice(0, 4).astype(int)
xydzddata['month'] = xydzddata['hkrq'].str.slice(5, 7).astype(int)

# Constant column used later as a row counter in groupby(...).count().
xydzddata["nnn"] = 1

# Characters 6-9 of the ID are the birth year; age is relative to 2017.
xydzddata["bornyear"] = xydzddata["shfzh18"].str.slice(6, 10).astype(int)
xydzddata["age"] = 2017 - xydzddata["bornyear"]

# Character at index 16: even digit -> '女', odd digit -> '男'.  Computed in
# one vectorised step instead of two chained-indexing assignments.
xydzddata["sex"] = np.where(
    xydzddata["shfzh18"].str.get(16).astype(int) % 2 == 0, '女', '男')

# First two characters of the ID are matched against province['shfnum'].
xydzddata['address'] = xydzddata["shfzh18"].str.slice(0, 2).astype(int)

# Left join keeps every repayment row even when the code has no match.
xydzddata = pd.merge(xydzddata, province,
                     left_on='address', right_on='shfnum', how='left')
xydzddata = xydzddata[['ajbh', 'kehu', 'hkrq', 'hkmx', 'shfzh18', 'sex',
                       'age', 'province', 'year', 'month', 'nnn']]
xydzddata = xydzddata[xydzddata.year > 2015]










# Total repayment amount per (kehu, year, month).
xydzdhkmx = (xydzddata.groupby(['kehu', 'year', 'month'])['hkmx']
             .sum().reset_index())

# Rows with a positive amount; copied so later column additions do not write
# into a slice of `xydzddata`.
xydzddata1 = xydzddata[xydzddata.hkmx > 0].copy()

# Number of repayment rows per (kehu, year, month).
hkcsh = (xydzddata1.groupby(['kehu', 'year', 'month'])['ajbh']
         .count().reset_index())

# Number of distinct ajbh values per (kehu, year, month): de-duplicate first,
# then count.  The keys are taken from `xydzddata2` itself -- the original
# grouped by key Series from `xydzddata1`, which only worked because pandas
# aligns the key Series on the index.
xydzddata2 = xydzddata[xydzddata.hkmx > 0]
xydzddata2 = xydzddata2[['ajbh', 'kehu', 'year', 'month']]
xydzddata2 = xydzddata2.drop_duplicates()
hkaj = (xydzddata2.groupby(['kehu', 'year', 'month'])['ajbh']
        .count().reset_index())

# Number of repayment rows per (kehu, year, province).
region = (xydzddata1.groupby(['kehu', 'year', 'province'])['nnn']
          .count().reset_index())








# Age bucket edges and their display labels.  The first label now matches its
# lower edge (18); it previously read '20-30' although the bin starts at 18.
bins = [18, 30, 40, 50, 100]
group_names = ['18-30', '30-40', '40-50', '50以上']

# pd.cut uses right-closed bins, so without include_lowest an age of exactly
# 18 would fall outside the first bin and become NaN.  assign() builds a new
# frame instead of writing into the filtered slice.
xydzddata1 = xydzddata1.assign(
    age1=pd.cut(xydzddata1['age'], bins, labels=group_names,
                include_lowest=True))

# Row count per (kehu, age bucket), restricted to the two clients charted
# later on.
agedata = xydzddata1['nnn'].groupby(
    [xydzddata1['kehu'], xydzddata1['age1']]).count().reset_index()
agedata = agedata[(agedata.kehu == '兴业') | (agedata.kehu == '广发')]






# Work on the rows of the "base" table with a positive zjqkje.  The explicit
# copy makes the year/month assignments below write to an independent frame
# rather than to a slice of `xybase`.
xybasedata = xybase[['ajbh', 'kehu', 'shfzh18', 'zjqkje', 'zjshje', 'jdsj']]
xybasedata = xybasedata[xybasedata.zjqkje > 0].copy()

# Year and month are sliced out of the textual date in 'jdsj'
# (characters 0-3 and 5-6).
xybasedata['year'] = xybasedata['jdsj'].str.slice(0, 4).astype(int)
xybasedata['month'] = xybasedata['jdsj'].str.slice(5, 7).astype(int)

xybasedata = xybasedata[xybasedata.year > 2015]

# Identical rows would be double-counted in the sums below.
xybasedata = xybasedata.drop_duplicates()

# Sum both amount columns per (kehu, year, month) ...
xybasezhanbi = (xybasedata.groupby(['kehu', 'year', 'month'])
                [['zjqkje', 'zjshje']].sum().reset_index())

# ... and express zjshje as a share of zjqkje, rounded to 5 decimals.  The
# denominator is a sum of strictly positive values, so it is never zero.
xybasezhanbi['zhanbi'] = (
    xybasezhanbi['zjshje'] / xybasezhanbi['zjqkje']).round(5)








# Monthly repayment totals for the two clients that are charted.
xingyehkmx = xydzdhkmx[(xydzdhkmx.kehu == '兴业')]
guangfahkmx = xydzdhkmx[(xydzdhkmx.kehu == '广发')]

# Enable inline figures when running under IPython/Jupyter.  In a plain
# Python interpreter get_ipython is undefined, so skip the magic instead of
# crashing; run_line_magic replaces the deprecated .magic() call.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

sns.set_style("whitegrid")
sns.set_context("talk")
mpl.rcParams['font.sans-serif'] = ['Microsoft YaHei']  # default font able to render the Chinese labels
mpl.rcParams['axes.unicode_minus'] = False  # keep the minus sign '-' from rendering as a box


def _monthly_repayment_bars(data, title):
    """Draw a month-by-year grouped bar chart of 'hkmx', show it, return the axes."""
    axes = sns.barplot(x="month", y="hkmx", hue="year", data=data)
    axes.set_xlabel('月份', fontsize=15)
    axes.set_ylabel('还款总额', fontsize=15)
    axes.set_title(title, fontsize=15)
    plt.show()
    return axes


ax = _monthly_repayment_bars(xingyehkmx, '兴业客户')
ax1 = _monthly_repayment_bars(guangfahkmx, '广发客户')






# Per-client ratio tables.  They are copied because 'year' is overwritten
# below; assigning into a filtered slice of `xybasezhanbi` would raise
# SettingWithCopyWarning.
xingyehkzhb = xybasezhanbi[(xybasezhanbi.kehu == '兴业')].copy()
guangfahkzhb = xybasezhanbi[(xybasezhanbi.kehu == '广发')].copy()

# 'year' is cast to str before being mapped to colour.
# NOTE(review): presumably so each year is drawn as its own discrete series
# rather than on a continuous colour scale -- confirm.
xingyehkzhb['year'] = xingyehkzhb['year'].astype(str)
guangfahkzhb['year'] = guangfahkzhb['year'].astype(str)

# Monthly 'zhanbi' per year, one line per year, x axis ticked at 1..12.
# NOTE(review): these are bare expression statements; they presumably only
# render when evaluated as the last expression of a notebook cell -- confirm.
(ggplot(aes(x='month', y='zhanbi', colour='year'), data=xingyehkzhb)
 + geom_point()
 + geom_line()
 + xlab('月份')
 + ylab('还款占比')
 + ggtitle('兴业客户还款占比情况')
 + scale_x_continuous(breaks=range(1, 13)))

(ggplot(aes(x='month', y='zhanbi', colour='year'), data=guangfahkzhb)
 + geom_point()
 + geom_line()
 + xlab('月份')
 + ylab('还款占比')
 + ggtitle('广发客户还款占比情况')
 + scale_x_continuous(breaks=range(1, 13)))




# Age-bucket charts: a grouped bar chart on figure 1 and two pie charts on
# figure 2.  The statements below rely on pyplot's "current figure/axes"
# state, so their order matters.
plt.figure(1)
plt.figure(2) 
# Figure 2 is current here: reserve the two top cells of a 2x2 grid for the pies.
plt1=plt.subplot(221)
plt2=plt.subplot(222)
# Make figure 1 current again so the bar chart is drawn there.
plt.figure(1) 
ax2 = sns.barplot(x="age1", y="nnn", hue="kehu", data=agedata)
ax2.set_xlabel('年龄段',fontsize=15)
ax2.set_ylabel('数量',fontsize=15)
ax2.set_title('年龄段回款分析',fontsize=15)
# Switch to the first pie axes (on figure 2).
plt.sca(plt1)  
# Offset the second wedge outwards.
# NOTE(review): assumes each filtered table below has exactly four rows, one
# per explode entry -- confirm.
explode = [0, 0.1, 0, 0] 
xingyeagedata = agedata[agedata.kehu=='兴业']


# Pie of row counts per age bucket; percentages are printed inside the wedges.
plt.pie(x=xingyeagedata['nnn'], labels=xingyeagedata['age1'],  explode=explode, autopct='%3.1f %%',        shadow=True, labeldistance=1.1,   startangle = 90,pctdistance = 0.6)


plt.title('兴业客户年龄段回款情况')


# Switch to the second pie axes and repeat for the other client.
plt.sca(plt2)  
explode = [0, 0.1, 0, 0] 
guangfaagedata = agedata[agedata.kehu=='广发']


plt.title('广发客户年龄段回款情况')
plt.pie(x=guangfaagedata['nnn'], labels=guangfaagedata['age1'], explode=explode,  autopct='%3.1f %%',        shadow=True,  labeldistance=1.1,  startangle = 90,pctdistance = 0.6)


# Display all open figures.
plt.show()











































































































































































你可能感兴趣的:(Python)