# coding: utf-8
# In[1]:
import os
import pymysql
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib as mpl
import seaborn as sns
from ggplot import *
# In[2]:
#配置MySQL
conn = pymysql.connect(host='127.0.0.1', port=3306, user='*****', passwd='*****',db='******',charset='utf8')
cur = conn.cursor()
# In[3]:
# sql语句
xybase = "SELECT f4 as ajbh,f5 as kehu, f8 as shfzh18,f22 as ywy,f14 as zjqkje, f15 as zjshje,f19 as jdsj from xybase where f22 is not null and f5 in ('兴业','广发')"
# 利用pandas 模块导入mysql数据
mysqlxybase = pd.read_sql(xybase, conn)
# In[4]:
# sql语句
xydzd = "SELECT f23 as ajbh,f2 as kehu, f1 as shfzh18,f18 as ywy,f7 as hkrq,f10 as hkmx, f17 as hkbz from xydzd where f10 > 0 and f2 in ('兴业','广发')"
# 利用pandas 模块导入mysql数据
mysqlxydzd = pd.read_sql(xydzd, conn)
# In[5]:
# sql语句
mysqlprovince = "SELECT * from province"
# 利用pandas 模块导入mysql数据
province = pd.read_sql(mysqlprovince, conn)
# In[6]:
xydzd = mysqlxydzd
# In[7]:
xydzd['hkmx'] = xydzd['hkmx'].astype(float)
xydzd['hkbz'] = xydzd['hkbz'].astype(int)
# In[8]:
xydzd = xydzd[xydzd.hkbz==1]
xydzd = xydzd[xydzd.hkmx>0]
# In[9]:
xydzddata =xydzd[['ajbh','kehu','hkrq','hkmx','shfzh18']]
# In[10]:
xydzddata["shfzhnum"] = xydzddata["shfzh18"].str.len()
xydzddata = xydzddata[xydzddata.shfzhnum==18]
xydzddata["bornyear"]=xydzddata["shfzh18"].str.slice(6,10)
xydzddata["sex"]=xydzddata["shfzh18"].str.get(16)
xydzddata["address"]=xydzddata["shfzh18"].str.slice(0,2)
xydzddata["shfzhnum"] = xydzddata["shfzh18"].str.len()
# In[11]:
xydzddata['year'] = xydzddata['hkrq'].str.slice(0,4).astype(int)
xydzddata['month'] = xydzddata['hkrq'].str.slice(5,7).astype(int)
# In[12]:
xydzddata["nnn"] = 1
xydzddata["bornyear"] = xydzddata["bornyear"].astype(int)
xydzddata["age"] = 2017-xydzddata["bornyear"]
xydzddata["sex"] = xydzddata["sex"].astype(int)
xydzddata["sex"][xydzddata["sex"]%2==0]='女'
xydzddata["sex"][xydzddata["sex"]!='女']='男'
# In[13]:
xydzddata['address'] = xydzddata['address'].astype(int)
# In[14]:
xydzddata = pd.merge(xydzddata,province, left_on='address', right_on='shfnum', left_index=False, right_index=False ,how='left')
xydzddata = xydzddata[['ajbh','kehu','hkrq','hkmx','shfzh18','sex','age','province','year','month','nnn']]
xydzddata = xydzddata[xydzddata.year>2015]
# In[15]:
#每个客户每月回款情况
xydzdhkmx = xydzddata['hkmx'].groupby([xydzddata['kehu'], xydzddata['year'], xydzddata['month']]).sum().reset_index()
# In[16]:
#每个月还款次数
xydzddata1 = xydzddata[xydzddata.hkmx>0]
hkcsh = xydzddata1['ajbh'].groupby([xydzddata1['kehu'], xydzddata1['year'], xydzddata1['month']]).count().reset_index()
# In[17]:
#每个月还款案件
xydzddata2 = xydzddata[xydzddata.hkmx>0]
xydzddata2 = xydzddata2[['ajbh','kehu','year','month']]
xydzddata2 = xydzddata2.drop_duplicates()
hkaj = xydzddata2['ajbh'].groupby([xydzddata1['kehu'], xydzddata1['year'], xydzddata1['month']]).count().reset_index()
# In[18]:
#地区图
region = xydzddata1['nnn'].groupby([xydzddata1['kehu'], xydzddata1['year'], xydzddata1['province']]).count().reset_index()
# In[19]:
bins = [18,30,40,50,100]
# In[20]:
group_names = ['20-30','30-40','40-50','50以上']
# In[21]:
xydzddata1['age1']=pd.cut(xydzddata1['age'],bins,labels=group_names)
# In[22]:
agedata = xydzddata1['nnn'].groupby([xydzddata1['kehu'], xydzddata1['age1']]).count().reset_index()
agedata = agedata[(agedata.kehu == '兴业') | (agedata.kehu == '广发')]
# In[23]:
mysqlxybase['zjqkje'] = mysqlxybase['zjqkje'].astype(float)
mysqlxybase['zjshje'] = mysqlxybase['zjshje'].astype(float)
# In[24]:
xybase = mysqlxybase
xybasedata = xybase[['ajbh','kehu','shfzh18','zjqkje','zjshje','jdsj']]
xybasedata = xybasedata[xybasedata.zjqkje>0]
# In[25]:
xybasedata['year'] = xybasedata['jdsj'].str.slice(0,4).astype(int)
xybasedata['month'] = xybasedata['jdsj'].str.slice(5,7).astype(int)
# In[26]:
xybasedata = xybasedata[xybasedata.year>2015]
# In[27]:
#xybase去重
xybasedata = xybasedata.drop_duplicates()
# In[28]:
xybasezhanbi = xybasedata[['zjqkje','zjshje']].groupby([xybasedata['kehu'], xybasedata['year'], xybasedata['month']]).sum().reset_index()
# In[29]:
xybasezhanbi['zhanbi'] =round(xybasezhanbi['zjshje']/xybasezhanbi['zjqkje'],5)
# In[30]:
xingyehkmx = xydzdhkmx[(xydzdhkmx.kehu == '兴业')]
guangfahkmx = xydzdhkmx[(xydzdhkmx.kehu == '广发')]
# In[31]:
get_ipython().magic('matplotlib inline')
sns.set_style("whitegrid")
sns.set_context("talk")
mpl.rcParams['font.sans-serif'] = ['Microsoft YaHei'] #指定默认字体
mpl.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题
ax = sns.barplot(x="month", y="hkmx", hue="year", data=xingyehkmx)
ax.set_xlabel('月份',fontsize=15)
ax.set_ylabel('还款总额',fontsize=15)
ax.set_title('兴业客户',fontsize=15)
plt.show()
# In[32]:
ax1 = sns.barplot(x="month", y="hkmx", hue="year", data=guangfahkmx)
ax1.set_xlabel('月份',fontsize=15)
ax1.set_ylabel('还款总额',fontsize=15)
ax1.set_title('广发客户',fontsize=15)
plt.show()
# In[33]:
xingyehkzhb = xybasezhanbi[(xybasezhanbi.kehu == '兴业')]
guangfahkzhb = xybasezhanbi[(xybasezhanbi.kehu == '广发')]
xingyehkzhb['year'] = xingyehkzhb['year'].astype(str)
guangfahkzhb['year'] = guangfahkzhb['year'].astype(str)
# In[34]:
ggplot(aes(x='month', y='zhanbi', colour='year'), data=xingyehkzhb) + geom_point()+ geom_line()+ xlab('月份')+ ylab('还款占比')+ ggtitle('兴业客户还款占比情况')+ scale_x_continuous(breaks=range(1,13))
# In[35]:
ggplot(aes(x='month', y='zhanbi', colour='year'), data=guangfahkzhb) + geom_point()+ geom_line()+ xlab('月份')+ ylab('还款占比')+ ggtitle('广发客户还款占比情况')+ scale_x_continuous(breaks=range(1,13))
# In[36]:
plt.figure(1)
plt.figure(2)
plt1=plt.subplot(221)
plt2=plt.subplot(222)
plt.figure(1)
ax2 = sns.barplot(x="age1", y="nnn", hue="kehu", data=agedata)
ax2.set_xlabel('年龄段',fontsize=15)
ax2.set_ylabel('数量',fontsize=15)
ax2.set_title('年龄段回款分析',fontsize=15)
plt.sca(plt1)
explode = [0, 0.1, 0, 0]
xingyeagedata = agedata[agedata.kehu=='兴业']
plt.pie(x=xingyeagedata['nnn'], labels=xingyeagedata['age1'], explode=explode, autopct='%3.1f %%', shadow=True, labeldistance=1.1, startangle = 90,pctdistance = 0.6)
plt.title('兴业客户年龄段回款情况')
plt.sca(plt2)
explode = [0, 0.1, 0, 0]
guangfaagedata = agedata[agedata.kehu=='广发']
plt.title('广发客户年龄段回款情况')
plt.pie(x=guangfaagedata['nnn'], labels=guangfaagedata['age1'], explode=explode, autopct='%3.1f %%', shadow=True, labeldistance=1.1, startangle = 90,pctdistance = 0.6)
plt.show()
# In[37]:
#地区图
region1 = xydzddata1['hkmx'].groupby([xydzddata1['kehu'], xydzddata1['year'], xydzddata1['province']]).sum().reset_index()
# In[38]:
regiondata = pd.merge(region1,region,how='left',on=['kehu','year','province'])
# In[39]:
regiondata['newprovince'] = '地区'
# In[40]:
regiondata['province'] = regiondata['province'].astype(str)
regiondata['newprovince'] = regiondata['newprovince'].astype(str)
# In[41]:
regiondata['newprovince'][(regiondata.province == '北京') | (regiondata.province == '天津') | (regiondata.province == '河北') | (regiondata.province == '山西') | (regiondata.province == '内蒙古' )] = '华北'
# In[42]:
regiondata['newprovince'][(regiondata.province == '辽宁' )|( regiondata.province == '吉林') | (regiondata.province == '黑龙江') ] = '东北'
# In[43]:
regiondata['newprovince'][(regiondata.province == '上海' )| (regiondata.province == '江苏') |( regiondata.province == '浙江') | (regiondata.province == '安徽') | (regiondata.province == '福建' )| (regiondata.province == '江西' )| (regiondata.province == '山东' )] = '华东'
# In[44]:
regiondata['newprovince'][(regiondata.province == '河南') |( regiondata.province == '湖北') | (regiondata.province == '湖南') ] = '华中'
# In[45]:
regiondata['newprovince'][(regiondata.province == '重庆') | (regiondata.province == '四川') | (regiondata.province == '贵州' )|( regiondata.province == '云南') | (regiondata.province == '西藏') ] = '西南'
# In[46]:
regiondata['newprovince'][(regiondata.province == '陕西') | (regiondata.province == '甘肃') | (regiondata.province == '青海' )|( regiondata.province == '宁夏') | (regiondata.province == '新疆') ] = '西北'
# In[47]:
regiondata['newprovince'][(regiondata.province == '广东') | (regiondata.province == '广西' )| (regiondata.province == '海南' )] = '华南'
# In[48]:
regionplot = regiondata['hkmx'].groupby([regiondata['kehu'], regiondata['year'], regiondata['newprovince']]).sum().reset_index()
# In[49]:
regionplot['year'] = regionplot['year'].astype(str)
# In[50]:
plt.figure(1)
plt.figure(2)
plt.figure(1)
ax3 = sns.barplot(x="newprovince", y="hkmx", hue="kehu", data=regionplot[regionplot.year=='2016'])
ax3.set_xlabel('地区',fontsize=15)
ax3.set_ylabel('还款金额',fontsize=15)
ax3.set_title('2016地区回款分析',fontsize=15)
plt.figure(2)
ax4 = sns.barplot(x="newprovince", y="hkmx", hue="kehu", data=regionplot[regionplot.year=='2017'])
ax4.set_xlabel('地区',fontsize=15)
ax4.set_ylabel('还款金额',fontsize=15)
ax4.set_title('2017地区回款分析',fontsize=15)
plt.show()
# In[ ]:
# In[133]:
import plotly.plotly as py
import plotly.graph_objs as go
fig = {
"data": [
{
"values": [16, 25, 12, 6],
"labels": [
"20-30",
"30-40",
"40-50",
"50以上"
],
"domain": {"x": [0, .48]},
"name": "回款年龄段",
"hoverinfo":"label+percent+name",
"hole": .4,
"type": "pie"
},
{
"values": [27, 11, 25, 8],
"labels": [
"20-30",
"30-40",
"40-50",
"50以上"
],
"text":"委案年龄段",
"textposition":"inside",
"domain": {"x": [.52, 1]},
"name": "委案年龄段",
"hoverinfo":"label+percent+name",
"hole": .4,
"type": "pie"
}],
"layout": {
"title":"兴业客户回款与委案年龄段分析",
"annotations": [
{
"font": {
"size": 20
},
"showarrow": False,
"text": "回款年龄段占比",
"x": 0.20,
"y": 0.5
},
{
"font": {
"size": 20
},
"showarrow": False,
"text": "委案年龄段占比",
"x": 0.8,
"y": 0.5
}
]
}
}
fig['data'][0]['values'] = agedata['nnn'][agedata.kehu=='兴业']
pltoff.plot(fig)
# In[141]:
'''
绘制饼图
'''
import plotly.plotly as py
import plotly.graph_objs as go
import plotly
import plotly.offline as pltoff
dataset = {'labels':['男', '女'],
'values':[280, 100]}
sexdata = xydzddata.groupby('sex').count().reset_index()
sexdata = sexdata[['sex','nnn']]
dataset['values'] = sexdata['nnn']
data_g = []
tr_p = go.Pie(
labels = dataset['labels'],
values = dataset['values']
)
data_g.append(tr_p)
layout = go.Layout(title="兴业回款案件性别占比")
fig = go.Figure(data=data_g, layout=layout)
pltoff.plot(fig)
# In[125]:
import plotly.plotly as py
import plotly.graph_objs as go
trace0 = go.Bar(
x=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
y=[20, 14, 25, 16, 18, 22, 19, 15, 12, 16, 14, 17],
name='兴业',
marker=dict(
color='rgb(49,130,189)'
)
)
trace0['y'] = xingyehkmx['hkmx'][xingyehkmx.year==2016]
trace1 = go.Bar(
x=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
y=[50, 30,60, 30, 30, 40, 43, 32, 27, 32, 28, 35],
name='广发',
marker=dict(
color='rgb(204,204,204)',
)
)
trace1['y'] = guangfahkmx['hkmx'][guangfahkmx.year==2016]
data = [trace0, trace1]
layout = go.Layout(
xaxis=dict(tickangle=-45),
title='客户回款',
barmode='group',
)
fig = go.Figure(data=data, layout=layout)
pltoff.plot(fig)
# In[110]:
import plotly.plotly as py
import plotly.graph_objs as go
trace0 = go.Bar(
x=['东北', '华东','华中', '华北', '华南', '西北',
'西南'],
y=[20, 14, 25, 16, 18, 22, 19],
name='兴业',
marker=dict(
color='rgb(0,0,0)'
)
)
trace0['y'] = regionplot['hkmx'][(regionplot.year=='2017')&(regionplot.kehu=='兴业')]
trace1 = go.Bar(
x=['东北', '华东','华中', '华北', '华南', '西北',
'西南'],
y=[50, 30,60, 30, 30, 40, 43],
name='广发',
marker=dict(
color='rgb(255,0,0)',
)
)
trace1['y'] = regionplot['hkmx'][(regionplot.year=='2017')&(regionplot.kehu=='广发')]
data = [trace0, trace1]
layout = go.Layout(
xaxis=dict(tickangle=-45),
title='地区回款分布',
barmode='group',
)
fig = go.Figure(data=data, layout=layout)
pltoff.plot(fig)
# In[116]:
import plotly.offline as pltoff
'''
绘制普通线图
'''
#数据,x为横坐标,y,z为纵坐标的两项指标,三个array长度相同
dataset = {'x':[1,2,3,4,5,6,7,8,9,10,11,12],
'y':[0.05,0.04,0.01,0.03,0.011,0.02,0.06,0.07,0.019,0.020,0.025,0.08],
'z':[0.012,0.09,0.02,0.06,0.03,0.025,0.08,0.017,0.022,0.05,0.06,0.07]}
dataset['y'] = xingyehkzhb['zhanbi'][xingyehkzhb.year=='2016']
dataset['z'] = guangfahkzhb['zhanbi'][guangfahkzhb.year=='2016']
data_g = []
#分别插入 y, z
tr_x = go.Scatter(
x = dataset['x'],
y = dataset['y'],
name = '兴业回收率'
)
data_g.append(tr_x)
tr_z = go.Scatter(
x = dataset['x'],
y = dataset['z'],
name = '广发回收率'
)
data_g.append(tr_z)
#设置layout,指定图表title,x轴和y轴名称
layout = go.Layout(title="客户月回收率统计", xaxis={'title':'x'}, yaxis={'title':'value'})
#将layout设置到图表
fig = go.Figure(data=data_g, layout=layout)
#绘图,输出路径为name参数指定
pltoff.plot(fig)