1.拿出发标时间在2018年4月至2018年6月成交的新客客户,区分M站与APP,计算他们的年龄,性别,1期30+逾期金额
2.计算发标前一个月内,用户午夜通话次数的占比
3.拿出题1中的用户在发标前1周的平均通话间隔时长
(逻辑:半夜通话较多的用户,除特殊工种以外,有可能近期遇到紧急事件,需要到处筹钱
发标前一周频繁通话,是否是频繁接到催收电话,或者急用钱到处打电话借钱)
~逾期表 yuqi
userid, listingid, creation_date, principal(本金), duedate_30 (30+逾期金额) , risk_category(发标类型)
~通话历史表 tonghualishi
userid,calltime(通话时间),duration(通话时长,TIME 类型,如 00:01:20),inserttime(记录入库时间)
~身份信息的表 shenfenxinxi
字段:userid, idnumber, reg_time(入表时间)
创建表:
# Overdue table `yuqi`: user, listing, creation date, principal,
# 30+ day overdue amount and listing type.
# The two amount columns are DECIMAL (not VARCHAR) so they can be summed
# and compared numerically without implicit string-to-number casts.
create table yuqi (
    userid        int,
    listingid     int,
    creation_date date,
    principal     decimal(12, 2),  # principal
    duedate_30    decimal(12, 2),  # 30+ day overdue amount
    risk_category varchar(45)      # listing type
);

# Sample rows. The explicit column list keeps the insert valid even if
# columns are later added to or reordered in the table.
insert into yuqi (userid, listingid, creation_date, principal, duedate_30, risk_category)
values
    (001, 1001, '2018-04-01', 2000, 0, '非APP新客'),
    (002, 1002, '2018-05-01', 4000, 0, 'APP新客');
# Identity table `shenfenxinxi`: ID-card number and registration time per user.
create table shenfenxinxi (
    userid   int,
    idnumber varchar(45),
    reg_time date
);

# Sample rows: one 15-character and one 18-character ID number.
insert into shenfenxinxi (userid, idnumber, reg_time)
values
    (001, '330621940801000',    '2017-01-01'),
    (002, '33062119980101001x', '2017-01-01');
# Call-history table `tonghualishi`: one row per call.
# `duration` is a TIME value (hh:mm:ss), not a number of seconds.
create table tonghualishi (
    userid     int,
    calltime   datetime,
    duration   time,
    inserttime datetime
);

# Sample rows: three calls for user 1, two calls for user 2.
insert into tonghualishi (userid, calltime, duration, inserttime)
values
    (001, '2018-03-26 02:00:30', '00:00:30', '2017-01-01 00:00:30'),
    (001, '2018-03-25 06:00:00', '00:00:40', '2017-01-01 00:00:30'),
    (001, '2018-03-29 04:00:00', '00:01:20', '2017-01-01 00:00:30'),
    (002, '2018-04-10 04:00:00', '00:00:20', '2017-01-01 00:00:30'),
    (002, '2018-04-30 12:00:10', '00:00:50', '2017-01-01 00:00:30');
解答:
1.拿出发标时间在2018年4月至2018年6月成交的新客客户,区分M站与APP,计算他们的年龄,性别,1期30+逾期金额
# Step 1: new-customer listings created from 2018-04-01 up to (but not
# including) 2018-07-01, tagged with a channel label in `cj_type`.
# No `status like '%成功%'` filter is applied: the rows come from the overdue
# table, so they are taken to be listings that were already funded.
create table t0 as
select
    y.userid,
    y.listingid,
    y.creation_date,
    y.principal,
    y.duedate_30,
    case y.risk_category
        when '非APP新客' then '新客闪电'
        else '新客APP'
    end as cj_type
from yuqi as y
where y.creation_date >= '2018-04-01'
  and y.creation_date <  '2018-07-01'
  and (y.risk_category = '非APP新客' or y.risk_category = 'APP新客');
# Step 2: add age (in full years at listing creation) and gender, both derived
# from the ID-card number.
# NOTE(review): the original statement was cut off right after the first
# `date_format(creation_date,'%m%d')`; everything from that comparison onwards
# (birthday adjustment, 15-digit branch, gender, FROM/JOIN) is reconstructed
# from the task description -- confirm against the intended logic.
# Assumed ID layouts -- TODO confirm:
#   18 chars: birth date yyyymmdd at positions 7-14, gender digit at position 17
#   15 chars: birth date yymmdd   at positions 7-12 (century assumed to be 19xx),
#             gender digit at position 15
# An odd gender digit is mapped to '男', an even one to '女'.
create table t1 as
select
    a.userid,
    a.listingid,
    a.creation_date,
    a.principal,
    a.duedate_30,
    a.cj_type,
    case
        when length(b.idnumber) = 18 then
            year(a.creation_date) - cast(substr(b.idnumber, 7, 4) as signed)
            # subtract one year if the birthday has not yet occurred in the creation year
            - case
                  when date_format(a.creation_date, '%m%d') < substr(b.idnumber, 11, 4) then 1
                  else 0
              end
        when length(b.idnumber) = 15 then
            year(a.creation_date) - cast(concat('19', substr(b.idnumber, 7, 2)) as signed)
            - case
                  when date_format(a.creation_date, '%m%d') < substr(b.idnumber, 9, 4) then 1
                  else 0
              end
        else null
    end as age,
    case
        when length(b.idnumber) = 18 then
            case when mod(cast(substr(b.idnumber, 17, 1) as unsigned), 2) = 1 then '男' else '女' end
        when length(b.idnumber) = 15 then
            case when mod(cast(substr(b.idnumber, 15, 1) as unsigned), 2) = 1 then '男' else '女' end
        else null
    end as gender
from t0 as a
# left join keeps listings whose user has no identity row (age/gender stay NULL).
# NOTE(review): assumes at most one shenfenxinxi row per userid; otherwise
# listings are duplicated here -- verify.
left join shenfenxinxi as b
    on b.userid = a.userid;
2.计算发标前一个月内,用户午夜通话次数的占比
# Step 1: pair every listing in t1 with that user's calls made 1 to 30 calendar
# days before the listing creation date, keeping only call records whose
# inserttime is not later than the creation date.
create table t2 as
select
    lst.userid,
    lst.listingid,
    lst.creation_date,
    lst.principal,
    lst.duedate_30,
    lst.cj_type,
    cl.calltime
from t1 as lst
inner join tonghualishi as cl
    on cl.userid = lst.userid
where datediff(lst.creation_date, cl.calltime) between 1 and 30
  and cl.inserttime <= lst.creation_date;
# Step 2: total number of calls (zth_cs) per user and listing within the window
# captured by t2.
create table t3 as
select
    calls.userid,
    calls.listingid,
    count(calls.calltime) as zth_cs
from t2 as calls
group by
    calls.userid,
    calls.listingid;
# Step 3: number of midnight calls (wyth_cs) per user and listing.
# A call counts as "midnight" when its hh:mm part lies in ['02:00', '05:00'];
# because only hh:mm is compared, calls up to 05:00:59 are included.
create table t4 as
select
    calls.userid,
    calls.listingid,
    count(calls.calltime) as wyth_cs
from t2 as calls
where substr(calls.calltime, 12, 5) between '02:00' and '05:00'
group by
    calls.userid,
    calls.listingid;
# Step 4: share of midnight calls per listing: wy_zb = wyth_cs / zth_cs.
# t2 holds one row per call, so selecting from it directly would repeat every
# listing once per call record. The listing attributes are therefore
# de-duplicated first; the DISTINCT is deliberate, not a join workaround.
# t3 and t4 are grouped by (userid, listingid), so both keys are used to join.
create table t5 as
select
    l.userid,
    l.listingid,
    l.creation_date,
    l.principal,
    l.duedate_30,
    l.cj_type,
    # listings without any midnight call have no t4 row -> count them as 0
    coalesce(m.wyth_cs, 0) / t.zth_cs as wy_zb
from (
    select distinct
        userid,
        listingid,
        creation_date,
        principal,
        duedate_30,
        cj_type
    from t2
) as l
inner join t3 as t
    on  t.userid    = l.userid
    and t.listingid = l.listingid
left join t4 as m
    on  m.userid    = l.userid
    and m.listingid = l.listingid
# guards the division; count(calltime) is 0 when every calltime is NULL
where t.zth_cs > 0;
3.拿出题1中的用户在发标前1周的平均通话间隔时长
# Step 1: call records made 1 to 7 calendar days before the listing creation
# date, keeping only records whose inserttime is not later than that date.
create table t6 as
select
    lst.userid,
    lst.listingid,
    lst.creation_date,
    cl.calltime,
    cl.duration
from t1 as lst
inner join tonghualishi as cl
    on cl.userid = lst.userid
where datediff(lst.creation_date, cl.calltime) between 1 and 7
  and cl.inserttime <= lst.creation_date;
# Step 2: number each listing's calls in chronological order.
# r = how many calls of the same listing started at or before this call,
# so the earliest call gets r = 1.
# NOTE(review): calls of one listing that share the exact same calltime get the
# same r, which leaves a gap in the sequence -- confirm whether ties can occur.
create table t7 as
select
    cur.userid,
    cur.listingid,
    cur.creation_date,
    cur.calltime,
    cur.duration,
    (
        select count(*)
        from t6 as earlier
        where earlier.listingid = cur.listingid
          and earlier.calltime <= cur.calltime
    ) as r
from t6 as cur;
# Step 3: for two consecutive calls, the gap is the start of the later call
# minus (start of the earlier call + its duration); the gaps are then averaged.
# Inner part: pair each call (start time t1) with the next call of the same
# listing (start time t2) via consecutive rank values r.
select
    prev_call.userid,
    prev_call.listingid,
    prev_call.calltime as t1,
    next_call.calltime as t2,
    prev_call.duration
from t7 as prev_call
inner join t7 as next_call
    on  next_call.listingid = prev_call.listingid
    and next_call.r = prev_call.r + 1;
# Before averaging: gap in seconds for every pair of consecutive calls.
# `duration` is a TIME column; used directly in arithmetic MySQL converts it to
# the number HHMMSS (00:01:20 -> 120), so it is converted with time_to_sec()
# (00:01:20 -> 80) before being added to the earlier call's start time.
select
    g.userid,
    g.listingid,
    unix_timestamp(g.next_calltime)
        - (unix_timestamp(g.prev_calltime) + time_to_sec(g.duration)) as thjg
from (
    select
        p.userid,
        p.listingid,
        p.calltime as prev_calltime,
        n.calltime as next_calltime,
        p.duration
    from t7 as p
    inner join t7 as n
        on  n.listingid = p.listingid
        and n.r = p.r + 1
) as g;
# Final result: average gap in seconds (thjg_avg) between consecutive calls,
# one row per user and listing.
# Two fixes compared with a plain avg over the pairs:
#   * `duration` is a TIME column; in arithmetic MySQL would read 00:01:20 as
#     120, so time_to_sec() is used to get real seconds (80).
#   * the average is grouped by userid, listingid; without GROUP BY all
#     listings would collapse into a single row (or the statement would be
#     rejected under ONLY_FULL_GROUP_BY).
create table t8 as
select
    g.userid,
    g.listingid,
    avg(
        unix_timestamp(g.next_calltime)
        - (unix_timestamp(g.prev_calltime) + time_to_sec(g.duration))
    ) as thjg_avg
from (
    # each call paired with the next call of the same listing
    select
        p.userid,
        p.listingid,
        p.calltime as prev_calltime,
        n.calltime as next_calltime,
        p.duration
    from t7 as p
    inner join t7 as n
        on  n.listingid = p.listingid
        and n.r = p.r + 1
) as g
group by
    g.userid,
    g.listingid;