-- Schema bootstrap for the Taobao user-behaviour analysis.
-- `if not exists` lets the script be re-run without failing on the database.
create database if not exists taobao;
use taobao;

-- Raw behaviour log: one row per recorded user action on an item.
-- `timestamps` is later converted with FROM_UNIXTIME, i.e. it holds epoch seconds.
create table use_behaviour (
    user_id        int(9),
    item_id        int(9),
    category_id    int(9),
    behaviour_type varchar(5),
    timestamps     int(14)
);

-- NOTE(review): no load statement is visible in this script; the rows are
-- presumably imported by an external tool before the checks below - confirm.
select count(*) from use_behaviour;
select * from use_behaviour limit 10;

-- The table is described only after it exists. The former
-- `change timestamp timestamps` rename is dropped because the DDL above
-- already creates the column under its final name, so the rename could only fail.
desc use_behaviour;
-- Duplicate check: list every (user, item, second) combination that occurs
-- more than once in the raw log.
select
    user_id,
    item_id,
    timestamps
from use_behaviour
group by
    user_id,
    item_id,
    timestamps
having count(*) > 1;

-- Add a surrogate key so the copies of a duplicated row can be told apart.
-- The column is added first and promoted to an auto-increment primary key in
-- a second step, with a preview after each step.
alter table use_behaviour add id int first;
select * from use_behaviour limit 10;
alter table use_behaviour modify id int primary key auto_increment;
select * from use_behaviour limit 10;

-- Remove duplicates: within each duplicated (user, item, second) group keep
-- the row with the smallest id and delete every row with a larger id.
delete ub
from use_behaviour as ub
inner join (
    select
        user_id,
        item_id,
        timestamps,
        min(id) as id
    from use_behaviour
    group by
        user_id,
        item_id,
        timestamps
    having count(*) > 1
) as first_copy
    on  ub.user_id    = first_copy.user_id
    and ub.item_id    = first_copy.item_id
    and ub.timestamps = first_copy.timestamps
where ub.id > first_copy.id;
-- Inspect buffer settings, then enlarge the InnoDB buffer pool for the bulk
-- updates that follow. The value is in bytes.
-- NOTE(review): hard-coded size; confirm it fits the host's memory.
show variables like '%_buffer%';
set global innodb_buffer_pool_size = 10700000000;

-- Convert the epoch-seconds column into a proper timestamp.
-- NOTE(review): FROM_UNIXTIME renders in the session time zone - confirm the
-- session is set to the zone the analysis expects.
alter table use_behaviour add datetimes TIMESTAMP(0);
update use_behaviour set datetimes = FROM_UNIXTIME(timestamps);
select * from use_behaviour limit 5;

-- Split the timestamp into date / time / hour text columns used by the
-- grouping queries later in the script.
alter table use_behaviour
    add dates char(10),
    add times char(8),
    add hours char(2);

-- One pass fills all three columns. The previous combined statement assigned
-- `dates` twice (the second time with the hour) and never set `hours`, which
-- then needed three further full-table updates to repair.
update use_behaviour
set dates = substring(datetimes, 1, 10),
    times = substring(datetimes, 12, 8),
    hours = substring(datetimes, 12, 2);

select * from use_behaviour limit 5;
select max(datetimes), min(datetimes) from use_behaviour;
-- Keep only events inside the analysis window 2017-11-25 .. 2017-12-03.
-- Written as a half-open range; at whole-second precision this removes exactly
-- the rows the former `> '2017-12-03 23:59:59'` bound removed.
-- The statement is now terminated so it no longer runs into the `desc` below.
delete from use_behaviour
where datetimes <  '2017-11-25 00:00:00'
   or datetimes >= '2017-12-04 00:00:00';

-- Sanity checks after the cleanup: structure, a few rows, remaining row count.
desc use_behaviour;
select * from use_behaviour limit 5;
select count(1) from use_behaviour;
-- Small working copy used to try out queries before running them on the
-- full table.
drop table if exists temp_behaviour;
create table temp_behaviour like use_behaviour;

-- Take the first 100000 rows by primary key. Without an ORDER BY the rows
-- picked by LIMIT are unspecified, so the sample could differ between runs.
insert into temp_behaviour
select *
from use_behaviour
order by id
limit 100000;

select * from temp_behaviour limit 5;
-- Exploratory queries on the sample table; only 'pv' (page view) rows count.

-- Page views per day.
select
    dates,
    count(*) as 'pv'
from temp_behaviour
where behaviour_type = 'pv'
group by dates;

-- Distinct viewing users per day.
select
    dates,
    count(distinct user_id) as 'uv'
from temp_behaviour
where behaviour_type = 'pv'
group by dates;

-- Both figures together plus views per viewing user, rounded to one decimal.
select
    dates,
    count(*) as 'pv',
    count(distinct user_id) as 'uv',
    round(count(*) / count(distinct user_id), 1) as 'pv/uv'
from temp_behaviour
where behaviour_type = 'pv'
group by dates;
-- Daily page views (pv), distinct viewing users (uv) and views per user (puv).
create table pv_uv_puv (
    dates char(10),
    pv    int(9),
    uv    int(9),
    puv   decimal(10,1)
);

-- Same aggregation as the exploratory query, run on the full table.
-- The explicit column list keeps the insert independent of column order.
insert into pv_uv_puv (dates, pv, uv, puv)
select
    dates,
    count(*) as pv,
    count(distinct user_id) as uv,
    round(count(*) / count(distinct user_id), 1) as puv
from use_behaviour
where behaviour_type = 'pv'
group by dates;

-- Terminated so that it no longer runs into the following delete.
select * from pv_uv_puv;

-- Drop rows whose date could not be derived, from both the summary and the
-- source table.
delete from pv_uv_puv where dates is null;
delete from use_behaviour where dates is null;
-- Distinct (user, active day) combinations in the sample.
select
    user_id,
    dates
from temp_behaviour
group by
    user_id,
    dates;

-- Self-join of those combinations: for every user, pair each active day (a)
-- with every strictly later active day (b).
with user_day as (
    select
        user_id,
        dates
    from temp_behaviour
    group by
        user_id,
        dates
)
select *
from user_day as a
inner join user_day as b
    on  a.user_id = b.user_id
    and a.dates < b.dates;
-- Retention counts on the sample. Each user's active day (a) is paired with
-- the same or a later active day (b) of that user:
--   retention_0 = users active on a.dates (the same-day match),
--   retention_1 = of those, users active again one day later,
--   retention_3 = of those, users active again three days later.
-- Aliases corrected from `rentention_1` / `rentention_33`.
with user_day as (
    select
        user_id,
        dates
    from temp_behaviour
    group by
        user_id,
        dates
)
select
    a.dates,
    count(if(datediff(b.dates, a.dates) = 0, b.user_id, null)) as retention_0,
    count(if(datediff(b.dates, a.dates) = 1, b.user_id, null)) as retention_1,
    count(if(datediff(b.dates, a.dates) = 3, b.user_id, null)) as retention_3
from user_day as a
inner join user_day as b
    on  a.user_id = b.user_id
    and a.dates <= b.dates
group by a.dates;

-- Next-day retention rate: users back one day later divided by users active
-- on the base day. The denominator always includes the same-day match, so it
-- is never zero for a returned group.
with user_day as (
    select
        user_id,
        dates
    from temp_behaviour
    group by
        user_id,
        dates
)
select
    a.dates,
    count(if(datediff(b.dates, a.dates) = 1, b.user_id, null))
        / count(if(datediff(b.dates, a.dates) = 0, b.user_id, null)) as retention_1
from user_day as a
inner join user_day as b
    on  a.user_id = b.user_id
    and a.dates <= b.dates
group by a.dates;
-- Next-day retention rate per base date, computed on the full table.
-- The CREATE TABLE is now terminated; previously it ran straight into the
-- INSERT and the pair could not be executed as a script.
create table retention_rate (
    dates       char(10),
    retention_1 float
);

-- a = a user's active day, b = the same or a later active day of that user.
-- Rate = users active again one day later / users active on the base day.
insert into retention_rate (dates, retention_1)
select
    a.dates,
    count(if(datediff(b.dates, a.dates) = 1, b.user_id, null))
        / count(if(datediff(b.dates, a.dates) = 0, b.user_id, null)) as retention_1
from (
    select user_id, dates
    from use_behaviour
    group by user_id, dates
) as a
inner join (
    select user_id, dates
    from use_behaviour
    group by user_id, dates
) as b
    on  a.user_id = b.user_id
    and a.dates <= b.dates
group by a.dates;
-- Number of users that have exactly one recorded behaviour row.
-- Terminated so that it no longer runs into the next statement.
select count(*)
from (
    select user_id
    from use_behaviour
    group by user_id
    having count(behaviour_type) = 1
) as a;

-- Total page views across all days, taken from the daily summary table.
select sum(pv) from pv_uv_puv;
-- Exploratory run on the sample: number of events of each behaviour type per
-- date and hour. Each statement in this section is now terminated; before,
-- the SELECT, CREATE TABLE and INSERT ran into one another.
select
    dates,
    hours,
    count(if(behaviour_type = 'pv',   behaviour_type, null)) as 'pv',
    count(if(behaviour_type = 'cart', behaviour_type, null)) as 'cart',
    count(if(behaviour_type = 'fav',  behaviour_type, null)) as 'fav',
    count(if(behaviour_type = 'buy',  behaviour_type, null)) as 'buy'
from temp_behaviour
group by dates, hours
order by dates, hours;

-- Result table for the same breakdown on the full data set.
create table date_hour_behaviour (
    dates char(10),
    hours char(2),
    pv    int,
    cart  int,
    fav   int,
    buy   int
);

insert into date_hour_behaviour (dates, hours, pv, cart, fav, buy)
select
    dates,
    hours,
    count(if(behaviour_type = 'pv',   behaviour_type, null)) as pv,
    count(if(behaviour_type = 'cart', behaviour_type, null)) as cart,
    count(if(behaviour_type = 'fav',  behaviour_type, null)) as fav,
    count(if(behaviour_type = 'buy',  behaviour_type, null)) as buy
from use_behaviour
group by dates, hours
order by dates, hours;

select * from date_hour_behaviour;
-- Exploratory run on the sample: distinct users per behaviour type.
select
    behaviour_type,
    count(distinct user_id) as user_num
from temp_behaviour
group by behaviour_type
order by behaviour_type desc;

-- Result table for the same figure on the full data set.
create table behaviour_user_num (
    behaviour_type varchar(5),
    user_num       int
);

insert into behaviour_user_num (behaviour_type, user_num)
select
    behaviour_type,
    count(distinct user_id) as user_num
from use_behaviour
group by behaviour_type
order by behaviour_type desc;

select * from behaviour_user_num;

-- NOTE(review): hard-coded ratio, presumably two user counts copied from the
-- result above - confirm which behaviour types they belong to.
-- Terminated so that it no longer runs into the following SELECT.
select 12630/18453;
-- Exploratory run on the sample: number of events per behaviour type.
select
    behaviour_type,
    count(*) as user_num
from temp_behaviour
group by behaviour_type
order by behaviour_type desc;

-- Result table for the same count on the full data set.
create table behaviour_num (
    behaviour_type varchar(5),
    behaviour_num  int
);

insert into behaviour_num
select
    behaviour_type,
    count(*) as behaviour_count_num
from use_behaviour
group by behaviour_type
order by behaviour_type desc;

select * from behaviour_num;
-- Exploratory run on the sample: per (user, item) counts of each behaviour type.
select
    user_id,
    item_id,
    count(if(behaviour_type = 'pv',   behaviour_type, null)) as 'pv',
    count(if(behaviour_type = 'cart', behaviour_type, null)) as 'cart',
    count(if(behaviour_type = 'fav',  behaviour_type, null)) as 'fav',
    count(if(behaviour_type = 'buy',  behaviour_type, null)) as 'buy'
from temp_behaviour
group by user_id, item_id;

-- Result table for the full data set. It is created directly under its final
-- name and with the correctly spelled `user_id` column; the earlier version
-- created `use_behaviout_view.uesr_id` and repaired both with ALTER/RENAME.
-- The end state is the same: use_behaviour_view(user_id int, item_id, ...).
create table use_behaviour_view (
    user_id int,
    item_id int(9),
    pv      int,
    cart    int,
    fav     int,
    buy     int
);

insert into use_behaviour_view (user_id, item_id, pv, cart, fav, buy)
select
    user_id,
    item_id,
    count(if(behaviour_type = 'pv',   behaviour_type, null)) as pv,
    count(if(behaviour_type = 'cart', behaviour_type, null)) as cart,
    count(if(behaviour_type = 'fav',  behaviour_type, null)) as fav,
    count(if(behaviour_type = 'buy',  behaviour_type, null)) as buy
from use_behaviour
group by user_id, item_id;

select * from use_behaviour_view;
-- Turn the per (user, item) counts into 0/1 flags:
--   浏览了 = viewed, 收藏了 = favourited, 加购了 = added to cart, 购买了 = purchased.
-- Statements in this section are now terminated; before, the CREATE VIEW ran
-- into the following SELECT and that SELECT into the next CREATE VIEW.
create view user_behaviour_standard as
select
    user_id,
    item_id,
    (case when pv   > 0 then 1 else 0 end) as 浏览了,
    (case when fav  > 0 then 1 else 0 end) as 收藏了,
    (case when cart > 0 then 1 else 0 end) as 加购了,
    (case when buy  > 0 then 1 else 0 end) as 购买了
from use_behaviour_view;

-- Preview: for (user, item) pairs that ended in a purchase, concatenate the
-- four flags into a 4-character path code (购买路径类型), e.g. '1011'.
select
    *,
    concat(浏览了, 收藏了, 加购了, 购买了) as 购买路径类型
from user_behaviour_standard as a
where a.购买了 > 0;

-- Number of purchasing (user, item) pairs per path code (数量 = count),
-- most frequent first.
create view path_count as
select
    购买路径类型,
    count(*) as 数量
from (
    select
        *,
        concat(浏览了, 收藏了, 加购了, 购买了) as 购买路径类型
    from user_behaviour_standard as a
    where a.购买了 > 0
) as b
group by 购买路径类型
order by 数量 desc;
-- Lookup table mapping each 4-character purchase-path code to a readable
-- description. Code positions: viewed, favourited, added to cart, purchased.
create table renhua (
    path_type   char(4),
    description varchar(40)
);

-- The INSERT and the check query are now terminated; before, both ran into
-- the statement that followed them.
insert into renhua (path_type, description)
values
    ('0001', '直接购买了'),
    ('1001', '浏览后购买了'),
    ('0011', '加购后购买了'),
    ('1011', '浏览加购后购买了'),
    ('0101', '收藏后购买了'),
    ('1101', '浏览收藏后购买了'),
    ('0111', '收藏加购后购买了'),
    ('1111', '浏览收藏加购后购买了');

select * from renhua;

-- Path counts joined with their descriptions, most frequent path first.
select *
from path_count as p
inner join renhua as r
    on p.购买路径类型 = r.path_type
order by 数量 desc;
-- Materialised purchase-path statistics: code, description, number of
-- purchasing (user, item) pairs.
create table path_result (
    path_type   char(4),
    description varchar(40),
    num         int
);

insert into path_result (path_type, description, num)
select
    r.path_type,
    r.description,
    p.数量 as num
from path_count as p
inner join renhua as r
    on p.购买路径类型 = r.path_type
order by p.数量 desc;

-- Both queries below are now terminated; before, they ran into each other
-- and into the statement that followed.
select * from path_result;

-- Total number of purchases made without favouriting or adding to cart.
select sum(buy)
from use_behaviour_view
where buy > 0
  and fav = 0
  and cart = 0;
-- Exploratory RFM inputs on the sample table; only 'buy' rows are used.
-- Output columns: 最近购买时间 = most recent purchase date,
--                 购买次数 = number of purchases.

-- Recency: latest purchase date per user, newest first.
select
    user_id,
    max(dates) as '最近购买时间'
from temp_behaviour
where behaviour_type = 'buy'
group by user_id
order by max(dates) desc;

-- Frequency: purchase count per user, highest first.
select
    user_id,
    count(user_id) as '购买次数'
from temp_behaviour
where behaviour_type = 'buy'
group by user_id
order by count(user_id) desc;

-- Both measures together, ordered by recency and then frequency.
select
    user_id,
    max(dates) as '最近购买时间',
    count(user_id) as '购买次数'
from temp_behaviour
where behaviour_type = 'buy'
group by user_id
order by
    max(dates) desc,
    count(user_id) desc;
-- RFM base table: purchase count (frequency) and latest purchase date
-- (recent) per buying user.
drop table if exists rfm_model;

-- Now terminated; before, the CREATE TABLE ran straight into the INSERT.
create table rfm_model (
    user_id   int,
    frequency int,
    recent    char(10)
);

-- The explicit column list makes the frequency/recent mapping visible, since
-- the select lists the count before the date.
insert into rfm_model (user_id, frequency, recent)
select
    user_id,
    count(user_id) as frequency,
    max(dates)     as recent
from use_behaviour
where behaviour_type = 'buy'
group by user_id
order by
    count(user_id) desc,
    max(dates) desc;

select * from rfm_model;
-- Recency score: 5 = bought on the last day of the 2017-11-25 .. 2017-12-03
-- window, down to 1 for the two earliest days.
alter table rfm_model add column rscore int;

-- The buckets now cover the window in consecutive two-day steps. Before,
-- '2017-11-30' appeared in no bucket and fell through to score 1, ranking it
-- below older purchase dates. The statement is also terminated now.
update rfm_model
set rscore = case
    when recent = '2017-12-03' then 5
    when recent in ('2017-12-01', '2017-12-02') then 4
    when recent in ('2017-11-29', '2017-11-30') then 3
    when recent in ('2017-11-27', '2017-11-28') then 2
    else 1
end;
-- Frequency score: 5 for the heaviest buyers down to 1 for the lightest.
alter table rfm_model add column fscore int;

-- Observed range of purchase counts, used to pick the thresholds below.
select max(frequency), min(frequency) from rfm_model;

-- Tiers: >= 40 -> 5, 21-39 -> 4, 11-20 -> 3, 5-10 -> 2, otherwise 1.
-- Descending lower bounds replace the former BETWEEN ranges:
-- `between 72 and 40` could never match (lower bound above upper bound), and
-- `between 11 and 200` overlapped the higher tiers, so every user with 40 or
-- more purchases was scored 3 instead of 5.
-- The statement is also terminated now.
update rfm_model
set fscore = case
    when frequency >= 40 then 5
    when frequency >= 21 then 4
    when frequency >= 11 then 3
    when frequency >= 5  then 2
    else 1
end;
-- Average scores, kept in session variables for the classification below.
set @f_avg = null;
set @r_avg = null;
select avg(fscore) into @f_avg from rfm_model;
select avg(rscore) into @r_avg from rfm_model;

-- Classify each user by comparing both scores with their averages:
--   above-average F and R -> '价值用户' (valuable)
--   above-average F only  -> '保持用户' (to keep)
--   above-average R only  -> '发展用户' (to develop)
--   neither               -> '挽留用户' (to win back)
-- The branches are exhaustive. Before, the strict `<` / `>` comparisons left
-- a user whose score equals the average with a NULL class.
-- This preview is now terminated; it used to run into the ALTER TABLE.
select
    *,
    (case
        when fscore > @f_avg and rscore > @r_avg then '价值用户'
        when fscore > @f_avg then '保持用户'
        when rscore > @r_avg then '发展用户'
        else '挽留用户'
    end) as class
from rfm_model;

-- Persist the classification.
alter table rfm_model add column class varchar(40);

update rfm_model
set class = case
    when fscore > @f_avg and rscore > @r_avg then '价值用户'
    when fscore > @f_avg then '保持用户'
    when rscore > @r_avg then '发展用户'
    else '挽留用户'
end;

select * from rfm_model limit 10;

-- Number of users in each class.
select
    class,
    count(distinct user_id)
from rfm_model
group by class;
-- Exploratory popularity rankings on the sample table. Output columns:
--   品类浏览量 = category views, 商品浏览量 = item views,
--   品类商品浏览量 = views of an item within its category.

-- Ten most viewed categories.
select
    category_id,
    count(if(behaviour_type = 'pv', behaviour_type, null)) as '品类浏览量'
from temp_behaviour
group by category_id
order by count(if(behaviour_type = 'pv', behaviour_type, null)) desc
limit 10;

-- Ten most viewed items.
select
    item_id,
    count(if(behaviour_type = 'pv', behaviour_type, null)) as '商品浏览量'
from temp_behaviour
group by item_id
order by count(if(behaviour_type = 'pv', behaviour_type, null)) desc
limit 10;

-- Most viewed item of each category (rank 1; ties all get rank 1), then the
-- ten such items with the most views. The ORDER BY that used to sit inside
-- the derived table is removed because the outer query re-sorts anyway.
-- The statement is also terminated now; it used to run into a CREATE TABLE.
select
    category_id,
    item_id,
    品类商品浏览量
from (
    select
        category_id,
        item_id,
        count(if(behaviour_type = 'pv', behaviour_type, null)) as `品类商品浏览量`,
        rank() over (
            partition by category_id
            order by count(if(behaviour_type = 'pv', behaviour_type, null)) desc
        ) as r
    from temp_behaviour
    group by category_id, item_id
) as a
where r = 1
order by a.品类商品浏览量 desc
limit 10;
-- Result tables for the popularity rankings on the full data set.
create table popular_categories (
    category_id int,
    pv          int
);

create table popular_items (
    item_id int,
    pv      int
);

create table popular_cateitems (
    category_id int,
    item_id     int,
    pv          int
);

-- Ten most viewed categories.
insert into popular_categories (category_id, pv)
select
    category_id,
    count(if(behaviour_type = 'pv', behaviour_type, null)) as pv
from use_behaviour
group by category_id
order by pv desc
limit 10;

-- Ten most viewed items.
insert into popular_items (item_id, pv)
select
    item_id,
    count(if(behaviour_type = 'pv', behaviour_type, null)) as pv
from use_behaviour
group by item_id
order by pv desc
limit 10;

-- Most viewed item of each category, limited to the ten with the most views.
-- This now applies the same rank-1-per-category filter as the exploratory
-- query on the sample table. Before, the insert took the ten most viewed
-- (category, item) pairs overall, so one category could appear repeatedly.
-- NOTE(review): confirm the per-category ranking is the intended result.
insert into popular_cateitems (category_id, item_id, pv)
select
    category_id,
    item_id,
    pv
from (
    select
        category_id,
        item_id,
        count(if(behaviour_type = 'pv', behaviour_type, null)) as pv,
        rank() over (
            partition by category_id
            order by count(if(behaviour_type = 'pv', behaviour_type, null)) desc
        ) as r
    from use_behaviour
    group by category_id, item_id
) as ranked
where r = 1
order by pv desc
limit 10;

select * from popular_cateitems;
-- Per-item behaviour counts plus conversion rate (商品转化率):
-- distinct buying users / distinct users with any behaviour on the item.
select
    item_id,
    count(if(behaviour_type = 'pv',   behaviour_type, null)) as 'pv',
    count(if(behaviour_type = 'cart', behaviour_type, null)) as 'cart',
    count(if(behaviour_type = 'fav',  behaviour_type, null)) as 'fav',
    count(if(behaviour_type = 'buy',  behaviour_type, null)) as 'buy',
    count(distinct if(behaviour_type = 'buy', user_id, null))
        / count(distinct user_id) as 商品转化率
from use_behaviour
group by item_id
order by 商品转化率 desc;

-- Result table for the same figures.
create table item_detail (
    item_id       int,
    pv            int,
    cart          int,
    fav           int,
    buy           int,
    user_buy_rate float
);

insert into item_detail (item_id, pv, cart, fav, buy, user_buy_rate)
select
    item_id,
    count(if(behaviour_type = 'pv',   behaviour_type, null)) as pv,
    count(if(behaviour_type = 'cart', behaviour_type, null)) as cart,
    count(if(behaviour_type = 'fav',  behaviour_type, null)) as fav,
    count(if(behaviour_type = 'buy',  behaviour_type, null)) as buy,
    count(distinct if(behaviour_type = 'buy', user_id, null))
        / count(distinct user_id) as user_buy_rate
from use_behaviour
group by item_id
order by user_buy_rate desc;

-- Terminated so that it no longer runs into the following SELECT.
select * from item_detail;
-- Per-category behaviour counts plus conversion rate (品类转化率):
-- distinct buying users / distinct users with any behaviour in the category.
select
    category_id,
    count(if(behaviour_type = 'pv',   behaviour_type, null)) as 'pv',
    count(if(behaviour_type = 'cart', behaviour_type, null)) as 'cart',
    count(if(behaviour_type = 'fav',  behaviour_type, null)) as 'fav',
    count(if(behaviour_type = 'buy',  behaviour_type, null)) as 'buy',
    count(distinct if(behaviour_type = 'buy', user_id, null))
        / count(distinct user_id) as 品类转化率
from use_behaviour
group by category_id
order by 品类转化率 desc;

-- Result table for the same figures.
create table category_detail (
    category_id   int,
    pv            int,
    cart          int,
    fav           int,
    buy           int,
    user_buy_rate float
);

-- The explicit column list keeps the insert independent of column order.
insert into category_detail (category_id, pv, cart, fav, buy, user_buy_rate)
select
    category_id,
    count(if(behaviour_type = 'pv',   behaviour_type, null)) as pv,
    count(if(behaviour_type = 'cart', behaviour_type, null)) as cart,
    count(if(behaviour_type = 'fav',  behaviour_type, null)) as fav,
    count(if(behaviour_type = 'buy',  behaviour_type, null)) as buy,
    count(distinct if(behaviour_type = 'buy', user_id, null))
        / count(distinct user_id) as user_buy_rate
from use_behaviour
group by category_id
order by user_buy_rate desc;

-- Terminated so that anything appended after it parses as a new statement.
select * from category_detail;