-- Taobao user-behaviour data: data cleaning, time-series analysis, customer-acquisition analysis, purchase-path analysis

-- Create the working database (idempotent) and switch to it.
create database if not exists taobao;
use taobao;

-- Raw behaviour log, one row per user action.
--   behaviour_type : one of 'pv', 'cart', 'fav', 'buy' (the values filtered on later in this script)
--   timestamps     : Unix epoch seconds (converted with FROM_UNIXTIME further down)
-- Integer display widths such as int(9) are deprecated in MySQL 8, so plain int is used.
create table if not exists use_behaviour (
    user_id        int,
    item_id        int,
    category_id    int,
    behaviour_type varchar(5),
    timestamps     int
);

-- Inspect the structure only after the table exists
-- (the original ran DESC before CREATE TABLE, which fails on a fresh database).
desc use_behaviour;

-- How many rows have been imported so far, plus a small preview.
select count(*) from use_behaviour;
select * from use_behaviour limit 10;

-- The DDL above already names the epoch column `timestamps`, so no rename is required.
-- Only if an import tool created the table itself with a column called `timestamp`, run:
--   alter table use_behaviour change `timestamp` timestamps int;


-- Data-quality check 1: count rows with a NULL in any of the raw columns.
select count(*) as null_rows
from use_behaviour
where user_id is null
   or item_id is null
   or category_id is null
   or behaviour_type is null
   or timestamps is null;

-- Data-quality check 2: list (user, item, timestamp) combinations that occur more than once.
select user_id, item_id, timestamps
from use_behaviour
group by user_id, item_id, timestamps
having count(*) > 1;


-- Deduplication.
-- Step 1: add a surrogate id column as the first column of the table.
alter table use_behaviour
    add id int first;
select * from use_behaviour limit 10;

-- Step 2: turn id into an auto-increment primary key.
alter table use_behaviour
    modify id int primary key auto_increment;
select * from use_behaviour limit 10;

-- Step 3: within every duplicated (user_id, item_id, timestamps) group keep the row
-- with the smallest id and delete all rows whose id is larger.
delete dup
from use_behaviour as dup
inner join (
    select user_id, item_id, timestamps, min(id) as id
    from use_behaviour
    group by user_id, item_id, timestamps
    having count(*) > 1
) as keeper
    on  dup.user_id    = keeper.user_id
    and dup.item_id    = keeper.item_id
    and dup.timestamps = keeper.timestamps
where dup.id > keeper.id;



-- Derive a datetime column plus date / time / hour text columns from the epoch seconds.

-- Inspect the buffer settings, then enlarge the InnoDB buffer pool for the big updates below.
-- The value is in bytes (about 10.7 GB); adjust it to the RAM of the machine running this.
show variables like '%_buffer%';
set global innodb_buffer_pool_size = 10700000000;

-- Declared explicitly nullable: on servers where explicit_defaults_for_timestamp is off,
-- a bare TIMESTAMP column gets ON UPDATE CURRENT_TIMESTAMP, and every later UPDATE of the
-- row would overwrite the converted value with the current time.
alter table use_behaviour add datetimes timestamp(0) null default null;

-- FROM_UNIXTIME converts using the session time zone.
-- NOTE(review): confirm the session time zone matches the one the data was recorded in.
update use_behaviour set datetimes = from_unixtime(timestamps);
select * from use_behaviour limit 5;

-- Text columns sliced out of 'YYYY-MM-DD HH:MM:SS'.
alter table use_behaviour
    add dates char(10),
    add times char(8),
    add hours char(2);

-- Fill all three columns in a single pass over the table.
-- The original single-pass statement assigned `dates` twice and never set `hours`,
-- then repaired that with three separate full-table updates.
update use_behaviour
set dates = substring(datetimes, 1, 10),
    times = substring(datetimes, 12, 8),
    hours = substring(datetimes, 12, 2);
select * from use_behaviour limit 5;

 
-- Outlier removal (cleaning steps: nulls, duplicates, outliers).
-- Look at the actual time span first.
select max(datetimes), min(datetimes) from use_behaviour;

-- Keep only 2017-11-25 .. 2017-12-03 inclusive, written as a half-open range.
-- The statement is now terminated; the original ran straight into the following DESC.
delete from use_behaviour
where datetimes <  '2017-11-25 00:00:00'
   or datetimes >= '2017-12-04 00:00:00';
-- The author's run deleted 942 rows.

-- Data overview after cleaning.
desc use_behaviour;
select * from use_behaviour limit 5;
select count(1) from use_behaviour; -- 1889658 rows in the author's run

-- Build a small sample table (same structure as use_behaviour) for trying queries out quickly.
drop table if exists temp_behaviour;
create table temp_behaviour like use_behaviour;

-- Take the first 100000 rows by id; ordering makes the sample reproducible between runs
-- (LIMIT without ORDER BY returns an arbitrary set of rows).
insert into temp_behaviour
select *
from use_behaviour
order by id
limit 100000;

select * from temp_behaviour limit 5;

-- Daily page views (pv) on the sample table.
select
    dates,
    count(*) as `pv`
from temp_behaviour
where behaviour_type = 'pv'
group by dates;

-- Daily unique visitors (uv): distinct users with at least one page view that day.
select
    dates,
    count(distinct user_id) as `uv`
from temp_behaviour
where behaviour_type = 'pv'
group by dates;

-- pv, uv and average page views per visitor in a single statement.
select
    dates,
    count(*)                                       as `pv`,
    count(distinct user_id)                        as `uv`,
    round(count(*) / count(distinct user_id), 1)   as `pv/uv`
from temp_behaviour
where behaviour_type = 'pv'
group by dates;


-- Daily pv / uv / pv-per-uv on the full data set, stored in a result table.
create table pv_uv_puv (
    dates char(10),
    pv    int,
    uv    int,
    puv   decimal(10,1)
);

insert into pv_uv_puv (dates, pv, uv, puv)
select
    dates,
    count(*),
    count(distinct user_id),
    round(count(*) / count(distinct user_id), 1)
from use_behaviour
where behaviour_type = 'pv'
group by dates;

-- Check the result (the original statement had no terminator and ran into the DELETE below).
select * from pv_uv_puv;

-- Remove rows whose date could not be derived, from the result table and the source table.
delete from pv_uv_puv where dates is null;
delete from use_behaviour where dates is null;


-- Retention analysis, tried out on the sample table.

-- One row per (user, active day).
select user_id, dates
from temp_behaviour
group by user_id, dates;

-- Self-join: pair every active day of a user (a) with each later active day of the same user (b).
with daily_active as (
    select user_id, dates
    from temp_behaviour
    group by user_id, dates
)
select *
from daily_active as a
inner join daily_active as b
    on a.user_id = b.user_id
where a.dates < b.dates;

-- Retained-user counts per start day:
--   retention_0 = users active on the day itself (the base),
--   retention_1 = users active again 1 day later,
--   retention_3 = users active again 3 days later.
-- The original aliases were misspelled (rentention_1, rentention_33).
with daily_active as (
    select user_id, dates
    from temp_behaviour
    group by user_id, dates
)
select
    a.dates,
    count(case when datediff(b.dates, a.dates) = 0 then b.user_id end) as retention_0,
    count(case when datediff(b.dates, a.dates) = 1 then b.user_id end) as retention_1,
    count(case when datediff(b.dates, a.dates) = 3 then b.user_id end) as retention_3
from daily_active as a
inner join daily_active as b
    on a.user_id = b.user_id
where a.dates <= b.dates
group by a.dates;

-- Next-day retention rate = users active 1 day later / users active on the day.
-- The last day in the data has no following day, so its rate is 0 by construction.
with daily_active as (
    select user_id, dates
    from temp_behaviour
    group by user_id, dates
)
select
    a.dates,
    count(case when datediff(b.dates, a.dates) = 1 then b.user_id end)
        / count(case when datediff(b.dates, a.dates) = 0 then b.user_id end) as retention_1
from daily_active as a
inner join daily_active as b
    on a.user_id = b.user_id
where a.dates <= b.dates
group by a.dates;

-- Store the next-day retention rate computed on the full data set.
-- CREATE TABLE is now terminated; the original ran straight into the INSERT.
create table retention_rate (
    dates       char(10),
    retention_1 float
);

-- retention_1 = users active again 1 day later / users active on the day.
-- The self-match (datediff = 0) always exists, so the denominator is never zero.
insert into retention_rate (dates, retention_1)
with daily_active as (
    select user_id, dates
    from use_behaviour
    group by user_id, dates
)
select
    a.dates,
    count(case when datediff(b.dates, a.dates) = 1 then b.user_id end)
        / count(case when datediff(b.dates, a.dates) = 0 then b.user_id end)
from daily_active as a
inner join daily_active as b
    on a.user_id = b.user_id
where a.dates <= b.dates
group by a.dates;



-- Bounce rate.
-- Bounce users: users with exactly one behaviour record in the whole period.
select count(*) as bounce_users
from (
    select user_id
    from use_behaviour
    group by user_id
    having count(behaviour_type) = 1
) as a;

-- Total page views over the period.
select sum(pv) from pv_uv_puv; -- 1782280 in the author's run

-- Bounce rate = bounce users / total page views, computed instead of worked out by hand
-- (the author's run gave 1 / 1782280).
select
    (select count(*)
     from (
         select user_id
         from use_behaviour
         group by user_id
         having count(behaviour_type) = 1
     ) as b)
    / (select sum(pv) from pv_uv_puv) as bounce_rate;


-- Time-series analysis: number of each behaviour type per day and hour (sample table first).
select
    dates,
    hours,
    count(case when behaviour_type = 'pv'   then 1 end) as `pv`,
    count(case when behaviour_type = 'cart' then 1 end) as `cart`,
    count(case when behaviour_type = 'fav'  then 1 end) as `fav`,
    count(case when behaviour_type = 'buy'  then 1 end) as `buy`
from temp_behaviour
group by dates, hours
order by dates, hours;

-- Result table. MySQL only treats "--" as a comment when it is followed by whitespace;
-- the original "--存储" / "--结果插入" lines were syntax errors.
create table date_hour_behaviour (
    dates char(10),
    hours char(2),
    pv    int,
    cart  int,
    fav   int,
    buy   int
);

-- Same aggregation on the full data set, stored.
insert into date_hour_behaviour (dates, hours, pv, cart, fav, buy)
select
    dates,
    hours,
    count(case when behaviour_type = 'pv'   then 1 end),
    count(case when behaviour_type = 'cart' then 1 end),
    count(case when behaviour_type = 'fav'  then 1 end),
    count(case when behaviour_type = 'buy'  then 1 end)
from use_behaviour
group by dates, hours
order by dates, hours;

select * from date_hour_behaviour;

-- Distinct users per behaviour type (sample table).
select
    behaviour_type,
    count(distinct user_id) as user_num
from temp_behaviour
group by behaviour_type
order by behaviour_type desc;

-- Store the result. VARCHAR holds variable-length strings, CHAR holds fixed-length strings.
create table behaviour_user_num (
    behaviour_type varchar(5),
    user_num       int
);

insert into behaviour_user_num (behaviour_type, user_num)
select
    behaviour_type,
    count(distinct user_id)
from use_behaviour
group by behaviour_type
order by behaviour_type desc;

-- Check the result.
select * from behaviour_user_num;

-- Conversion rate worked out by hand: 12630 / 18453 = 0.6844.
-- NOTE(review): the two figures are presumably user counts read from behaviour_user_num; confirm.
select 12630 / 18453;

-- Number of behaviour records per type (sample table).
-- The original alias user_num was misleading: this counts rows, not users.
select
    behaviour_type,
    count(*) as behaviour_num
from temp_behaviour
group by behaviour_type
order by behaviour_type desc;

-- Store the record counts per behaviour type for the full data set.
create table behaviour_num (
    behaviour_type varchar(5),
    behaviour_num  int
);

insert into behaviour_num (behaviour_type, behaviour_num)
select
    behaviour_type,
    count(*)
from use_behaviour
group by behaviour_type
order by behaviour_type desc;

-- Check that the insert succeeded.
select * from behaviour_num;


-- Behaviour-path analysis: per (user, item), how many times each behaviour occurred.

-- Try the aggregation on the sample table first.
select
    user_id,
    item_id,
    count(case when behaviour_type = 'pv'   then 1 end) as `pv`,
    count(case when behaviour_type = 'cart' then 1 end) as `cart`,
    count(case when behaviour_type = 'fav'  then 1 end) as `fav`,
    count(case when behaviour_type = 'buy'  then 1 end) as `buy`
from temp_behaviour
group by user_id, item_id;

-- Result table, created directly under its final name and column names.
-- The original created use_behaviout_view(uesr_id, ...) and then renamed the column and
-- the table; the statements in between had no terminators and could not run as written.
create table use_behaviour_view (
    user_id int,
    item_id int,
    pv      int,
    cart    int,
    fav     int,
    buy     int
);

insert into use_behaviour_view (user_id, item_id, pv, cart, fav, buy)
select
    user_id,
    item_id,
    count(case when behaviour_type = 'pv'   then 1 end),
    count(case when behaviour_type = 'cart' then 1 end),
    count(case when behaviour_type = 'fav'  then 1 end),
    count(case when behaviour_type = 'buy'  then 1 end)
from use_behaviour
group by user_id, item_id;

select * from use_behaviour_view;


-- Normalise the per-(user, item) counts into 0/1 flags: did the behaviour happen at all?
create or replace view user_behaviour_standard as
select
    user_id,
    item_id,
    (case when pv   > 0 then 1 else 0 end) as 浏览了,
    (case when fav  > 0 then 1 else 0 end) as 收藏了,
    (case when cart > 0 then 1 else 0 end) as 加购了,
    (case when buy  > 0 then 1 else 0 end) as 购买了
from use_behaviour_view;

-- Path type: the four flags concatenated in the order viewed / favourited / carted / bought,
-- restricted to (user, item) pairs that ended in a purchase.
select
    *,
    concat(浏览了, 收藏了, 加购了, 购买了) as 购买路径类型
from user_behaviour_standard as a
where a.购买了 > 0;

-- Number of (user, item) pairs per purchase path type.
-- (The original "--统计..." line was not a valid MySQL comment: "--" needs trailing whitespace.)
create or replace view path_count as
select
    购买路径类型,
    count(*) as 数量
from (
    select
        *,
        concat(浏览了, 收藏了, 加购了, 购买了) as 购买路径类型
    from user_behaviour_standard as a
    where a.购买了 > 0
) as b
group by 购买路径类型
order by 数量 desc;

-- Lookup table translating a 4-digit path code into a readable description.
-- Digit order matches path_count: viewed, favourited, carted, bought.
create table renhua (
    path_type   char(4),
    description varchar(40)
);

insert into renhua (path_type, description)
values
    ('0001', '直接购买了'),
    ('1001', '浏览后购买了'),
    ('0011', '加购后购买了'),
    ('1011', '浏览加购后购买了'),
    ('0101', '收藏后购买了'),
    ('1101', '浏览收藏后购买了'),
    ('0111', '收藏加购后购买了'),
    ('1111', '浏览收藏加购后购买了');

select * from renhua;

-- Path counts joined with their descriptions, most frequent first.
select *
from path_count as p
inner join renhua as r
    on p.购买路径类型 = r.path_type
order by 数量 desc;

-- Store the labelled path counts.
create table path_result (
    path_type   char(4),
    description varchar(40),
    num         int
);

insert into path_result (path_type, description, num)
select
    r.path_type,
    r.description,
    p.数量
from path_count as p
inner join renhua as r
    on p.购买路径类型 = r.path_type
order by p.数量 desc;

select * from path_result;

-- Total purchases made without favouriting or carting the item first.
select sum(buy)
from use_behaviour_view
where buy > 0
  and fav = 0
  and cart = 0;
-- 28790 in the author's run


-- User segmentation: (1) paying vs non-paying users, (2) RFM model.
-- RFM metrics: R = recency of last purchase, F = purchase frequency, M = monetary value.
-- Users are later classified on F and R (value / developing / retained / win-back users);
-- the table built here stores only frequency and recency.

-- Most recent purchase date per user (sample table), newest first.
select
    user_id,
    max(dates) as `最近购买时间`
from temp_behaviour
where behaviour_type = 'buy'
group by user_id
order by `最近购买时间` desc;

-- Number of purchases per user (sample table), most frequent first.
select
    user_id,
    count(user_id) as `购买次数`
from temp_behaviour
where behaviour_type = 'buy'
group by user_id
order by `购买次数` desc;

-- Both metrics together: sort by recency first, then by frequency when recency ties.
select
    user_id,
    max(dates)     as `最近购买时间`,
    count(user_id) as `购买次数`
from temp_behaviour
where behaviour_type = 'buy'
group by user_id
order by `最近购买时间` desc, `购买次数` desc;

-- Result table. CREATE TABLE is now terminated; the original ran into the INSERT.
drop table if exists rfm_model;
create table rfm_model (
    user_id   int,
    frequency int,
    recent    char(10)
);

-- Full data set: sort by frequency first, then by recency when frequency ties.
insert into rfm_model (user_id, frequency, recent)
select
    user_id,
    count(user_id) as `购买次数`,
    max(dates)     as `最近购买时间`
from use_behaviour
where behaviour_type = 'buy'
group by user_id
order by `购买次数` desc, `最近购买时间` desc;

select * from rfm_model;

-- Recency score (1-5) from the date of the last purchase.
alter table rfm_model add column rscore int;

-- '2017-11-30' was missing from every bucket in the original and fell through to score 1,
-- below older dates; it is placed in the score-3 bucket with the adjacent days.
update rfm_model
set rscore = case
    when recent = '2017-12-03' then 5
    when recent in ('2017-12-01', '2017-12-02') then 4
    when recent in ('2017-11-28', '2017-11-29', '2017-11-30') then 3
    when recent in ('2017-11-26', '2017-11-27') then 2
    else 1
end;

-- Frequency score (1-5) from the number of purchases.
alter table rfm_model add column fscore int;

select max(frequency), min(frequency) from rfm_model; -- 72 and 1 in the author's run

-- Tiers: 40+ -> 5, 21-39 -> 4, 11-20 -> 3, 5-10 -> 2, otherwise 1.
-- The original "between 72 and 40" can never be true (lower bound above upper bound) and
-- "between 11 and 200" then caught the 40+ users, so nobody could score 5.
update rfm_model
set fscore = case
    when frequency >= 40 then 5
    when frequency >= 21 then 4
    when frequency >= 11 then 3
    when frequency >= 5  then 2
    else 1
end;



-- Segmentation: compare each user's scores with the average scores.
set @f_avg = null;
set @r_avg = null;
select avg(fscore) into @f_avg from rfm_model;
select avg(rscore) into @r_avg from rfm_model;

-- Preview the classification.
-- A score equal to the average counts as "not above average", so every user gets a class
-- (the original strict < / > comparisons left such users with a NULL class).
select
    *,
    (case
        when fscore > @f_avg and rscore > @r_avg then '价值用户'
        when fscore > @f_avg then '保持用户'
        when rscore > @r_avg then '发展用户'
        else '挽留用户'
    end) as class
from rfm_model;

-- Store the class on the table.
alter table rfm_model add column class varchar(40);

update rfm_model
set class = case
    when fscore > @f_avg and rscore > @r_avg then '价值用户'
    when fscore > @f_avg then '保持用户'
    when rscore > @r_avg then '发展用户'
    else '挽留用户'
end;

select * from rfm_model limit 10;

-- Number of users in each segment.
select
    class,
    count(distinct user_id)
from rfm_model
group by class;




-- Popularity analysis on the sample table.

-- Top 10 categories by page views.
select
    category_id,
    count(case when behaviour_type = 'pv' then 1 end) as `品类浏览量`
from temp_behaviour
group by category_id
order by `品类浏览量` desc
limit 10;

-- Top 10 items by page views.
select
    item_id,
    count(case when behaviour_type = 'pv' then 1 end) as `商品浏览量`
from temp_behaviour
group by item_id
order by `商品浏览量` desc
limit 10;

-- Most viewed item of each category (rank 1 within the category; ties all rank 1),
-- then the 10 highest of those. The ORDER BY inside the derived table was dropped because
-- the outer query re-sorts; the statement is now terminated.
select
    category_id,
    item_id,
    品类商品浏览量
from (
    select
        category_id,
        item_id,
        count(case when behaviour_type = 'pv' then 1 end) as 品类商品浏览量,
        rank() over (
            partition by category_id
            order by count(case when behaviour_type = 'pv' then 1 end) desc
        ) as r
    from temp_behaviour
    group by category_id, item_id
) as a
where r = 1
order by a.品类商品浏览量 desc
limit 10;


-- Result tables for the popularity rankings computed on the full data set.
create table popular_categories (
    category_id int,
    pv          int
);

create table popular_items (
    item_id int,
    pv      int
);

create table popular_cateitems (
    category_id int,
    item_id     int,
    pv          int
);

-- Ten categories with the most page views.
insert into popular_categories
select
    category_id,
    count(case when behaviour_type = 'pv' then behaviour_type end) as `品类浏览量`
from use_behaviour
group by category_id
order by `品类浏览量` desc
limit 10;

-- Ten items with the most page views.
insert into popular_items
select
    item_id,
    count(case when behaviour_type = 'pv' then behaviour_type end) as `品类浏览量`
from use_behaviour
group by item_id
order by `品类浏览量` desc
limit 10;

-- Ten (category, item) pairs with the most page views.
-- NOTE(review): unlike the exploratory rank() query on the sample table, this does not
-- restrict to the top item of each category; confirm which behaviour is intended.
insert into popular_cateitems
select
    category_id,
    item_id,
    count(case when behaviour_type = 'pv' then behaviour_type end) as `品类浏览量`
from use_behaviour
group by category_id, item_id
order by `品类浏览量` desc
limit 10;

select * from popular_cateitems;



-- Per-item behaviour counts and conversion rate.
-- Conversion rate = distinct users who bought the item / distinct users who interacted with it.
select
    item_id,
    count(case when behaviour_type = 'pv'   then 1 end) as `pv`,
    count(case when behaviour_type = 'cart' then 1 end) as `cart`,
    count(case when behaviour_type = 'fav'  then 1 end) as `fav`,
    count(case when behaviour_type = 'buy'  then 1 end) as `buy`,
    count(distinct case when behaviour_type = 'buy' then user_id end)
        / count(distinct user_id) as 商品转化率
from use_behaviour
group by item_id
order by 商品转化率 desc;

-- Store the per-item figures.
create table item_detail (
    item_id       int,
    pv            int,
    cart          int,
    fav           int,
    buy           int,
    user_buy_rate float
);

insert into item_detail (item_id, pv, cart, fav, buy, user_buy_rate)
select
    item_id,
    count(case when behaviour_type = 'pv'   then 1 end),
    count(case when behaviour_type = 'cart' then 1 end),
    count(case when behaviour_type = 'fav'  then 1 end),
    count(case when behaviour_type = 'buy'  then 1 end),
    count(distinct case when behaviour_type = 'buy' then user_id end)
        / count(distinct user_id) as 商品转化率
from use_behaviour
group by item_id
order by 商品转化率 desc;

-- The original statement had no terminator and ran into the next query.
select * from item_detail;

-- Per-category behaviour counts and conversion rate (same definition as above).
select
    category_id,
    count(case when behaviour_type = 'pv'   then 1 end) as `pv`,
    count(case when behaviour_type = 'cart' then 1 end) as `cart`,
    count(case when behaviour_type = 'fav'  then 1 end) as `fav`,
    count(case when behaviour_type = 'buy'  then 1 end) as `buy`,
    count(distinct case when behaviour_type = 'buy' then user_id end)
        / count(distinct user_id) as 品类转化率
from use_behaviour
group by category_id
order by 品类转化率 desc;

-- Store the per-category figures.
create table category_detail (
    category_id   int,
    pv            int,
    cart          int,
    fav           int,
    buy           int,
    user_buy_rate float
);

insert into category_detail (category_id, pv, cart, fav, buy, user_buy_rate)
select
    category_id,
    count(case when behaviour_type = 'pv'   then 1 end),
    count(case when behaviour_type = 'cart' then 1 end),
    count(case when behaviour_type = 'fav'  then 1 end),
    count(case when behaviour_type = 'buy'  then 1 end),
    count(distinct case when behaviour_type = 'buy' then user_id end)
        / count(distinct user_id) as 品类转化率
from use_behaviour
group by category_id
order by 品类转化率 desc;

select * from category_detail;


-- 商品特征分析  tableau  添加平均值参考线  筛选器

-- 数据可视化

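-- [Figure 1: Taobao data — data processing, time-series analysis, customer-acquisition analysis, purchase-path analysis]
-- [Figure 2: Taobao data — data processing, time-series analysis, customer-acquisition analysis, purchase-path analysis]
-- [Figure 3: Taobao data — data processing, time-series analysis, customer-acquisition analysis, purchase-path analysis]
-- [Figure 4: Taobao data — data processing, time-series analysis, customer-acquisition analysis, purchase-path analysis]

-- [Figure 5: Taobao data — data processing, time-series analysis, customer-acquisition analysis, purchase-path analysis]

-- You may also be interested in: (database, sql, database)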