【开端】clickhouse入门使用

一、绪论

这两天使用ClickHouse进行数据分析,发现它在使用方式上与MySQL等关系型数据库有所不同,SQL语法上也存在差异,所以在这里总结一下使用方法。

【开端】clickhouse入门使用_第1张图片

二、clickhouse入门使用

ClickHouse介绍

ClickHouse是俄罗斯的Yandex公司于2016年开源的列式存储数据库(DBMS),它使用C++语言编写,主要面向在线分析处理查询(OLAP),能够使用SQL查询实时生成分析数据报告。ClickHouse在数据处理和查询性能上表现优异,尤其适用于大数据量的实时分析场景。

主要特点
  1. 列式存储
    • 相较于传统的行式存储,列式存储在处理大量数据的聚合、计数、求和等统计操作时具有显著优势。
    • 由于同一列的数据类型相同,因此更容易进行数据压缩,节省磁盘空间并提高缓存效率。
  2. 高性能写入
    • ClickHouse采用类LSM Tree的结构,数据写入后定期在后台进行Compaction,实现高效的顺序写操作。
    • 官方公开的benchmark测试显示,其写入吞吐能力可达50MB-200MB/s,相当于每秒写入50万至200万条数据。
  3. 高并行处理能力
    • ClickHouse将数据划分为多个partition和index granularity,通过多个CPU核心并行处理查询,极大地降低了查询延时。
    • 然而,需要注意的是,对于高并发查询业务,ClickHouse可能不是最佳选择,因为它倾向于使用多CPU处理单条查询。
  4. 灵活的存储引擎
    • ClickHouse支持多样化的存储引擎,根据表的不同需求可以设定不同的存储引擎,以满足不同的应用场景。
  5. 几乎覆盖标准SQL语法
    • ClickHouse支持包括DDL和DML在内的标准SQL语法,以及配套的各种函数、用户管理及权限管理、数据的备份与恢复等功能。
局限性与不足
  • 不支持事务:ClickHouse不支持传统的ACID事务;更新/删除只能通过 ALTER TABLE ... UPDATE/DELETE(mutation)以异步、重写数据片段的方式完成,代价较高,不适合频繁的行级修改。
  • 不支持高并发:官方建议的QPS(每秒查询率)为100,对于需要高并发的应用场景可能不是最佳选择。
  • 不支持传统的二级索引:ClickHouse主要依赖分区、排序键(稀疏主键索引)和索引粒度来实现数据的快速访问;它提供的跳数索引(data skipping index)只用于跳过不相关的数据块,并不等同于传统关系型数据库中的二级索引。

ClickHouse的SQL用法

ClickHouse支持丰富的SQL语法,以下是一些常用的SQL操作示例:

-- List the clusters known to this server (reads the system.clusters table).
SELECT * FROM system.clusters;


-- CDF members with at least one transaction.
-- Rebuilds data_ods.cdf_ordr_user_cdj_01 from scratch; every statement is
-- database-qualified so the script does not depend on the session's current database.
DROP TABLE IF EXISTS data_ods.cdf_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.cdf_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `user_id` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_01',
 '{replica}')
ORDER BY user_id
SETTINGS index_granularity = 8192;

TRUNCATE TABLE data_ods.cdf_ordr_user_cdj_01;

-- Half-open window: 2023-09-01 inclusive up to 2024-09-01 exclusive.
INSERT INTO data_ods.cdf_ordr_user_cdj_01 (user_id)
SELECT DISTINCT user_id
FROM data_ods.ctg_cdf_order_item_stat
WHERE transaction_time >= '2023-09-01 00:00:00'
  AND transaction_time <  '2024-09-01 00:00:00';


-- member_travel: contacts with at least one order in the window.
-- NOTE(review): the ZooKeeper path below ends in "_001" while the table is
-- named "_01"; the path is kept unchanged here -- confirm whether that is intentional.
DROP TABLE IF EXISTS data_ods.travel_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.travel_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `contact_tel_ciphertext` String COMMENT ''
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_001',
 '{replica}')
ORDER BY contact_tel_ciphertext
SETTINGS index_granularity = 8192;

-- 7660 (presumably a row count noted from an earlier run)
SELECT count(*) FROM data_ods.travel_ordr_user_cdj_01;

TRUNCATE TABLE data_ods.travel_ordr_user_cdj_01;

-- Half-open window: 2023-09-01 inclusive up to 2024-09-01 exclusive; NULL contacts are dropped.
INSERT INTO data_ods.travel_ordr_user_cdj_01 (contact_tel_ciphertext)
SELECT DISTINCT contact_tel_ciphertext
FROM data_ods.ctg_travel_order_base_info
WHERE order_date >= '2023-09-01 00:00:00'
  AND order_date <  '2024-09-01 00:00:00'
  AND contact_tel_ciphertext IS NOT NULL;
 

-- hotel: members with at least one transaction, collected from two sources
-- (hotel bills and mt orders).
DROP TABLE IF EXISTS data_ods.hotel_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.hotel_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `userId` String COMMENT ''
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotel_ordr_user_cdj_01',
 '{replica}')
ORDER BY userId
SETTINGS index_granularity = 8192;

-- 4188 (presumably a row count noted from an earlier run)
SELECT count(*) FROM data_ods.hotel_ordr_user_cdj_01;

TRUNCATE TABLE data_ods.hotel_ordr_user_cdj_01;

-- Source 1: hotel bills created in [2023-09-01, 2024-09-01).
-- NOTE(review): unlike the second source, loy_mem_id is not filtered for NULL -- confirm.
INSERT INTO data_ods.hotel_ordr_user_cdj_01 (userId)
SELECT DISTINCT loy_mem_id AS userId
FROM data_ods.ctg_htl_s_hotel_bill
WHERE created >= '2023-09-01 00:00:00'
  AND created <  '2024-09-01 00:00:00';

-- Source 2: mt orders added in [2023-09-01, 2024-09-01) with a non-NULL user id.
INSERT INTO data_ods.hotel_ordr_user_cdj_01 (userId)
SELECT DISTINCT userId
FROM data_ods.ctg_htl_mt_order
WHERE addTime >= '2023-09-01 00:00:00'
  AND addTime <  '2024-09-01 00:00:00'
  AND userId IS NOT NULL;

-- touzi (investment): members with at least one transaction.
-- All statements are database-qualified to match the CREATE TABLE target.
DROP TABLE IF EXISTS data_ods.tz_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.tz_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `user_id` String COMMENT ''
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_01',
 '{replica}')
ORDER BY user_id
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.tz_ordr_user_cdj_01;

TRUNCATE TABLE data_ods.tz_ordr_user_cdj_01;

-- Investment members that match a mall member by id, where the mall member's
-- create_time falls in [2023-09-01, 2024-09-01).
INSERT INTO data_ods.tz_ordr_user_cdj_01 (user_id)
SELECT DISTINCT id
FROM
(
    SELECT toString(t1.id) AS id
    FROM data_ods.ctg_invest_member AS t1
    INNER JOIN data_ods.ctg_mall_member AS t2
        ON toString(t1.id) = t2.id
    WHERE t2.create_time >= '2023-09-01 00:00:00'
      AND t2.create_time <  '2024-09-01 00:00:00'
) AS t3;

-- CDF: phone numbers of members with at least one transaction.
-- All statements are database-qualified to match the CREATE TABLE target.
DROP TABLE IF EXISTS data_ods.cdf_ordr_user_cdj_02 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.cdf_ordr_user_cdj_02 ON CLUSTER default_cluster
(
    `cellphone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_02',
 '{replica}')
ORDER BY cellphone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.cdf_ordr_user_cdj_02;

TRUNCATE TABLE data_ods.cdf_ordr_user_cdj_02;

-- Resolve transacting user ids to phone numbers; NULL and empty phones are skipped.
INSERT INTO data_ods.cdf_ordr_user_cdj_02 (cellphone)
SELECT t1.cellphone
FROM data_ods.ctg_cdf_member AS t1
INNER JOIN data_ods.cdf_ordr_user_cdj_01 AS t2
    ON toString(t1.userid) = t2.user_id
WHERE t1.cellphone IS NOT NULL
  AND t1.cellphone <> '';

-- member_travel: phone numbers of members with at least one transaction.
-- All statements are database-qualified to match the CREATE TABLE target.
DROP TABLE IF EXISTS data_ods.travel_ordr_user_cdj_02 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.travel_ordr_user_cdj_02 ON CLUSTER default_cluster
(
    `phone_ciphertext` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_02',
 '{replica}')
ORDER BY phone_ciphertext
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.travel_ordr_user_cdj_02;

TRUNCATE TABLE data_ods.travel_ordr_user_cdj_02;

-- Travel members whose phone matches a transacting contact phone.
-- The target column is named explicitly, so no alias is needed on the select list.
INSERT INTO data_ods.travel_ordr_user_cdj_02 (phone_ciphertext)
SELECT t1.phone_ciphertext
FROM data_ods.ctg_travel_member AS t1
INNER JOIN data_ods.travel_ordr_user_cdj_01 AS t2
    ON t1.phone_ciphertext = t2.contact_tel_ciphertext;


-- hotel: phone numbers of members with at least one transaction.
-- All statements are database-qualified to match the CREATE TABLE target.
DROP TABLE IF EXISTS data_ods.hotel_ordr_user_cdj_02 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.hotel_ordr_user_cdj_02 ON CLUSTER default_cluster
(
    `cellphone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotel_ordr_user_cdj_02',
 '{replica}')
ORDER BY cellphone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.hotel_ordr_user_cdj_02;

TRUNCATE TABLE data_ods.hotel_ordr_user_cdj_02;

-- Resolve transacting hotel user ids (matched on row_id) to phone numbers.
-- NOTE(review): NULL/empty phones are not filtered here, unlike the CDF equivalent -- confirm.
INSERT INTO data_ods.hotel_ordr_user_cdj_02 (cellphone)
SELECT t1.cellphone
FROM data_ods.ctg_htl_s_hotel_member AS t1
INNER JOIN data_ods.hotel_ordr_user_cdj_01 AS t2
    ON t1.row_id = t2.userId;

-- touzi (investment): phone numbers of members with at least one transaction.
-- All statements are database-qualified to match the CREATE TABLE target.
DROP TABLE IF EXISTS data_ods.tz_ordr_user_cdj_02 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.tz_ordr_user_cdj_02 ON CLUSTER default_cluster
(
    `cellphone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_02',
 '{replica}')
ORDER BY cellphone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.tz_ordr_user_cdj_02;

TRUNCATE TABLE data_ods.tz_ordr_user_cdj_02;

-- Resolve transacting investment member ids to their mobile numbers.
INSERT INTO data_ods.tz_ordr_user_cdj_02 (cellphone)
SELECT t1.mobile AS cellphone
FROM data_ods.ctg_invest_member AS t1
INNER JOIN data_ods.tz_ordr_user_cdj_01 AS t2
    ON toString(t1.id) = t2.user_id;
 

-- Total number of transacting users: 9301479
-- Union of the transacting phone numbers of all four business lines, deduplicated.
DROP TABLE IF EXISTS data_ods.ordr_user_cdj_03 ON CLUSTER default_cluster SYNC;

-- The column list must be wrapped in parentheses; the opening "(" was missing.
CREATE TABLE data_ods.ordr_user_cdj_03 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/ordr_user_cdj_03',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.ordr_user_cdj_03;

TRUNCATE TABLE data_ods.ordr_user_cdj_03;

-- bgname tags each branch with its business line; only phone is kept by the outer query.
INSERT INTO data_ods.ordr_user_cdj_03 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT a.cellphone AS phone, '中旅免税' AS bgname
    FROM data_ods.cdf_ordr_user_cdj_02 AS a
    UNION ALL
    SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname
    FROM data_ods.travel_ordr_user_cdj_02 AS b
    UNION ALL
    SELECT c.cellphone AS phone, '中旅酒店' AS bgname
    FROM data_ods.hotel_ordr_user_cdj_02 AS c
    UNION ALL
    SELECT d.cellphone AS phone, '中旅投资' AS bgname
    FROM data_ods.tz_ordr_user_cdj_02 AS d
) AS t;

-- Hotel member overlap: hotel member phones that occur at least twice in the
-- combined member lists of the four business lines.
DROP TABLE IF EXISTS data_ods.hotle_ordr_user_cdj_04 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.hotle_ordr_user_cdj_04 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotle_ordr_user_cdj_04',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.hotle_ordr_user_cdj_04;

TRUNCATE TABLE data_ods.hotle_ordr_user_cdj_04;

-- NOTE(review): count(phone) >= 2 counts rows, so a phone listed twice inside a
-- single source table also qualifies; if overlap must span different business
-- lines, a distinct count over bgname may be what is intended -- confirm.
INSERT INTO data_ods.hotle_ordr_user_cdj_04 (phone)
SELECT DISTINCT phone
FROM
(
    -- GROUP BY already yields one row per phone, so no extra DISTINCT is needed.
    SELECT phone
    FROM
    (
        SELECT a.cellphone AS phone, a.bgname AS bgname
        FROM data_ods.ctg_cdf_member AS a
        UNION ALL
        SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname
        FROM data_ods.ctg_travel_member AS b
        UNION ALL
        SELECT c.cellphone AS phone, c.bg_name AS bgname
        FROM data_ods.ctg_htl_s_hotel_member AS c
        UNION ALL
        SELECT d.mobile AS phone, d.bgname AS bgname
        FROM data_ods.ctg_invest_member AS d
    ) AS Q
    GROUP BY phone
    HAVING count(phone) >= 2
) AS y
INNER JOIN
(
    SELECT DISTINCT cellphone
    FROM data_ods.ctg_htl_s_hotel_member
) AS cc
    ON y.phone = cc.cellphone;
 

-- Travel member overlap: travel member phones that occur at least twice in the
-- combined member lists of the four business lines.
DROP TABLE IF EXISTS data_ods.travel_ordr_user_cdj_04 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.travel_ordr_user_cdj_04 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_04',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.travel_ordr_user_cdj_04;

TRUNCATE TABLE data_ods.travel_ordr_user_cdj_04;

-- NOTE(review): count(phone) >= 2 counts rows, so a phone listed twice inside a
-- single source table also qualifies; if overlap must span different business
-- lines, a distinct count over bgname may be what is intended -- confirm.
INSERT INTO data_ods.travel_ordr_user_cdj_04 (phone)
SELECT DISTINCT phone
FROM
(
    -- GROUP BY already yields one row per phone, so no extra DISTINCT is needed.
    SELECT phone
    FROM
    (
        SELECT a.cellphone AS phone, a.bgname AS bgname
        FROM data_ods.ctg_cdf_member AS a
        UNION ALL
        SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname
        FROM data_ods.ctg_travel_member AS b
        UNION ALL
        SELECT c.cellphone AS phone, c.bg_name AS bgname
        FROM data_ods.ctg_htl_s_hotel_member AS c
        UNION ALL
        SELECT d.mobile AS phone, d.bgname AS bgname
        FROM data_ods.ctg_invest_member AS d
    ) AS Q
    GROUP BY phone
    HAVING count(phone) >= 2
) AS y
INNER JOIN
(
    SELECT DISTINCT phone_ciphertext
    FROM data_ods.ctg_travel_member
) AS cc
    ON y.phone = cc.phone_ciphertext;

-- CDF member overlap: CDF member phones that occur at least twice in the
-- combined member lists of the four business lines.
DROP TABLE IF EXISTS data_ods.cdf_ordr_user_cdj_04 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.cdf_ordr_user_cdj_04 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_04',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.cdf_ordr_user_cdj_04;

TRUNCATE TABLE data_ods.cdf_ordr_user_cdj_04;

-- NOTE(review): count(phone) >= 2 counts rows, so a phone listed twice inside a
-- single source table also qualifies; if overlap must span different business
-- lines, a distinct count over bgname may be what is intended -- confirm.
INSERT INTO data_ods.cdf_ordr_user_cdj_04 (phone)
SELECT DISTINCT phone
FROM
(
    -- GROUP BY already yields one row per phone, so no extra DISTINCT is needed.
    SELECT phone
    FROM
    (
        SELECT a.cellphone AS phone, a.bgname AS bgname
        FROM data_ods.ctg_cdf_member AS a
        UNION ALL
        SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname
        FROM data_ods.ctg_travel_member AS b
        UNION ALL
        SELECT c.cellphone AS phone, c.bg_name AS bgname
        FROM data_ods.ctg_htl_s_hotel_member AS c
        UNION ALL
        SELECT d.mobile AS phone, d.bgname AS bgname
        FROM data_ods.ctg_invest_member AS d
    ) AS Q
    GROUP BY phone
    HAVING count(phone) >= 2
) AS y
INNER JOIN
(
    SELECT DISTINCT cellphone
    FROM data_ods.ctg_cdf_member
) AS cc
    -- Qualified with y. for consistency with the sibling overlap queries.
    ON y.phone = cc.cellphone;

-- Investment member overlap: investment member phones that occur at least twice
-- in the combined member lists of the four business lines.
DROP TABLE IF EXISTS data_ods.tz_ordr_user_cdj_04 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.tz_ordr_user_cdj_04 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_04',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.tz_ordr_user_cdj_04;

TRUNCATE TABLE data_ods.tz_ordr_user_cdj_04;

-- NOTE(review): count(phone) >= 2 counts rows, so a phone listed twice inside a
-- single source table also qualifies; if overlap must span different business
-- lines, a distinct count over bgname may be what is intended -- confirm.
INSERT INTO data_ods.tz_ordr_user_cdj_04 (phone)
SELECT DISTINCT phone
FROM
(
    -- GROUP BY already yields one row per phone, so no extra DISTINCT is needed.
    SELECT phone
    FROM
    (
        SELECT a.cellphone AS phone, a.bgname AS bgname
        FROM data_ods.ctg_cdf_member AS a
        UNION ALL
        SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname
        FROM data_ods.ctg_travel_member AS b
        UNION ALL
        SELECT c.cellphone AS phone, c.bg_name AS bgname
        FROM data_ods.ctg_htl_s_hotel_member AS c
        UNION ALL
        SELECT d.mobile AS phone, d.bgname AS bgname
        FROM data_ods.ctg_invest_member AS d
    ) AS Q
    GROUP BY phone
    HAVING count(phone) >= 2
) AS y
INNER JOIN
(
    SELECT DISTINCT mobile
    FROM data_ods.ctg_invest_member
) AS cc
    ON y.phone = cc.mobile;


-- Hotel: overlapping members that also have a transaction
-- (hotel overlap phones intersected with the transacting-user phones).
DROP TABLE IF EXISTS data_ods.hotle_ordr_user_cdj_05 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.hotle_ordr_user_cdj_05 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotle_ordr_user_cdj_05',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.hotle_ordr_user_cdj_05;

-- Qualified like the surrounding statements so it hits the same table.
TRUNCATE TABLE data_ods.hotle_ordr_user_cdj_05;

INSERT INTO data_ods.hotle_ordr_user_cdj_05 (phone)
SELECT DISTINCT t1.phone
FROM data_ods.hotle_ordr_user_cdj_04 AS t1
INNER JOIN data_ods.ordr_user_cdj_03 AS t2
    ON t1.phone = t2.phone;


-- Travel: overlapping members that also have a transaction
-- (travel overlap phones intersected with the transacting-user phones).
DROP TABLE IF EXISTS data_ods.travel_ordr_user_cdj_05 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.travel_ordr_user_cdj_05 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_05',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.travel_ordr_user_cdj_05;

TRUNCATE TABLE data_ods.travel_ordr_user_cdj_05;

-- Source tables are qualified to match the already-qualified INSERT target.
INSERT INTO data_ods.travel_ordr_user_cdj_05 (phone)
SELECT DISTINCT t1.phone
FROM data_ods.travel_ordr_user_cdj_04 AS t1
INNER JOIN data_ods.ordr_user_cdj_03 AS t2
    ON t1.phone = t2.phone;

-- CDF: overlapping members that also have a transaction
-- (CDF overlap phones intersected with the transacting-user phones).
DROP TABLE IF EXISTS data_ods.cdf_ordr_user_cdj_05 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.cdf_ordr_user_cdj_05 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_05',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.cdf_ordr_user_cdj_05;

TRUNCATE TABLE data_ods.cdf_ordr_user_cdj_05;

-- Source tables are qualified to match the already-qualified INSERT target.
INSERT INTO data_ods.cdf_ordr_user_cdj_05 (phone)
SELECT DISTINCT t1.phone
FROM data_ods.cdf_ordr_user_cdj_04 AS t1
INNER JOIN data_ods.ordr_user_cdj_03 AS t2
    ON t1.phone = t2.phone;

-- Investment: overlapping members that also have a transaction
-- (investment overlap phones intersected with the transacting-user phones).
DROP TABLE IF EXISTS data_ods.tz_ordr_user_cdj_05 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.tz_ordr_user_cdj_05 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_05',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.tz_ordr_user_cdj_05;

TRUNCATE TABLE data_ods.tz_ordr_user_cdj_05;

-- Source tables are qualified to match the already-qualified INSERT target.
INSERT INTO data_ods.tz_ordr_user_cdj_05 (phone)
SELECT DISTINCT t1.phone
FROM data_ods.tz_ordr_user_cdj_04 AS t1
INNER JOIN data_ods.ordr_user_cdj_03 AS t2
    ON t1.phone = t2.phone;


-- Overlap-rate calculation: row counts used as numerators and denominator.
-- The figure on each comment is the count recorded by the author.
-- Hotel: 322903
SELECT count(1) FROM data_ods.hotle_ordr_user_cdj_05;
-- Travel: 287291
SELECT count(1) FROM data_ods.travel_ordr_user_cdj_05;
-- CDF: 702559
SELECT count(1) FROM data_ods.cdf_ordr_user_cdj_05;
-- Investment: 159162
SELECT count(1) FROM data_ods.tz_ordr_user_cdj_05;

-- Total: 9045900
SELECT count(1) FROM data_ods.ordr_user_cdj_03;

-- Deduplicated: 571211
-- Union of the four per-business-line "overlapping and transacting" phone lists,
-- with duplicates across lists removed.
DROP TABLE IF EXISTS data_ods.ordr_user_cdj_06 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.ordr_user_cdj_06 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/ordr_user_cdj_06',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.ordr_user_cdj_06;

TRUNCATE TABLE data_ods.ordr_user_cdj_06;

INSERT INTO data_ods.ordr_user_cdj_06 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT phone FROM data_ods.hotle_ordr_user_cdj_05
    UNION ALL
    SELECT phone FROM data_ods.travel_ordr_user_cdj_05
    UNION ALL
    SELECT phone FROM data_ods.cdf_ordr_user_cdj_05
    UNION ALL
    SELECT phone FROM data_ods.tz_ordr_user_cdj_05
) AS t;

-- The INSERT above must be terminated before this statement can be parsed.
SELECT count(1) FROM data_ods.ordr_user_cdj_06;
 

--查询集群
SELECT * FROM system.clusters;


-- CDF members with at least one transaction.
-- Rebuilds data_ods.cdf_ordr_user_cdj_01 from scratch; every statement is
-- database-qualified so the script does not depend on the session's current database.
DROP TABLE IF EXISTS data_ods.cdf_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.cdf_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `user_id` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_01',
 '{replica}')
ORDER BY user_id
SETTINGS index_granularity = 8192;

TRUNCATE TABLE data_ods.cdf_ordr_user_cdj_01;

-- Half-open window: 2023-09-01 inclusive up to 2024-09-01 exclusive.
INSERT INTO data_ods.cdf_ordr_user_cdj_01 (user_id)
SELECT DISTINCT user_id
FROM data_ods.ctg_cdf_order_item_stat
WHERE transaction_time >= '2023-09-01 00:00:00'
  AND transaction_time <  '2024-09-01 00:00:00';


-- member_travel: contacts with at least one order in the window.
-- NOTE(review): the ZooKeeper path below ends in "_001" while the table is
-- named "_01"; the path is kept unchanged here -- confirm whether that is intentional.
DROP TABLE IF EXISTS data_ods.travel_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.travel_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `contact_tel_ciphertext` String COMMENT ''
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_001',
 '{replica}')
ORDER BY contact_tel_ciphertext
SETTINGS index_granularity = 8192;

-- 7660 (presumably a row count noted from an earlier run)
SELECT count(*) FROM data_ods.travel_ordr_user_cdj_01;

TRUNCATE TABLE data_ods.travel_ordr_user_cdj_01;

-- Half-open window: 2023-09-01 inclusive up to 2024-09-01 exclusive; NULL contacts are dropped.
INSERT INTO data_ods.travel_ordr_user_cdj_01 (contact_tel_ciphertext)
SELECT DISTINCT contact_tel_ciphertext
FROM data_ods.ctg_travel_order_base_info
WHERE order_date >= '2023-09-01 00:00:00'
  AND order_date <  '2024-09-01 00:00:00'
  AND contact_tel_ciphertext IS NOT NULL;
 

-- hotel: members with at least one transaction, collected from two sources
-- (hotel bills and mt orders).
DROP TABLE IF EXISTS data_ods.hotel_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.hotel_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `userId` String COMMENT ''
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotel_ordr_user_cdj_01',
 '{replica}')
ORDER BY userId
SETTINGS index_granularity = 8192;

-- 4188 (presumably a row count noted from an earlier run)
SELECT count(*) FROM data_ods.hotel_ordr_user_cdj_01;

TRUNCATE TABLE data_ods.hotel_ordr_user_cdj_01;

-- Source 1: hotel bills created in [2023-09-01, 2024-09-01).
-- NOTE(review): unlike the second source, loy_mem_id is not filtered for NULL -- confirm.
INSERT INTO data_ods.hotel_ordr_user_cdj_01 (userId)
SELECT DISTINCT loy_mem_id AS userId
FROM data_ods.ctg_htl_s_hotel_bill
WHERE created >= '2023-09-01 00:00:00'
  AND created <  '2024-09-01 00:00:00';

-- Source 2: mt orders added in [2023-09-01, 2024-09-01) with a non-NULL user id.
INSERT INTO data_ods.hotel_ordr_user_cdj_01 (userId)
SELECT DISTINCT userId
FROM data_ods.ctg_htl_mt_order
WHERE addTime >= '2023-09-01 00:00:00'
  AND addTime <  '2024-09-01 00:00:00'
  AND userId IS NOT NULL;

--touzi 有交易的会员
drop table tz_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC; 

CREATE TABLE data_ods.tz_ordr_user_cdj_01 ON CLUSTER default_cluster
(

    `user_id` String COMMENT ''
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_01',
 '{replica}')
ORDER BY user_id
SETTINGS index_granularity = 8192;

select  count(*) from  tz_ordr_user_cdj_01;
truncate tz_ordr_user_cdj_01;
insert into tz_ordr_user_cdj_01
select DISTINCT id from (
select  toString(t1.id) AS id   from  data_ods.ctg_invest_member t1
join   data_ods.ctg_mall_member t2
on  toString(t1.id) = t2.id 
where  
t2.create_time >='2023-09-01 00:00:00'
and 
t2.create_time <'2024-09-01 00:00:00' ) t3 ;



--有交易的会员手机号
drop table cdf_ordr_user_cdj_02  ON CLUSTER default_cluster SYNC; 

CREATE TABLE data_ods.cdf_ordr_user_cdj_02 ON CLUSTER default_cluster
(

    `cellphone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_02',
 '{replica}')
ORDER BY cellphone
SETTINGS index_granularity = 8192;

select  count(*) from  cdf_ordr_user_cdj_02;

truncate cdf_ordr_user_cdj_02;
insert into cdf_ordr_user_cdj_02
select  t1.cellphone 
from data_ods.ctg_cdf_member t1
join data_ods.cdf_ordr_user_cdj_01  t2
on toString(t1.userid) = t2.user_id
where t1.cellphone is not  null and  t1.cellphone <>'';

--member_travel 有交易的会员手机号
drop table travel_ordr_user_cdj_02  ON CLUSTER default_cluster SYNC; 

CREATE TABLE data_ods.travel_ordr_user_cdj_02  ON CLUSTER default_cluster
(

    `phone_ciphertext` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_02',
 '{replica}')
ORDER BY phone_ciphertext
SETTINGS index_granularity = 8192;


select  count(*) from  travel_ordr_user_cdj_02;

truncate travel_ordr_user_cdj_02;
insert into travel_ordr_user_cdj_02
select  t1.phone_ciphertext  as cellphone
from  data_ods.ctg_travel_member  t1
join data_ods.travel_ordr_user_cdj_01  t2
on t1.phone_ciphertext = t2.contact_tel_ciphertext;


--hotel 有交易的会员手机号

drop table hotel_ordr_user_cdj_02  ON CLUSTER default_cluster SYNC; 
CREATE TABLE data_ods.hotel_ordr_user_cdj_02  ON CLUSTER default_cluster
(

    `cellphone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotel_ordr_user_cdj_02',
 '{replica}')
ORDER BY cellphone
SETTINGS index_granularity = 8192;

select  count(*) from  hotel_ordr_user_cdj_02;
truncate hotel_ordr_user_cdj_02;
insert into hotel_ordr_user_cdj_02
select  t1.cellphone
from   data_ods.ctg_htl_s_hotel_member  t1
join data_ods.hotel_ordr_user_cdj_01  t2
on t1.row_id = t2.userId;

--touzi 有交易的会员手机号
drop table tz_ordr_user_cdj_02  ON CLUSTER default_cluster SYNC; 

CREATE TABLE data_ods.tz_ordr_user_cdj_02 ON CLUSTER default_cluster
(

    `cellphone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_02',
 '{replica}')
ORDER BY cellphone
SETTINGS index_granularity = 8192;

select  count(*) from  tz_ordr_user_cdj_02;
truncate tz_ordr_user_cdj_02;

insert into tz_ordr_user_cdj_02
select  t1.mobile as cellphone
from    data_ods.ctg_invest_member  t1
join data_ods.tz_ordr_user_cdj_01  t2
on toString(t1.id) = t2.user_id;
 

-- Total number of transacting users: 9301479
-- Union of the transacting phone numbers of all four business lines, deduplicated.
DROP TABLE IF EXISTS data_ods.ordr_user_cdj_03 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.ordr_user_cdj_03 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/ordr_user_cdj_03',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.ordr_user_cdj_03;

TRUNCATE TABLE data_ods.ordr_user_cdj_03;

-- bgname tags each branch with its business line; only phone is kept by the outer query.
-- The statement is terminated with ";" so the following DROP parses as its own statement.
INSERT INTO data_ods.ordr_user_cdj_03 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT a.cellphone AS phone, '中旅免税' AS bgname
    FROM data_ods.cdf_ordr_user_cdj_02 AS a
    UNION ALL
    SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname
    FROM data_ods.travel_ordr_user_cdj_02 AS b
    UNION ALL
    SELECT c.cellphone AS phone, '中旅酒店' AS bgname
    FROM data_ods.hotel_ordr_user_cdj_02 AS c
    UNION ALL
    SELECT d.cellphone AS phone, '中旅投资' AS bgname
    FROM data_ods.tz_ordr_user_cdj_02 AS d
) AS t;



--酒店会员重叠 

drop table hotle_ordr_user_cdj_04  ON CLUSTER default_cluster SYNC; 

CREATE TABLE data_ods.hotle_ordr_user_cdj_04 ON CLUSTER default_cluster
(

    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotle_ordr_user_cdj_04',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

select  count(*) from  hotle_ordr_user_cdj_04;
truncate hotle_ordr_user_cdj_04;

insert into hotle_ordr_user_cdj_04
SELECT DISTINCT phone from
(
SELECT  distinct phone   FROM
 (
SELECT   a.cellphone AS phone,a.bgname as bgname        FROM      data_ods.ctg_cdf_member a   
UNION ALL
SELECT   b.phone_ciphertext AS phone , '中旅旅行' as bgname FROM   data_ods.ctg_travel_member b 
UNION ALL
SELECT   c.cellphone AS phone ,c.bg_name  as bgname       FROM    data_ods.ctg_htl_s_hotel_member c  
UNION ALL
SELECT   d.mobile AS phone ,d.bgname  as bgname  FROM       data_ods.ctg_invest_member d 
 ) Q  GROUP BY phone   HAVING count(phone)>=2 
 ) y 
inner join (SELECT distinct cellphone from data_ods.ctg_htl_s_hotel_member) cc
on y.phone = cc.cellphone ;
 

--旅行会员重叠 
drop table travel_ordr_user_cdj_04  ON CLUSTER default_cluster SYNC; 


CREATE TABLE data_ods.travel_ordr_user_cdj_04  ON CLUSTER default_cluster 
(

    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_04',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

select  count(*) from  travel_ordr_user_cdj_04;
truncate travel_ordr_user_cdj_04;

insert into travel_ordr_user_cdj_04
SELECT DISTINCT phone from
(
SELECT  distinct phone   FROM
 (
SELECT   a.cellphone AS phone,a.bgname as bgname        FROM       data_ods.ctg_cdf_member a 
 UNION ALL
SELECT   b.phone_ciphertext AS phone , '中旅旅行' as bgname FROM   data_ods.ctg_travel_member b 
 UNION ALL
SELECT   c.cellphone AS phone ,c.bg_name  as bgname       FROM    data_ods.ctg_htl_s_hotel_member c  
UNION ALL
SELECT   d.mobile AS phone ,d.bgname  as bgname  FROM       data_ods.ctg_invest_member  d 

 ) Q  GROUP BY phone   HAVING count(phone)>=2 
 ) y 
inner join (SELECT distinct phone_ciphertext from data_ods.ctg_travel_member) cc
on y.phone = cc.phone_ciphertext;

--会员重叠 


drop table cdf_ordr_user_cdj_04  ON CLUSTER default_cluster SYNC; 


CREATE TABLE data_ods.cdf_ordr_user_cdj_04  ON CLUSTER default_cluster
(

    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_04',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;


select  count(*) from  cdf_ordr_user_cdj_04;
truncate cdf_ordr_user_cdj_04;
insert into cdf_ordr_user_cdj_04
SELECT DISTINCT phone from
(
SELECT  distinct phone   FROM
 (
SELECT   a.cellphone AS phone,a.bgname as bgname        FROM       data_ods.ctg_cdf_member a 
 UNION ALL
SELECT   b.phone_ciphertext AS phone , '中旅旅行' as bgname FROM   data_ods.ctg_travel_member b 
 UNION ALL
SELECT   c.cellphone AS phone ,c.bg_name  as bgname       FROM    data_ods.ctg_htl_s_hotel_member c  
UNION ALL
SELECT   d.mobile AS phone ,d.bgname  as bgname  FROM       data_ods.ctg_invest_member  d 

 ) Q  GROUP BY phone   HAVING count(phone)>=2 
 ) y
inner join  (SELECT distinct cellphone from data_ods.ctg_cdf_member) cc
on phone = cc.cellphone;

--投资会员重叠 
drop table tz_ordr_user_cdj_04  ON CLUSTER default_cluster SYNC; 

CREATE TABLE data_ods.tz_ordr_user_cdj_04 ON CLUSTER default_cluster
(

    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_04',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

select  count(*) from  tz_ordr_user_cdj_04;
truncate tz_ordr_user_cdj_04;


insert into tz_ordr_user_cdj_04
SELECT DISTINCT phone from
(
SELECT  distinct phone   FROM
 (
SELECT   a.cellphone AS phone,a.bgname as bgname        FROM       data_ods.ctg_cdf_member a 
 UNION ALL
SELECT   b.phone_ciphertext AS phone , '中旅旅行' as bgname FROM   data_ods.ctg_travel_member b 
 UNION ALL
SELECT   c.cellphone AS phone ,c.bg_name  as bgname       FROM    data_ods.ctg_htl_s_hotel_member c  
UNION ALL
SELECT   d.mobile AS phone ,d.bgname  as bgname  FROM       data_ods.ctg_invest_member  d 

 ) Q  GROUP BY phone   HAVING count(phone)>=2 
 ) y
inner join (SELECT distinct mobile from data_ods.ctg_invest_member) cc
on y.phone = cc.mobile;




--酒店重叠的有交易会员
drop table hotle_ordr_user_cdj_05  ON CLUSTER default_cluster SYNC; 

CREATE TABLE data_ods.hotle_ordr_user_cdj_05 ON CLUSTER default_cluster 
(

    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotle_ordr_user_cdj_05',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

select  count(*) from   data_ods.hotle_ordr_user_cdj_05;
truncate hotle_ordr_user_cdj_05;

insert into data_ods.hotle_ordr_user_cdj_05
select  distinct t1.phone   from  data_ods.hotle_ordr_user_cdj_04 t1   
join data_ods.ordr_user_cdj_03  t2 
on t1.phone = t2.phone;




--旅行重叠的有交易会员
drop table travel_ordr_user_cdj_05  ON CLUSTER default_cluster SYNC; 

CREATE TABLE data_ods.travel_ordr_user_cdj_05 ON CLUSTER default_cluster
(

    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_05',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

select  count(*) from  travel_ordr_user_cdj_05;
truncate travel_ordr_user_cdj_05;

insert into data_ods.travel_ordr_user_cdj_05
select  distinct t1.phone   from travel_ordr_user_cdj_04 t1
join ordr_user_cdj_03 t2
on t1.phone = t2.phone;

--重叠的有交易会员
drop table cdf_ordr_user_cdj_05  ON CLUSTER default_cluster SYNC; 

CREATE TABLE data_ods.cdf_ordr_user_cdj_05  ON CLUSTER default_cluster
(

    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_05',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

select  count(*) from  cdf_ordr_user_cdj_05;
truncate cdf_ordr_user_cdj_05;

insert into data_ods.cdf_ordr_user_cdj_05
select  distinct t1.phone   from cdf_ordr_user_cdj_04 t1
join ordr_user_cdj_03 t2
on t1.phone = t2.phone;

--投资重叠的有交易会员
drop table tz_ordr_user_cdj_05  ON CLUSTER default_cluster SYNC; 

CREATE TABLE data_ods.tz_ordr_user_cdj_05 ON CLUSTER default_cluster
(

    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_05',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

select  count(*) from  tz_ordr_user_cdj_05;
truncate tz_ordr_user_cdj_05;
insert into data_ods.tz_ordr_user_cdj_05
select  distinct  t1.phone   from tz_ordr_user_cdj_04 t1
join ordr_user_cdj_03 t2
on t1.phone = t2.phone;


-- Overlap-rate calculation: row counts used as numerators and denominator.
-- The figure on each comment is the count recorded by the author.
-- Hotel: 322903
SELECT count(1) FROM data_ods.hotle_ordr_user_cdj_05;
-- Travel: 287291
SELECT count(1) FROM data_ods.travel_ordr_user_cdj_05;
-- CDF: 702559
SELECT count(1) FROM data_ods.cdf_ordr_user_cdj_05;
-- Investment: 159162
SELECT count(1) FROM data_ods.tz_ordr_user_cdj_05;

-- Total: 9045900
SELECT count(1) FROM data_ods.ordr_user_cdj_03;

 

-- Deduplicated: 571211
-- Union of the four per-business-line "overlapping and transacting" phone lists,
-- with duplicates across lists removed.
DROP TABLE IF EXISTS data_ods.ordr_user_cdj_06 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.ordr_user_cdj_06 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/ordr_user_cdj_06',
 '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.ordr_user_cdj_06;

TRUNCATE TABLE data_ods.ordr_user_cdj_06;

INSERT INTO data_ods.ordr_user_cdj_06 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT phone FROM data_ods.hotle_ordr_user_cdj_05
    UNION ALL
    SELECT phone FROM data_ods.travel_ordr_user_cdj_05
    UNION ALL
    SELECT phone FROM data_ods.cdf_ordr_user_cdj_05
    UNION ALL
    SELECT phone FROM data_ods.tz_ordr_user_cdj_05
) AS t;

-- The INSERT above must be terminated before this statement can be parsed.
SELECT count(1) FROM data_ods.ordr_user_cdj_06;
 



你可能感兴趣的:(数据库,clickhouse)