Doris bitmap人群圈选案例

  1. 构建标签表
 -- Label table (Doris AGGREGATE KEY model).
 -- Rows sharing the same (label_name, label_version, label_type, label_value)
 -- key are merged, and their bitmap_uid values are combined with BITMAP_UNION.
 CREATE TABLE `user_label` (
    `label_name`    VARCHAR(20)         NULL COMMENT '标签名称',                           -- label name
    `label_version` DATETIME            NULL COMMENT '标签版本号',                         -- label version
    `label_type`    VARCHAR(10)         NULL COMMENT '标签类型data-double-int-string',     -- label value type; NOTE(review): "data" is presumably "date" - confirm
    `label_value`   VARCHAR(100)        NULL COMMENT 'label_string标签值',                 -- label value, stored as a string
    `bitmap_uid`    BITMAP BITMAP_UNION NULL COMMENT '实体id集合'                          -- set of entity ids carrying this label value
)
ENGINE = OLAP
AGGREGATE KEY (`label_name`, `label_version`, `label_type`, `label_value`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`label_name`) BUCKETS 20
PROPERTIES (
    "bloom_filter_columns"   = "label_value",
    "replication_allocation" = "tag.location.default: 1"
);
  2. 构建事实表(明细表)

-- Event fact (detail) table, Doris DUPLICATE KEY model.
-- Range-partitioned on event_time; the partition list is left empty because
-- partitions are managed by the dynamic_partition.* properties below.
CREATE TABLE `dwd_event` (
    `event_id`   VARCHAR(100),
    `event_time` DATETIME,
    `event_type` VARCHAR(100),
    `tag_name`   VARCHAR(100),
    `uid`        BIGINT,
    `ver`        VARCHAR(20) COMMENT 'app版本号',       -- app version
    `brand`      VARCHAR(80) COMMENT '设备所属品牌',    -- device brand
    `app_id`     VARCHAR(20)
)
DUPLICATE KEY (`event_id`, `event_time`)
PARTITION BY RANGE(`event_time`) ()
DISTRIBUTED BY HASH(`event_id`) BUCKETS 12
PROPERTIES (
    -- Daily dynamic partitions with a window of -30 .. +3 days around today.
    "dynamic_partition.enable"                   = "true",
    "dynamic_partition.time_unit"                = "DAY",
    "dynamic_partition.start"                    = "-30",
    "dynamic_partition.end"                      = "3",
    "dynamic_partition.prefix"                   = "p",
    "dynamic_partition.buckets"                  = "12",
    "dynamic_partition.create_history_partition" = "true",
    "bloom_filter_columns"                       = "uid,tag_name",
    "replication_allocation"                     = "tag.location.default: 1"
);
  3. 人群圈选
    将标签过滤的结果与行为明细数据求交集
-- Crowd selection: users that match the label filter AND the behaviour filter.
-- Returns a single row (group_version, group_id, bitmap_uid).
--
-- Each side is first collapsed into one bitmap and the two bitmaps are then
-- intersected with bitmap_and. Aggregating both sides with bitmap_union over a
-- UNION ALL would instead return users matching EITHER condition.
WITH label_users AS (
    -- Users whose 'charge_amt' label value is below 1000 for the given version.
    -- label_value is a VARCHAR column, so it is cast explicitly rather than
    -- relying on implicit string-to-number coercion.
    -- NOTE(review): label_type is not filtered here; confirm that every
    -- 'charge_amt' row holds a numeric label_value.
    SELECT bitmap_union(bitmap_uid) AS bitmap_uid
    FROM user_label
    WHERE label_version = '2023-03-11'
      AND label_name = 'charge_amt'
      AND CAST(label_value AS DOUBLE) < 1000.0
),
event_users AS (
    -- Users with at least one matching event in the time window.
    SELECT bitmap_union(to_bitmap(uid)) AS bitmap_uid
    FROM (
        SELECT uid
        FROM dwd_event
        WHERE brand = 'xiaomi'
          -- Half-open range covering 2022-11-04 through 2023-02-02 inclusive.
          AND event_time >= '2022-11-04 00:00:00'
          AND event_time < '2023-02-03 00:00:00'
          AND tag_name IN ('tag_a')
        GROUP BY uid
        -- Minimum number of matching events per user; with a threshold of 1
        -- every grouped uid qualifies. Raise it for frequency conditions.
        HAVING COUNT(*) >= 1
    ) AS qualified_events
)
SELECT
    '2023-03-11' AS group_version,
    123 AS group_id,
    bitmap_and(label_users.bitmap_uid, event_users.bitmap_uid) AS bitmap_uid
FROM label_users
-- Both CTEs aggregate without GROUP BY, so each yields exactly one row.
CROSS JOIN event_users;

你可能感兴趣的:(数据库,java,jvm)