MaxCompute分区表选择MaxCompute公共数据集public_data中的分区表dwd_product_movie_basic_info。
-- MaxCompute source table: DDL of the partitioned table in project public_data.
-- Every data column is STRING; ds is the single partition key.
CREATE TABLE IF NOT EXISTS public_data.dwd_product_movie_basic_info
(
    movie_name     STRING COMMENT '电影名称',       -- movie title
    dirctor        STRING COMMENT '导演',           -- director (column name spelled as in the dataset)
    scriptwriter   STRING COMMENT '编剧',           -- screenwriter
    area           STRING COMMENT '制片地区/国家',  -- production region / country
    actors         STRING COMMENT '主演',           -- leading actors
    `type`         STRING COMMENT '类型',           -- genre
    movie_length   STRING COMMENT '电影长度',       -- running time
    movie_date     STRING COMMENT '上映日期',       -- release date
    movie_language STRING COMMENT '语言',           -- language
    imdb_url       STRING COMMENT 'imdb号'          -- IMDb identifier
)
PARTITIONED BY (ds STRING)
STORED AS ALIORC;
查看分区表20170112分区的数据。
-- Preview the rows stored in the ds = '20170112' partition of the source table.
SELECT
    *
FROM
    public_data.dwd_product_movie_basic_info
WHERE
    ds = '20170112';
新建一张Hologres外部表,用于映射MaxCompute源头表数据。外表的字段顺序和字段类型需要和MaxCompute表的一一对应。
使用import foreign schema命令,创建名称为dwd_product_movie_basic_info的Hologres外部表。
-- Map the MaxCompute table public_data.dwd_product_movie_basic_info to a
-- Hologres foreign table in schema public, using the odps_server foreign server.
-- if_table_exist 'update' is passed so an already existing foreign table is updated.
IMPORT FOREIGN SCHEMA public_data
    LIMIT TO (dwd_product_movie_basic_info)
    FROM SERVER odps_server
    INTO public
    OPTIONS (if_table_exist 'update');
-- Create the Hologres internal partitioned parent table that will receive the
-- MaxCompute data. It mirrors the source columns as text and is LIST-partitioned
-- by ds. The table, its properties and its column comments are created in one
-- transaction.
BEGIN;

CREATE TABLE "public"."holo_dwd_product_movie_basic_info" (
    "movie_name"     text,
    "dirctor"        text,
    "scriptwriter"   text,
    "area"           text,
    "actors"         text,
    "type"           text,
    "movie_length"   text,
    "movie_date"     text,
    "movie_language" text,
    "imdb_url"       text,
    "ds"             text
)
PARTITION BY LIST (ds);

-- Storage layout: column-oriented.
CALL SET_TABLE_PROPERTY(
    '"public"."holo_dwd_product_movie_basic_info"',
    'orientation',
    'column'
);

-- Bitmap columns: every column of the table is listed.
CALL SET_TABLE_PROPERTY(
    '"public"."holo_dwd_product_movie_basic_info"',
    'bitmap_columns',
    '"movie_name","dirctor","scriptwriter","area","actors","type","movie_length","movie_date","movie_language","imdb_url","ds"'
);

-- Dictionary encoding: every column is listed with the ":auto" setting.
CALL SET_TABLE_PROPERTY(
    '"public"."holo_dwd_product_movie_basic_info"',
    'dictionary_encoding_columns',
    '"movie_name:auto","dirctor:auto","scriptwriter:auto","area:auto","actors:auto","type:auto","movie_length:auto","movie_date:auto","movie_language:auto","imdb_url:auto","ds:auto"'
);

-- Time to live: 3153600000 seconds = 36500 days (about 100 years).
CALL SET_TABLE_PROPERTY(
    '"public"."holo_dwd_product_movie_basic_info"',
    'time_to_live_in_seconds',
    '3153600000'
);

-- Column comments, copied from the MaxCompute column comments.
COMMENT ON COLUMN "public"."holo_dwd_product_movie_basic_info"."movie_name"     IS '电影名称';
COMMENT ON COLUMN "public"."holo_dwd_product_movie_basic_info"."dirctor"        IS '导演';
COMMENT ON COLUMN "public"."holo_dwd_product_movie_basic_info"."scriptwriter"   IS '编剧';
COMMENT ON COLUMN "public"."holo_dwd_product_movie_basic_info"."area"           IS '制片地区/国家';
COMMENT ON COLUMN "public"."holo_dwd_product_movie_basic_info"."actors"         IS '主演';
COMMENT ON COLUMN "public"."holo_dwd_product_movie_basic_info"."type"           IS '类型';
COMMENT ON COLUMN "public"."holo_dwd_product_movie_basic_info"."movie_length"   IS '电影长度';
COMMENT ON COLUMN "public"."holo_dwd_product_movie_basic_info"."movie_date"     IS '上映日期';
COMMENT ON COLUMN "public"."holo_dwd_product_movie_basic_info"."movie_language" IS '语言';
COMMENT ON COLUMN "public"."holo_dwd_product_movie_basic_info"."imdb_url"       IS 'imdb号';

COMMIT;
Hologres不支持将分区数据直接写入分区父表,因此需要在Hologres中创建与MaxCompute分区表分区键值对应的分区子表,然后将分区数据导入对应的分区子表。分区键值由参数${bizdate}控制,在调度系统中自动赋值,完成周期性调度。
导入分区数据的逻辑场景比较多,下面有两个场景供参考,请您根据实际业务逻辑两者选其中一个。
-- Scenario: import a NEW partition (no child table exists yet for ${bizdate}).
-- ${bizdate} is a scheduling parameter that is substituted before the script runs.

-- Remove a staging table left behind by an earlier failed run. Without this,
-- CREATE TABLE IF NOT EXISTS would keep the leftover table and the INSERT below
-- would load the same partition a second time when the node is rerun.
DROP TABLE IF EXISTS "public".tmp_holo_dwd_product_movie_basic_info_${bizdate};

-- Create the temporary (staging) child table with the parent table's columns.
BEGIN;
CREATE TABLE IF NOT EXISTS "public".tmp_holo_dwd_product_movie_basic_info_${bizdate} (
    "movie_name"     text,
    "dirctor"        text,
    "scriptwriter"   text,
    "area"           text,
    "actors"         text,
    "type"           text,
    "movie_length"   text,
    "movie_date"     text,
    "movie_language" text,
    "imdb_url"       text,
    "ds"             text
);
COMMIT;

-- Refresh the foreign table that maps the MaxCompute source table.
IMPORT FOREIGN SCHEMA public_data
    LIMIT TO (dwd_product_movie_basic_info)
    FROM SERVER odps_server
    INTO public
    OPTIONS (if_table_exist 'update');

-- Wait 30 s before importing: a slow refresh of the Hologres metadata cache
-- could otherwise lead to inconsistent data and a failed synchronization.
SELECT pg_sleep(30);

-- Load the ${bizdate} partition from the foreign table into the staging table.
INSERT INTO "public".tmp_holo_dwd_product_movie_basic_info_${bizdate}
SELECT
    "movie_name",
    "dirctor",
    "scriptwriter",
    "area",
    "actors",
    "type",
    "movie_length",
    "movie_date",
    "movie_language",
    "imdb_url",
    "ds"
FROM "public".dwd_product_movie_basic_info
WHERE ds = '${bizdate}';

-- Publish the new partition in one transaction: rename the staging table to the
-- child-partition name, then attach it to the partitioned parent table.
-- Table references are schema-qualified, like the statements above, so the
-- result does not depend on the session search_path.
BEGIN;
ALTER TABLE "public".tmp_holo_dwd_product_movie_basic_info_${bizdate}
    RENAME TO holo_dwd_product_movie_basic_info_${bizdate};
ALTER TABLE "public".holo_dwd_product_movie_basic_info
    ATTACH PARTITION "public".holo_dwd_product_movie_basic_info_${bizdate}
    FOR VALUES IN ('${bizdate}');
COMMIT;
-- Scenario: (re)load a partition whose child table may ALREADY exist, replacing
-- the previously imported data for ${bizdate}. It also works when the partition
-- does not exist yet.
-- ${bizdate} is a scheduling parameter that is substituted before the script runs.

-- Remove a staging table left behind by an earlier failed run. Without this,
-- CREATE TABLE IF NOT EXISTS would keep the leftover table and the INSERT below
-- would load the same partition a second time when the node is rerun.
DROP TABLE IF EXISTS "public".tmp_holo_dwd_product_movie_basic_info_${bizdate};

-- Create the temporary (staging) child table with the parent table's columns.
BEGIN;
CREATE TABLE IF NOT EXISTS "public".tmp_holo_dwd_product_movie_basic_info_${bizdate} (
    "movie_name"     text,
    "dirctor"        text,
    "scriptwriter"   text,
    "area"           text,
    "actors"         text,
    "type"           text,
    "movie_length"   text,
    "movie_date"     text,
    "movie_language" text,
    "imdb_url"       text,
    "ds"             text
);
COMMIT;

-- Refresh the foreign table that maps the MaxCompute source table.
IMPORT FOREIGN SCHEMA public_data
    LIMIT TO (dwd_product_movie_basic_info)
    FROM SERVER odps_server
    INTO public
    OPTIONS (if_table_exist 'update');

-- Wait 30 s before importing: a slow refresh of the Hologres metadata cache
-- could otherwise lead to inconsistent data and a failed synchronization.
SELECT pg_sleep(30);

-- Load the ${bizdate} partition from the foreign table into the staging table.
INSERT INTO "public".tmp_holo_dwd_product_movie_basic_info_${bizdate}
SELECT
    "movie_name",
    "dirctor",
    "scriptwriter",
    "area",
    "actors",
    "type",
    "movie_length",
    "movie_date",
    "movie_language",
    "imdb_url",
    "ds"
FROM "public".dwd_product_movie_basic_info
WHERE ds = '${bizdate}';

-- Swap the partition in one transaction so readers never see a missing or
-- half-loaded partition:
--   1. drop the existing child partition for ${bizdate}, if any (without this
--      step the RENAME below fails because the target name is already taken);
--   2. rename the staging table to the child-partition name;
--   3. attach it to the partitioned parent table.
-- Table references are schema-qualified, like the statements above, so the
-- result does not depend on the session search_path.
BEGIN;
DROP TABLE IF EXISTS "public".holo_dwd_product_movie_basic_info_${bizdate};
ALTER TABLE "public".tmp_holo_dwd_product_movie_basic_info_${bizdate}
    RENAME TO holo_dwd_product_movie_basic_info_${bizdate};
ALTER TABLE "public".holo_dwd_product_movie_basic_info
    ATTACH PARTITION "public".holo_dwd_product_movie_basic_info_${bizdate}
    FOR VALUES IN ('${bizdate}');
COMMIT;
在Hologres SQL编辑页面,单击节点编辑区域右侧的调度配置,配置节点的调度属性。
参数 | 值 |
---|---|
参数 | bizdate=${yyyymmdd} |
参数 | 值 |
---|---|
生成实例方式 | 发布后即时生成 |
重跑属性 | 运行成功后不可重跑,运行失败后可以重跑 |
定时调度时间 | 00:05 |
调度依赖为root节点即可(也可以根据业务逻辑选择已有的父节点)。请先将代码解析选择为是,然后单击代码解析,会自动解析出root节点,最后再将代码解析选择为否。
提交并发布节点。
任务执行成功之后,将会在Hologres中自动创建对应分区数据的分区子表。
进入holoweb查询页面:
-- Inspect the rows of the child partition table for ds = 20170112.
SELECT
    *
FROM
    holo_dwd_product_movie_basic_info_20170112;

-- Count the rows visible through the partitioned parent table.
SELECT
    COUNT(*)
FROM
    holo_dwd_product_movie_basic_info;
参考链接:https://help.aliyun.com/document_detail/330947.html