Hive: insert overwrite from one table into another, overwriting the target with semi-dynamic (mixed static/dynamic) partitioning

Drop and purge the first table:
drop table test.dxp_qgm_hbase_linshi_01 purge;
DDL for the first table:
CREATE EXTERNAL TABLE test.dxp_qgm_hbase_linshi_01 (
test_int int,
gupiaoid string
)
PARTITIONED BY(hdfs_par string)
STORED AS PARQUET LOCATION 'hdfs://nameservice2/htdata/test/dxp_qgm_hbase_linshi_01';
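
Note that dropping an external table (even with PURGE) does not delete the files under its LOCATION, so partition directories from a previous run may still exist on HDFS without being registered in the metastore. A standard repair command (an optional step, not part of the original walkthrough) picks them up:

-- Optional: register any partition directories already present at the table LOCATION
MSCK REPAIR TABLE test.dxp_qgm_hbase_linshi_01;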

Insert some sample data into the first table:
insert into table test.dxp_qgm_hbase_linshi_01 partition(hdfs_par='20200715') values (01,'1');
insert into table test.dxp_qgm_hbase_linshi_01 partition(hdfs_par='20200715') values (02,'2');
Query the first table:
select * from test.dxp_qgm_hbase_linshi_01;
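
Given the two rows just inserted, the output should be (columns: test_int, gupiaoid, hdfs_par):

1	1	20200715
2	2	20200715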

Drop and purge the second table:
drop table test.dxp_qgm_hbase_test purge;
DDL for the second table:
CREATE EXTERNAL TABLE test.dxp_qgm_hbase_test (
test_int int,
hd_business_date string
)
PARTITIONED BY(hdfs_par string,gupiaoid string)
STORED AS PARQUET LOCATION 'hdfs://nameservice2/htdata/test/dxp_qgm_hbase_test';

Insert some sample data into the second table:
insert into table test.dxp_qgm_hbase_test partition(hdfs_par='202007',gupiaoid='1') values (01,'20200714');
insert into table test.dxp_qgm_hbase_test partition(hdfs_par='202007',gupiaoid='2') values (02,'20200715');
Query the second table:
select * from test.dxp_qgm_hbase_test;
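
With the rows above, the output should be (columns: test_int, hd_business_date, hdfs_par, gupiaoid):

1	20200714	202007	1
2	20200715	202007	2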

Approach 1: spell out every column, which works when the target table's columns are known. The target partition is rebuilt by union-ing the fresh rows from the staging table with the existing target rows that should be kept.

-- Enable dynamic partitioning; nonstrict mode lets partition values be resolved from the data
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
-- Raise the limits on dynamic partitions and created files
set hive.exec.max.dynamic.partitions.pernode=10000;
set hive.exec.max.dynamic.partitions=10000;
set hive.exec.max.created.files=10000;
-- Lower the minimum Parquet chunk size and raise mapper memory,
-- since each open partition writer holds its own buffer
set parquet.memory.min.chunk.size=100000;
set mapreduce.map.memory.mb=4096;
insert overwrite table test.dxp_qgm_hbase_test partition(hdfs_par='202007',gupiaoid)
select * from
(
select test_int,'20200715' as hd_business_date,gupiaoid from test.dxp_qgm_hbase_linshi_01 where hdfs_par = '20200715'
union all
select test_int,hd_business_date,gupiaoid from test.dxp_qgm_hbase_test where hdfs_par = '202007' and hd_business_date != '20200715'
) tmp;
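
Here hdfs_par='202007' is a static partition value and gupiaoid is resolved dynamically per row; only the (hdfs_par, gupiaoid) partitions that actually appear in the SELECT output are overwritten. With the sample data above, re-querying the partition (a verification step, not part of the original walkthrough) should return the two fresh staging rows plus the one retained row whose hd_business_date is not '20200715':

select * from test.dxp_qgm_hbase_test where hdfs_par = '202007';
-- Expected (test_int, hd_business_date, hdfs_par, gupiaoid):
-- 1	20200715	202007	1
-- 1	20200714	202007	1
-- 2	20200715	202007	2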

Approach 2: avoid writing out all the column names. With hive.support.quoted.identifiers set to none, a backtick-quoted regular expression in the SELECT list matches column names, and the pattern `(col1|col2)?+.+` selects every column except col1 and col2.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=10000;
set hive.exec.max.dynamic.partitions=10000;
set hive.exec.max.created.files=10000;
set parquet.memory.min.chunk.size=100000;
-- none enables backtick-quoted regular expressions in the SELECT column list
set hive.support.quoted.identifiers=none;
set mapreduce.map.memory.mb=4096;

insert overwrite table test.dxp_qgm_hbase_test partition(hdfs_par='202007',gupiaoid)
select * from (
select `(hdfs_par|gupiaoid|hd_business_date)?+.+`,'20200715' as hd_business_date,gupiaoid from test.dxp_qgm_hbase_linshi_01 where hdfs_par = '20200715' and gupiaoid is not null
union all
select `(hdfs_par|gupiaoid)?+.+`,gupiaoid from test.dxp_qgm_hbase_test where hdfs_par = '202007' and hd_business_date != '20200715' and gupiaoid is not null
) tmp;
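
As a quick check of the regex column syntax (hypothetical queries, assuming the settings above are active), selecting everything except the partition columns from the staging table returns only test_int, and the target table should end up with the same two partitions as in approach 1:

select `(hdfs_par|gupiaoid)?+.+` from test.dxp_qgm_hbase_linshi_01;
-- Returns only the test_int column

show partitions test.dxp_qgm_hbase_test;
-- Expected:
-- hdfs_par=202007/gupiaoid=1
-- hdfs_par=202007/gupiaoid=2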
