MPP update优化一

前言:对于列式存储,直接 update 的性能低下,可采用 update+insert 或者 delete+insert 的方式实现更新操作。

例子:套牌车的一个测试例子

 
-- Reset the three working tables so the test can be re-run from scratch.
-- IF EXISTS keeps the script from aborting on a first run, when none of the
-- tables has been created yet.
DROP TABLE IF EXISTS dts_vehicle_resource.fake_plate_info_sp;
DROP TABLE IF EXISTS viid_vehicle.vehiclestructured_kafka_sp;
DROP TABLE IF EXISTS dts_vehicle_resource.fake_plate_result_sp;

-- Create (or populate) the table holding the most recent record per plate. Measured: 40s
-- Rows are ranked per (plateno, platecolor) by passtime descending and only
-- rank 1 is kept; the join to dts.tollgate_info supplies the longitude/latitude
-- used to build the geography point.
-- NOTE(review): because of the inner join, a plate whose newest record points at a
-- tollgate missing from dts.tollgate_info is dropped entirely -- confirm intended.
CREATE TABLE dts_vehicle_resource.fake_plate_info_sp AS
SELECT
    latest.plateno,
    latest.platecolor,
    latest.tollgateid,
    latest.passtime,
    ST_GeographyFromText('POINT('||gate.longitude||' '||gate.latitude||')') AS geog
FROM (
    SELECT
        src.*,
        ROW_NUMBER() OVER (
            PARTITION BY src.plateno, src.platecolor
            ORDER BY src.passtime DESC
        ) AS rn
    FROM viid_vehicle.vehiclestructured_a05000 AS src
) AS latest
INNER JOIN dts.tollgate_info AS gate
    ON latest.tollgateid = gate.id
WHERE latest.rn = 1;

-- The steps from here on took 1.6s in total.
-- Create (or populate) the table of newly arrived pass records.
-- The passtime bounds form a half-open window 86,400,000 wide (exactly one day
-- if passtime is epoch milliseconds -- TODO confirm the unit).
-- NOTE(review): this reads vehiclestructured_a050800 while the history table is
-- built from vehiclestructured_a05000 -- confirm the name is not a typo.
CREATE TABLE viid_vehicle.vehiclestructured_kafka_sp AS
SELECT
    src.plateno,
    src.platecolor,
    src.tollgateid,
    src.passtime
FROM viid_vehicle.vehiclestructured_a050800 AS src
WHERE src.passtime >= 1585756800000
  AND src.passtime < 1585843200000;

-- Create (or populate) the fake-plate result table.
-- Candidate rows are the stored latest record of every plate that appears in the
-- new batch, plus the new batch itself (with its geography point). For each row,
-- LEAD() looks at the next pass of the same plate in passtime order; a row is
-- kept when the distance to that next pass is at least 150.0 per elapsed hour.
--
-- Changes from the earlier form of this statement:
--   * Plates are matched on (plateno, platecolor) directly instead of on a
--     concatenated 'plateno,platecolor' string, which could match falsely when a
--     value contains the separator and forces a string build for every row.
--   * Both UNION ALL branches list their columns, so they no longer depend on
--     the physical column order of the two tables.
--   * The divisor is 3600000.0 so the elapsed hours are never truncated by
--     integer division (relevant if passtime is an integer type).
-- NOTE(review): ST_Distance on geography values normally returns metres, so
-- 150.0 would mean 150 m/h rather than 150 km/h -- confirm the intended unit.
CREATE TABLE dts_vehicle_resource.fake_plate_result_sp AS
SELECT *
FROM (
    SELECT
        passes.*,
        ST_Distance(
            LEAD(passes.geog) OVER (PARTITION BY passes.plateno, passes.platecolor ORDER BY passes.passtime),
            passes.geog
        ) AS distance,
        LEAD(passes.tollgateid) OVER (PARTITION BY passes.plateno, passes.platecolor ORDER BY passes.passtime) AS lead_tollgateid,
        LEAD(passes.passtime) OVER (PARTITION BY passes.plateno, passes.platecolor ORDER BY passes.passtime) AS lead_passtime,
        COUNT(1) OVER (PARTITION BY passes.plateno, passes.platecolor) AS num
    FROM (
        -- Stored latest record, only for plates present in the new batch.
        SELECT
            hist.plateno,
            hist.platecolor,
            hist.tollgateid,
            hist.passtime,
            hist.geog
        FROM dts_vehicle_resource.fake_plate_info_sp AS hist
        WHERE (hist.plateno, hist.platecolor) IN (
            SELECT batch.plateno, batch.platecolor
            FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
        )
        UNION ALL
        -- New batch rows, enriched with the tollgate's geography point.
        SELECT
            batch.plateno,
            batch.platecolor,
            batch.tollgateid,
            batch.passtime,
            ST_GeographyFromText('POINT('||gate.longitude||' '||gate.latitude||')') AS geog
        FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
        INNER JOIN dts.tollgate_info AS gate
            ON batch.tollgateid = gate.id
    ) AS passes
) AS scored
WHERE scored.distance >= 150.0 * (ABS(scored.lead_passtime - scored.passtime) / 3600000.0);

-- Refresh the latest-record-per-plate table.
-- delete + insert approach, first half: remove the stored record of every plate
-- that appears in the new batch.
-- Plates are matched on (plateno, platecolor) directly rather than on a
-- concatenated 'plateno,platecolor' string: the string form can match falsely
-- when a value contains the separator, and it forces a string build per row.
DELETE FROM dts_vehicle_resource.fake_plate_info_sp
WHERE (plateno, platecolor) IN (
    SELECT batch.plateno, batch.platecolor
    FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
);

-- delete + insert approach, second half: insert the newest record of each plate
-- in the new batch (rank 1 by passtime descending) with its geography point.
-- The explicit target column list keeps the insert correct even if the column
-- order of the target table ever changes.
-- NOTE(review): a plate whose newest batch record points at a tollgate missing
-- from dts.tollgate_info is not inserted here -- confirm that is acceptable.
INSERT INTO dts_vehicle_resource.fake_plate_info_sp
    (plateno, platecolor, tollgateid, passtime, geog)
SELECT
    latest.plateno,
    latest.platecolor,
    latest.tollgateid,
    latest.passtime,
    ST_GeographyFromText('POINT('||gate.longitude||' '||gate.latitude||')') AS geog
FROM (
    SELECT
        batch.*,
        ROW_NUMBER() OVER (
            PARTITION BY batch.plateno, batch.platecolor
            ORDER BY batch.passtime DESC
        ) AS rn
    FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
) AS latest
INNER JOIN dts.tollgate_info AS gate
    ON latest.tollgateid = gate.id
WHERE latest.rn = 1;



测试数据以及测试结果:
   每个车牌的最近一条记录 对应的历史表数据量:2845550条
   每次过车数据量:3000条
   整个判断套牌车过程耗时(包括判断套牌车并插入结果表 且更新历史最新记录表):1.6s
   检测出套牌车数量:1564辆
   更新每个车牌的最新的那条记录条数:3000条

 


 

-- Create (or populate) the table holding the most recent record per plate. Measured: 29.361s
-- Rows are ranked per (plateno, platecolor) by passtime descending and only
-- rank 1 is kept; the join to dts.tollgate_info supplies the longitude/latitude
-- used to build the geography point.
-- NOTE(review): because of the inner join, a plate whose newest record points at a
-- tollgate missing from dts.tollgate_info is dropped entirely -- confirm intended.
CREATE TABLE dts_vehicle_resource.fake_plate_info_sp AS
SELECT
    latest.plateno,
    latest.platecolor,
    latest.tollgateid,
    latest.passtime,
    ST_GeographyFromText('POINT('||gate.longitude||' '||gate.latitude||')') AS geog
FROM (
    SELECT
        src.*,
        ROW_NUMBER() OVER (
            PARTITION BY src.plateno, src.platecolor
            ORDER BY src.passtime DESC
        ) AS rn
    FROM viid_vehicle.vehiclestructured_a05000 AS src
) AS latest
INNER JOIN dts.tollgate_info AS gate
    ON latest.tollgateid = gate.id
WHERE latest.rn = 1;

-- The steps from here on took 11.23s in total.
-- Create (or populate) the table of newly arrived pass records.
-- The passtime bounds form a half-open window 86,400,000 wide (exactly one day
-- if passtime is epoch milliseconds -- TODO confirm the unit).
-- NOTE(review): this reads vehiclestructured_a050800 while the history table is
-- built from vehiclestructured_a05000 -- confirm the name is not a typo.
CREATE TABLE viid_vehicle.vehiclestructured_kafka_sp AS
SELECT
    src.plateno,
    src.platecolor,
    src.tollgateid,
    src.passtime
FROM viid_vehicle.vehiclestructured_a050800 AS src
WHERE src.passtime >= 1585756800000
  AND src.passtime < 1585843200000;

-- create or insert 套牌车结果表
--法一:历史表不做过滤,全量参与 union all
-- Build the fake-plate result table from the WHOLE history table plus the new
-- batch (no pre-filtering of the history rows). For each row, LEAD() looks at the
-- next pass of the same plate in passtime order; a row is kept when the distance
-- to that next pass is at least 150.0 per elapsed hour.
--
-- Changes from the earlier form of this statement:
--   * Both UNION ALL branches list their columns, so they no longer depend on
--     the physical column order of the two tables.
--   * The divisor is 3600000.0 so the elapsed hours are never truncated by
--     integer division (relevant if passtime is an integer type).
-- NOTE(review): ST_Distance on geography values normally returns metres, so
-- 150.0 would mean 150 m/h rather than 150 km/h -- confirm the intended unit.
CREATE TABLE dts_vehicle_resource.fake_plate_result_sp AS
SELECT *
FROM (
    SELECT
        passes.*,
        ST_Distance(
            LEAD(passes.geog) OVER (PARTITION BY passes.plateno, passes.platecolor ORDER BY passes.passtime),
            passes.geog
        ) AS distance,
        LEAD(passes.tollgateid) OVER (PARTITION BY passes.plateno, passes.platecolor ORDER BY passes.passtime) AS lead_tollgateid,
        LEAD(passes.passtime) OVER (PARTITION BY passes.plateno, passes.platecolor ORDER BY passes.passtime) AS lead_passtime,
        COUNT(1) OVER (PARTITION BY passes.plateno, passes.platecolor) AS num
    FROM (
        -- Every stored latest record, unfiltered.
        SELECT
            hist.plateno,
            hist.platecolor,
            hist.tollgateid,
            hist.passtime,
            hist.geog
        FROM dts_vehicle_resource.fake_plate_info_sp AS hist
        UNION ALL
        -- New batch rows, enriched with the tollgate's geography point.
        SELECT
            batch.plateno,
            batch.platecolor,
            batch.tollgateid,
            batch.passtime,
            ST_GeographyFromText('POINT('||gate.longitude||' '||gate.latitude||')') AS geog
        FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
        INNER JOIN dts.tollgate_info AS gate
            ON batch.tollgateid = gate.id
    ) AS passes
) AS scored
WHERE scored.distance >= 150.0 * (ABS(scored.lead_passtime - scored.passtime) / 3600000.0);

-- Method 2: build the fake-plate result table from only those history rows whose
-- plate appears in the new batch, plus the new batch itself. For each row, LEAD()
-- looks at the next pass of the same plate in passtime order; a row is kept when
-- the distance to that next pass is at least 150.0 per elapsed hour.
--
-- Changes from the earlier form of this statement:
--   * Plates are matched on (plateno, platecolor) directly instead of on a
--     concatenated 'plateno,platecolor' string, which could match falsely when a
--     value contains the separator and forces a string build for every row.
--   * Both UNION ALL branches list their columns, so they no longer depend on
--     the physical column order of the two tables.
--   * The divisor is 3600000.0 so the elapsed hours are never truncated by
--     integer division (relevant if passtime is an integer type).
-- NOTE(review): ST_Distance on geography values normally returns metres, so
-- 150.0 would mean 150 m/h rather than 150 km/h -- confirm the intended unit.
CREATE TABLE dts_vehicle_resource.fake_plate_result_sp AS
SELECT *
FROM (
    SELECT
        passes.*,
        ST_Distance(
            LEAD(passes.geog) OVER (PARTITION BY passes.plateno, passes.platecolor ORDER BY passes.passtime),
            passes.geog
        ) AS distance,
        LEAD(passes.tollgateid) OVER (PARTITION BY passes.plateno, passes.platecolor ORDER BY passes.passtime) AS lead_tollgateid,
        LEAD(passes.passtime) OVER (PARTITION BY passes.plateno, passes.platecolor ORDER BY passes.passtime) AS lead_passtime,
        COUNT(1) OVER (PARTITION BY passes.plateno, passes.platecolor) AS num
    FROM (
        -- Stored latest record, only for plates present in the new batch.
        SELECT
            hist.plateno,
            hist.platecolor,
            hist.tollgateid,
            hist.passtime,
            hist.geog
        FROM dts_vehicle_resource.fake_plate_info_sp AS hist
        WHERE (hist.plateno, hist.platecolor) IN (
            SELECT batch.plateno, batch.platecolor
            FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
        )
        UNION ALL
        -- New batch rows, enriched with the tollgate's geography point.
        SELECT
            batch.plateno,
            batch.platecolor,
            batch.tollgateid,
            batch.passtime,
            ST_GeographyFromText('POINT('||gate.longitude||' '||gate.latitude||')') AS geog
        FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
        INNER JOIN dts.tollgate_info AS gate
            ON batch.tollgateid = gate.id
    ) AS passes
) AS scored
WHERE scored.distance >= 150.0 * (ABS(scored.lead_passtime - scored.passtime) / 3600000.0);



--更新每个车牌的最近的那条记录 对应的表 
--法一:update+insert方式
-- update + insert approach, first half: overwrite the stored record of every
-- plate that appears in the new batch.
-- Only the newest batch row per plate (rank 1 by passtime descending) feeds the
-- update. Without that reduction, a plate seen several times in one batch would
-- be updated from an arbitrary one of its rows, and the outcome would differ
-- from the delete + insert variant, which already keeps rank 1 only.
UPDATE dts_vehicle_resource.fake_plate_info_sp hist
SET tollgateid = fresh.tollgateid,
    passtime = fresh.passtime,
    geog = fresh.geog
FROM (
    SELECT
        latest.plateno,
        latest.platecolor,
        latest.tollgateid,
        latest.passtime,
        ST_GeographyFromText('POINT('||gate.longitude||' '||gate.latitude||')') AS geog
    FROM (
        SELECT
            batch.*,
            ROW_NUMBER() OVER (
                PARTITION BY batch.plateno, batch.platecolor
                ORDER BY batch.passtime DESC
            ) AS rn
        FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
    ) AS latest
    INNER JOIN dts.tollgate_info AS gate
        ON latest.tollgateid = gate.id
    WHERE latest.rn = 1
) fresh
WHERE hist.plateno = fresh.plateno
  AND hist.platecolor = fresh.platecolor;
       	
-- update + insert approach, second half: insert plates from the new batch that
-- have no stored record yet.
-- Only the newest batch row per plate (rank 1 by passtime descending) is
-- inserted, so the table keeps a single row per plate even when a new plate
-- appears several times in the batch. The explicit target column list keeps the
-- insert independent of the target table's column order.
INSERT INTO dts_vehicle_resource.fake_plate_info_sp
    (plateno, platecolor, tollgateid, passtime, geog)
SELECT
    fresh.plateno,
    fresh.platecolor,
    fresh.tollgateid,
    fresh.passtime,
    fresh.geog
FROM (
    SELECT
        latest.plateno,
        latest.platecolor,
        latest.tollgateid,
        latest.passtime,
        ST_GeographyFromText('POINT('||gate.longitude||' '||gate.latitude||')') AS geog
    FROM (
        SELECT
            batch.*,
            ROW_NUMBER() OVER (
                PARTITION BY batch.plateno, batch.platecolor
                ORDER BY batch.passtime DESC
            ) AS rn
        FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
    ) AS latest
    INNER JOIN dts.tollgate_info AS gate
        ON latest.tollgateid = gate.id
    WHERE latest.rn = 1
) AS fresh
WHERE NOT EXISTS (
    SELECT 1
    FROM dts_vehicle_resource.fake_plate_info_sp AS hist
    WHERE hist.plateno = fresh.plateno
      AND hist.platecolor = fresh.platecolor
);
				   
-- Method 2: delete + insert. First half: remove the stored record of every plate
-- that appears in the new batch.
-- Plates are matched on (plateno, platecolor) directly rather than on a
-- concatenated 'plateno,platecolor' string: the string form can match falsely
-- when a value contains the separator, and it forces a string build per row.
DELETE FROM dts_vehicle_resource.fake_plate_info_sp
WHERE (plateno, platecolor) IN (
    SELECT batch.plateno, batch.platecolor
    FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
);

-- delete + insert approach, second half: insert the newest record of each plate
-- in the new batch (rank 1 by passtime descending) with its geography point.
-- The explicit target column list keeps the insert correct even if the column
-- order of the target table ever changes.
-- NOTE(review): a plate whose newest batch record points at a tollgate missing
-- from dts.tollgate_info is not inserted here -- confirm that is acceptable.
INSERT INTO dts_vehicle_resource.fake_plate_info_sp
    (plateno, platecolor, tollgateid, passtime, geog)
SELECT
    latest.plateno,
    latest.platecolor,
    latest.tollgateid,
    latest.passtime,
    ST_GeographyFromText('POINT('||gate.longitude||' '||gate.latitude||')') AS geog
FROM (
    SELECT
        batch.*,
        ROW_NUMBER() OVER (
            PARTITION BY batch.plateno, batch.platecolor
            ORDER BY batch.passtime DESC
        ) AS rn
    FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
) AS latest
INNER JOIN dts.tollgate_info AS gate
    ON latest.tollgateid = gate.id
WHERE latest.rn = 1;

测试数据以及测试结果:
   每个车牌的最近一条记录 对应的历史表数据量:2845550条
   每次过车数据量:3000条
   整个判断套牌车过程耗时(包括判断套牌车并插入结果表 且更新历史最新记录表):11.23s
   检测出套牌车数量:1564辆
   更新每个车牌的最新的那条记录条数:3000条
       


------------------------------------
辅助sql:
-- Empty the three working tables while keeping their definitions.
TRUNCATE TABLE dts_vehicle_resource.fake_plate_info_sp;
TRUNCATE TABLE viid_vehicle.vehiclestructured_kafka_sp;
TRUNCATE TABLE dts_vehicle_resource.fake_plate_result_sp;

-- Remove the three working tables entirely.
-- IF EXISTS lets the statements succeed even when a table was never created
-- or has already been dropped.
DROP TABLE IF EXISTS dts_vehicle_resource.fake_plate_info_sp;
DROP TABLE IF EXISTS viid_vehicle.vehiclestructured_kafka_sp;
DROP TABLE IF EXISTS dts_vehicle_resource.fake_plate_result_sp;

-- Ad-hoc inspection: dump the full contents of each working table.
SELECT * FROM dts_vehicle_resource.fake_plate_info_sp;
SELECT * FROM viid_vehicle.vehiclestructured_kafka_sp;
SELECT * FROM dts_vehicle_resource.fake_plate_result_sp;

-- Row count of the latest-record-per-plate table.
SELECT COUNT(1) FROM dts_vehicle_resource.fake_plate_info_sp;

-- Row count of the fake-plate result table.
SELECT COUNT(1) FROM dts_vehicle_resource.fake_plate_result_sp;

-- Check how many rows a refresh would touch: count the (batch row, stored row)
-- pairs that share plateno and platecolor, considering only batch rows whose
-- tollgate exists in dts.tollgate_info.
-- Written with explicit joins instead of a comma join, and without building the
-- geography point, which the count never reads.
SELECT COUNT(1)
FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
INNER JOIN dts.tollgate_info AS gate
    ON batch.tollgateid = gate.id
INNER JOIN dts_vehicle_resource.fake_plate_info_sp AS hist
    ON hist.plateno = batch.plateno
   AND hist.platecolor = batch.platecolor;

 

你可能感兴趣的:(vertica,sql)