前言:对于列式存储,直接 update 性能低下,可采用 update+insert 方式或者 delete+insert 方式实现更新操作。
例子:套牌车检测的一个测试例子。
-- Reset: remove the three working tables left over from a previous run.
-- IF EXISTS keeps the script runnable on a database where the tables were never created.
DROP TABLE IF EXISTS dts_vehicle_resource.fake_plate_info_sp;
DROP TABLE IF EXISTS viid_vehicle.vehiclestructured_kafka_sp;
DROP TABLE IF EXISTS dts_vehicle_resource.fake_plate_result_sp;
-- Build the history table: the most recent pass record of every (plateno, platecolor),
-- with the toll gate position attached as a geography point. Measured run time: 40 s.
-- Records whose toll gate has no row in dts.tollgate_info are dropped by the inner join.
CREATE TABLE dts_vehicle_resource.fake_plate_info_sp AS
SELECT
    latest.plateno,
    latest.platecolor,
    latest.tollgateid,
    latest.passtime,
    ST_GeographyFromText('POINT(' || gate.longitude || ' ' || gate.latitude || ')') AS geog
FROM (
    -- rn = 1 marks the newest record inside each plate/colour group.
    SELECT
        src.*,
        ROW_NUMBER() OVER (
            PARTITION BY src.plateno, src.platecolor
            ORDER BY src.passtime DESC
        ) AS rn
    FROM viid_vehicle.vehiclestructured_a05000 AS src
) AS latest
INNER JOIN dts.tollgate_info AS gate
    ON latest.tollgateid = gate.id
WHERE latest.rn = 1;
-- The steps from here on took 1.6 s in total.
-- Stage the newly arrived pass records (one fixed passtime window) in a working table.
-- NOTE(review): the bounds look like epoch milliseconds; confirm the unit of passtime.
CREATE TABLE viid_vehicle.vehiclestructured_kafka_sp AS
SELECT
    incoming.plateno,
    incoming.platecolor,
    incoming.tollgateid,
    incoming.passtime AS passtime
FROM viid_vehicle.vehiclestructured_a050800 AS incoming
WHERE incoming.passtime >= 1585756800000
  AND incoming.passtime < 1585843200000;
-- Build the fake-plate (cloned plate) result table.
-- The stored latest record of every plate that appears in the incoming batch is combined
-- with the incoming records; within each (plateno, platecolor) the rows are ordered by
-- passtime and each row is compared with the next one (LEAD). A row is kept when the
-- distance to the next sighting is at least 150.0 * (elapsed time expressed in hours).
-- The last row of each plate has no successor, so its distance is NULL and it is filtered out.
--
-- Fix: the elapsed time is divided by 3600000.0 instead of 3600000. On engines where
-- integer / integer truncates, every gap shorter than one hour became 0 and the threshold
-- collapsed to 0, flagging every pair with a measurable distance.
-- Fix: plate membership is tested column by column with EXISTS instead of comparing
-- plateno || ',' || platecolor strings, which can collide when a value contains a comma.
--
-- NOTE(review): ST_Distance on geography values is documented to return metres; if 150.0 is
-- meant as km/h the constant should probably be 150000.0 — confirm the intended unit.
CREATE TABLE dts_vehicle_resource.fake_plate_result_sp AS
SELECT scored.*
FROM (
    SELECT
        track.*,
        ST_Distance(
            LEAD(track.geog) OVER (PARTITION BY track.plateno, track.platecolor ORDER BY track.passtime),
            track.geog
        ) AS distance,
        LEAD(track.tollgateid) OVER (PARTITION BY track.plateno, track.platecolor ORDER BY track.passtime) AS lead_tollgateid,
        LEAD(track.passtime) OVER (PARTITION BY track.plateno, track.platecolor ORDER BY track.passtime) AS lead_passtime,
        COUNT(1) OVER (PARTITION BY track.plateno, track.platecolor) AS num
    FROM (
        -- Stored latest record, only for plates present in the incoming batch.
        SELECT hist.*
        FROM dts_vehicle_resource.fake_plate_info_sp AS hist
        WHERE EXISTS (
            SELECT 1
            FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
            WHERE batch.plateno = hist.plateno
              AND batch.platecolor = hist.platecolor
        )
        UNION ALL
        -- Incoming records, positioned through their toll gate.
        SELECT
            incoming.*,
            ST_GeographyFromText('POINT(' || gate.longitude || ' ' || gate.latitude || ')') AS geog
        FROM viid_vehicle.vehiclestructured_kafka_sp AS incoming
        INNER JOIN dts.tollgate_info AS gate
            ON incoming.tollgateid = gate.id
    ) AS track
) AS scored
WHERE scored.distance >= 150.0 * (ABS(scored.lead_passtime - scored.passtime) / 3600000.0);
-- Refresh the history table (latest record per plate) using the delete + insert approach:
-- remove the stored row of every plate present in the incoming batch, then insert the
-- newest incoming record of each plate.
--
-- Changes versus the earlier version:
--   * plates are matched column by column (EXISTS) instead of via plateno || ',' || platecolor,
--     which can collide when a value contains a comma;
--   * the INSERT names its target columns, so it no longer depends on column order;
--   * both statements run in one transaction, so readers never observe the plates missing.
-- A plate whose newest incoming record has no matching toll gate is deleted and not re-inserted
-- (the inner join drops it), exactly as before.
BEGIN;

DELETE FROM dts_vehicle_resource.fake_plate_info_sp AS hist
WHERE EXISTS (
    SELECT 1
    FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
    WHERE batch.plateno = hist.plateno
      AND batch.platecolor = hist.platecolor
);

INSERT INTO dts_vehicle_resource.fake_plate_info_sp (plateno, platecolor, tollgateid, passtime, geog)
SELECT
    latest.plateno,
    latest.platecolor,
    latest.tollgateid,
    latest.passtime,
    ST_GeographyFromText('POINT(' || gate.longitude || ' ' || gate.latitude || ')') AS geog
FROM (
    -- rn = 1 marks the newest incoming record of each plate.
    SELECT
        batch.*,
        ROW_NUMBER() OVER (
            PARTITION BY batch.plateno, batch.platecolor
            ORDER BY batch.passtime DESC
        ) AS rn
    FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
) AS latest
INNER JOIN dts.tollgate_info AS gate
    ON latest.tollgateid = gate.id
WHERE latest.rn = 1;

COMMIT;
测试数据以及测试结果:
每个车牌的最近一条记录 对应的历史表数据量:2845550条
每次过车数据量:3000条
整个判断套牌车过程耗时(包括判断套牌车并插入结果表 且更新历史最新记录表):1.6s
检测出套牌车数量:1564辆
更新每个车牌的最新的那条记录条数:3000条
-- Second measurement: build the table holding the latest pass record of every
-- (plateno, platecolor), with the toll gate position as a geography point. Run time: 29.361 s.
-- Records whose toll gate is missing from dts.tollgate_info are dropped by the inner join.
CREATE TABLE dts_vehicle_resource.fake_plate_info_sp AS
SELECT
    newest.plateno,
    newest.platecolor,
    newest.tollgateid,
    newest.passtime,
    ST_GeographyFromText('POINT(' || tg.longitude || ' ' || tg.latitude || ')') AS geog
FROM (
    -- Number the records of each plate from newest (1) to oldest.
    SELECT
        rec.*,
        ROW_NUMBER() OVER (
            PARTITION BY rec.plateno, rec.platecolor
            ORDER BY rec.passtime DESC
        ) AS rn
    FROM viid_vehicle.vehiclestructured_a05000 AS rec
) AS newest
INNER JOIN dts.tollgate_info AS tg
    ON newest.tollgateid = tg.id
WHERE newest.rn = 1;
-- The steps from here on took 11.23 s in total.
-- Stage the newly arrived pass records (one fixed passtime window) in a working table.
-- NOTE(review): the bounds look like epoch milliseconds; confirm the unit of passtime.
CREATE TABLE viid_vehicle.vehiclestructured_kafka_sp AS
SELECT
    rec.plateno,
    rec.platecolor,
    rec.tollgateid,
    rec.passtime AS passtime
FROM viid_vehicle.vehiclestructured_a050800 AS rec
WHERE rec.passtime >= 1585756800000
  AND rec.passtime < 1585843200000;
-- create or insert 套牌车结果表
法一:
-- Method 1: build the fake-plate result table from the WHOLE history table plus the
-- incoming batch (no pre-filtering of the history by plate).
-- Within each (plateno, platecolor) the rows are ordered by passtime and each row is compared
-- with the next one (LEAD); a row is kept when the distance to the next sighting is at least
-- 150.0 * (elapsed time expressed in hours). Rows without a successor have a NULL distance
-- and are filtered out.
--
-- Fix: the elapsed time is divided by 3600000.0 instead of 3600000. On engines where
-- integer / integer truncates, every gap shorter than one hour became 0 and the threshold
-- collapsed to 0, flagging every pair with a measurable distance.
--
-- NOTE(review): ST_Distance on geography values is documented to return metres; if 150.0 is
-- meant as km/h the constant should probably be 150000.0 — confirm the intended unit.
CREATE TABLE dts_vehicle_resource.fake_plate_result_sp AS
SELECT scored.*
FROM (
    SELECT
        track.*,
        ST_Distance(
            LEAD(track.geog) OVER (PARTITION BY track.plateno, track.platecolor ORDER BY track.passtime),
            track.geog
        ) AS distance,
        LEAD(track.tollgateid) OVER (PARTITION BY track.plateno, track.platecolor ORDER BY track.passtime) AS lead_tollgateid,
        LEAD(track.passtime) OVER (PARTITION BY track.plateno, track.platecolor ORDER BY track.passtime) AS lead_passtime,
        COUNT(1) OVER (PARTITION BY track.plateno, track.platecolor) AS num
    FROM (
        -- Every stored latest record.
        SELECT hist.*
        FROM dts_vehicle_resource.fake_plate_info_sp AS hist
        UNION ALL
        -- Incoming records, positioned through their toll gate.
        SELECT
            incoming.*,
            ST_GeographyFromText('POINT(' || gate.longitude || ' ' || gate.latitude || ')') AS geog
        FROM viid_vehicle.vehiclestructured_kafka_sp AS incoming
        INNER JOIN dts.tollgate_info AS gate
            ON incoming.tollgateid = gate.id
    ) AS track
) AS scored
WHERE scored.distance >= 150.0 * (ABS(scored.lead_passtime - scored.passtime) / 3600000.0);
-- Method 2: build the fake-plate result table from only those history rows whose plate
-- appears in the incoming batch, plus the incoming batch itself.
-- Within each (plateno, platecolor) the rows are ordered by passtime and each row is compared
-- with the next one (LEAD); a row is kept when the distance to the next sighting is at least
-- 150.0 * (elapsed time expressed in hours). Rows without a successor have a NULL distance
-- and are filtered out.
--
-- Fix: the elapsed time is divided by 3600000.0 instead of 3600000. On engines where
-- integer / integer truncates, every gap shorter than one hour became 0 and the threshold
-- collapsed to 0, flagging every pair with a measurable distance.
-- Fix: plate membership is tested column by column with EXISTS instead of comparing
-- plateno || ',' || platecolor strings, which can collide when a value contains a comma.
--
-- NOTE(review): ST_Distance on geography values is documented to return metres; if 150.0 is
-- meant as km/h the constant should probably be 150000.0 — confirm the intended unit.
CREATE TABLE dts_vehicle_resource.fake_plate_result_sp AS
SELECT scored.*
FROM (
    SELECT
        track.*,
        ST_Distance(
            LEAD(track.geog) OVER (PARTITION BY track.plateno, track.platecolor ORDER BY track.passtime),
            track.geog
        ) AS distance,
        LEAD(track.tollgateid) OVER (PARTITION BY track.plateno, track.platecolor ORDER BY track.passtime) AS lead_tollgateid,
        LEAD(track.passtime) OVER (PARTITION BY track.plateno, track.platecolor ORDER BY track.passtime) AS lead_passtime,
        COUNT(1) OVER (PARTITION BY track.plateno, track.platecolor) AS num
    FROM (
        -- Stored latest record, only for plates present in the incoming batch.
        SELECT hist.*
        FROM dts_vehicle_resource.fake_plate_info_sp AS hist
        WHERE EXISTS (
            SELECT 1
            FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
            WHERE batch.plateno = hist.plateno
              AND batch.platecolor = hist.platecolor
        )
        UNION ALL
        -- Incoming records, positioned through their toll gate.
        SELECT
            incoming.*,
            ST_GeographyFromText('POINT(' || gate.longitude || ' ' || gate.latitude || ')') AS geog
        FROM viid_vehicle.vehiclestructured_kafka_sp AS incoming
        INNER JOIN dts.tollgate_info AS gate
            ON incoming.tollgateid = gate.id
    ) AS track
) AS scored
WHERE scored.distance >= 150.0 * (ABS(scored.lead_passtime - scored.passtime) / 3600000.0);
--更新每个车牌的最近的那条记录 对应的表
法一:update+insert方式
-- Refresh the history table (latest record per plate) using the update + insert approach:
-- overwrite the stored row of every plate that already exists, then insert the plates that
-- are not stored yet.
--
-- Fix: the incoming batch is first reduced to the newest record per (plateno, platecolor)
-- (rn = 1). Without that step, a plate with several incoming records was updated from an
-- arbitrary one of them, and a new plate was inserted once per incoming record, breaking the
-- one-row-per-plate shape of the table.
-- The INSERT names its target columns, and both statements run in one transaction so the
-- NOT EXISTS check sees the table exactly as the UPDATE left it.
-- A plate whose newest incoming record has no matching toll gate is left unchanged.
BEGIN;

UPDATE dts_vehicle_resource.fake_plate_info_sp AS hist
SET tollgateid = newest.tollgateid,
    passtime = newest.passtime,
    geog = newest.geog
FROM (
    SELECT
        latest.plateno,
        latest.platecolor,
        latest.tollgateid,
        latest.passtime,
        ST_GeographyFromText('POINT(' || gate.longitude || ' ' || gate.latitude || ')') AS geog
    FROM (
        SELECT
            batch.*,
            ROW_NUMBER() OVER (
                PARTITION BY batch.plateno, batch.platecolor
                ORDER BY batch.passtime DESC
            ) AS rn
        FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
    ) AS latest
    INNER JOIN dts.tollgate_info AS gate
        ON latest.tollgateid = gate.id
    WHERE latest.rn = 1
) AS newest
WHERE hist.plateno = newest.plateno
  AND hist.platecolor = newest.platecolor;

INSERT INTO dts_vehicle_resource.fake_plate_info_sp (plateno, platecolor, tollgateid, passtime, geog)
SELECT
    newest.plateno,
    newest.platecolor,
    newest.tollgateid,
    newest.passtime,
    newest.geog
FROM (
    SELECT
        latest.plateno,
        latest.platecolor,
        latest.tollgateid,
        latest.passtime,
        ST_GeographyFromText('POINT(' || gate.longitude || ' ' || gate.latitude || ')') AS geog
    FROM (
        SELECT
            batch.*,
            ROW_NUMBER() OVER (
                PARTITION BY batch.plateno, batch.platecolor
                ORDER BY batch.passtime DESC
            ) AS rn
        FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
    ) AS latest
    INNER JOIN dts.tollgate_info AS gate
        ON latest.tollgateid = gate.id
    WHERE latest.rn = 1
) AS newest
WHERE NOT EXISTS (
    SELECT 1
    FROM dts_vehicle_resource.fake_plate_info_sp AS hist
    WHERE hist.plateno = newest.plateno
      AND hist.platecolor = newest.platecolor
);

COMMIT;
-- Method 2: delete + insert.
-- Remove the stored row of every plate present in the incoming batch, then insert the newest
-- incoming record of each plate.
--
-- Changes versus the earlier version:
--   * plates are matched column by column (EXISTS) instead of via plateno || ',' || platecolor,
--     which can collide when a value contains a comma;
--   * the INSERT names its target columns, so it no longer depends on column order;
--   * both statements run in one transaction, so readers never observe the plates missing.
-- A plate whose newest incoming record has no matching toll gate is deleted and not re-inserted
-- (the inner join drops it), exactly as before.
BEGIN;

DELETE FROM dts_vehicle_resource.fake_plate_info_sp AS hist
WHERE EXISTS (
    SELECT 1
    FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
    WHERE batch.plateno = hist.plateno
      AND batch.platecolor = hist.platecolor
);

INSERT INTO dts_vehicle_resource.fake_plate_info_sp (plateno, platecolor, tollgateid, passtime, geog)
SELECT
    latest.plateno,
    latest.platecolor,
    latest.tollgateid,
    latest.passtime,
    ST_GeographyFromText('POINT(' || gate.longitude || ' ' || gate.latitude || ')') AS geog
FROM (
    -- rn = 1 marks the newest incoming record of each plate.
    SELECT
        batch.*,
        ROW_NUMBER() OVER (
            PARTITION BY batch.plateno, batch.platecolor
            ORDER BY batch.passtime DESC
        ) AS rn
    FROM viid_vehicle.vehiclestructured_kafka_sp AS batch
) AS latest
INNER JOIN dts.tollgate_info AS gate
    ON latest.tollgateid = gate.id
WHERE latest.rn = 1;

COMMIT;
测试数据以及测试结果:
每个车牌的最近一条记录 对应的历史表数据量:2845550条
每次过车数据量:3000条
整个判断套牌车过程耗时(包括判断套牌车并插入结果表 且更新历史最新记录表):11.23s
检测出套牌车数量:1564辆
更新每个车牌的最新的那条记录条数:3000条
------------------------------------
辅助sql:
-- Helper: empty the three working tables while keeping their definitions.
TRUNCATE TABLE dts_vehicle_resource.fake_plate_info_sp;    -- latest record per plate
TRUNCATE TABLE viid_vehicle.vehiclestructured_kafka_sp;    -- incoming batch
TRUNCATE TABLE dts_vehicle_resource.fake_plate_result_sp;  -- detection result
-- Helper: remove the three working tables.
-- IF EXISTS lets the statements succeed even when a table has already been dropped.
DROP TABLE IF EXISTS dts_vehicle_resource.fake_plate_info_sp;
DROP TABLE IF EXISTS viid_vehicle.vehiclestructured_kafka_sp;
DROP TABLE IF EXISTS dts_vehicle_resource.fake_plate_result_sp;
-- Helper: dump the full contents of each working table for manual inspection.
SELECT *
FROM dts_vehicle_resource.fake_plate_info_sp;

SELECT *
FROM viid_vehicle.vehiclestructured_kafka_sp;

SELECT *
FROM dts_vehicle_resource.fake_plate_result_sp;

-- Helper: row counts of the history table and of the result table.
SELECT COUNT(1)
FROM dts_vehicle_resource.fake_plate_info_sp;

SELECT COUNT(1)
FROM dts_vehicle_resource.fake_plate_result_sp;
-- Check the number of rows to update: count the incoming records that have a known toll gate
-- and whose (plateno, platecolor) already exists in the history table.
-- The implicit comma join is written as explicit INNER JOINs, and the geography point that
-- was built but never read by the count has been dropped.
-- The result counts incoming records, so a plate with several records is counted several times.
SELECT COUNT(1)
FROM viid_vehicle.vehiclestructured_kafka_sp AS incoming
INNER JOIN dts.tollgate_info AS gate
    ON incoming.tollgateid = gate.id
INNER JOIN dts_vehicle_resource.fake_plate_info_sp AS hist
    ON hist.plateno = incoming.plateno
   AND hist.platecolor = incoming.platecolor;