join大表和小表对查询性能的影响和索引类型的选择
示例数据库采用PostgreSQL
-- Teardown: remove the objects of a previous run so the script can be executed again.
drop function if exists gen_random_gps(bigint);
drop function if exists gps_save(bigint,bigint,timestamptz,geometry);
drop function if exists gen_random_plate(int,int);
drop function if exists gen_random_az(int);
drop function if exists gen_random_num(int);
-- The GPS table is created below under the name vehiclegps. It is dropped before
-- vehicles because its foreign key references vehicles(objectid).
drop table if exists vehiclegps;
drop table if exists vehicles;
/****************************************************************************************
Vehicle master data table: one row per vehicle.
Teardown statement: drop table if exists vehicles;
****************************************************************************************/
create table vehicles (
    -- unique vehicle number
    objectid int not null,
    -- licence plate number
    numplate text not null,
    -- remark
    describe text,
    -- registration time
    generate timestamptz not null default now(),
    constraint pk_vehicles_objectid primary key (objectid)
);
create index idx_vehicles_numplate on vehicles (numplate);
/****************************************************************************************
Vehicle GPS track table: one row per GPS fix.
Teardown statement: drop table if exists vehiclegps;
****************************************************************************************/
create table if not exists vehiclegps (
    -- unique number of the GPS record
    objectid bigint not null,
    -- vehicle number, foreign key to vehicles(objectid)
    vehid int not null,
    -- GPS time; a real application should partition the table on this column
    generate timestamptz not null default now(),
    constraint pk_vehiclegps_objectid primary key (objectid),
    constraint fk_vehiclegps_vehid foreign key (vehid)
        references vehicles (objectid)
        on delete cascade
);
-- index on the vehicle number
create index idx_vehiclegps_vehid on vehiclegps (vehid);
-- GPS position: SRID 4326, point type, two-dimensional coordinates
select AddGeometryColumn('vehiclegps', 'geom', 4326, 'POINT', 2);
-- BRIN index on the GPS time with pages_per_range set to 1
create index idx_vehiclegps_generate on vehiclegps using brin (generate) with (pages_per_range = '1');
/****************************************************************************************
车辆基本信息函数
drop function if exists gen_random_plate(int,int);
drop function if exists gen_random_az(int);
drop function if exists gen_random_num(int);
****************************************************************************************/
-- Returns a string of $1 random decimal digits ('0'..'9').
-- Returns NULL when $1 is less than 1, because string_agg then aggregates zero rows.
create or replace function gen_random_num(int)
returns text
as $$
    -- floor(random() * 10) selects each of the ten digits with equal probability.
    -- Rounding random() * 9 to the nearest integer would give '0' and '9' only
    -- half the weight of the other digits.
    select string_agg(chr(48 + floor(random() * 10)::integer), '')
    from generate_series(1, $1);
$$ language sql;
-- Returns a string of $1 random upper-case letters ('A'..'Z').
-- Returns NULL when $1 is less than 1, because string_agg then aggregates zero rows.
create or replace function gen_random_az(int)
returns text
as $$
    -- floor(random() * 26) selects each of the 26 letters with equal probability.
    -- Rounding random() * 25 to the nearest integer would give 'A' and 'Z' only
    -- half the weight of the other letters.
    select string_agg(chr(65 + floor(random() * 26)::integer), '')
    from generate_series(1, $1);
$$ language sql;
-- Generates a random licence plate of the form <province><letters>-<digits>.
-- $1: number of upper-case letters that follow the province character (default 1)
-- $2: number of digits that follow the dash (default 5)
create or replace function gen_random_plate(int default 1,int default 5)
returns text
as $$
    with cte as (
        select array['浙','粤','京','津','冀','晋','蒙','辽','黑','沪','吉','苏','皖','赣','鲁','豫','鄂','湘','桂','琼','渝','川','贵','云','藏','陕','甘','青','宁'] as provinces
    ), parts as (
        select
            -- The index is drawn uniformly from 1..array_length, so every province is
            -- equally likely and the array can grow without touching this expression.
            array_to_string(
                array[
                    provinces[1 + floor(random() * array_length(provinces, 1))::integer],
                    gen_random_az($1)
                ],
                ''
            ) as pro,
            gen_random_num($2) as num
        from cte
    )
    select array_to_string(array[pro, num], '-')
    from parts
$$ language sql;
/****************************************************************************************
随机插入车辆GPS轨迹函数,调用一次插入1千条记录,gps时间控制在2018-01-01至2018-04-16之间
drop function if exists gen_random_gps(bigint);
drop function if exists gps_save(bigint,bigint,timestamptz,geometry);
****************************************************************************************/
-- Generates 1000 random GPS fixes with consecutive ids $1 .. $1 + 999.
-- $1: id of the first generated row
-- Returns one row per fix: id, a vehicle id between min and max of vehicles.objectid,
-- a GPS time between 2018-01-01 and 2018-04-16, and a point with SRID 4326.
create or replace function gen_random_gps(bigint)
returns table(objectid bigint,vehid bigint,generate timestamptz,geom geometry)
as $$
    with bounds as (
        select
            $1 as first_id,
            $1 + 999 as last_id,
            extract(epoch from '2018-01-01'::timestamptz) as sdate,
            extract(epoch from '2018-04-16'::timestamptz) as edate,
            (select min(v.objectid) from vehicles as v) as veh_min,
            (select max(v.objectid) from vehicles as v) as veh_max
    ), points as (
        select
            gs.id,
            -- Only valid while vehicle ids are contiguous. floor() over the closed
            -- range gives the smallest and largest id the same weight as the rest;
            -- rounding to the nearest integer would halve their share.
            (b.veh_min + floor(random() * (b.veh_max - b.veh_min + 1)))::bigint as vehid,
            to_timestamp(random() * (b.edate - b.sdate) + b.sdate) as gpstime,
            ST_SetSRID(
                ST_Point(
                    round((random() * (135.085831 - 73.406586) + 73.406586)::numeric, 6),
                    round((random() * (53.880950 - 3.408477) + 3.408477)::numeric, 6)
                ),
                4326
            ) as geom
        from bounds as b
        cross join lateral generate_series(b.first_id, b.last_id) as gs(id)
    )
    select p.id, p.vehid, p.gpstime, p.geom
    from points as p
$$ language sql;
-- Stores one GPS fix in vehiclegps and returns the objectid of the inserted row.
-- $1: objectid, $2: vehid, $3: generate (GPS time), $4: geom (position)
create or replace function gps_save(bigint,bigint,timestamptz,geometry)
returns bigint
as $$
    insert into vehiclegps (objectid, vehid, generate, geom)
    values ($1, $2, $3, $4)
    returning objectid;
$$ language sql;
/****************************************************************************************
Insert random vehicle master data for testing: 500,000 rows with objectid 1..500000.
****************************************************************************************/
-- The target columns are named explicitly so the load does not depend on the column
-- order of vehicles; describe stays NULL and generate takes its default.
insert into vehicles (objectid, numplate)
select id, gen_random_plate()
from generate_series(1, 500000) as id;
/****************************************************************************************
Insert random GPS track data for testing: 10,000 batches of 1,000 rows (10,000,000 rows).
****************************************************************************************/
do $$
declare
    batch_start bigint;  -- id of the first row of the current batch
    last_saved bigint;   -- largest objectid returned by gps_save for the current batch
begin
    for batch_no in 1..10000 loop
        batch_start := (batch_no - 1) * 1000 + 1;
        select max(saved.id)
        into last_saved
        from (
            select gps_save(g.objectid, g.vehid, g.generate, g.geom) as id
            from gen_random_gps(batch_start) as g
        ) as saved;
        -- progress output: one notice per batch
        raise notice '%', last_saved;
    end loop;
end;
$$;
为了不影响执行计划,数据插入完成后运行vacuum。
-- Rewrite and freeze both tables after the bulk load. analyze is included so the
-- planner statistics are collected from the freshly loaded data before the
-- execution plans below are measured.
vacuum (full, freeze, verbose, analyze) vehicles;
vacuum (full, freeze, verbose, analyze) vehiclegps;
在本案例中,因为单位时间内的gps数据非常密集,brin类型索引只有在一个非常小的范围内(大约3小时以内的数据,返回的数据约为14000行,执行时间约为360ms,以实测为准,误差基本不会太大)才有效(注意pages_per_range已经设置为1).
-- Rebuild the index on the GPS time as BRIN with pages_per_range set to 1.
drop index if exists idx_vehiclegps_generate;
create index idx_vehiclegps_generate on vehiclegps using brin (generate) with (pages_per_range = '1');
-- Fetch the rows of a window of 3 hours 25 minutes.
explain (analyze, verbose, costs, buffers, timing)
select * from vehiclegps
where generate >= ('2018-01-01 12:00:00'::timestamptz) and generate < ('2018-01-01 15:25:00'::timestamptz);
-- Fetch the rows of one full day (half-open range, so the next midnight is excluded).
explain (analyze, verbose, costs, buffers, timing)
select * from vehiclegps
where generate >= ('2018-01-01'::timestamptz) and generate < ('2018-01-02'::timestamptz);
在本案例中,B树索引表现优于brin.
-- Rebuild the index on the GPS time as a default (B-tree) index.
drop index if exists idx_vehiclegps_generate;
create index idx_vehiclegps_generate on vehiclegps (generate);
-- Fetch the rows of a window of 3 hours 25 minutes.
explain (analyze, verbose, costs, buffers, timing)
select * from vehiclegps
where generate >= ('2018-01-01 12:00:00'::timestamptz) and generate < ('2018-01-01 15:25:00'::timestamptz);
-- Fetch the rows of one full day (half-open range, so the next midnight is excluded).
explain (analyze, verbose, costs, buffers, timing)
select * from vehiclegps
where generate >= ('2018-01-01'::timestamptz) and generate < ('2018-01-02'::timestamptz);
本章小结
根据第3章的结论,后面的示例采用B树索引.
/****************************************************************************************
Join order comparison between the large table (vehiclegps) and the small table (vehicles).
Both queries return the GPS rows of 2018-01-01; only the order of the joined tables differs.
****************************************************************************************/
-- Large table first: vehiclegps joined to vehicles.
explain (analyze, verbose, costs, buffers, timing)
select gps.*
from vehiclegps as gps
inner join vehicles as veh on veh.objectid = gps.vehid
where gps.generate >= ('2018-01-01'::timestamptz) and gps.generate < ('2018-01-02'::timestamptz);
-- Small table first: vehicles joined to vehiclegps.
explain (analyze, verbose, costs, buffers, timing)
select gps.*
from vehicles as veh
inner join vehiclegps as gps on gps.vehid = veh.objectid
where gps.generate >= ('2018-01-01'::timestamptz) and gps.generate < ('2018-01-02'::timestamptz);
本章小结