服务器CPU排行榜
相关行业的同学如果看不懂,应该好好反思一下自己,思考一下人生了.
-- Benchmark table: `num` is later covered by a B-tree index and `ref` by an
-- array (GIN) index. Recreated from scratch on every run.
drop table if exists test;

create table test (
    objectid serial    not null,  -- surrogate key fed by the serial sequence
    num      integer   not null,
    ref      integer[] not null,
    constraint pk_test_objectid primary key (objectid)
) with (fillfactor = 100);

-- Register the primary-key index as the table's clustering index. This only
-- records the choice for later CLUSTER runs; it does not reorder any rows.
alter table test cluster on pk_test_objectid;
为加快插入速度,其它索引在生成数据完成后再创建.
函数用于控制num和ref的值分布,以便num和ref字段上的索引具有较高的可选择性.
-- Remove the helper routines and their composite type so the script can be
-- re-run. The functions go first because their signatures reference tweights.
drop function if exists saveAsTest(integer, integer[]);
drop function if exists gen_row(integer[], tweights[], tweights[]);
drop function if exists gen_array(integer[], tweights[]);
drop function if exists get_next_index(tweights[]);
drop type if exists tweights;
/*
 * Coefficient type for the smooth weighted round-robin algorithm.
 *   weight    : the configured coefficient
 *   curweight : the coefficient currently in effect; initialise it to 0
 */
create type tweights as (
    weight    integer,
    curweight integer
);
/*
 * Smooth weighted round-robin balancing: choose the next index.
 *
 * Parameter
 *   $1 : array of tweights, one entry per candidate. The caller must pass the
 *        `weights` array returned by the previous call back in, because the
 *        round-robin state lives in the curweight fields.
 *
 * Returns exactly one row
 *   index   : 1-based position of the chosen entry
 *   weights : copy of $1 with the curweight fields advanced by one step
 *
 * Example
 *   array[(50,0)::tweights, (30,0)::tweights, (15,0)::tweights, (5,0)::tweights]
 *   The four coefficients add up to 100, so over every 100 consecutive calls
 *   (with the state threaded through) index 1 is chosen 50 times, index 2
 *   30 times, index 3 15 times and index 4 5 times. The choice is
 *   deterministic; no random numbers are involved.
 *
 * Behaviour on edge cases
 *   - NULL argument: the function is strict, so no row is returned.
 *   - Empty array: raises an exception.
 *   - One-element array: returns (1, $1) unchanged.
 */
create or replace function get_next_index(tweights[])
returns table(index integer, weights tweights[])
as $$
declare
    v_weights tweights[] := $1;                   -- working copy of the state
    v_len     integer    := array_length($1, 1);  -- NULL for an empty array
    v_best    integer;                            -- position of the largest curweight so far
    v_total   integer    := 0;                    -- sum of all configured weights
    v_item    tweights;
begin
    if v_len is null then
        raise exception 'get_next_index: the weights array must not be empty';
    end if;

    if v_len = 1 then
        return query select 1, v_weights;
        -- RETURN QUERY only appends to the result set; without this RETURN the
        -- code below would run as well and emit a second row.
        return;
    end if;

    for v_i in 1..v_len loop
        -- Advance every entry by its configured weight.
        v_item := v_weights[v_i];
        v_item.curweight := v_item.curweight + v_item.weight;
        v_total := v_total + v_item.weight;
        v_weights[v_i] := v_item;

        -- Strict "<" keeps the earliest entry when several share the maximum.
        if v_best is null or (v_weights[v_best]).curweight < v_item.curweight then
            v_best := v_i;
        end if;
    end loop;

    -- Penalise the winner by the total weight so the other entries catch up.
    v_item := v_weights[v_best];
    v_item.curweight := v_item.curweight - v_total;
    v_weights[v_best] := v_item;

    return query select v_best, v_weights;
end;
$$ language plpgsql strict;
/*
 * Build an integer array of 1 to 4 elements whose values are picked from $1
 * by repeated calls to get_next_index.
 *
 * Parameters
 *   $1 : candidate values
 *   $2 : round-robin state used to pick positions in $1
 *
 * Returns one row
 *   vals    : the generated array, in the order the elements were picked
 *   weights : the round-robin state after the last pick; pass it to the next call
 *
 * The length comes from rounding random() * 3 + 1 to an integer, so lengths
 * 1 and 4 occur about half as often as lengths 2 and 3.
 *
 * To remove: drop function if exists gen_array(integer[], tweights[]);
 */
create or replace function gen_array(integer[], tweights[])
returns table(vals integer[], weights tweights[])
as $$
    with recursive cte(id, val, weights, count) as (
        -- First pick; also fixes the target length for the whole array.
        select
            1,
            $1[first_pick.index],
            first_pick.weights,
            (random() * (4 - 1) + 1)::integer
        from get_next_index($2) as first_pick

        union all

        -- Each further pick continues from the state left by the previous one.
        select
            p.id + 1,
            $1[a.index],
            a.weights,
            p.count
        from cte as p
        cross join lateral get_next_index(p.weights) as a
        where p.id < p.count
    )
    select
        -- Explicit ordering: array_agg alone gives no guarantee on element order.
        array_agg(val order by id),
        (
            select last_step.weights
            from cte as last_step
            where last_step.id = last_step.count
        )
    from cte;
$$ language sql strict;
/*
 * Generate the column values for one row of the test table.
 *
 * Parameters ($1, $2 and $3 must all have the same array size)
 *   $1 : candidate integer values
 *   $2 : round-robin state used to pick the scalar `num`
 *   $3 : round-robin state used to build the array `ref`
 *
 * Returns one row
 *   num      : value picked from $1
 *   weights1 : state $2 after the pick
 *   ref      : array generated by gen_array
 *   weights2 : state $3 after the array was generated
 *
 * To remove: drop function if exists gen_row(integer[], tweights[], tweights[]);
 */
create or replace function gen_row(integer[], tweights[], tweights[])
returns table(num integer, weights1 tweights[], ref integer[], weights2 tweights[])
as $$
    select
        $1[num_pick.index],
        num_pick.weights,
        arr.vals,
        arr.weights
    from get_next_index($2) as num_pick
    cross join gen_array($1, $3) as arr;
$$ language sql strict;
/****************************************************************************************
函数测试是否符合预期
****************************************************************************************/
/*
select *
from gen_row(
array[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],
array[
(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,
(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,
(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,
(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights
],
array[
(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,
(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,
(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,
(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights,(5,0)::tweights
]);
*/
/*
 * Store one generated row in the test table.
 *
 * Parameters
 *   $1 : value for test.num
 *   $2 : value for test.ref
 *
 * Returns the objectid assigned to the new row.
 *
 * To remove: drop function if exists saveAsTest(integer, integer[]);
 */
create or replace function saveAsTest(integer, integer[])
returns integer
as $$
    insert into test (num, ref)
    values ($1, $2)
    returning objectid;
$$ language sql strict;
-- Empty the table and restart its objectid sequence at 1 before loading.
-- TRUNCATE is used instead of an unfiltered DELETE so no dead tuples are left
-- behind to bloat the table before the benchmark data is inserted.
truncate table test restart identity;
/*
 * Load the test data: run the script below in several terminals at once,
 * each inserting 1,000,000 rows.
 *
 * The original instructions say 10 terminals; the author's machine has
 * dual-socket 16-core CPUs, so 16 terminals were used. CPU model:
 * Intel(R) Xeon(R) CPU E5530 @ 2.40GHz, by now near the bottom of the
 * CPU rankings.
 * The table is simple, so the load writes little to disk (at most about
 * 16MB/s, usually below 2MB/s).
 * The work is mostly CPU-bound: with 16 terminals running, CPU usage reached
 * 100% and the fans became very loud after a while.
 */
\timing on
do $$
declare
    -- Candidate values 1..20, each with an equal coefficient of 5 (5 * 20 = 100).
    k_values integer[]  := array[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20];
    w_num    tweights[] := array_fill((5,0)::tweights, array[20]);  -- state for num
    w_ref    tweights[] := array_fill((5,0)::tweights, array[20]);  -- state for ref
    cur_num  integer;
    cur_ref  integer[];
begin
    for i in 1..1000000 loop
        -- Generate one row and carry both round-robin states into the next pass.
        select r.num, r.weights1, r.ref, r.weights2
          into cur_num, w_num, cur_ref, w_ref
          from gen_row(k_values, w_num, w_ref) as r;

        perform saveAsTest(cur_num, cur_ref);
        -- Debug aid, disabled: raise notice '% %', cur_num, cur_ref;

        -- Progress report: one notice per 1000 inserted rows (1, 2, ..., 1000).
        if i % 1000 = 0 then
            raise notice '%', i / 1000;
        end if;
    end loop;
end;
$$;
序号 | 耗时(ms) |
---|---|
1 | 1491206.016 |
2 | 1511390.919 |
3 | 1517245.568 |
4 | 1509241.432 |
5 | 1519552.252 |
6 | 1514420.896 |
7 | 1520820.174 |
8 | 1512984.280 |
9 | 1519851.215 |
10 | 1514590.502 |
11 | 1505463.332 |
12 | 1503091.390 |
13 | 1503749.024 |
14 | 1501670.722 |
15 | 1500027.669 |
16 | 1503459.150 |
插入完成后vacuum表,测试时结果更准确.
-- Freeze tuples and refresh planner statistics after the bulk load.
vacuum (freeze, verbose, analyze) test;

-- Sanity check: 16 terminals x 1,000,000 rows.
select count(*)
from test;
/*
Recorded output:
  count
----------
 16000000
(1 row)
Time: 587.956 ms
*/
-- B-tree index on the scalar column.
create index idx_test_num on test (num);

-- Array index.
-- gin__int_ops gave the best results for the author's workload so far.
-- The operator class is provided by the intarray extension, so make sure the
-- extension is installed before the index is created.
create extension if not exists intarray;
create index idx_test_ref on test using gin (ref gin__int_ops);

-- Other array index types; each needs its own extension.
--create index idx_test_ref on test using gist(ref gist__int_ops);
--create index idx_test_ref on test using rum(ref rum_anyarray_ops);

-- Show the resulting table structure.
\dS+ test;
注意不要加order by,order by会影响执行计划,目前只单纯的测试limit和索引之间的关系.
执行查询时多执行几次,直至不读取磁盘(没有Buffers: shared read).
因为数据在表中的占比一样,因此只要查询一个值就可以了.
-- ---------------------------------------------------------------------------
-- Without LIMIT
-- ---------------------------------------------------------------------------

-- Value present in the table, B-tree index.
explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where num = 1;
-- Execution time: 2568.059 ms

-- Value absent from the table, B-tree index.
explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where num = 21;
-- Execution time: 0.044 ms

-- Values present in the table, array index.
explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref @> array[1];
-- Execution time: 6589.734 ms

explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref && array[1,2];
-- Execution time: 9037.726 ms

explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref && array[1,2,3];
-- Execution time: 11621.418 ms

-- Values absent from the table, array index.
explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref @> array[21];
-- Execution time: 0.065 ms

explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref && array[21,22];
-- Execution time: 0.056 ms

explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref && array[21,22,23];
-- Execution time: 0.060 ms

-- ---------------------------------------------------------------------------
-- With LIMIT 50
-- ---------------------------------------------------------------------------

-- Value present in the table, B-tree index.
explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where num = 1
limit 50;
-- Execution time: 0.535 ms

-- Value absent from the table, B-tree index.
explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where num = 21
limit 50;
-- Execution time: 0.050 ms

-- Values present in the table, array index.
explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref @> array[1]
limit 50;
-- Execution time: 0.585 ms

explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref && array[1,2]
limit 50;
-- Execution time: 0.561 ms

explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref && array[1,2,3]
limit 50;
-- Execution time: 0.537 ms

-- Values absent from the table, array index.
explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref @> array[21]
limit 50;
-- Execution time: 3572.286 ms

explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref && array[21,22]
limit 50;
-- Execution time: 3944.530 ms

explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref && array[21,22,23]
limit 50;
-- Execution time: 4130.662 ms
通过对比可以看到,B树索引添加limit后性能更高,只返回limit限定的数据,无论表中是否包含条件值.
数组索引分两种情况:表中包含条件值、表中不包含条件值.
表中包含条件值时:不会使用数组索引,使用全表扫描,但是有limit限定,所以速度很快.
表中不包含条件值时:不会使用数组索引,使用全表扫描,因为值不包含在表中,所以需要扫描全表并过滤所有数据,速度非常慢.
用with(CTE)包装查询后会使用数组索引.
-- Same array predicates wrapped in a CTE, with the LIMIT applied outside it.

-- Values present in the table, array index.
explain (analyze, verbose, costs, buffers, timing)
with cte as (
    select objectid
    from test
    where ref @> array[1]
)
select *
from cte
limit 10;
-- Execution time: 293.301 ms

explain (analyze, verbose, costs, buffers, timing)
with cte as (
    select objectid
    from test
    where ref && array[1,2]
)
select *
from cte
limit 10;
-- Execution time: 464.427 ms

explain (analyze, verbose, costs, buffers, timing)
with cte as (
    select objectid
    from test
    where ref && array[1,2,3]
)
select *
from cte
limit 10;
-- Execution time: 717.172 ms

-- Values absent from the table, array index.
explain (analyze, verbose, costs, buffers, timing)
with cte as (
    select objectid
    from test
    where ref @> array[21]
)
select *
from cte
limit 10;
-- Execution time: 0.075 ms

explain (analyze, verbose, costs, buffers, timing)
with cte as (
    select objectid
    from test
    where ref && array[21,22]
)
select *
from cte
limit 10;
-- Execution time: 0.078 ms

explain (analyze, verbose, costs, buffers, timing)
with cte as (
    select objectid
    from test
    where ref && array[21,22,23]
)
select *
from cte
limit 10;
-- Execution time: 0.079 ms
禁用全表扫描后,PostgreSQL会自动选择合适的索引,在本例中使用了索引idx_test_ref.类似Oracle的强制索引.
set enable_seqscan只对当前会话有效,注意使用完成后要打开.
-- Discourage sequential scans for this session only.
set enable_seqscan to off;

-- Values present in the table, array index.
explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref @> array[1]
limit 50;
-- Execution time: 297.018 ms

explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref && array[1,2]
limit 50;
-- Execution time: 466.661 ms

explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref && array[1,2,3]
limit 50;
-- Execution time: 708.372 ms

-- Values absent from the table, array index.
explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref @> array[21]
limit 50;
-- Planning time: 0.089 ms
-- (only the planning time was recorded for this query)

explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref && array[21,22]
limit 50;
-- Execution time: 0.065 ms

explain (analyze, verbose, costs, buffers, timing)
select objectid
from test
where ref && array[21,22,23]
limit 50;
-- Execution time: 0.066 ms

-- Turn sequential scans back on once the test is finished.
set enable_seqscan to on;
-- Multiple conditions: B-tree predicate combined with an array predicate.

-- Array values present in the table.
explain (analyze, verbose, costs, buffers, timing)
with cte as (
    select objectid
    from test
    where num = 1
      and ref && array[1,2,3]
)
select *
from cte
limit 10;

-- Array values absent from the table.
explain (analyze, verbose, costs, buffers, timing)
with cte as (
    select objectid
    from test
    where num = 1
      and ref && array[21,22,23]
)
select *
from cte
limit 10;