前几天和朋友讨论数据库建模时,聊到了主键(PK)应该使用什么数据类型。我个人支持使用int,尤其是由sequence生成的无业务含义的数字;朋友则坚持认为GUID的唯一性更好,而且据他测试,两者的性能差异不大。于是我做了下面这个测试,用数字来说话。
测试环境
VMWare ESXi 5.0
RHEL 5.1 64bit
Oracle 11gR2 64bit
测试数据
方案1采用int类型,方案2采用sys_guid()产生的RAW(16)。
两种方案中,除了主键、外键字段的数据类型不同之外,其余字段都是随机长度、随机内容的字符串,其中父表的code字段建有普通索引,以保证两种方案具有可比性。
-- Statement under test: index lookup on p.code, then join and aggregate.
-- The random prefix is drawn once into a SQL*Plus bind variable. Calling
-- dbms_random directly inside the predicate allows Oracle to re-evaluate it
-- for every candidate row, so the filter would not be one fixed prefix.
-- Both schemes are queried with the same prefix.
variable l_prefix varchar2(2)
exec :l_prefix := dbms_random.string('U', 1) || '%'
select count(*), min(c.val1), max(c.val2)
from t_p1 p
inner join t_c1 c on c.pid = p.id
where p.code like :l_prefix;
select count(*), min(c.val1), max(c.val2)
from t_p2 p
inner join t_c2 c on c.pid = p.id
where p.code like :l_prefix;
测试方法和结果
整个测试中,对两种方案在不同环境下的性能进行了记录。具体脚本在后面给出,这里直接列出测试的结果。
1. 表和索引所占空间
表 | 数据量 | int 物理空间(M) | guid 物理空间(M) | PCT(int/guid) |
P表 | 100,000 | 72 | 80 | 90% |
C表 | 1,000,000 | 208 | 232 | 90% |
索引 | 索引类型 | int 物理空间(M) | guid 物理空间(M) | PCT(int/guid) |
PK_T_P | 主键 | 2 | 3 | 67% |
PK_T_C | 主键 | 21 | 38 | 55% |
IDX_FK_T_C_P | 外键索引 | 23 | 39 | 59% |
IDX_T_P_CODE | 普通索引 | 10 | 10 | 100% |
2. 单用户sqlplus中consistent gets的数据
指标 | int | guid | PCT(int/guid) |
consistent gets | 28,290 | 29,727 | 95% |
3. 单用户下锁资源占用情况。这里主要对比了latch锁。
int 时间 | guid 时间 | pct 时间 | int latch | guid latch | pct latch |
25 | 26 | 96.1% | 55,297 | 61,201 | 90.35% |
4. 单用户大数据量数据插入时间
表 | 数据量 | int 时间 | guid 时间 | PCT 时间(int/guid) |
P表 | 100,000 | 72.60 | 73.32 | 99% |
C表 | 1,000,000 | 506.72 | 569.31 | 89% |
5. 单用户根据索引删除大表中2.5%比例的数据。查看运行时间和消耗的UNDO
int 时间 | guid 时间 | pct 时间 | int undo | guid undo | pct undo |
0.85 | 1.21 | 70% | 10,910,328 | 12,092,804 | 90% |
6. 并发环境下,查询运行时间百分比。这里使用的是极短时间内后台提交多个JOB的方式模拟并发。
并发数 | int 时间 | guid 时间 | pct 时间 |
1 | 24 | 25 | 96% |
10 | 46 | 50 | 92% |
20 | 36 | 45 | 80% |
30 | 49 | 60 | 82% |
40 | 48 | 52 | 92% |
50 | 49 | 53 | 92% |
我的结论
从性能角度考虑,GUID方案在上述几种情况下都处于劣势。不过实话实说,用20%左右的性能差异来换取绝对的全局唯一性,在很多场景还是可以接受的。
以下是测试用到的代码
------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
-- sample table set 1: scheme 1, integer primary and foreign keys
-- purge: when the recycle bin is enabled, a plain drop keeps the old segments
-- around under BIN$ names, and they would then appear in the segment size
-- comparison on every re-run of this script.
drop table t_c1 purge;
drop table t_p1 purge;
-- parent table: int key, indexed code column, unindexed message column
create table t_p1 (
  id      int,
  code    varchar2(100),
  message varchar2(1000)
);
alter table t_p1 add constraint pk_t_p1 primary key (id);
create index idx_t_p1_code on t_p1 (code);
-- child table: int key plus int foreign key to t_p1, with an index on the FK
create table t_c1 (
  id   int,
  pid  int,
  val1 varchar2(100),
  val2 varchar2(100),
  val3 varchar2(100)
);
alter table t_c1 add constraint pk_t_c1 primary key (id);
alter table t_c1 add constraint fk_t_c1_p1 foreign key (pid) references t_p1 (id);
create index idx_fk_t_c1_p1 on t_c1 (pid);
-- init parent table: 100,000 rows with sequential ids and random strings
insert into t_p1 (id, code, message)
select rownum,
       dbms_random.string('x', round(dbms_random.value(10, 100))),
       dbms_random.string('x', round(dbms_random.value(100, 1000)))
from dual
connect by rownum <= 100000;
commit;
-- init child table: 10 children per parent row
-- The order by shuffles the rows before they are inserted, so the physical
-- row order is random with respect to id and pid.
insert into t_c1 (id, pid, val1, val2, val3)
select rownum,
       p.id,
       dbms_random.string('x', round(dbms_random.value(10, 100))),
       dbms_random.string('x', round(dbms_random.value(10, 100))),
       dbms_random.string('x', round(dbms_random.value(10, 100)))
from t_p1 p
cross join (select rownum as n from dual connect by rownum <= 10) n
order by dbms_random.value();
commit;
----------------------------------------------------
-- sample table set 2: scheme 2, raw(16) keys filled by sys_guid()
-- purge: when the recycle bin is enabled, a plain drop keeps the old segments
-- around under BIN$ names, and they would then appear in the segment size
-- comparison on every re-run of this script.
drop table t_c2 purge;
drop table t_p2 purge;
-- parent table: raw(16) key, indexed code column, unindexed message column
create table t_p2 (
  id      raw(16),
  code    varchar2(100),
  message varchar2(1000)
);
alter table t_p2 add constraint pk_t_p2 primary key (id);
create index idx_t_p2_code on t_p2 (code);
-- child table: raw(16) key plus raw(16) foreign key to t_p2, with an index on the FK
create table t_c2 (
  id   raw(16),
  pid  raw(16),
  val1 varchar2(100),
  val2 varchar2(100),
  val3 varchar2(100)
);
alter table t_c2 add constraint pk_t_c2 primary key (id);
alter table t_c2 add constraint fk_t_c2_p2 foreign key (pid) references t_p2 (id);
create index idx_fk_t_c2_p2 on t_c2 (pid);
-- init parent table: 100,000 rows with generated GUIDs and random strings
insert into t_p2 (id, code, message)
select sys_guid(),
       dbms_random.string('x', round(dbms_random.value(10, 100))),
       dbms_random.string('x', round(dbms_random.value(100, 1000)))
from dual
connect by rownum <= 100000;
commit;
-- init child table: 10 children per parent row
-- The order by shuffles the rows before they are inserted, so the physical
-- row order is random with respect to pid.
insert into t_c2 (id, pid, val1, val2, val3)
select sys_guid(),
       p.id,
       dbms_random.string('x', round(dbms_random.value(10, 100))),
       dbms_random.string('x', round(dbms_random.value(10, 100))),
       dbms_random.string('x', round(dbms_random.value(10, 100)))
from t_p2 p
cross join (select rownum as n from dual connect by rownum <= 10) n
order by dbms_random.value();
commit;
-----------------------------------------------------
-- stat table: one row per call of procedure run
--   run_type         scheme that was executed (1 = int keys, 2 = guid keys)
--   concurrent_count concurrency level the caller reported for this run
--   delta_time       elapsed time of the query as measured by dbms_utility.get_time
-- purge keeps the dropped table out of the recycle bin so that it does not
-- show up as a BIN$ segment in the size comparison.
drop table concurrent_stat purge;
create table concurrent_stat (
  run_type         int,
  concurrent_count int,
  delta_time       int
);
-- No-op on a freshly created table, but still empties the table if the
-- drop and create above failed and an older copy is still in place.
truncate table concurrent_stat;
-----------------------------------------------------
-- Gather optimizer statistics for all four test tables.
-- cascade => true gathers statistics on each table's indexes as well.
begin
  dbms_stats.gather_table_stats(ownname => user, tabname => 'T_P1', cascade => true);
  dbms_stats.gather_table_stats(ownname => user, tabname => 'T_C1', cascade => true);
  dbms_stats.gather_table_stats(ownname => user, tabname => 'T_P2', cascade => true);
  dbms_stats.gather_table_stats(ownname => user, tabname => 'T_C2', cascade => true);
end;
/
------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
-- run: executes the benchmark query once for one scheme and records the
-- elapsed time in concurrent_stat.
--   p_type        1 = int-key tables (t_p1, t_c1), 2 = guid-key tables (t_p2, t_c2)
--                 any other value, including NULL, raises ORA-20001
--   p_conc_count  concurrency level to store with the measurement (default 1)
-- The procedure commits its own insert.
create or replace procedure run
(
  p_type       int,
  p_conc_count int := 1
) as
  -- dbms_utility.get_time is a wrapping 32-bit counter of hundredths of a second
  c_counter_range constant number := power(2, 32);
  l_btime int;
  l_etime int;
  l_cnt   int;
  l_min   varchar2(1000);
  l_max   varchar2(1000);
  l_s     varchar2(2);
begin
  -- Reject unknown schemes instead of silently measuring scheme 2 for them.
  if p_type is null or p_type not in (1, 2) then
    raise_application_error(-20001, 'run: p_type must be 1 (int keys) or 2 (guid keys)');
  end if;
  -- Draw the random prefix before the timer starts, so it is fixed for the
  -- whole query and not part of the measured time.
  l_s := dbms_random.string('U', 1) || '%';
  l_btime := dbms_utility.get_time;
  if p_type = 1 then
    select count(*), min(c.val1), max(c.val2)
      into l_cnt, l_min, l_max
      from t_p1 p
     inner join t_c1 c on c.pid = p.id
     where p.code like l_s;
  else
    select count(*), min(c.val1), max(c.val2)
      into l_cnt, l_min, l_max
      from t_p2 p
     inner join t_c2 c on c.pid = p.id
     where p.code like l_s;
  end if;
  l_etime := dbms_utility.get_time;
  -- The mod keeps the delta non-negative when the counter wraps between the
  -- two readings.
  insert into concurrent_stat
    (run_type, concurrent_count, delta_time)
  values
    (p_type, p_conc_count, mod(l_etime - l_btime + c_counter_range, c_counter_range));
  commit;
end run;
/
------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
-- compare table and index size
-- user_segments lists the segments of the current schema, the same rows as
-- dba_segments filtered on owner = user, without requiring DBA privileges.
select s.segment_name,
       s.segment_type,
       s.bytes / 1024 / 1024 as size_mb
from user_segments s
order by s.segment_name;
-- join p and c to get some values
-- The random prefix is drawn once into a SQL*Plus bind variable, as procedure
-- run does with a local variable. Calling dbms_random directly inside the
-- predicate allows Oracle to re-evaluate it for every candidate row, so the
-- filter would not be one fixed prefix.
variable l_prefix varchar2(2)
exec :l_prefix := dbms_random.string('U', 1) || '%'
select count(*), min(c.val1), max(c.val2)
from t_p1 p
inner join t_c1 c on c.pid = p.id
where p.code like :l_prefix;
select count(*), min(c.val1), max(c.val2)
from t_p2 p
inner join t_c2 c on c.pid = p.id
where p.code like :l_prefix;
------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
-- Run scheme 1 and then scheme 2 between the my_rs checkpoints to compare
-- their session statistics.
-- NOTE(review): my_rs is not defined in this script. It is presumably a
-- runstats-style helper package, and the argument of rs_stop presumably a
-- reporting threshold - confirm against the package source.
begin
  my_rs.rs_start;
  run(p_type => 1);
  my_rs.rs_middle;
  run(p_type => 2);
  my_rs.rs_stop(100);
end;
/
------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
-- Number of recorded runs and rounded average elapsed time for each
-- concurrency level and scheme.
select st.concurrent_count,
       st.run_type,
       count(*) as cnt,
       round(avg(st.delta_time)) as delta
from concurrent_stat st
group by st.concurrent_count, st.run_type
order by st.concurrent_count, st.run_type;
-- display pct under different concurrent conditions
-- pct_1_2 is the average time of scheme 1 divided by that of scheme 2.
-- nullif turns a zero average for scheme 2 into NULL, so the ratio becomes
-- NULL instead of failing with a divide-by-zero error.
with avg_by_type as (
  -- rounded average elapsed time per concurrency level and scheme
  select s.concurrent_count,
         s.run_type,
         round(avg(s.delta_time)) as delta
  from concurrent_stat s
  group by s.concurrent_count, s.run_type
),
pivoted as (
  -- one row per concurrency level, with one column per scheme
  select a.concurrent_count,
         sum(case when a.run_type = 1 then a.delta end) as delta1,
         sum(case when a.run_type = 2 then a.delta end) as delta2
  from avg_by_type a
  group by a.concurrent_count
)
select v.concurrent_count,
       v.delta1,
       v.delta2,
       round(v.delta1 / nullif(v.delta2, 0), 2) as pct_1_2
from pivoted v
order by v.concurrent_count;
------------------------------------------------------------------------------------
-- Removes every recorded measurement from concurrent_stat. Run it only when
-- the results collected so far are no longer needed.
truncate table concurrent_stat;
------------------------------------------------------------------------------------
-- concurrent run
-- Submits l_concurrent_count scheduler jobs that each call run once for the
-- chosen scheme. Edit l_type and l_concurrent_count before each execution.
-- All jobs share one start time slightly in the future. Passing systimestamp
-- to every create_job call would start the jobs one by one as the loop
-- creates them, so early jobs could finish before later ones exist.
-- NOTE(review): how many jobs actually run at the same time is still capped
-- by the instance configuration (job_queue_processes) - confirm it is at
-- least l_concurrent_count.
declare
  l_type             int := 1;
  l_concurrent_count int := 40;
  l_start_delay_sec  int := 5;
  l_start            timestamp with time zone;
  l_job_name         varchar2(30);
  l_job_action       varchar2(1000);
begin
  l_start := systimestamp + numtodsinterval(l_start_delay_sec, 'second');
  l_job_action := 'begin run(' || l_type || ', ' || l_concurrent_count || '); end;';
  for i in 1 .. l_concurrent_count loop
    l_job_name := 'j_' || l_type || '_' || i;
    dbms_scheduler.create_job(job_name   => l_job_name,
                              job_type   => 'PLSQL_BLOCK',
                              job_action => l_job_action,
                              start_date => l_start,
                              auto_drop  => true,
                              enabled    => true);
  end loop;
  commit;
end;
/
------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
-- get undo stat for deleting operation
-- Read the session's 'undo change vector size' before and after the delete.
-- The difference between the two values is the undo generated by the delete.
-- NOTE(review): the delete is neither committed nor rolled back here, and only
-- the int-key tables are covered in this part of the script - confirm how the
-- transaction is ended and where the t_c2 counterpart is run.
select n.name, m.value
from v$statname n
inner join v$mystat m on m.statistic# = n.statistic#
where n.name = 'undo change vector size';
delete from t_c1 c
where exists (select 1
              from t_p1 p
              where p.id = c.pid
                and p.code like 'A%');
select n.name, m.value
from v$statname n
inner join v$mystat m on m.statistic# = n.statistic#
where n.name = 'undo change vector size';