主键字段使用不同数据类型的简单比较

前几天和朋友讨论数据库建模的时候,说起PK使用的数据类型这个话题。我个人是支持使用int,尤其是sequence生成的无意义数字。不过朋友坚持认为GUID更具有唯一性,并且经过测试,两者性能差异不大。我就做了这个测试用数字来说话。

测试环境
VMWare ESXi 5.0
RHEL 5.1 64bit
Oracle 11gR2 64bit

测试数据
方案1采用int类型,方案2采用sys_guid()产生的RAW(16)。

两种方案中除了主键(以及对应外键)字段的数据类型不同之外,其他字段都是随机长度、随机内容的字符串,其中父表的 code 字段建有索引,以保证两种方案具有可比性。

-- Test statements: an index lookup on p.code, followed by a join to the
-- child table and an aggregation over the matching rows.
select count(*),
       min(c.val1),
       max(c.val2)
from t_p1 p
inner join t_c1 c
    on c.pid = p.id
where p.code like dbms_random.string('U', 1) || '%';

select count(*),
       min(c.val1),
       max(c.val2)
from t_p2 p
inner join t_c2 c
    on c.pid = p.id
where p.code like dbms_random.string('U', 1) || '%';

测试方法和结果
整个测试中,对两种方案在不同环境下的性能进行了记录。具体脚本在后面给出,这里直接列出测试的结果。下文各表中的 PCT 均为 int 方案的数值除以 guid 方案对应数值所得的百分比。

    1. 表和索引所占空间

  数据量 int 物理空间(M) guid 物理空间(M) PCT
P表 100,000 72 80 90%
C表 1,000,000 208 232 90%



  索引类型 int 物理空间(M) guid 物理空间(M) PCT
PK_T_P 主键 2 3 67%
PK_T_C 主键 21 38 55%
IDX_FK_T_C_P 外键索引 23 39 59%
IDX_T_P_CODE 普通索引 10 10 100%



    2. 单用户sqlplus中consistent gets的数据 

  int guid PCT
consistent gets  28,290 29,727 95%

   

    3. 单用户下锁资源占用情况。这里主要对比了运行时间和 latch 的数量。

int 时间 guid 时间 pct 时间 int latch guid latch pct latch
25 26 96.1% 55,297 61,201 90.35%



    4. 单用户大数据量数据插入时间

  数据量 int 时间 guid 时间 PCT 时间
P表 100,000 72.60 73.32 99%
C表 1,000,000 506.72 569.31 89%



    5. 单用户根据索引删除大表中2.5%比例的数据。查看运行时间和消耗的UNDO

int 时间 guid 时间 pct 时间 int undo guid undo pct undo
0.85 1.21 70% 10,910,328 12,092,804 90%



    6. 并发环境下,查询运行时间及其百分比。这里使用的是极短时间内后台提交多个JOB的方式模拟并发,时间为 dbms_utility.get_time 的差值,单位是百分之一秒。

并发数 int 时间 guid 时间 pct 时间
1 24 25 96%
10 46 50 92%
20 36 45 80%
30 49 60 82%
40 48 52 92%
50 49 53 92%



我的结论
从性能角度考虑,GUID方案在上述几种情况下都处于劣势。不过实话实说,用20%左右的性能差异来换取绝对的全局唯一性,在很多场景还是可以接受的。


以下是测试用到的代码

View Code
------------------------------------------------------------------------------------
-- Sample table set 1: INT primary and foreign keys
------------------------------------------------------------------------------------
-- The child table is dropped first because it references the parent through
-- fk_t_c1_p1. On a first run the tables do not exist yet and both drops
-- report ORA-00942.
drop table t_c1;
drop table t_p1;

create table t_p1 (
    id      int,
    code    varchar2(100),
    message varchar2(1000)
);
alter table t_p1 add constraint pk_t_p1 primary key (id);
create index idx_t_p1_code on t_p1 (code);

create table t_c1 (
    id   int,
    pid  int,
    val1 varchar2(100),
    val2 varchar2(100),
    val3 varchar2(100)
);
alter table t_c1 add constraint pk_t_c1 primary key (id);
alter table t_c1 add constraint fk_t_c1_p1 foreign key (pid) references t_p1 (id);
create index idx_fk_t_c1_p1 on t_c1 (pid);

-- init parent table: 100,000 rows with sequential ids.
-- dbms_random.string('x', n) yields n random uppercase alphanumeric characters.
insert into t_p1 (id, code, message)
select rownum,                                                        -- id
       dbms_random.string('x', round(dbms_random.value(10, 100))),    -- code
       dbms_random.string('x', round(dbms_random.value(100, 1000)))   -- message
from dual
connect by rownum <= 100000;
commit;

-- init child table: 10 children per parent (1,000,000 rows).
-- ROWNUM is assigned before the ORDER BY is applied, so rows are inserted in
-- random order with respect to both id and pid.
insert into t_c1 (id, pid, val1, val2, val3)
select rownum,                                                        -- id
       p.id,                                                          -- pid
       dbms_random.string('x', round(dbms_random.value(10, 100))),    -- val1
       dbms_random.string('x', round(dbms_random.value(10, 100))),    -- val2
       dbms_random.string('x', round(dbms_random.value(10, 100)))     -- val3
from t_p1 p
cross join (select rownum as seq from dual connect by rownum <= 10) n
order by dbms_random.value();
commit;


------------------------------------------------------------------------------------
-- Sample table set 2: RAW(16) primary and foreign keys filled by SYS_GUID()
------------------------------------------------------------------------------------
-- The child table is dropped first because it references the parent through
-- fk_t_c2_p2. On a first run the tables do not exist yet and both drops
-- report ORA-00942.
drop table t_c2;
drop table t_p2;

create table t_p2 (
    id      raw(16),
    code    varchar2(100),
    message varchar2(1000)
);
alter table t_p2 add constraint pk_t_p2 primary key (id);
create index idx_t_p2_code on t_p2 (code);

create table t_c2 (
    id   raw(16),
    pid  raw(16),
    val1 varchar2(100),
    val2 varchar2(100),
    val3 varchar2(100)
);
alter table t_c2 add constraint pk_t_c2 primary key (id);
alter table t_c2 add constraint fk_t_c2_p2 foreign key (pid) references t_p2 (id);
create index idx_fk_t_c2_p2 on t_c2 (pid);

-- init parent table: 100,000 rows, each keyed by a fresh GUID.
-- dbms_random.string('x', n) yields n random uppercase alphanumeric characters.
insert into t_p2 (id, code, message)
select sys_guid(),                                                    -- id
       dbms_random.string('x', round(dbms_random.value(10, 100))),    -- code
       dbms_random.string('x', round(dbms_random.value(100, 1000)))   -- message
from dual
connect by rownum <= 100000;
commit;

-- init child table: 10 children per parent (1,000,000 rows), inserted in
-- random order.
insert into t_c2 (id, pid, val1, val2, val3)
select sys_guid(),                                                    -- id
       p.id,                                                          -- pid
       dbms_random.string('x', round(dbms_random.value(10, 100))),    -- val1
       dbms_random.string('x', round(dbms_random.value(10, 100))),    -- val2
       dbms_random.string('x', round(dbms_random.value(10, 100)))     -- val3
from t_p2 p
cross join (select rownum as seq from dual connect by rownum <= 10) n
order by dbms_random.value();
commit;

------------------------------------------------------------------------------------
-- Stat table: one row is inserted per execution of procedure RUN
------------------------------------------------------------------------------------
drop table concurrent_stat;
create table concurrent_stat (
    run_type         int,   -- p_type passed to RUN: 1 = int tables, otherwise guid tables
    concurrent_count int,   -- p_conc_count passed to RUN
    delta_time       int    -- elapsed dbms_utility.get_time ticks (hundredths of a second)
);
-- A no-op directly after CREATE; re-run this statement alone to clear results.
truncate table concurrent_stat;

------------------------------------------------------------------------------------
-- Gather optimizer statistics for all four test tables.
-- cascade => true also gathers statistics on each table's indexes.
------------------------------------------------------------------------------------
begin
    dbms_stats.gather_table_stats(user, 'T_P1', cascade => true);
    dbms_stats.gather_table_stats(user, 'T_C1', cascade => true);
    dbms_stats.gather_table_stats(user, 'T_P2', cascade => true);
    dbms_stats.gather_table_stats(user, 'T_C2', cascade => true);
end;
/

------------------------------------------------------------------------------------
--
----------------------------------------------------------------------------------
-- RUN: executes the benchmark query once and records how long it took.
--   p_type       1 runs the query against the int tables (t_p1 / t_c1);
--                any other value runs it against the guid tables (t_p2 / t_c2).
--   p_conc_count stored as-is in concurrent_stat.concurrent_count; it does not
--                affect the query itself.
-- One row (p_type, p_conc_count, elapsed ticks) is inserted into
-- concurrent_stat and committed before the procedure returns.
create or replace procedure run
(
    p_type       int,
    p_conc_count int := 1
) as
    v_started  int;
    v_finished int;
    v_rows     int;
    v_low      varchar2(1000);
    v_high     varchar2(1000);
    v_pattern  varchar2(2);
begin
    -- One random uppercase letter plus a wildcard, drawn before timing starts.
    v_pattern := dbms_random.string('U', 1) || '%';

    -- dbms_utility.get_time counts in hundredths of a second.
    v_started := dbms_utility.get_time;

    case
        when p_type = 1 then
            select count(1), min(c.val1), max(c.val2)
            into v_rows, v_low, v_high
            from t_p1 p
            inner join t_c1 c
                on c.pid = p.id
            where p.code like v_pattern;
        else
            select count(1), min(c.val1), max(c.val2)
            into v_rows, v_low, v_high
            from t_p2 p
            inner join t_c2 c
                on c.pid = p.id
            where p.code like v_pattern;
    end case;

    v_finished := dbms_utility.get_time;

    insert into concurrent_stat (run_type, concurrent_count, delta_time)
    values (p_type, p_conc_count, v_finished - v_started);
    commit;
end run;
/

------------------------------------------------------------------------------------
-- Compare table and index size.
-- Lists every segment owned by the current user with its size in megabytes.
-- Reads DBA_SEGMENTS, so the session needs SELECT access to that view.
------------------------------------------------------------------------------------
select s.segment_name,
       s.segment_type,
       s.bytes / 1024 / 1024 as size_mb
from dba_segments s
where s.owner = user
order by s.segment_name;


-- Join parent and child to get some values.
-- Each statement calls dbms_random.string itself, so the two statements do
-- not necessarily filter on the same leading letter.
select count(*),
       min(c.val1),
       max(c.val2)
from t_p1 p
inner join t_c1 c
    on c.pid = p.id
where p.code like dbms_random.string('U', 1) || '%';

select count(*),
       min(c.val1),
       max(c.val2)
from t_p2 p
inner join t_c2 c
    on c.pid = p.id
where p.code like dbms_random.string('U', 1) || '%';


------------------------------------------------------------------------------------
-- Show the difference between running statistics of the two variants.
-- MY_RS is not defined in this script.
-- NOTE(review): presumably a runstats-style package that snapshots session
-- statistics and latches at start / middle / stop; confirm the meaning of the
-- rs_stop(100) argument against its source.
------------------------------------------------------------------------------------
begin
    my_rs.rs_start;
    run(1);             -- int tables
    my_rs.rs_middle;
    run(2);             -- guid tables
    my_rs.rs_stop(100);
end;
/


------------------------------------------------------------------------------------
--
----------------------------------------------------------------------------------
-- Average elapsed time (hundredths of a second) and number of runs for each
-- concurrency level and run type.
select s.concurrent_count,
       s.run_type,
       count(*) as cnt,
       round(avg(s.delta_time)) as delta
from concurrent_stat s
group by s.concurrent_count, s.run_type
order by s.concurrent_count, s.run_type;

-- Display the int / guid time ratio under different concurrency levels.
with per_type as (
    -- rounded average elapsed time per (concurrency level, run type)
    select s.concurrent_count,
           s.run_type,
           round(avg(s.delta_time)) as delta
    from concurrent_stat s
    group by s.concurrent_count, s.run_type
),
pivoted as (
    -- one row per concurrency level: type 1 (int) and type 2 (guid) side by side
    select t.concurrent_count,
           sum(case when t.run_type = 1 then t.delta end) as delta1,
           sum(case when t.run_type = 2 then t.delta end) as delta2
    from per_type t
    group by t.concurrent_count
)
select v.concurrent_count,
       v.delta1,
       v.delta2,
       -- NULLIF keeps a rounded average of 0 from raising ORA-01476;
       -- the ratio is reported as NULL in that case.
       round(v.delta1 / nullif(v.delta2, 0), 2) as pct_1_2
from pivoted v
order by v.concurrent_count;

------------------------------------------------------------------------------------
-- Reset previously collected timings.
------------------------------------------------------------------------------------
truncate table concurrent_stat;

------------------------------------------------------------------------------------
-- Concurrent run.
-- Creates l_concurrent_count scheduler jobs that all start immediately and
-- each call run(l_type, l_concurrent_count) once. Edit the two constants and
-- re-run the block for every combination to be measured.
------------------------------------------------------------------------------------
declare
    l_type             int := 1;    -- 1 = int tables; any other value = guid tables
    l_concurrent_count int := 40;   -- number of jobs to submit
    l_job_name         varchar2(30);
    l_job_action       varchar2(1000);
begin
    -- The literal below is left flush with the margin on purpose so that the
    -- text of the generated PL/SQL block is not altered.
    l_job_action := '
begin
run(
' || l_type || ',' ||
l_concurrent_count || ');
end;
';

    for i in 1 .. l_concurrent_count loop
        l_job_name := 'j_' || l_type || '_' || i;
        dbms_scheduler.create_job(job_name   => l_job_name,
                                  job_type   => 'PLSQL_BLOCK',
                                  job_action => l_job_action,
                                  start_date => systimestamp,
                                  auto_drop  => true,
                                  enabled    => true);
    end loop;
    commit;
end;
/


------------------------------------------------------------------------------------
-- Get undo stat for the deleting operation.
-- Read the session's 'undo change vector size' before and after the DELETE;
-- the difference between the two values is the undo generated by the DELETE.
------------------------------------------------------------------------------------
select n.name,
       m.value
from v$mystat m
inner join v$statname n
    on n.statistic# = m.statistic#
where n.name = 'undo change vector size';

-- Delete every child row whose parent code starts with 'A'.
-- No COMMIT or ROLLBACK follows in this script; end the transaction manually.
delete t_c1 c
where exists (
    select 1
    from t_p1 p
    where p.id = c.pid
      and p.code like 'A%'
);

select n.name,
       m.value
from v$mystat m
inner join v$statname n
    on n.statistic# = m.statistic#
where n.name = 'undo change vector size';



你可能感兴趣的:(数据类型)