在项目中,我们经常遇到或用到分页,那么在大数据量(百万级以上)下,哪种分页算法效率最优呢?我们不妨用事实说话。
测试环境
硬件:CPU 酷睿双核T5750 内存:2G
软件:Windows server 2003 + Sql server 2005
OK,我们首先创建一数据库:data_Test,并在此数据库中创建一表:tb_TestTable
1
create
database
data_Test
--
创建数据库data_Test
2
GO
3
use
data_Test
4
GO
5
create
table
tb_TestTable
--
创建表
6
(
7
id
int
identity
(
1
,
1
)
primary
key
,
8
userName
nvarchar
(
20
)
not
null
,
9
userPWD
nvarchar
(
20
)
not
null
,
10
userEmail
nvarchar
(
40
)
null
11
)
12
GO
然后我们在数据表中插入2000000条数据:
1
--
插入数据
2
set
identity_insert
tb_TestTable
on
3
declare
@count
int
4
set
@count
=
1
5
while
@count
<=
2000000
6
begin
7
insert
into
tb_TestTable(id,userName,userPWD,userEmail)
values
(
@count
,
'
admin
'
,
'
admin888
'
,
'
[email protected]
'
)
8
set
@count
=
@count
+
1
9
end
10
set
identity_insert
tb_TestTable
off
我首先写了五个常用存储过程:
1,利用select top 和select not in进行分页,具体代码如下:
1
create
procedure
proc_paged_with_notin
--
利用select top and select not in
2
(
3
@pageIndex
int
,
--
页索引
4
@pageSize
int
--
每页记录数
5
)
6
as
7
begin
8
set
nocount
on
;
9
declare
@timediff
datetime
--
耗时
10
declare
@sql
nvarchar
(
500
)
11
select
@timediff
=
Getdate
()
12
set
@sql
=
'
select top
'
+
str
(
@pageSize
)
+
'
* from tb_TestTable where(ID not in(select top
'
+
str
(
@pageSize
*
@pageIndex
)
+
'
id from tb_TestTable order by ID ASC)) order by ID
'
13
execute
(
@sql
)
--
因select top后不支技直接接参数,所以写成了字符串@sql
14
select
datediff
(ms,
@timediff
,
GetDate
())
as
耗时
15
set
nocount
off
;
16
end
2,利用select top 和 select max(列键)
1
create
procedure
proc_paged_with_selectMax
--
利用select top and select max(列)
2
(
3
@pageIndex
int
,
--
页索引
4
@pageSize
int
--
页记录数
5
)
6
as
7
begin
8
set
nocount
on
;
9
declare
@timediff
datetime
10
declare
@sql
nvarchar
(
500
)
11
select
@timediff
=
Getdate
()
12
set
@sql
=
'
select top
'
+
str
(
@pageSize
)
+
'
* From tb_TestTable where(ID>(select max(id) From (select top
'
+
str
(
@pageSize
*
@pageIndex
)
+
'
id From tb_TestTable order by ID) as TempTable)) order by ID
'
13
execute
(
@sql
)
14
select
datediff
(ms,
@timediff
,
GetDate
())
as
耗时
15
set
nocount
off
;
16
end
3,利用select top和中间变量--此方法因网上有人说效果最佳,所以贴出来一同测试
1
create
procedure
proc_paged_with_Midvar
--
利用ID>最大ID值和中间变量
2
(
3
@pageIndex
int
,
4
@pageSize
int
5
)
6
as
7
declare
@count
int
8
declare
@ID
int
9
declare
@timediff
datetime
10
declare
@sql
nvarchar
(
500
)
11
begin
12
set
nocount
on
;
13
select
@count
=
0
,
@ID
=
0
,
@timediff
=
getdate
()
14
select
@count
=
@count
+
1
,
@ID
=
case
when
@count
<=
@pageSize
*
@pageIndex
then
ID
else
@ID
end
from
tb_testTable
order
by
id
15
set
@sql
=
'
select top
'
+
str
(
@pageSize
)
+
'
* from tb_testTable where ID>
'
+
str
(
@ID
)
16
execute
(
@sql
)
17
select
datediff
(ms,
@timediff
,
getdate
())
as
耗时
18
set
nocount
off
;
19
end
20
4,利用Row_number() 此方法为SQL server 2005中新的方法,利用Row_number()给数据行加上索引
1
create
procedure
proc_paged_with_Rownumber
--
利用SQL 2005中的Row_number()
2
(
3
@pageIndex
int
,
4
@pageSize
int
5
)
6
as
7
declare
@timediff
datetime
8
begin
9
set
nocount
on
;
10
select
@timediff
=
getdate
()
11
select
*
from
(
select
*
,Row_number()
over
(
order
by
ID
asc
)
as
IDRank
from
tb_testTable)
as
IDWithRowNumber
where
IDRank
>
@pageSize
*
@pageIndex
and
IDRank
<
@pageSize
*
(
@pageIndex
+
1
)
12
select
datediff
(ms,
@timediff
,
getdate
())
as
耗时
13
set
nocount
off
;
14
end
15
5,利用临时表及Row_number
1
create
procedure
proc_CTE
--
利用临时表及Row_number
2
(
3
@pageIndex
int
,
--
页索引
4
@pageSize
int
--
页记录数
5
)
6
as
7
set
nocount
on
;
8
declare
@ctestr
nvarchar
(
400
)
9
declare
@strSql
nvarchar
(
400
)
10
declare
@datediff
datetime
11
begin
12
select
@datediff
=
GetDate
()
13
set
@ctestr
=
'
with Table_CTE as
14
(select ceiling((Row_number() over(order by ID ASC))/
'
+
str
(
@pageSize
)
+
'
) as page_num,* from tb_TestTable)
'
;
15
set
@strSql
=
@ctestr
+
'
select * From Table_CTE where page_num=
'
+
str
(
@pageIndex
)
16
end
17
begin
18
execute
sp_executesql
@strSql
19
select
datediff
(ms,
@datediff
,
GetDate
())
20
set
nocount
off
;
21
end
22
OK,至此,存储过程创建完毕,我们分别在每页10条数据的情况下在第2页,第1000页,第10000页,第100000页,第199999页进行测试,耗时单位:ms 每页测试5次取其平均值
存过 |
第2页耗时 |
第1000页耗时 |
第10000页耗时 |
第100000页耗时 |
第199999页耗时 |
效率排行 |
1用not in |
0ms |
16ms |
47ms |
475ms |
953ms |
3 |
2用select max |
5ms |
16ms |
35ms |
325ms |
623ms |
1 |
3中间变量 |
966ms |
970ms |
960ms |
945ms |
933ms |
5 |
4row_number |
0ms |
0ms |
34ms |
365ms |
710ms |
2 |
4临时表 |
780ms |
796ms |
798ms |
780ms |
805ms |
4 |
测试结果显示:select max >row_number>not in>临时表>中间变量
于是我对效率最高的select max方法用2分法进行了扩展,代码取自互联网,我修改了ASC排序时取不到值的BUG,测试结果:
2分法 |
156ms |
156ms |
180ms |
470ms |
156ms |
1* |
从测试结果来看,使用2分法确实可以提高效率并使效率更为稳定,我又增加了第159999页的测试,用时仅296ms,效果相当的不错!
下面是2分法使用select max的代码,已相当完善。
1
--
/*-----存储过程 分页处理 孙伟 2005-03-28创建 -------*/
2
--
/*-----存储过程 分页处理 浪尘 2008-9-1修改----------*/
3
--
/*----- 对数据进行了2分处理使查询前半部分数据与查询后半部分数据性能相同 -------*/
4
5
alter
PROCEDURE
proc_paged_2part_selectMax
6
(
7
@tblName
nvarchar
(
200
),
--
--要显示的表或多个表的连接
8
@fldName
nvarchar
(
500
)
=
'
*
'
,
--
--要显示的字段列表
9
@pageSize
int
=
10
,
--
--每页显示的记录个数
10
@page
int
=
1
,
--
--要显示那一页的记录
11
@fldSort
nvarchar
(
200
)
=
null
,
--
--排序字段列表或条件
12
@Sort
bit
=
0
,
--
--排序方法,0为升序,1为降序(如果是多字段排列Sort指代最后一个排序字段的排列顺序(最后一个排序字段不加排序标记)--程序传参如:' SortA Asc,SortB Desc,SortC ')
13
@strCondition
nvarchar
(
1000
)
=
null
,
--
--查询条件,不需where
14
@ID
nvarchar
(
150
),
--
--主表的主键
15
@Dist
bit
=
0
,
--
--是否添加查询字段的 DISTINCT 默认0不添加/1添加
16
@pageCount
int
=
1
output,
--
--查询结果分页后的总页数
17
@Counts
int
=
1
output
--
--查询到的记录数
18
)
19
AS
20
SET
NOCOUNT
ON
21
Declare
@sqlTmp
nvarchar
(
1000
)
--
--存放动态生成的SQL语句
22
Declare
@strTmp
nvarchar
(
1000
)
--
--存放取得查询结果总数的查询语句
23
Declare
@strID
nvarchar
(
1000
)
--
--存放取得查询开头或结尾ID的查询语句
24
25
Declare
@strSortType
nvarchar
(
10
)
--
--数据排序规则A
26
Declare
@strFSortType
nvarchar
(
10
)
--
--数据排序规则B
27
28
Declare
@SqlSelect
nvarchar
(
50
)
--
--对含有DISTINCT的查询进行SQL构造
29
Declare
@SqlCounts
nvarchar
(
50
)
--
--对含有DISTINCT的总数查询进行SQL构造
30
31
declare
@timediff
datetime
--
耗时测试时间差
32
select
@timediff
=
getdate
()
33
34
if
@Dist
=
0
35
begin
36
set
@SqlSelect
=
'
select
'
37
set
@SqlCounts
=
'
Count(*)
'
38
end
39
else
40
begin
41
set
@SqlSelect
=
'
select distinct
'
42
set
@SqlCounts
=
'
Count(DISTINCT
'
+
@ID
+
'
)
'
43
end
44
45
46
if
@Sort
=
0
47
begin
48
set
@strFSortType
=
'
ASC
'
49
set
@strSortType
=
'
DESC
'
50
end
51
else
52
begin
53
set
@strFSortType
=
'
DESC
'
54
set
@strSortType
=
'
ASC
'
55
end
56
57
58
59
--
------生成查询语句--------
60
--
此处@strTmp为取得查询结果数量的语句
61
if
@strCondition
is
null
or
@strCondition
=
''
--
没有设置显示条件
62
begin
63
set
@sqlTmp
=
@fldName
+
'
From
'
+
@tblName
64
set
@strTmp
=
@SqlSelect
+
'
@Counts=
'
+
@SqlCounts
+
'
FROM
'
+
@tblName
65
set
@strID
=
'
From
'
+
@tblName
66
end
67
else
68
begin
69
set
@sqlTmp
=
+
@fldName
+
'
From
'
+
@tblName
+
'
where (1>0)
'
+
@strCondition
70
set
@strTmp
=
@SqlSelect
+
'
@Counts=
'
+
@SqlCounts
+
'
FROM
'
+
@tblName
+
'
where (1>0)
'
+
@strCondition
71
set
@strID
=
'
From
'
+
@tblName
+
'
where (1>0)
'
+
@strCondition
72
end
73
74
--
--取得查询结果总数量-----
75
exec
sp_executesql
@strTmp
,N
'
@Counts int out
'
,
@Counts
out
76
declare
@tmpCounts
int
77
if
@Counts
=
0
78
set
@tmpCounts
=
1
79
else
80
set
@tmpCounts
=
@Counts
81
82
--
取得分页总数
83
set
@pageCount
=
(
@tmpCounts
+
@pageSize
-
1
)
/
@pageSize
84
85
/**/
/**/
/**/
/**当前页大于总页数 取最后一页**/
86
if
@page
>
@pageCount
87
set
@page
=
@pageCount
88
89
--
/*-----数据分页2分处理-------*/
90
declare
@pageIndex
int
--
总数/页大小
91
declare
@lastcount
int
--
总数%页大小
92
93
set
@pageIndex
=
@tmpCounts
/
@pageSize
94
set
@lastcount
=
@tmpCounts
%
@pageSize
95
if
@lastcount
>
0
96
set
@pageIndex
=
@pageIndex
+
1
97
else
98
set
@lastcount
=
@pagesize
99
100
--
//***显示分页
101
if
@strCondition
is
null
or
@strCondition
=
''
--
没有设置显示条件
102
begin
103
if
@pageIndex
<
2
or
@page
<=
@pageIndex
/
2
+
@pageIndex
%
2
--
前半部分数据处理
104
begin
105
if
@page
=
1
106
set
@strTmp
=
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
107
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
108
else
109
begin
110
if
@Sort
=
1
111
begin
112
set
@strTmp
=
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
113
+
'
where
'
+
@ID
+
'
<(select min(
'
+
@ID
+
'
) from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
1
)
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
114
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
+
'
) AS TBMinID)
'
115
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
116
end
117
else
118
begin
119
set
@strTmp
=
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
120
+
'
where
'
+
@ID
+
'
>(select max(
'
+
@ID
+
'
) from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
1
)
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
121
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
+
'
) AS TBMinID)
'
122
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
123
end
124
end
125
end
126
else
127
begin
128
set
@page
=
@pageIndex
-
@page
+
1
--
后半部分数据处理
129
if
@page
<=
1
--
最后一页数据显示
130
set
@strTmp
=
@SqlSelect
+
'
* from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@lastcount
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
131
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TempTB
'
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
132
else
133
if
@Sort
=
1
134
begin
135
set
@strTmp
=
@SqlSelect
+
'
* from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
136
+
'
where
'
+
@ID
+
'
>(select max(
'
+
@ID
+
'
) from(
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
2
)
+
@lastcount
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
137
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TBMaxID)
'
138
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TempTB
'
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
139
end
140
else
141
begin
142
set
@strTmp
=
@SqlSelect
+
'
* from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
143
+
'
where
'
+
@ID
+
'
<(select min(
'
+
@ID
+
'
) from(
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
2
)
+
@lastcount
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
144
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TBMaxID)
'
145
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TempTB
'
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
146
end
147
end
148
end
149
150
else
--
有查询条件
151
begin
152
if
@pageIndex
<
2
or
@page
<=
@pageIndex
/
2
+
@pageIndex
%
2
--
前半部分数据处理
153
begin
154
if
@page
=
1
155
set
@strTmp
=
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
156
+
'
where 1=1
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
157
else
if
(
@Sort
=
1
)
158
begin
159
set
@strTmp
=
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
160
+
'
where
'
+
@ID
+
'
<(select min(
'
+
@ID
+
'
) from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
1
)
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
161
+
'
where (1=1)
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
+
'
) AS TBMinID)
'
162
+
'
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
163
end
164
else
165
begin
166
set
@strTmp
=
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
167
+
'
where
'
+
@ID
+
'
>(select max(
'
+
@ID
+
'
) from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
1
)
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
168
+
'
where (1=1)
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
+
'
) AS TBMinID)
'
169
+
'
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
170
end
171
end
172
else
173
begin
174
set
@page
=
@pageIndex
-
@page
+
1
--
后半部分数据处理
175
if
@page
<=
1
--
最后一页数据显示
176
set
@strTmp
=
@SqlSelect
+
'
* from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@lastcount
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
177
+
'
where (1=1)
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TempTB
'
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
178
else
if
(
@Sort
=
1
)
179
set
@strTmp
=
@SqlSelect
+
'
* from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
180
+
'
where
'
+
@ID
+
'
>(select max(
'
+
@ID
+
'
) from(
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
2
)
+
@lastcount
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
181
+
'
where (1=1)
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TBMaxID)
'
182
+
'
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TempTB
'
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
183
else
184
set
@strTmp
=
@SqlSelect
+
'
* from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
185
+
'
where
'
+
@ID
+
'
<(select min(
'
+
@ID
+
'
) from(
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
2
)
+
@lastcount
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
186
+
'
where (1=1)
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TBMaxID)
'
187
+
'
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TempTB
'
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
188
end
189
end
190
191
--
----返回查询结果-----
192
exec
sp_executesql
@strTmp
193
select
datediff
(ms,
@timediff
,
getdate
())
as
耗时
194
--
print @strTmp
195
SET
NOCOUNT
OFF
196
GO
197
执行示例:exec proc_paged_2part_selectMax 'tb_testTable','ID,userName,userPWD,userEmail',10,100000,'ID',0,null,'ID',0
这种测试只在单机进行,并且没有在实际开发WEB项目中分页测试,测试项也比较单一,所以不够全面系统,但从其效率相比上,我们可以在数据库分页算法上进行有效的控制。