需求:求200w以内的素数
素数是只能被1和自身整除的数,1不是素数
一.SQL版
先用2w进行测试
-- 非1和自身,只要有整除的,通过not exists 剔除
WITH t AS
(SELECT rownum+1 rn FROM dual CONNECT BY rownum <= 20000 -1),
t1 AS
(SELECT rownum+1 rn FROM dual CONNECT BY rownum <= 20000 -1)
SELECT count(*)
FROM t
WHERE NOT EXISTS (SELECT t1.rn
FROM t1
WHERE t1.rn != t.rn
AND MOD(t.rn, t1.rn) = 0);
-- 一个数不可能整除比自身大的数
WITH t AS
(SELECT rownum+1 rn FROM dual CONNECT BY rownum <= 20000 -1),
t1 AS
(SELECT rownum+1 rn FROM dual CONNECT BY rownum <= 20000 -1)
SELECT count(*)
FROM t
WHERE NOT EXISTS (SELECT t1.rn
FROM t1
WHERE t1.rn < t.rn
AND MOD(t.rn, t1.rn) = 0);
-- 其实一个数不能整除比自身平方根更大的数据,所以t1表其实可以缩小限制
-- where后面的谓词条件也可以缩小限制
WITH t AS
(SELECT rownum+1 rn FROM dual CONNECT BY rownum <= 20000 -1),
t1 AS
(SELECT rownum+1 rn FROM dual CONNECT BY rownum <= sqrt(20000) -1)
SELECT count(*)
FROM t
WHERE NOT EXISTS (SELECT t1.rn
FROM t1
WHERE t1.rn <= sqrt(t.rn)
AND MOD(t.rn, t1.rn) = 0);
运行记录:
scoot@orcl>-- 非1和自身,只要有整除的,通过not exists 剔除
scoot@orcl>WITH t AS
2 (SELECT rownum+1 rn FROM dual CONNECT BY rownum <= 20000 -1),
3 t1 AS
4 (SELECT rownum+1 rn FROM dual CONNECT BY rownum <= 20000 -1)
5 SELECT count(*)
6 FROM t
7 WHERE NOT EXISTS (SELECT t1.rn
8 FROM t1
9 WHERE t1.rn != t.rn
10 AND MOD(t.rn, t1.rn) = 0);
COUNT(*)
----------
2262
已用时间: 00: 00: 32.19
scoot@orcl>
scoot@orcl>
scoot@orcl>-- 一个数不可能整除比自身大的数
scoot@orcl>WITH t AS
2 (SELECT rownum+1 rn FROM dual CONNECT BY rownum <= 20000 -1),
3 t1 AS
4 (SELECT rownum+1 rn FROM dual CONNECT BY rownum <= 20000 -1)
5 SELECT count(*)
6 FROM t
7 WHERE NOT EXISTS (SELECT t1.rn
8 FROM t1
9 WHERE t1.rn < t.rn
10 AND MOD(t.rn, t1.rn) = 0);
COUNT(*)
----------
2262
已用时间: 00: 00: 24.42
scoot@orcl>
scoot@orcl>
scoot@orcl>-- 其实一个数不能比自身平方根更大的数据,所以t1表其实可以缩小限制
scoot@orcl>-- where后面的谓词条件也可以缩小限制
scoot@orcl>WITH t AS
2 (SELECT rownum+1 rn FROM dual CONNECT BY rownum <= 20000 -1),
3 t1 AS
4 (SELECT rownum+1 rn FROM dual CONNECT BY rownum <= sqrt(20000) -1)
5 SELECT count(*)
6 FROM t
7 WHERE NOT EXISTS (SELECT t1.rn
8 FROM t1
9 WHERE t1.rn <= sqrt(t.rn)
10 AND MOD(t.rn, t1.rn) = 0);
COUNT(*)
----------
2262
已用时间: 00: 00: 00.57
scoot@orcl>
一个简单的逻辑上的优化,让CPU少计算了很多不必要的数据
计算时间由32秒提升到0.57秒,性能大幅提升
将2w调整为200w
-- 上例中最后一个由2w调整为200w
WITH t AS
(SELECT rownum+1 rn FROM dual CONNECT BY rownum <= 2000000 -1),
t1 AS
(SELECT rownum+1 rn FROM dual CONNECT BY rownum <= sqrt(2000000) -1)
SELECT count(*)
FROM t
WHERE NOT EXISTS (SELECT t1.rn
FROM t1
WHERE t1.rn <= sqrt(t.rn)
AND MOD(t.rn, t1.rn) = 0);
-- 这个是否可以持续优化,当然可以
-- 我们优化算法,通过minus,只要2个数的成绩在20w以内,就进行剔除
-- 然后再把 能整除2的给剔除掉
WITH t AS (
SELECT ROWNUM+1 rn FROM DUAL CONNECT BY ROWNUM <= (2000000)/2-1
)
,t_prime AS (
SELECT 2*ROWNUM+1 rn FROM DUAL CONNECT BY ROWNUM <= (2000000)/2-1
UNION ALL
SELECT 2 FROM DUAL
)
SELECT COUNT(*)
FROM (SELECT rn from t_prime
MINUS
SELECT t1.rn * t2.rn
FROM t t1, t t2
WHERE t1.rn <= t2.rn
AND t1.rn <= (SELECT SQRT(2000000) FROM DUAL)
AND t1.rn * t2.rn <2000000
);
-- 继续优化,把 3 、5、7都给剔除
WITH t0 AS
(SELECT 2*rownum+1 rn FROM dual CONNECT BY rownum <= 1000000 -1),
t AS
(select rn from t0 where mod(rn,3) != 0 and mod(rn,5) != 0 and mod(rn,7) != 0 )
SELECT COUNT(*)+1+3 --2,3,5,7
FROM (SELECT rn from t
MINUS
SELECT t1.rn * t2.rn
FROM t t1, t t2
WHERE t1.rn <= t2.rn
AND t1.rn BETWEEN 9 AND (SELECT SQRT(2000000) FROM DUAL)
AND t1.rn * t2.rn <2000000
);
-- 剔除越多就越快
WITH t0 AS (
SELECT 2*ROWNUM+1 rn FROM DUAL CONNECT BY ROWNUM <= (2000000)/2-1-1
),
t as(SELECT rn from t0
where mod(rn,3)<>0
and mod(rn,5)<>0
and mod(rn,7)<>0
and mod(rn,11)<>0
and mod(rn,13)<>0
and mod(rn,17)<>0
and mod(rn,19)<>0
and mod(rn,23)<>0
and mod(rn,29)<>0
)
SELECT COUNT(*)+10 --2,3,5,7,11,13,17,19,23,29
FROM (SELECT rn from t
MINUS
SELECT t1.rn * t2.rn
FROM t t1, t t2
WHERE t1.rn <= t2.rn
AND t1.rn BETWEEN 31 AND (SELECT SQRT(2000000) FROM DUAL)
AND t1.rn * t2.rn <2000000
);
执行记录
scoot@orcl>-- 上例中最后一个由2w调整为200w
scoot@orcl>WITH t AS
2 (SELECT rownum+1 rn FROM dual CONNECT BY rownum <= 2000000 -1),
3 t1 AS
4 (SELECT rownum+1 rn FROM dual CONNECT BY rownum <= sqrt(2000000) -1)
5 SELECT count(*)
6 FROM t
7 WHERE NOT EXISTS (SELECT t1.rn
8 FROM t1
9 WHERE t1.rn <= sqrt(t.rn)
10 AND MOD(t.rn, t1.rn) = 0);
COUNT(*)
----------
148933
已用时间: 00: 04: 13.83
scoot@orcl>-- 这个是否可以持续优化
scoot@orcl>-- 我们优化算法,通过minus,只要2个数的成绩在20w以内,就进行剔除
scoot@orcl>-- 然后再把 能整除2的给剔除掉
scoot@orcl>WITH t AS (
2 SELECT ROWNUM+1 rn FROM DUAL CONNECT BY ROWNUM <= (2000000)/2-1
3 )
4 ,t_prime AS (
5 SELECT 2*ROWNUM+1 rn FROM DUAL CONNECT BY ROWNUM <= (2000000)/2-1
6 UNION ALL
7 SELECT 2 FROM DUAL
8 )
9 SELECT COUNT(*)
10 FROM (SELECT rn from t_prime
11 MINUS
12 SELECT t1.rn * t2.rn
13 FROM t t1, t t2
14 WHERE t1.rn <= t2.rn
15 AND t1.rn <= (SELECT SQRT(2000000) FROM DUAL)
16 AND t1.rn * t2.rn <2000000
17 );
COUNT(*)
----------
148933
已用时间: 00: 02: 04.74
scoot@orcl>
scoot@orcl>
scoot@orcl>
scoot@orcl>
scoot@orcl>-- 继续优化,把 3 、5、7都给剔除
scoot@orcl>WITH t0 AS
2 (SELECT 2*rownum+1 rn FROM dual CONNECT BY rownum <= 1000000 -1),
3 t AS
4 (select rn from t0 where mod(rn,3) != 0 and mod(rn,5) != 0 and mod(rn,7) != 0 )
5 SELECT COUNT(*)+1+3 --2,3,5,7
6 FROM (SELECT rn from t
7 MINUS
8 SELECT t1.rn * t2.rn
9 FROM t t1, t t2
10 WHERE t1.rn <= t2.rn
11 AND t1.rn BETWEEN 9 AND (SELECT SQRT(2000000) FROM DUAL)
12 AND t1.rn * t2.rn <2000000
13 );
COUNT(*)+1+3--2,3,5,7
------------------------
148933
已用时间: 00: 00: 14.23
scott@orcl>WITH t0 AS (
2 SELECT 2*ROWNUM+1 rn FROM DUAL CONNECT BY ROWNUM <= (2000000)/2-1-1
3 ),
4 t as(SELECT rn from t0
5 where mod(rn,3)<>0
6 and mod(rn,5)<>0
7 and mod(rn,7)<>0
8 and mod(rn,11)<>0
9 and mod(rn,13)<>0
10 and mod(rn,17)<>0
11 and mod(rn,19)<>0
12 and mod(rn,23)<>0
13 and mod(rn,29)<>0
14 )
15 SELECT COUNT(*)+10 --2,3,5,7,11,13,17,19,23,29
16 FROM (SELECT rn from t
17 MINUS
18 SELECT t1.rn * t2.rn
19 FROM t t1, t t2
20 WHERE t1.rn <= t2.rn
21 AND t1.rn BETWEEN 31 AND (SELECT SQRT(2000000) FROM DUAL)
22 AND t1.rn * t2.rn <2000000
23 )
24 /
COUNT(*)+10--2,3,5,7,11,13,17,19,23,29
--------------------------------------
148933
已用时间: 00: 00: 08.13
运行时间由 4分13秒优化到8.13秒,性能提升明显。