mysql 随机查询 效率比较

select

    primary_count as primaryCount,  

    primary_score as primaryScore,  

    junior_count as juniorCount,    

    junior_score as juniorScore,    

    senior_count as seniorCount,    

    senoir_score as senoirScore,    

    total_score as totalScore,  

    pass_score as passScore 

from pd_paper p

where p.is_valid = '1'

order by RAND() limit 1

 

分析:

ORDER BY从句里面不能使用RAND()函数,因为这样会导致数据列被多次扫描。

测试发现这样效率非常低。一个15万余条的库,查询5条数据,要8秒以上。

 

You cannot use a column with RAND() values in an ORDER BY clause, because ORDER BY 

would evaluate the column multiple times.

 

更高效的做法:查询max(id) * rand()来随机获取数据。

 

 

SELECT * 

FROM `table` AS t1 

JOIN (

    SELECT ROUND(RAND() 

    * (SELECT MAX(id) FROM `table`)) AS id

) AS t2 

WHERE t1.id >= t2.id 

ORDER BY t1.id ASC LIMIT 5;

 

但是这样会产生连续的5条记录。解决办法只能是每次查询一条,查询5次。

即便如此也值得,因为15万条的表,查询只需要0.01秒不到。

采用join 语法  可以实现真正的随机。

 

SELECT * 

FROM `table` 

WHERE id >= (SELECT FLOOR( MAX(id) * RAND()) FROM `table` ) 

ORDER BY id LIMIT 1;

 

把语句完善一下,加上MIN(id)的判断。

如果没有加上MIN(id)的判断,结果有一半的时间总是查询到表中的前面几行。

完整的语句:

 

①  where 子句

 

SELECT * FROM `table` 

WHERE id >= (SELECT floor( RAND() * ((SELECT MAX(id) FROM `table`)-

 (SELECT MIN(id) FROM `table`)) + (SELECT MIN(id) FROM `table`)))  

ORDER BY id LIMIT 1;

 

② join

 

SELECT * 

FROM `table` AS t1 

JOIN (

    SELECT ROUND(RAND() 

    * ((SELECT MAX(id) FROM `table`)-(SELECT MIN(id) FROM `table`))

    +(SELECT MIN(id) FROM `table`)) AS id) AS t2 

WHERE t1.id >= t2.id 

ORDER BY t1.id LIMIT 1;

 

#随机查询一套考卷定义

SELECT

    p.primary_count as primaryCount,    

    p.primary_score as primaryScore,    

    p.junior_count as juniorCount,  

    p.junior_score as juniorScore,  

    p.senior_count as seniorCount,  

    p.senoir_score as senoirScore,  

    p.total_score   as totalScore,  

    p.pass_score as passScore   

 

FROM 

    pd_paper AS p

JOIN 

(

    SELECT ROUND(

        RAND() 

        *((SELECT MAX(id) FROM pd_paper)-(SELECT MIN(id) FROM pd_paper))

        +(SELECT MIN(id) FROM pd_paper)

    ) AS id

) 

    AS p2 

WHERE p.id >= p2.id 

ORDER BY p.id LIMIT 1;

 

 

最后在程序对这两个语句进行分别查询10次,

前者花费时间 0.147433 秒

后者花费时间 0.015130 秒

看来采用JOIN的语法比直接在WHERE中使用函数效率还要高很多。

 





附加一个复杂sql:按题目类型(三种)随机查询全部考题信息



 

SELECT * from (

    select

        p.id as id,

        p.title as title,   

        p.question as question, 

        p.answer as answer,

        p.crt_time as crtTime   

    from 

        pd_problem p

    join

        (SELECT ROUND(RAND() 

            * ((SELECT MAX(pp.id) FROM pd_problem pp)-(SELECT MIN(pp.id) FROM pd_problem pp))

            +(SELECT MIN(pp.id) FROM pd_problem pp)) AS pid) AS p2 

    where p.id >= p2.pid

    and p.is_valid = '1'

    and p.paper_type = '1'

    and p.paper_class = '0'

    order by p.id limit 5

 ) as t1

 union all 

 SELECT * from (

    select

        p.id as id,

        p.title as title,   

        p.question as question, 

        p.answer as answer,

        p.crt_time as crtTime   

    from 

        pd_problem p

    join

        (SELECT ROUND(RAND() 

            * ((SELECT MAX(pp.id) FROM pd_problem pp)-(SELECT MIN(pp.id) FROM pd_problem pp))

            +(SELECT MIN(pp.id) FROM pd_problem pp)) AS pid) AS p2 

    where p.id >= p2.pid

    and p.is_valid = '1'

    and p.paper_type = '1'

    and p.paper_class = '1'

    order by p.id limit 5

 ) as t2

 union all

 SELECT * from (

    select

        p.id as id,

        p.title as title,   

        p.question as question, 

        p.answer as answer,

        p.crt_time as crtTime   

    from 

        pd_problem p

    join

        (SELECT ROUND(RAND() 

            * ((SELECT MAX(pp.id) FROM pd_problem pp)-(SELECT MIN(pp.id) FROM pd_problem pp))

            +(SELECT MIN(pp.id) FROM pd_problem pp)) AS pid) AS p2 

    where p.id >= p2.pid

    and p.is_valid = '1'

    and p.paper_type = '1'

    and p.paper_class = '2'

    order by p.id limit 5

 ) as t3

 

 

你可能感兴趣的:(mysql)