Hive 性能测试:使用 ANALYZE TABLE 收集列统计信息(column statistics)对查询耗时的影响

测试记录数: 346804534 

数据大小:20GB


select eventchannel from event8 where pdate = '2015-09-01' and lower(eventchannel) rlike 'window' and eventsourcemachine rlike 'changhong' group by eventchannel having count(1) > 100;
cost: 179 sec


set hive.compute.query.using.stats = true;
set hive.stats.fetch.column.stats = true;
set hive.stats.fetch.partition.stats = true;
set hive.cbo.enable = true;
select eventchannel from event8 where pdate = '2015-09-01' and lower(eventchannel) rlike 'window' and eventsourcemachine rlike 'changhong' group by eventchannel having count(1) > 100;
cost: 365 sec


set hive.execution.engine = tez ;
select eventchannel from event8 where pdate = '2015-09-01' and lower(eventchannel) rlike 'window' and eventsourcemachine rlike 'changhong' group by eventchannel having count(1) > 100;
cost: 175 sec


--------Use ANALYZE table columns------------------------------------------
ANALYZE TABLE event8 partition(pdate='2015-09-01') COMPUTE STATISTICS FOR COLUMNS  eventchannel, eventsourcemachine;
cost: 161 sec

select eventchannel from event8 where pdate = '2015-09-01' and lower(eventchannel) rlike 'window' and eventsourcemachine rlike 'changhong' group by eventchannel having count(1) > 100;
cost: 167 sec


set hive.compute.query.using.stats = true;
set hive.stats.fetch.column.stats = true;
set hive.stats.fetch.partition.stats = true;
set hive.cbo.enable = true;
select eventchannel from event8 where pdate = '2015-09-01' and lower(eventchannel) rlike 'window' and eventsourcemachine rlike 'changhong' group by eventchannel having count(1) > 100;
cost: 224 sec


set hive.execution.engine = tez ;
set hive.compute.query.using.stats = true;
set hive.stats.fetch.column.stats = true;
set hive.stats.fetch.partition.stats = true;
set hive.cbo.enable = true;
select eventchannel from event8 where pdate = '2015-09-01' and lower(eventchannel) rlike 'window' and eventsourcemachine rlike 'changhong' group by eventchannel having count(1) > 100;
cost: 180 sec




--------Use ANALYZE table (table/partition-level statistics)------------------------------------------

ANALYZE TABLE event8 partition(pdate='2015-09-01') COMPUTE STATISTICS;

cost: 425 sec


set hive.execution.engine = tez ;
set hive.compute.query.using.stats = true;
set hive.stats.fetch.column.stats = true;
set hive.stats.fetch.partition.stats = true;
set hive.cbo.enable = true;
select eventchannel from event8 where pdate = '2015-09-01' and lower(eventchannel) rlike 'window' and eventsourcemachine rlike 'changhong' group by eventchannel having count(1) > 100;

cost: 407 sec


结论:在本次测试中,开启统计信息相关参数和 CBO 并没有让查询变快(多数情况下反而更慢),什么都不设置的默认配置才是最好的。

你可能感兴趣的:(Hadoop,Hive)