3、Hive安装配置
3.1安装MySQL
在datanode5上安装MySQL
# yum -y install mysql-server mysql
# mysql
mysql> grant all privileges on *.* to hive@'10.40.214.%' identified by "hive";
mysql> flush privileges;
3.2安装hive
# tar -zxf apache-hive-0.13.1-bin.tar.gz -C /var/data/; mv /var/data/apache-hive-0.13.1-bin /var/data/hive
# cd /var/data/hive
# vim bin/hive-config.sh ##在脚本开头添加下面的内容
export JAVA_HOME=/usr/java/jdk1.7.0_71
export HIVE_HOME=/var/data/hive
export HADOOP_HOME=/var/data/Hadoop
# vim hive-site.xml ##配置hive
# cp mysql-connector-java-5.1.18-bin.jar /var/data/hive/lib/ ##加入MySQL java驱动jar包
# vim /etc/profile ##配置hive环境变量
#hivepath
export HIVE_HOME=/var/data/hive
export PATH=$PATH:$HIVE_HOME/bin
# source /etc/profile
# hive ##启动hive
3.3hive的使用
##创建库
# hive -e "create database maillog;"
##创建表
# hive -e "create table maillog.izhenxin(mail_time string, message_id string, mail_to string, mail_domain string, mail_relay string, mail_delay string, mail_delays string, mail_dsn string, mail_status string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE;"
##删除表
# hive -e "drop table maillog.izhenxin;"
##导入数据到hive表中
# hive -e "LOAD DATA LOCAL INPATH '/opt/zhangdh/to_result.txt' OVERWRITE INTO TABLE maillog.izhenxin;"
##简单的数据查询
# hive -e "use maillog; select * from izhenxin_total limit 10;"
##用hive统计数据,会执行mapreduce过程
# hive -e "select mail_domain, sum(case when mail_status='sent' then 1 else 0 end) sent, sum(case when mail_status='bounced' then 1 else 0 end) bounced, sum(case when mail_status='deferred' then 1 else 0 end) deferred from maillog.izhenxin group by mail_domain order by sent desc;"
##将hive表数据的查询结果,添加到一个新表中
# hive -e "create table maillog.izhenxin_total(mail_domain string, sent_number int, bounced_number int, deferred int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE;"
# hive -e "use maillog; insert into table izhenxin_total select mail_domain, sum(case when mail_status='sent' then 1 else 0 end) sent, sum(case when mail_status='bounced' then 1 else 0 end) bounced, sum(case when mail_status='deferred' then 1 else 0 end) deferred from maillog.izhenxin group by mail_domain order by sent desc;"