1、下载了hive-0.9.0.tar.gz
http://hadoop.apache.org/hive/releases.html
2、tar xzf hive-0.9.0.tar.gz
在/etc/profile中添加:
export HIVE_INSTALL=/home/hadoop/hive-0.9.0
export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$PATH:$HADOOP_HOME/bin:$HIVE_INSTALL/bin
在NameNode上用hadoop用户启动hadoop
/opt/hadoop/bin/start-all.sh
然后在hive服务器上
/home/hadoop/hive-0.9.0/bin/hive
启动hive
root@Slave2:/home/hadoop# /home/hadoop/hive-0.9.0/bin/hive
Logging initialized using configuration in jar:file:/home/hadoop/hive-0.9.0/lib/hive-common-0.9.0.jar!/hive-log4j.properties
Hive history file=/tmp/root/hive_job_log_root_201208021726_1864342643.txt
hive> show tables;
OK
Time taken: 4.664 seconds
hive>
配置:
(注意:Hive 只读取 hive-env.sh,直接编辑模板文件不会生效,应先复制:cp hive-env.sh.template hive-env.sh,再编辑 hive-env.sh)
root@Slave2:/home/hadoop/hive-0.9.0/conf# vim hive-env.sh.template
HADOOP_HOME=/opt/hadoop
//默认设置
root@Slave2:/home/hadoop/hive-0.9.0/conf# cp -rf hive-default.xml.template hive-default.xml
//个性化设置
root@Slave2:/home/hadoop/hive-0.9.0/conf# cp -rf hive-default.xml.template hive-site.xml
在namenode上用root用户
root@Master:/opt/hadoop# bin/hadoop dfsadmin -safemode leave
Safe mode is OFF
root@Master:/opt/hadoop# /opt/hadoop/bin/hadoop fs -mkdir /tmp
root@Master:/opt/hadoop# /opt/hadoop/bin/hadoop fs -mkdir /user/hive/warehouse
root@Master:/opt/hadoop# /opt/hadoop/bin/hadoop fs -chmod g+w /tmp
root@Master:/opt/hadoop# /opt/hadoop/bin/hadoop fs -chmod g+w /user/hive/warehouse
root@Slave2:/tmp# /home/hadoop/hive-0.9.0/bin/hive -e "create table dummy(values STRING);"
root@Slave2:/tmp# /home/hadoop/hive-0.9.0/bin/hive -e "load data local inpath '/tmp/dummy.txt' overwrite into table dummy"
root@Slave2:/tmp# /home/hadoop/hive-0.9.0/bin/hive
hive> show tables;
OK
dummy
Time taken: 3.398 seconds
或者:-S 表示强制不显示信息
root@Slave2:/tmp# /home/hadoop/hive-0.9.0/bin/hive -S -e 'select * from dummy'
x
创建表
hive> create table records(year STRING, temperature INT, quality INT) row format delimited fields terminated by '\t';
-----------------------------------------------------
错误
FAILED: Error in metadata: MetaException(message:Got exception: org.apache.hadoop.security.AccessControlException org.apache.hadoop.security.AccessControlException: Permission denied: user=root, access=WRITE, inode="":hadoop:supergroup:rwxr-xr-x)
请重新执行
root@Master:/opt/hadoop# /opt/hadoop/bin/hadoop fs -mkdir /tmp
root@Master:/opt/hadoop# /opt/hadoop/bin/hadoop fs -mkdir /user/hive/warehouse
root@Master:/opt/hadoop# /opt/hadoop/bin/hadoop fs -chmod g+w /tmp
root@Master:/opt/hadoop# /opt/hadoop/bin/hadoop fs -chmod g+w /user/hive/warehouse
-------------------------------------------------------
导入数据
hive> load data local inpath '/home/qiaowang/install_Hadoop/sample.txt' overwrite into table records;
hive> select * from records;
OK
1992 111 0
1993 171 2
1994 161 4
1995 151 1
1996 141 3
1997 121 4
1998 131 0
1999 101 3
Time taken: 0.232 seconds
hive> set -v 列出所有属性值
如果出现权限问题,重新使用hadoop用户创建并format所有文件夹。
Hive的使用
(注意:正确写法是 ./hive --service help,--service 与服务名之间要有空格;下面的 -servicehelp 写法会报错)
hadoop@Slave2:/tmp$ /home/hadoop/hive-0.9.0/bin/hive -servicehelp
Unrecognized option: -servicehelp
usage: hive
-d,--define <key=value> Variable subsitution to apply to hive
commands. e.g. -d A=B or --define A=B
-e <quoted-query-string> SQL from command line
-f <filename> SQL from files
-H,--help Print help information
-h <hostname> connecting to Hive Server on remote host
--hiveconf <property=value> Use value for given property
--hivevar <key=value> Variable subsitution to apply to hive
commands. e.g. --hivevar A=B
-i <filename> Initialization SQL file
-p <port> connecting to Hive Server on port number
-S,--silent Silent mode in interactive shell
-v,--verbose Verbose mode (echo executed SQL to the
console)
----------------------------------------------------------------
安装mysql
----------------------------------------------------------------
1、下载mysql-connector-java-5.1.15-bin.jar
http://www.java2s.com/Code/Jar/m/Downloadmysqlconnectorjava5115binjar.htm
root@Slave2:/home/hadoop/hive-0.9.0/lib#
rz -e
mysql-connector-java-5.1.15-bin.jar 加载进来
2、整合 mysql 做为hive的metastore
root@Slave2:/usr/local/mysql/bin# ./mysql -uroot -p
创建hive数据库:
mysql> create database hive;
创建用户hive,它只能从localhost连接到数据库并可以连接到hive数据库:
mysql> grant all on hive.* to
[email protected] identified by '123456';
mysql> flush privileges;
root@Slave2:/home/hadoop/hive-0.9.0/conf# vim hive-site.xml
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://10.2.128.120:3306/hive?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
<description>password to use against metastore database</description>
</property>
hadoop@Slave2:~/hive-0.9.0/bin$ ./hive
hive> CREATE TABLE my(id INT,name string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
root@Slave2:/usr/local/mysql/bin# ./mysql -uhive -p123456 -h10.2.128.120
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 8
Server version: 5.0.82-log Source distribution
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql> show tables;
ERROR 1046 (3D000): No database selected
mysql> show databases;
+--------------------+
| Database |
+--------------------+
| information_schema |
| hive |
| test |
+--------------------+
3 rows in set (0.00 sec)
mysql> use hive
Database changed
mysql> show tables;
+-----------------+
| Tables_in_hive |
+-----------------+
| BUCKETING_COLS |
| CDS |
| COLUMNS_V2 |
| DATABASE_PARAMS |
| DBS |
| PARTITION_KEYS |
| SDS |
| SD_PARAMS |
| SEQUENCE_TABLE |
| SERDES |
| SERDE_PARAMS |
| SORT_COLS |
| TABLE_PARAMS |
| TBLS |
+-----------------+
14 rows in set (0.00 sec)
mysql> select * from TBLS;
+--------+-------------+-------+------------------+--------+-----------+-------+----------+---------------+--------------------+--------------------+
| TBL_ID | CREATE_TIME | DB_ID | LAST_ACCESS_TIME | OWNER | RETENTION | SD_ID | TBL_NAME | TBL_TYPE | VIEW_EXPANDED_TEXT | VIEW_ORIGINAL_TEXT |
+--------+-------------+-------+------------------+--------+-----------+-------+----------+---------------+--------------------+--------------------+
| 1 | 1343987480 | 1 | 0 | hadoop | 0 | 1 | my | MANAGED_TABLE | NULL | NULL |
+--------+-------------+-------+------------------+--------+-----------+-------+----------+---------------+--------------------+--------------------+
1 row in set (0.00 sec)
创建完毕。
启动hive的web界面
http://10.2.128.120:9999/hwi/
方式一
export ANT_LIB=/opt/ant/lib
./hive --service hwi
------------------------------------------------------------------------------------
以下为安装 在Master上安装Hbase
1、下载地址
http://www.apache.org/dyn/closer.cgi/hbase
http://www.fayea.com/apache-mirror/hbase/stable/
hbase-0.92.1.tar.gz
root@Master:/opt# tar xfz hbase-0.92.1.tar.gz
root@Master:/opt# cd hbase-0.92.1/conf
root@Master:/opt/hbase-0.92.1/conf# vim hbase-site.xml
<configuration>
<property>
<name>hbase.rootdir</name>
<value>hdfs://Master.Hadoop:54310/hbase</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.master</name>
<value>hdfs://Master.Hadoop:60000</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2222</value>
<description>Property from ZooKeeper's config zoo.cfg.
The port at which the clients will connect.
</description>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>Master.Hadoop,Slave1.Hadoop,Slave2.Hadoop</value>
<description>Comma separated list of servers in the ZooKeeper Quorum.
For example, "host1.mydomain.com,host2.mydomain.com,host3.mydomain.com".
By default this is set to localhost for local and pseudo-distributed modes
of operation. For a fully-distributed setup, this should be set to a full
list of ZooKeeper quorum servers. If HBASE_MANAGES_ZK is set in hbase-env.sh
this is the list of servers which we will start/stop ZooKeeper on.
</description>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/usr/local/zookeeper</value>
<description>Property from ZooKeeper's config zoo.cfg.
The directory where the snapshot is stored.
</description>
</property>
</configuration>
其中:
<value>Master.Hadoop,Slave1.Hadoop,Slave2.Hadoop</value> //配置的主机数为单数
hbase.rootdir:“hdfs://frank-1:9000”这部分必须和hadoop-config/core-site.xml中的fs.default.name保持一致;
hbase.zookeeper.property.clientPort,是由于我在zoo.cfg中修改了默认的clientPort,所以在这里要设置,监听2222 端口(默认是2181)
Hbase管理着的ZooKeeper集群在节点 rs{1,2,3,4,5}.example.com, 监听2222 端口(默认是2181),并确保conf/hbase-env.sh文件中 HBASE_MANAGES_ZK的值是 true ,再编辑 conf/hbase-site.xml 设置 hbase.zookeeper.property.clientPort 和 hbase.zookeeper.quorum。你还可以设置 hbase.zookeeper.property.dataDir属性来把ZooKeeper保存数据的目录地址改掉。默认值是 /tmp ,这里在重启的时候会被操作系统删掉,可以把它修改到 /usr/local/zookeeper.
root@Master:/opt/hbase-0.92.1/conf# mkdir /usr/local/zookeeper
root@Master:/opt/hbase-0.92.1/conf# vim /etc/profile //每台机器都要配
export HBASE_HOME=/opt/hbase-0.92.1
export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$PATH:$HADOOP_HOME/bin:$HIVE_INSTALL/bin:$HBASE_HOME/bin
root@Master:/opt/hadoop/conf# vim /opt/hbase-0.92.1/conf/hbase-env.sh
export JAVA_HOME=/usr/lib/jdk1.6.0_33
export HBASE_MANAGES_ZK=true
export HBASE_CLASSPATH=/opt/hadoop/conf
5 把hadoop hdfs-site.xml考到hbase conf下
root@Master:/opt/hadoop/conf# cp hdfs-site.xml /opt/hbase-0.92.1/conf/
6 配置conf 下的regionservers
root@Master:/opt/hbase-0.92.1/conf# vim regionservers
Slave1
Slave2
拷贝到Slave2上
root@Slave2:/opt/hbase-0.92.1# scp -r
[email protected]:/opt/hbase-0.92.1/ /opt/
root@Slave1:/opt# scp -r
[email protected]:/opt/hbase-0.92.1/ /opt/
7、启动HBase
权限问题,都改为hadoop组
chown -R hadoop:hadoop hbase-0.92.1/
chown -R hadoop:hadoop /usr/local/zookeeper/
hadoop@Master:/opt/hbase-0.92.1/bin$ ./start-hbase.sh
Slave2.Hadoop: starting zookeeper, logging to /opt/hbase-0.92.1/bin/../logs/hbase-hadoop-zookeeper-Slave2.Hadoop.out
Slave1.Hadoop: starting zookeeper, logging to /opt/hbase-0.92.1/bin/../logs/hbase-hadoop-zookeeper-Slave1.Hadoop.out
Master.Hadoop: starting zookeeper, logging to /opt/hbase-0.92.1/bin/../logs/hbase-hadoop-zookeeper-Master.Hadoop.out
starting master, logging to /opt/hbase-0.92.1/logs/hbase-hadoop-master-Master.Hadoop.out
Slave2.Hadoop: starting regionserver, logging to /opt/hbase-0.92.1/bin/../logs/hbase-hadoop-regionserver-Slave2.Hadoop.out
Slave1.Hadoop: starting regionserver, logging to /opt/hbase-0.92.1/bin/../logs/hbase-hadoop-regionserver-Slave1.Hadoop.out
hadoop@Master:/opt/hbase-0.92.1/bin$
启动成功!
./hbase shell
create 't1', 'f1', 'f2', 'f3'
ERROR: org.apache.hadoop.hbase.MasterNotRunningException: Retried 7 times
很明显,日志中说是RPC协议不一致所造成的,恍然间明白:因为我在hbase的配置文件中将rootdir设置为hdfs,如果hbase与hadoop这两者的RPC协议不一致就会导致这个问题。
解决方法:
将hbase/lib目录下的hadoop-core的jar文件删除,将hadoop目录下的hadoop-0.20.2-core.jar拷贝到hbase/lib下面,然后重新启动hbase即可。
rm -rf hadoop-core-1.0.0.jar
cp hadoop-0.20.2-core.jar /opt/hbase-0.92.1/lib/
root@Master:/opt/hbase-0.92.1/lib# chown -R hadoop:hadoop *
错误
2012-08-06 18:42:03,177 FATAL org.apache.hadoop.hbase.master.HMaster: Unhandled exception. Starting shutdown.
java.net.ConnectException: Call to Master.Hadoop/127.0.1.1:54310 failed on connection exception: java.net.ConnectException: Connection refused
/etc/hosts 与hostname 不一致所致
修改后的/etc/hosts
127.0.0.1 localhost
10.2.128.46 Master.Hadoop
10.2.128.20 Slave1.Hadoop
10.2.128.120 Slave2.Hadoop
可查看日志 tail -f hbase-hadoop-master-Master.Hadoop.log
hbase(main):005:0> create 'test','id','name'
0 row(s) in 1.8840 seconds
hbase(main):001:0> put 'test','row1','id','1'
0 row(s) in 0.5120 seconds
hbase(main):002:0> put 'test','row1','name','feng'
0 row(s) in 0.0160 seconds
hbase(main):003:0> list
TABLE
test
1 row(s) in 0.1210 seconds
hbase(main):004:0> scan 'test'
ROW COLUMN+CELL
row1 column=id:, timestamp=1344251239462, value=1
row1 column=name:, timestamp=1344251259562, value=feng
1 row(s) in 0.0510 seconds
hbase(main):005:0> get 'test','row1'
COLUMN CELL
id: timestamp=1344251239462, value=1
name: timestamp=1344251259562, value=feng
2 row(s) in 0.0330 seconds
--------------------------------------------------------------------
1、Hive与HBase的整合
/home/hadoop/hive-0.9.0/conf/hive-site.xml 添加
<property>
<name>hive.aux.jars.path</name>
<value>file:///home/hadoop/hive-0.9.0/lib/hive-hbase-handler-0.9.0.jar,file:///home/hadoop/hive-0.9.0/lib/hbase-0.92.0.jar,file:///home/hadoop/hive-0.9.0/lib/zookeeper-3.4.3.jar</value>
</property>
<property>
<name>hive.zookeeper.quorum</name>
<value>Master.Hadoop,Slave1.Hadoop,Slave2.Hadoop</value>
<description>The list of zookeeper servers to talk to. This is only needed for read/write locks.</description>
</property>
2、拷贝hbase-0.92.0.jar到所有hadoop节点的hadoop/lib下
root@Slave2:/home/hadoop/hive-0.9.0/lib# cp hbase-0.92.0.jar /opt/hadoop/lib
chown -R hadoop:hadoop hbase-0.92.0.jar
root@Slave2:/home/hadoop/hive-0.9.0/lib# scp -r hbase-0.92.0.jar
[email protected]:/opt/hadoop/lib/
root@Slave2:/home/hadoop/hive-0.9.0/lib# scp -r hbase-0.92.0.jar
[email protected]:/opt/hadoop/lib/
chown -R hadoop:hadoop hbase-0.92.0.jar
3、拷贝hbase/conf下的hbase-site.xml文件到所有hadoop节点(包括master)的hadoop/conf下
root@Master:/opt/hadoop/conf# cp /opt/hbase-0.92.1/conf/hbase-site.xml /opt/hadoop/conf/
chown -R hadoop:hadoop hbase-site.xml
root@Master:/opt/hbase-0.92.1/conf# scp -r hbase-site.xml
[email protected]:/opt/hadoop/conf/
root@Master:/opt/hbase-0.92.1/conf# scp -r hbase-site.xml
[email protected]:/opt/hadoop/conf/
chown -R hadoop:hadoop hbase-site.xml
启动mysql
/etc/init.d/mysqld start
启动hbase
hadoop@Master:/opt/hbase-0.92.1/bin$ ./start-hbase.sh
可以查看hbase状态
http://10.2.128.46:60010/master-status
启动Hive
root@Slave2:/opt/hadoop/conf# hive
或
./hive
hive
1.创建hbase识别的数据库
CREATE TABLE hbase_table_1(key int, value string)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:val")
TBLPROPERTIES ("hbase.table.name" = "xyz");
hbase.table.name 定义在hbase的table名称
hbase.columns.mapping 定义在hbase的列族
登录hbase发现
hadoop@Master:/opt/hbase-0.92.1/bin$ ./hbase shell
HBase Shell; enter 'help<RETURN>' for list of supported commands.
Type "exit<RETURN>" to leave the HBase Shell
Version 0.92.1, r1298924, Fri Mar 9 16:58:34 UTC 2012
hbase(main):001:0> list
TABLE
test
xyz
2 row(s) in 0.5090 seconds
hbase(main):002:0>
hbase(main):003:0> describe 'xyz'
DESCRIPTION ENABLED
{NAME => 'xyz', FAMILIES => [{NAME => 'cf1', BLOOMFILTER => 'NONE', REPL true
ICATION_SCOPE => '0', VERSIONS => '3', COMPRESSION => 'NONE', MIN_VERSIO
NS => '0', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'fals
e', BLOCKCACHE => 'true'}]}
1 row(s) in 0.0500 seconds
hbase(main):004:0> scan 'xyz'
ROW COLUMN+CELL
0 row(s) in 0.1010 seconds
hbase(main):005:0> put 'xyz','100','cf1:val','www.51.com'
0 row(s) in 0.0700 seconds
返回Hive
hive> select * from hbase_table_1 limit 10;
OK
100 www.51.com
Time taken: 0.611 seconds
OK 数据已经存在!
查看hbase中已存在的表
hbase(main):010:0> scan 'test'
ROW COLUMN+CELL
row1 column=id:, timestamp=1344251239462, value=1
row1 column=name:, timestamp=1344251259562, value=feng
1 row(s) in 0.0720 seconds
drop table hbase_table_3;
hbase(main):013:0> put 'xyz','86','cf1:val','val_86'
0 row(s) in 0.0130 seconds
hive> select * from hbase_table_1;
OK
100 www.51.com
86 val_86
Time taken: 0.191 seconds
三、多列和多列族(Multiple Columns and Families)
CREATE TABLE pokes (foo INT, bar STRING);
b)批量插入数据
hive> LOAD DATA LOCAL INPATH '/home/hadoop/hive-0.9.0/examples/files/kv1.txt' OVERWRITE INTO TABLE pokes;
这个文件位于hive的安装目录下,examples/files/kv1.txt
INSERT OVERWRITE TABLE hbase_table_1 SELECT * FROM pokes WHERE foo=200;
hive> select * from hbase_table_1;
OK
100 www.51.com
200 val_200
86 val_86
Time taken: 0.11 seconds
1.创建数据库
CREATE TABLE hbase_table_2(key int, value1 string, value2 int, value3 int) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,a:b,a:c,d:e");
INSERT OVERWRITE TABLE hbase_table_2 SELECT foo, bar, foo+1, foo+2 FROM pokes WHERE foo=98 OR foo=100;
hive> INSERT OVERWRITE TABLE hbase_table_2 SELECT foo, bar, foo+1, foo+2 FROM pokes WHERE foo=98 OR foo=200;
hive> select * from hbase_table_2 limit 10;
OK
100 val_100 101 102
200 val_200 201 202
98 val_98 99 100
Time taken: 0.126 seconds