-- Hadoop file locations:
Log directories:
/var/log/hadoop-hdfs
/var/log/hadoop-mapreduce
/var/log/hbase
/var/log/hive
/var/log/impala
Installation directory:
/usr/lib
Service (init) script directory:
/etc/init.d/
Configuration file directories:
/etc/hadoop/conf
/etc/hbase/conf
/etc/hive/conf
/etc/impala/conf
Environment variables:
/etc/profile
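A minimal sketch of the kind of entries typically appended to /etc/profile on a CDH node; the JAVA_HOME path below is an assumption and should match the locally installed JDK:
export JAVA_HOME=/usr/java/default        # assumed JDK location
export HADOOP_HOME=/usr/lib/hadoop        # CDH packages install under /usr/lib
export PATH=$PATH:$HADOOP_HOME/bin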
To make changes to /etc/profile take effect immediately, run:
source /etc/profile
Node configuration:
/etc/hosts
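A hypothetical /etc/hosts entry; the hostname is a placeholder, and the IP is the local server address used later in this guide:
172.16.2.52   cdh-node1     # placeholder hostname for the CDH server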
Yum repository directory:
/etc/yum.repos.d/
1. Building a local CDH4 repository
1) Install the httpd service (so the local repository can be browsed with a web browser)
-- Install httpd
yum install httpd
-- Start httpd
vi /etc/httpd/conf/httpd.conf  (configuration file)
service httpd start
service httpd stop
service httpd restart
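httpd serves files from its DocumentRoot, which is where the repository directories are created below; the stock CentOS default is shown here and only needs changing if your layout differs:
DocumentRoot "/var/www/html"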
-- Stop the firewall
service iptables status
service iptables stop
-- Check in a browser
http://<server-ip>
2) Install createrepo
-- createrepo is used to create a local yum repository.
sudo yum install yum-utils createrepo
3) Build the local repository
-- Download cloudera-manager.repo and cloudera-cdh4.repo into /etc/yum.repos.d/
cd /etc/yum.repos.d/
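For example, the two repo files can be fetched with wget; the URLs below are the usual Cloudera archive locations for CM4/CDH4 and should be verified before use:
wget http://archive.cloudera.com/cm4/redhat/6/x86_64/cm/cloudera-manager.repo
wget http://archive.cloudera.com/cdh4/redhat/6/x86_64/cdh/cloudera-cdh4.repo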
-- Sync the RPM packages from the remote repositories into the local httpd document root
cd /var/www/html/
reposync -r cloudera-manager
reposync -r cloudera-cdh4
-- Generate (or refresh) the repository metadata: each directory ends up with an RPMS folder and a repodata folder holding dependency information.
cd /var/www/html/cloudera-manager
createrepo .
cd /var/www/html/cloudera-cdh4
createrepo .
At this point the local CDH4 repository is ready.
4) Using the local repository
-- Edit cloudera-manager.repo so that baseurl points at the local server
Change baseurl=http://archive.cloudera.com/cm4/redhat/6/x86_64/cm/4/ to:
baseurl=http://172.16.2.52/cloudera-manager/
-- Edit cloudera-cdh4.repo so that baseurl points at the local server
Change baseurl=http://archive.cloudera.com/cdh4/redhat/6/x86_64/cdh/4/ to:
baseurl=http://172.16.2.52/cloudera-cdh4/
The baseurl is the server address plus the directory holding the RPM packages; that directory contains an RPMS folder with the packages and a repodata folder with the dependency metadata.
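After editing the .repo files, refresh the yum cache and confirm that the local repositories are visible:
yum clean all
yum makecache
yum repolist | grep cloudera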
2. Installing Cloudera Manager
-- Install
sudo yum install cloudera-manager-daemons
sudo yum install cloudera-manager-server
sudo yum install cloudera-manager-server-db
sudo yum install cloudera-manager-agent
-- Start (the embedded database service is cloudera-scm-server-db and should be started before the server)
service cloudera-scm-server-db start
service cloudera-scm-server start
service cloudera-scm-agent start
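Once these services are running, the Cloudera Manager web console should come up on port 7180 (the default admin port); a quick check from the command line, assuming the server address used above:
curl -I http://172.16.2.52:7180    # expect an HTTP response once the server has finished starting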
3. Installing CDH4
-- Install CDH4 (pseudo-distributed, MapReduce v1)
yum install hadoop-0.20-mapreduce-jobtracker hadoop-0.20-mapreduce-tasktracker
yum install hadoop-hdfs-namenode
yum install hadoop-hdfs-datanode
yum install hadoop-0.20-conf-pseudo
sudo -u hdfs hdfs namenode -format
-- Start HDFS
for x in `cd /etc/init.d ; ls hadoop-hdfs-*` ; do sudo service $x start ; done
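To confirm that HDFS came up, check the running daemons and list the (initially empty) filesystem root:
sudo jps                        # should show NameNode and DataNode
sudo -u hdfs hadoop fs -ls /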
4. Installing Hive
Reference: http://heylinux.com/archives/2456.html
-- Install Hive
sudo yum install hive hive-metastore hive-server
-- Install MySQL
sudo yum install mysql mysql-server mysql-devel
service mysqld start
-- Install the MySQL JDBC driver
sudo yum install mysql-connector-java
ln -s /usr/share/java/mysql-connector-java.jar /usr/lib/hive/lib/mysql-connector-java.jar
-- Set the MySQL root password
sudo /usr/bin/mysql_secure_installation
Answer the prompts as follows:
[...]
Enter current password for root (enter for none):
OK, successfully used password, moving on...
[...]
Set root password? [Y/n] y
New password: hadoophive
Re-enter new password: hadoophive
Remove anonymous users? [Y/n] Y
[...]
Disallow root login remotely? [Y/n] N
[...]
Remove test database and access to it? [Y/n] Y
[...]
Reload privilege tables now? [Y/n] Y
All done!
-- Enter MySQL and run the following in order
mysql -u root -p    (enter the password hadoophive when prompted)
CREATE DATABASE metastore;
USE metastore;
SOURCE /usr/lib/hive/scripts/metastore/upgrade/mysql/hive-schema-0.10.0.mysql.sql;
CREATE USER 'hive'@'%' IDENTIFIED BY 'hadoophive';
CREATE USER 'hive'@'localhost' IDENTIFIED BY 'hadoophive';
GRANT ALL PRIVILEGES ON metastore.* TO 'hive'@'%';
GRANT ALL PRIVILEGES ON metastore.* TO 'hive'@'localhost';
FLUSH PRIVILEGES;
quit;
-- Configure Hive
-- Edit the configuration file hive-site.xml
sudo vim /etc/hive/conf/hive-site.xml   (see the Hive configuration below)
<configuration>

<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files -->
<!-- that are implied by Hadoop setup variables. -->
<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive -->
<!-- users do not have to edit hadoop configuration files (that may be managed as a centralized -->
<!-- resource). -->

<!-- Hive Execution Parameters -->

<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://localhost:3306/metastore</value>
  <description>JDBC connect string for a JDBC metastore</description>
</property>

<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
  <description>Driver class name for a JDBC metastore</description>
</property>

<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>hive</value>
</property>

<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>hadoophive</value>
</property>

<property>
  <name>hive.metastore.uris</name>
  <value>thrift://127.0.0.1:9083</value>
  <description>IP address (or fully-qualified domain name) and port of the metastore host</description>
</property>

</configuration>
-- Start Hive
service hive-metastore start
service hive-server start
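A quick sanity check that the Hive CLI can reach the MySQL-backed metastore (no assumptions beyond the configuration above):
hive -e "show databases;"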
-- Run the following in order
$ sudo -u hdfs hadoop fs -mkdir /user/hive
$ sudo -u hdfs hadoop fs -chown hive /user/hive
$ sudo -u hdfs hadoop fs -mkdir /tmp
$ sudo -u hdfs hadoop fs -chmod 777 /tmp
$ sudo -u hdfs hadoop fs -chmod o+t /tmp
$ sudo -u hdfs hadoop fs -mkdir /data
$ sudo -u hdfs hadoop fs -chown hdfs /data
$ sudo -u hdfs hadoop fs -chmod 777 /data
$ sudo -u hdfs hadoop fs -chmod o+t /data
$ sudo chown -R hive:hive /var/lib/hive
$ sudo vim /tmp/kv1.txt
1,www.baidu.com
2,www.google.com
3,www.sina.com.cn
4,www.163.com
5,heylinux.com
Then, in the Hive CLI:
CREATE TABLE IF NOT EXISTS pokes ( foo INT, bar STRING ) ROW FORMAT DELIMITED FIELDS TERMINATED BY "," LINES TERMINATED BY "\n";
show tables;
desc formatted pokes;
LOAD DATA LOCAL INPATH '/tmp/kv1.txt' OVERWRITE INTO TABLE pokes;
select * from pokes;
5. Installing Impala
Reference: http://heylinux.com/archives/2456.html
-- Download the repository file (cloudera-impala.repo into /etc/yum.repos.d/)
-- Sync the repository
cd /var/www/html/
reposync -r cloudera-impala
cd /var/www/html/cloudera-impala
createrepo .
-- Edit cloudera-impala.repo
Change baseurl=http://archive.cloudera.com/impala/redhat/6/x86_64/impala/1/ to:
baseurl=http://172.16.2.52/cloudera-impala/
-- Install
sudo yum install impala
sudo yum install impala-shell
sudo yum install impala-server
sudo yum install impala-state-store
-- Add the following to /etc/hadoop/conf/hdfs-site.xml
sudo vim /etc/hadoop/conf/hdfs-site.xml
<property>
  <name>dfs.client.read.shortcircuit</name>
  <value>true</value>
</property>
<property>
  <name>dfs.domain.socket.path</name>
  <value>/var/run/hadoop-hdfs/dn._PORT</value>
</property>
<property>
  <name>dfs.client.file-block-storage-locations.timeout</name>
  <value>3000</value>
</property>
<property>
  <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
  <value>true</value>
</property>
-- Copy the configuration files to /etc/impala/conf/
sudo cp -rpa /etc/hadoop/conf/core-site.xml /etc/impala/conf/
sudo cp -rpa /etc/hadoop/conf/hdfs-site.xml /etc/impala/conf/
sudo cp -rpa /etc/hive/conf/hive-site.xml /etc/impala/conf/
-- Restart the datanode and Impala
sudo service hadoop-hdfs-datanode restart
sudo service impala-state-store restart
sudo service impala-server restart
-- Enter the Impala shell
impala-shell
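Inside impala-shell the Hive tables created earlier should be queryable; a minimal session might look like the following (connecting to the local impalad; tables created in Hive after impalad started may need a metadata refresh in the shell first):
[Not connected] > connect localhost;
[localhost:21000] > show tables;
[localhost:21000] > select * from pokes limit 5;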
6. Installing HBase (pseudo-distributed)
-- Install ZooKeeper
yum install zookeeper-server
-- Start ZooKeeper
service zookeeper-server start
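If the start fails because the data directory has not been initialised, the CDH packaging provides an init action that is run once before the first start:
service zookeeper-server init
service zookeeper-server start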
-- Install hbase, hbase-master, and hbase-regionserver
sudo yum install hbase
sudo yum install hbase-master
sudo yum install hbase-regionserver
-- Edit the configuration (raise the open-file limits)
sudo vim /etc/security/limits.conf
hdfs  - nofile 32768
hbase - nofile 32768
-- Edit the configuration (apply the limits to login sessions)
$ sudo vim /etc/pam.d/common-session
session required pam_limits.so
-- Edit the Hadoop configuration
sudo vim /etc/hadoop/conf/hdfs-site.xml
<property>
  <name>dfs.datanode.max.xcievers</name>
  <value>4096</value>
</property>
-- Edit the HBase configuration /etc/hbase/conf/hbase-site.xml (replace myhost with the NameNode's hostname)
<configuration>
<property>
  <name>hbase.rootdir</name>
  <value>hdfs://myhost:8020/hbase</value>
</property>
<property>
  <name>hbase.cluster.distributed</name>
  <value>true</value>
</property>
</configuration>
-- Create the HBase working directory in HDFS
sudo -u hdfs hadoop fs -mkdir /hbase
sudo -u hdfs hadoop fs -chown hbase /hbase
-- Restart HDFS
sudo /etc/init.d/hadoop-hdfs-namenode restart
sudo /etc/init.d/hadoop-hdfs-datanode restart
-- Start HBase
sudo service hbase-master start
sudo service hbase-regionserver start
-- Enter the HBase shell
hbase shell
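A short session inside the HBase shell to verify that everything works; the table and column family names are only examples:
create 'test', 'cf'               # create table 'test' with one column family 'cf'
put 'test', 'row1', 'cf:a', '1'   # insert one cell
scan 'test'                       # read it back
list                              # show all tables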