源码包:hadoop-2.3.0-src.tar.gz
yum -y install lzo-devel zlib-devel gcc autoconf automake libtool cmake openssl-devel
tar zxvf apache-maven-3.1.1-bin.tar.gz
vi /etc/profile
增加两行:
export MAVEN_HOME=[maven home path]
export PATH=${MAVEN_HOME}/bin:$PATH
tar -zxvf protobuf-2.5.0.tar.gz
cd protobuf-2.5.0
sudo ./configure
如果出现g++: command not found错误,是由于gcc-c++没有安装,使用yum install gcc-c++更新编译器重新编译即可解决问题。
sudo make
sudo make check
sudo make install
protoc --version
mvn clean package -Pdist,native -DskipTests -Dtar
编译成功后,./hadoop-dist/target/hadoop-2.3.0.tar.gz就是我们需要的文件了
在master机器上运行ssh-keygen -t rsa命令,一路按回车结束后,会在~/.ssh下生成id_rsa.pub的文件
ssh-copy-id -i ~/.ssh/id_rsa.pub grid@qzj04
ssh-copy-id -i ~/.ssh/id_rsa.pub grid@qzj02
cp id_rsa.pub authorized_keys
将authorized_keys文件拷贝到qzj02,qzj04机器的~/.ssh目录下
scp authorized_keys qzj02:/home/grid/.ssh
scp authorized_keys qzj04:/home/grid/.ssh
测试是否能无密码连接
ssh qzj02
exit
vi etc/hadoop/hadoop-env.sh
export JAVA_HOME=/u02/hadoop/jdk1.7.0_15
vi core-site.xml
hadoop.tmp.dir
/u02/hadoop/var
fs.default.name
hdfs://qzj05:8020
hadoop.proxyuser.mlx.hosts
*
hadoop.proxyuser.mlx.groups
*
vi hdfs-site.xml 由于端口被占用,故这里端口值设为默认值+1
dfs.namenode.name.dir
file:/u02/hadoop/var/dfs/name
dfs.datanode.data.dir
file:/u02/hadoop/var/dfs/data
dfs.replication
3
dfs.namenode.secondary.http-address
qzj05:9001
dfs.datanode.address
0.0.0.0:50011
default 50011
dfs.datanode.ipc.address
0.0.0.0:50021
default 50020
dfs.datanode.http.address
0.0.0.0:50076
default 50075
dfs.support.append
true
vi mapred-site.xml
mapreduce.framework.name
yarn
mapreduce.jobhistory.address
qzj05:10020
mapreduce.jobhistory.webapp.address
qzj05:19888
vi slaves
1 qzj05
2 qzj02
3 qzj04
vi yarn-site.xml
yarn.resourcemanager.address
qzj05:8032
yarn.resourcemanager.scheduler.address
qzj05:8030
yarn.resourcemanager.resource-tracker.address
qzj05:8031
yarn.resourcemanager.admin.address
qzj05:8033
yarn.resourcemanager.webapp.address
qzj05:8088
yarn.nodemanager.aux-services
mapreduce_shuffle
yarn.nodemanager.aux-services.mapreduce.shuffle.class
org.apache.hadoop.mapred.ShuffleHandler
mkdir -p /u02/hadoop/var/dfs/name
mkdir -p /u02/hadoop/var/dfs/data
mkdir -p /u02/hadoop/var/dfs/namesecondary
mkdir -p /u02/hadoop/var/mapred
rsync -avz /u02/hadoop/hadoop-2.3.0 qzj02:/u02/hadoop
rsync -avz /u02/hadoop/hadoop-2.3.0 qzj04:/u02/hadoop
rsync -avz /u02/hadoop/var qzj02:/u02/hadoop/var
rsync -avz /u02/hadoop/var qzj04:/u02/hadoop/var
vi ~/.bash_profile
export HADOOP_HOME=/u02/hadoop/hadoop-2.3.0
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
source ~/.bash_profile
bin目录下执行格式化namenode,命令:hdfs namenode -format
sbin目录下执行启动hdfs,yarn
start-dfs.sh
start-yarn.sh
检查是否启动:jps
主节点应有:NodeManager, DataNode, SecondaryNameNode, ResourceManager, NameNode这几个进程
子节点应有:DataNode, NodeManager这个两个进程
如未启动起来,检查logs,注意端口是否被占用
停止服务:sbin/stop-all.sh
i. 若编译中出现网络问题(比如被公司墙了什么的 = =),备份settings.xml,修改maven的镜像站
settings.xml中配置方式:
nexus-kaifazhe.me
*
Nexuskaifazhe.me
http://maven.kaifazhe.me/content/groups/public/
pom.xml配置方式:
nexus
kaifazhe.me’snexus
http://maven.kaifazhe.me/content/groups/public/
nexus
kaifazhe.me’snexus
http://maven.kaifazhe.me/content/groups/public/
ii. 报错:
[ERROR]Failed to execute goal org.apache.maven.plugins:maven-antrun-plugin:1.7:run(make) on project hadoop-pipes: An Ant BuildException has occured: execreturned: 1
[ERROR]around Ant part ...
解决方法:openssl-devel没装上,重装下openssl-devel就好了
yum install openssl-devel
安装包:zookeeper-3.4.6.tar.gz
vi ~/.bash_profile
export ZK_HOME=/u02/hadoop/zookeeper-3.4.6
source ~/.bash_profile
mkdir -p $ZK_HOME/var/data
mkdir -p $ZK_HOME/var/datalog
cp $ZK_HOME/conf/zoo_sample.cfg $ZK_HOME/conf/zoo.cfg
vi $ZK_HOME/conf/zoo.cfg
dataDir=/u02/hadoop/zookeeper-3.4.6/var/data
dataLogDir=/u02/hadoop/zookeeper-3.4.6/var/datalog
server.1=qzj05:2888:3888
server.2=qzj02:2888:3888
server.3=qzj04:2888:3888
rsync -avz /u02/hadoop/zookeeper-3.4.6 qzj02:/u02/hadoop
rsync -avz /u02/hadoop/zookeeper-3.4.6 qzj04:/u02/hadoop
vi /u02/hadoop/zookeeper-3.4.6/var/data/myid
主节点输入1
其他两个子节点依次设为2和3
$ZK_HOME/bin/zkServer.sh start(在三个节点分别执行此命令)
测试是否连通
$ZK_HOME/bin/zkCli.sh -server qzj05:2181,qzj02:2181,qzj04:2181
停止
$ZK_HOME/bin/zkServer.sh stop
安装包:hbase-0.96.1.1-hadoop2-bin.tar.gz
root# vi /etc/security/limits.conf
grid soft nofile 65535
grid hard nofile 65535
grid soft nproc 32000
grid hard nproc 32000
root# echo "session required pam_limits.so" >> /etc/pam.d/common-session
vi ~/.bash_profile
export HBASE_HOME=/u02/hadoop/hbase-0.96.1.1
source ~/.bash_profile
mkdir -p /u02/hadoop/hbase-0.96.1.1/var
vi $HBASE_HOME/conf/hbase-env.sh
export JAVA_HOME=/u02/hadoop/jdk1.7.0_15
export HBASE_MANAGES_ZK=false
export HBASE_HEAPSIZE=8000
export HBASE_LOG_DIR=/u02/hadoop/hbase-0.96.1.1/logs
vi $HBASE_HOME/conf/hbase-site.xml
hbase.rootdir
hdfs://qzj05:8020/hbase
hbase.cluster.distributed
true
hbase.tmp.dir
/u02/hadoop/hbase-0.96.1.1/var
ln -s $HADOOP_HOME/etc/hadoop/hdfs-site.xml $HBASE_HOME/conf/hdfs-site.xml
vi $HBASE_HOME/conf/regionservers
qzj05
qzj02
qzj04
rm -i $HBASE_HOME/lib/zookeeper-*
cp zookeeper-3.4.6.jar $HBASE_HOME/lib
检查各节点时间是否同步,将三台服务器时间同步,若各节点时间差距过大会报错
时间同步参照此文http://blog.chinaunix.net/uid-20104120-id-3838847.html
$HBASE_HOME/bin/start-hbase.sh
$HBASE_HOME/bin/hbase shell
输入jps命令检查
主节点下应有HMaster,HRegionServer进程,子节点有HRegionServer进程,检查日志
建表测试
create 'member','m_id','address','info'
查看是否创建成功:list
若该过程未报错则说明hbase安装成功
停止hbase:
$HBASE_HOME/bin/stop-hbase.sh
i. 问题:
用SecureCRT远程连接服务器,运行hbase shell输入命令无法回删
解决方法:选项-全局选项-常规-预设的会话设置-编辑预设的设置-仿真
将终端改成Linux
保存设置之后就可以用CTRL+Backspace来删除了
vi $HADOOP_HOME/etc/hadoop/hdfs-site.xml
dfs.support.append
true
dfs.datanode.max.xcievers
4096
vi $ZK_HOME/conf/java.env
export JAVA_OPTS="-Xms1000m -Xmx1000m"
echo "maxClientCnxns=60" >>$ZK_HOME/conf/zoo.cfg
vi $HBASE_HOME/conf/hbase-env.sh
export HBASE_HEAPSIZE=8000
vi $HBASE_HOME/conf/hbase-site.xml
zookeeper.session.timeout
60000
scp /u02/hadoop/hadoop-2.3.0/etc/hadoop/hdfs-site.xml qzj02:/u02/hadoop/hadoop-2.3.0/etc/hadoop/hdfs-site.xml
scp /u02/hadoop/hadoop-2.3.0/etc/hadoop/hdfs-site.xml qzj04:/u02/hadoop/hadoop-2.3.0/etc/hadoop/hdfs-site.xml
scp /u02/hadoop/zookeeper-3.4.6/conf/java.env qzj02:/u02/hadoop/zookeeper-3.4.6/conf/java.env
scp /u02/hadoop/zookeeper-3.4.6/conf/java.env qzj04:/u02/hadoop/zookeeper-3.4.6/conf/java.env
编译后安装包:apache-hive-0.14.0-SNAPSHOT-bin.tar.gz
用svn从 http://svn.apache.org/repos/asf/hive/trunk/
下载hive源码
cd hive-trunk
mvn clean install -DskipTests -Phadoop-2
mvn package -Pdist -DskipTests -Phadoop-2
将hive-trunk/packaging/target/apache-hive-0.14.0-SNAPSHOT-bin.tar.gz上传至服务器
tar xzvf apache-hive-0.14.0-SNAPSHOT-bin.tar.gz -C /u02/hadoop
mv apache-hive-0.14.0-SNAPSHOT-bin hive
vi ~/.bash_profile
export HIVE_HOME=/u02/hadoop/hive
source ~/.bash_profile
cd conf
cp hive-default.xml.template hive-default.xml
cp hive-env.sh.template hive-env.sh
cp hive-exec-log4j.properties.template hive-exec-log4j.properties
cp hive-log4j.properties.template hive-log4j.properties
mv hive-default.xml hive-site.xml
(hive-env.sh.template文件中存在一个bug,第2000行,
vi
export HADOOP_HOME=/u02/hadoop/hadoop-2.3.0
export HIVE_CONF_DIR=/u02/hadoop/hive/conf
主节点:
vi hive-site.xml
hive.metastore.warehouse.dir
hdfs://qzj05:8020/hive
location of default database for thewarehouse
hive.exec.scratchdir
hdfs://qzj05:8020/hive/scratchdir
Scratch space for Hive jobs
hive.querylog.location
/u02/hadoop/hive/logs
Location of Hive run time structured log file
javax.jdo.option.ConnectionURL
jdbc:mysql://192.168.10.199:3306/hiveMeta?createDatabaseIfNotExist=true
JDBC connect string for a JDBCmetastore
javax.jdo.option.ConnectionDriverName
com.mysql.jdbc.Driver
Driver class name for aJDBC metastore
javax.jdo.option.ConnectionUserName
root
username to use against metastoredatabase
javax.jdo.option.ConnectionPassword
123456
password to use against metastoredatabase
rm hbase-0.94.6.1-tests.jar
rm hbase-0.94.6.1.jar
rm zookeeper-3.4.3.jar
从/u02/hadoop/hbase-0.96.0-hadoop2/lib下hbase开头的包都拷贝过来
find /u02/hadoop/hbase-0.96.1.1/lib -name "hbase*.jar"|xargs -i cp {} ./
cp /u02/hadoop/hbase-0.96.1.1/lib/protobuf-java-2.5.0.jar /u02/hadoop/hive-0.12.0/lib
cp /u02/hadoop/hbase-0.96.1.1/lib/zookeeper-3.4.6.jar /u02/hadoop/hive-0.12.0/lib
cp/u02/hadoop/hbase-0.96.1.1/lib/hbase-client-0.96.1.1-hadoop2.jar /u02/hadoop/hive-0.12.0/lib
cp/u02/hadoop/hbase-0.96.1.1/lib/hbase-common-0.96.1.1-hadoop2.jar /u02/hadoop/hive-0.12.0/lib
cp mysql-connector-java-3.1.12-bin.jar /u02/hadoop/hive-0.12.0/lib
rsync -avz /u02/hadoop/hive qzj02:/u02/hadoop
rsync -avz /u02/hadoop/hive qzj04:/u02/hadoop
修改子节点hive-site.xml
hive.metastore.uris
thrift://qzj05:9083
先启动hadoop,zookeeper,hbase
启动metastore:hive --service metastore
查看log是否报错
用nohup挂载
nohup ./hive --service metastore &
hive --service hiveserver
./hive
查看mysql中的表
show tables;
在mysql中建表:
create table doudou(id int,name string);
CREATE TABLE hbase_table_1(key int,valuestring) STORED BY
'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" =":key,cf1:val") TBLPROPERTIES ("hbase.table.name" ="xyz");
i. 报错:
Exception in thread "main"java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiateorg.apache.hadoop.hive.metastore.HiveMetaStoreClient
Caused by: MetaException(message:Versioninformation not found in metastore. )
修改权限为777 (chmod 777 mysql-connector-java-3.1.12-bin.jar)
修改conf/hive-site.xml 中的“hive.metastore.schema.verification” 值为 false
ii. 报错:
javax.jdo.JDOFatalDataStoreException:Unable to open a test connection to the given database. JDBC url =jdbc:mysql://qzj05:3306/metastore?createDatabaseIfNotExist=true, username =root. Terminating connection pool. Original Exception: ------
java.sql.SQLException: Access denied foruser 'root'@'qzj05' (using password: YES)
at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:2928)
at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:771)
at com.mysql.jdbc.MysqlIO.secureAuth411(MysqlIO.java:3649)
at com.mysql.jdbc.MysqlIO.doHandshake(
MysqlIO.java:1176)
at com.mysql.jdbc.Connection.createNewIO(Connection.java:2558)
解决方法:将mysql中密码为空的设置为123456
use mysql;
select host,user,password from mysql.user;
update mysql.user set password =PASSWORD('123456') where password = '';
flush privileges;
GRANT ALL PRIVILEGES ON *.* TO 'root'@'qzj02' Identified by '123456';
iii. 报错:
: command not found14.0/bin/ext/beeline.sh:line 15:
: command not found14.0/bin/ext/beeline.sh:line 19:
/u02/hadoop/hive-0.14.0/bin/ext/beeline.sh:line 20: syntax error near unexpected t'ken `{
'u02/hadoop/hive-0.14.0/bin/ext/beeline.sh:line 20: `beeline () {
: command notfound14.0/bin/ext/metatool.sh: line 15:
: command notfound14.0/bin/ext/metatool.sh: line 18:
/u02/hadoop/hive-0.14.0/bin/ext/metatool.sh:line 19: syntax error near unexpected 'oken `{
'u02/hadoop/hive-0.14.0/bin/ext/metatool.sh:line 19: `metatool () {
Service cli not found
orcfiledump rcfilecat schemaTool versionineage metastore metatool
解决方法:
vi -b $HIVE_HOME/bin/ext/beeline.sh 发现每行行尾多了个^M(回车符CR),这是由于该文件曾在Windows/DOS环境下编辑过,使用了CRLF行结束符;Linux下的shell只认LF,行尾残留的^M会导致shell脚本运行错误
在命令编辑行<按ESC键然后shift+:冒号>输入:%s/^M//g 这样就删除了行尾的^M,同理处理$HIVE_HOME/bin/ext/metatool.sh
安装包: sqoop-1.99.3-bin-hadoop200.tar.gz
vi ~/.bash_profile
export SQOOP_HOME=/u02/hadoop/sqoop-1.99.3-bin-hadoop200
source ~/.bash_profile
在oracle的安装路径下
cp database/stage/ext/jlib/ojdbc5.jar /u02/hadoop/sqoop-1.99.3-bin-hadoop200/server/lib
cp $HIVE_HOME/lib/mysql-connector-java-3.1.12-bin.jar /u02/hadoop/sqoop-1.99.3-bin-hadoop200/server/lib
cd server/conf
vi catalina.properties
common.loader=/u02/hadoop/hadoop-2.3.0/share/hadoop/common/*.jar,/u02/hadoop/hadoop-2.3.0/share/hadoop/common/lib/*.jar,/u02/hadoop/hadoop-2.3.0/share/hadoop/yarn/*.jar,/u02/hadoop/hadoop-2.3.0/share/hadoop/hdfs/*.jar,/u02/hadoop/hadoop-2.3.0/share/hadoop/mapreduce/*.jar,/u02/hadoop/sqoop-1.99.3-bin-hadoop200/server/lib/*.jar
vi sqoop.properties
org.apache.sqoop.submission.engine.mapreduce.configuration.directory=/u02/hadoop/hadoop-2.3.0/etc/hadoop
scp -r sqoop-1.99.3-bin-hadoop200 qzj02:/u02/hadoop
scp -r sqoop-1.99.3-bin-hadoop200 qzj04:/u02/hadoop
sqoop运行参照http://sqoop.apache.org/docs/1.99.3/Sqoop5MinutesDemo.html(提交job的时候用官网上的命令会报错)
启动:./bin/sqoop.sh server start
停止:./bin/sqoop.sh server stop
在子节点上
./bin/sqoop.sh client
sqoop:000> set server --host 192.168.10.199 --port 12000 --webapp sqoop
sqoop:000> show version --all
在server上
./bin/sqoop.sh client
create connection --cid 1
Using Connector id 1
Connection configuration
JDBC Driver Class: com.mysql.jdbc.Driver
JDBC Connection String: jdbc:mysql://192.168.10.199:3306/hiveMeta
Username: root
Password:
JDBC Connection Properties:
Security related configuration options
Max connections: 100
sqoop:000> create job --xid 1 --type import
Database configuration
Schema name: hiveMeta
Table name: ROLES
Table SQL statement:
Table column names:
Partition column name:
Nulls in partition column:
Boundary query:
Output configuration
Storage type: HDFS
Output format: TEXT_FILE
Compression format: NONE
Output directory: /user/jarcec/test3
Throttling resources
Extractors:
Loaders:
执行job:
sqoop:000> start job --jid 1
i. submission start --jid 1 时
报错:No such property: start for class: groovysh_evaluate
解决方法:改成start job --jid 1
ii. status job --jid 1时
报错:Exception: org.apache.sqoop.common.SqoopException Message:CLIENT_0001:Server has returned exception
解决方法:show job 发现没有id为1的job,之前删除了id为1的job,新建的job id为2
status job --jid 2
iii. 运行job最后状态为failed,检查日志报错
Application application_1397717698978_0004failed 2 times due to AM Container for appattempt_1397717698978_0004_000002exited with exitCode: 1 due to: Exception from container-launch:org.apache.hadoop.util.Shell$ExitCodeException:
org.apache.hadoop.util.Shell$ExitCodeException:
atorg.apache.hadoop.util.Shell.runCommand(Shell.java:505)
atorg.apache.hadoop.util.Shell.run(Shell.java:418)
atorg.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:650)
atorg.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:195)
atorg.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:283)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:79)
atjava.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:334)
atjava.util.concurrent.FutureTask.run(FutureTask.java:166)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
atjava.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:722)
Container exited with a non-zero exit code1
.Failing this attempt.. Failing theapplication.
执行如下命令测试job:
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.3.0.jar pi 10 5
如果报错则说明hadoop配置有问题,检查$HADOOP_HOME/etc/hadoop/mapred-site.xml
发现mapreduce.jobhistory.webapp.address这个参数配错了,应为qzj05:19888
iv. 编译时报错:Too many unapproved licenses: 1
vi /etc/profile
export MAVEN_OPTS="-Xms512m -Xmx1024m-XX:PermSize=256m -XX:MaxPermSize=512m"
source /etc/profile
mvn clean install -DskipTests -Phadoop200 -Drat.numUnapprovedLicenses=100
mvn package -Pdist -DskipTests -Phadoop200 -Drat.numUnapprovedLicenses=100
mvn package -Pdist -DskipTests -Phadoop200
v. 编译时报错:
Arequired class was missing while executingorg.apache.maven.plugins:maven-site-plugin:3.0-beta-3:site:org/sonatype/aether/graph/DependencyFilter
解决方法:
vi ./docs/pom.xml
将
修改为