1. Install zookeeper
1) download zookeeper from https://zookeeper.apache.org/releases.html#download
2) extract binary
$ tar xvf apache-zookeeper-3.8.1-bin.tar.gz -C ~/bigdata/
$ mv ~/bigdata/apache-zookeeper-3.8.1-bin ~/bigdata/zookeeper-3.8.1 # the tarball extracts as apache-zookeeper-3.8.1-bin; rename to match the paths used below
3) configure zoo.cfg
$ cd ~/bigdata/zookeeper-3.8.1/conf
$ cp zoo_sample.cfg zoo.cfg
$ vi zoo.cfg # edit zoo.cfg
$ diff -u zoo_sample.cfg zoo.cfg
--- zoo_sample.cfg 2023-01-26 00:31:05.000000000 +0800
+++ zoo.cfg 2023-06-16 18:19:01.510722864 +0800
@@ -9,7 +9,7 @@
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
-dataDir=/tmp/zookeeper
+dataDir=/home/sunxo/bigdata/zookeeper-3.8.1/tmp
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
@@ -25,7 +25,7 @@
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
-#autopurge.purgeInterval=1
+autopurge.purgeInterval=1
4) start zookeeper
$ cd ~/bigdata/zookeeper-3.8.1
$ bin/zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /root/bigdata/zookeeper-3.8.1/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
5) try zookeeper
$ netstat -lnpt | grep -i TCP | grep `jps | grep -w QuorumPeerMain | awk '{print $1}'`
tcp6 0 0 :::2181 :::* LISTEN 240750/java
tcp6 0 0 :::42277 :::* LISTEN 240750/java
tcp6 0 0 :::8080 :::* LISTEN 240750/java
$ bin/zkCli.sh -server 127.0.0.1:2181
[zk: 127.0.0.1:2181(CONNECTED) 0] ls /
[zookeeper]
Note: use the following command to stop zookeeper
$ cd ~/bigdata/zookeeper-3.8.1
$ bin/zkServer.sh stop
2. Install Hadoop
1) download Hadoop from https://hadoop.apache.org/releases.html
2) extract binary
$ tar xvf hadoop-2.10.2.tar.gz -C ~/bigdata/
3) configure
$ export HADOOP_HOME=~/bigdata/hadoop-2.10.2 # HADOOP_HOME is referenced by all following steps
$ cd $HADOOP_HOME/etc/hadoop
$ vi hadoop-env.sh core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml
$ diff -u hadoop-env.sh.orig hadoop-env.sh
...
# The java implementation to use.
-export JAVA_HOME=${JAVA_HOME}
+export JAVA_HOME=/opt/jdk
$ cat core-site.xml
...
fs.defaultFS
hdfs://ubuntu:8020
hadoop.tmp.dir
/home/sunxo/bigdata/hadoop-2.10.2/data/tmp
$ cat hdfs-site.xml
...
dfs.replication
1
dfs.namenode.http-address
ubuntu:50070
dfs.datanode.address
ubuntu:50010
dfs.datanode.http.address
ubuntu:50075
dfs.datanode.ipc.address
ubuntu:50020
$ cat mapred-site.xml
...
mapreduce.framework.name
yarn
mapreduce.jobhistory.address
ubuntu:10020
mapreduce.jobhistory.webapp.address
ubuntu:19888
$ cat yarn-site.xml
...
yarn.resourcemanager.hostname
ubuntu
yarn.nodemanager.aux-services
mapreduce_shuffle
yarn.log-aggregation-enable
true
yarn.log-aggregation.retain-seconds
604800
yarn.resourcemanager.scheduler.class
org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler
4) format the filesystem
$ cd $HADOOP_HOME
$ bin/hdfs namenode -format
...
23/06/16 15:39:53 INFO common.Storage: Storage directory /home/sunxo/bigdata/hadoop-2.10.2/data/tmp/dfs/name has been successfully formatted
5) start hadoop / yarn
$ cd $HADOOP_HOME
$ sbin/start-dfs.sh
$ sbin/start-yarn.sh
$ sbin/mr-jobhistory-daemon.sh start historyserver
6) try hadoop
#!/bin/sh
# Word-count smoke test for a freshly installed pseudo-distributed Hadoop/YARN.
# Requires: HADOOP_HOME set, and the HDFS/YARN daemons already running.
set -eu

# Run the bundled wordcount example over a tiny fixed input and print the result.
mr() {
    cd "$HADOOP_HOME" || exit 1
    mkdir -p input
    echo test apache hadoop hadoop sqoop hue mapreduce sqoop oozie http > input/in.txt
    # -f suppresses the error when the path does not exist yet (first run)
    hdfs dfs -rm -f -r input
    hdfs dfs -mkdir input
    hdfs dfs -put input/in.txt input
    hdfs dfs -rm -f -r output
    hadoop jar "$jarfile" wordcount input output
    hdfs dfs -cat output/*
}

# Abort with a clear message if HADOOP_HOME is unset or empty.
: "${HADOOP_HOME:?HADOOP_HOME must be set}"
jarfile=$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.10.2.jar
mr
$ cd $HADOOP_HOME
$ ./mr.sh
...
apache 1
hadoop 2
http 1
hue 1
mapreduce 1
oozie 1
sqoop 2
test 1
Note: use the following commands to stop hadoop / yarn
$ cd $HADOOP_HOME
$ sbin/mr-jobhistory-daemon.sh stop historyserver
$ sbin/stop-yarn.sh
$ sbin/stop-dfs.sh
3. Install hbase
1) download HBase from https://hbase.apache.org/downloads.html
2) extract binary
$ tar xvf hbase-2.4.16-bin.tar.gz -C ~/bigdata/
3) configure
$ export HBASE_HOME=~/bigdata/hbase-2.4.16 # HBASE_HOME is referenced by all following steps
$ cd $HBASE_HOME/conf
$ vi hbase-env.sh hbase-site.xml
$ diff -u hbase-env.sh.orig hbase-env.sh
$ diff -u hbase-site.xml.orig hbase-site.xml
...
# The java implementation to use. Java 1.8+ required.
-# export JAVA_HOME=/usr/java/jdk1.8.0/
+export JAVA_HOME=/opt/jdk
...
# Tell HBase whether it should manage it's own instance of ZooKeeper or not.
-# export HBASE_MANAGES_ZK=true
+export HBASE_MANAGES_ZK=false
$ cat hbase-site.xml
...
hbase.cluster.distributed
true
hbase.rootdir
hdfs://ubuntu:8020/hbase
hbase.zookeeper.quorum
localhost
hbase.zookeeper.property.clientPort
2181
zookeeper.znode.parent
/hbase
phoenix.schema.isNamespaceMappingEnabled
true
phoenix.schema.mapSystemTablesToNamespace
true
4) start hbase
$ cd $HBASE_HOME
$ bin/start-hbase.sh
5) try hbase
$ cd $HBASE_HOME
$ bin/hbase shell
> create_namespace 'manga'
Took 0.1748 seconds
> list_namespace
NAMESPACE
default
hbase
manga
3 row(s)
Took 0.0209 seconds
Note: use the following commands to stop hbase
$ cd $HBASE_HOME
$ bin/stop-hbase.sh
reference:
https://zookeeper.apache.org/doc/r3.8.1/zookeeperStarted.html
https://hadoop.apache.org/docs/r2.10.2/hadoop-project-dist/hadoop-common/SingleCluster.html#Pseudo-Distributed_Operation
https://hbase.apache.org/book.html#quickstart