Set up an HBase Pseudo-Distributed Cluster

1. Install ZooKeeper
1) download ZooKeeper from https://zookeeper.apache.org/releases.html#download
2) extract the binary

$ tar xvf apache-zookeeper-3.8.1-bin.tar.gz -C ~/bigdata/
$ mv ~/bigdata/apache-zookeeper-3.8.1-bin ~/bigdata/zookeeper-3.8.1    # rename to match the paths used below

3) configure zoo.cfg

$ cd ~/bigdata/zookeeper-3.8.1/conf
$ cp zoo_sample.cfg zoo.cfg
$ vi zoo.cfg    # edit zoo.cfg
$ diff -u zoo_sample.cfg zoo.cfg
--- zoo_sample.cfg	2023-01-26 00:31:05.000000000 +0800
+++ zoo.cfg	2023-06-16 18:19:01.510722864 +0800
@@ -9,7 +9,7 @@
 # the directory where the snapshot is stored.
 # do not use /tmp for storage, /tmp here is just 
 # example sakes.
-dataDir=/tmp/zookeeper
+dataDir=/home/sunxo/bigdata/zookeeper-3.8.1/tmp
 # the port at which the clients will connect
 clientPort=2181
 # the maximum number of client connections.
@@ -25,7 +25,7 @@
 #autopurge.snapRetainCount=3
 # Purge task interval in hours
 # Set to "0" to disable auto purge feature
-#autopurge.purgeInterval=1
+autopurge.purgeInterval=1

4) start zookeeper

$ cd ~/bigdata/zookeeper-3.8.1
$ bin/zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /root/bigdata/zookeeper-3.8.1/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
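
You can also ask the server for its state; on this single-node setup the output should end with standalone mode:

$ bin/zkServer.sh status
...
Mode: standalone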

5) try zookeeper

$ netstat -lnpt  | grep -i TCP | grep `jps | grep -w QuorumPeerMain | awk '{print $1}'`
tcp6       0      0 :::2181                 :::*                    LISTEN      240750/java         
tcp6       0      0 :::42277                :::*                    LISTEN      240750/java         
tcp6       0      0 :::8080                 :::*                    LISTEN      240750/java     
$ bin/zkCli.sh -server 127.0.0.1:2181
[zk: 127.0.0.1:2181(CONNECTED) 0] ls /
[zookeeper]
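
A quick write/read round trip confirms the server actually works (the znode name /test and its value are arbitrary):

[zk: 127.0.0.1:2181(CONNECTED) 1] create /test hello
Created /test
[zk: 127.0.0.1:2181(CONNECTED) 2] get /test
hello
[zk: 127.0.0.1:2181(CONNECTED) 3] delete /test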

Note: use the following command to stop ZooKeeper

$ cd ~/bigdata/zookeeper-3.8.1
$ bin/zkServer.sh stop

2. Install Hadoop
1) download Hadoop from https://hadoop.apache.org/releases.html
2) extract the binary

$ tar xvf hadoop-2.10.2.tar.gz -C ~/bigdata/
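
The remaining steps refer to $HADOOP_HOME. A minimal sketch for ~/.bashrc, assuming the paths used in this article (JAVA_HOME matches the hadoop-env.sh setting below):

$ cat >> ~/.bashrc << 'EOF'
export JAVA_HOME=/opt/jdk
export HADOOP_HOME=$HOME/bigdata/hadoop-2.10.2
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
EOF
$ . ~/.bashrc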

3) configure

$ cd $HADOOP_HOME/etc/hadoop
$ vi hadoop-env.sh core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml
$ diff -u hadoop-env.sh.orig hadoop-env.sh
...
 # The java implementation to use.
-export JAVA_HOME=${JAVA_HOME}
+export JAVA_HOME=/opt/jdk
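
Note: the XML configs below refer to this machine by the hostname ubuntu, which must resolve to the local host, e.g. via /etc/hosts (the IP here is an example; use your machine's address):

$ grep ubuntu /etc/hosts
192.168.1.100   ubuntu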

$ cat core-site.xml
...
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://ubuntu:8020</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/home/sunxo/bigdata/hadoop-2.10.2/data/tmp</value>
    </property>
</configuration>

$ cat hdfs-site.xml
...
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.namenode.http-address</name>
        <value>ubuntu:50070</value>
    </property>
    <property>
        <name>dfs.datanode.address</name>
        <value>ubuntu:50010</value>
    </property>
    <property>
        <name>dfs.datanode.http.address</name>
        <value>ubuntu:50075</value>
    </property>
    <property>
        <name>dfs.datanode.ipc.address</name>
        <value>ubuntu:50020</value>
    </property>
</configuration>

$ cat mapred-site.xml
...
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>ubuntu:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>ubuntu:19888</value>
    </property>
</configuration>

$ cat yarn-site.xml
...
<configuration>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>ubuntu</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>604800</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
    </property>
</configuration>

4) format the filesystem

$ cd $HADOOP_HOME
$ bin/hdfs namenode -format
...
23/06/16 15:39:53 INFO common.Storage: Storage directory /home/sunxo/bigdata/hadoop-2.10.2/data/tmp/dfs/name has been successfully formatted
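
Note: start-dfs.sh and start-yarn.sh launch the daemons over ssh to localhost; if passwordless ssh is not already set up, the standard recipe from the Hadoop single-node guide is:

$ ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
$ chmod 0600 ~/.ssh/authorized_keys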

5) start hadoop / yarn

$ cd $HADOOP_HOME
$ sbin/start-dfs.sh
$ sbin/start-yarn.sh
$ sbin/mr-jobhistory-daemon.sh start historyserver
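
If startup succeeded, jps should list the HDFS, YARN, and history-server daemons alongside ZooKeeper (PIDs will differ):

$ jps
# expected entries (PIDs omitted): NameNode, DataNode, SecondaryNameNode,
# ResourceManager, NodeManager, JobHistoryServer, QuorumPeerMain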

6) try hadoop

$ cat mr.sh
#!/bin/sh

mr() {
    cd $HADOOP_HOME
    mkdir -p input
    echo test apache hadoop hadoop sqoop hue mapreduce sqoop oozie http > input/in.txt
    hdfs dfs -rm -f -r input
    hdfs dfs -mkdir input
    hdfs dfs -put input/in.txt input
    hdfs dfs -rm -f -r output
    hadoop jar $jarfile wordcount input output
    hdfs dfs -cat output/*
}

jarfile=$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.10.2.jar
mr

$ cd $HADOOP_HOME
$ chmod +x mr.sh    # make the script executable
$ ./mr.sh
...
apache	1
hadoop	2
http	1
hue	1
mapreduce	1
oozie	1
sqoop	2
test	1
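
The run can also be inspected through the web UIs wired up in the configs above (8088 is the ResourceManager's default web port; xdg-open simply opens a browser):

$ xdg-open http://ubuntu:50070     # NameNode UI (dfs.namenode.http-address)
$ xdg-open http://ubuntu:8088      # ResourceManager UI (YARN default)
$ xdg-open http://ubuntu:19888     # JobHistory UI (mapreduce.jobhistory.webapp.address)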

Note: use the following commands to stop Hadoop / YARN

$ cd $HADOOP_HOME
$ sbin/mr-jobhistory-daemon.sh stop historyserver
$ sbin/stop-yarn.sh
$ sbin/stop-dfs.sh

3. Install HBase
1) download HBase from https://hbase.apache.org/downloads.html
2) extract the binary

$ tar xvf hbase-2.4.16-bin.tar.gz -C ~/bigdata/
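
As with Hadoop, the steps below assume HBASE_HOME is set, e.g.:

$ cat >> ~/.bashrc << 'EOF'
export HBASE_HOME=$HOME/bigdata/hbase-2.4.16
export PATH=$PATH:$HBASE_HOME/bin
EOF
$ . ~/.bashrc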

3) configure

$ cd $HBASE_HOME/conf
$ vi hbase-env.sh hbase-site.xml
$ diff -u hbase-env.sh.orig hbase-env.sh
...
 # The java implementation to use.  Java 1.8+ required.
-# export JAVA_HOME=/usr/java/jdk1.8.0/
+export JAVA_HOME=/opt/jdk
...
 # Tell HBase whether it should manage it's own instance of ZooKeeper or not.
-# export HBASE_MANAGES_ZK=true
+export HBASE_MANAGES_ZK=false

$ cat hbase-site.xml
...
<configuration>
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://ubuntu:8020/hbase</value>
    </property>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>localhost</value>
    </property>
    <property>
        <name>hbase.zookeeper.property.clientPort</name>
        <value>2181</value>
    </property>
    <property>
        <name>zookeeper.znode.parent</name>
        <value>/hbase</value>
    </property>
    <property>
        <name>phoenix.schema.isNamespaceMappingEnabled</name>
        <value>true</value>
    </property>
    <property>
        <name>phoenix.schema.mapSystemTablesToNamespace</name>
        <value>true</value>
    </property>
</configuration>

4) start hbase

$ cd $HBASE_HOME
$ bin/start-hbase.sh
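
Give it a few seconds, then check that the HBase daemons are up and that HBase has registered in ZooKeeper under the znode set by zookeeper.znode.parent:

$ jps
# expect HMaster and HRegionServer in addition to the Hadoop and ZooKeeper processes
$ ~/bigdata/zookeeper-3.8.1/bin/zkCli.sh -server 127.0.0.1:2181 ls /
# the listing should now include hbase, e.g. [hbase, zookeeper]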

5) try hbase

$ cd $HBASE_HOME
$ bin/hbase shell
> create_namespace 'manga'
Took 0.1748 seconds
> list_namespace
NAMESPACE                                                                                           
default                                                                                             
hbase                                                                                               
manga                                                                                               
3 row(s)
Took 0.0209 seconds
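
A quick table round trip exercises the full write path through HBase down to HDFS (the table and column family names here are arbitrary):

> create 'manga:fav', 'cf'
Created table manga:fav
> put 'manga:fav', 'row1', 'cf:title', 'test'
> scan 'manga:fav'
ROW                   COLUMN+CELL
 row1                 column=cf:title, timestamp=..., value=test
1 row(s)
> disable 'manga:fav'
> drop 'manga:fav'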

Note: use the following commands to stop HBase

$ cd $HBASE_HOME
$ bin/stop-hbase.sh

References:

https://zookeeper.apache.org/doc/r3.8.1/zookeeperStarted.html
https://hadoop.apache.org/docs/r2.10.2/hadoop-project-dist/hadoop-common/SingleCluster.html#Pseudo-Distributed_Operation
https://hbase.apache.org/book.html#quickstart
