Big Data Development Learning Platform: Installation and Configuration

Getting Started: The Basics

This guide skips setting up passwordless SSH login between the servers and creating and configuring the related accounts and permissions.
For those steps, see: passwordless login, and adding and configuring Linux accounts.
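For reference only, the passwordless-login step usually amounts to something like the following, run on master (the hadoop account name is an assumption):

# generate a key pair (accept the defaults), then push the public key to every node
ssh-keygen -t rsa
ssh-copy-id hadoop@master
ssh-copy-id hadoop@slave1
ssh-copy-id hadoop@slave2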

Software versions

jdk
jdk-8u91-linux-x64.tar.gz

scala
scala-2.10.6.tgz

hadoop
hadoop-2.6.4.tar.gz
hadoop-2.6.4-src.tar.gz (source code)

mahout
apache-mahout-distribution-0.12.2.tar.gz

hive
apache-hive-2.1.0-bin.tar.gz
mysql-connector-java-5.1.39.tar.gz (MySQL JDBC driver)

spark
spark-1.6.2-bin-hadoop2.6.tgz (choose the build that matches your Hadoop version)
spark-1.6.2.tgz (source code)

hbase
zookeeper-3.4.8.tar.gz (install ZooKeeper beforehand)
hbase-1.2.2-bin.tar.gz

storm
apache-storm-1.0.2.tar.gz
apache-storm-1.0.2-src.tar.gz (source code)

sqoop
sqoop-1.99.6-bin-hadoop200.tar.gz

Three cluster modes

local (single machine)

standalone

yarn


Configuration checklist

JDK

profile

export JAVA_HOME=/usr/local/jdk1.8.0_91
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin
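Reload the profile and confirm the JDK is picked up:

source ~/.bash_profile
java -version    # should report 1.8.0_91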

scala

profile

# scala
export SCALA_HOME=/usr/local/scala-2.10.6
export PATH=$PATH:$SCALA_HOME/bin

Hadoop

profile

# hadoop
export HADOOP_HOME=/usr/local/hadoop-2.6.4
export HADOOP_PREFIX=$HADOOP_HOME
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

*.env

$HADOOP_HOME/etc/hadoop/hadoop-env.sh, $HADOOP_HOME/etc/hadoop/mapred-env.sh, $HADOOP_HOME/etc/hadoop/yarn-env.sh

source ~/.bash_profile

# For yarn-env.sh, setting JAVA_HOME is enough; without it the ResourceManager and NodeManager classes cannot be found:
# Error: Could not find or load main class org.apache.hadoop.yarn.server.resourcemanager.ResourceManager
# Error: Could not find or load main class org.apache.hadoop.yarn.server.nodemanager.NodeManager
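In practice, setting JAVA_HOME explicitly in each of the three scripts is the safest way to avoid the errors above; a minimal addition (path taken from the JDK section):

# appended to hadoop-env.sh, mapred-env.sh and yarn-env.sh
export JAVA_HOME=/usr/local/jdk1.8.0_91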

slaves

$HADOOP_HOME/etc/hadoop/slaves

slave1
slave2

core-site

$HADOOP_HOME/etc/hadoop/core-site.xml


    
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/home/hadoop/tmp</value>
        <description>A base for other temporary directories.</description>
    </property>

    <property>
        <name>fs.default.name</name>
        <value>hdfs://master:9000</value>
        <final>true</final>
        <description>The name of the default file system.  A URI whose
        scheme and authority determine the FileSystem implementation.  The
        uri's scheme determines the config property (fs.SCHEME.impl) naming
        the FileSystem implementation class.  The uri's authority is used to
        determine the host, port, etc. for a filesystem.</description>
    </property>
</configuration>

hdfs-site

$HADOOP_HOME/etc/hadoop/hdfs-site.xml


  
<configuration>
  <property>
    <name>dfs.datanode.ipc.address</name>
    <value>0.0.0.0:50020</value>
  </property>
  <property>
    <name>dfs.datanode.http.address</name>
    <value>0.0.0.0:50075</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
</configuration>


mapred-site

$HADOOP_HOME/etc/hadoop/mapred-site.xml


    
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>

    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
    </property>

    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>master:19888</value>
    </property>
</configuration>

yarn-site

$HADOOP_HOME/etc/hadoop/yarn-site.xml


    
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>

    <property>
        <name>yarn.resourcemanager.address</name>
        <value>master:8032</value>
    </property>

    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>master:8030</value>
    </property>

    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>master:8031</value>
    </property>
</configuration>


Start HDFS and YARN

# Format (initialize) HDFS
$HADOOP_HOME/bin/hdfs namenode -format

# Start HDFS
$HADOOP_HOME/sbin/start-dfs.sh

# Start YARN
$HADOOP_HOME/sbin/start-yarn.sh

# Start the MapReduce JobHistory server
$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
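Once everything is up, jps on each node gives a quick sanity check:

jps
# expected on master: NameNode, SecondaryNameNode, ResourceManager, JobHistoryServer
# expected on slave1/slave2: DataNode, NodeManager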

Test

$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.4.jar wordcount <input> <output>
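The input directory must already exist in HDFS and the output directory must not; a concrete run might look like this (the /user/hadoop paths are only an example):

# stage some text files as input
hdfs dfs -mkdir -p /user/hadoop/input
hdfs dfs -put $HADOOP_HOME/etc/hadoop/*.xml /user/hadoop/input

# run wordcount and inspect the result
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.4.jar wordcount /user/hadoop/input /user/hadoop/output
hdfs dfs -cat /user/hadoop/output/part-r-00000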

Ports

Port    Description
8088    All Applications (YARN web UI)
9000    HDFS (NameNode RPC)
50070   NameNode information (web UI)
50090   SecondaryNameNode information (web UI)
19888   JobHistory (web UI)

pig

profile

# pig
export PIG_HOME=/usr/local/pig-0.16.0
export PIG_CLASSPATH=$HADOOP_HOME/etc/hadoop
export PATH=$PATH:$PIG_HOME/bin

spark

profile

# spark
export SPARK_HOME=/usr/local/spark-1.6.2-bin-hadoop2.6
export PATH=$PATH:$SPARK_HOME/bin

spark-env

$SPARK_HOME/conf/spark-env.sh

# applies to local, standalone, and yarn modes
source ~/.bash_profile
export SPARK_MASTER_IP=master

# job history
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=7777 -Dspark.history.retainedApplications=2 -Dspark.history.fs.logDirectory=hdfs://master:9000/sparklog"

# other settings
export SPARK_WORKER_CORES=2
export SPARK_WORKER_MEMORY=1G

slaves

$SPARK_HOME/conf/slaves

slave1
slave2

spark-defaults.conf

$SPARK_HOME/conf/spark-defaults.conf

# job history settings
spark.eventLog.enabled  true
spark.eventLog.dir      hdfs://master:9000/sparklog
spark.eventLog.compress true
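Both spark-env.sh and spark-defaults.conf point event logging at hdfs://master:9000/sparklog, so the directory has to exist before the history server is started:

hdfs dfs -mkdir -p /sparklog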

Start Spark

# Start the standalone cluster (Master and Workers)
$SPARK_HOME/sbin/start-all.sh

# Start the HistoryServer
$SPARK_HOME/sbin/start-history-server.sh

Test

# 1
$SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi $SPARK_HOME/lib/spark-examples-1.6.2-hadoop2.6.0.jar

# 2
MASTER=local $SPARK_HOME/bin/run-example SparkPi
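To exercise the standalone cluster interactively, spark-shell can be attached to the master started above and given a tiny job:

$SPARK_HOME/bin/spark-shell --master spark://master:7077
# inside the shell:
# scala> sc.parallelize(1 to 100).sum()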

Ports

Port    Description
4040    Application (web UI)
7077    Spark Master
7777    History Server (web UI)

zookeeper

profile

# zookeeper
export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.8
export PATH=$PATH:$ZOOKEEPER_HOME/bin

zoo.cfg

# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial 
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between 
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
dataDir=/home/zookeeper/data
# dataLogDir=/home/zookeeper/logs
# the port at which the clients will connect
clientPort=2181

server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888

myid

Create the myid file at /home/zookeeper/data/myid

# Each machine needs its own id; the number must match the corresponding server.* entry in zoo.cfg above
echo 1 > /home/zookeeper/data/myid    # on master
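The other two nodes get the ids declared in zoo.cfg:

echo 2 > /home/zookeeper/data/myid    # on slave1
echo 3 > /home/zookeeper/data/myid    # on slave2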

Start

$ZOOKEEPER_HOME/bin/zkServer.sh start
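After starting it on all three nodes, the status subcommand shows whether each node is a follower or the leader:

$ZOOKEEPER_HOME/bin/zkServer.sh status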

Ports

Port    Description
2181    Client connections
2888    Quorum (follower-to-leader) communication
3888    Leader election

hbase

profile

# hbase
export HBASE_HOME=/usr/local/hbase-1.2.2
export PATH=$PATH:$HBASE_HOME/bin

hbase-env

$HBASE_HOME/conf/hbase-env.sh

source ~/.bash_profile
# export JAVA_HOME HADOOP_HOME HBASE_HOME
export HBASE_CLASSPATH=$HADOOP_HOME/etc/hadoop
export HBASE_MANAGES_ZK=false    # ZooKeeper runs as the external ensemble set up above, not managed by HBase
export HBASE_LOG_DIR=$HBASE_HOME/logs

regionservers

$HBASE_HOME/conf/regionservers

slave1
slave2

hbase-site.xml

$HBASE_HOME/conf/hbase-site.xml


  
<configuration>
  <property>
    <name>hbase.master</name>
    <value>master:6000</value>
  </property>
  <property>
    <name>hbase.master.maxclockskew</name>
    <value>180000</value>
  </property>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://master:9000/hbase</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>master,slave1,slave2</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/home/zookeeper/data</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>

Start HBase

$HBASE_HOME/bin/start-hbase.sh

Test

$HBASE_HOME/bin/hbase shell
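A quick smoke test inside the shell (table and column family names are only examples):

create 'test', 'cf'
put 'test', 'row1', 'cf:a', 'value1'
scan 'test'
disable 'test'
drop 'test'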

Ports

Port    Description
16010   HBase Master (web UI)

hive

profile

# hive
export HIVE_HOME=/usr/local/apache-hive-2.1.0-bin
export PATH=$PATH:$HIVE_HOME/bin

hive-env.sh

$HIVE_HOME/conf/hive-env.sh

source ~/.bash_profile
export HIVE_CONF_DIR=$HIVE_HOME/conf

hive-site.xml

$HIVE_HOME/conf/hive-site.xml




  
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
    <description>username to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
    <description>password to use against metastore database</description>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>hdfs://master:9000/user/hive/warehouse</value>
  </property>
  <property>
    <name>hive.hwi.listen.host</name>
    <value>0.0.0.0</value>
  </property>
  <property>
    <name>hive.hwi.listen.port</name>
    <value>9999</value>
  </property>
  <property>
    <name>hive.hwi.war.file</name>
    <value>lib/hive-hwi-2.1.0.war</value>
  </property>
</configuration>

Initialization
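With Hive 2.x the metastore schema has to be initialized once before first use; for the MySQL metastore configured above:

$HIVE_HOME/bin/schematool -dbType mysql -initSchema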

Test

$HIVE_HOME/bin/hive
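A minimal check inside the CLI (the table name is only an example):

show databases;
create table t_test (id int, name string);
show tables;
drop table t_test;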

sqoop

profile

# sqoop
export SQOOP_HOME=/usr/local/sqoop-1.99.6-bin-hadoop200
export PATH=$PATH:$SQOOP_HOME/bin
export CATALINA_BASE=$SQOOP_HOME/server
export LOGDIR=$SQOOP_HOME/logs

Test

# start the server
$SQOOP_HOME/bin/sqoop2-server start

# cli
$SQOOP_HOME/bin/sqoop2-shell
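Inside sqoop2-shell, the client first has to be pointed at the server; in Sqoop 1.99.x this looks roughly like:

set server --host master --port 12000 --webapp sqoop
show version --all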

mahout
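Not written up yet; as a placeholder, a minimal profile entry following the same pattern as the other components (install path assumed from the tarball listed above):

# mahout
export MAHOUT_HOME=/usr/local/apache-mahout-distribution-0.12.2
export PATH=$PATH:$MAHOUT_HOME/bin
# export MAHOUT_LOCAL=true    # set to run against the local filesystem instead of HDFS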

storm
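Likewise still to be written; a minimal profile entry (install path assumed from the tarball listed above):

# storm
export STORM_HOME=/usr/local/apache-storm-1.0.2
export PATH=$PATH:$STORM_HOME/bin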

Build and package management tools

maven

Download
Just add the bin directory to PATH (a minimal profile entry is sketched below).
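A sketch of the corresponding profile entry (the Maven version and install path are assumptions):

# maven
export M2_HOME=/usr/local/apache-maven-3.3.9
export PATH=$PATH:$M2_HOME/bin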

sbt

Homebrew (Third-party package)

$ brew install sbt

Macports (Third-party package)

$ port install sbt

Download

To be continued...
