阿里云搭建集群环境

#修改主机名
vim /etc/sysconfig/network

#创建文件夹
mkdir -p /usr/application/dev-env
mkdir -p /usr/application/dev-fm

#跨机器复制
scp jdk-8u111-linux-x64.tar.gz root@node2:/usr/application/dev-env
scp jdk-8u111-linux-x64.tar.gz root@node3:/usr/application/dev-env

scp scala-2.11.2.tgz root@node2:/usr/application/dev-env
scp scala-2.11.2.tgz root@node3:/usr/application/dev-env

scp -v * root@node2:/usr/application/dev-fm
scp -v * root@node3:/usr/application/dev-fm

#安装java
tar -xzvf jdk-8u111-linux-x64.tar.gz 
tar -xzvf scala-2.11.2.tgz 
vim /etc/profile

JAVA_HOME=/usr/application/dev-env/jdk1.8.0_111
JRE_HOME=/usr/application/dev-env/jdk1.8.0_111
SCALA_HOME=/usr/application/dev-env/scala-2.11.2
CLASSPATH=$JAVA_HOME/lib:$JRE_HOME/lib
PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin:$SCALA_HOME/bin
export PATH CLASSPATH JAVA_HOME JRE_HOME SCALA_HOME
source /etc/profile

#配置免密码登录
cd ~/
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cd .ssh
cat id_rsa.pub >> authorized_keys

汇总公钥到一个文件

vim  /etc/ssh/sshd_config 
RSAAuthentication yes
PubkeyAuthentication yes
AuthorizedKeysFile  /root/.ssh/authorized_keys


搭建Flume环境

节点 node1, node2, node3
1.在node1,node2,node3分别部署agent采集端
2.在node3上部署collector端,汇集node1,node2,node3中的监控数据并暂存到本地文件系统。

该Flume系统用于收集系统运行数据,包括内存、硬盘、系统信息等。
利用Sigar采集系统数据,然后借助flume agent作为启动外壳,用socket向本机的tcp端口发送数据。

具体操作步骤:
1.编写自定义Source作为任务启动外壳。代码见:
https://github.com/MouseSong/Big-Data/tree/master/bigdata_flume_customer

2.上传sigar so文件到classpath(三个节点重复此操作)

这里写图片描述

3.上传自定义Source jar包到 flume_home/lib目录(三个节点重复此操作)
4.编写启动sigar的外壳agent配置文件

sigar-shell.conf
#defined name of source channel and sink

a1.sources=r1
a1.channels=c1
a1.sinks=k1

#configure sigar source
a1.sources.r1.type=customer.source.SigarSource
a1.sources.r1.no=node1
a1.sources.r1.hostname=node1
a1.sources.r1.port=4141

#configure channel
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100

#configure sink
a1.sinks.k1.type=logger


a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1


5.编写数据收集agent配置文件
hostinfo.conf
#defined name of source/channel/sink
a1.sources=r1
a1.channels=c1
a1.sinks=k1

#configure syslogtcp source
a1.sources.r1.type=syslogtcp
a1.sources.r1.host=node1
a1.sources.r1.port=4141

a1.sources.r1.interceptors=i1 i2
a1.sources.r1.interceptors.i1.type=host
a1.sources.r1.interceptors.i1.hostHeader=ip
a1.sources.r1.interceptors.i2.type=timestamp

#configure channel
a1.channels.c1.type=memory
a1.channels.c1.capacity=10000
a1.channels.c1.transactionCapacity=1000


#configure avro sink
a1.sinks.k1.type=avro
a1.sinks.k1.hostname=node3
a1.sinks.k1.port=5555

a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1

6.将上述两个配置文件复制到node2,node3节点并修改node1为对应的主机名或ip

7.编写collector配置文件
#defined name of source , channel and sink
a1.sources = r1
a1.sinks = k1
a1.channels = c1

#configure avro source
a1.sources.r1.type = avro
a1.sources.r1.channels = c1
a1.sources.r1.bind = node3
a1.sources.r1.port = 5555

#configure logger sink
a1.sinks.k1.type = file_roll
a1.sinks.k1.sink.directory=/usr/application/dev-fm/flume1.7.0/temp/datas
a1.sinks.k1.sink.rollSize=5096000000
a1.sinks.k1.sink.round=true
a1.sinks.k1.sink.roundValue=12
a1.sinks.k1.sink.roundUnit=hour
a1.sinks.k1.channel=c1

#configure channel
a1.channels.c1.type = memory
a1.channels.c1.capacity = 10000
a1.channels.c1.transactionCapacity = 1000             

搭建Hadoop环境

1.下载解压(略)
2.编辑环境变量
#java
JAVA_HOME=/usr/application/dev-env/jdk1.8.0_111
JRE_HOME=/usr/application/dev-env/jdk1.8.0_111
#scala
SCALA_HOME=/usr/application/dev-env/scala-2.11.2
#hadoop
HADOOP_HOME=/usr/application/dev-fm/hadoop-2.5.2
YARN_HOME=/usr/application/dev-fm/hadoop-2.5.2
CLASSPATH=$JAVA_HOME/lib:$JRE_HOME/lib
PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin
export PATH CLASSPATH JAVA_HOME JRE_HOME SCALA_HOME
export HADOOP_HOME YARN_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop

3.修改hadoop-env.sh
export JAVA_HOME=/usr/application/dev-env/jdk1.8.0_111

4.修改slaves
node1
node2
node3

5.修改core-site.xml
<configuration>
   <property>
     <name>fs.default.name</name>
     <value>hdfs://node1:9000</value>
   </property>
</configuration>

6.修改hdfs-site.xml
<configuration>

  <property>
     <name>dfs.name.dir</name>
     <value>/usr/application/dev-fm/hadoop-2.5.2/hadoop_filesystem/name</value>
  </property>
  <property>
     <name>dfs.data.dir</name>
     <value>/usr/application/dev-fm/hadoop-2.5.2/hadoop_filesystem/data</value>
  </property>
  <property>
     <name>dfs.replication</name>
     <value>3</value>
  </property>
  <property>
     <name>dfs.permissions</name>
     <value>false</value>
  </property>
</configuration>

7.修改mapred-site.xml
<configuration>
    <property>
       <name>mapred.job.tracker</name>
       <value>node1:9001</value>
    </property>
    <property>
       <name>mapred.tasktracker.map.tasks.maximum</name>
       <value>4</value>
    </property>
    <property>
       <name>mapred.tasktracker.reduce.tasks.maximum</name>
       <value>4</value>
    </property>
</configuration>

8.复制源文件到另外两台机器
>scp -r hadoop-2.5.2/ root@node2:/usr/application/dev-fm/
>scp -r hadoop-2.5.2/ root@node3:/usr/application/dev-fm/

9.格式化namenode
>hadoop namenode -format

10.启动hadoop
>./start-all.sh

搭建Zookeeper集群

1.下载zookeeper
http://zookeeper.apache.org/

2.解压安装包
>tar -xzvf zookeeper-3.4.6.tar.gz

3.创建zookeeper数据目录
>mkdir -p /usr/application/dev-fm/zookeeper-3.4.6/zk_datas

4.修改配置文件
>cd conf/
>cp zoo_sample.cfg zoo.cfg
>vim zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial 
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between 
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just 
# example sakes.
dataDir=/usr/application/dev-fm/zookeeper-3.4.6/zk_datas
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the 
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
server.1=outer1:2888:3888
server.2=outer2:2888:3888
server.3=outer3:2888:3888


5.在zookeeper数据目录下创建myid文件并赋予id值
>echo "1" >> myid

6.将配置好的zookeeper目录复制到node2, node3
>scp -r zookeeper-3.4.6/ root@node2:/usr/application/dev-fm
>scp -r zookeeper-3.4.6/ root@node3:/usr/application/dev-fm

7.修改node2和node3中myid的值为2,3

8.启动三个zookeeper节点

搭建Kafka集群

1.下载安装包 http://kafka.apache.org/
2.解压
>tar -xvzf  kafka_2.11-0.9.0.0.tgz

3.编辑配置文件
>vim config/server.properties
broker.id=0
host.name=outer1
log.dirs=/usr/application/dev-fm/kafka0.9/datas
zookeeper.connect=outer1:2181,outer2:2181,outer3:2181

4.将kafka安装文件复制到node2,node3中,并修改broker.id为1,2, 修改host.name 为 outer2, outer3

5.启动三个节点的kafka broker
>nohup bin/kafka-server-start.sh config/server.properties > ./kafka.log 2>&1 &

配置Flume Kafka Sink

#defined name of source , channel and sink
collector.sources = r1
collector.sinks = k1 k2
collector.channels = c1

#configure avro source
collector.sources.r1.type = avro
collector.sources.r1.channels = c1
collector.sources.r1.bind = outer3
collector.sources.r1.port = 5555

#configure logger sink
#collector.sinks.k2.type = file_roll
#collector.sinks.k2.sink.directory=/usr/application/dev-fm/flume1.7.0/temp/datas
#collector.sinks.k2.sink.rollSize=5096000000
#collector.sinks.k2.sink.round=true
#collector.sinks.k2.sink.roundValue=12
#collector.sinks.k2.sink.roundUnit=hour
#collector.sinks.k2.channel=c1

collector.sinks.k2.type=org.apache.flume.sink.kafka.KafkaSink
collector.sinks.k2.brokerList=outer1:9092
collector.sinks.k2.partitioner.class=org.apache.flume.plugins.SinglePartition
collector.sinks.k2.partition.key=1
collector.sinks.k2.serializer.class=kafka.serializer.StringEncoder
collector.sinks.k2.request.required.acks=0
collector.sinks.k2.max.message.size=1000000
collector.sinks.k2.kafka.topic=test
collector.sinks.k2.channel=c1

collector.sinks.k1.type=hdfs
collector.sinks.k1.hdfs.path=hdfs://outer1:9000/flume/hostinfo
collector.sinks.k1.hdfs.filePrefix=hostinfo
collector.sinks.k1.hdfs.minBlockReplicas=1
collector.sinks.k1.hdfs.rollInterval=600
collector.sinks.k1.hdfs.rollSize=0
collector.sinks.k1.hdfs.rollCount=0
collector.sinks.k1.hdfs.idleTimeout=0

collector.sinks.k1.channel=c1
#configure channel
collector.channels.c1.type = memory
collector.channels.c1.capacity = 10000
collector.channels.c1.transactionCapacity = 1000

你可能感兴趣的:(Tool)