jdk8 | https://repo.huaweicloud.com/java/jdk/8u202-b08/jdk-8u202-linux-x64.tar.gz |
flume 1.9.0 | https://mirrors.tuna.tsinghua.edu.cn/apache/flume/1.9.0/apache-flume-1.9.0-bin.tar.gz |
supervisor-4.2.1 | https://pypi.org/project/supervisor/4.2.1/#files |
setuptools-44.0.0 | https://mirrors.aliyun.com/pypi/packages/b0/f3/44da7482ac6da3f36f68e253cb04de37365b3dba9036a3c70773b778b485/setuptools-44.0.0.zip#sha256=e5baf7723e5bb8382fc146e33032b241efc63314211a3a120aaa55d62d2bb008 |
meld3-2.0.1 | https://mirrors.aliyun.com/pypi/packages/53/af/5b8b67d04a36980de03505446d35db39c7b2a01b9bac1cb673434769ddb8/meld3-2.0.1.tar.gz#sha256=3ea266994f1aa83507679a67b493b852c232a7905e29440a6b868558cad5e775 |
dnmp/services/flume
.
│  apache-flume-1.9.0-bin.tar.gz
│  Dockerfile
│  jdk-8u202-linux-x64.tar.gz
│  meld3-2.0.1.tar.gz
│  setuptools-44.0.0.tar.gz
│  supervisor-4.2.1.tar.gz
│  supervisord.conf
└─conf
        exec_flume.sh
        flume-conf.properties.template
        flume-env.ps1.template
        flume-env.sh
        flume-env.sh.template
        log4j.properties
        send-test-flume.conf
        test-flume.conf
supervisord.conf 是supervisord的配置文件,只是将最后的 [include] 配置项取消注释,指定到配置存放目录
[include]
files = /etc/supervisor/config.d/*.conf
# Image for a Flume-ng 1.9.0 data-collection agent on CentOS 7.
# All archives referenced by ADD must sit next to this Dockerfile in the build context.
FROM centos:centos7
LABEL MAINTAINER=mit description="FLume-ng数据采集agent" FlumeVersion=1.9.0

# Install the Java runtime. ADD auto-extracts a local tar archive into the target
# directory; JAVA_HOME assumes the archive unpacks to jdk1.8.0_202.
ADD jdk-8u202-linux-x64.tar.gz /usr/local/java
ENV JAVA_HOME=/usr/local/java/jdk1.8.0_202
ENV CLASSPATH=$JAVA_HOME/lib:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar \
    PATH=$JAVA_HOME/bin:$PATH

# Install Flume and raise the default flume-ng JVM heap from 20 MB to 2048 MB.
# The substitution is matched by pattern rather than by a fixed line number, and
# the trailing grep fails the build if the replacement did not take effect.
ADD apache-flume-1.9.0-bin.tar.gz /usr/local/
RUN mv /usr/local/apache-flume-1.9.0-bin /usr/local/flume \
    && sed -i 's/Xmx20m/Xmx2048m/' /usr/local/flume/bin/flume-ng \
    && grep -q 'Xmx2048m' /usr/local/flume/bin/flume-ng
ENV FLUME_HOME=/usr/local/flume
ENV PATH=$FLUME_HOME/bin:$PATH

########### setuptools / meld3 / supervisor: may be optional (possibly only needed for kafka) #
# Install supervisor from local source archives using the system python.
# - setuptools
ADD setuptools-44.0.0.tar.gz /opt
WORKDIR /opt/setuptools-44.0.0/
RUN python setup.py install
# - meld3
ADD meld3-2.0.1.tar.gz /opt
WORKDIR /opt/meld3-2.0.1/
RUN python setup.py install
# - supervisor
ADD supervisor-4.2.1.tar.gz /opt
WORKDIR /opt/supervisor-4.2.1/
RUN python setup.py install
# Create the include directory referenced by supervisord.conf and drop the source trees.
# NOTE: the sources were added in earlier layers, so this removal tidies the final
# filesystem but does not reduce the image size.
RUN mkdir -p /etc/supervisor/config.d \
    && rm -rf /opt/setuptools-44.0.0 /opt/meld3-2.0.1 /opt/supervisor-4.2.1
COPY supervisord.conf /etc/supervisor/
#ENTRYPOINT ["/usr/bin/supervisord", "-n", "-c", "/etc/supervisor/supervisord.conf"]
########### setuptools / meld3 / supervisor: may be optional #

# Optional build argument: time zone name such as Asia/Shanghai. It is applied only
# when it is non-empty and a matching zoneinfo file exists, so builds that do not
# pass TZ are unaffected. Declared late so changing it does not invalidate the
# layers above.
ARG TZ
RUN if [ -n "$TZ" ] && [ -f "/usr/share/zoneinfo/$TZ" ]; then \
        ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone; \
    fi

# Working directory for the container
WORKDIR /usr/local/flume
# docker-compose service entry for the Flume agent (nest it under the compose
# file's `services:` key). Builds the image from ./services/flume.
flume:
  build:
    context: ./services/flume
    args:
      TZ: "$TZ"
  image: flume
  expose:
    - 15510
  ports:
    - "15510:15510"
  container_name: flume
  environment:
    JAVA_HOME: "/usr/local/java/jdk1.8.0_202"
  restart: always
  network_mode: bridge
  # NOTE(review): privileged mode grants broad host access; confirm it is required.
  privileged: true
  volumes:
    - ./services/flume/conf:/usr/local/flume/conf # map the configuration directory
    - ./logs/flume:/usr/local/flume/logs # map the log directory
  tty: true # without this the container stops right after it starts
template文件到 apache-flume-1.9.0-bin.tar.gz
的解压包conf复制出来,因为映射会影响原容器内的内容
自己封装的运行脚本,代码可以自己研究一下
#! /bin/bash
# Control script for a Flume agent: start / stop / restart.
# Arguments: $1 = action, $2 = agent config file name under conf/, $3 = agent name.
#./exec_flume.sh start jyDataApi-flume.conf jyDataAgent
#./exec_flume.sh stop jyDataApi-flume.conf jyDataAgent
#./exec_flume.sh restart flume_cmbc.conf (config file, change as needed) Cobub (agent name, change as needed)

# Flume installation and log locations
flumePath=/usr/local/flume
flumeLogPath=/usr/local/flume/logs

# The agent is launched via the relative path bin/flume-ng, so abort if the
# installation directory cannot be entered.
cd "$flumePath" || {
    echo "cannot change directory to $flumePath"
    exit 1
}

# Agent configuration file name (looked up under $flumePath/conf)
process=$2
if [ -z "$process" ] ; then
    echo "param process is empry"
    exit 1
fi

# Agent name
AgentName=$3
if [ -z "$AgentName" ] ; then
    echo "param AgentName is empry"
    exit 1
fi

# Keyword used together with "java" and the agent name to find the agent process
JAR="flume"
# Start the agent named $AgentName with config conf/$process, unless a matching
# java process is already running. Exits 1 when the config file is missing and
# exits 0 (after a message) when the agent is already running.
function start(){
    if [ ! -f "$flumePath/conf/$process" ] ; then
        echo "file $flumePath/conf/$process no exists"
        exit 1
    fi
    # Count running java processes whose command line mentions flume and the agent name.
    num=`ps -ef|grep java|grep "$JAR"|grep "$AgentName"|wc -l`
    echo "ps -ef|grep java|grep $JAR|grep $AgentName|wc -l"
    if [ "$num" = "0" ] ; then
        # -p also creates the parent logs directory and is a no-op when the
        # directories already exist.
        mkdir -p /usr/local/flume/logs/game_log /usr/local/flume/logs/flume_log
        # Launch in the background; stdout and stderr go to flume.log.
        nohup bin/flume-ng agent -c conf -f "conf/$process" --name "$AgentName" -Dflume.root.logger=INFO,console >"$flumeLogPath/flume.log" 2>&1 &
        echo "启动成功...."
        echo "运行日志路径: $flumeLogPath/flume.log"
    else
        echo "进程已经存在,启动失败,请检查....."
        exit 0
    fi
}
# Stop the agent: send the default kill signal to every java process whose
# command line mentions flume and $AgentName.
function stop(){
    # Collect the PIDs in a single scan so the check and the kill act on the
    # same process list (no second ps run that could come back empty).
    local pids
    pids=`ps -ef|grep java|grep "$JAR"|grep "$AgentName"|awk '{print $2;}'`
    if [ -n "$pids" ] ; then
        # Intentionally unquoted: one PID per word.
        kill $pids
        echo "进程已经关闭..."
    else
        echo "服务未启动,无需停止..."
    fi
}
# Restart the agent: verify the config file exists, stop the agent, wait until
# no matching process remains, then start it again.
restart() {
    [ -f "$flumePath/conf/$process" ] || {
        echo "file $flumePath/conf/$process no exists"
        exit 1
    }
    echo "begin stop process ..."
    stop
    # Poll once per second until the agent process has fully gone away.
    while :; do
        num=$(ps -ef|grep java|grep $JAR|grep $AgentName|wc -l)
        [ $num -gt 0 ] || break
        sleep 1
    done
    echo "process stoped,and starting ..."
    start
    echo "started ..."
}
# Dispatch on the requested action ($1); anything else prints usage and exits 1.
if [[ "$1" == "start" ]]; then
    start $@
    exit 0
elif [[ "$1" == "stop" ]]; then
    stop
    exit 0
elif [[ "$1" == "restart" ]]; then
    restart $@
    exit 0
else
    echo "Usage: exec_flume {start|stop|restart}"
    exit 1
fi
将flume-env.sh.template
复制 重命名为 flume-env.sh
修改路径 要与dockerfile
安装的路径相同(也可以在docker-compose写,就不用改flume-env.sh了)
environment:
JAVA_HOME: "/usr/local/java/jdk1.8.0_202"
# Receiving agent (API log): two sources feed one memory channel, which is
# drained by a rolling-file sink.
jyDataAgent.sources = r1 r2
jyDataAgent.channels = c1
jyDataAgent.sinks = k1

# Source r1: TAILDIR, data coming from the API interface
jyDataAgent.sources.r1.type = TAILDIR
jyDataAgent.sources.r1.channels = c1
# File that records the read position of the tailed files
jyDataAgent.sources.r1.positionFile = /usr/local/flume/logs/jyDataApi_position.json
jyDataAgent.sources.r1.filegroups = f1
# Watch the log files in this directory
jyDataAgent.sources.r1.filegroups.f1 = /usr/local/flume/logs/game_log/.*log

# Source r2: avro, data coming from the game servers (listens on port 15510)
jyDataAgent.sources.r2.type = avro
jyDataAgent.sources.r2.channels = c1
jyDataAgent.sources.r2.bind = 0.0.0.0
jyDataAgent.sources.r2.port = 15510

# Channel c1: in-memory
jyDataAgent.channels.c1.type = memory
jyDataAgent.channels.c1.capacity = 1000
jyDataAgent.channels.c1.transactionCapacity = 100
jyDataAgent.channels.c1.byteCapacityBufferPercentage = 20
jyDataAgent.channels.c1.byteCapacity = 10485760

# Sink k1: write received data to rolling files (output directory and format)
jyDataAgent.sinks.k1.type = file_roll
jyDataAgent.sinks.k1.channel = c1
jyDataAgent.sinks.k1.sink.directory = /usr/local/flume/logs/flume_log
jyDataAgent.sinks.k1.sink.pathManager = rolltime
jyDataAgent.sinks.k1.sink.pathManager.extension = log
jyDataAgent.sinks.k1.sink.rollInterval = 120
# Deployed on the server where the logs are generated.
# Flume configuration file for the game server.
# - The position file (positionFile below) records how far each log file has
#   been read; adjust its path to the actual environment.
# - The channel uses the file type: create the matching checkpoint and data
#   directories and adjust the paths below.
# - The sinks target the collector nodes (jyDataNode1 / jyDataNode2); bind their
#   hosts entries and ask operations for the concrete IP addresses.

# Define the agent
jyDataAgent.sources=r1
jyDataAgent.channels=c1
jyDataAgent.sinks=k1 k2

# Source r1: TAILDIR, game-zone server logs
jyDataAgent.sources.r1.type = TAILDIR
jyDataAgent.sources.r1.positionFile = /usr/local/flume/logs/jyDataApi_position.json
jyDataAgent.sources.r1.filegroups = f1
# Watch the log files in this directory (the game side must write its logs here; adjust as needed)
jyDataAgent.sources.r1.filegroups.f1 = /usr/local/flume/logs/game_log/.*log

# Channel c1: file-backed
jyDataAgent.channels.c1.type = file
jyDataAgent.channels.c1.checkpointDir = /usr/local/flume/logs/checkpoint
jyDataAgent.channels.c1.dataDirs = /usr/local/flume/logs/data

# Sinks
## Define the sink group
jyDataAgent.sinkgroups = g1
## k1
jyDataAgent.sinks.k1.type = avro
# Send data to the given port of this host name or IP
jyDataAgent.sinks.k1.hostname = 192.168.240.141
jyDataAgent.sinks.k1.port = 15510
## k2
# NOTE(review): k2 points at the same host:port as k1, so failover has no
# distinct second target; replace with the standby collector's address.
jyDataAgent.sinks.k2.type = avro
jyDataAgent.sinks.k2.hostname = 192.168.240.141
jyDataAgent.sinks.k2.port = 15510

## Sink priorities (failover)
jyDataAgent.sinkgroups.g1.sinks = k1 k2
jyDataAgent.sinkgroups.g1.processor.type = failover
jyDataAgent.sinkgroups.g1.processor.priority.k1 = 10
jyDataAgent.sinkgroups.g1.processor.priority.k2 = 1
jyDataAgent.sinkgroups.g1.processor.maxpenalty = 10000

# Bind sources and sinks to the channel.
# (No binding for r2: this agent declares only source r1.)
jyDataAgent.sources.r1.channels=c1
jyDataAgent.sinks.k1.channel=c1
jyDataAgent.sinks.k2.channel=c1
docker exec -it flume sh
# exec_flume.sh 存放在容器内的 /usr/local/flume/conf 目录中(对应宿主机的 ./services/flume/conf),可自行修改
# 脚本启动
cd /usr/local/flume/conf/
# 启动flume
接收端执行 /bin/bash exec_flume.sh start 接收端-test-flume.conf xm2Agent
生成端执行 /bin/bash exec_flume.sh start 生成端-test-flume.conf xm2Agent
# 检查flume运行日志
cat /usr/local/flume/logs/flume.log
# 测试日志同步
echo "test" >> /usr/local/flume/logs/game_log/test.log
# 查看是否存在test.log同步记录(路径即配置中的 positionFile)
cat /usr/local/flume/logs/jyDataApi_position.json
# 停止
/bin/bash exec_flume.sh stop 接收端-test-flume.conf xm2Agent
/bin/bash exec_flume.sh stop 生成端-test-flume.conf xm2Agent
# 重启
/bin/bash exec_flume.sh restart 接收端-test-flume.conf xm2Agent
/bin/bash exec_flume.sh restart 生成端-test-flume.conf xm2Agent
上面构建的镜像只是将相应的服务打进了镜像里,使用时应挂载相应flume-ng配置和supervisor应用配置,以下为我使用docker-compose启动flume服务的相应docker-compose.yaml部分配置
# docker-compose service entry for a supervisor-managed Flume agent (nest it
# under the compose file's `services:` key).
# NOTE(review): the Dockerfile in this document leaves its supervisord ENTRYPOINT
# commented out; confirm how supervisord is started for this service.
flume-01:
  image: flume:1.9.0
  container_name: flume-01
  hostname: flume-01
  restart: always
  ports:
    - "6000:6000" # port the flume-ng source listens on
  volumes:
    - /etc/localtime:/etc/localtime:ro
    # supervisor program definition for flume; mounted into the directory that
    # supervisord.conf's [include] section reads (/etc/supervisor/config.d/*.conf)
    - /home/mit/my_project/big_data/data_collect/flume/supervisord.d/flume.conf:/etc/supervisor/config.d/flume.conf
    - /home/mit/my_project/big_data/data_collect/flume/conf/http_kafka_channel.conf:/usr/local/flume/conf/http_kafka_channel.conf # config file the flume-ng agent starts with
    - /home/mit/my_project/big_data/data_collect/flume/conf/log4j.properties:/usr/local/flume/conf/log4j.properties
    - /home/mit/my_project/big_data/data_collect/flume/data:/usr/local/flume/data # FileChannel data dir
    - /home/mit/my_project/big_data/data_collect/flume/checkpoint:/usr/local/flume/checkpoint # FileChannel checkpoint dir
    - /home/mit/my_project/big_data/data_collect/flume/logs:/usr/local/flume/logs # flume-ng agent logs
    - /home/mit/my_project/big_data/data_collect/flume/lib/fastjson-1.2.59.jar:/usr/local/flume/lib/fastjson-1.2.59.jar # every jar the custom interceptor depends on must be listed
    - /home/mit/my_project/big_data/data_collect/flume/lib/TQDataInterceptor-0.1.jar:/usr/local/flume/lib/TQDataInterceptor-0.1.jar
  environment:
    # Must match the JDK location baked into the image (JAVA_HOME in the Dockerfile).
    JAVA_HOME: "/usr/local/java/jdk1.8.0_202"
  depends_on:
    - kafka-01
    - kafka-02
    - kafka-03