AIOPS Probe Deployment Guide

rsyncd Service Configuration on Linux Servers

Check remaining server resources

# on Linux, check with the following commands
free -m
top
df -Th

Ansible deployment steps

Create the user that will run the services

useradd logmanager -u 500 -M -G group1,group2
usermod -L logmanager
# -M  do not create a home directory for the user
# -L  (usermod) lock the account so that only root can switch to this user
# -G  specify supplementary groups; separate multiple groups with ','

If the log files to be collected are in a directory the logmanager user cannot enter, grant access in one of the following ways

# 1. if members of the owning group can enter the target directory, add logmanager to that group
[root@localhost /home]$ ll -al
...
drwxr-x---@   3 user  group    96B  1 29 23:03 Directory
...
[root@localhost /home]$ usermod -aG group logmanager
# 2. grant the logmanager user ACL permissions on the directory
[root@localhost /home]$ setfacl -m u:logmanager:rx /path/to/Directory
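
After granting access, it is worth verifying the effective permissions. A minimal check, reusing the example directory and user above:

# confirm the ACL entry (or group membership) was applied
getfacl /path/to/Directory
# confirm logmanager can actually list the directory
sudo -u logmanager ls /path/to/Directory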

Check whether /usr/bin/jps exists

ls -l /usr/bin/jps

If the command does not exist, create a symbolic link to fix it

ln -s $JAVA_HOME/bin/jps /usr/bin/jps
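
A quick sanity check that the link resolves and that the JDK tools run (this assumes $JAVA_HOME points at a full JDK rather than a JRE):

/usr/bin/jps -l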

The central collection server needs local name resolution for the kafka cluster;

without it, flume will fail to start because it cannot resolve the kafka cluster.

# add the following with vim; the kafka node hostnames must match the hostnames and IP addresses used in the ansible deployment
vim /etc/hosts
1.1.1.1   node-1 
1.1.1.2   node-2 
1.1.1.3   node-3   
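
You can confirm the entries resolve before starting flume. A minimal check using the example hostnames above:

getent hosts node-1 node-2 node-3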

Make sure the configuration files in the following locations are set up correctly

# change to the directory containing the deployment files
cd /path/to/deployFilePath

Server group configuration file

./inventory/inventory.ini

Make sure the following content exists and reflects the servers being deployed this time

[all] # defines the IP address ansible uses to connect to each node
node-1 ansible_host=1.1.1.1
node-2 ansible_host=1.1.1.2
node-3 ansible_host=1.1.1.3

[kafka] # data is collected by flume or filebeat, sent to logstash, and written by logstash into kafka; if the kafka cluster and the flume nodes are not in the same cluster, define the IP address of each kafka server here or in a separate group
node-1 kafka_id=1
node-2 kafka_id=2
node-3 kafka_id=3

# for the master group it is recommended to configure only one effective server per deployment, and to point the slave nodes at that server
[flume-master] # flume central log collection server; JAVA_HOME must be configured correctly
node-2 javaHome='/usr/java/jdk1.8.0_261-amd64'

[flume-slave] # servers whose logs are collected; JAVA_HOME must be configured correctly
node-1 skipToEnd=true javaHome='/usr/java/jdk1.8.0_261-amd64'
node-3 skipToEnd=true javaHome='/usr/java/jdk1.8.0_261-amd64'

[rsyncd] # flume central log collection node; hosts the rsync module that receives the probe status logs
node-2

[flume:children] # aggregate host group, do not modify
flume-master
flume-slave
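
Before running any playbook, it helps to confirm that ansible can reach every host in the inventory. A minimal connectivity check, assuming SSH access is already set up:

ansible -i inventory/inventory.ini all -m ping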

Make sure the entry playbooks exist and are configured correctly

`./agent.yml`: entry playbook for deploying the probe on CentOS 7 systems

---
# create the user and install java on the specified nodes
- hosts: flume-slave
  gather_facts: true
  max_fail_percentage: 0
  roles:
  - agent-java
# install flume on the specified nodes
- hosts: flume-slave
  gather_facts: false
  max_fail_percentage: 0
  roles:
  - agent
# create the user on the master node
- hosts: flume-master
  gather_facts: true
  max_fail_percentage: 0
  roles:
  - agent-java
# install flume on the master node
- hosts: flume-master
  gather_facts: false
  max_fail_percentage: 0
  roles:
  - agent
# configure the rsyncd service on the master node and create the agent-status module; received logs are stored under /data/agent-status
- hosts: flume-master
  gather_facts: true
  max_fail_percentage: 0
  roles:
  - rsyncd
# render flume-conf.properties for the flume-master role and set up the cron jobs that check flume status and periodically clean the agent-status logs
- hosts: flume-master
  gather_facts: false
  max_fail_percentage: 0
  roles:
  - agent-master
# configure the logstash service on the master node to receive data from the probes and forward it to kafka
- hosts: flume-master
  gather_facts: false
  max_fail_percentage: 0
  roles:
  - agent-logstash
# render flume-conf.properties for the flume-slave role and set up the cron jobs that check flume status and periodically clean the agent-status logs
- hosts: flume-slave
  gather_facts: false
  max_fail_percentage: 0
  roles:
  - agent-slave

`./agent6.yml`: entry playbook for deploying the probe on CentOS 6 servers (the master node must be CentOS 7)

---
# install java and create the user on CentOS 6 servers
- hosts: flume-slave6
  gather_facts: true
  max_fail_percentage: 0
  roles:
  - agent6-java
# install the flume component on CentOS 6 servers
- hosts: flume-slave6
  gather_facts: false
  max_fail_percentage: 0
  roles:
  - agent6

- hosts: flume-master
  gather_facts: true
  max_fail_percentage: 0
  roles:
  - rsyncd

- hosts: flume-master
  gather_facts: false
  max_fail_percentage: 0
  roles:
  - agent-master

- hosts: flume-master
  gather_facts: false
  max_fail_percentage: 0
  roles:
  - agent-logstash
# deploy the flume configuration file on CentOS 6 servers, start the service, and set up the cron jobs that clean logs and check monitoring status
- hosts: flume-slave6
  gather_facts: false
  max_fail_percentage: 0
  roles:
  - agent6-slave

Variable definition files used by the ansible templates

  • ./inventory/group_vars/flume.yml
# flume variable
flumeBase: '/opt'
dataPathBase: '/opt'
flumeInstallBase: '{{flumeBase}}/flume'
flumeuser: logmanager
flumegroup: logmanager
flume:
  pkgName: apache-flume-1.7.0-bin.tgz
  javahomeconfig: java
  flumeDir: '{{flumeInstallBase}}'
  dataDir: '{{flumeInstallBase}}/data'
  logDir: '{{flumeInstallBase}}/logs'
  jdkversion: 'java-1.8.0-openjdk-1.8.0.262.b10-0.el7_8.x86_64'
  logMasterDir: '{{dataPathBase}}/agent-status'
  logRetentionDays: '{{ defaultLogRetentionDays }}'
  user: '{{ flumeuser }}'
  group: '{{ flumegroup }}'
  server: '172.20.3.248'
  jvmHeapSize: 512m
  # 配置文件中变量值连接符需使用"_"
  masterPort: 4541

rsync:
  rsyncdPath: /data/agent-status
  rsyncPort: 873
  logRetentionDays: 7

flumeLogstash:
  version: 7.8.0
  pkgName: logstash-7.8.0.tgz
  Xms: '512m'
  user: '{{ flumeuser }}'
  group: '{{ flumegroup }}'
  checkPoint: 256
  nodeName: 'logmanager'
  dataPath: '{{dataPathBase}}/logstash/data'
  logPath: '{{dataPathBase}}/logstash/logs'
  port: 5044
  id: 'logmanager-filebeat'
  topic_id: 'logmanager-filebeat'
  pipelineItem:
    'logmanager-filebeat': '/opt/logstash/config/conf.d/filebeat.yml'
  logRetentionDays: 7
  • ./inventory/host_vars/%hostname%.yml  host variable definition file for the master node

    Note: each %hostname%.yml must match the corresponding server name in inventory.ini, otherwise the run will fail.

sourcedefine:
  - name: r1
    type: avro
    channels: c1
    bind: '{{ ansible_hostname }}'
    port: 4541
    selectorenable: true
    selector:
      type: multiplexing
      header: kafkaTopic
      kafkatopic: logmanager-flume
      channels: c1
  - name: r2
    type: 'com.cyclone.data.flume.source.TaildirRecursiveSource'
    channels: c1
    udfenable: false
    filegroup:
      - name: fg
        path: '/data/agent-status/.*\\.log'
    skiptoend: false
  - name: r3
    type: 'com.cyclone.data.flume.source.TaildirRecursiveSource'
    filegroup:
      - name: flume
        path: '{{flumeBase}}/flume/logs/.*\\.log'
        rulename: flume
        storename: agent_proxy
      - name: logstash
        path: '/opt/logstash/logs/.*\\.log'
        rulename: logstash
        storename: agent_proxy
      - name: webservice
        path: '/data/webservice/.*'
        rulename: webservice
        storename: webservice
    udfenable: true
    interceptorsenable: true
    skiptoend: false
    interceptors:
      - name: i1
        kv:
          - type: timestamp
      - name: i2
        kv:
          - type: host
          - hostHeader: '@ip'
      - name: i3
        kv:
          - type: com.cyclone.data.flume.interceptor.TimezoneInterceptor$Builder
          - timezoneHeader: '@time_zone'
      - name: i4
        kv:
          - type: static
          - key: '@store_name'
          - value: agent-proxy
      - name: i5
        kv:
          - type: static
          - key: '@log_identifier'
          - value: 'hostname:{{ ansible_hostname }}##ip:{{ ansible_host }}'
      - name: i6
        kv:
          - type: static
          - key: key
          - value: '{{ ansible_host }}'
sinkdefine:
  - name: k1
    channel: c1
    type: 'com.cyclone.data.flume.sink.KafkaLightSink'
    bindingname: "{{ groups['kafka'] | list }}"
    bindingport: '{{ kafka.port }}'
    kafkatopic: logmanager-flume
    producerack: all
    producerlinger: 'linger.ms'
    producercompression: snappy
  - name: k2
    channel: c2
    type: 'org.apache.flume.sink.kafka.KafkaSink'
    bindingname: "{{ groups['kafka'] | list }}"
    bindingport: '{{ kafka.port }}'
    kafkatopic: agent-status
    producerack: all
    producerlinger: 'max.in.flight.requests.per.connection'
    #producercompression: snappy
  - name: k3
    channel: c3
    type: 'com.cyclone.data.flume.sink.KafkaLightSink'
    bindingname: "{{ groups['kafka'] | list }}"
    bindingport: '{{ kafka.port }}'
    kafkatopic: logmanager-flume
    producerack: all
    producerlinger: 'linger.ms'
    producercompression: snappy

channeldefine:
  - name: c1
    type: file
  - name: c2
    type: file
  - name: c3
    type: file
  • ./inventory/host_vars/%hostname%.yml  host variable definition file for the slave nodes

    Note: each %hostname%.yml must match the corresponding server name in inventory.ini, otherwise the run will fail.

sourcedefine:
  - name: r1
    tochannel: c1
    interceptors: true
    filegroups:
      - path: '/home/lsj/logs/test\\.log'
        filegroup: test1
        kafkatopic: logmanager-flumeA
    rulename: test1
    storename: bigdata
    skiptoend: false

sinkdefine:
  - name: k1
    tochannel: c1
    type: avro
    bindingname: "{{ groups['kafka'] | list }}"
    bindingport: 4541

channeldefine:
  - name: c1
    type: file
    checkpointdir: '/opt/flume/checkpointDir/c1'
    dataDirs: '/opt/flume/fileChannel/c1'

Deployment roles executed by the ansible playbooks

  • <install dir>/roles/agent: installs flume on CentOS 7 systems

  • <install dir>/roles/agent-master: deploys the flume master node configuration on CentOS 7 and optionally starts the service (to start it, uncomment the step named start service in roles/agent-master/tasks/main.yml)

  • <install dir>/roles/agent-slave: deploys the flume slave node configuration on CentOS 7 and optionally starts the service (to start it, uncomment the step named start service in roles/agent-slave/tasks/main.yml)

  • <install dir>/roles/agent-java: creates the service user and installs the java component on CentOS systems

  • <install dir>/roles/agent-logstash: installs, configures, and starts the logstash component on CentOS 7 systems

  • <install dir>/roles/rsyncd: installs and configures the rsync module that receives server logs on CentOS 7 systems

  • <install dir>/roles/agent6: installs flume on CentOS 6 systems

  • <install dir>/roles/agent6-java: creates the service user and installs the java component on CentOS 6 systems

  • <install dir>/roles/agent6-slave: deploys the flume slave node configuration and the related cron jobs on CentOS 6 systems

Run the deployment playbook

sudo ansible-playbook -i inventory/inventory.ini agent.yml
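
The playbook can also be validated before (or re-checked after) a real run; ansible-playbook supports a syntax check and a dry run:

ansible-playbook -i inventory/inventory.ini agent.yml --syntax-check
ansible-playbook -i inventory/inventory.ini agent.yml --check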

After deployment, check the ownership and permissions of the flume, logstash, and other component installation directories

[user@hostname-2 opt]$ ll
total 513268
...
drwxr-xr-x 15 logmanager logmanager      4096 Mar  4 16:37 flume
drwxr-xr-x 14 logmanager logmanager      4096 Feb  3 19:57 logstash
...

Deployment does not start the services automatically; after confirming the settings in flume-conf, start flume with the following commands

# start the service
sudo systemctl start flume
sudo systemctl enable flume
# log file path after the service starts
<install dir>/flume/logs/flume.log
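
If flume does not come up, the quickest checks are the service status and the tail of its log. A minimal sketch, assuming the default /opt/flume installation path:

systemctl status flume
journalctl -u flume --no-pager -n 50
tail -f /opt/flume/logs/flume.log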

Flume component deployment on Linux servers

Linux server flume component

Upload `apache-flume-1.7.0-bin.tgz` to the /opt directory on the server and extract it with `tar xf apache-flume-1.7.0-bin.tgz`.

After extraction a flume directory is created under /opt with the following contents

[user@hostname-3 flume]$ tree /opt/flume
/opt/flume
├── bin
│   ├── flume-ng
│   ├── flume-ng.cmd
│   └── flume-ng.ps1
├── CHANGELOG
├── conf
│   ├── flume-conf.properties.template
│   ├── flume-env.ps1.template
│   ├── flume-env.sh.template
│   └── log4j.properties
├── DEVNOTES
├── doap_Flume.rdf
├── docs
│   └── ...
├── lib
│   └── ...
├── LICENSE
├── NOTICE
├── README.md
├── RELEASE-NOTES
└── tools
    └── ...
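
Because the service is expected to run as the logmanager user (see the ownership check in the ansible section above), the extracted directory may need its ownership adjusted. A minimal sketch assuming the /opt/flume path and the logmanager user/group used throughout this document:

sudo chown -R logmanager:logmanager /opt/flume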

Create the configuration file flume-conf.properties under <install dir>/flume/conf

# slave node configuration file
[user@hostname-3 conf]$ cat flume-conf.properties
# example.conf: A single-node Flume configuration

# Name the components on this agent a1
a1.sources = r1 r2
a1.sinks = k1
a1.channels = c1

# ===================r1:tail dir recursive source1=================
# 定义使用的channel管道
a1.sources.r1.channels = c1
# 定义使用的组件类型
a1.sources.r1.type = com.cyclone.data.flume.source.TaildirRecursiveSource
# 空格分隔的文件组列表,每个分组代表一系列的文件
a1.sources.r1.filegroups =  flume  hadoop
a1.sources.r1.filegroups.flume = /opt/flume/logs/.*\\.log
a1.sources.r1.headers.flume.kafkaTopic = logmanager-flume
a1.sources.r1.headers.flume.@rule_name = flume
a1.sources.r1.filegroups.hadoop = /data/hadoop/logs/.*\\.log
a1.sources.r1.headers.hadoop.kafkaTopic = logmanager-flume
a1.sources.r1.headers.hadoop.@rule_name = hadoop
# 定义是否使用递归方式读取文件
a1.sources.r1.custom.recursive.read = true
# 是否从末尾读取文件
a1.sources.r1.skipToEnd = true
# 为了方便清理测试数据
# 以json格式记录读取文件的inode和对应文件的最后读取位置
a1.sources.r1.positionFile = /opt/flume/tailDirRecursiveSource/r1/taildir_position.json
# 设置拦截器
a1.sources.r1.interceptors = i1 i2 i3 i4 i5 i6
# 设置时间戳
a1.sources.r1.interceptors.i1.type = timestamp
# 设置hostname
a1.sources.r1.interceptors.i2.type = host
a1.sources.r1.interceptors.i2.hostHeader = @ip
# 设置时区
a1.sources.r1.interceptors.i3.type = com.cyclone.data.flume.interceptor.TimezoneInterceptor$Builder
a1.sources.r1.interceptors.i3.timezoneHeader = @time_zone
# 设置store_name(内容要用_进行连接)
a1.sources.r1.interceptors.i4.type = static
a1.sources.r1.interceptors.i4.key = @store_name
a1.sources.r1.interceptors.i4.value = cap_dev
# 设置自定义字段
a1.sources.r1.interceptors.i5.type = static
a1.sources.r1.interceptors.i5.key = @log_identifier
a1.sources.r1.interceptors.i5.value = hostname:cyclone-analytics-3##ip:172.20.3.249
# 设置topic的key
a1.sources.r1.interceptors.i6.type = static
a1.sources.r1.interceptors.i6.key = key
a1.sources.r1.interceptors.i6.value = 172.20.3.249


# ==========================k1:kafka sink========================
# 定义使用的channel管道
a1.sinks.k1.channel = c1
# 定义使用的组件类型
a1.sinks.k1.type = avro
# 绑定的hostname
a1.sinks.k1.hostname = cyclone-analytics-2
#
# 绑定额端口
a1.sinks.k1.port = 4541

# ========================c1:file channel=========================
# 定义管道类型
a1.channels.c1.type = file
# 为了方便清理测试数据
# checkpoint文件的存储位置
a1.channels.c1.checkpointDir = /opt/flume/checkpointDir/c1
# 用逗号分隔的存储文件的目录,使用不同磁盘上的不同目录可以提升性能
a1.channels.c1.dataDirs = /opt/flume/fileChannel/c1
# sample configuration file for the master node

# Name the components on this agent a1
a1.sources = r1 r2 r3
a1.sinks = k1 k2 k3
a1.channels = c1 c2 c3

# ===================r1:tail dir recursive source1=================
# 定义使用的channel管道
a1.sources.r1.channels = c1
# 定义使用的组件类型
a1.sources.r1.type = avro
# 绑定的hostname
a1.sources.r1.bind = cyclone-analytics-2
# 文件组的绝对路径,支持文件名的正则表达式
a1.sources.r1.port = 4541

a1.sources.r1.selector.type = multiplexing
a1.sources.r1.selector.header = kafkaTopic
a1.sources.r1.selector.mapping.logmanager-flume = c1

# ==========================k1:kafka sink========================
# 定义使用的channel管道
a1.sinks.k1.channel = c1
# 定义使用的组件类型
a1.sinks.k1.type = com.cyclone.data.flume.sink.KafkaLightSink
# 定义连接的kafka broker的列表,建议使用两个作为高可用,以逗号隔开
a1.sinks.k1.kafka.bootstrap.servers = cyclone-analytics-1:9092,cyclone-analytics-2:9092,cyclone-analytics-3:9092
#
# 定义向kafka发送信息的topic
a1.sinks.k1.kafka.topic = logmanager-flume
# 等待ISR列表中所有的副本完成同步后才算发送成功
a1.sinks.k1.kafka.producer.acks = all
# 
a1.sinks.k1.kafka.producer.linger.ms = 1
# 设置消息压缩方式
a1.sinks.k1.kafka.producer.compression.type = snappy

# ========================c1:file channel=========================
# 定义管道类型
a1.channels.c1.type = file
# 为了方便清理测试数据
# checkpoint文件的存储位置
a1.channels.c1.checkpointDir = /opt/flume/checkpointDir/c1
# 用逗号分隔的存储文件的目录,使用不同磁盘上的不同目录可以提升性能
a1.channels.c1.dataDirs = /opt/flume/fileChannel/c1

# ===================r2:tail dir recursive source2=================
# 定义使用的channel管道
a1.sources.r2.channels = c2
# 定义使用的组件类型
a1.sources.r2.type = com.cyclone.data.flume.source.TaildirRecursiveSource
# 空格分隔的文件组列表,每个分组代表一系列的文件
a1.sources.r2.filegroups = fg
# 文件组的绝对路径,支持文件名的正则表达式
a1.sources.r2.filegroups.fg = /data/agent-status/.*\\.log
#a1.sources.r2.filegroups.fg = /opt/flume/data/agent-status/.*\\.log
# 定义是否使用递归方式读取文件
a1.sources.r2.custom.recursive.read = true
# 为了方便清理测试数据
# 以json格式记录读取文件的inode和对应文件的最后读取位置
a1.sources.r2.positionFile = /opt/flume/tailDirRecursiveSource/r2/taildir_position.json

# ==========================k2:kafka sink========================
# 定义使用的channel管道
a1.sinks.k2.channel = c2
# 定义使用的组件类型
a1.sinks.k2.type = org.apache.flume.sink.kafka.KafkaSink
# 定义连接的kafka broker的列表,建议使用两个作为高可用,以逗号隔开
a1.sinks.k2.kafka.bootstrap.servers = cyclone-analytics-1:9092,cyclone-analytics-2:9092,cyclone-analytics-3:9092
#
# 定义向kafka发送信息的topic
a1.sinks.k2.kafka.topic = agent-status
# 等待ISR列表中所有的副本完成同步后才算发送成功
a1.sinks.k2.kafka.producer.acks = all
a1.sinks.k2.kafka.producer.max.in.flight.requests.per.connection = 1

# ========================c2:file channel=========================
# 定义管道类型
a1.channels.c2.type = file
# 为了方便清理测试数据
# checkpoint文件的存储位置
a1.channels.c2.checkpointDir = /opt/flume/checkpointDir/c2
# 用逗号分隔的存储文件的目录,使用不同磁盘上的不同目录可以提升性能
a1.channels.c2.dataDirs = /opt/flume/fileChannel/c2

#====================================================代理机 agent日志 + webservice=============================================

# ===================r3:tail dir recursive source3=================
# 定义使用的channel管道
a1.sources.r3.channels = c3
# 定义使用的组件类型
a1.sources.r3.type = com.cyclone.data.flume.source.TaildirRecursiveSource
# 空格分隔的文件组列表,每个分组代表一系列的文件
a1.sources.r3.filegroups = flume logstash webservice
# flume
a1.sources.r3.filegroups.flume = /opt/flume/logs/.*\\.log
a1.sources.r3.headers.flume.@rule_name = flume
a1.sources.r3.headers.flume.@store_name = agent_proxy
# logstash
a1.sources.r3.filegroups.logstash = /opt/logstash/logs/.*\\.log
a1.sources.r3.headers.logstash.@rule_name = logstash
a1.sources.r3.headers.logstash.@store_name = agent_proxy
# webservice
a1.sources.r3.filegroups.webservice = /data/webservice/.*
a1.sources.r3.headers.webservice.@rule_name = webservice
a1.sources.r3.headers.webservice.@store_name = webservice
# 定义是否使用递归方式读取文件
a1.sources.r3.custom.recursive.read = true
# 是否从末尾读取文件
a1.sources.r3.skipToEnd = true
# 为了方便清理测试数据
# 以json格式记录读取文件的inode和对应文件的最后读取位置
a1.sources.r3.positionFile = /opt/flume/tailDirRecursiveSource/r3/taildir_position.json
# 设置拦截器
a1.sources.r3.interceptors = i1 i2 i3 i5 i6
# 设置时间戳
a1.sources.r3.interceptors.i1.type = timestamp
# 设置hostname
a1.sources.r3.interceptors.i2.type = host
a1.sources.r3.interceptors.i2.hostHeader = @ip
# 设置时区
a1.sources.r3.interceptors.i3.type = com.cyclone.data.flume.interceptor.TimezoneInterceptor$Builder
a1.sources.r3.interceptors.i3.timezoneHeader = @time_zone
# 设置store_name(内容要用_进行连接)
#a1.sources.r3.interceptors.i4.type = static
#a1.sources.r3.interceptors.i4.key = @store_name
#a1.sources.r3.interceptors.i4.value = agent-proxy
# 设置自定义字段
a1.sources.r3.interceptors.i5.type = static
a1.sources.r3.interceptors.i5.key = @log_identifier
a1.sources.r3.interceptors.i5.value = hostname:cyclone-analytics-2##ip:172.20.3.248
# 设置topic的key
a1.sources.r3.interceptors.i6.type = static
a1.sources.r3.interceptors.i6.key = key
a1.sources.r3.interceptors.i6.value = 172.20.3.248

# ==========================k3:kafka sink========================
# 定义使用的channel管道
a1.sinks.k3.channel = c3
# 定义使用的组件类型
a1.sinks.k3.type = com.cyclone.data.flume.sink.KafkaLightSink
# 定义连接的kafka broker的列表,建议使用两个作为高可用,以逗号隔开
a1.sinks.k3.kafka.bootstrap.servers = cyclone-analytics-2:9092
# 定义向kafka发送信息的topic
a1.sinks.k3.kafka.topic = logmanager-flume
# 等待ISR列表中所有的副本完成同步后才算发送成功
a1.sinks.k3.kafka.producer.acks = all
# 
a1.sinks.k3.kafka.producer.linger.ms = 1
# 设置消息压缩方式
a1.sinks.k3.kafka.producer.compression.type = snappy

# ========================c3:file channel=========================
# 定义管道类型
a1.channels.c3.type = file
# 为了方便清理测试数据
# checkpoint文件的存储位置
a1.channels.c3.checkpointDir = /opt/flume/checkpointDir/c3
# 用逗号分隔的存储文件的目录,使用不同磁盘上的不同目录可以提升性能
a1.channels.c3.dataDirs = /opt/flume/fileChannel/c3

Also create flume-env.sh, the environment variable file used when flume starts

export FLUME_HOME=/opt/flume
export JAVA_HOME=/usr/java/jdk1.8.0_261-amd64
export JAVA_OPTS="-Xms512m -Xmx512m -Dcom.sun.management.jmxremote"

Upload the flume plugin archive to <install dir>/flume/ and extract it into the <install dir>/flume/plugins.d directory

Add the flume service file at /lib/systemd/system/flume.service

[user@hostname-3 ~]$ cat /lib/systemd/system/flume.service 
[Unit]
Description=Apache Flume
Wants=network-online.target
After=network-online.target

[Service]
EnvironmentFile=-/opt/flume/conf/flume.env
WorkingDirectory=/opt/flume

User=cyclone
Group=cyclone

ExecStart=/opt/flume/bin/flume-ng agent \
    --name a1 \
    -c conf \
    -f conf/flume-conf.properties \
    -Dflume.root.logger=INFO,LOGFILE

LimitNOFILE=65536
LimitNPROC=2048
KillSignal=SIGTERM
KillMode=process
SendSIGKILL=no
Restart=always
TimeoutStopSec=60

[Install]
WantedBy=multi-user.target

[user@hostname-3 ~]$ systemctl daemon-reload

Start the flume service and enable it at boot

[user@hostname-3 ~]$ systemctl start flume
[user@hostname-3 ~]$ systemctl enable flume

Linux rsync server side

To use the component-restart feature, the user running the scripts must be able to use sudo without a password

As the root user run visudo, append the following line at the end of the file, then press Esc, type :wq and press Enter to save and exit; replace username with the user that needs sudo access

username ALL=(ALL) NOPASSWD: ALL
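
You can confirm the rule took effect without logging out. A minimal check, where username is the account configured above:

sudo -l -U username
# or, as that user, run a command that would normally prompt for a password
sudo -n true && echo "passwordless sudo OK"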

Install rsync on the central collection server

sudo yum install rsync

Edit the configuration file of the rsyncd service

sudo vi /etc/rsyncd.conf
log file = /var/log/rsyncd.log
pid file = /var/run/rsyncd.pid
lock file = /var/run/rsync.lock
secrets file = /etc/users
[agent-status]
    path = /data/agent-status
    comment = sync agent file
    uid = root
    gid = root
    port = 873
    ignore errors = yes
    use chroot = yes
    read only = no
    list = no
    max connections = 200
    timeout = 600
    auth users = logmanager
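
The configuration above references a secrets file (/etc/users) and an authorized user (logmanager) but does not show how to create them. A minimal sketch, assuming the same paths and a placeholder password:

# format is username:password, one entry per line
echo 'logmanager:CHANGE_ME' | sudo tee /etc/users
# rsync refuses secrets files that other users can read
sudo chmod 600 /etc/users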

Start the rsyncd service and enable it at boot

sudo systemctl start rsyncd

sudo systemctl enable rsyncd

Create the directory used to receive the synced data

sudo mkdir -p /data/agent-status
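
Once the module is up, a quick push from any client confirms that the daemon, the module path, and the credentials all work. A minimal sketch, assuming the collection server is 172.20.3.248 (the flume.server value used earlier) and a client-side password file holding the logmanager password:

echo 'CHANGE_ME' > /tmp/rsync.pass && chmod 600 /tmp/rsync.pass
touch /tmp/test.log
rsync -av --password-file=/tmp/rsync.pass /tmp/test.log [email protected]::agent-status/test/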

Set up the cron job that periodically cleans the agent status check logs

Run

crontab -e

Add the following content

# agent-status log cleaner
1 1 * * * find /data/agent-status \( -name "*.out.*" -o -name '*.log.*' \) -type f -mtime +6 -delete

Agent node deployment on Windows servers

  1. Deploy filebeat:

After java is installed on the server and JAVA_HOME and PATH are configured, upload the filebeat installer and install it.

  2. Edit the configuration file
filebeat:
  inputs:
    - type: log
      enabled: true
      paths:
        - C:\logs\**\*.log*  
      encoding: utf-8 
      scan_frequency: 1s
      recursive_glob.enabled: true
      backoff: 1s # 当读到文件末尾时,检查文件是否有新内容的间隔时间
      close_inactive: 10m 
      close_renamed: false
      close_removed: true 
      clean_inactive: 0
      clean_removed: true
      fields:
        log_identifier: hostname:arvin##ip:127.0.0.1
        rule_name: test
        store_name: test
      fields_under_root: true
      tail_files: true # 读取新文件时,会在文件的末尾开始读取
      max_bytes: 102400 # 单条日志的最大值100KB


#output.file:
#  path: C:\Users\edz\Desktop\filebeat
#  filename: filebeat.log
  
output.logstash:
  hosts: [172.20.3.248:5044]
  bulk_max_size: 512 # 一个单独的logstash请求中事件的最大数量
  slow_start: true # 如果启用,每个事务只传输一批事件中的事件子集。如果没有发生错误,事件的数量将会增长到bulk_max_size.如果发生错误,将会减少。
  
# 日志
logging.level: info
logging.to_file: true
logging.files: 
  name: filebeat.log
  keepfiles: 7
  permissions: 0644

# 内存队列
queue.mem:
  events: 512
  flush.min_events: 512
  flush.timeout: 1s
  
# 设置同时执行的cpu数量
max_procs: 1

# filebeat关闭前等待的时间,可以发送内存中的数据并且接受相应写入registry中
filebeat.shutdown_timeout: 5s
  3. Configure filebeat as a system service
sc create filebeat binPath= "FILEBEAT_HOME\filebeat.exe -c FILEBEAT_HOME\filebeat.yml" start= delayed-auto
  4. Deploy rsync

Upload the rsync installer and install it, preferably into the root of the C: or D: drive so it is easy to invoke. It can then be called as C:\ICW\bin\rsync.exe [--args]

  5. Upload the agent health status check script

  6. Edit the bat file contents:

# 探针状态检查脚本
rem 获取探针状态
for /f "tokens=4" %%i in ('sc query filebeat ^| find /i "STATE"') do set agent_status=%%i
rem 获取日期
set date=%date:~0,4%-%date:~5,2%-%date:~8,2%
rem 获取时间
set time=%date:~0,4%-%date:~5,2%-%date:~8,2% %time:~0,2%:%time:~3,2%:%time:~6,2%
rem 获取机器名
set hostname=%COMPUTERNAME%
rem 获取ip
for /f "delims=: tokens=2" %%i in ('ipconfig ^| find /i "ipv4"') do set ip=%%i
set "ip=%ip: =%"
rem 对ip和hostname进行判空
if "%hostname%" == "" set hostname=localhost
if "%ip%" == "" set ip=127.0.0.1
rem 获取当前路径
set path=%~dp0
set path=%path:~0,-1%
rem 获取探针类型
set agent_type=filebeat
rem 获取探针状态
if "%agent_status%" == "RUNNING" (
    set status=normal
) else (
    rem 如果探针状态为STOPPED,则进行重启
    set status=error
	if "%agent_status%" == "STOPPED" (
	    rem 需要以管理员权限进行执行
	    sc start filebeat
	)
)
rem 获取日志名
set log_name=filebeat-status-%date%.log
rem 获取日志全路径,需要提前创建相关目录
set log_path=%path%\agent-status\%log_name%
rem 获取日志内容
set log=%time%,%hostname%,%ip%,%path%,%agent_type%,%status%
rem 将日志内容输出到文件中
echo %log% >> %log_path%
  7. Probe status log sync script
cd C:\Users\testuser\Desktop\filebeat\agent-status
C:\ICW\bin\rsync.exe -av --append ./* [email protected]::agent-status/172.0.0.4/

Configure the health status check script:

  1. Set the EXE4J_JAVA_HOME environment variable required by exe4j, pointing it at the Java installation directory
set EXE4J_JAVA_HOME=<path to the Java installation>
  2. Deploy timezone.exe

Running it generates the information needed for the configuration file: a txt file containing the parameters required to edit the configuration.

  3. Edit the configuration file filebeat.yml and adjust the relevant settings:
    log_identifier, rule_name, store_name, tail_files, encoding, etc.

  4. Configure filebeat as a system service

Run the install-service-filebeat.ps1 script to register filebeat as a system service

Schedule the agent-status.bat task

Open the Task Scheduler panel and create the scheduled tasks

Create the probe status check task

Complete the settings in the Task Scheduler dialog and click OK

Create the rsync sync task

Complete the settings in the Task Scheduler dialog and click OK
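
The scheduled tasks can also be created from the command line instead of the GUI. A hedged sketch using schtasks; the task names and script paths here are illustrative (the rsync script file name in particular is a placeholder, since this document does not name that file):

schtasks /Create /TN "agent-status-check" /TR "C:\Users\testuser\Desktop\filebeat\agent-status.bat" /SC MINUTE /MO 1 /RU SYSTEM
schtasks /Create /TN "agent-status-rsync" /TR "C:\Users\testuser\Desktop\filebeat\rsync-agent-status.bat" /SC MINUTE /MO 1 /RU SYSTEM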

Install a kafka client tool (such as Kafka Tool) and check whether the data is arriving in the message queue

Agent node deployment on Linux servers

Deploy java on the server, configure JAVA_HOME, PATH and related variables, and create the logmanager user.

sudo yum install java-1.8.0-openjdk
rpm -qa|grep jdk
 java-1.8.0-openjdk-headless-1.8.0.262.b10-0.el7_8.x86_64

sudo vim /etc/profile.d/flume.sh
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.262.b10-0.el7_8.x86_64
export FLUME_HOME=/opt/flume
export PATH=$JAVA_HOME/bin:$FLUME_HOME/bin:$PATH

sudo cp /etc/profile.d/flume.sh /opt/flume/conf/flume.env
sudo cp /etc/profile.d/flume.sh /opt/flume/conf/flume-env.sh

Deploy flume

Upload the flume package and the flume plugin package to the /opt directory on the server, and configure flume as a system service

sudo tar -zxf /path/to/apache-flume-1.7.0-bin.tgz -C /opt/
sudo tar -zxf /path/to/plugins.d.tar -C /opt/flume/plugins.d/
# configure flume as a system service
sudo tee /usr/lib/systemd/system/flume.service > /dev/null << 'EOF'
[Unit]
Description=Apache Flume
Wants=network-online.target
After=network-online.target

[Service]
EnvironmentFile=-/opt/flume/conf/flume.env
WorkingDirectory=/opt/flume

User=logmanager
Group=logmanager

ExecStart=/opt/flume/bin/flume-ng agent \
    --name a1 \
    -c conf \
    -f conf/flume-conf.properties \
    -Dflume.root.logger=INFO,LOGFILE

LimitNOFILE=65536
LimitNPROC=2048
KillSignal=SIGTERM
KillMode=process
SendSIGKILL=no
Restart=always
TimeoutStopSec=60

[Install]
WantedBy=multi-user.target
EOF

Create flume's data directory and log directory

# e.g. use /data/flume/{data,logs} as flume's data and log directories
sudo mkdir -p /data/flume/{data,logs}

Edit the flume configuration file

sudo vim /opt/flume/conf/flume-conf.properties
# example.conf: A single-node Flume configuration

# Name the components on this agent a1
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# ===================r1:tail dir recursive source1=================
# 定义使用的channel管道
a1.sources.r1.channels = c1
# 定义使用的组件类型
a1.sources.r1.type = com.cyclone.data.flume.source.TaildirRecursiveSource
# 空格分隔的文件组列表,每个分组代表一系列的文件
a1.sources.r1.filegroups = fg
# 文件组的绝对路径,支持文件名的正则表达式
# 设置文件组的key

a1.sources.r1.filegroups.fg = /data/hadoop/logs/.*\\.log /data/flume/logs/.*\\.log
a1.sources.r1.headers.fg.kafkaTopic = logmanager-flume

# 定义是否使用递归方式读取文件
a1.sources.r1.custom.recursive.read = true
# 是否从末尾读取文件
a1.sources.r1.skipToEnd = true
# 为了方便清理测试数据
# 以json格式记录读取文件的inode和对应文件的最后读取位置
a1.sources.r1.positionFile = /opt/flume/tailDirRecursiveSource/r1/taildir_position.json
# 设置拦截器
a1.sources.r1.interceptors = i1 i2 i4 i5 i6
# 设置时间戳
a1.sources.r1.interceptors.i1.type = timestamp
# 设置hostname
a1.sources.r1.interceptors.i2.type = host
a1.sources.r1.interceptors.i2.hostHeader = hostname
#a1.sources.r1.interceptors.i3.type = static
#a1.sources.r1.interceptors.i3.key = @rule_name
#a1.sources.r1.interceptors.i3.value =
# 设置store_name(内容要用_进行连接)
a1.sources.r1.interceptors.i4.type = static
a1.sources.r1.interceptors.i4.key = @store_name
a1.sources.r1.interceptors.i4.value = cap_dev
# 设置自定义字段
a1.sources.r1.interceptors.i5.type = static
a1.sources.r1.interceptors.i5.key = @log_identifier
a1.sources.r1.interceptors.i5.value = hostname:cyclone-analytics-1##ip:172.20.3.247
# 设置topic的key
a1.sources.r1.interceptors.i6.type = static
a1.sources.r1.interceptors.i6.key = key
a1.sources.r1.interceptors.i6.value = 172.20.3.247

# ==========================k1:kafka sink========================
# 定义使用的channel管道
a1.sinks.k1.channel = c1
# 定义使用的组件类型
a1.sinks.k1.type = avro
# 绑定的hostname
a1.sinks.k1.hostname = cyclone-analytics-2
#
# 绑定额端口
a1.sinks.k1.port = 4541

# ========================c1:file channel=========================
# 定义管道类型
a1.channels.c1.type = file
# 为了方便清理测试数据
# checkpoint文件的存储位置
a1.channels.c1.checkpointDir = /opt/flume/checkpointDir/c1
# 用逗号分隔的存储文件的目录,使用不同磁盘上的不同目录可以提升性能
a1.channels.c1.dataDirs = /opt/flume/fileChannel/c1


sudo vim /opt/flume/conf/log4j.properties
# Define some default values that can be overridden by system properties.
#
# For testing, it may also be convenient to specify
# -Dflume.root.logger=DEBUG,console when launching flume.

#flume.root.logger=DEBUG,console
flume.root.logger=INFO,LOGFILE
flume.log.dir=/opt/flume/logs
flume.log.file=flume.log

log4j.logger.org.apache.flume.lifecycle = INFO
log4j.logger.org.jboss = WARN
log4j.logger.org.mortbay = INFO
log4j.logger.org.apache.avro.ipc.NettyTransceiver = WARN
log4j.logger.org.apache.hadoop = INFO
log4j.logger.org.apache.hadoop.hive = ERROR

# Define the root logger to the system property "flume.root.logger".
log4j.rootLogger=${flume.root.logger}


# Stock log4j rolling file appender
# Default log rotation configuration
log4j.appender.LOGFILE=org.apache.log4j.RollingFileAppender
log4j.appender.LOGFILE.MaxFileSize=100MB
log4j.appender.LOGFILE.MaxBackupIndex=10
log4j.appender.LOGFILE.File=${flume.log.dir}/${flume.log.file}
log4j.appender.LOGFILE.layout=org.apache.log4j.PatternLayout
log4j.appender.LOGFILE.layout.ConversionPattern=%d{dd MMM yyyy HH:mm:ss,SSS} %-5p [%t] (%C.%M:%L) %x - %m%n


# Warning: If you enable the following appender it will fill up your disk if you don't have a cleanup job!
# This uses the updated rolling file appender from log4j-extras that supports a reliable time-based rolling policy.
# See http://logging.apache.org/log4j/companions/extras/apidocs/org/apache/log4j/rolling/TimeBasedRollingPolicy.html
# Add "DAILY" to flume.root.logger above if you want to use this
log4j.appender.DAILY=org.apache.log4j.rolling.RollingFileAppender
log4j.appender.DAILY.rollingPolicy=org.apache.log4j.rolling.TimeBasedRollingPolicy
log4j.appender.DAILY.rollingPolicy.ActiveFileName=${flume.log.dir}/${flume.log.file}
log4j.appender.DAILY.rollingPolicy.FileNamePattern=${flume.log.dir}/${flume.log.file}.%d{yyyy-MM-dd}
log4j.appender.DAILY.layout=org.apache.log4j.PatternLayout
log4j.appender.DAILY.layout.ConversionPattern=%d{dd MMM yyyy HH:mm:ss,SSS} %-5p [%t] (%C.%M:%L) %x - %m%n


# console
# Add "console" to flume.root.logger above if you want to use this
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d (%t) [%p - %l] %m%n

Start the flume service

sudo systemctl start flume
sudo systemctl enable flume

Deploy the flume node health status check script

sudo mkdir /data/agent-status
sudo touch /data/agent-status/agent-status.sh
sudo chmod +x /data/agent-status/agent-status.sh
sudo vim /data/agent-status/agent-status.sh
@echo off
rem 获取探针状态
for /f "tokens=4" %%i in ('sc query filebeat ^| find /i "STATE"') do set agent_status=%%i
rem 获取日期
set date=%date:~0,4%-%date:~5,2%-%date:~8,2%
rem 获取时间
set time=%date:~0,4%-%date:~5,2%-%date:~8,2% %time:~0,2%:%time:~3,2%:%time:~6,2%

rem 获取当前路径
set path=%~dp0
set path=%path:~0,-1%

rem 将hostname的值输出到文件中
set hostname_file="%path%\hostname.txt"
rem 获取机器名
set hostname=%COMPUTERNAME%
if not exist %hostname_file% (
	echo %hostname%>%hostname_file%
)
rem 设置hostname的值
set /p hostname=<%hostname_file%

rem 将ip的值输出到文件中
set ip_file="%path%\ip.txt"
rem 获取ip
for /f "delims=: tokens=2" %%i in ('ipconfig ^| find /i "ipv4"') do set ip=%%i
set "ip=%ip: =%"
if not exist %ip_file% (
	echo %ip%>%ip_file%
)
rem 设置ip的值
set /p ip=<%ip_file%

rem 创建bat错误日志的目录
set bat_log_path=%path%\agent-status-bat-log
if not exist "%log_path%" ( 
   md %bat_log_path%
)

rem 设置bat脚本日志的文件
set bat_log=%bat_log_path%\agent-status.log

rem 对ip和hostname进行判空
if "%hostname%" == "" set hostname=localhost
rem ip为空,则输出日志信息推出脚本 
if "%ip%" == "" (
	echo "IP can not be empty.">> %bat_log%
	goto end
)

rem 获取探针类型
set agent_type=filebeat
rem 获取探针状态
if "%agent_status%" == "RUNNING" (
    set status=normal
) else (
    rem 如果探针状态为STOPPED,则进行重启
    set status=error
	if "%agent_status%" == "STOPPED" (
	    rem 需要以管理员权限进行执行
	    sc start filebeat
	)
)

rem 获取时区信息
SET time_zone_file="%path%\timezone.txt"

if not exist %time_zone_file% (
	echo "%time% timezone.txt does not exit. Please run timezone.exe.">> %bat_log%
	goto end
)
set /p timezone=<%time_zone_file%

rem 获取创建时间
SET create_time_file="%path%\create-time.txt"

rem 如果不存在将当前时间写入文件中
if not exist %create_time_file% (
	echo %time%> %create_time_file%
)
set /p create_time=<%create_time_file%

if "%create_time%"=="" (
	echo "%time% create time is empty."> %bat_log%
	goto end
)

rem 获取日志名
set agent_log_name=filebeat-status-%date%.log
rem 获取日志全路径,需要提前创建相关目录
set agent_log_path=%path%\agent-status\%agent_log_name%
rem 获取日志内容
set agent_log=%time%,%hostname%,%ip%,%path%,%agent_type%,%status%,%timezone%,%create_time%
rem 将日志内容输出到文件中
echo %agent_log%>> %agent_log_path%
:end
exit
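
Note that the script body shown above is the Windows batch variant of the health check (it queries the filebeat service with sc). For a Linux flume node, a minimal shell equivalent might look like the following sketch; the field order mirrors the batch script, while the paths and the flume service name are assumptions based on this document:

#!/bin/bash
# minimal flume health check: append one CSV status line per run
status_dir=/data/agent-status
mkdir -p "${status_dir}"
host_name=$(hostname)
ip=$(hostname -I | awk '{print $1}')
now=$(date '+%Y-%m-%d %H:%M:%S')
today=$(date '+%Y-%m-%d')
agent_type=flume
if systemctl is-active --quiet flume; then
    status=normal
else
    status=error
    # restarting requires the passwordless sudo configured earlier in this document
    sudo systemctl start flume
fi
echo "${now},${host_name},${ip},${status_dir},${agent_type},${status}" \
    >> "${status_dir}/flume-status-${today}.log"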

Set up the cron jobs for the periodic health check, syncing the health check logs, cleaning the flume logs, and cleaning the health check logs

Deploy the rsync script that syncs the agent status logs

sudo vim /data/flume/shell/rsyncClient.sh
work_path=$(dirname $0)
work_path="$(cd "${work_path}"; pwd)"
User={{ flume.user }}
Server={{ flume.server }}
/usr/bin/rsync -av --append ${work_path}/flume-status $User@$Server::agent-status/{{ ansible_env.SSH_CLIENT.split() | first }}/

Run

crontab -e

Add the following content

# flume logs cleaner
1 1 * * * find /data/flume/logs \( -name "*.out.*" -o -name '*.log.*' \) -type f -mtime +6 -delete > /dev/null
# agent-status log cleaner
1 1 * * * find /data/agent-status \( -name "*.out.*" -o -name '*.log.*' \) -type f -mtime +6 -delete > /dev/null
# agent healthy check
* * * * * sh -c "/data/agent-status/agent-status.sh"
# rsync transfer healthy check log
* * * * * sh -c "/data/flume/shell/rsyncClient.sh"

Modifying the probe configuration

Method 1:

Add the definitions of the target files to the server's ansible host variables

# example file: inventory/host_vars/hostname-1.yml
# sourcedefine defines the files to collect
sourcedefine:
    # first group of collected files, named r1
  - name: r1
    # name of the channel the collected data is sent to; channels are defined under channeldefine below
    tochannel: c1
    # labels configured in the logmanager system for the collected data
    storename: network_core
    rulename: hadoop_log
    # definition of the concrete files to collect
    filegroups:
        # path of the files to collect
      - path: '/data/example1/logs/.*\\.log'
        # group the log files belong to
        filegroup: example1
        # kafka topic used when the files are shipped to kafka
        kafkatopic: logmanager-flume
    # whether to skip to the end and only read newly appended content
    skiptoend: false
  #- name: r2
  #  storename: network_core
  #  tochannel: c1
  #  rulename: hadoop_log
  #  filegroups:
  #    - path: '/data/example2/logs/.*\\.log'
  #      filegroup: example2
  #      kafkatopic: logmanager-flumes
  #  skiptoend: false

# definition of the data destinations
sinkdefine:
    # custom name
  - name: k1
    # channel to read from, matching an entry in channeldefine
    tochannel: c1
    # avro is a data receiving service
    type: avro
    # list of kafka connection addresses
    bindingname: "{{ groups['kafka'] | list }}"
    # port bound for flume
    bindingport: 4541
  #- name: k2
  #  type: avro
  #  bindingname: "{{ groups['kafka'] | list }}"
  #  bindingport: 4541

# definition of the data transfer queues
channeldefine:
    # custom queue name
  - name: c1
    # the queue type is file
    type: file
    # checkpoint directory for the data transfer
    checkpointdir: '/opt/flume/checkpointDir/c1'
    # intermediate storage used during data transfer
    dataDirs: '/opt/flume/fileChannel/c1'
  #- name: c2
  #  type: file
  #  checkpointdir: '/opt/flume/checkpointDir/c1'
  #  dataDirs: '/opt/flume/fileChannel/c1'
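
After editing the host variables, re-running the playbook regenerates flume-conf.properties on that node. A minimal sketch, limiting the run to the host edited above and restarting its flume service afterwards:

sudo ansible-playbook -i inventory/inventory.ini agent.yml --limit hostname-1
sudo systemctl restart flume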

Method 2:

Install flume manually

  • Install java
# install openjdk via yum
[root@hostname bin]# yum install java-1.8.0-openjdk.x86_64 java-1.8.0-openjdk-devel.x86_64 java-1.8.0-openjdk-headless.x86_64
# locate the JAVA_HOME of the installed openjdk
[root@hostname bin]# which java
/usr/bin/java
[root@hostname bin]# ls -lrt /usr/bin/java
lrwxrwxrwx. 1 root root 22 Apr 12 13:50 /usr/bin/java -> /etc/alternatives/java
[root@hostname bin]# ls -lrt /etc/alternatives/java
lrwxrwxrwx. 1 root root 73 Apr 12 13:50 /etc/alternatives/java -> /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/jre/bin/java
  • Install flume
# create the logmanager user
[root@hostname bin]# groupadd logmanager -g 552
[root@hostname bin]# useradd logmanager -u 552 -g 552 -M  -s /sbin/nologin
# download the flume binary package and extract it
[root@hostname bin]# wget https://archive.apache.org/dist/flume/1.7.0/apache-flume-1.7.0-bin.tar.gz
[root@hostname bin]# tar -zxf apache-flume-1.7.0-bin.tar.gz
[root@hostname bin]# mv apache-flume-1.7.0-bin /opt/flume
# configure the environment variables used to run flume
[root@hostname bin]# cat /opt/flume/conf/flume.env
FLUME_HOME=/opt/flume
JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/jre
JAVA_OPTS="-Xms512m -Xmx512m -Dcom.sun.management.jmxremote"
PATH=${JAVA_HOME}/bin:${FLUME_HOME}/bin:$PATH
[root@hostname bin]# source /opt/flume/conf/flume.env
  • Configure the flume configuration file
# /${FLUME_HOME}/conf/flume-conf.properties
The agent configuration file is best written node by node, following the flume topology.
1. Agent

Every configuration starts by naming the agent, its sources, channels, and sinks

# Name the components on this agent( 描述这个Agent,给各个组件取名字)
a1.sources = r1
a1.sinks = k1 k2
a1.channels = c1 c2

2. Source
taildir

# Describe/configure the source
a1.sources.r3.type = TAILDIR
#维护这每个文件读取到的最新的位置
a1.sources.r3.positionFile = /opt/module/flume/tail_dir.json
#可配置多目录
a1.sources.r3.filegroups = f1 f2
#正则匹配文件名
a1.sources.r3.filegroups.f1 = /opt/module/flume/files/.*file.*
a1.sources.r3.filegroups.f2 = /opt/module/flume/files/.*log.*

avro

# Describe/configure the source
# source端的avro是一个数据接收服务
a1.sources.r1.type = avro
#接收的主机
a1.sources.r1.bind = hadoop102
#要和上级的avro的sink的端口一致
a1.sources.r1.port = 4141

netcat

# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444

exec

# Describe/configure the source
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /opt/module/hive/logs/hive.log
a1.sources.r1.shell = /bin/bash -c

spooldir

# Describe/configure the source
a1.sources.r3.type = spooldir
# 指定文件夹
a1.sources.r3.spoolDir = /opt/module/flume/upload
#指定文件上传后的后缀
a1.sources.r3.fileSuffix = .COMPLETED
a1.sources.r3.fileHeader = true
#忽略所有以.tmp结尾的文件,不上传
a1.sources.r3.ignorePattern = ([^ ]*.tmp)

3. Sink
hdfs

# Describe the sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://hadoop102:8020/flume/%Y%m%d/%H
#上传文件的前缀
a1.sinks.k1.hdfs.filePrefix = logs-
#是否对时间戳取整
a1.sinks.k1.hdfs.round = true
#多少时间单位创建一个新的文件夹
a1.sinks.k1.hdfs.roundValue = 1
#创建文件夹的时间单位
a1.sinks.k1.hdfs.roundUnit = day
#是否使用本地时间戳
a1.sinks.k1.hdfs.useLocalTimeStamp = true
#积攒多少个Event才flush到HDFS一次
a1.sinks.k1.hdfs.batchSize = 100
#设置文件类型,可支持压缩
a1.sinks.k1.hdfs.fileType = DataStream
#多久生成一个新的文件,单位:s
a1.sinks.k1.hdfs.rollInterval = 3600
#设置每个文件的滚动大小,一般略小于128M
a1.sinks.k1.hdfs.rollSize = 134217700
#文件的滚动与Event数量无关
a1.sinks.k1.hdfs.rollCount = 0

## 控制输出文件是原生文件。LZO压缩
a1.sinks.k1.hdfs.fileType = CompressedStream
a1.sinks.k1.hdfs.codeC = lzop

kafka (to be added)
hbase (to be added)
avro

# Describe the sink
# sink端的avro是一个数据发送者
a1.sinks.k1.type = avro
#发送的目的主机ip
a1.sinks.k1.hostname = hadoop102 
a1.sinks.k1.port = 4141

logger

# Describe the sink
a1.sinks.k1.type = logger

local directory (file_roll)

# Describe the sink
a3.sinks.k1.type = file_roll
a3.sinks.k1.sink.directory = /opt/module/datas/flume3

Note: the output directory must already exist; if it does not, it will not be created automatically.
4. Channel

# Describe the channel
#channel的类型为memory或者file
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

5. Binding the components

# Bind the source and sink to the channel
#组件绑定,1个source,2个channel
a1.sources.r1.channels = c1 c2 
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c2

6. Custom interceptors and channel selectors

There are two channel selectors: replicating (the default) and multiplexing

a1.sources.r1.interceptors = i1
#自定义拦截器的全类名
a1.sources.r1.interceptors.i1.type = com.atguigu.interceptor.TypeInterceptor$Builder
#channel选择器选用multiplexing类型
a1.sources.r1.selector.type = multiplexing
a1.sources.r1.selector.header = type
a1.sources.r1.selector.mapping.hello = c1
a1.sources.r1.selector.mapping.nohello = c2

7. Load balancing and failover

# Name the components on this agent
a1.sources = r1
a1.channels = c1
#添加sink组
a1.sinkgroups = g1
a1.sinks = k1 k2

# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
#配置为故障转移(failover)
a1.sinkgroups.g1.processor.type = failover
a1.sinkgroups.g1.processor.priority.k1 = 5
a1.sinkgroups.g1.processor.priority.k2 = 10
a1.sinkgroups.g1.processor.maxpenalty = 10000

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
#sink组的绑定
a1.sinkgroups.g1.sinks = k1 k2
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c1
  • Start flume
#启动脚本           flume的conf目录   agent名字       执行的配置文件
[root@hostname flume]# bin/flume-ng agent -c conf/ -n a1 -f job/flume-netcat-logger.conf

logger: print to the console

[root@hostname flume]# bin/flume-ng agent --conf conf/ --name a1 --conf-file job/flume-netcat-logger.conf -Dflume.root.logger=INFO,console

#缩写形式
[root@hostname flume]# bin/flume-ng agent -c conf/ -n a1 -f job/flume-netcat-logger.conf -Dflume.root.logger=INFO,console
  • Configure the service file for starting the service at boot
[root@hostname bin]#  cat /lib/systemd/system/flume.service
[Unit]
Description=Apache Flume
Wants=network-online.target
After=network-online.target

[Service]
EnvironmentFile=-/opt/flume/conf/flume.env
WorkingDirectory=/opt/flume
User=logmanager
Group=logmanager
ExecStart=/opt/flume/bin/flume-ng agent \
    --name a1 \
    -c conf \
    -f conf/flume-conf.properties \
    -Dflume.root.logger=INFO,LOGFILE
LimitNOFILE=65536
LimitNPROC=2048
KillSignal=SIGTERM
KillMode=process
SendSIGKILL=no
Restart=always
TimeoutStopSec=60

[Install]
WantedBy=multi-user.target
# 启动系统服务,需保证flume使用的端口未被占用
[root@hostname bin]# systemctl daemon-reload
[root@hostname bin]# systemctl enable --now flume

Uninstalling the probe

Add the agent-destroy role to the ansible roles directory, add a [destroy] group to the inventory file and put the servers whose probes should be removed into that group. Make sure the inventory contains connection information for those servers (key-based SSH access, or a user and password configured in the inventory file) and that the flume group variables exist under inventory/group_vars. Then configure the ansible-playbook entry file.

[root@hostname ansible]# tree .
ansible_project_root
├── inventory
│   ├── group_vars
│   │   ├── all.yml
│   │   └── flume.yml
│   └── inventory.ini
├── agent-destroy.yml
└── roles
    └── agent-destroy
        └── tasks
            └── main.yml
[root@hostname ansible]# cat inventory/inventory.ini
...
[destroy]
hostname-1 ansible_host=10.0.0.1
hostname-2 ansible_host=10.0.0.2
hostname-3 ansible_host=10.0.0.3
...
[root@hostname ansible]# cat agent-destroy.yml
---
- hosts: destroy
  gather_facts: true
  max_fail_percentage: 0
  roles:
  - agent-destroy

Once the configuration checks pass, run the ansible playbook with the following command

[root@hostname ansible]# ansible-playbook -i inventory/inventory.ini agent-destroy.yml

If you see output similar to the following and every host reports unreachable=0 failed=0, the run succeeded

...
hostname-1  : ok=1    changed=1    unreachable=0    failed=0    skipped=1    rescued=0    ignored=1
hostname-2  : ok=1    changed=1    unreachable=0    failed=0    skipped=1    rescued=0    ignored=1
hostname-3  : ok=1    changed=1    unreachable=0    failed=0    skipped=1    rescued=0    ignored=1
