Flume 八:本地数据传至kafka

Flume 八:本地数据传至kafka

  • 启动zookeeper
  • 启动kafka
  • 创建topic
  • 创建目录
  • 配置文件
  • 启动flume
  • 启动kafka:消费者
  • 复制文件到待监测文件夹
  • 查看kafka消费者xshell页面!!!

启动zookeeper

[root@caicai bin]# ./zkServer.sh start 

启动kafka

[root@cai bin]# ./kafka-server-start.sh ../config/server.properties 

创建topic

[root@cai bin]# ./kafka-topics.sh --create --topic users --zookeeper 192.168.101.130:2181 --partitions 1 --replication-factor 1
Created topic "users".

创建目录

# 待监测的目录:
/opt/flumelog/users1
# 监测点目录:
/opt/flumelog/checkpoint/users1
# 输出目录:
/opt/flumelog/data/users1

配置文件

文件目录:

[root@cai job2]# vi users-flume-kafka.conf 
[root@cai job2]# pwd
/opt/bigdata/flume/conf/job2

配置内容如下:

# Flume agent "users": spooling-directory source -> durable file channel -> Kafka sink.
users.sources = usersSource
users.channels = usersChannel
users.sinks = usersSink

# Spooling-directory source: picks up completed files dropped into spoolDir.
users.sources.usersSource.type = spooldir
users.sources.usersSource.spoolDir = /opt/flumelog/users1
# Only ingest files named like users_YYYY-MM-DD.csv.
# The "." before "csv" is escaped: an unescaped "." matches any character.
users.sources.usersSource.includePattern = users_[0-9]{4}-[0-9]{2}-[0-9]{2}\.csv
users.sources.usersSource.deserializer = LINE
users.sources.usersSource.deserializer.maxLineLength = 160000
# Interceptor: drop the CSV header row (the line starting with "user_id").
# Note: "^user_id*" would be wrong — the "*" quantifier binds only to the
# final "d", so it would also match "user_i", "user_idd", etc.
users.sources.usersSource.interceptors = head_filter
users.sources.usersSource.interceptors.head_filter.type = regex_filter
users.sources.usersSource.interceptors.head_filter.regex = ^user_id
users.sources.usersSource.interceptors.head_filter.excludeEvents = true

# File channel: checkpoints and data survive agent restarts.
users.channels.usersChannel.type = file
users.channels.usersChannel.checkpointDir = /opt/flumelog/checkpoint/users1
users.channels.usersChannel.dataDirs = /opt/flumelog/data/users1

# Kafka sink. The original "hdfs.batchSize" key was copy-pasted from an HDFS
# sink config and is silently ignored by KafkaSink; the correct key in the
# old-style (pre-1.7) naming used here alongside brokerList/topic is "batchSize".
users.sinks.usersSink.type = org.apache.flume.sink.kafka.KafkaSink
users.sinks.usersSink.batchSize = 640
users.sinks.usersSink.brokerList = 192.168.101.130:9092
users.sinks.usersSink.topic = users

# Wire source and sink to the channel.
users.sources.usersSource.channels = usersChannel
users.sinks.usersSink.channel = usersChannel

启动flume

./bin/flume-ng agent -c conf/ -f conf/job2/users-flume-kafka.conf -n users -Dflume.root.logger=INFO,console

启动kafka:消费者

kafka-console-consumer.sh --bootstrap-server 192.168.101.130:9092 --topic users --from-beginning

复制文件到待监测文件夹

查看文件行数

wc -l users.csv

复制文件时,记得把目标文件名改成配置文件中 includePattern 要求的格式(users_YYYY-MM-DD.csv),否则 Flume 不会采集该文件

[root@cai events]# cp users.csv  /opt/flumelog/users1/users_2020-05-26.csv

[root@cai events]# pwd
/opt/bigdata/flume/conf/events

查看kafka消费者xshell页面!!!

你可能感兴趣的:(Kafka,Flume)