01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
a1.sources = s1
a1.channels = mc1
a1.sinks = k1 k2
# Configure source
a1.sources.s1.channels = mc1
a1.sources.s1.type = exec
a1.sources.s1.command = tail -F /data/nginx/logs/app_user_events.log
# Configure channel
a1.channels.mc1.type = memory
a1.channels.mc1.transactionCapacity = 50000
a1.channels.mc1.capacity = 100000
# Configure sinks
a1.sinks.k1.channel = mc1
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = 10.10.1.122
a1.sinks.k1.port = 44446
a1.sinks.k2.channel = mc1
a1.sinks.k2.type = avro
a1.sinks.k2.hostname = 10.10.1.121
a1.sinks.k2.port = 44446
# Configure failover
a1.sinkgroups = g1
a1.sinkgroups.g1.sinks = k1 k2
a1.sinkgroups.g1.processor.type = failover
a1.sinkgroups.g1.processor.priority.k1 = 9
a1.sinkgroups.g1.processor.priority.k2 = 7
a1.sinkgroups.g1.processor.maxpenalty = 10000
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
a2.sources = s2
a2.channels = mc2
a2.sinks = k3 k4
# Configure source
a2.sources.s2.channels = mc2
a2.sources.s2.type = exec
a2.sources.s2.command = tail -F /data/nginx/logs/push_click_events.log
# Configure channel
a2.channels.mc2.type = memory
a2.channels.mc2.capacity = 100000
a2.channels.mc2.transactionCapacity = 50000
# Configure sinks
a2.sinks.k3.channel = mc2
a2.sinks.k3.type = avro
a2.sinks.k3.hostname = 10.10.1.121
a2.sinks.k3.port = 44447
a2.sinks.k4.channel = mc2
a2.sinks.k4.type = avro
a2.sinks.k4.hostname = 10.10.1.122
a2.sinks.k4.port = 44447
# Configure failover
a2.sinkgroups = g2
a2.sinkgroups.g2.sinks = k3 k4
a2.sinkgroups.g2.processor.type = failover
a2.sinkgroups.g2.processor.priority.k3 = 9
a2.sinkgroups.g2.processor.priority.k4 = 7
a2.sinkgroups.g2.processor.maxpenalty = 10000
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
a3.sources = s3
a3.channels = mc3
a3.sinks = k5 k6
# Configure source
a3.sources.s3.channels = mc3
a3.sources.s3.type = exec
a3.sources.s3.command = tail -F /data/nginx/logs/thirdparty_click_events.log
# Configure channel
a3.channels.mc3.type = memory
a3.channels.mc3.transactionCapacity = 50000
a3.channels.mc3.capacity = 100000
# Configure sinks
a3.sinks.k5.channel = mc3
a3.sinks.k5.type = avro
a3.sinks.k5.hostname = 10.10.1.121
a3.sinks.k5.port = 44446
a3.sinks.k6.channel = mc3
a3.sinks.k6.type = avro
a3.sinks.k6.hostname = 10.10.1.122
a3.sinks.k6.port = 44446
# Configure failover
a3.sinkgroups = g3
a3.sinkgroups.g3.sinks = k5 k6
a3.sinkgroups.g3.processor.type = failover
a3.sinkgroups.g3.processor.priority.k5 = 9
a3.sinkgroups.g3.processor.priority.k6 = 7
a3.sinkgroups.g3.processor.maxpenalty = 10000
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
a4.sources = s4
a4.channels = mc4
a4.sinks = k7 k8
# Configure source
a4.sources.s4.channels = mc4
a4.sources.s4.type = exec
a4.sources.s4.command = tail -F /data/nginx/logs/ad.log
# Configure channel
a4.channels.mc4.type = memory
a4.channels.mc4.transactionCapacity = 50000
a4.channels.mc4.capacity = 100000
# Configure sinks
a4.sinks.k7.channel = mc4
a4.sinks.k7.type = avro
a4.sinks.k7.hostname = 10.10.1.121
a4.sinks.k7.port = 44448
a4.sinks.k8.channel = mc4
a4.sinks.k8.type = avro
a4.sinks.k8.hostname = 10.10.1.122
a4.sinks.k8.port = 44448
# Configure failover
a4.sinkgroups = g4
a4.sinkgroups.g4.sinks = k7 k8
a4.sinkgroups.g4.processor.type = failover
a4.sinkgroups.g4.processor.priority.k7 = 10
a4.sinkgroups.g4.processor.priority.k8 = 8
a4.sinkgroups.g4.processor.maxpenalty = 10000
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
a1.sources = s1 s2
a1.channels = fc1 fc2 fc3
a1.sinks = kk1 fk2 kk3
# Configure source:
# Configure app user event source: s1 -> fc1+fc2
a1.sources.s1.channels = fc1 fc2
a1.sources.s1.type = avro
a1.sources.s1.bind = 10.10.1.121
a1.sources.s1.port = 44446
a1.sources.s1.threads = 8
# Configure source
# Configure push click event source: s2 -> fc2+fc3
a1.sources.s2.channels = fc2 fc3
a1.sources.s2.type = avro
a1.sources.s2.bind = 10.10.1.122
a1.sources.s2.port = 44447
a1.sources.s2.threads = 4
# Configure file channel(/data1)
# Configure app user event channel: fc1 ->kk1
a1.channels.fc1.type = file
a1.channels.fc1.checkpointDir = /data1/flume/channels/app_user_event/checkpoint
a1.channels.fc1.useDualCheckpoints = true
a1.channels.fc1.backupCheckpointDir = /data1/flume/channels/app_user_event/backup
a1.channels.fc1.dataDirs = /data1/flume/channels/app_user_event/data
a1.channels.fc1.transactionCapacity = 100000
a1.channels.fc1.capacity = 500000
a1.channels.fc1.checkpointInterval = 60000
a1.channels.fc1.keep-alive = 5
a1.channels.fc1.maxFileSize = 5368709120
# Configure file channel(/data2)
# Configure app user event + push click event: fc2 - > fk2
a1.channels.fc2.type = file
a1.channels.fc2.checkpointDir = /data2/flume/channels/offline_file_event/checkpoint
a1.channels.fc2.useDualCheckpoints = true
a1.channels.fc2.backupCheckpointDir = /data2/flume/channels/offline_file_event/backup
a1.channels.fc2.dataDirs = /data2/flume/channels/offline_file_event/data
a1.channels.fc2.transactionCapacity = 100000
a1.channels.fc2.capacity = 500000
a1.channels.fc2.checkpointInterval = 60000
a1.channels.fc2.keep-alive = 5
a1.channels.fc2.maxFileSize = 5368709120
# Configure file channel(/data3)
# Configure push click channel: fc3 ->kk3
a1.channels.fc3.type = file
a1.channels.fc3.checkpointDir = /data3/flume/channels/push_click_event/checkpoint
a1.channels.fc3.useDualCheckpoints = true
a1.channels.fc3.backupCheckpointDir = /data3/flume/channels/push_click_event/backup
a1.channels.fc3.dataDirs = /data3/flume/channels/push_click_event/data
a1.channels.fc3.transactionCapacity = 100000
a1.channels.fc3.capacity = 500000
a1.channels.fc3.checkpointInterval = 60000
a1.channels.fc3.keep-alive = 5
a1.channels.fc3.maxFileSize = 5368709120
# Configure sink: RealtimeMessageSink(app user event)
a1.sinks.kk1.type = org.shirdrn.flume.sink.RealtimeMessageSink
a1.sinks.kk1.channel = fc1
a1.sinks.kk1.metadata.broker.list = kafka01:9092,kafka02:9092,kafka03:9092
a1.sinks.kk1.topic = json_user_event
a1.sinks.kk1.serializer.class = kafka.serializer.StringEncoder
a1.sinks.kk1.producer.type = async
a1.sinks.kk1.message.send.max.retries = 3
a1.sinks.kk1.client.id = flume_app_user_event_2_1
a1.sinks.kk1.event.decoder.count = 8
a1.sinks.kk1.output.stat.event.batch.size = 2000
a1.sinks.kk1.event.decoder.queue.size = 1000
# Configure sink: RichRollingFileSink
a1.sinks.fk2.type = org.shirdrn.flume.sink.RichRollingFileSink
a1.sinks.fk2.channel = fc2
a1.sinks.fk2.batchSize = 100
a1.sinks.fk2.serializer = TEXT
a1.sinks.fk2.sink.rollInterval = 60
a1.sinks.fk2.sink.directory = /data/flume/rolling_files
a1.sinks.fk2.sink.file.prefix = event
a1.sinks.fk2.sink.file.suffix = .log
a1.sinks.fk2.sink.file.pattern = yyyyMMddHHmmss
# Configure sink: RealtimeMessageSink(push click)
a1.sinks.kk3.type = org.shirdrn.flume.sink.RealtimeMessageSink
a1.sinks.kk3.channel = fc3
a1.sinks.kk3.metadata.broker.list = kafka01:9092,kafka02:9092,kafka03:9092
a1.sinks.kk3.topic = json_push_click_event
a1.sinks.kk3.serializer.class = kafka.serializer.StringEncoder
a1.sinks.kk3.producer.type = async
a1.sinks.kk3.message.send.max.retries = 3
a1.sinks.kk3.client.id = flume_push_click_2_1
a1.sinks.kk3.event.decoder.count = 4
a1.sinks.kk3.output.stat.event.batch.size = 2000
a1.sinks.kk3.event.decoder.queue.size = 1000
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
a2.sources = s3
a2.channels = fc4
a2.sinks = kk4
# Configure source: s3 -> fc4
a2.sources.s3.channels = fc4
a2.sources.s3.type = avro
a2.sources.s3.bind = 10.10.1.121
a2.sources.s3.port = 44448
a2.sources.s3.threads = 2
# Configure channel(/data4)
# Configure Ad channel: fc4 ->kk4
a2.channels.fc4.type = file
a2.channels.fc4.checkpointDir = /data4/flume/channels/ad/checkpoint
a2.channels.fc4.useDualCheckpoints = true
a2.channels.fc4.backupCheckpointDir = /data4/flume/channels/ad/backup
a2.channels.fc4.dataDirs = /data4/flume/channels/ad/data
a2.channels.fc4.transactionCapacity = 100000
a2.channels.fc4.capacity = 500000
a2.channels.fc4.checkpointInterval = 60000
a2.channels.fc4.keep-alive = 5
a2.channels.fc4.maxFileSize = 5368709120
# Configure sinks: RealtimeAdKafkaSink
a2.sinks.kk4.type = org.shirdrn.flume.sink.RealtimeAdKafkaSink
a2.sinks.kk4.channel = fc4
a2.sinks.kk4.metadata.broker.list = kafka01:9092,kafka02:9092,kafka03:9092
a2.sinks.kk4.topic = json_ad_event
a2.sinks.kk4.serializer.class = kafka.serializer.StringEncoder
a2.sinks.kk4.producer.type = async
a2.sinks.kk4.message.send.max.retries = 3
a2.sinks.kk4.client.id = flume_ad_2_1
a2.sinks.kk4.event.decoder.count = 4
a2.sinks.kk4.output.stat.event.batch.size = 2500
a2.sinks.kk4.event.decoder.queue.size = 5000
|
1
|
bin/flume-ng agent -n a1 -c conf -f conf/config.conf -Dflume.monitoring.type=http -Dflume.monitoring.port=34545
|
1
|
JAVA_OPTS="-server -Xms1024m -Xmx4096m -Dcom.sun.management.jmxremote -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:ParallelGCThreads=4 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/data/flume/logs/gc-ad.log"
|
1
2
3
4
|
a1.sources.s1.type = avro
a1.sources.s1.bind = 10.10.1.121
a1.sources.s1.port = 44446
a1.sources.s1.threads = 8
|