环境:使用的是 CDH 6.0.1 自带的 Flume 1.8:
从mysql抽取数据到hdfs
flume-ng agent --conf conf --conf-file conf/flume-conf04.properties --name a1 -Dflume.root.logger=INFO,console
a1.channels.ch1.type = memory
a1.sources.sql-source.channels = ch1
a1.channels = ch1
a1.sinks = HDFS
a1.sources = sql-source
a1.sources.sql-source.type = org.keedio.flume.source.SQLSource
a1.sources.sql-source.connection.url = jdbc:mysql://ip地址:3306/库名
a1.sources.sql-source.user = 用户名
a1.sources.sql-source.password = 密码
a1.sources.sql-source.table = 表名
a1.sources.sql-source.columns.to.select = *
a1.sources.sql-source.incremental.column.name = id
a1.sources.sql-source.incremental.value = 0
a1.sources.sql-source.run.query.delay=5000
a1.sources.sql-source.status.file.path = /var/lib/flume-ng/flume
a1.sources.sql-source.status.file.name = sql-source.status
a1.sinks.HDFS.channel = ch1
a1.sinks.HDFS.type = hdfs
a1.sinks.HDFS.hdfs.path = hdfs://node01/user/hive/warehouse/test.db/dim_period_d
a1.sinks.HDFS.hdfs.fileType = DataStream
a1.sinks.HDFS.hdfs.writeFormat = Text
a1.sinks.HDFS.hdfs.rollSize = 268435456
a1.sinks.HDFS.hdfs.rollInterval = 0
a1.sinks.HDFS.hdfs.rollCount = 0
报错:
2019-03-19 10:10:46,206 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:10:46,206 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:10:46,206 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:10:46,206 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:930)] Added sinks: HDFS Agent: a1
2019-03-19 10:10:46,206 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:10:46,206 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:10:46,207 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:10:46,207 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:10:46,207 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:10:46,224 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration.validateConfiguration(FlumeConfiguration.java:140)] Post-validation flume configuration contains configuration for agents: [a1]
2019-03-19 10:10:46,224 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.loadChannels(AbstractConfigurationProvider.java:147)] Creating channels
2019-03-19 10:10:46,230 (conf-file-poller-0) [INFO - org.apache.flume.channel.DefaultChannelFactory.create(DefaultChannelFactory.java:42)] Creating instance of channel ch1 type memory
2019-03-19 10:10:46,234 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.loadChannels(AbstractConfigurationProvider.java:201)] Created channel ch1
2019-03-19 10:10:46,235 (conf-file-poller-0) [INFO - org.apache.flume.source.DefaultSourceFactory.create(DefaultSourceFactory.java:41)] Creating instance of source sql-source, type org.keedio.flume.source.SQLSource
2019-03-19 10:10:46,242 (conf-file-poller-0) [INFO - org.keedio.flume.source.SQLSource.configure(SQLSource.java:63)] Reading and processing configuration values for source sql-source
2019-03-19 10:10:46,249 (conf-file-poller-0) [ERROR - org.apache.flume.node.AbstractConfigurationProvider.loadSources(AbstractConfigurationProvider.java:361)] Source sql-source has been removed due to an error during configuration
org.apache.flume.conf.ConfigurationException: hibernate.connection.url property not set
at org.keedio.flume.source.SQLSourceHelper.checkMandatoryProperties(SQLSourceHelper.java:294)
at org.keedio.flume.source.SQLSourceHelper.<init>(SQLSourceHelper.java:100)
at org.keedio.flume.source.SQLSource.configure(SQLSource.java:66)
at org.apache.flume.conf.Configurables.configure(Configurables.java:41)
at org.apache.flume.node.AbstractConfigurationProvider.loadSources(AbstractConfigurationProvider.java:326)
at org.apache.flume.node.AbstractConfigurationProvider.getConfiguration(AbstractConfigurationProvider.java:101)
at org.apache.flume.node.PollingPropertiesFileConfigurationProvider$FileWatcherRunnable.run(PollingPropertiesFileConfigurationProvider.java:145)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
2019-03-19 10:10:46,252 (conf-file-poller-0) [INFO - org.apache.flume.sink.DefaultSinkFactory.create(DefaultSinkFactory.java:42)] Creating instance of sink: HDFS, type: hdfs
2019-03-19 10:10:46,265 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.getConfiguration(AbstractConfigurationProvider.java:116)] Channel ch1 connected to [HDFS]
2019-03-19 10:10:46,267 (conf-file-poller-0) [INFO - org.apache.flume.node.Application.startAllComponents(Application.java:161)] Starting new configuration:{ sourceRunners:{} sinkRunners:{HDFS=SinkRunner: { policy:org.apache.flume.sink.DefaultSinkProcessor@4f5a783e counterGroup:{ name:null counters:{} } }} channels:{ch1=org.apache.flume.channel.MemoryChannel{name: ch1}} }
2019-03-19 10:10:46,268 (conf-file-poller-0) [INFO - org.apache.flume.node.Application.startAllComponents(Application.java:168)] Starting Channel ch1
2019-03-19 10:10:46,310 (lifecycleSupervisor-1-0) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.register(MonitoredCounterGroup.java:119)] Monitored counter group for type: CHANNEL, name: ch1: Successfully registered new MBean.
2019-03-19 10:10:46,310 (lifecycleSupervisor-1-0) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.start(MonitoredCounterGroup.java:95)] Component type: CHANNEL, name: ch1 started
2019-03-19 10:10:46,310 (conf-file-poller-0) [INFO - org.apache.flume.node.Application.startAllComponents(Application.java:195)] Starting Sink HDFS
2019-03-19 10:10:46,313 (lifecycleSupervisor-1-1) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.register(MonitoredCounterGroup.java:119)] Monitored counter group for type: SINK, name: HDFS: Successfully registered new MBean.
2019-03-19 10:10:46,313 (lifecycleSupervisor-1-1) [INFO - org.apache.flume.instrumentation.MonitoredCounterGroup.start(MonitoredCounterGroup.java:95)] Component type: SINK, name: HDFS started
flume-ng agent --conf conf --conf-file conf/flume-conf05.properties --name a1 -Dflume.root.logger=INFO,console
a1.channels.ch1.type = memory
a1.sources.sql-source.channels = ch1
a1.channels = ch1
a1.sinks = HDFS
a1.sources = sql-source
a1.sources.sql-source.type = org.keedio.flume.source.SQLSource
a1.sources.sql-source.hibernate.connection.url = jdbc:mysql://ip地址:3306/库名
a1.sources.sql-source.user = mysql用户名
a1.sources.sql-source.password = mysql密码
a1.sources.sql-source.table = 表名
a1.sources.sql-source.columns.to.select = *
a1.sources.sql-source.hibernate.connection.autocommit = true
a1.sources.sql-source.hibernate.dialect = org.hibernate.dialect.MySQL5Dialect
a1.sources.sql-source.connection.driver_class = com.mysql.jdbc.Driver
a1.sources.sql-source.incremental.column.name = id
a1.sources.sql-source.incremental.value = 0
a1.sources.sql-source.run.query.delay=5000
a1.sources.sql-source.status.file.path = /var/lib/flume-ng/flume
a1.sources.sql-source.status.file.name = sql-source.status
a1.sinks.HDFS.channel = ch1
a1.sinks.HDFS.type = hdfs
a1.sinks.HDFS.hdfs.path = hdfs://node01/user/hive/warehouse/test.db/dim_period_d
a1.sinks.HDFS.hdfs.fileType = DataStream
a1.sinks.HDFS.hdfs.writeFormat = Text
a1.sinks.HDFS.hdfs.rollSize = 268435456
a1.sinks.HDFS.hdfs.rollInterval = 0
a1.sinks.HDFS.hdfs.rollCount = 0
报错:
2019-03-19 10:17:34,426 (lifecycleSupervisor-1-0) [INFO - org.apache.flume.node.PollingPropertiesFileConfigurationProvider.start(PollingPropertiesFileConfigurationProvider.java:62)] Configuration provider starting
2019-03-19 10:17:34,442 (conf-file-poller-0) [INFO - org.apache.flume.node.PollingPropertiesFileConfigurationProvider$FileWatcherRunnable.run(PollingPropertiesFileConfigurationProvider.java:138)] Reloading configuration file:conf/flume-conf04.properties
2019-03-19 10:17:34,458 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:17:34,459 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:17:34,459 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:17:34,459 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:17:34,459 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:17:34,459 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:17:34,459 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:17:34,460 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:930)] Added sinks: HDFS Agent: a1
2019-03-19 10:17:34,460 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-19 10:17:34,482 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration.validateConfiguration(FlumeConfiguration.java:140)] Post-validation flume configuration contains configuration for agents: [a1]
2019-03-19 10:17:34,482 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.loadChannels(AbstractConfigurationProvider.java:147)] Creating channels
2019-03-19 10:17:34,489 (conf-file-poller-0) [INFO - org.apache.flume.channel.DefaultChannelFactory.create(DefaultChannelFactory.java:42)] Creating instance of channel ch1 type memory
2019-03-19 10:17:34,495 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.loadChannels(AbstractConfigurationProvider.java:201)] Created channel ch1
2019-03-19 10:17:34,496 (conf-file-poller-0) [INFO - org.apache.flume.source.DefaultSourceFactory.create(DefaultSourceFactory.java:41)] Creating instance of source sql-source, type org.keedio.flume.source.SQLSource
2019-03-19 10:17:34,501 (conf-file-poller-0) [INFO - org.keedio.flume.source.SQLSource.configure(SQLSource.java:63)] Reading and processing configuration values for source sql-source
2019-03-19 10:17:34,515 (conf-file-poller-0) [ERROR - org.keedio.flume.source.SQLSourceHelper.getStatusFileIndex(SQLSourceHelper.java:232)] Exception reading status file, doing back up and creating new status file
Unexpected token END OF FILE at position 0.
at org.json.simple.parser.JSONParser.parse(JSONParser.java:257)
at org.json.simple.parser.JSONParser.parse(JSONParser.java:92)
at org.keedio.flume.source.SQLSourceHelper.getStatusFileIndex(SQLSourceHelper.java:227)
at org.keedio.flume.source.SQLSourceHelper.<init>(SQLSourceHelper.java:113)
at org.keedio.flume.source.SQLSource.configure(SQLSource.java:66)
at org.apache.flume.conf.Configurables.configure(Configurables.java:41)
at org.apache.flume.node.AbstractConfigurationProvider.loadSources(AbstractConfigurationProvider.java:326)
at org.apache.flume.node.AbstractConfigurationProvider.getConfiguration(AbstractConfigurationProvider.java:101)
at org.apache.flume.node.PollingPropertiesFileConfigurationProvider$FileWatcherRunnable.run(PollingPropertiesFileConfigurationProvider.java:145)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
2019-03-19 10:17:34,548 (conf-file-poller-0) [ERROR - org.apache.flume.node.PollingPropertiesFileConfigurationProvider$FileWatcherRunnable.run(PollingPropertiesFileConfigurationProvider.java:154)] Unhandled error
java.lang.NoSuchMethodError: org.apache.flume.Context.getSubProperties(Ljava/lang/String;)Lcom/google/common/collect/ImmutableMap;
at org.keedio.flume.source.HibernateHelper.<init>(HibernateHelper.java:46)
at org.keedio.flume.source.SQLSource.configure(SQLSource.java:72)
at org.apache.flume.conf.Configurables.configure(Configurables.java:41)
at org.apache.flume.node.AbstractConfigurationProvider.loadSources(AbstractConfigurationProvider.java:326)
at org.apache.flume.node.AbstractConfigurationProvider.getConfiguration(AbstractConfigurationProvider.java:101)
at org.apache.flume.node.PollingPropertiesFileConfigurationProvider$FileWatcherRunnable.run(PollingPropertiesFileConfigurationProvider.java:145)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
创建本地状态文件的时候,如果没有先执行 hdfs dfs -chmod -R 777 /flume/mysql 修改目录的访问权限,
就会报错:
2019-03-19 14:44:29,957 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k1
2019-03-19 14:44:29,957 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k1
2019-03-19 14:44:29,957 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k1
2019-03-19 14:44:29,957 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k1
2019-03-19 14:44:29,958 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k1
2019-03-19 14:44:29,958 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:930)] Added sinks: k1 Agent: a1
2019-03-19 14:44:29,958 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k1
2019-03-19 14:44:29,976 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration.validateConfiguration(FlumeConfiguration.java:140)] Post-validation flume configuration contains configuration for agents: [a1]
2019-03-19 14:44:29,976 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.loadChannels(AbstractConfigurationProvider.java:147)] Creating channels
2019-03-19 14:44:29,984 (conf-file-poller-0) [INFO - org.apache.flume.channel.DefaultChannelFactory.create(DefaultChannelFactory.java:42)] Creating instance of channel ch-1 type memory
2019-03-19 14:44:29,990 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.loadChannels(AbstractConfigurationProvider.java:201)] Created channel ch-1
2019-03-19 14:44:29,992 (conf-file-poller-0) [INFO - org.apache.flume.source.DefaultSourceFactory.create(DefaultSourceFactory.java:41)] Creating instance of source src-1, type org.keedio.flume.source.SQLSource
2019-03-19 14:44:29,997 (conf-file-poller-0) [INFO - org.keedio.flume.source.SQLSource.configure(SQLSource.java:63)] Reading and processing configuration values for source src-1
2019-03-19 14:44:30,012 (conf-file-poller-0) [ERROR - org.keedio.flume.source.SQLSourceHelper.getStatusFileIndex(SQLSourceHelper.java:232)] Exception reading status file, doing back up and creating new status file
Unexpected token END OF FILE at position 0.
at org.json.simple.parser.JSONParser.parse(JSONParser.java:257)
at org.json.simple.parser.JSONParser.parse(JSONParser.java:92)
at org.keedio.flume.source.SQLSourceHelper.getStatusFileIndex(SQLSourceHelper.java:227)
at org.keedio.flume.source.SQLSourceHelper.<init>(SQLSourceHelper.java:113)
at org.keedio.flume.source.SQLSource.configure(SQLSource.java:66)
at org.apache.flume.conf.Configurables.configure(Configurables.java:41)
at org.apache.flume.node.AbstractConfigurationProvider.loadSources(AbstractConfigurationProvider.java:326)
at org.apache.flume.node.AbstractConfigurationProvider.getConfiguration(AbstractConfigurationProvider.java:101)
at org.apache.flume.node.PollingPropertiesFileConfigurationProvider$FileWatcherRunnable.run(PollingPropertiesFileConfigurationProvider.java:145)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
2019-03-19 14:44:30,026 (conf-file-poller-0) [ERROR - org.apache.flume.node.PollingPropertiesFileConfigurationProvider$FileWatcherRunnable.run(PollingPropertiesFileConfigurationProvider.java:154)] Unhandled error
java.lang.NoSuchMethodError: org.apache.flume.Context.getSubProperties(Ljava/lang/String;)Lcom/google/common/collect/ImmutableMap;
at org.keedio.flume.source.HibernateHelper.<init>(HibernateHelper.java:46)
at org.keedio.flume.source.SQLSource.configure(SQLSource.java:72)
at org.apache.flume.conf.Configurables.configure(Configurables.java:41)
at org.apache.flume.node.AbstractConfigurationProvider.loadSources(AbstractConfigurationProvider.java:326)
at org.apache.flume.node.AbstractConfigurationProvider.getConfiguration(AbstractConfigurationProvider.java:101)
at org.apache.flume.node.PollingPropertiesFileConfigurationProvider$FileWatcherRunnable.run(PollingPropertiesFileConfigurationProvider.java:145)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
改了执行权限之后,错误变成:
2019-03-19 14:45:43,288 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k1
2019-03-19 14:45:43,288 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k1
2019-03-19 14:45:43,289 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k1
2019-03-19 14:45:43,289 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k1
2019-03-19 14:45:43,289 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k1
2019-03-19 14:45:43,289 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:930)] Added sinks: k1 Agent: a1
2019-03-19 14:45:43,289 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:k1
2019-03-19 14:45:43,303 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration.validateConfiguration(FlumeConfiguration.java:140)] Post-validation flume configuration contains configuration for agents: [a1]
2019-03-19 14:45:43,303 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.loadChannels(AbstractConfigurationProvider.java:147)] Creating channels
2019-03-19 14:45:43,310 (conf-file-poller-0) [INFO - org.apache.flume.channel.DefaultChannelFactory.create(DefaultChannelFactory.java:42)] Creating instance of channel ch-1 type memory
2019-03-19 14:45:43,319 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.loadChannels(AbstractConfigurationProvider.java:201)] Created channel ch-1
2019-03-19 14:45:43,320 (conf-file-poller-0) [INFO - org.apache.flume.source.DefaultSourceFactory.create(DefaultSourceFactory.java:41)] Creating instance of source src-1, type org.keedio.flume.source.SQLSource
2019-03-19 14:45:43,323 (conf-file-poller-0) [INFO - org.keedio.flume.source.SQLSource.configure(SQLSource.java:63)] Reading and processing configuration values for source src-1
2019-03-19 14:45:43,343 (conf-file-poller-0) [ERROR - org.apache.flume.node.PollingPropertiesFileConfigurationProvider$FileWatcherRunnable.run(PollingPropertiesFileConfigurationProvider.java:154)] Unhandled error
java.lang.NoSuchMethodError: org.apache.flume.Context.getSubProperties(Ljava/lang/String;)Lcom/google/common/collect/ImmutableMap;
at org.keedio.flume.source.HibernateHelper.<init>(HibernateHelper.java:46)
at org.keedio.flume.source.SQLSource.configure(SQLSource.java:72)
at org.apache.flume.conf.Configurables.configure(Configurables.java:41)
at org.apache.flume.node.AbstractConfigurationProvider.loadSources(AbstractConfigurationProvider.java:326)
at org.apache.flume.node.AbstractConfigurationProvider.getConfiguration(AbstractConfigurationProvider.java:101)
at org.apache.flume.node.PollingPropertiesFileConfigurationProvider$FileWatcherRunnable.run(PollingPropertiesFileConfigurationProvider.java:145)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
org.apache.flume.Context 类位于 flume-ng-configuration-1.8.0-cdh6.0.1.jar、flume-ng-configuration.jar 这两个 jar 包里面
查看这两个 jar 包,里面都有这个方法:public Map<String, String> getSubProperties(String prefix)
放进去的 flume-ng-sql-source-1.5.2.jar 也会调用这个方法,但它期望的返回值类型不一样:CDH 自带的 jar 包里该方法的返回值类型是 Map,而 flume-ng-sql-source 编译时所依赖的版本返回值类型是 ImmutableMap。JVM 按"方法名 + 参数 + 返回值类型"查找方法,签名对不上,所以运行时抛出 NoSuchMethodError。
解决方法:https://github.com/keedio/flume-ng-sql-source/issues/74?from=singlemessage&isappinstalled=0
但是找不到flume-ng-sql-source-1.5.2.jar的源码,所以这个方法行不通。
最后:解决办法:
不使用 CDH 自带的 Flume 版本,而是自己下载 1.7 版本的 Flume,它的 jar 包里 getSubProperties 方法的返回值类型是 ImmutableMap。步骤参考:
https://blog.csdn.net/wzy0623/article/details/73650053
PS:我使用的是flume-ng-sql-source-1.5.2.jar、mysql-connector-java-5.1.34.jar ,也能正常使用。能正常抽取mysql数据了。
问题2:
从mysql抽取数据,写配置文件的时候:
flume-conf04.properties:
a1.channels.ch1.type = memory
a1.sources.sql-source.channels = ch1
a1.channels = ch1
a1.sinks = HDFS
a1.sources = sql-source
a1.sources.sql-source.type = org.keedio.flume.source.SQLSource
a1.sources.sql-source.hibernate.connection.url = jdbc:mysql://IP地址:3306/数据库?useSSL=false
#a1.sources.sql-source.connection.url = jdbc:mysql://IP地址:3306/数据库?useSSL=false
a1.sources.sql-source.user = 用户名
a1.sources.sql-source.password = 密码
a1.sources.sql-source.table = dim_period_d
a1.sources.sql-source.columns.to.select = *
#a1.sources.sql-source.hibernate.connection.autocommit = true
#a1.sources.sql-source.hibernate.dialect = org.hibernate.dialect.MySQL5Dialect
#a1.sources.sql-source.connection.driver_class = com.mysql.jdbc.Driver
a1.sources.sql-source.incremental.column.name = id
a1.sources.sql-source.incremental.value = 0
a1.sources.sql-source.run.query.delay=5000
a1.sources.sql-source.status.file.path = /var/lib/flume-ng/flume
a1.sources.sql-source.status.file.name = sql-source.status
a1.sinks.HDFS.channel = ch1
a1.sinks.HDFS.type = hdfs
a1.sinks.HDFS.hdfs.path = hdfs://node01/user/hive/warehouse/test.db/dim_period_d
a1.sinks.HDFS.hdfs.fileType = DataStream
a1.sinks.HDFS.hdfs.writeFormat = Text
a1.sinks.HDFS.hdfs.rollSize = 268435456
a1.sinks.HDFS.hdfs.rollInterval = 0
a1.sinks.HDFS.hdfs.rollCount = 0
#----------------------这种写法会报错:
2019-03-26 10:55:56,791 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-26 10:55:56,792 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-26 10:55:56,795 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-26 10:55:56,796 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:930)] Added sinks: HDFS Agent: a1
2019-03-26 10:55:56,796 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-26 10:55:56,796 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-26 10:55:56,796 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-26 10:55:56,797 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-26 10:55:56,797 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration$AgentConfiguration.addProperty(FlumeConfiguration.java:1016)] Processing:HDFS
2019-03-26 10:55:56,818 (conf-file-poller-0) [INFO - org.apache.flume.conf.FlumeConfiguration.validateConfiguration(FlumeConfiguration.java:140)] Post-validation flume configuration contains configuration for agents: [a1]
2019-03-26 10:55:56,818 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.loadChannels(AbstractConfigurationProvider.java:147)] Creating channels
2019-03-26 10:55:56,826 (conf-file-poller-0) [INFO - org.apache.flume.channel.DefaultChannelFactory.create(DefaultChannelFactory.java:42)] Creating instance of channel ch1 type memory
2019-03-26 10:55:56,832 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.loadChannels(AbstractConfigurationProvider.java:201)] Created channel ch1
2019-03-26 10:55:56,833 (conf-file-poller-0) [INFO - org.apache.flume.source.DefaultSourceFactory.create(DefaultSourceFactory.java:41)] Creating instance of source sql-source, type org.keedio.flume.source.SQLSource
2019-03-26 10:55:56,841 (conf-file-poller-0) [INFO - org.keedio.flume.source.SQLSource.configure(SQLSource.java:76)] Reading and processing configuration values for source sql-source
2019-03-26 10:55:56,844 (conf-file-poller-0) [ERROR - org.apache.flume.node.AbstractConfigurationProvider.loadSources(AbstractConfigurationProvider.java:361)] Source sql-source has been removed due to an error during configuration
org.apache.flume.conf.ConfigurationException: hibernate.connection.user property not set
at org.keedio.flume.source.SQLSourceHelper.checkMandatoryProperties(SQLSourceHelper.java:308)
at org.keedio.flume.source.SQLSourceHelper.<init>(SQLSourceHelper.java:108)
at org.keedio.flume.source.SQLSource.configure(SQLSource.java:79)
at org.apache.flume.conf.Configurables.configure(Configurables.java:41)
at org.apache.flume.node.AbstractConfigurationProvider.loadSources(AbstractConfigurationProvider.java:326)
at org.apache.flume.node.AbstractConfigurationProvider.getConfiguration(AbstractConfigurationProvider.java:101)
at org.apache.flume.node.PollingPropertiesFileConfigurationProvider$FileWatcherRunnable.run(PollingPropertiesFileConfigurationProvider.java:141)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
2019-03-26 10:55:56,851 (conf-file-poller-0) [INFO - org.apache.flume.sink.DefaultSinkFactory.create(DefaultSinkFactory.java:42)] Creating instance of sink: HDFS, type: hdfs
2019-03-26 10:55:56,869 (conf-file-poller-0) [INFO - org.apache.flume.node.AbstractConfigurationProvider.getConfiguration(AbstractConfigurationProvider.java:116)] Channel ch1 connected to [HDFS]
2019-03-26 10:55:56,889 (conf-file-poller-0) [INFO - org.apache.flume.node.Application.startAllComponents(Application.java:137)] Starting new configuration:{ sourceRunners:{} sinkRunners:{HDFS=SinkRunner: { policy:org.apache.flume.sink.DefaultSinkProcessor@7d35d83e counterGroup:{ name:null counters:{} } }} channels:{ch1=org.apache.flume.channel.MemoryChannel{name: ch1}} }
2019-03-26 10:55:56,901 (conf-file-poller-0) [INFO - org.apache.flume.node.Application.startAllComponents(Application.java:144)] Starting Channel ch1
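但是如果写成以下这样的配置文件就没问题(关键区别:用户名和密码要写成 hibernate.connection.user / hibernate.connection.password,而不是 user / password,否则就会出现上面的 "hibernate.connection.user property not set" 错误):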
flume-conf05.properties:
a1.channels = ch-1
a1.sources = src-1
a1.sinks = k1
#sql source
#For each one of the sources, the type is defined
a1.sources.src-1.type = org.keedio.flume.source.SQLSource
a1.sources.src-1.hibernate.connection.url = jdbc:mysql://IP地址:3306/数据库
#Hibernate Database connection properties
a1.sources.src-1.hibernate.connection.user = 用户名
a1.sources.src-1.hibernate.connection.password = 密码
a1.sources.src-1.hibernate.connection.autocommit = true
a1.sources.src-1.hibernate.dialect = org.hibernate.dialect.MySQL5Dialect
a1.sources.src-1.hibernate.connection.driver_class = com.mysql.jdbc.Driver
a1.sources.src-1.run.query.delay=5000
a1.sources.src-1.status.file.path = /var/lib/flume-ng/flume
a1.sources.src-1.status.file.name = src-1.status
#Custom query
a1.sources.src-1.start.from = 0
a1.sources.src-1.custom.query = select * from dim_period_d
a1.sources.src-1.batch.size = 1000
a1.sources.src-1.max.rows = 1000
a1.sources.src-1.hibernate.connection.provider_class = org.hibernate.connection.C3P0ConnectionProvider
a1.sources.src-1.hibernate.c3p0.min_size=1
a1.sources.src-1.hibernate.c3p0.max_size=10
################################################################
a1.channels.ch-1.type = memory
a1.channels.ch-1.capacity = 10000
a1.channels.ch-1.transactionCapacity = 10000
a1.channels.ch-1.byteCapacityBufferPercentage = 20
a1.channels.ch-1.byteCapacity = 800000
################################################################
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.topic = from_flume
a1.sinks.k1.brokerList = node01:9092
a1.sinks.k1.requiredAcks = 1
a1.sinks.k1.batchSize = 20
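# a1.sinks.k1.channel = c1   (stale entry: channel "c1" is not declared in a1.channels; the following line sets the effective value ch-1)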
a1.sinks.k1.channel = ch-1
a1.sources.src-1.channels=ch-1