# example.conf: A single-node Flume configuration
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
--conf-file /todo/flume/example.conf --name a1
这个类主要有三部分.
1.加载参数
2.设置服务
3.启动服务, 设置监控&钩子
代码如图:
加载参数
接下来设置服务:
PollingPropertiesFileConfigurationProvider configurationProvider =
new PollingPropertiesFileConfigurationProvider(agentName, configurationFile, eventBus, 30);
启动服务& 监控&钩子
建立监控&钩子 我就不细说了, 直接说创建服务了
PollingPropertiesFileConfigurationProvider 继承了 :PropertiesFileConfigurationProvider 继承了 : AbstractConfigurationProvider
在抽象类:AbstractConfigurationProvider
private final String agentName;
private final SourceFactory sourceFactory;
private final SinkFactory sinkFactory;
private final ChannelFactory channelFactory;
分别声明了Source , Sink , Channel 工厂.
在构造方法中进行了初始化
Source , Sink , Channel 默认分别实现自己对应的工厂接口.
核心的方法是 create(String name, String type) 方法
其实就是根据 type 实例不同的Source , Sink , Channel 实例.
代码如下:
到这里的时候, 服务只是创建了, 在哪里启动的呢???
application.start();
直接跟进去
org.apache.flume.node.Application#start
monitorService: 这个是一个线程池用于执行程序,调用 start 方法: ScheduledThreadPoolExecutor ( 在类初始化的时候创建. )
其实就是调用:
会调用org.apache.flume.node.FileWatcherRunnable#run 方法
这个方法分两步.
1.读取配置文件.
2.生成 source、channel、sink 对应的 runner . 加入到 MaterializedConfiguration 中
第一步. 读取配置文件.
第二步: 生成 source、channel、sink 对应的 runner . 加入到 MaterializedConfiguration 中
主要是根据配置文件生成 runner 这部分需要细说一下.
这里会生成 Channels, Sources , Sinks 三种 runner
loadChannels(agentConf, channelComponentMap);
loadSources(agentConf, channelComponentMap, sourceRunnerMap);
loadSinks(agentConf, channelComponentMap, sinkRunnerMap);
分别说明:
这个方法 (loadChannels) 主要干了四件事.
1.将缓存的 channel 加入到一个 ListMultimap
2. 创建具有ComponentConfiguration 对象的Channel 实例
3. 创建没有ComponentConfiguration 对象, 但是配置 context 的Channel 实例
4.将缓存中的 channel 与新生成的 channel 做匹配, 去掉配置项中没有的 channel
private void loadChannels(AgentConfiguration agentConf,
Map channelComponentMap)
throws InstantiationException {
LOGGER.info("Creating channels");
//todo 缓存中的 channel
ListMultimap, String> channelsNotReused =
ArrayListMultimap.create();
// assume all channels will not be re-used
for (Map.Entry, Map> entry :
channelCache.entrySet()) {
Class extends Channel> channelKlass = entry.getKey();
Set channelNames = entry.getValue().keySet();
channelsNotReused.get(channelKlass).addAll(channelNames);
}
Set channelNames = agentConf.getChannelSet();
Map compMap = agentConf.getChannelConfigMap();
for (String chName : channelNames) {
ComponentConfiguration comp = compMap.get(chName);
if (comp != null) {
// todo 使用工厂类创建Channel
Channel channel = getOrCreateChannel(channelsNotReused,
comp.getComponentName(), comp.getType());
try {
//todo 更新配置 , 因为 channelComponentMap 刚开始传进来的时候是空值
Configurables.configure(channel, comp);
channelComponentMap.put(comp.getComponentName(),
new ChannelComponent(channel));
LOGGER.info("Created channel " + chName);
} catch (Exception e) {
String msg = String.format("Channel %s has been removed due to an " +
"error during configuration", chName);
LOGGER.error(msg, e);
}
}
}
//todo 组合没有 ComponentConfiguration配置, 仅仅使用Context的对象.
for (String chName : channelNames) {
Context context = agentConf.getChannelContext().get(chName);
if (context != null) {
// todo 使用工厂类创建Channel
Channel channel = getOrCreateChannel(channelsNotReused, chName,
context.getString(BasicConfigurationConstants.CONFIG_TYPE));
try {
// todo 更新配置 , 因为 channelComponentMap 刚开始传进来的时候是空值
Configurables.configure(channel, context);
channelComponentMap.put(chName, new ChannelComponent(channel));
LOGGER.info("Created channel " + chName);
} catch (Exception e) {
String msg = String.format("Channel %s has been removed due to an " +
"error during configuration", chName);
LOGGER.error(msg, e);
}
}
}
for (Class extends Channel> channelKlass : channelsNotReused.keySet()) {
Map channelMap = channelCache.get(channelKlass);
if (channelMap != null) {
for (String channelName : channelsNotReused.get(channelKlass)) {
if (channelMap.remove(channelName) != null) {
LOGGER.info("Removed {} of type {}", channelName, channelKlass);
}
}
if (channelMap.isEmpty()) {
//todo 有一些 channel 在配置中没有重新使用, 将会将其从缓存中移除.
channelCache.remove(channelKlass);
}
}
}
}
private Channel getOrCreateChannel(
ListMultimap, String> channelsNotReused,
String name, String type)
throws FlumeException {
// todo 根据传入的类型, 获取对应的类
Class extends Channel> channelClass = channelFactory.getClass(type);
/*
* Channel has requested a new instance on each re-configuration
* todo 根据新的配置, 实例化对象.
*/
//todo 如何类的注解 Disposable 存在, 则直接进行实例化,并返回 只有 jdbc 和 file 模式用到了
if (channelClass.isAnnotationPresent(Disposable.class)) {
Channel channel = channelFactory.create(name, type);
channel.setName(name);
return channel;
}
Map channelMap = channelCache.get(channelClass);
//todo 如果缓存中不存在 channel 的话, 那么直接加入缓存.
if (channelMap == null) {
channelMap = new HashMap();
channelCache.put(channelClass, channelMap);
}
//todo 如果channelMap 中的 channel 为 null ,使用工厂类创建.
Channel channel = channelMap.get(name);
if (channel == null) {
channel = channelFactory.create(name, type);
channel.setName(name);
channelMap.put(name, channel);
}
//todo 如果缓存中已经存在对应的 channel 的话,那么移除它, 后续的方法会更新它 .
channelsNotReused.get(channelClass).remove(name);
return channel;
}
读取配置文件生成 source , 然后创建 sourceRunner, 并注册到 channel
/**
 * Instantiates every source named in the agent configuration, wires it to its channels
 * through a {@link ChannelProcessor}, wraps it in a {@link SourceRunner} and records the
 * source's name on each channel it writes to.
 *
 * <p>Sources with a {@link ComponentConfiguration} are handled first, then sources that are
 * described only by a {@link Context}. A source whose setup throws is logged and skipped.
 *
 * @param agentConf the parsed agent configuration
 * @param channelComponentMap channel name -> component, as filled by {@code loadChannels}
 * @param sourceRunnerMap output map (source name -> runner); filled by this method
 * @throws InstantiationException declared by the original signature
 */
private void loadSources(AgentConfiguration agentConf,
    Map<String, ChannelComponent> channelComponentMap,
    Map<String, SourceRunner> sourceRunnerMap)
    throws InstantiationException {

  Set<String> sourceNames = agentConf.getSourceSet();
  Map<String, ComponentConfiguration> compMap =
      agentConf.getSourceConfigMap();
  /*
   * Components which have a ComponentConfiguration object
   */
  for (String sourceName : sourceNames) {
    ComponentConfiguration comp = compMap.get(sourceName);
    if (comp != null) {
      SourceConfiguration config = (SourceConfiguration) comp;

      // Instantiate the source by type through the source factory.
      Source source = sourceFactory.create(comp.getComponentName(),
          comp.getType());
      try {
        // Configure the source and resolve the channels it is bound to.
        Configurables.configure(source, config);
        Set<String> channelNames = config.getChannels();
        List<Channel> sourceChannels =
            getSourceChannels(channelComponentMap, source, channelNames);
        if (sourceChannels.isEmpty()) {
          String msg = String.format("Source %s is not connected to a " +
              "channel", sourceName);
          throw new IllegalStateException(msg);
        }
        // Build the channel selector and the processor that uses it.
        ChannelSelectorConfiguration selectorConfig =
            config.getSelectorConfiguration();

        ChannelSelector selector = ChannelSelectorFactory.create(
            sourceChannels, selectorConfig);

        ChannelProcessor channelProcessor = new ChannelProcessor(selector);
        Configurables.configure(channelProcessor, config);

        source.setChannelProcessor(channelProcessor);

        // Wrap the source in the SourceRunner that will drive it.
        sourceRunnerMap.put(comp.getComponentName(),
            SourceRunner.forSource(source));

        // Register the source's name on every channel it writes to.
        for (Channel channel : sourceChannels) {
          ChannelComponent channelComponent =
              Preconditions.checkNotNull(channelComponentMap.get(channel.getName()),
                  String.format("Channel %s", channel.getName()));
          channelComponent.components.add(sourceName);
        }
      } catch (Exception e) {
        // A source that fails to configure is logged and gets no runner.
        String msg = String.format("Source %s has been removed due to an " +
            "error during configuration", sourceName);
        LOGGER.error(msg, e);
      }
    }
  }
  /*
   * Components which DO NOT have a ComponentConfiguration object
   * and use only Context
   */
  Map<String, Context> sourceContexts = agentConf.getSourceContext();
  for (String sourceName : sourceNames) {
    Context context = sourceContexts.get(sourceName);
    if (context != null) {
      // Instantiate the source by the type stored in its context.
      Source source =
          sourceFactory.create(sourceName,
              context.getString(BasicConfigurationConstants.CONFIG_TYPE));
      try {
        Configurables.configure(source, context);
        // The channel list is a whitespace-separated string in the context.
        String[] channelNames = context.getString(
            BasicConfigurationConstants.CONFIG_CHANNELS).split("\\s+");

        // Resolve the channels the source is bound to.
        List<Channel> sourceChannels =
            getSourceChannels(channelComponentMap, source, Arrays.asList(channelNames));
        if (sourceChannels.isEmpty()) {
          String msg = String.format("Source %s is not connected to a " +
              "channel", sourceName);
          throw new IllegalStateException(msg);
        }

        // Build the channel selector from the selector sub-properties.
        Map<String, String> selectorConfig = context.getSubProperties(
            BasicConfigurationConstants.CONFIG_SOURCE_CHANNELSELECTOR_PREFIX);

        ChannelSelector selector = ChannelSelectorFactory.create(
            sourceChannels, selectorConfig);

        ChannelProcessor channelProcessor = new ChannelProcessor(selector);
        Configurables.configure(channelProcessor, context);
        source.setChannelProcessor(channelProcessor);

        // Wrap the source in the SourceRunner that will drive it.
        sourceRunnerMap.put(sourceName,
            SourceRunner.forSource(source));

        // Register the source's name on every channel it writes to.
        for (Channel channel : sourceChannels) {
          ChannelComponent channelComponent =
              Preconditions.checkNotNull(channelComponentMap.get(channel.getName()),
                  String.format("Channel %s", channel.getName()));
          channelComponent.components.add(sourceName);
        }
      } catch (Exception e) {
        String msg = String.format("Source %s has been removed due to an " +
            "error during configuration", sourceName);
        LOGGER.error(msg, e);
      }
    }
  }
}
读取配置文件生成 sink , 并注册到 channel , 然后根据分组情况创建 sinkRunner; 未设置分组的 sink, 单独创建 sinkRunner
/**
 * Instantiates every sink named in the agent configuration, attaches it to its channel and
 * records the sink's name on that channel, then hands the resulting sinks to
 * {@code loadSinkGroups} to create the sink runners.
 *
 * <p>Sinks with a {@link ComponentConfiguration} are handled first, then sinks that are
 * described only by a {@link Context}. A sink whose setup throws is logged and skipped.
 *
 * @param agentConf the parsed agent configuration
 * @param channelComponentMap channel name -> component, as filled by {@code loadChannels}
 * @param sinkRunnerMap output map (runner name -> runner); filled via {@code loadSinkGroups}
 * @throws InstantiationException propagated from {@code loadSinkGroups}
 */
private void loadSinks(AgentConfiguration agentConf,
    Map<String, ChannelComponent> channelComponentMap, Map<String, SinkRunner> sinkRunnerMap)
    throws InstantiationException {
  Set<String> sinkNames = agentConf.getSinkSet();
  Map<String, ComponentConfiguration> compMap =
      agentConf.getSinkConfigMap();
  // Successfully configured sinks, keyed by sink name.
  Map<String, Sink> sinks = new HashMap<String, Sink>();
  /*
   * Components which have a ComponentConfiguration object
   */
  for (String sinkName : sinkNames) {
    ComponentConfiguration comp = compMap.get(sinkName);
    if (comp != null) {
      // Instantiate the sink by type through the sink factory.
      SinkConfiguration config = (SinkConfiguration) comp;
      Sink sink = sinkFactory.create(comp.getComponentName(), comp.getType());
      try {
        // Configure the sink and look up the channel it reads from.
        Configurables.configure(sink, config);
        ChannelComponent channelComponent = channelComponentMap.get(config.getChannel());
        if (channelComponent == null) {
          String msg = String.format("Sink %s is not connected to a " +
              "channel", sinkName);
          throw new IllegalStateException(msg);
        }
        // Check sink/channel compatibility (per the author's note: the sink's batch size
        // must not exceed the channel's transaction capacity).
        checkSinkChannelCompatibility(sink, channelComponent.channel);
        sink.setChannel(channelComponent.channel);
        sinks.put(comp.getComponentName(), sink);
        // Register the sink's name on the channel it reads from.
        channelComponent.components.add(sinkName);
      } catch (Exception e) {
        // A sink that fails to configure is logged and left out of the sinks map.
        String msg = String.format("Sink %s has been removed due to an " +
            "error during configuration", sinkName);
        LOGGER.error(msg, e);
      }
    }
  }
  /*
   * Components which DO NOT have a ComponentConfiguration object
   * and use only Context
   */
  Map<String, Context> sinkContexts = agentConf.getSinkContext();
  for (String sinkName : sinkNames) {
    Context context = sinkContexts.get(sinkName);
    if (context != null) {
      // Instantiate the sink by the type stored in its context.
      Sink sink = sinkFactory.create(sinkName, context.getString(
          BasicConfigurationConstants.CONFIG_TYPE));
      try {
        // Configure the sink and look up the channel it reads from.
        Configurables.configure(sink, context);
        ChannelComponent channelComponent =
            channelComponentMap.get(
                context.getString(BasicConfigurationConstants.CONFIG_CHANNEL));
        if (channelComponent == null) {
          String msg = String.format("Sink %s is not connected to a " +
              "channel", sinkName);
          throw new IllegalStateException(msg);
        }
        // Check sink/channel compatibility (per the author's note: the sink's batch size
        // must not exceed the channel's transaction capacity).
        checkSinkChannelCompatibility(sink, channelComponent.channel);
        sink.setChannel(channelComponent.channel);
        sinks.put(sinkName, sink);
        // Register the sink's name on the channel it reads from.
        channelComponent.components.add(sinkName);
      } catch (Exception e) {
        String msg = String.format("Sink %s has been removed due to an " +
            "error during configuration", sinkName);
        LOGGER.error(msg, e);
      }
    }
  }

  // Assign the sinks to their groups and create the sink runners.
  loadSinkGroups(agentConf, sinks, sinkRunnerMap);
}
/**
 * Creates one {@link SinkRunner} per configured sink group and one per sink that is left
 * over after all groups have claimed their members.
 *
 * <p>Sinks claimed by a group are removed from {@code sinks}, so the map is modified.
 *
 * @param agentConf the parsed agent configuration
 * @param sinks configured sinks keyed by sink name; grouped sinks are removed from it
 * @param sinkRunnerMap output map (group name or sink name -> runner)
 * @throws InstantiationException if a group references a sink that is already used by
 *     another group, or that does not exist / was not configured successfully
 */
private void loadSinkGroups(AgentConfiguration agentConf,
    Map<String, Sink> sinks, Map<String, SinkRunner> sinkRunnerMap)
    throws InstantiationException {
  // Sink groups declared in the configuration.
  Set<String> sinkGroupNames = agentConf.getSinkgroupSet();
  Map<String, ComponentConfiguration> compMap =
      agentConf.getSinkGroupConfigMap();
  // sink name -> name of the group that claimed it
  Map<String, String> usedSinks = new HashMap<String, String>();
  for (String groupName: sinkGroupNames) {
    ComponentConfiguration comp = compMap.get(groupName);
    if (comp != null) {
      SinkGroupConfiguration groupConf = (SinkGroupConfiguration) comp;
      List<Sink> groupSinks = new ArrayList<Sink>();
      for (String sink : groupConf.getSinks()) {
        // Claim the sink: a null result means it is unknown or already claimed.
        Sink s = sinks.remove(sink);
        if (s == null) {
          String sinkUser = usedSinks.get(sink);
          if (sinkUser != null) {
            throw new InstantiationException(String.format(
                "Sink %s of group %s already " +
                    "in use by group %s", sink, groupName, sinkUser));
          } else {
            throw new InstantiationException(String.format(
                "Sink %s of group %s does "
                    + "not exist or is not properly configured", sink,
                groupName));
          }
        }
        groupSinks.add(s);
        usedSinks.put(sink, groupName);
      }
      try {
        SinkGroup group = new SinkGroup(groupSinks);
        Configurables.configure(group, groupConf);
        // One runner drives the whole group through the group's processor.
        sinkRunnerMap.put(comp.getComponentName(),
            new SinkRunner(group.getProcessor()));
      } catch (Exception e) {
        String msg = String.format("SinkGroup %s has been removed due to " +
            "an error during configuration", groupName);
        LOGGER.error(msg, e);
      }
    }
  }
  // add any unassigned sinks to solo collectors
  for (Entry<String, Sink> entry : sinks.entrySet()) {
    // NOTE(review): usedSinks maps sink name -> group name, so containsValue compares this
    // sink's name against GROUP names. A leftover sink that shares its name with a
    // non-empty group is therefore skipped. Confirm whether containsKey was intended
    // before changing it: runners for groups and solo sinks share sinkRunnerMap's keys.
    if (!usedSinks.containsValue(entry.getKey())) {
      try {
        SinkProcessor pr = new DefaultSinkProcessor();
        List<Sink> sinkMap = new ArrayList<Sink>();
        sinkMap.add(entry.getValue());
        pr.setSinks(sinkMap);
        Configurables.configure(pr, new Context());
        // A solo sink gets its own runner, keyed by the sink's name.
        sinkRunnerMap.put(entry.getKey(), new SinkRunner(pr));
      } catch (Exception e) {
        String msg = String.format("SinkGroup %s has been removed due to " +
            "an error during configuration", entry.getKey());
        LOGGER.error(msg, e);
      }
    }
  }
}
将 Channels, Sources , Sinks 具体的Runner 加载进配置. 直接返回配置.
再回到上一级方法: org.apache.flume.node.PollingPropertiesFileConfigurationProvider#run
eventBus 发送的信息去哪了呢? 干了啥?
这个方法添加了 @Subscribe 注解.
/**
 * Starts every component of the given configuration under the lifecycle supervisor.
 *
 * <p>Order: all channels are submitted first; the method then blocks, polling every
 * 500 ms, until each channel has reached {@code START} or is reported to be in an error
 * state; after that the sink runners and finally the source runners are submitted, and
 * monitoring is loaded. A failure to submit one component is logged and does not stop the
 * remaining components from being submitted.
 *
 * @param materializedConfiguration the channels, sink runners and source runners to start
 */
private void startAllComponents(MaterializedConfiguration materializedConfiguration) {
  logger.info("Starting new configuration:{}", materializedConfiguration);

  this.materializedConfiguration = materializedConfiguration;

  // Start all channels.
  for (Entry<String, Channel> entry :
      materializedConfiguration.getChannels().entrySet()) {
    try {
      logger.info("Starting Channel " + entry.getKey());
      supervisor.supervise(entry.getValue(),
          new SupervisorPolicy.AlwaysRestartPolicy(), LifecycleState.START);
    } catch (Exception e) {
      logger.error("Error while starting {}", entry.getValue(), e);
    }
  }

  /*
   * Wait for all channels to start.
   */
  for (Channel ch : materializedConfiguration.getChannels().values()) {
    while (ch.getLifecycleState() != LifecycleState.START
        && !supervisor.isComponentInErrorState(ch)) {
      try {
        logger.info("Waiting for channel: " + ch.getName() +
            " to start. Sleeping for 500 ms");
        Thread.sleep(500);
      } catch (InterruptedException e) {
        logger.error("Interrupted while waiting for channel to start.", e);
        // Restore the interrupt flag (cleared when the exception was thrown) so code
        // further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        Throwables.propagate(e);
      }
    }
  }

  // Start all sink runners.
  for (Entry<String, SinkRunner> entry : materializedConfiguration.getSinkRunners().entrySet()) {
    try {
      logger.info("Starting Sink " + entry.getKey());
      supervisor.supervise(entry.getValue(),
          new SupervisorPolicy.AlwaysRestartPolicy(), LifecycleState.START);
    } catch (Exception e) {
      logger.error("Error while starting {}", entry.getValue(), e);
    }
  }

  // Start all source runners.
  for (Entry<String, SourceRunner> entry :
      materializedConfiguration.getSourceRunners().entrySet()) {
    try {
      logger.info("Starting Source " + entry.getKey());
      supervisor.supervise(entry.getValue(),
          new SupervisorPolicy.AlwaysRestartPolicy(), LifecycleState.START);
    } catch (Exception e) {
      logger.error("Error while starting {}", entry.getValue(), e);
    }
  }

  // Load monitoring.
  this.loadMonitoring();
}
/**
 * Registers a component for supervision and schedules a {@code MonitorRunnable} for it on
 * {@code monitorService}, starting immediately and repeating with a 3-second delay.
 *
 * @param lifecycleAware the component to supervise
 * @param policy the supervision policy stored with the component
 * @param desiredState the lifecycle state the component should be driven to
 * @throws FlumeException if {@code monitorService} is shut down, terminating or terminated
 * @throws IllegalStateException if the component is already being supervised
 */
public synchronized void supervise(LifecycleAware lifecycleAware,
    SupervisorPolicy policy, LifecycleState desiredState) {
  // Refuse new work once shutdown of the monitor pool has begun.
  if (this.monitorService.isShutdown()
      || this.monitorService.isTerminated()
      || this.monitorService.isTerminating()) {
    throw new FlumeException("Supervise called on " + lifecycleAware + " " +
        "after shutdown has been initiated. " + lifecycleAware + " will not" +
        " be started");
  }

  Preconditions.checkState(!supervisedProcesses.containsKey(lifecycleAware),
      "Refusing to supervise " + lifecycleAware + " more than once");

  if (logger.isDebugEnabled()) {
    logger.debug("Supervising service:{} policy:{} desiredState:{}",
        new Object[] { lifecycleAware, policy, desiredState });
  }

  // Bookkeeping record: policy plus the desired state, with no error recorded yet.
  Supervisoree process = new Supervisoree();
  process.status = new Status();

  process.policy = policy;
  process.status.desiredState = desiredState;
  process.status.error = false;

  MonitorRunnable monitorRunnable = new MonitorRunnable();
  // The supervised component. Author's note: for example the
  // PollingPropertiesFileConfigurationProvider, whose start() method ends up being called.
  monitorRunnable.lifecycleAware = lifecycleAware;
  monitorRunnable.supervisoree = process;
  monitorRunnable.monitorService = monitorService;

  supervisedProcesses.put(lifecycleAware, process);

  // Hand the runnable to the thread pool. Author's note: monitorService is created when
  // the supervisor object is constructed.
  ScheduledFuture<?> future = monitorService.scheduleWithFixedDelay(
      monitorRunnable, 0, 3, TimeUnit.SECONDS);
  monitorFutures.put(lifecycleAware, future);
}
最后我们分别看一下 Sources , Sinks 的Runner 实例是啥样的吧.
/**
 * Picks the {@link SourceRunner} implementation that matches the kind of
 * {@link Source} supplied and binds the source to it.
 *
 * <p>A {@link PollableSource} gets a {@link PollableSourceRunner}; otherwise an
 * {@link EventDrivenSource} gets an {@link EventDrivenSourceRunner}.
 *
 * @param source The source to run
 * @return A runner that can run the specified source
 * @throws IllegalArgumentException if the source is neither a {@link PollableSource}
 *     nor an {@link EventDrivenSource}
 */
public static SourceRunner forSource(Source source) {
  // PollableSource is tested first, exactly as in the original if/else chain.
  if (source instanceof PollableSource) {
    PollableSourceRunner pollableRunner = new PollableSourceRunner();
    pollableRunner.setSource((PollableSource) source);
    return pollableRunner;
  }

  if (source instanceof EventDrivenSource) {
    EventDrivenSourceRunner eventDrivenRunner = new EventDrivenSourceRunner();
    eventDrivenRunner.setSource((EventDrivenSource) source);
    return eventDrivenRunner;
  }

  throw new IllegalArgumentException("No known runner type for source "
      + source);
}
Source 分两种:
PollableSource: 持续运行的那种. 比如 TaildirSource 要持续监控指定的文件 , 会在 start 方法里面创建线程.
具体关注:
org.apache.flume.source.PollableSourceRunner.start
org.apache.flume.source.PollingRunner
EventDrivenSource: 事件驱动的那种, 由 source 自身的机制产生事件; runner 只调用一次 source 的 start 方法, 不会另起线程轮询.
总结来说, Flume 其实就是一个生产者,消费者模式的一个变形.
source : 生产者
sink : 消费者
channel : 缓存.
目前先写到这里, 有什么不正确的地方,欢迎大家指正, 不胜感激..............