flume源码分析2--配置文件的加载

上面提到,Application启动的时候,PollingPropertiesFileConfigurationProvider作为唯一的LifecycleAware类型的组件,被交给监护者LifecycleSupervisor去监护。在监护方法里面,会启动一个定时调度线程去维护PollingPropertiesFileConfigurationProvider的状态:比如PollingPropertiesFileConfigurationProvider的期望状态是START,那么就每隔几秒检查它是否处于START状态,如果不是,就调用PollingPropertiesFileConfigurationProvider的start方法。

那么在PollingPropertiesFileConfigurationProvider的start方法里面又做了什么呢?

  /**
   * Starts the configuration provider.
   *
   * <p>Checks that a configuration file has been supplied, creates a
   * single-threaded scheduler whose thread is named {@code conf-file-poller-N},
   * schedules a {@code FileWatcherRunnable} on it, and finally records the
   * lifecycle state as {@code START}.
   *
   * @throws IllegalStateException if {@code file} is {@code null}
   */
  public void start() {
    LOGGER.info("Configuration provider starting");

    Preconditions.checkState(file != null,
        "The parameter file must not be null");

    executorService = Executors.newSingleThreadScheduledExecutor(
        new ThreadFactoryBuilder()
            .setNameFormat("conf-file-poller-%d")
            .build());

    // Initial delay 0: the first check runs right away; subsequent checks are
    // spaced `interval` seconds apart.
    executorService.scheduleWithFixedDelay(
        new FileWatcherRunnable(file, counterGroup),
        0,
        interval,
        TimeUnit.SECONDS);

    lifecycleState = LifecycleState.START;

    LOGGER.debug("Configuration provider started");
  }

就是启动一个定时线程,运行fileWatcherRunnable。FileWatcherRunnable是一个内部类,看看它的run方法都做了什么:

    /**
     * Performs one poll of the watched configuration file.
     *
     * <p>Increments the {@code file.checks} counter on every call. When the
     * file's last-modified timestamp is newer than the one recorded in
     * {@code lastChange}, increments {@code file.loads}, records the new
     * timestamp and posts the freshly built configuration to the event bus.
     * Any failure while loading or posting is logged and not rethrown.
     */
    public void run() {
      LOGGER.debug("Checking file:{} for changes", file);

      counterGroup.incrementAndGet("file.checks");

      final long modifiedAt = file.lastModified();
      if (modifiedAt <= lastChange) {
        // Nothing newer than what has already been processed.
        return;
      }

      LOGGER.info("Reloading configuration file:{}", file);
      counterGroup.incrementAndGet("file.loads");

      // Recorded before loading, so a failed load is not attempted again
      // until the timestamp advances.
      lastChange = modifiedAt;

      try {
        eventBus.post(getConfiguration());
      } catch (Exception e) {
        LOGGER.error("Failed to load configuration data. Exception follows.",
            e);
      } catch (NoClassDefFoundError e) {
        LOGGER.error("Failed to start agent because dependencies were not " +
            "found in classpath. Error follows.", e);
      } catch (Throwable t) {
        // caught because the caller does not handle or log Throwables
        LOGGER.error("Unhandled error", t);
      }
    }

内容就是检查配置文件是否有改变,如果有就获取最新的配置文件,并发送到总线eventBus,然后总线会通知相应的监听方法,重启组件:

  /**
   * Event-bus subscriber invoked with a newly materialized configuration.
   *
   * <p>Calls {@code stopAllComponents()} and then
   * {@code startAllComponents(conf)}. The method is {@code synchronized}, so
   * two invocations on the same instance cannot interleave.
   *
   * @param conf the configuration to start components from
   */
  @Subscribe
  public synchronized void handleConfigurationEvent(final MaterializedConfiguration conf) {
    stopAllComponents();
    startAllComponents(conf);
  }

下面分析配置文件的具体加载:

加载主要是通过调用父类的getConfiguration方法来完成的:

public MaterializedConfiguration getConfiguration() {
  MaterializedConfiguration conf = new SimpleMaterializedConfiguration();
  FlumeConfiguration fconfig = getFlumeConfiguration();
  AgentConfiguration agentConf = fconfig.getConfigurationFor(getAgentName());
  if (agentConf != null) {
    Map, ChannelComponent> channelComponentMap = Maps.newHashMap();
    Map, SourceRunner> sourceRunnerMap = Maps.newHashMap();
    Map, SinkRunner> sinkRunnerMap = Maps.newHashMap();
    try {
      loadChannels(agentConf, channelComponentMap);
      loadSources(agentConf, channelComponentMap, sourceRunnerMap);
      loadSinks(agentConf, channelComponentMap, sinkRunnerMap);
      Set channelNames = new HashSet(channelComponentMap.keySet());
      for (String channelName : channelNames) {
        ChannelComponent channelComponent = channelComponentMap.get(channelName);
        if (channelComponent.components.isEmpty()) {
          LOGGER.warn(String.format("Channel %s has no components connected" +
              " and has been removed.", channelName));
          channelComponentMap.remove(channelName);
          Map, Channel> nameChannelMap =
              channelCache.get(channelComponent.channel.getClass());
          if (nameChannelMap != null) {
            nameChannelMap.remove(channelName);
          }
        } else {
          LOGGER.info(String.format("Channel %s connected to %s",
              channelName, channelComponent.components.toString()));
          conf.addChannel(channelName, channelComponent.channel);
        }
      }
      for (Map.Entry, SourceRunner> entry : sourceRunnerMap.entrySet()) {
        conf.addSourceRunner(entry.getKey(), entry.getValue());
      }
      for (Map.Entry, SinkRunner> entry : sinkRunnerMap.entrySet()) {
        conf.addSinkRunner(entry.getKey(), entry.getValue());
      }
    } catch (InstantiationException ex) {
      LOGGER.error("Failed to instantiate component", ex);
    } finally {
      channelComponentMap.clear();
      sourceRunnerMap.clear();
      sinkRunnerMap.clear();
    }
  } else {
    LOGGER.warn("No configuration found for this host:{}", getAgentName());
  }
  return conf;
}

getConfiguration方法首先会调用getFlumeConfiguration()方法,获取配置并封装成FlumeConfiguration,方便后续使用。因为配置文件的存储有多种形式,所以getFlumeConfiguration也有多种实现,我们先把它当作抽象方法,暂时不管。接下来就是根据配置生成ChannelComponent、SourceRunner、SinkRunner,然后再据此组装成MaterializedConfiguration返回。

分析其中的source的加载:

/**
 * Creates and wires up the sources declared for an agent.
 *
 * <p>For every source name that has a {@code ComponentConfiguration}, the
 * source is created through {@code sourceFactory}, configured, attached to
 * the channels it names, given a {@code ChannelProcessor}, and registered in
 * {@code sourceRunnerMap}. A source whose configuration fails is logged and
 * skipped rather than aborting the whole load.
 *
 * @param agentConf           configuration of the agent being loaded
 * @param channelComponentMap channel name to channel component; each channel
 *                            used by a source gets the source name appended
 *                            to its {@code components}
 * @param sourceRunnerMap     output map, source name to its runner
 * @throws InstantiationException declared by the method; the visible part
 *                                does not show where it originates
 */
private void loadSources(AgentConfiguration agentConf,
    Map<String, ChannelComponent> channelComponentMap,
    Map<String, SourceRunner> sourceRunnerMap)
    throws InstantiationException {

  Set<String> sourceNames = agentConf.getSourceSet();
  Map<String, ComponentConfiguration> compMap =
      agentConf.getSourceConfigMap();
  /*
   * Components which have a ComponentConfiguration object
   */
  for (String sourceName : sourceNames) {
    ComponentConfiguration comp = compMap.get(sourceName);
    if (comp != null) {
      SourceConfiguration config = (SourceConfiguration) comp;
      // Instantiate the source through the factory (the article notes this
      // is done via reflection).
      Source source = sourceFactory.create(comp.getComponentName(),
          comp.getType());
      try {
        // Apply the source's configuration to the Source instance.
        Configurables.configure(source, config);
        Set<String> channelNames = config.getChannels();
        List<Channel> sourceChannels = new ArrayList<Channel>();
        for (String chName : channelNames) {
          ChannelComponent channelComponent = channelComponentMap.get(chName);
          if (channelComponent != null) {
            sourceChannels.add(channelComponent.channel);
          }
        }
        if (sourceChannels.isEmpty()) {
          String msg = String.format("Source %s is not connected to a " +
              "channel",  sourceName);
          throw new IllegalStateException(msg);
        }
        // Fetch the channel selector configuration.
        ChannelSelectorConfiguration selectorConfig =
            config.getSelectorConfiguration();

        // Build the channel selector over the resolved channels.
        ChannelSelector selector = ChannelSelectorFactory.create(
            sourceChannels, selectorConfig);

        ChannelProcessor channelProcessor = new ChannelProcessor(selector);
        // Configure the channel processor (per the article, this is where
        // the interceptor chain is built and attached).
        Configurables.configure(channelProcessor, config);

        // Hand the channel processor to the source.
        source.setChannelProcessor(channelProcessor);
        // Wrap the source in a SourceRunner and register it by name.
        sourceRunnerMap.put(comp.getComponentName(),
            SourceRunner.forSource(source));
        for (Channel channel : sourceChannels) {
          ChannelComponent channelComponent =
              Preconditions.checkNotNull(channelComponentMap.get(channel.getName()),
                                         String.format("Channel %s", channel.getName()));
          channelComponent.components.add(sourceName);
        }
      } catch (Exception e) {
        String msg = String.format("Source %s has been removed due to an " +
            "error during configuration", sourceName);
        LOGGER.error(msg, e);
      }
    }
  }
  // (excerpt ends here: the remainder of the method is not shown)


==========================分割线=================================

看了主体流程,然后分析上面提到的getFlumeConfiguration的实现:

  /**
   * Reads the properties file and wraps its contents in a
   * {@code FlumeConfiguration}.
   *
   * <p>The {@code Properties} implementation used for parsing is taken from
   * the {@code propertiesImplementation} system property, falling back to
   * {@code DEFAULT_PROPERTIES_IMPLEMENTATION}.
   *
   * @return the configuration parsed from {@code file}
   */
  @Override
  public FlumeConfiguration getFlumeConfiguration() {
    BufferedReader reader = null;
    try {
      reader = new BufferedReader(new FileReader(file));
      String resolverClassName = System.getProperty("propertiesImplementation",
          DEFAULT_PROPERTIES_IMPLEMENTATION);
      // The bounded wildcard lets newInstance() be assigned to Properties
      // without a cast.
      Class<? extends Properties> propsclass = Class.forName(resolverClassName)
          .asSubclass(Properties.class);
      Properties properties = propsclass.newInstance();
      properties.load(reader);
      return new FlumeConfiguration(toMap(properties));
    } catch (IOException ex) {
      LOGGER.error("Unable to load file:" + file
          + " (I/O failure) - Exception follows.", ex);
    }
    // (excerpt ends here: the remaining catch/cleanup code and the fallback
    // return are not shown)
    // NOTE(review): confirm that the omitted part closes `reader`.

看看FlumeConfiguration的构造方法:

  /**
   * Builds the in-memory configuration from a flat set of properties.
   *
   * <p>Every property is handed to {@code addRawProperty}; properties it
   * rejects are logged as ignored. Once all properties are consumed,
   * {@code validateConfiguration()} is run.
   *
   * @param properties the raw key/value pairs read from the configuration
   */
  public FlumeConfiguration(Properties properties) {
    // Agent name -> that agent's configuration.
    agentConfigMap = new HashMap<String, AgentConfiguration>();
    // Errors collected while parsing and validating.
    errors = new LinkedList<FlumeConfigurationError>();
    // Construct the in-memory component hierarchy
    for (Object name : properties.keySet()) {
      Object value = properties.get(name);
      if (!addRawProperty(name.toString(), value.toString())) {
        logger.warn("Configuration property ignored: " + name + " = " + value);
      }
    }
    // Now iterate thru the agentContext and create agent configs and add them
    // to agentConfigMap

    // validate and remove improperly configured components
    validateConfiguration();
  }

假如配置文件内容如下:

 * host1.sources = avroSource thriftSource
 * host1.channels = jdbcChannel
 * host1.sinks = hdfsSink
 *
 * # avroSource configuration
 * host1.sources.avroSource.type = org.apache.flume.source.AvroSource
 * host1.sources.avroSource.runner.type = avro
 * host1.sources.avroSource.runner.port = 11001
 * host1.sources.avroSource.channels = jdbcChannel
 * host1.sources.avroSource.selector.type = replicating
 *
 * # thriftSource configuration
 * host1.sources.thriftSource.type = org.apache.flume.source.ThriftSource
 * host1.sources.thriftSource.runner.type = thrift
 * host1.sources.thriftSource.runner.port = 12001
 * host1.sources.thriftSource.channels = jdbcChannel
 *
 * # jdbcChannel configuration
 * host1.channels.jdbcChannel.type = jdbc
 * host1.channels.jdbcChannel.jdbc.driver = com.mysql.jdbc.Driver
 * host1.channels.jdbcChannel.jdbc.connect.url = http://localhost/flumedb
 * host1.channels.jdbcChannel.jdbc.username = flume
 * host1.channels.jdbcChannel.jdbc.password = flume
 *
 * # hdfsSink configuration
 * host1.sinks.hdfsSink.type = hdfs
 * host1.sinks.hdfsSink.hdfs.path = hdfs://localhost/
 * host1.sinks.hdfsSink.batchsize = 1000
 * host1.sinks.hdfsSink.runner.type = polling
 * host1.sinks.hdfsSink.runner.polling.interval = 60

主要是调用addRawProperty,进入:

/**
 * Splits one raw property into agent name and agent-local key, then forwards
 * it to that agent's {@code AgentConfiguration}.
 *
 * <p>The text before the first '.' of the trimmed name is the agent name;
 * everything after it is the configuration key. An
 * {@code AgentConfiguration} is created on first use of an agent name.
 *
 * @param name  full property name, e.g. {@code host1.sources}
 * @param value property value
 * @return {@code false} (after recording an error in {@code errors}) when
 *         the name or value is null, the value is blank, or the agent name
 *         or configuration key is missing; otherwise whatever
 *         {@code AgentConfiguration.addProperty} returns
 */
private boolean addRawProperty(String name, String value) {
  // Null names and values not supported
  if (name == null || value == null) {
    errors.add(new FlumeConfigurationError("", "",
        FlumeConfigurationErrorType.AGENT_NAME_MISSING,
        ErrorOrWarning.ERROR));
    return false;
  }

  // Blank values not supported; the error carries the name as given.
  if (value.trim().isEmpty()) {
    errors.add(new FlumeConfigurationError(name, "",
        FlumeConfigurationErrorType.PROPERTY_VALUE_NULL,
        ErrorOrWarning.ERROR));
    return false;
  }

  final String fullKey = name.trim();
  final String cleanValue = value.trim();
  final int dot = fullKey.indexOf('.');

  // No '.' at all (-1) or a leading '.' (0): either way there is no agent
  // name in front of the key.
  if (dot < 1) {
    errors.add(new FlumeConfigurationError(fullKey, "",
        FlumeConfigurationErrorType.AGENT_NAME_MISSING,
        ErrorOrWarning.ERROR));
    return false;
  }

  final String agentName = fullKey.substring(0, dot);
  final String configKey = fullKey.substring(dot + 1);

  // Something must follow the agent name.
  if (configKey.isEmpty()) {
    errors.add(new FlumeConfigurationError(fullKey, "",
        FlumeConfigurationErrorType.PROPERTY_NAME_NULL,
        ErrorOrWarning.ERROR));
    return false;
  }

  AgentConfiguration agentConfig = agentConfigMap.get(agentName);
  if (agentConfig == null) {
    agentConfig = new AgentConfiguration(agentName, errors);
    agentConfigMap.put(agentName, agentConfig);
  }

  // Each configuration key must begin with one of the three prefixes:
  // sources, sinks, or channels.
  return agentConfig.addProperty(configKey, cleanValue);
}


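addRawProperty会以属性名中的第一个"."为界做分割,得到agentName和configKey,value则是属性值。例如 host1.sources.avroSource.type 会被拆成 agentName=host1、configKey=sources.avroSource.type。每个agent的具体配置是封装在内部类AgentConfiguration里面的,拆分后的configKey和value会交给它的addProperty方法处理: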


/**
 * Records one agent-local property.
 *
 * <p>The key is first compared with the four component-list keys (sources,
 * sinks, channels, sink groups). Each list may be set only once; a second
 * assignment is logged, recorded as a {@code DUPLICATE_PROPERTY} error and
 * rejected. Otherwise the key is parsed against the source, channel, sink
 * and sink-group prefixes in that order, and the value is stored in the
 * {@code Context} of the matching component (created on first use).
 *
 * @param key   property key with the agent name already removed
 * @param value property value
 * @return {@code true} when the property was stored; {@code false} when it
 *         was a duplicate list or matched none of the known keys/prefixes
 */
private boolean addProperty(String key, String value) {
  // List of source names
  if (key.equals(BasicConfigurationConstants.CONFIG_SOURCES)) {
    if (sources != null) {
      logger
          .warn("Duplicate source list specified for agent: " + agentName);
      errorList.add(new FlumeConfigurationError(agentName,
          BasicConfigurationConstants.CONFIG_SOURCES,
          FlumeConfigurationErrorType.DUPLICATE_PROPERTY,
          ErrorOrWarning.ERROR));
      return false;
    }
    sources = value;
    return true;
  }

  // List of sink names (same handling, plus an info log on success)
  if (key.equals(BasicConfigurationConstants.CONFIG_SINKS)) {
    if (sinks != null) {
      logger.warn("Duplicate sink list specfied for agent: " + agentName);
      errorList.add(new FlumeConfigurationError(agentName,
          BasicConfigurationConstants.CONFIG_SINKS,
          FlumeConfigurationErrorType.DUPLICATE_PROPERTY,
          ErrorOrWarning.ERROR));
      return false;
    }
    sinks = value;
    logger.info("Added sinks: " + sinks + " Agent: " + this.agentName);
    return true;
  }

  // List of channel names (same handling)
  if (key.equals(BasicConfigurationConstants.CONFIG_CHANNELS)) {
    if (channels != null) {
      logger.warn("Duplicate channel list specified for agent: "
          + agentName);
      errorList.add(new FlumeConfigurationError(agentName,
          BasicConfigurationConstants.CONFIG_CHANNELS,
          FlumeConfigurationErrorType.DUPLICATE_PROPERTY,
          ErrorOrWarning.ERROR));
      return false;
    }
    channels = value;
    return true;
  }

  // List of sink group names (same handling)
  if (key.equals(BasicConfigurationConstants.CONFIG_SINKGROUPS)) {
    if (sinkgroups != null) {
      logger
          .warn("Duplicate sinkgroup list specfied for agent: " + agentName);
      errorList.add(new FlumeConfigurationError(agentName,
          BasicConfigurationConstants.CONFIG_SINKGROUPS,
          FlumeConfigurationErrorType.DUPLICATE_PROPERTY,
          ErrorOrWarning.ERROR));
      return false;
    }
    sinkgroups = value;
    return true;
  }

  // Not a list key: parse it into a component name and a property key, and
  // store the value in that component's Context (one Context per component).
  ComponentNameAndConfigKey sourceKey = parseConfigKey(key,
      BasicConfigurationConstants.CONFIG_SOURCES_PREFIX);
  if (sourceKey != null) {
    // it is a source
    String sourceName = sourceKey.getComponentName();
    Context sourceContext = sourceContextMap.get(sourceName);
    if (sourceContext == null) {
      sourceContext = new Context();
      sourceContextMap.put(sourceName, sourceContext);
    }
    sourceContext.put(sourceKey.getConfigKey(), value);
    return true;
  }

  ComponentNameAndConfigKey channelKey = parseConfigKey(key,
      BasicConfigurationConstants.CONFIG_CHANNELS_PREFIX);
  if (channelKey != null) {
    // it is a channel
    String channelName = channelKey.getComponentName();
    Context channelContext = channelContextMap.get(channelName);
    if (channelContext == null) {
      channelContext = new Context();
      channelContextMap.put(channelName, channelContext);
    }
    channelContext.put(channelKey.getConfigKey(), value);
    return true;
  }

  ComponentNameAndConfigKey sinkKey = parseConfigKey(key,
      BasicConfigurationConstants.CONFIG_SINKS_PREFIX);
  if (sinkKey != null) {
    // it is a sink; unlike the other branches the name is trimmed here
    String sinkName = sinkKey.getComponentName().trim();
    logger.info("Processing:" + sinkName);
    Context sinkContext = sinkContextMap.get(sinkName);
    if (sinkContext == null) {
      logger.debug("Created context for " + sinkName + ": "
          + sinkKey.getConfigKey());
      sinkContext = new Context();
      sinkContextMap.put(sinkName, sinkContext);
    }
    sinkContext.put(sinkKey.getConfigKey(), value);
    return true;
  }

  ComponentNameAndConfigKey groupKey = parseConfigKey(key,
      BasicConfigurationConstants.CONFIG_SINKGROUPS_PREFIX);
  if (groupKey != null) {
    // it is a sink group
    String groupName = groupKey.getComponentName();
    Context groupContext = sinkGroupContextMap.get(groupName);
    if (groupContext == null) {
      groupContext = new Context();
      sinkGroupContextMap.put(groupName, groupContext);
    }
    groupContext.put(groupKey.getConfigKey(), value);
    return true;
  }

  // Matched none of the known list keys or component prefixes.
  logger.warn("Invalid property specified: " + key);
  errorList.add(new FlumeConfigurationError(agentName, key,
      FlumeConfigurationErrorType.INVALID_PROPERTY, ErrorOrWarning.ERROR));
  return false;
}

你可能感兴趣的:(flume)