Flume学习笔记:Flume自定义Agent

目录

Flume自定义Agent

pom文件

自定义Source

测试

自定义Sink

测试

自定义Interceptor 

测试

自定义Channel(DualChannel实现)

参考

Flume自定义Agent

pom文件


  4.0.0
  FlumeAgent
  FlumeAgent
  1.0
  
  
    
      
        
        
          org.eclipse.m2e
          lifecycle-mapping
          1.0.0
          
            
              
                
                  
                    
                      org.apache.maven.plugins
                    
                    
                      maven-compiler-plugin
                    
                    [3.1,)
                    
                      compile
                    
                  
                  
                    
                  
                
              
            
          
        
      
    
  
  
  
    
      cloudera
      https://repository.cloudera.com/artifactory/cloudera-repos/
    
    
      maven
        https://repo.maven.apache.org/maven2/
    
    
    alimaven  
        http://maven.aliyun.com/nexus/content/groups/public/
      
     


      
        org.apache.maven.plugins  
        maven-resources-plugin  
        2.5  
      
    
    org.apache.hadoop
    hadoop-common
    2.6.0-cdh5.14.0
    
    
    org.apache.hadoop
    hadoop-hdfs
    2.6.0-cdh5.14.0
    
    
    org.apache.hadoop
    hadoop-maven-plugins
    2.6.0-cdh5.14.0
    
     
            org.apache.hadoop
            hadoop-client
            2.6.0-cdh5.14.0
        
     
        org.apache.hadoop
        hadoop-yarn-server-resourcemanager
        2.6.0-cdh5.14.0
     
    
     
        org.apache.hadoop
        hadoop-yarn-server-nodemanager
        2.6.0-cdh5.14.0
     
            
     
            org.apache.hadoop
            hadoop-yarn-common
          2.6.0-cdh5.14.0
     
     
    org.apache.hadoop
    hadoop-mapreduce-client-app
    2.6.0-cdh5.14.0
    
     
        org.apache.hadoop
        hadoop-mapreduce-client-common
        2.6.0-cdh5.14.0
    
    
        org.apache.hadoop
        hadoop-mapreduce-client-core
        2.6.0-cdh5.14.0
    
     
        org.apache.hadoop
        hadoop-mapreduce-client-jobclient
        2.6.0-cdh5.14.0
    
    
         org.apache.hadoop
         hadoop-yarn-api
        2.6.0-cdh5.14.0
     
    
        org.apache.hadoop
        hadoop-yarn-client
        2.6.0-cdh5.14.0
    
    
        org.apache.hive
        hive-exec
        1.1.0-cdh5.14.0
     
      
    org.apache.hive
    hive-common
    1.1.0-cdh5.14.0

        
    org.apache.hive
    hive-jdbc
    1.1.0-cdh5.14.0

   
      org.apache.flume  
      flume-ng-core  
      1.6.0-cdh5.14.0  
      
  
      
      org.apache.flume.flume-ng-sources  
      flume-kafka-source  
      1.6.0-cdh5.14.0  
     



    org.apache.hive
    hive-metastore
    1.1.0-cdh5.14.0




    org.apache.hive
    hive-exec
    1.1.0-cdh5.14.0

    
      org.apache.mrunit
      mrunit
      1.1.0
      hadoop2
      test
    
    
      org.mockito
      mockito-all
      1.9.5
      test
    
    
      junit
      junit
      4.10
      test
    
      
            jdk.tools  
            jdk.tools  
            1.8  
            system  
            ${JAVA_HOME}/lib/tools.jar  
          
 
  

自定义Source

package com.flume.source;

import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Random;

import org.apache.flume.Context;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.PollableSource;
import org.apache.flume.conf.Configurable;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.source.AbstractSource;

public class MySource extends AbstractSource implements Configurable,PollableSource{

	// Reuse a single Random across process() calls instead of allocating
	// a new instance per event.
	private final Random random = new Random();

	public long getBackOffSleepIncrement() {
		return 0;
	}

	public long getMaxBackOffSleepInterval() {
		return 0;
	}

	/**
	 * Generates one synthetic event per call: body "myfirstSource<n>" where
	 * n is a random digit in [0, 10), with the same digit stored under the
	 * header key "id".
	 *
	 * @return Status.READY so the framework keeps polling; BACKOFF would
	 *         instead ask Flume to sleep for a while
	 * @throws EventDeliveryException declared by PollableSource, never
	 *         thrown here
	 */
	public Status process() throws EventDeliveryException {

		// Random digit 0-9 appended to a fixed prefix to form the body text.
		int randomNum = random.nextInt(10);
		String text = "myfirstSource" + randomNum;

		// Flume event headers are String -> String maps; use the typed map
		// instead of a raw HashMap.
		HashMap<String, String> header = new HashMap<String, String>();
		header.put("id", Integer.toString(randomNum));

		// EventBuilder.withBody: combine the given header and body into an
		// Event with the specified charset.
		// getChannelProcessor().processEvent(): put the Event on every
		// configured Channel.
		this.getChannelProcessor().processEvent(
				EventBuilder.withBody(text, Charset.forName("UTF-8"), header));

		return Status.READY;
	}

	public void configure(Context arg0) {
		// This demo source takes no configuration parameters.
	}

}

测试

生成jar包,拷到$FLUME_HOME/lib下
然后执行命令

flume-ng agent -n a1 -c / -f /udagent.conf -Dflume.root.logger=INFO,console

运行效果

Flume学习笔记:Flume自定义Agent_第1张图片

自定义Sink

package com.flume.sink;

import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;

public class MySink extends AbstractSink implements Configurable {

	/**
	 * Drains all events currently available from the channel inside a single
	 * transaction and prints each body to stdout.
	 *
	 * Fixes two defects in the original version:
	 * <ul>
	 *   <li>{@code channel.take()} was called once before the
	 *       {@code while(event != null)} loop, so any non-null event made the
	 *       loop spin forever printing the same body;</li>
	 *   <li>the catch block called {@code commit()}; a failed batch must be
	 *       rolled back so the channel redelivers the events.</li>
	 * </ul>
	 *
	 * @return Status.READY when at least one event was consumed,
	 *         Status.BACKOFF when the channel was empty or an error occurred
	 */
	public Status process() throws EventDeliveryException {

		Status status = Status.READY;
		Transaction trans = null;
		try {
			// Begin the channel transaction.
			Channel channel = getChannel();
			trans = channel.getTransaction();
			trans.begin();

			// Take events until the channel is drained; re-take inside the
			// loop so each iteration processes a fresh event.
			Event event = channel.take();
			if (event == null) {
				// Nothing available: ask the framework to back off briefly.
				status = Status.BACKOFF;
			}
			while (event != null) {
				System.out.println(new String(event.getBody()));
				event = channel.take();
			}

			trans.commit();
		} catch (Exception e) {
			// Roll back so the channel keeps the events for redelivery.
			if (trans != null) {
				trans.rollback();
			}
			e.printStackTrace();
			status = Status.BACKOFF;
		} finally {
			if (trans != null) {
				trans.close();
			}
		}

		return status;
	}

	public void configure(Context arg0) {
		// This demo sink takes no configuration parameters.
	}

}

测试

配置文件如下

Flume学习笔记:Flume自定义Agent_第2张图片

运行Flume

flume-ng agent -n a1 -c / -f udsinkagent.conf -Dflume.root.logger=INFO,console

将测试文件拷贝进文件夹中,相应的jar包也拷到Flume的lib下
Flume学习笔记:Flume自定义Agent_第3张图片

Flume的Console马上会打印出文件的内容

Flume学习笔记:Flume自定义Agent_第4张图片

自定义Interceptor 

package com.flume.interceptor;

import java.util.List;

import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;

import com.google.common.base.Charsets;

public class MyInterceptor implements Interceptor{

	public void close() {
		// No resources to release.
	}

	public void initialize() {
		// No setup required.
	}

	/**
	 * Per-event processing: when the body starts with "test", the whole body
	 * is replaced with "123456"; otherwise the event passes through
	 * unchanged.
	 *
	 * @param event the event to inspect; its body may be mutated in place
	 * @return the same event instance
	 */
	public Event intercept(Event event) {

		String body = new String(event.getBody(), Charsets.UTF_8);
		// Debug trace of the original body before any replacement; consider
		// removing or switching to a logger in production.
		System.out.println(body);

		// Replace bodies that begin with "test" by the literal "123456".
		// (The original built this constant via an unnecessary StringBuffer.)
		if (body.startsWith("test")) {
			event.setBody("123456".getBytes());
		}

		return event;
	}

	/**
	 * Batch processing: applies the single-event intercept to every non-null
	 * event in the list. Uses the typed List&lt;Event&gt; signature declared
	 * by the Interceptor interface instead of a raw List.
	 */
	public List<Event> intercept(List<Event> events) {

		for (Event event : events) {
			if (event != null) {
				intercept(event);
			}
		}

		return events;
	}

	/** Builder used by Flume to instantiate the interceptor from config. */
	public static class Builder implements Interceptor.Builder{

		public void configure(Context arg0) {
			// No configuration parameters.
		}

		public Interceptor build() {
			return new MyInterceptor();
		}
	}
}

测试

测试文件

Flume学习笔记:Flume自定义Agent_第5张图片

运行Flume

flume-ng agent -n a2 -c / -f /myinterceptor.conf -Dflume.root.logger=INFO,console

测试结果,可以发现所有以test开头的字符串都被替换成了123456 
Flume学习笔记:Flume自定义Agent_第6张图片

自定义Channel(DualChannel实现)

这里参考了一下美团技术团队的文章
https://tech.meituan.com/mt_log_system_optimization.html

Flume本身提供了MemoryChannel和FileChannel。MemoryChannel处理速度快,但缓存大小有限,且没有持久化;FileChannel则刚好相反。我们希望利用两者的优势,在Sink处理速度够快,Channel没有缓存过多日志的时候,就使用MemoryChannel,当Sink处理速度跟不上,又需要Channel能够缓存下应用端发送过来的日志时,就使用FileChannel,由此我们开发了DualChannel,能够智能的在两个Channel之间切换。

/***
 * putToMemChannel indicate put event to memChannel or fileChannel
 * takeFromMemChannel indicate take event from memChannel or fileChannel
 * */
private AtomicBoolean putToMemChannel = new AtomicBoolean(true);
private AtomicBoolean takeFromMemChannel = new AtomicBoolean(true);

// Route a put: while the switch is on and memory is the preferred target,
// write to the fast MemoryChannel; once memory fills up or the file queue
// has a backlog, flip to the persistent FileChannel.
// NOTE(review): switchon, memChannel, fileChannel, memTransaction and
// fileTransaction are fields defined elsewhere in the (not shown) class.
void doPut(Event event) {
        if (switchon && putToMemChannel.get()) {
              // Write the event into the memory channel (fast path).
              memTransaction.put(event);

              // Switch subsequent puts to the file channel when memory is
              // full or the file queue already holds a backlog (> 100).
              if ( memChannel.isFull() || fileChannel.getQueueSize() > 100) {
                putToMemChannel.set(false);
              }
        } else {
              // Write the event into the file channel (persistent path).
              fileTransaction.put(event);
        }
  }

// Route a take: drain the memory channel first; when it is empty, switch to
// reading from the file channel. Once the file channel is also drained, both
// flags flip back so puts and takes return to the memory fast path.
Event doTake() {
    Event event = null;
    if ( takeFromMemChannel.get() ) {
        // Take from the memory channel.
        event = memTransaction.take();
        if (event == null) {
            takeFromMemChannel.set(false);
        } 
    } else {
        // Take from the file channel.
        event = fileTransaction.take();
        if (event == null) {
            takeFromMemChannel.set(true);

            putToMemChannel.set(true);
        } 
    }
    return event;
}

Flume提供了NullSink,可以把不需要的日志通过NullSink直接丢弃,不进行存储。然而,Source需要先将events存放到Channel中,NullSink再将events取出扔掉。为了提升性能,我们把这一步移到了Channel里面做,所以开发了NullChannel。 

参考

http://flume.apache.org/releases/content/1.6.0/apidocs/overview-summary.html

 

你可能感兴趣的:(Flume)