flume自定义sink

一、配置文件taildir.conf

a1.sources = r1
a1.channels = c1
a1.sinks = k1

# Describe/configure the source
#source的类型为TAILDIR,这里的类型大小写都可以
a1.sources.r1.type = taildir
a1.sources.r1.channels = c1
#记录tail读取位置(offset)的文件
a1.sources.r1.positionFile = /opt/hui/taildir_position.json
#设置tail的文件组, 多个组之间使用空格隔开
a1.sources.r1.filegroups = f1
#设置每个分组的绝对路径
#用正则 .*.csv 匹配后缀为csv的文件,如hehe.csv
a1.sources.r1.filegroups.f1 = /opt/hui/files/.*.csv
a1.sources.r1.fileHeader = true

# Describe the sink
a1.sinks.k1.type = com.hui.yilianzhong.YwdataTask
a1.sinks.k1.username = hehe
a1.sinks.k1.password = haha@213
a1.sinks.k1.batchSize = 5

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

 

二、maven的pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>cn.itcast.demo</groupId>
    <artifactId>flume-gauss-sink</artifactId>
    <version>1.0.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <version.flume>1.8.0</version.flume>
    </properties>

    <dependencies>

        <dependency>
            <groupId>org.apache.flume</groupId>
            <artifactId>flume-ng-core</artifactId>
            <version>${version.flume}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flume</groupId>
            <artifactId>flume-ng-configuration</artifactId>
            <version>${version.flume}</version>
        </dependency>

        <dependency>
            <groupId>com.huawei.gaussDb</groupId>
            <artifactId>gsjdbc4</artifactId>
            <version>1.0.0</version>
        </dependency>
    </dependencies>

    <build>
        <finalName>flume-gauss-sink</finalName>
        <plugins>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>2.3.2</version>
            </plugin>
        </plugins>
    </build>
</project>

 

三、代码

package com.hui.yilianzhong;

import com.google.common.base.Preconditions;
import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.SQLException;

/**
 * Created by HuiQ on 2020-03-06.
 */
public class YwdataTask extends AbstractSink implements Configurable {

    private static final Logger LOG = LoggerFactory.getLogger(YwdataTask.class);

    private String username;

    private String password;

    private int batchSize;
    private StringBuilder str = new StringBuilder(); // 要批量插入shbxhj_jcsj表中的数据

    private String eventData;

    //数据处理的逻辑都在process方法中实现
    @Override
    public Status process() {
        Transaction tx = null ;
        Status status=null;
        Channel channel= getChannel();
        try{
            tx= channel.getTransaction();
            tx.begin();
            for(int i=0;i" + eventData);
                tx.commit();// commit to drop bad event, otherwise it will enter dead loop.
            }
        } finally {
            if (tx != null) {
                try{
                    tx.close();
                }catch (Exception e){
                    tx.commit();
                    tx.close();
                }
            }
        }
        return status;
    }

    //该方法用于读取Flume中Sink的配置,在Sink初始化时调用
    @Override
    public void configure(Context context) {
        username = context.getString("username");
        Preconditions.checkNotNull(username, "username must be set");

        password = context.getString("password");
        Preconditions.checkNotNull("dateFormat must be set");

        batchSize = context.getInteger("batchSize");
        Preconditions.checkNotNull(batchSize > 0, "batchSize must be a positive number!!");
    }

    //该方法用于Sink启动时调用
    @Override
    public synchronized void start()
    {
        super.start();
        LOG.info("GaussSink start...");
    }

    @Override
    public void stop() {
        super.stop();
        LOG.info("GaussSink stop...");
    }
}

然后在idea中执行mvn clean compile package打包并上传到flume安装目录下的lib目录下,执行flume命令即可运行

bin/flume-ng agent -c conf -f conf/taildir.conf -n a1 -Dflume.root.logger=INFO,console

注意:得把所需要的依赖包上传到flume的lib目录下,我这里的代码只用到了一个数据库的驱动包

备注:后来数据处理那块这样写比较好些,虽然batchSize参数没啥用了。因为上面的代码会导致进的数据行数只有batchSize的整数倍才会执行插入数据库的操作,没达到batchSize整数倍的数据一直在flume中实时性可能差点。

//            for (int i = 0; i < batchSize; i++) {
//                Event event = channel.take();
//                if (event == null) {
//                    break;
//                }
//                // 每取到一条事件就立即处理入库,避免数据滞留在channel中
//            }
//            (注:原文此处代码片段被页面转义截断,以上为按文意补全的大致形态)

你可能感兴趣的:(flume)