flume篇4:flume把json数据写入elasticsearch(flume-elasticsearch-sink)

flume篇4:flume把json数据写入elasticsearch(flume-elasticsearch-sink)

对于非json数据同样适用:可以把非json数据通过拦截器拼接成一个json,然后send出去,这样也是ok的
废话不多说,直接上干货

一、 自定义拦截器:
1 拦截器要求:新建一个新的工程,单独打包,保证每个flume的拦截器都是单独的一个工程打的包,这样保证每次对拦截器修改的时候不影响其他flume业务

  <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.7</maven.compiler.source>
        <maven.compiler.target>1.7</maven.compiler.target>
        <scala.version>2.10.4</scala.version>
        <flume.version>1.8.0</flume.version>
    </properties>

  <dependencies>
            <dependency>
            <groupId>org.apache.flume</groupId>
            <artifactId>flume-ng-core</artifactId>
            <version>${flume.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>commons-net</groupId>
            <artifactId>commons-net</artifactId>
            <version>3.3</version>
        </dependency>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.4</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.testng</groupId>
            <artifactId>testng</artifactId>
            <version>6.1.1</version>
            <scope>test</scope>
        </dependency>
       <dependency>
            <groupId>org.apache.carbondata</groupId>
            <artifactId>carbondata-store-sdk</artifactId>
            <version>1.5.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.carbondata</groupId>
            <artifactId>carbondata-core</artifactId>
            <version>1.5.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.carbondata</groupId>
            <artifactId>carbondata-common</artifactId>
            <version>1.5.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.carbondata</groupId>
            <artifactId>carbondata-format</artifactId>
            <version>1.5.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.carbondata</groupId>
            <artifactId>carbondata-hadoop</artifactId>
            <version>1.5.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.carbondata</groupId>
            <artifactId>carbondata-processing</artifactId>
            <version>1.5.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.carbondata</groupId>
            <artifactId>carbonata</artifactId>
            <version>1.5.3</version>
            <scope>system</scope>
            <systemPath>${project.basedir}/lib/apache-carbondata-1.5.3-bin-spark2.3.2-hadoop2.6.0-cdh5.16.1.jar</systemPath>
        </dependency>
     <dependency>
            <groupId>org.apache.mina</groupId>
            <artifactId>mina-core</artifactId>
            <version>2.0.9</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.mockito</groupId>
            <artifactId>mockito-all</artifactId>
            <version>1.9.5</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.sshd</groupId>
            <artifactId>sshd-core</artifactId>
            <version>0.14.0</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>com.jcraft</groupId>
            <artifactId>jsch</artifactId>
            <version>0.1.54</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.12</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.5</version>
        </dependency>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>16.0.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.11.0.0</version>
            <scope>compile</scope>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.46</version>
            <scope>compile</scope>
        </dependency>
    </dependencies>

2 拦截器代码如下:
(以下拦截器主要目的是:把一个嵌套2层的body Json中的各个字段取出来,并拼接成一个新的json send 出去)

package com.extracting.flume.interceptor.tc;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.Lists;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.text.SimpleDateFormat;
import java.util.List;

/**
 * Flume interceptor that flattens a JSON event body nested two levels deep.
 *
 * <p>The event body is parsed as JSON, the object found at {@code body.body} is read, and
 * its {@code id}, {@code name}, {@code age} and {@code time} fields are copied into a new
 * flat JSON object under the keys {@code new_id}, {@code new_name}, {@code new_age} and
 * {@code new_time}. That flat object then replaces the event body.
 *
 * <p>Events whose body cannot be parsed or converted are dropped: the single-event
 * {@link #intercept(Event)} returns {@code null} and the list variant filters them out.
 */
public class XyAccessJsonInterceptorTC implements Interceptor {

    private static final Logger logger = LoggerFactory.getLogger(XyAccessJsonInterceptorTC.class);

    /**
     * Formatter for the {@code time} field, created in {@link #initialize()}.
     * SimpleDateFormat is not synchronized; NOTE(review): confirm the source only calls
     * this interceptor from one thread at a time.
     */
    private SimpleDateFormat dataFormat;

    @Override
    public void initialize() {
        dataFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    }

    /**
     * Rewrites the body of a single event.
     *
     * @param event the event whose body holds the nested JSON document
     * @return the same event with its body replaced, or {@code null} when the body is not
     *         valid JSON, lacks the nested {@code body.body} object, or a field fails to convert
     */
    @Override
    public Event intercept(Event event) {
        // Decode and encode with an explicit charset so the result does not depend on the
        // JVM's platform default encoding.
        String body = new String(event.getBody(), java.nio.charset.StandardCharsets.UTF_8);
        try {
            JSONObject jsonObject = JSON.parseObject(body);
            JSONObject bodyObject1 = jsonObject == null ? null : jsonObject.getJSONObject("body");
            JSONObject bodyObject2 = bodyObject1 == null ? null : bodyObject1.getJSONObject("body");
            if (bodyObject2 == null) {
                // Missing nesting is a data problem, not an exceptional code path.
                logger.error("ERROR格式数据" + body);
                return null;
            }
            JSONObject resObject = new JSONObject();
            getPut(resObject, bodyObject2, "id", "new_id");
            getPut(resObject, bodyObject2, "name", "new_name");
            getPutLong(resObject, bodyObject2, "age", "new_age");
            getPutDate(resObject, bodyObject2, "time", "new_time", dataFormat);
            String flattened = resObject.toString();
            logger.info("拦截器最hou输出结果为resObject:" + flattened);
            event.setBody(flattened.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            return event;
        } catch (Exception e) {
            // Keep the cause in the log so malformed input can actually be diagnosed.
            logger.error("ERROR格式数据" + body, e);
            return null;
        }
    }

    /**
     * Applies {@link #intercept(Event)} to every event and keeps only the ones that were
     * converted successfully.
     *
     * @param events the batch to process
     * @return a new list holding the successfully rewritten events, in their original order
     */
    @Override
    public List<Event> intercept(List<Event> events) {
        List<Event> resultList = Lists.newArrayList();
        for (Event event : events) {
            Event result = intercept(event);
            if (result != null) {
                resultList.add(result);
            }
        }
        return resultList;
    }

    @Override
    public void close() {
        // Nothing to release.
    }

    /** Builder used by Flume to instantiate the interceptor; it takes no configuration. */
    public static class Builder implements Interceptor.Builder {

        @Override
        public Interceptor build() {
            return new XyAccessJsonInterceptorTC();
        }

        @Override
        public void configure(Context context) {
            // No configurable properties.
        }

    }

    /**
     * Copies a field as a string, if present.
     *
     * @param resObject  target object
     * @param jsonObject source object
     * @param oldName    key to read from the source
     * @param newName    key to write in the target
     */
    public static void getPut(JSONObject resObject, JSONObject jsonObject, String oldName, String newName) {
        Object value = jsonObject.get(oldName);
        if (value != null) {
            resObject.put(newName, value.toString());
        }
    }

    /**
     * Copies a field holding a numeric timestamp as a formatted date string, if present.
     * The value is handed to {@link java.util.Date#Date(long)}.
     *
     * @param resObject  target object
     * @param jsonObject source object
     * @param oldName    key to read from the source
     * @param newName    key to write in the target
     * @param dataFormat formatter used to render the timestamp
     * @throws NumberFormatException if the value is not a valid long
     */
    public static void getPutDate(JSONObject resObject, JSONObject jsonObject, String oldName, String newName,
                                  SimpleDateFormat dataFormat) {
        Object value = jsonObject.get(oldName);
        if (value != null) {
            long timestamp = Long.parseLong(value.toString());
            resObject.put(newName, dataFormat.format(new java.util.Date(timestamp)));
        }
    }

    /**
     * Copies a field as a {@code Long}, if present.
     *
     * @param resObject  target object
     * @param jsonObject source object
     * @param oldName    key to read from the source
     * @param newName    key to write in the target
     * @throws NumberFormatException if the value is not a valid long
     */
    public static void getPutLong(JSONObject resObject, JSONObject jsonObject, String oldName, String newName) {
        Object value = jsonObject.get(oldName);
        if (value != null) {
            Long valuelong = Long.parseLong(value.toString());
            resObject.put(newName, valuelong);
        }
    }

}

3 打包上传,到flume的lib位置,cdh位置如下:/opt/cloudera/parcels/CDH/lib/flume-ng/lib/

二、自定义flume-elasticsearch-sink
1 flume-elasticsearch-sink目前官网还没有正式提供,主要原因好像是es的版本迭代过快,es2和es5的api都有所变化。我这里是基于es5的,并且配置了动态索引,每月自动生成新的es索引。这是我自定义的一个通用的flume-elasticsearch-sink包,需要新建一个工程。在此声明:此es-sink是借鉴网上的,由于年代久远,我一时半会已经找不到此es-sink的原作者了,如有侵权,我会立马删除的!

2 pom 如下:

<properties>
        <elasticsearch-verison>5.4.1</elasticsearch-verison>
        <flume-version>1.8.0</flume-version>
        <hbase-version>1.2.1</hbase-version>
        <log4j-version>2.8.2</log4j-version>
        <junit-version>4.10</junit-version>
        <avro-version>1.8.2</avro-version>

    </properties>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.5.3</version>
                <configuration>
                    <descriptors>
                        <descriptor>src/main/assembly/assembly.xml</descriptor>
                    </descriptors>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <dependencies>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
        </dependency>

        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${elasticsearch-verison}</version>
        </dependency>
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>transport</artifactId>
            <version>${elasticsearch-verison}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flume</groupId>
            <artifactId>flume-ng-sdk</artifactId>
            <version>${flume-version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flume</groupId>
            <artifactId>flume-ng-core</artifactId>
            <version>${flume-version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
            <version>${log4j-version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>${log4j-version}</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <scope>test</scope>
            <version>${junit-version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro</artifactId>
            <version>${avro-version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>16.0.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.1</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-common -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>1.2.1</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-server -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.1</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.1</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.zookeeper/zookeeper -->
        <dependency>
            <groupId>org.apache.zookeeper</groupId>
            <artifactId>zookeeper</artifactId>
            <version>3.4.10</version>
            <type>pom</type>
        </dependency>
    </dependencies>

3 代码部分:代码有点多,给大家看一下整体的目录结构!
flume篇4:flume把json数据写入elasticsearch(flume-elasticsearch-sink)_第1张图片
接下来大家就开始疯狂复制模式吧:
先是com.cognitree.flume.sink.elasticsearch.client

package com.cognitree.flume.sink.elasticsearch.client;

import com.cognitree.flume.sink.elasticsearch.Util;
import org.apache.flume.Context;
import org.elasticsearch.action.bulk.BackoffPolicy;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static com.cognitree.flume.sink.elasticsearch.Constants.*;

/**
 * Builds an Elasticsearch {@link BulkProcessor} from the sink's Flume {@link Context}.
 *
 * <p>The values read from the context are kept in instance fields and then applied to the
 * processor builder; see {@link #buildBulkProcessor(Context, TransportClient)}.
 */
public class BulkProcessorBuilder {

    private static final Logger logger = LoggerFactory.getLogger(BulkProcessorBuilder.class);

    private Integer bulkActions;

    private String bulkProcessorName;

    private ByteSizeValue bulkSize;

    private Integer concurrentRequest;

    private TimeValue flushIntervalTime;

    private String backoffPolicyTimeInterval;

    private Integer backoffPolicyRetries;


    /**
     * Reads the bulk settings from the context and creates the processor.
     *
     * @param context sink configuration; properties that are read with a default fall back
     *                to the matching {@code DEFAULT_ES_*} constant
     * @param client  transport client the processor sends its bulk requests through
     * @return the configured bulk processor
     */
    public BulkProcessor buildBulkProcessor(Context context, TransportClient client) {
        bulkActions = context.getInteger(ES_BULK_ACTIONS,
                DEFAULT_ES_BULK_ACTIONS);
        bulkProcessorName = context.getString(ES_BULK_PROCESSOR_NAME,
                DEFAULT_ES_BULK_PROCESSOR_NAME);
        // No default is passed here; handling of unset values is left to Util.
        bulkSize = Util.getByteSizeValue(context.getInteger(ES_BULK_SIZE),
                context.getString(ES_BULK_SIZE_UNIT));
        concurrentRequest = context.getInteger(ES_CONCURRENT_REQUEST,
                DEFAULT_ES_CONCURRENT_REQUEST);
        flushIntervalTime = Util.getTimeValue(context.getString(ES_FLUSH_INTERVAL_TIME),
                DEFAULT_ES_FLUSH_INTERVAL_TIME);
        backoffPolicyTimeInterval = context.getString(ES_BACKOFF_POLICY_TIME_INTERVAL,
                DEFAULT_ES_BACKOFF_POLICY_START_DELAY);
        backoffPolicyRetries = context.getInteger(ES_BACKOFF_POLICY_RETRIES,
                DEFAULT_ES_BACKOFF_POLICY_RETRIES);
        return build(client);
    }

    /** Applies the previously read settings to a new processor bound to {@code client}. */
    private BulkProcessor build(TransportClient client) {
        logger.trace("Bulk processor name: [{}]  bulkActions: [{}], bulkSize: [{}], flush interval time: [{}]," +
                        " concurrent Request: [{}], backoffPolicyTimeInterval: [{}], backoffPolicyRetries: [{}] ",
                new Object[]{bulkProcessorName, bulkActions, bulkSize, flushIntervalTime,
                        concurrentRequest, backoffPolicyTimeInterval, backoffPolicyRetries});
        TimeValue backoffStartDelay = Util.getTimeValue(backoffPolicyTimeInterval,
                DEFAULT_ES_BACKOFF_POLICY_START_DELAY);
        return BulkProcessor.builder(client, getListener())
                .setName(bulkProcessorName)
                .setBulkActions(bulkActions)
                .setBulkSize(bulkSize)
                .setFlushInterval(flushIntervalTime)
                .setConcurrentRequests(concurrentRequest)
                .setBackoffPolicy(BackoffPolicy.exponentialBackoff(backoffStartDelay, backoffPolicyRetries))
                .build();
    }

    /** Creates the listener that logs the lifecycle of every bulk request. */
    private BulkProcessor.Listener getListener() {
        return new BulkProcessor.Listener() {
            public void beforeBulk(long executionId,
                                   BulkRequest request) {
                logger.trace("Bulk Execution [" + executionId + "]\n" +
                        "No of actions " + request.numberOfActions());
            }

            public void afterBulk(long executionId,
                                  BulkRequest request,
                                  BulkResponse response) {
                // A bulk request can succeed as a whole while individual items fail;
                // surface those at ERROR so rejected documents are not lost silently.
                if (response.hasFailures()) {
                    logger.error("Bulk execution [" + executionId + "] completed with failures: " +
                            response.buildFailureMessage());
                }
                // Guarded because building the failure message is wasted work otherwise.
                if (logger.isTraceEnabled()) {
                    logger.trace("Bulk execution completed [" + executionId + "]\n" +
                            "Took (ms): " + response.getTookInMillis() + "\n" +
                            "Failures: " + response.hasFailures() + "\n" +
                            "Failures Message: " + response.buildFailureMessage() + "\n" +
                            "Count: " + response.getItems().length);
                }
            }

            public void afterBulk(long executionId,
                                  BulkRequest request,
                                  Throwable failure) {
                // Pass the throwable itself so the stack trace is logged too.
                logger.error("Bulk execution failed [" + executionId + "]" +
                        failure.toString(), failure);
            }
        };
    }

}

package com.cognitree.flume.sink.elasticsearch.client;

import com.google.common.base.Throwables;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.net.InetAddress;
import java.util.ArrayList;
import java.util.List;

import static com.cognitree.flume.sink.elasticsearch.Constants.*;

/**
 * Fluent builder for an Elasticsearch {@link TransportClient}.
 *
 * <p>The cluster name and the node addresses are fixed at construction time; the remaining
 * transport settings are supplied through the setters before calling {@link #build()}.
 */
public class ElasticsearchClientBuilder {

    private static final Logger logger = LoggerFactory.getLogger(ElasticsearchClientBuilder.class);

    private String clusterName;

    private boolean transportSniff;
    private boolean ignoreClusterName;
    private TimeValue transportPingTimeout;
    private TimeValue nodeSamplerInterval;

    private List<InetSocketTransportAddress> transportAddresses;

    /**
     * @param clusterName name of the Elasticsearch cluster to connect to
     * @param hostnames   node addresses as {@code host} or {@code host:port}; entries without
     *                    a port use {@code DEFAULT_ES_PORT}
     * @throws RuntimeException if an address cannot be parsed or resolved
     */
    public ElasticsearchClientBuilder(String clusterName, String[] hostnames) {
        this.clusterName = clusterName;
        setTransportAddresses(hostnames);
        logger.debug("@@@@@@@@@@@@@@clusterName为 {}", clusterName);
    }


    public ElasticsearchClientBuilder setTransportSniff(boolean transportSniff) {
        this.transportSniff = transportSniff;
        return this;
    }

    public ElasticsearchClientBuilder setIgnoreClusterName(boolean ignoreClusterName) {
        this.ignoreClusterName = ignoreClusterName;
        return this;
    }

    public ElasticsearchClientBuilder setTransportPingTimeout(TimeValue transportPingTimeout) {
        this.transportPingTimeout = transportPingTimeout;
        return this;
    }

    public ElasticsearchClientBuilder setNodeSamplerInterval(TimeValue nodeSamplerInterval) {
        this.nodeSamplerInterval = nodeSamplerInterval;
        return this;
    }

    /**
     * Creates the transport client and registers every configured node address with it.
     *
     * @return a client connected to the configured addresses
     */
    public TransportClient build() {
        // One placeholder per argument: the address list already carries host and port.
        logger.trace("Cluster Name: [{}], Transport Sniff: [{}]" +
                        ", Ignore Cluster Name: [{}], Transport Ping TimeOut: [{}],  " +
                        "Node Sampler Interval: [{}], Transport Addresses: [{}] ",
                new Object[]{clusterName, transportSniff,
                        ignoreClusterName, transportPingTimeout,
                        nodeSamplerInterval, transportAddresses});
        Settings settings = Settings.builder()
                .put(ES_CLUSTER_NAME,
                        clusterName)
                .put(ES_TRANSPORT_SNIFF,
                        transportSniff)
                .put(ES_IGNORE_CLUSTER_NAME,
                        ignoreClusterName)
                .put(ES_TRANSPORT_PING_TIMEOUT,
                        transportPingTimeout)
                .put(ES_TRANSPORT_NODE_SAMPLER_INTERVAL,
                        nodeSamplerInterval)
                .build();
        TransportClient client = new PreBuiltTransportClient(settings);
        for (InetSocketTransportAddress inetSocketTransportAddress : transportAddresses) {
            client.addTransportAddress(inetSocketTransportAddress);
        }
        return client;
    }

    /**
     * Parses {@code host[:port]} entries into transport addresses.
     * Any failure is logged and rethrown as an unchecked exception.
     */
    private void setTransportAddresses(String[] transportAddresses) {
        try {
            this.transportAddresses = new ArrayList<InetSocketTransportAddress>(transportAddresses.length);
            for (String transportAddress : transportAddresses) {
                // Split once; trim so that "a:9300, b:9300" style lists still resolve.
                String[] hostAndPort = transportAddress.trim().split(":");
                String hostName = hostAndPort[0].trim();
                Integer port = hostAndPort.length > 1 ?
                        Integer.parseInt(hostAndPort[1].trim()) : DEFAULT_ES_PORT;
                this.transportAddresses.add(new InetSocketTransportAddress(InetAddress.getByName(hostName), port));
            }
        } catch (Exception e) {
            logger.error("Error in creating the InetSocketTransportAddress for elastic search " + e.getMessage(), e);
            throw Throwables.propagate(e);
        }
    }
}

接下来是com.cognitree.flume.sink.elasticsearch目录

/*
 * Copyright 2017 Cognitree Technologies
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.cognitree.flume.sink.elasticsearch;

import com.alibaba.fastjson.JSONObject;
import com.google.common.base.Throwables;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.elasticsearch.common.xcontent.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;

import static com.cognitree.flume.sink.elasticsearch.Constants.ES_AVRO_SCHEMA_FILE;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

/**
 * Created by prashant
 * 

* This Serializer assumes the event body to be in avro binary format */ public class AvroSerializer implements Serializer { private static final Logger logger = LoggerFactory.getLogger(AvroSerializer.class); private DatumReader<GenericRecord> datumReader; /** * Converts the avro binary data to the json format */ public XContentBuilder serialize(Event event) { XContentBuilder builder = null; try { if (datumReader != null) { Decoder decoder = new DecoderFactory().binaryDecoder(event.getBody(), null); GenericRecord data = datumReader.read(null, decoder); logger.trace("Record in event " + data); XContentParser parser = XContentFactory .xContent(XContentType.JSON) .createParser(NamedXContentRegistry.EMPTY, data.toString()); builder = jsonBuilder().copyCurrentStructure(parser); parser.close(); } else { logger.error("Schema File is not configured"); } } catch (IOException e) { logger.error("Exception in parsing avro format data but continuing serialization to process further records", e.getMessage(), e); } return builder; } public XContentBuilder serialize2(JSONObject event) { XContentBuilder builder = null; try { if (datumReader != null) { // logger.trace("Record in event " + data); XContentParser parser = XContentFactory .xContent(XContentType.JSON) .createParser(NamedXContentRegistry.EMPTY, event.toString()); builder = jsonBuilder().copyCurrentStructure(parser); parser.close(); } else { logger.error("Schema File is not configured"); } } catch (IOException e) { logger.error("Exception in parsing avro format data but continuing serialization to process further records", e.getMessage(), e); } return builder; } public void configure(Context context) { String file = context.getString(ES_AVRO_SCHEMA_FILE); if (file == null) { Throwables.propagate(new Exception("Schema file is not configured, " + "please configure the property " + ES_AVRO_SCHEMA_FILE)); } try { Schema schema = new Schema.Parser().parse(new File(file)); datumReader = new 
GenericDatumReader<GenericRecord>(schema); } catch (IOException e) { logger.error("Error in parsing schema file ", e.getMessage(), e); Throwables.propagate(e); } } }

/*
 * Copyright 2017 Cognitree Technologies
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.cognitree.flume.sink.elasticsearch;

/**
 * Created by prashant
 *
 * Property keys, default values and small enums shared by the Elasticsearch sink.
 */
public class Constants {

    public static final String COMMA = ",";

    public static final String COLONS = ":";

    public static final String PREFIX = "es.";

    public static final String INDEX = "index";
    public static final String TYPE = "type";
    public static final String ID = "id";

    // Bulk processor settings.
    public static final String ES_BULK_ACTIONS = "es.bulkActions";
    public static final Integer DEFAULT_ES_BULK_ACTIONS = 1000;

    public static final String ES_BULK_SIZE = "es.bulkSize";
    public static final String ES_BULK_SIZE_UNIT = "es.bulkSize.unit";
    public static final Integer DEFAULT_ES_BULK_SIZE = 5;

    public static final String ES_BULK_PROCESSOR_NAME = "es.bulkProcessor.name";
    public static final String DEFAULT_ES_BULK_PROCESSOR_NAME = "flume";

    public static final String ES_CONCURRENT_REQUEST = "es.concurrent.request";
    public static final Integer DEFAULT_ES_CONCURRENT_REQUEST = 1;

    public static final String ES_FLUSH_INTERVAL_TIME = "es.flush.interval.time";
    public static final String DEFAULT_ES_FLUSH_INTERVAL_TIME = "10s";

    public static final String DEFAULT_ES_TIME = "5s";

    public static final String ES_BACKOFF_POLICY_TIME_INTERVAL = "es.backoff.policy.time.interval";
    // The "M" suffix matches UnitEnum.MILI_SECOND below.
    public static final String DEFAULT_ES_BACKOFF_POLICY_START_DELAY = "50M";

    public static final String ES_BACKOFF_POLICY_RETRIES = "es.backoff.policy.retries";
    public static final Integer DEFAULT_ES_BACKOFF_POLICY_RETRIES = 8;

    // Index, type and pluggable component settings.
    public static final String ES_INDEX = "es.index";
    public static final String DEFAULT_ES_INDEX = "default";

    public static final String ES_TYPE = "es.type";
    public static final String DEFAULT_ES_TYPE = "default";

    public static final String ES_INDEX_BUILDER = "es.index.builder";
    public static final String DEFAULT_ES_INDEX_BUILDER = "com.cognitree.flume.sink.elasticsearch.StaticIndexBuilder";

    public static final String ES_SERIALIZER = "es.serializer";
    public static final String DEFAULT_ES_SERIALIZER = "com.cognitree.flume.sink.elasticsearch.SimpleSerializer";

    // Mandatory Properties
    public static final String ES_CLUSTER_NAME = "cluster.name";
    public static final String DEFAULT_ES_CLUSTER_NAME = "elasticsearch";

    public static final String ES_HOSTS = "es.client.hosts";

    public static final Integer DEFAULT_ES_PORT = 9300;

    // Transport client setting keys.
    public static final String ES_TRANSPORT_SNIFF = "client.transport.sniff";
    public static final String ES_IGNORE_CLUSTER_NAME = "client.transport.ignore_cluster_name";
    public static final String ES_TRANSPORT_PING_TIMEOUT = "client.transport.ping_timeout";
    public static final String ES_TRANSPORT_NODE_SAMPLER_INTERVAL = "client.transport.nodes_sampler_interval";

    // Serializer settings.
    public static final String ES_CSV_FIELDS = "es.serializer.csv.fields";
    public static final String ES_CSV_DELIMITER = "es.serializer.csv.delimiter";
    public static final String DEFAULT_ES_CSV_DELIMITER = ",";

    public static final String ES_AVRO_SCHEMA_FILE = "es.serializer.avro.schema.file";

    // HBase related settings.
    public static final String HBASE_ZK_QUORUM = "hbase.zookeeper.quorum";
    public static final String HBASE_TABLE_NAME = "hbase.namespace.table";
    public static final String HBASE_CF = "hbase.cf";
    public static final String HBASE_ROWKEY = "hbase.rowkey";
    public static final String HBASE_FIELDS = "hbase.fields";
    // NOTE(review): same key as HBASE_FIELDS; looks like a copy-paste, confirm the intended
    // property name before changing it, since existing configurations may rely on this value.
    public static final String ES_FIELDS = "hbase.fields";



    /**
     * This enum is used for the time unit
     *
     * Time unit can be in second ("s"), minute ("m") or millisecond ("M");
     * anything else maps to {@link #UNKNOWN}.
     */
    public enum UnitEnum {
        SECOND("s"),
        MINUTE("m"),
        MILI_SECOND("M"),
        UNKNOWN("unknown");

        private final String unit;

        UnitEnum(String unit) {
            this.unit = unit;
        }

        @Override
        public String toString() {
            return unit;
        }

        /**
         * Looks up the unit for a case-sensitive suffix.
         *
         * @param unit suffix such as {@code "s"}, {@code "m"} or {@code "M"}; may be {@code null}
         * @return the matching constant, or {@link #UNKNOWN} when nothing matches
         */
        public static UnitEnum fromString(String unit) {
            for (UnitEnum unitEnum : UnitEnum.values()) {
                if (unitEnum.unit.equals(unit)) {
                    return unitEnum;
                }
            }
            return UNKNOWN;
        }
    }

    /**
     *
     * This enum is used for unit of size of data
     *
     * Unit can be in Mega byte or kilo byte
     */
    public enum ByteSizeEnum {
        MB("mb"),
        KB("kb");

        private final String byteSizeUnit;

        ByteSizeEnum(String byteSizeUnit) {
            this.byteSizeUnit = byteSizeUnit;
        }

        @Override
        public String toString() {
            return byteSizeUnit;
        }
    }

    /**
     *  Enum for field type
     *
     */
    public enum FieldTypeEnum {
        STRING("string"),
        INT("int"),
        FLOAT("float"),
        LONG("long"),
        BOOLEAN("boolean");

        private final String fieldType;

        FieldTypeEnum(String fieldType) {
            this.fieldType = fieldType;
        }

        @Override
        public String toString() {
            return fieldType;
        }
    }
}

package com.cognitree.flume.sink.elasticsearch;

import com.alibaba.fastjson.JSONObject;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static com.cognitree.flume.sink.elasticsearch.Constants.*;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

/**
 * Serializer that turns a delimited text line into a JSON document.
 *
 * <p>Field names and types come from the {@code es.serializer.csv.fields} property, written
 * as comma-separated {@code name:type} pairs. The line is split with
 * {@link String#split(String)}, so the configured delimiter is interpreted as a regular
 * expression.
 */
public class CsvSerializer implements Serializer {

    private static final Logger logger = LoggerFactory.getLogger(CsvSerializer.class);

    private final List<String> names = new ArrayList<String>();

    private final List<String> types = new ArrayList<String>();

    private String delimiter;

    /**
     *
     * Converts the csv data to the json format
     *
     * @param event event whose body is one UTF-8 encoded delimited line
     * @return the builder for the line, or {@code null} when no fields are configured
     */
    public XContentBuilder serialize(Event event) {
        return toJson(new String(event.getBody(), Charsets.UTF_8));
    }

    /**
     * Same conversion as {@link #serialize(Event)}, applied to the string form of a JSON object.
     *
     * @param event object whose {@code toString()} output is split with the delimiter
     * @return the builder for the line, or {@code null} when no fields are configured
     */
    public XContentBuilder serialize2(JSONObject event) {
        // A null event is passed on as a null line so the failure is logged, not thrown.
        return toJson(event == null ? null : event.toString());
    }

    /**
     * Splits the line and adds one field per configured name, in order.
     * Failures are logged and the builder is returned in whatever state it reached.
     */
    private XContentBuilder toJson(String line) {
        XContentBuilder xContentBuilder = null;
        try {
            if (!names.isEmpty() && !types.isEmpty()) {
                xContentBuilder = jsonBuilder().startObject();
                List<String> values = Arrays.asList(line.split(delimiter));
                for (int i = 0; i < names.size(); i++) {
                    Util.addField(xContentBuilder, names.get(i), values.get(i), types.get(i));
                }
                xContentBuilder.endObject();
            } else {
                logger.error("Fields for csv files are not configured, " +
                        "please configured the property " + ES_CSV_FIELDS);
            }
        } catch (Exception e) {
            logger.error("Error in converting the body to the json format " + e.getMessage(), e);
        }
        return xContentBuilder;
    }


    /**
     *
     * Returns name and value based on the index
     *
     * @param fieldType one {@code name:type} entry
     * @param index     0 for the name, 1 for the type
     * @return the requested part, or an empty string when the entry has no such part
     */
    private String getValue(String fieldType, Integer index) {
        // Bound the index by the number of parts, not by the length of the string:
        // an entry such as "ab" has no type part and must yield "" instead of throwing.
        String[] parts = fieldType.split(COLONS);
        return parts.length > index ? parts[index] : "";
    }

    /**
     *
     * Configure the field and its type with the custom delimiter
     *
     * @param context sink configuration holding the field list and the optional delimiter
     * @throws RuntimeException if the field list property is missing
     */
    public void configure(Context context) {
        String fields = context.getString(ES_CSV_FIELDS);
        if (fields == null) {
            throw Throwables.propagate(new Exception("Fields for csv files are not configured," +
                    " please configured the property " + ES_CSV_FIELDS));
        }
        try {
            delimiter = context.getString(ES_CSV_DELIMITER, DEFAULT_ES_CSV_DELIMITER);
            for (String fieldType : fields.split(COMMA)) {
                names.add(getValue(fieldType, 0));
                types.add(getValue(fieldType, 1));
            }
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    }
}

package com.cognitree.flume.sink.elasticsearch;

import org.apache.flume.Context;
import org.apache.flume.Event;

import java.util.Map;

/**
 * Index builder that lets individual events choose their destination through the
 * {@code index}, {@code type} and {@code id} headers, deferring to the statically
 * configured values of {@link StaticIndexBuilder} when the index or type header is absent.
 */
public class HeaderBasedIndexBuilder extends StaticIndexBuilder
{
    /** Looks up a single header of the event; {@code null} when the header is not set. */
    private static String header(Event event, String key)
    {
        Map<String, String> headers = event.getHeaders();
        return headers.get(key);
    }

    /** Returns the {@code index} header, or the parent's index when that header is missing. */
    public String getIndex(Event event)
    {
        String fromHeader = header(event, "index");
        return fromHeader != null ? fromHeader : super.getIndex(event);
    }

    /** Returns the {@code type} header, or the parent's type when that header is missing. */
    public String getType(Event event)
    {
        String fromHeader = header(event, "type");
        return fromHeader != null ? fromHeader : super.getType(event);
    }

    /** Returns the {@code id} header, which may be {@code null}. */
    public String getId(Event event)
    {
        return header(event, "id");
    }

    /** Delegates to the parent, which reads the static index and type. */
    public void configure(Context context)
    {
        super.configure(context);
    }
}
package com.cognitree.flume.sink.elasticsearch;

import org.apache.flume.Event;
import org.apache.flume.conf.Configurable;

/**
 * Decides, per event, which Elasticsearch index, type and document id the event is
 * written with.
 */
public interface IndexBuilder extends Configurable
{
    /** Returns the name of the index the given event belongs to. */
    String getIndex(Event paramEvent);

    /** Returns the document type the given event is indexed with. */
    String getType(Event paramEvent);

    /** Returns the document id for the given event; may be {@code null} when there is none. */
    String getId(Event paramEvent);
}
/*
 * Copyright 2017 Cognitree Technologies
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.cognitree.flume.sink.elasticsearch;

import com.alibaba.fastjson.JSONObject;
import org.apache.flume.Event;
import org.apache.flume.conf.Configurable;
import org.elasticsearch.common.xcontent.XContentBuilder;

/**
 * A serializer to convert the given Flume Event into a json document that will be indexed into Elasticsearch.
 * The sink instantiates the serializer by class name while it is being configured and then
 * reuses that instance for the events it processes.
 *
 */
public interface Serializer extends Configurable {

    /**
     * Serialize the body of the event to
     * XContentBuilder format
     *
     * @param event the Flume event whose body is converted
     * @return the document to index; may be {@code null} when no document could be
     *         produced, so callers have to check before indexing
     */
    XContentBuilder serialize(Event event);
}

/*
 * Copyright 2017 Cognitree Technologies
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.cognitree.flume.sink.elasticsearch;

import com.google.common.base.Charsets;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.elasticsearch.common.xcontent.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;


public class SimpleSerializer implements Serializer {

    private static final Logger logger = LoggerFactory.getLogger(SimpleSerializer.class);

    public XContentBuilder serialize(Event event) {
        XContentBuilder builder = null;
        try {
            XContentParser parser = XContentFactory
                    .xContent(XContentType.JSON)
                    .createParser(NamedXContentRegistry.EMPTY,
                            new String(event.getBody(), Charsets.UTF_8));
            builder = jsonBuilder().copyCurrentStructure(parser);
            parser.close();
        } catch (Exception e) {
            logger.error("Error in Converting the body to json field " + e.getMessage(), e);
        }
        return builder;
    }

    public void configure(Context context) {
        // No parameters needed from the configurations
    }
}

package com.cognitree.flume.sink.elasticsearch;

import org.apache.flume.Context;
import org.apache.flume.Event;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Index builder that sends every event to the same index and type, taken from the
 * {@code es.index} and {@code es.type} properties. It never supplies a document id.
 */
public class StaticIndexBuilder
        implements IndexBuilder
{
    private static final Logger logger = LoggerFactory.getLogger(StaticIndexBuilder.class);

    /** Configured index name; {@code null} until configured or when the property is blank. */
    private String index;

    /** Configured type name; {@code null} until configured or when the property is blank. */
    private String type;

    /** Returns the configured index, or {@code "default"} when none is set. */
    public String getIndex(Event event)
    {
        return this.index == null ? "default" : this.index;
    }

    /** Returns the configured type, or {@code "default"} when none is set. */
    public String getType(Event event)
    {
        return this.type == null ? "default" : this.type;
    }

    /** Always {@code null}: this builder does not assign document ids. */
    public String getId(Event event)
    {
        return null;
    }

    /** Reads the index and type from the sink context and logs the outcome. */
    public void configure(Context context)
    {
        this.index = Util.getContextValue(context, "es.index");
        this.type = Util.getContextValue(context, "es.type");
        logger.info("Simple Index builder, name [{}] type [{}] ", this.index, this.type);
    }
}
/*
 * Copyright 2017 Cognitree Technologies
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.cognitree.flume.sink.elasticsearch;

import org.apache.commons.lang.StringUtils;
import org.apache.flume.Context;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Locale;

import static com.cognitree.flume.sink.elasticsearch.Constants.*;

/**
 * Created by prashant
 */
/**
 * Static helpers shared by the sink: parsing of time and byte-size settings, reading
 * optional context values and writing typed csv fields.
 */
public class Util {

    private static final Logger logger = LoggerFactory.getLogger(Util.class);

    /**
     * Returns TimeValue based on the given interval.
     * The interval is a whole number followed by a one character unit (minutes, seconds
     * or milli seconds, as understood by {@code UnitEnum.fromString}).
     *
     * @param interval     configured interval, may be {@code null}
     * @param defaultValue interval used when {@code interval} is {@code null}, may be {@code null}
     * @return the parsed value, or {@code null} when both arguments are {@code null} or the
     *         unit is not recognised (an error is logged in the latter case)
     * @throws IllegalArgumentException when the interval is shorter than two characters;
     *         {@link NumberFormatException} when the numeric part is not an integer
     */
    public static TimeValue getTimeValue(String interval, String defaultValue) {
        String timeInterval = interval != null ? interval : defaultValue;
        logger.trace("Time interval is [{}] ", timeInterval);
        if (timeInterval == null) {
            return null;
        }
        if (timeInterval.length() < 2) {
            // without this check the substring calls below fail with an opaque index error
            throw new IllegalArgumentException("Time interval [" + timeInterval
                    + "] must be a number followed by a one character unit");
        }

        int unitPosition = timeInterval.length() - 1;
        int time = Integer.parseInt(timeInterval.substring(0, unitPosition));
        String unit = timeInterval.substring(unitPosition);

        UnitEnum unitEnum = UnitEnum.fromString(unit);
        if (unitEnum == null) {
            // switching on a null enum would throw a NullPointerException
            logger.error("Unit is incorrect, please check the Time Value unit: " + unit);
            return null;
        }

        TimeValue timeValue = null;
        switch (unitEnum) {
            case MINUTE:
                timeValue = TimeValue.timeValueMinutes(time);
                break;
            case SECOND:
                timeValue = TimeValue.timeValueSeconds(time);
                break;
            case MILI_SECOND:
                timeValue = TimeValue.timeValueMillis(time);
                break;
            default:
                logger.error("Unit is incorrect, please check the Time Value unit: " + unit);
        }
        return timeValue;
    }

    /**
     * Returns ByteSizeValue of the given byteSize and unit.
     * The unit is matched case-insensitively against {@code ByteSizeEnum} (MB or KB).
     *
     * @param byteSize configured size; when {@code null} the default bulk size in MB is returned
     * @param unit     unit name; must not be {@code null} when {@code byteSize} is given
     * @return the requested size, or the default when {@code byteSize} is {@code null}
     * @throws IllegalArgumentException when the unit is not a {@code ByteSizeEnum} constant
     */
    public static ByteSizeValue getByteSizeValue(Integer byteSize, String unit) {
        ByteSizeValue byteSizeValue = new ByteSizeValue(DEFAULT_ES_BULK_SIZE, ByteSizeUnit.MB);
        logger.trace("Byte size value is [{}] ", byteSizeValue);
        if (byteSize != null) {
            // Locale.ROOT keeps the enum lookup independent of the JVM's default locale
            ByteSizeEnum byteSizeEnum = ByteSizeEnum.valueOf(unit.toUpperCase(Locale.ROOT));
            switch (byteSizeEnum) {
                case MB:
                    byteSizeValue = new ByteSizeValue(byteSize, ByteSizeUnit.MB);
                    break;
                case KB:
                    byteSizeValue = new ByteSizeValue(byteSize, ByteSizeUnit.KB);
                    break;
                default:
                    logger.error("Unit is incorrect, please check the Byte Size unit: " + unit);
            }
        }
        return byteSizeValue;
    }

    /**
     * Returns the context value for the contextId.
     *
     * @return the configured string, or {@code null} when it is missing or blank
     */
    public static String getContextValue(Context context, String contextId) {
        String configured = context.getString(contextId);
        return StringUtils.isNotBlank(configured) ? configured : null;
    }

    /**
     * Add csv field to the XContentBuilder, converting the text to the declared type.
     * Nothing is written when {@code type} is {@code null}.
     *
     * @param xContentBuilder builder the field is appended to
     * @param key             field name
     * @param value           textual value taken from the record
     * @param type            name of a {@code FieldTypeEnum} constant, matched case-insensitively
     * @throws IOException              when the builder cannot write the field
     * @throws IllegalArgumentException when {@code type} is not a {@code FieldTypeEnum} constant
     *         or the value cannot be parsed as the requested number type
     */
    public static void addField(XContentBuilder xContentBuilder, String key, String value, String type) throws IOException{
        if (type != null) {
            // With the default locale "int".toUpperCase() becomes "İNT" on a Turkish JVM and
            // the lookup fails; Locale.ROOT makes the conversion locale independent.
            FieldTypeEnum fieldTypeEnum = FieldTypeEnum.valueOf(type.toUpperCase(Locale.ROOT));
            switch (fieldTypeEnum) {
                case STRING:
                    xContentBuilder.field(key, value);
                    break;
                case FLOAT:
                    xContentBuilder.field(key, Float.valueOf(value));
                    break;
                case INT:
                    xContentBuilder.field(key, Integer.parseInt(value));
                    break;
                case BOOLEAN:
                    xContentBuilder.field(key, Boolean.valueOf(value));
                    break;
                case LONG:
                    xContentBuilder.field(key, Long.parseLong(value));
                    break;
                default:
                    logger.error("Type is incorrect, please check type: " + type);
            }
        }
    }
}

package com.cognitree.flume.sink.elasticsearch;

import com.alibaba.fastjson.JSONObject;
import com.cognitree.flume.sink.elasticsearch.client.BulkProcessorBuilder;
import com.cognitree.flume.sink.elasticsearch.client.ElasticsearchClientBuilder;
import com.google.common.base.Charsets;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import io.netty.util.internal.StringUtil;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.flume.*;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.text.SimpleDateFormat;
import java.util.Date;

import static com.alibaba.fastjson.JSON.parseObject;

/**
 * Flume sink that writes event bodies to Elasticsearch through a {@link BulkProcessor}.
 *
 * The index name supplied by the configured {@link IndexBuilder} is suffixed with
 * {@code _yyyyMM} of the time the event is processed, so documents are spread over one
 * index per month. Events that carry an id are indexed under that id, all others get an
 * id generated by Elasticsearch.
 */
public class XyElasticSearchSink_DATE_UUID extends AbstractSink
        implements Configurable
{
    private static final Logger logger = LoggerFactory.getLogger(XyElasticSearchSink_DATE_UUID.class);
    private BulkProcessor bulkProcessor;
    private IndexBuilder indexBuilder;
    private Serializer serializer;
    // SimpleDateFormat is not thread-safe.
    // NOTE(review): assumes process() is only ever called from one thread - confirm.
    private SimpleDateFormat dataFormat;

    /**
     * Builds the transport client, the index builder, the serializer and the bulk processor
     * from the sink context. When {@code es.client.hosts} is missing only an error is
     * logged and none of them is created.
     */
    public void configure(Context context)
    {
        this.dataFormat = new SimpleDateFormat("yyyyMM");
        String[] hosts = getHosts(context);
        if (!ArrayUtils.isNotEmpty(hosts)) {
            logger.error("Could not create transport client, No host exist");
            return;
        }

        String clusterName = context.getString("es.cluster.name", "elasticsearch");
        boolean sniff = context.getBoolean("es.client.transport.sniff", false);
        boolean ignoreClusterName =
                context.getBoolean("es.client.transport.ignore_cluster_name", false);

        TransportClient client = new ElasticsearchClientBuilder(clusterName, hosts)
                .setTransportSniff(sniff)
                .setIgnoreClusterName(ignoreClusterName)
                .setTransportPingTimeout(Util.getTimeValue(
                        context.getString("es.client.transport.ping_timeout"), "5s"))
                .setNodeSamplerInterval(Util.getTimeValue(
                        context.getString("es.client.transport.nodes_sampler_interval"), "5s"))
                .build();
        buildIndexBuilder(context);
        buildSerializer(context);
        this.bulkProcessor = new BulkProcessorBuilder().buildBulkProcessor(context, client);
    }

    /**
     * Takes one event from the channel and hands it to the bulk processor.
     *
     * @return {@code READY} when an event was taken; {@code BACKOFF} when the channel was
     *         empty or the event could not be processed (the transaction is rolled back
     *         in the latter case)
     */
    public Status process() throws EventDeliveryException
    {
        Channel channel = getChannel();
        Transaction txn = channel.getTransaction();
        txn.begin();
        try {
            Event event = channel.take();
            Status status;
            if (event == null) {
                // nothing to do: ask the runner to back off instead of polling in a tight loop
                status = Status.BACKOFF;
            } else {
                String body = new String(event.getBody(), Charsets.UTF_8);
                if (!Strings.isNullOrEmpty(body)) {
                    addToBulk(event, body);
                }
                status = Status.READY;
            }
            txn.commit();
            return status;
        } catch (Throwable tx) {
            try {
                // a transaction must be committed or rolled back before it is closed
                txn.rollback();
            }
            catch (Exception ex) {
                logger.error("exception in rollback.", ex);
            }
            logger.error("transaction rolled back.", tx);
            if (tx instanceof Error) {
                // never hide OutOfMemoryError and friends behind a BACKOFF
                throw (Error) tx;
            }
            return Status.BACKOFF;
        } finally {
            txn.close();
        }
    }

    /** Serializes the event and queues an index request for it in the monthly index. */
    private void addToBulk(Event event, String body)
    {
        logger.debug("start to sink event [{}].", body);
        String index = this.indexBuilder.getIndex(event) + "_" + this.dataFormat.format(new Date());
        String type = this.indexBuilder.getType(event);
        String id = this.indexBuilder.getId(event);

        XContentBuilder xContentBuilder = this.serializer.serialize(event);
        if (xContentBuilder == null) {
            logger.error("Could not serialize the event body [{}] for index [{}], type[{}] and id [{}] ", new Object[] { body, index, type, id });
            return;
        }

        // without an id Elasticsearch generates one
        IndexRequest request = Strings.isNullOrEmpty(id)
                ? new IndexRequest(index, type)
                : new IndexRequest(index, type, id);
        this.bulkProcessor.add(request.source(xContentBuilder));
        logger.debug("sink event [{}] successfully.", body);
    }

    /** Closes the bulk processor and lets the parent record the stopped lifecycle state. */
    public void stop()
    {
        if (this.bulkProcessor != null) {
            this.bulkProcessor.close();
        }
        super.stop();
    }

    /** Instantiates and configures the class named by {@code es.index.builder}. */
    private void buildIndexBuilder(Context context)
    {
        String indexBuilderClass = "com.cognitree.flume.sink.elasticsearch.StaticIndexBuilder";
        if (StringUtils.isNotBlank(context.getString("es.index.builder"))) {
            indexBuilderClass = context.getString("es.index.builder");
        }
        this.indexBuilder = this.<IndexBuilder>instantiateClass(indexBuilderClass);
        this.indexBuilder.configure(context);
    }

    /** Instantiates and configures the class named by {@code es.serializer}. */
    private void buildSerializer(Context context)
    {
        String serializerClass = "com.cognitree.flume.sink.elasticsearch.SimpleSerializer";
        // isNotBlank, like the index builder: a whitespace-only value falls back to the default
        if (StringUtils.isNotBlank(context.getString("es.serializer"))) {
            serializerClass = context.getString("es.serializer");
        }
        this.serializer = this.<Serializer>instantiateClass(serializerClass);
        this.serializer.configure(context);
    }

    /**
     * Creates an instance of the named class through its no-argument constructor.
     * Any failure is logged and rethrown unchecked.
     */
    @SuppressWarnings("unchecked") // the caller chooses T to match the configured class
    private <T> T instantiateClass(String className)
    {
        try
        {
            Class<?> aClass = Class.forName(className);
            return (T) aClass.newInstance();
        } catch (Exception e) {
            logger.error("Could not instantiate class " + className, e);
            throw Throwables.propagate(e);
        }
    }

    /** Returns the comma separated {@code es.client.hosts} entries, or {@code null} when unset. */
    private String[] getHosts(Context context)
    {
        String[] hosts = null;
        if (StringUtils.isNotBlank(context.getString("es.client.hosts"))) {
            hosts = context.getString("es.client.hosts").split(",");
        }
        return hosts;
    }
}

3 打包上传flume-elasticsearch-sink相关jar包

解释一下这个sink:主要是把json数据按月建立索引加载到es中
上传的jar包有:自定义的flume-elasticsearch-sink包
以及pom里面涉及的elasticsearch相关jar包:比如transport等,我这里不一一列举了

三、配置flume的conf
1 在 /opt/cloudera/parcels/CDH/lib/flume-ng/conf目录下,
vi elasticserach.conf
输入以下内容:

# Flume agent "ng": Kafka source -> memory channel -> custom Elasticsearch sink.
# Comments are kept on their own lines: in a .properties file "#" only starts a comment
# at the beginning of a line, so text placed after a value becomes part of that value.
ng.sources = kafkaSource
ng.channels = memorychannel
ng.sinks =  es_sink

ng.sources.kafkaSource.type= org.apache.flume.source.kafka.KafkaSource
ng.sources.kafkaSource.kafka.bootstrap.servers=cdh01:9092,cdh02:9092,cdh03:9092
ng.sources.kafkaSource.kafka.consumer.group.id=xytest112
ng.sources.kafkaSource.kafka.topics=pd_ry_txjl
ng.sources.kafkaSource.batchSize=1000
ng.sources.kafkaSource.channels= memorychannel
ng.sources.kafkaSource.kafka.consumer.auto.offset.reset=latest
ng.sources.kafkaSource.interceptors= i1
# Custom interceptor
ng.sources.kafkaSource.interceptors.i1.type=com.iflytek.extracting.flume.interceptor.XyAccessJsonInterceptorTC$Builder

ng.channels.memorychannel.type = memory
ng.channels.memorychannel.keep-alive = 3
ng.channels.memorychannel.byteCapacityBufferPercentage = 20
ng.channels.memorychannel.transactionCapacity = 10000
ng.channels.memorychannel.capacity = 100000

# Custom Elasticsearch sink
ng.sinks.es_sink.type=com.cognitree.flume.sink.elasticsearch.XyElasticSearchSink_DATE_UUID
ng.sinks.es_sink.es.bulkActions=5
ng.sinks.es_sink.es.bulkProcessor.name=bulkprocessor
ng.sinks.es_sink.es.bulkSize=5
ng.sinks.es_sink.es.bulkSize.unit=MB
ng.sinks.es_sink.es.concurrent.request=1
ng.sinks.es_sink.es.flush.interval.time=5m
ng.sinks.es_sink.es.backoff.policy.time.interval=50M
ng.sinks.es_sink.es.backoff.policy.retries=8
# Elasticsearch cluster name
ng.sinks.es_sink.es.cluster.name=elasticsearch
# Elasticsearch host and port
ng.sinks.es_sink.es.client.hosts=cdh01:9300
# Base index name; the sink appends the current month, so data lands in e.g. rxjl_new_201912
ng.sinks.es_sink.es.index=rxjl_new
# Elasticsearch type
ng.sinks.es_sink.es.type=capture
ng.sinks.es_sink.es.index.builder=com.cognitree.flume.sink.elasticsearch.HeaderBasedIndexBuilder
ng.sinks.es_sink.es.serializer=com.cognitree.flume.sink.elasticsearch.SimpleSerializer
ng.sinks.es_sink.es.serializer.csv.fields=id:string,name:string,age:long,time:string
ng.sinks.es_sink.es.serializer.csv.delimiter=,
ng.sinks.es_sink.es.serializer.avro.schema.file=/usr/local/schme.avsc
ng.sinks.es_sink.channel=memorychannel

2 启动flume:

然后前台启动flume,进行调试:
bin/flume-ng agent -n ng -c conf -f conf/elasticserach.conf
cdh版本的flume默认的日志打在 /var/log/flume/flume.log里面
查看数据已经接入es,并确定没问题可以使用后台提交:
nohup bin/flume-ng agent -n ng -c conf -f conf/elasticserach.conf &
任务停止:
jcmd | grep elasticserach.conf   # 找到含有 elasticserach.conf的任务
然后kill 任务id即可

你可能感兴趣的:(flume)