elasticsearch restclient bulk批量索引数据,可回滚返回失败数据及原因(仅供参考)

自用代码,不喜勿喷,感谢指正。(仅供参考,仅供参考,仅供参考。)

maven:


        
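        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-high-level-client</artifactId>
            <exclusions>
                <exclusion>
                    <groupId>com.google.guava</groupId>
                    <artifactId>guava</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.fasterxml.jackson.core</groupId>
                    <artifactId>jackson-core</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.fasterxml.jackson.dataformat</groupId>
                    <artifactId>jackson-dataformat-smile</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.fasterxml.jackson.dataformat</groupId>
                    <artifactId>jackson-dataformat-yaml</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.fasterxml.jackson.core</groupId>
                    <artifactId>jackson-databind</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.fasterxml.jackson.dataformat</groupId>
                    <artifactId>jackson-dataformat-cbor</artifactId>
                </exclusion>
            </exclusions>
            <version>${es.version}</version>
        </dependency>

        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-client-sniffer</artifactId>
            <version>${es.version}</version>
        </dependency>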

java:

import javafx.util.Pair;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
import org.apache.http.impl.nio.conn.PoolingNHttpClientConnectionManager;
import org.apache.http.impl.nio.reactor.DefaultConnectingIOReactor;
import org.apache.http.impl.nio.reactor.IOReactorConfig;
import org.apache.http.nio.reactor.IOReactorException;
import org.elasticsearch.action.DocWriteRequest;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.sniff.ElasticsearchHostsSniffer;
import org.elasticsearch.client.sniff.HostsSniffer;
import org.elasticsearch.client.sniff.SniffOnFailureListener;
import org.elasticsearch.client.sniff.Sniffer;
import org.elasticsearch.common.xcontent.XContentType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.*;

/**
 * elasticsearch rest client operation class
 * 方法:canRollbackBulkDocIndex。可回调的批量插入方法。
 * 注意:使用前需先行调用 initialize() 方法修改变量 hostAndPorts 中的 es 集群地址信息并初始化客户端,client 在使用完毕后需要调用该工具类的 close() 方法。
 */
public class ESRestClientTools {
    private static Logger log = LoggerFactory.getLogger(ESRestClientTools.class);
    private static RestClient restClient = null;
    private static final ElasticsearchHostsSniffer.Scheme SCHEMA = ElasticsearchHostsSniffer.Scheme.HTTP;
    private static String hostAndPorts;
    private static Sniffer sniffer = null;

    /**
     * obtain elasticsearch high level rest client.
     *
     * @return HighLevelRestClient
     */
    public static RestHighLevelClient getHighLevelRestClient() {
        if (null != restClient && ping() == 200) {
            return new RestHighLevelClient(restClient);
        } else {
            if (StringUtils.isNotEmpty(hostAndPorts)) {
                initialize(hostAndPorts);
                if (null != restClient && ping() == 200) {
                    return new RestHighLevelClient(restClient);
                } else {
                    log.info("can not obtain useful ESRestClient, please have a check for ip or network status.");
                    return null;
                }
            } else {
                log.info("the connect param is enpty or null!");
                return null;
            }
        }
    }

    /**
     * test server network state.
     *
     * @return status code.
     */
    private static int ping() {
        Map params = Collections.singletonMap("pretty", "true");
        int code = 0;
        try {
            if (restClient != null) {
                Response response = restClient.performRequest("GET", "/", params);
                code = response.getStatusLine().getStatusCode();
                log.info("code:{},elasticsearch connected success!", code);
            } else {
                log.info("the rest client is null!");
            }
        } catch (IOException e) {
            log.info("elasticsearch connect error:{}。", e.getLocalizedMessage());
            e.printStackTrace();
        }
        return code;
    }

    /**
     * close elasticsearch rest client.
     */
    public static void close() {
        try {
            if (null != sniffer) {
                sniffer.close();
                log.info("ESRestClient sniffer closed。。。。。。。。。");
            }
            if (null != restClient) {
                restClient.close();
                log.info("ESRestClient closed。。。。。。。。。");

            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * initialize logic.
     */
    public static void initialize(String firstInputAddressOfCluster) {
        if (null != restClient){
            create(firstInputAddressOfCluster);
        } else {
            if (ping() != 200) {
                close();
                create(firstInputAddressOfCluster);
            } else {
                log.info("the client is OK please dong't initialize repeat!");
            }
        }
    }
    /**
     * create elasticsearch rest client.
     */
    private static void create(String firstInputAddressOfCluster) {
        hostAndPorts = firstInputAddressOfCluster;
        if (StringUtils.isNotEmpty(hostAndPorts)) {
            String[] splites = hostAndPorts.split(",");
            HttpHost[] hosts = new HttpHost[splites.length];
            for (int i = 0; i < splites.length; i++) {
                String host = splites[i].split(":")[0].trim();
                int port = Integer.parseInt(splites[i].split(":")[1].trim());
                hosts[i] = new HttpHost(host, port, SCHEMA.toString());
            }
            try {
                RestClientBuilder builder = RestClient.builder(hosts);
                builder.setRequestConfigCallback(new RestClientBuilder.RequestConfigCallback() {
                    @Override
                    public RequestConfig.Builder customizeRequestConfig(RequestConfig.Builder requestConfigBuilder) {
                        requestConfigBuilder.setConnectTimeout(1000 * 10);
                        requestConfigBuilder.setSocketTimeout(1000 * 60);
                        requestConfigBuilder.setConnectionRequestTimeout(1000 * 5);
                        return requestConfigBuilder;
                    }
                });
                builder.setHttpClientConfigCallback(new RestClientBuilder.HttpClientConfigCallback() {
                    @Override
                    public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) {
                        try {
                            final IOReactorConfig ioReactorConfig = IOReactorConfig.custom()
                                    .setConnectTimeout(1000 * 20)
                                    .setSoTimeout(1000 * 60 * 2)
                                    .setRcvBufSize(4096)
                                    .setSndBufSize(8192)
                                    .setSoKeepAlive(true)
                                    .build();
                            final PoolingNHttpClientConnectionManager connManager = new PoolingNHttpClientConnectionManager(new
                                    DefaultConnectingIOReactor(ioReactorConfig));
                            connManager.setMaxTotal(100);//多线程访问时最大并发量
                            connManager.setDefaultMaxPerRoute(20); //单次路由线程上限

                            httpClientBuilder.disableAuthCaching();
                            httpClientBuilder.setDefaultIOReactorConfig(ioReactorConfig);
                            httpClientBuilder.setConnectionManager(connManager);
                        } catch (IOReactorException e) {
                            e.printStackTrace();
                        }
                        return httpClientBuilder;
                    }
                });

                //star cluster sniffe function, it will be ok for provide partial IP address of es cluster.
                SniffOnFailureListener sniffOnFailureListener = new SniffOnFailureListener();

                builder.setFailureListener(sniffOnFailureListener);
                builder.setMaxRetryTimeoutMillis(1000 * 60 * 5); //the time(ms) of max retry timeout
                restClient = builder.build();

                HostsSniffer hostsSniffer = new ElasticsearchHostsSniffer(
                        restClient,
                        ElasticsearchHostsSniffer.DEFAULT_SNIFF_REQUEST_TIMEOUT * 5,
                        ElasticsearchHostsSniffer.Scheme.HTTP);

                sniffer = Sniffer.builder(restClient)
                        .setHostsSniffer(hostsSniffer)
                        .setSniffIntervalMillis(1000 * 60 * 3)
                        .setSniffAfterFailureDelayMillis(1000 * 10)
                        .build();

                sniffOnFailureListener.setSniffer(sniffer);

                log.info("ESRestClient already initialized。。。。。。。。。");
            } catch (Exception ex) {
                log.info("ESRestClient initialize error: {}. Please have a check for ip or network status.", ex.getMessage());
                ex.printStackTrace();
            }
        }
    }

    /**
     * es bulk index api
     *
     * @param client    RestHighLevelClient
     * @param indexName index
     * @param typeName  type
     * @param batch     batch data
     * @param batchSize the size of every batch
     * @return failure doc collect
     */
    public static ArrayList> canRollbackBulkDocIndex(RestHighLevelClient client, String indexName, String typeName, ArrayList batch, int batchSize) {
        ArrayList> failure = new ArrayList<>();
        if (batchSize <= 0 || batchSize > 500) {
            batchSize = 500;
        }
        Iterator batchIter = batch.iterator();
        BulkRequest request = new BulkRequest();
        int cursor = 0;
        int count = 0;
        try {
            while (batchIter.hasNext()) {
                String json = batchIter.next();
                if (StringUtils.isNotEmpty(json)) {
                    if (cursor < batchSize) {
                        request.add(new IndexRequest(indexName, typeName).create(false).source(json, XContentType.JSON));
                    } else {
                        everyBatchWrapper(client, request, failure);
                        cursor = 0;
                        request = new BulkRequest();
                        request.add(new IndexRequest(indexName, typeName).create(false).source(json, XContentType.JSON));
                    }
                    cursor++;
                } else {
                    log.info("index of {} is null or enpty.", count);
                }
                count++;
            }
            everyBatchWrapper(client, request, failure);
        } catch (Exception e) {
            log.info("the failed json element index range is {} - {}.", count - cursor, count);
            log.info("Cause by: {}.", e.getMessage());
            e.printStackTrace();
        }
        if (failure.size() > 0) {
            log.info("the number of failed doc index about this target index collention is {}.", failure.size());
        }
        return failure;
    }

    /**
     * every batch operation
     *
     * @param client  RestHighLevelClient
     * @param request BulkRequest
     * @param failure failure collection
     * @throws IOException
     */
    private static void everyBatchWrapper(RestHighLevelClient client, BulkRequest request, ArrayList> failure) throws IOException {
        BulkResponse response = client.bulk(request);
        if (response.hasFailures()) {
            List list = request.requests();
            Iterator iter = response.iterator();
            while (iter.hasNext()) {
                BulkItemResponse bir = iter.next();
                if (bir.isFailed()) {
                    String reason = "HTTP status: [" +  bir.getFailure().getStatus().getStatus() + "], Cause by: " + bir.getFailureMessage() + ".";
                    int docIndex = bir.getItemId();
                    IndexRequest ir = (IndexRequest) list.get(docIndex);
                    failure.add(new Pair<>(ir.source().utf8ToString(), reason));
                }
            }
        }
    }
}

scala(bigdata 使用):

import scala.collection.JavaConverters._
import scala.collection.JavaConversions._
import org.apache.commons.lang3.StringUtils
import org.apache.http.HttpHost
import org.apache.http.client.config.RequestConfig
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder
import org.apache.http.impl.nio.conn.PoolingNHttpClientConnectionManager
import org.apache.http.impl.nio.reactor.{DefaultConnectingIOReactor, IOReactorConfig}
import org.apache.http.nio.reactor.IOReactorException
import org.elasticsearch.action.DocWriteRequest
import org.elasticsearch.action.bulk.{BulkItemResponse, BulkRequest, BulkResponse}
import org.elasticsearch.action.index.IndexRequest
import org.elasticsearch.client.sniff.{ElasticsearchHostsSniffer, HostsSniffer, SniffOnFailureListener, Sniffer}
import org.elasticsearch.client.{Response, RestClient, RestClientBuilder, RestHighLevelClient}
import org.elasticsearch.common.xcontent.XContentType
import org.slf4j.LoggerFactory

import scala.collection.mutable
import scala.collection.mutable.ListBuffer

/**
  * Singleton helper around the Elasticsearch low-level RestClient and the
  * RestHighLevelClient.
  *
  * Main entry point: `canRollbackBulkDocIndex`, a bulk index call that returns every
  * document Elasticsearch rejected together with the rejection reason, so the caller
  * can retry or roll back.
  *
  * Usage: call `initialize()` with the cluster address list ("host1:port1,host2:port2")
  * before anything else, and call `close()` once the client is no longer needed.
  *
  * All state is kept in unsynchronized vars of this object.
  */
object ESRestClientUtils {
  private val log = LoggerFactory.getLogger(ESRestClientUtils.getClass.getSimpleName)

  /** Upper bound (and fallback value) for the number of documents sent in one bulk request. */
  private val MaxBatchSize = 1000

  private var restClient: Option[RestClient] = None
  private val SCHEMA: ElasticsearchHostsSniffer.Scheme = ElasticsearchHostsSniffer.Scheme.HTTP
  private var hostAndPorts: String = _
  private var sniffer: Option[Sniffer] = None

  /**
    * Obtains a high level REST client backed by the shared low-level client.
    *
    * If the current client is missing or does not answer the ping with HTTP 200, the
    * client is re-initialized from the last address list passed to `initialize` and
    * pinged once more.
    *
    * @return a new RestHighLevelClient, or None when no address list is known or the
    *         cluster still cannot be reached
    */
  def getHighLevelRestClient: Option[RestHighLevelClient] = {
    if (restClient.isDefined && ping() == 200) {
      restClient.map(client => new RestHighLevelClient(client))
    } else if (StringUtils.isNotEmpty(hostAndPorts)) {
      initialize(hostAndPorts)
      if (restClient.isDefined && ping() == 200) {
        restClient.map(client => new RestHighLevelClient(client))
      } else {
        log.info("can not obtain useful ESRestClient, please have a check for ip or network status.")
        None
      }
    } else {
      log.info("the connect param is enpty or null!")
      None
    }
  }

  /**
    * Tests the connection by issuing `GET /` against the cluster.
    *
    * @return the HTTP status code of the response, or 0 when there is no client or the
    *         request failed
    */
  private def ping(): Int = {
    restClient match {
      case None =>
        log.info("the rest client is null!")
        0
      case Some(client) =>
        try {
          // Explicit conversion: performRequest expects a java.util.Map.
          val params = Map("pretty" -> "true").asJava
          val response: Response = client.performRequest("GET", "/", params)
          val code = response.getStatusLine.getStatusCode
          // Box the Int explicitly: SLF4J parameters are java.lang.Object.
          log.info("code:{},elasticsearch connected success!", Int.box(code))
          code
        } catch {
          case ex: Exception =>
            log.warn("elasticsearch connect error:{}。", ex.getLocalizedMessage, ex)
            0
        }
    }
  }

  /**
    * Closes the sniffer and the low-level REST client, if present.
    *
    * The references are reset to None before closing so that later calls never reuse
    * an already closed client, and the client is closed even when closing the sniffer
    * fails.
    */
  def close(): Unit = {
    val closingSniffer = sniffer
    val closingClient = restClient
    sniffer = None
    restClient = None
    try {
      try {
        closingSniffer.foreach { s =>
          s.close()
          log.info("ESRestClient sniffer closed。。。。。。。。。")
        }
      } finally {
        closingClient.foreach { c =>
          c.close()
          log.info("ESRestClient closed。。。。。。。。。")
        }
      }
    } catch {
      case ex: Exception =>
        log.warn("failed to close ESRestClient cleanly: {}", ex.getMessage, ex)
    }
  }

  /**
    * Initializes the shared client.
    *
    * A client is created when none exists. An existing client is kept when it still
    * answers the ping with HTTP 200; otherwise it is closed and replaced.
    *
    * @param firstInputAddressOfCluster comma separated host:port list
    */
  def initialize(firstInputAddressOfCluster: String): Unit = {
    if (restClient.isEmpty) {
      create(firstInputAddressOfCluster)
    } else if (ping() != 200) {
      close()
      create(firstInputAddressOfCluster)
    } else {
      log.info("the client is OK please dong't initialize repeat!")
    }
  }

  /**
    * Builds the low-level REST client and its sniffer from the given address list and
    * stores them in this object. Does nothing beyond remembering the address list when
    * it is empty.
    *
    * Every entry must have the form host:port; an entry without a port or with a
    * non-numeric port makes this method throw.
    *
    * @param firstInputAddressOfCluster comma separated host:port list
    */
  private def create(firstInputAddressOfCluster: String): Unit = {
    hostAndPorts = firstInputAddressOfCluster
    if (StringUtils.isNotEmpty(hostAndPorts)) {
      val hosts: Array[HttpHost] = hostAndPorts.split(",").map { address =>
        val hostAndPort = address.split(":")
        new HttpHost(hostAndPort(0).trim, Integer.parseInt(hostAndPort(1).trim), SCHEMA.toString)
      }
      try {
        val builder: RestClientBuilder = RestClient.builder(hosts: _*)
        builder.setRequestConfigCallback(new RestClientBuilder.RequestConfigCallback() {
          override def customizeRequestConfig(requestConfigBuilder: RequestConfig.Builder): RequestConfig.Builder = {
            requestConfigBuilder.setConnectTimeout(1000 * 10)
            requestConfigBuilder.setSocketTimeout(1000 * 60)
            requestConfigBuilder.setConnectionRequestTimeout(1000 * 5)
            requestConfigBuilder
          }
        })
        builder.setHttpClientConfigCallback(new RestClientBuilder.HttpClientConfigCallback() {
          override def customizeHttpClient(httpClientBuilder: HttpAsyncClientBuilder): HttpAsyncClientBuilder = {
            try {
              val ioReactorConfig: IOReactorConfig = IOReactorConfig.custom()
                .setConnectTimeout(1000 * 20)
                .setSoTimeout(1000 * 60 * 2)
                .setRcvBufSize(4096)
                .setSndBufSize(8192)
                .setSoKeepAlive(true)
                .build()
              val connManager: PoolingNHttpClientConnectionManager =
                new PoolingNHttpClientConnectionManager(new DefaultConnectingIOReactor(ioReactorConfig))
              connManager.setMaxTotal(100) // maximum total number of pooled connections
              connManager.setDefaultMaxPerRoute(20) // maximum number of connections per route

              httpClientBuilder.disableAuthCaching()
              httpClientBuilder.setDefaultIOReactorConfig(ioReactorConfig)
              httpClientBuilder.setConnectionManager(connManager)
            } catch {
              // Fall back to the builder's defaults, but make the failure visible
              // instead of swallowing it silently.
              case ex: Exception =>
                log.error("failed to customize the ES http client, falling back to defaults", ex)
            }
            httpClientBuilder
          }
        })

        // Enable cluster sniffing, so providing only part of the cluster addresses is enough.
        val sniffOnFailureListener: SniffOnFailureListener = new SniffOnFailureListener()

        builder.setFailureListener(sniffOnFailureListener)
        builder.setMaxRetryTimeoutMillis(1000 * 60 * 5) // max retry timeout in ms
        val client: RestClient = builder.build()
        restClient = Some(client)

        val hostsSniffer: HostsSniffer = new ElasticsearchHostsSniffer(
          client,
          ElasticsearchHostsSniffer.DEFAULT_SNIFF_REQUEST_TIMEOUT * 5,
          ElasticsearchHostsSniffer.Scheme.HTTP)

        val newSniffer: Sniffer = Sniffer.builder(client)
          .setHostsSniffer(hostsSniffer)
          .setSniffIntervalMillis(1000 * 60 * 3)
          .setSniffAfterFailureDelayMillis(1000 * 10)
          .build()
        sniffer = Some(newSniffer)

        sniffOnFailureListener.setSniffer(newSniffer)
        log.info("ESRestClient already initialized。。。。。。。。。")
      } catch {
        case ex: Exception =>
          log.error("ESRestClient initialize error: {}. Please have a check for ip or network status.", ex.getMessage, ex)
      }
    }
  }

  /**
    * Bulk-indexes JSON documents in chunks and collects the documents that failed.
    *
    * Null or empty elements are skipped (and logged). A bulk request is only sent when
    * it contains at least one document. If sending a chunk throws, processing stops,
    * the error is logged, and the failures collected so far are returned; the documents
    * of the aborted chunk and any remaining documents are not included in the result.
    *
    * @param client      RestHighLevelClient used to send the bulk requests
    * @param indexName   target index
    * @param typeName    target type
    * @param batch       JSON documents to index; null is treated as empty
    * @param sizeOfBatch documents per bulk request; values <= 0 or > 1000 fall back to 1000
    * @return (document source, failure reason) for every rejected document
    */
  def canRollbackBulkDocIndex(client: RestHighLevelClient, indexName: String, typeName: String, batch: List[String], sizeOfBatch: Int = 10): List[(String, String)] = {
    val failure = new ListBuffer[(String, String)]()
    val batchSize = if (sizeOfBatch <= 0 || sizeOfBatch > MaxBatchSize) MaxBatchSize else sizeOfBatch
    val documents: List[String] = Option(batch).getOrElse(Nil)
    var request: BulkRequest = new BulkRequest()
    var cursor = 0 // number of documents in the current bulk request
    var count = 0 // position of the current element in the input list
    try {
      for (json <- documents) {
        if (StringUtils.isNotEmpty(json)) {
          if (cursor >= batchSize) {
            // Current chunk is full: send it and start a new one.
            everyBatchWrapper(client, request, failure)
            cursor = 0
            request = new BulkRequest()
          }
          request.add(new IndexRequest(indexName, typeName).create(false).source(json, XContentType.JSON))
          cursor += 1
        } else {
          log.info("index of {} is null or enpty.", Int.box(count))
        }
        count += 1
      }
      // Flush the tail; skip it when nothing was added, because an empty bulk request
      // is rejected by request validation.
      if (request.numberOfActions() > 0) {
        everyBatchWrapper(client, request, failure)
      }
    } catch {
      case e: Exception =>
        // The original format string had a third placeholder with no argument.
        log.error("the failed json element index range is {} - {}.", Int.box(count - cursor), Int.box(count))
        log.error("Cause by: {}.", e.getMessage, e)
    }
    if (failure.nonEmpty) {
      log.info("the number of failed doc index about this target index collention is {}.", Int.box(failure.size))
    }
    failure.toList
  }

  /**
    * Sends one bulk request and appends every failed item to `failure`.
    *
    * @param client  RestHighLevelClient
    * @param request bulk request to execute; all its items are expected to be IndexRequests
    * @param failure buffer receiving (document source, failure reason) pairs
    */
  private def everyBatchWrapper(client: RestHighLevelClient, request: BulkRequest, failure: ListBuffer[(String, String)]): Unit = {
    val response: BulkResponse = client.bulk(request)
    if (response.hasFailures) {
      val requests = request.requests().asScala
      for (item <- response.iterator().asScala if item.isFailed) {
        val reason = s"""HTTP status: [${item.getFailure.getStatus.getStatus}], Cause by: ${item.getFailureMessage}."""
        // The item id is the position of the originating request inside the bulk request.
        val indexRequest: IndexRequest = requests(item.getItemId).asInstanceOf[IndexRequest]
        failure += ((indexRequest.source().utf8ToString(), reason))
      }
    }
  }
}

 

你可能感兴趣的:(技术文档)