Java批量写入Elasticsearch

 记一次es批量导入数据的惨痛经历(分享给缺少团队协作的苦逼人儿):

1、确认es版本及maven依赖es版本;

2、确认es启动内存、索引刷新规则、默认批量写入数据量大小;

3、分批次导入,每批数量1000-5000;

4、如使用多线程操作,计数器使用ThreadLocal

package com.config;

import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpHost;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.support.WriteRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentType;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.util.List;
import java.util.Map;

@Slf4j
@Component
public class ElasticsearchDataHandle {

    @Value("${spring.elasticsearch.rest.nodes:127.0.0.1}")
    private String host;
    @Value("${spring.elasticsearch.rest.port:9200}")
    private String port;
    @Value("${spring.elasticsearch.rest.indexName:indexName}")
    private String indexName;//索引名称

	/**
     * 创建连接 高级客户端
     */
    public RestHighLevelClient restHighLevelClient() {
        RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(new HttpHost(host, Integer.parseInt(port), "http")));
        return client;
    }

    /**
     * Elasticsearch数据导入
     */
    public void addElasticsearchData(List> addEsDataMapList) {
	    //获取连接
        RestHighLevelClient client = restHighLevelClient();
        try {
			//创建请求
            BulkRequest bulkRequest = new BulkRequest();
			//创建index请求 千万注意,这个写在循环外侧,否则UDP协议会有丢数据的情况,看运气
            IndexRequest requestData = null;
            for (Map addEsDataMap : addEsDataMapList) {//添加数据
                requestData = new IndexRequest(indexName, "_doc", addEsDataMap.get("id").toString()).source(addEsDataMap, XContentType.JSON);
                bulkRequest.add(requestData);
            }
            log.info("es同步数据数量:{}", bulkRequest.numberOfActions());
			//设置索引刷新规则
            bulkRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
			//分批次提交,数量控制
            if (bulkRequest.numberOfActions() >= 1) {
                BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
                log.info("es同步数据结果:{}", bulkResponse.hasFailures());
            }
        } catch (Exception e) {
            e.printStackTrace();
            log.error("es同步数据执行失败:{}", addEsDataMapList);
        } finally {
            try {
                client.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

你可能感兴趣的:(Elasticsearch,elasticsearch,java,大数据)