自用代码,不喜勿喷,感谢指正。(仅供参考,仅供参考,仅供参考。)
maven 依赖(groupId : artifactId [: version]):
org.elasticsearch.client : elasticsearch-rest-high-level-client : ${es.version}
com.google.guava : guava
com.fasterxml.jackson.core : jackson-core
com.fasterxml.jackson.dataformat : jackson-dataformat-smile
com.fasterxml.jackson.dataformat : jackson-dataformat-yaml
com.fasterxml.jackson.core : jackson-databind
com.fasterxml.jackson.dataformat : jackson-dataformat-cbor
org.elasticsearch.client : elasticsearch-rest-client-sniffer : ${es.version}
java:
import javafx.util.Pair;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
import org.apache.http.impl.nio.conn.PoolingNHttpClientConnectionManager;
import org.apache.http.impl.nio.reactor.DefaultConnectingIOReactor;
import org.apache.http.impl.nio.reactor.IOReactorConfig;
import org.apache.http.nio.reactor.IOReactorException;
import org.elasticsearch.action.DocWriteRequest;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.sniff.ElasticsearchHostsSniffer;
import org.elasticsearch.client.sniff.HostsSniffer;
import org.elasticsearch.client.sniff.SniffOnFailureListener;
import org.elasticsearch.client.sniff.Sniffer;
import org.elasticsearch.common.xcontent.XContentType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.*;
/**
* elasticsearch rest client operation class
* 方法:canRollbackBulkDocIndex。可回调的批量插入方法。
* 注意:使用前需先行调用 initialize() 方法修改变量 hostAndPorts 中的 es 集群地址信息并初始化客户端,client 在使用完毕后需要调用该工具类的 close() 方法。
*/
public class ESRestClientTools {
private static Logger log = LoggerFactory.getLogger(ESRestClientTools.class);
private static RestClient restClient = null;
private static final ElasticsearchHostsSniffer.Scheme SCHEMA = ElasticsearchHostsSniffer.Scheme.HTTP;
private static String hostAndPorts;
private static Sniffer sniffer = null;
/**
* obtain elasticsearch high level rest client.
*
* @return HighLevelRestClient
*/
public static RestHighLevelClient getHighLevelRestClient() {
if (null != restClient && ping() == 200) {
return new RestHighLevelClient(restClient);
} else {
if (StringUtils.isNotEmpty(hostAndPorts)) {
initialize(hostAndPorts);
if (null != restClient && ping() == 200) {
return new RestHighLevelClient(restClient);
} else {
log.info("can not obtain useful ESRestClient, please have a check for ip or network status.");
return null;
}
} else {
log.info("the connect param is enpty or null!");
return null;
}
}
}
/**
* test server network state.
*
* @return status code.
*/
private static int ping() {
Map params = Collections.singletonMap("pretty", "true");
int code = 0;
try {
if (restClient != null) {
Response response = restClient.performRequest("GET", "/", params);
code = response.getStatusLine().getStatusCode();
log.info("code:{},elasticsearch connected success!", code);
} else {
log.info("the rest client is null!");
}
} catch (IOException e) {
log.info("elasticsearch connect error:{}。", e.getLocalizedMessage());
e.printStackTrace();
}
return code;
}
/**
* close elasticsearch rest client.
*/
public static void close() {
try {
if (null != sniffer) {
sniffer.close();
log.info("ESRestClient sniffer closed。。。。。。。。。");
}
if (null != restClient) {
restClient.close();
log.info("ESRestClient closed。。。。。。。。。");
}
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* initialize logic.
*/
public static void initialize(String firstInputAddressOfCluster) {
if (null != restClient){
create(firstInputAddressOfCluster);
} else {
if (ping() != 200) {
close();
create(firstInputAddressOfCluster);
} else {
log.info("the client is OK please dong't initialize repeat!");
}
}
}
/**
* create elasticsearch rest client.
*/
private static void create(String firstInputAddressOfCluster) {
hostAndPorts = firstInputAddressOfCluster;
if (StringUtils.isNotEmpty(hostAndPorts)) {
String[] splites = hostAndPorts.split(",");
HttpHost[] hosts = new HttpHost[splites.length];
for (int i = 0; i < splites.length; i++) {
String host = splites[i].split(":")[0].trim();
int port = Integer.parseInt(splites[i].split(":")[1].trim());
hosts[i] = new HttpHost(host, port, SCHEMA.toString());
}
try {
RestClientBuilder builder = RestClient.builder(hosts);
builder.setRequestConfigCallback(new RestClientBuilder.RequestConfigCallback() {
@Override
public RequestConfig.Builder customizeRequestConfig(RequestConfig.Builder requestConfigBuilder) {
requestConfigBuilder.setConnectTimeout(1000 * 10);
requestConfigBuilder.setSocketTimeout(1000 * 60);
requestConfigBuilder.setConnectionRequestTimeout(1000 * 5);
return requestConfigBuilder;
}
});
builder.setHttpClientConfigCallback(new RestClientBuilder.HttpClientConfigCallback() {
@Override
public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) {
try {
final IOReactorConfig ioReactorConfig = IOReactorConfig.custom()
.setConnectTimeout(1000 * 20)
.setSoTimeout(1000 * 60 * 2)
.setRcvBufSize(4096)
.setSndBufSize(8192)
.setSoKeepAlive(true)
.build();
final PoolingNHttpClientConnectionManager connManager = new PoolingNHttpClientConnectionManager(new
DefaultConnectingIOReactor(ioReactorConfig));
connManager.setMaxTotal(100);//多线程访问时最大并发量
connManager.setDefaultMaxPerRoute(20); //单次路由线程上限
httpClientBuilder.disableAuthCaching();
httpClientBuilder.setDefaultIOReactorConfig(ioReactorConfig);
httpClientBuilder.setConnectionManager(connManager);
} catch (IOReactorException e) {
e.printStackTrace();
}
return httpClientBuilder;
}
});
//star cluster sniffe function, it will be ok for provide partial IP address of es cluster.
SniffOnFailureListener sniffOnFailureListener = new SniffOnFailureListener();
builder.setFailureListener(sniffOnFailureListener);
builder.setMaxRetryTimeoutMillis(1000 * 60 * 5); //the time(ms) of max retry timeout
restClient = builder.build();
HostsSniffer hostsSniffer = new ElasticsearchHostsSniffer(
restClient,
ElasticsearchHostsSniffer.DEFAULT_SNIFF_REQUEST_TIMEOUT * 5,
ElasticsearchHostsSniffer.Scheme.HTTP);
sniffer = Sniffer.builder(restClient)
.setHostsSniffer(hostsSniffer)
.setSniffIntervalMillis(1000 * 60 * 3)
.setSniffAfterFailureDelayMillis(1000 * 10)
.build();
sniffOnFailureListener.setSniffer(sniffer);
log.info("ESRestClient already initialized。。。。。。。。。");
} catch (Exception ex) {
log.info("ESRestClient initialize error: {}. Please have a check for ip or network status.", ex.getMessage());
ex.printStackTrace();
}
}
}
/**
* es bulk index api
*
* @param client RestHighLevelClient
* @param indexName index
* @param typeName type
* @param batch batch data
* @param batchSize the size of every batch
* @return failure doc collect
*/
public static ArrayList> canRollbackBulkDocIndex(RestHighLevelClient client, String indexName, String typeName, ArrayList batch, int batchSize) {
ArrayList> failure = new ArrayList<>();
if (batchSize <= 0 || batchSize > 500) {
batchSize = 500;
}
Iterator batchIter = batch.iterator();
BulkRequest request = new BulkRequest();
int cursor = 0;
int count = 0;
try {
while (batchIter.hasNext()) {
String json = batchIter.next();
if (StringUtils.isNotEmpty(json)) {
if (cursor < batchSize) {
request.add(new IndexRequest(indexName, typeName).create(false).source(json, XContentType.JSON));
} else {
everyBatchWrapper(client, request, failure);
cursor = 0;
request = new BulkRequest();
request.add(new IndexRequest(indexName, typeName).create(false).source(json, XContentType.JSON));
}
cursor++;
} else {
log.info("index of {} is null or enpty.", count);
}
count++;
}
everyBatchWrapper(client, request, failure);
} catch (Exception e) {
log.info("the failed json element index range is {} - {}.", count - cursor, count);
log.info("Cause by: {}.", e.getMessage());
e.printStackTrace();
}
if (failure.size() > 0) {
log.info("the number of failed doc index about this target index collention is {}.", failure.size());
}
return failure;
}
/**
* every batch operation
*
* @param client RestHighLevelClient
* @param request BulkRequest
* @param failure failure collection
* @throws IOException
*/
private static void everyBatchWrapper(RestHighLevelClient client, BulkRequest request, ArrayList> failure) throws IOException {
BulkResponse response = client.bulk(request);
if (response.hasFailures()) {
List list = request.requests();
Iterator iter = response.iterator();
while (iter.hasNext()) {
BulkItemResponse bir = iter.next();
if (bir.isFailed()) {
String reason = "HTTP status: [" + bir.getFailure().getStatus().getStatus() + "], Cause by: " + bir.getFailureMessage() + ".";
int docIndex = bir.getItemId();
IndexRequest ir = (IndexRequest) list.get(docIndex);
failure.add(new Pair<>(ir.source().utf8ToString(), reason));
}
}
}
}
}
scala(bigdata 使用):
import scala.collection.JavaConverters._
import scala.collection.JavaConversions._
import org.apache.commons.lang3.StringUtils
import org.apache.http.HttpHost
import org.apache.http.client.config.RequestConfig
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder
import org.apache.http.impl.nio.conn.PoolingNHttpClientConnectionManager
import org.apache.http.impl.nio.reactor.{DefaultConnectingIOReactor, IOReactorConfig}
import org.apache.http.nio.reactor.IOReactorException
import org.elasticsearch.action.DocWriteRequest
import org.elasticsearch.action.bulk.{BulkItemResponse, BulkRequest, BulkResponse}
import org.elasticsearch.action.index.IndexRequest
import org.elasticsearch.client.sniff.{ElasticsearchHostsSniffer, HostsSniffer, SniffOnFailureListener, Sniffer}
import org.elasticsearch.client.{Response, RestClient, RestClientBuilder, RestHighLevelClient}
import org.elasticsearch.common.xcontent.XContentType
import org.slf4j.LoggerFactory
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
/**
 * Elasticsearch REST client operation object (Scala version, for big-data jobs).
 *
 * Main entry point: canRollbackBulkDocIndex — a bulk-index method that returns every
 * failed document together with its failure reason, so the caller can retry or roll back.
 *
 * Note: call initialize() with the ES cluster addresses ("host1:port1,host2:port2,...")
 * before use, and call close() once the client is no longer needed.
 */
object ESRestClientUtils {
  private val log = LoggerFactory.getLogger(ESRestClientUtils.getClass.getSimpleName)
  private var restClient: Option[RestClient] = None
  private val SCHEMA: ElasticsearchHostsSniffer.Scheme = ElasticsearchHostsSniffer.Scheme.HTTP
  // Comma-separated "host:port" list of the cluster, remembered for re-initialization.
  private var hostAndPorts: String = _
  private var sniffer: Option[Sniffer] = None

  /**
   * Obtain an Elasticsearch high-level REST client, re-initializing the underlying
   * low-level client when it is absent or unreachable.
   *
   * @return Some(client) when a healthy connection exists, None otherwise
   */
  def getHighLevelRestClient: Option[RestHighLevelClient] = {
    if (restClient.isDefined && ping() == 200) {
      Some(new RestHighLevelClient(restClient.get))
    } else if (StringUtils.isNotEmpty(hostAndPorts)) {
      initialize(hostAndPorts)
      // BUG FIX: the original tested "null != restClient", which is always true for
      // an Option; the correct check is isDefined (and it guards the .get below).
      if (restClient.isDefined && ping() == 200) {
        Some(new RestHighLevelClient(restClient.get))
      } else {
        log.info("can not obtain useful ESRestClient, please have a check for ip or network status.")
        None
      }
    } else {
      log.info("the connect param is empty or null!")
      None
    }
  }

  /**
   * Test the server network state with a GET / request.
   *
   * @return the HTTP status code, or 0 when the client is absent or the request failed
   */
  private def ping(): Int = {
    val params: Map[String, String] = Map("pretty" -> "true")
    var code = 0
    try {
      if (restClient.isDefined) {
        // Explicit .asJava instead of relying on the deprecated JavaConversions implicits.
        val response: Response = restClient.get.performRequest("GET", "/", params.asJava)
        code = response.getStatusLine.getStatusCode
        log.info("code:{},elasticsearch connected success!", code)
      } else {
        log.info("the rest client is null!")
      }
    } catch {
      case ex: Exception =>
        log.error("elasticsearch connect error: {}.", ex.getLocalizedMessage, ex)
    }
    code
  }

  /**
   * Close the sniffer and the low-level REST client, releasing all resources.
   * The sniffer is closed first because it keeps polling through the client.
   */
  def close(): Unit = {
    try {
      sniffer.foreach { s =>
        s.close()
        log.info("ESRestClient sniffer closed。。。。。。。。。")
      }
      sniffer = None // prevent reuse of a closed sniffer
      restClient.foreach { c =>
        c.close()
        log.info("ESRestClient closed。。。。。。。。。")
      }
      restClient = None // prevent reuse of a closed client
    } catch {
      case ex: Exception => log.error("error while closing ESRestClient.", ex)
    }
  }

  /**
   * Initialize the client with the given cluster addresses. A healthy existing
   * client is kept; a dead one is closed and replaced.
   *
   * @param firstInputAddressOfCluster comma-separated "host:port" pairs; a partial
   *                                   list suffices because sniffing discovers the rest
   */
  def initialize(firstInputAddressOfCluster: String): Unit = {
    if (restClient.isEmpty) {
      create(firstInputAddressOfCluster)
    } else if (ping() != 200) {
      close()
      create(firstInputAddressOfCluster)
    } else {
      log.info("the client is OK please don't initialize repeat!")
    }
  }

  /**
   * Create the low-level REST client and start the failure/interval sniffer.
   *
   * @param firstInputAddressOfCluster comma-separated "host:port" pairs
   */
  private def create(firstInputAddressOfCluster: String): Unit = {
    hostAndPorts = firstInputAddressOfCluster
    if (StringUtils.isEmpty(hostAndPorts)) {
      log.info("the connect param is empty or null!")
      return
    }
    val hosts: Array[HttpHost] = hostAndPorts.split(",").map { addr =>
      val parts = addr.split(":")
      new HttpHost(parts(0).trim, Integer.parseInt(parts(1).trim), SCHEMA.toString)
    }
    try {
      val builder: RestClientBuilder = RestClient.builder(hosts: _*)
      builder.setRequestConfigCallback(new RestClientBuilder.RequestConfigCallback() {
        override def customizeRequestConfig(requestConfigBuilder: RequestConfig.Builder): RequestConfig.Builder = {
          requestConfigBuilder.setConnectTimeout(1000 * 10)
          requestConfigBuilder.setSocketTimeout(1000 * 60)
          requestConfigBuilder.setConnectionRequestTimeout(1000 * 5)
          requestConfigBuilder
        }
      })
      builder.setHttpClientConfigCallback(new RestClientBuilder.HttpClientConfigCallback() {
        override def customizeHttpClient(httpClientBuilder: HttpAsyncClientBuilder): HttpAsyncClientBuilder = {
          try {
            val ioReactorConfig: IOReactorConfig = IOReactorConfig.custom()
              .setConnectTimeout(1000 * 20)
              .setSoTimeout(1000 * 60 * 2)
              .setRcvBufSize(4096)
              .setSndBufSize(8192)
              .setSoKeepAlive(true)
              .build()
            val connManager: PoolingNHttpClientConnectionManager =
              new PoolingNHttpClientConnectionManager(new DefaultConnectingIOReactor(ioReactorConfig))
            connManager.setMaxTotal(100)          // max concurrent connections overall
            connManager.setDefaultMaxPerRoute(20) // max concurrent connections per route
            httpClientBuilder.disableAuthCaching()
            httpClientBuilder.setDefaultIOReactorConfig(ioReactorConfig)
            httpClientBuilder.setConnectionManager(connManager)
          } catch {
            case ex: IOReactorException => log.error("failed to build IO reactor for ESRestClient.", ex)
          }
          httpClientBuilder
        }
      })
      // Start cluster sniffing; providing only part of the cluster's addresses is enough.
      val sniffOnFailureListener: SniffOnFailureListener = new SniffOnFailureListener()
      builder.setFailureListener(sniffOnFailureListener)
      builder.setMaxRetryTimeoutMillis(1000 * 60 * 5) // max retry timeout (ms)
      restClient = Some(builder.build())
      restClient.foreach { client =>
        val hostsSniffer: HostsSniffer = new ElasticsearchHostsSniffer(
          client,
          ElasticsearchHostsSniffer.DEFAULT_SNIFF_REQUEST_TIMEOUT * 5,
          SCHEMA)
        sniffer = Some(Sniffer.builder(client)
          .setHostsSniffer(hostsSniffer)
          .setSniffIntervalMillis(1000 * 60 * 3)
          .setSniffAfterFailureDelayMillis(1000 * 10)
          .build())
        sniffer.foreach(sniffOnFailureListener.setSniffer)
      }
      log.info("ESRestClient already initialized。。。。。。。。。")
    } catch {
      case ex: Exception =>
        log.error("ESRestClient initialize error: {}. Please have a check for ip or network status.", ex.getMessage, ex)
    }
  }

  /**
   * ES bulk index API: index documents in fixed-size sub-batches and collect failures.
   *
   * @param client      high-level REST client
   * @param indexName   target index
   * @param typeName    target type
   * @param batch       JSON documents to index; null/empty elements are skipped
   * @param sizeOfBatch documents per bulk request; values outside (0, 1000] fall back to 1000
   * @return pairs of (failed document JSON, failure reason); empty when all succeeded
   */
  def canRollbackBulkDocIndex(client: RestHighLevelClient, indexName: String, typeName: String, batch: List[String], sizeOfBatch: Int = 10): List[(String, String)] = {
    val failure = new ListBuffer[(String, String)]()
    var batchSize = sizeOfBatch
    if (sizeOfBatch <= 0 || sizeOfBatch > 1000) {
      batchSize = 1000
    }
    var request: BulkRequest = new BulkRequest()
    var cursor = 0 // position within the current sub-batch
    var count = 0  // position within the whole input list, for error reporting
    try {
      for (json <- batch) {
        if (StringUtils.isNotEmpty(json)) {
          if (cursor >= batchSize) {
            // Current sub-batch is full: send it, then start a new one.
            everyBatchWrapper(client, request, failure)
            cursor = 0
            request = new BulkRequest()
          }
          request.add(new IndexRequest(indexName, typeName).create(false).source(json, XContentType.JSON))
          cursor += 1
        } else {
          log.info("index of {} is null or empty.", count)
        }
        count += 1
      }
      everyBatchWrapper(client, request, failure) // flush the trailing partial batch
    } catch {
      case e: Exception =>
        // BUG FIX: the original format string had three placeholders but two arguments.
        log.info("the failed json element index range is {} - {}.", count - cursor, count)
        log.error("Cause by: {}.", e.getMessage, e)
    }
    // BUG FIX: ListBuffer.size is parameterless — the original called size() and would not compile.
    if (failure.nonEmpty) {
      log.info("the number of failed doc index about this target index collection is {}.", failure.size)
    }
    failure.toList
  }

  /**
   * Execute one bulk request and append each failed item as a
   * (source JSON, failure reason) pair to `failure`.
   *
   * @param client  high-level REST client
   * @param request bulk request to execute; an empty request is skipped
   * @param failure collection receiving the failed documents
   */
  private def everyBatchWrapper(client: RestHighLevelClient, request: BulkRequest, failure: ListBuffer[(String, String)]): Unit = {
    if (request.numberOfActions() == 0) {
      // An empty bulk request is rejected by Elasticsearch; nothing to send.
      return
    }
    val response: BulkResponse = client.bulk(request)
    if (response.hasFailures) {
      val list: mutable.Buffer[DocWriteRequest[_]] = request.requests().asScala
      for (bir <- response.iterator().asScala if bir.isFailed) {
        val reason = s"""HTTP status: [${bir.getFailure.getStatus.getStatus}], Cause by: ${bir.getFailureMessage}."""
        // BUG FIX: mutable.Buffer has no get(); index with apply. getItemId is the
        // item's position inside this bulk request.
        val ir: IndexRequest = list(bir.getItemId).asInstanceOf[IndexRequest]
        failure += ((ir.source().utf8ToString(), reason))
      }
    }
  }
}