elasticsearch 5.4 JAVA API 使用

JAVA API

根据官网API进行的整合,提供了增删改查、分组的demo
前置条件 :

  • JDK1.8
  • elasticsearch 5.4
  • maven 项目

1.新建maven项目,添加依赖

#添加mars-core依赖,或者自行添加spring依赖代替(私服依赖)

<dependency>
    <groupId>com.umpay</groupId>
    <artifactId>mars-core</artifactId>
    <version>2.0.0-SNAPSHOT</version>
</dependency>

#client依赖(必须的)

<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>transport</artifactId>
    <version>5.4.1</version>
</dependency>

#使用了LifeCycle的生命周期类(非必须),可使用Spring的生命周期或者其他来实现(私服依赖)

<dependency>
    <groupId>com.umpay</groupId>
    <artifactId>typhos-kernel</artifactId>
</dependency>

关于日志
若使用log4j 2来记录日志,增加如下依赖


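<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-api</artifactId>
    <version>2.8.2</version>
</dependency>
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-core</artifactId>
    <version>2.8.2</version>
</dependency>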

若使用其他日志,如logback,需添加slf4j桥转接


<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-to-slf4j</artifactId>
    <version>2.8.2</version>
</dependency>

2.客户端实现
ElasticsearchClient.java
①设置变量,set方法自己生成

private String clusterName = "elasticsearch";// name of the cluster to connect to
private String clusterNodes;// node addresses as ip:port; buildClient splits multiple entries on ';'
private boolean sniff = true;// whether to automatically detect node changes
private String pingTimeout = "5s";// how long to wait for a ping response from a node; defaults to 5s
private boolean ignoreClusterName = false;// whether to ignore the cluster name
private String nodesSamplerInterval = "5s";// how often the listed and connected nodes are sampled; defaults to 5s
private TransportClient client;

②启动方法

/**
 * Builds the transport client, logs that it is connected, and then continues
 * with the parent start-up sequence.
 *
 * @throws Exception if building the client or the parent start-up fails
 */
@Override
protected void doStart( ) throws Exception {
  this.buildClient( );
  logger.info( "elasticSearch client is connected" );
  super.doStart( );
}

/**
 * Creates the transport client and registers every node listed in {@code clusterNodes}.
 *
 * <p>{@code clusterNodes} is a ';'-separated list of {@code host:port} entries. The
 * setting is checked before the client is created, and the client is closed again if
 * registering a node fails, so a bad configuration does not leave an unclosed client
 * behind. The {@code client} field is only assigned once every node has been added.
 *
 * @throws IllegalArgumentException if an entry has no ':' separator or its port is not a number
 * @throws Exception if a host name cannot be resolved or the client cannot be created
 */
protected void buildClient( ) throws Exception {
  // Validate first: creating the client before this check would leak it on failure.
  Assert.hasText( clusterNodes, "[Assertion failed] clusterNodes settings missing." );
  TransportClient transportClient = new PreBuiltTransportClient( settings( ) );
  try {
    for( String clusterNode : clusterNodes.split( ";" ) ) {
      String node = clusterNode.trim( );
      // Split on the last ':' so the port is always the final segment of the entry.
      int separator = node.lastIndexOf( ':' );
      if( separator < 0 ) {
        throw new IllegalArgumentException( "[Assertion failed] expected 'host:port' in 'clusterNodes' but got '" + node + "'" );
      }
      String hostName = node.substring( 0, separator );
      String port = node.substring( separator + 1 );
      Assert.hasText( hostName, "[Assertion failed] missing host name in 'clusterNodes'" );
      Assert.hasText( port, "[Assertion failed] missing port in 'clusterNodes'" );
      logger.info( "adding transport node : " + node );
      transportClient.addTransportAddress( new InetSocketTransportAddress( InetAddress.getByName( hostName ), Integer.parseInt( port ) ) );
    }
  } catch( Exception e ) {
    // Release the client's resources before propagating the configuration/lookup error.
    transportClient.close( );
    throw e;
  }
  client = transportClient;
  logger.info( "connected nodes : " + client.connectedNodes( ).size( ) );
}

/**
 * Assembles the transport-client settings from the configured fields.
 *
 * @return settings carrying the cluster name, sniffing flag, cluster-name check flag,
 *         ping timeout and node sampling interval
 */
private Settings settings( ) {
  Settings.Builder builder = Settings.builder( );
  builder.put( "cluster.name", clusterName );
  builder.put( "client.transport.sniff", sniff );
  builder.put( "client.transport.ignore_cluster_name", ignoreClusterName );
  builder.put( "client.transport.ping_timeout", pingTimeout );
  builder.put( "client.transport.nodes_sampler_interval", nodesSamplerInterval );
  return builder.build( );
}

③停止方法

/**
 * Closes the transport client, if one was created, and then continues with the
 * parent stop sequence. A failure while closing is logged and not rethrown.
 *
 * @throws Exception if the parent stop sequence fails
 */
@Override
protected void doStop( ) throws Exception {
    try {
        logger.info( "Closing elasticSearch  client" );
        final TransportClient current = client;
        if( null != current ) {
            current.close( );
        }
    } catch( final Exception closeFailure ) {
        logger.error( "Error closing ElasticSearch client: ", closeFailure );
    }
    super.doStop( );
}

④设置单例

/** Hidden constructor: the only instance is the one created by {@code InstanceHolder}. */
private ElasticsearchClient( ) {
}

/**
 * Returns the shared {@code ElasticsearchClient} instance.
 *
 * @return the instance held by {@code InstanceHolder}
 */
public static ElasticsearchClient getInstance( ) {
    return InstanceHolder.INSTANCE;
}

/** Holds the single instance; it is created when this nested class is initialised. */
private static final class InstanceHolder {
    private static final ElasticsearchClient INSTANCE = new ElasticsearchClient( );
}

⑤ 提供getClient方法

/**
 * Returns the transport client held by this instance.
 *
 * @return the client field; it is only assigned by {@code buildClient}, so it is
 *         {@code null} until that has run
 */
public TransportClient getClient( ) {
    return this.client;
}

3.spring 配置


    
    
    
    
    
    

4.启动

// Entry point: starts the application through App.
public static void main( String[] args ) throws Exception {
    App.getInstance( ).start( );// can be replaced by any other way of loading Spring
}

5.数据的增删改
① 单条数据插入

/**
 * Indexes one sample document into index "bill", type "submit", and prints the
 * index, id and type reported by the response as {@code index-id-type}.
 */
public static void insert( ) {
    TransportClient client = ElasticsearchClient.getInstance( ).getClient( );
    // bill --> index, submit --> type
    String source = "{\"mobile\":\"144454240\",\"status\":\"1\",\"orgId\":\"553\",\"apId\":\"ap01\",\"bizCode\":\"001\"}";
    IndexResponse response = client.prepareIndex( "bill", "submit" ).setSource( source ).get( );
    System.out.println( response.getIndex( ) + "-" + response.getId( ) + "-" + response.getType( ) );
}

②批量插入、删除
bulk

/**
 * Sends one bulk request that mixes index and delete actions against index "bill",
 * type "submit", and reports any failed items on standard error.
 */
public static void bluk( ) {
    TransportClient client = ElasticsearchClient.getInstance( ).getClient( );
    BulkRequestBuilder bulkRequest = client.prepareBulk( );
    for( int i = 0; i < 1000; i++ ) {
        // Both index and delete requests can be added to the same bulk request.
        bulkRequest.add( client.prepareIndex( "bill", "submit" ).setSource("{\"mobile\":\"144dfdf5424" + i + "\",\"status\":\"5\",\"orgId\":\"5ds5\",\"apId\":\"ap03\",\"bizCode\":\"002\"}" ) );
        bulkRequest.add( client.prepareDelete( "bill", "submit", "1" ) );// index, type, id
    }
    // Nothing is sent until get() is called; all accumulated actions go out here.
    BulkResponse bulkResponse = bulkRequest.get( );
    if( bulkResponse.hasFailures( ) ) {
        // Surface the per-item failures instead of dropping them silently.
        System.err.println( bulkResponse.buildFailureMessage( ) );
    }
}

③ BulkProcessor--它提供了一个简单的接口,可以根据请求的数量或大小,或在给定的时间段后自动执行批量操作。

// Illustrative snippet, not compilable as written: the <1>..<9> markers are callouts
// explained in the list that follows, and the "{ ... }" bodies are placeholders for
// the listener implementations.
BulkProcessor bulkProcessor = BulkProcessor.builder(
        client,  <1>
        new BulkProcessor.Listener() {
            @Override
            public void beforeBulk(long executionId,
                                   BulkRequest request) { ... }    <2>

            @Override
            public void afterBulk(long executionId,
                                  BulkRequest request,
                                  BulkResponse response) { ... }    <3>

            @Override
            public void afterBulk(long executionId,
                                  BulkRequest request,
                                  Throwable failure) { ... }    <4>
        })
        .setBulkActions(10000)   <5>
        .setBulkSize(new ByteSizeValue(5, ByteSizeUnit.MB))    <6>
        .setFlushInterval(TimeValue.timeValueSeconds(5))      <7>
        .setConcurrentRequests(1)      <8>
        .setBackoffPolicy(
            BackoffPolicy.exponentialBackoff(TimeValue.timeValueMillis(100), 3))     <9>
        .build();

说明:

<1> elasticsearch客户端
<2> 每个批量请求前调用,例如可以查询numberOfActions->request.numberOfActions()
<3> 每个批量执行后调用此方法。例如可以检查是否有一些失败的请求->response.hasFailures()
<4> 当批量请求整体失败并抛出Throwable时调用此方法
<5> 设置批量请求数,比如每累积10000个请求就执行一次批量操作
<6> 设置数据量大小,比如每5M处理一次
<7> 设置定时处理间隔,比如每5s处理一次
<8> 设置并发请求的数量。值为0表示只允许执行单个请求;值为1表示在累积新的批量请求的同时,允许1个并发请求执行
<9> 设置一个补偿政策,当一次批量请求失败,并抛出EsRejectedExecutionException (表明请求处理不过来)的异常时,初次等待100ms重试,重试3次,重试等待时间呈指数增长,禁用补偿政策,通过设置BackoffPolicy.noBackoff().

默认值:

bulkActions = 1000
bulkSize = 5mb
不设置 flushInterval
concurrentRequests = 1
backoffPolicy 默认设置为指数退避:重试8次,初始延迟50毫秒,总等待时间大约是5.1秒。

添加的请求
IndexRequest、DeleteRequest、UpdateRequest、还有upsert的请求

// Index, delete and update requests can all be queued on the same processor.
bulkProcessor.add(new IndexRequest("twitter", "tweet", "1").source(/* your doc here */));
bulkProcessor.add(new DeleteRequest("twitter", "tweet", "2"));
bulkProcessor.add(new UpdateRequest( "report", "test", "2" ).doc( "{\"orgId\":\"1\"}" ));
// Upsert: update when the id exists, otherwise insert the upsert document.
bulkProcessor.add(new UpdateRequest( "report", "test", "2" ).doc( "{\"orgId\":\"1\"}" ).upsert( /* your doc here */));

BulkProcessor关闭
当文档加载到 BulkProcessor后,通过使用awaitClose或 close方法将其关闭:

bulkProcessor.awaitClose(10, TimeUnit.MINUTES);
或者
bulkProcessor.close();

这两种方法都会flush所有剩余的文档;如果通过设置flushInterval启用了定时flush,还会取消其余已计划的flush。如果启用了并发请求,awaitClose方法最多等待指定的超时时间让所有bulk请求完成:全部完成则返回true,超时仍未全部完成则返回false。close方法不等待任何剩余的批量请求完成,立即退出。
④update --当id不存在时将会抛出异常

/**
 * Sends a partial update that sets {@code orgId} to "333" on one document of index
 * "report", type "test", and blocks until the response arrives.
 *
 * NOTE(review): the surrounding text states that the call fails when the id does
 * not exist - confirm the exact exception type against the client documentation.
 *
 * @throws InterruptedException if the wait for the response is interrupted
 * @throws ExecutionException if the update fails
 */
public static void update( ) throws InterruptedException, ExecutionException  {
    UpdateRequest request = new UpdateRequest( "report", "test", "338799bd8c40e1963fd56557fb161c" );
    request.doc( "{\"orgId\":\"333\"}" );
    ElasticsearchClient.getInstance( ).getClient( ).update( request ).get( );
}

⑤upsert--id不存在时就插入

/**
 * Updates {@code orgId} on one document of index "report", type "test", or inserts
 * the upsert document when that id does not exist yet, then blocks until the
 * response arrives.
 *
 * @throws InterruptedException if the wait for the response is interrupted
 * @throws ExecutionException if the request fails
 */
public static void upsert( ) throws InterruptedException, ExecutionException {
    TransportClient client = ElasticsearchClient.getInstance( ).getClient( );
    String partialDoc = "{\"orgId\":\"333\"}";
    // Document to insert when the id is missing; an argument-less upsert() would
    // insert an empty document instead.
    String upsertDoc = "{\"orgId\":\"333\"}";
    UpdateRequest updateRequest = new UpdateRequest( "report", "test", "338799bd8c40e1963fd56557fb161c" ).doc( partialDoc ).upsert( upsertDoc );
    client.update( updateRequest ).get( );
}

⑥分组
分组的结果是树形结构的json,使用时需要自行拼装需要的对象

{
    "aggregations": {
        "ap_count": {
            "doc_count_error_upper_bound": 0, 
            "sum_other_doc_count": 0, 
            "buckets": [
                {
                    "key": "ap02", 
                    "doc_count": 304000, 
                    "org_count": {
                        "doc_count_error_upper_bound": 0, 
                        "sum_other_doc_count": 0, 
                        "buckets": [
                            {
                                "key": "5ds5", 
                                "doc_count": 300000
                            }, 
                            {
                                "key": "5ds3", 
                                "doc_count": 2000
                            }, 
                            {
                                "key": "5ds4", 
                                "doc_count": 2000
                            }
                        ]
                    }
                }, 
                {
                    "key": "ap03", 
                    "doc_count": 300000, 
                    "org_count": {
                        "doc_count_error_upper_bound": 0, 
                        "sum_other_doc_count": 0, 
                        "buckets": [
                            {
                                "key": "5ds5", 
                                "doc_count": 300000
                            }
                        ]
                    }
                }
            ]
        }
    }
}

java code

  /**
   * Runs a two-level terms aggregation on index "bill", type "submit": documents are
   * grouped by {@code apId} ("ap_count") and, inside each of those buckets, by
   * {@code orgId} ("org_count"). Every bucket is printed as {@code key--docCount}.
   *
   * NOTE(review): the results are read as StringTerms, which presumably only fits
   * string-valued fields - confirm before reusing this on numeric fields.
   */
  public static void aggregation( ) {
        SearchRequestBuilder sbuilder = ElasticsearchClient.getInstance( ).getClient( ).prepareSearch( "bill" ).setTypes( "submit" );
        TermsAggregationBuilder apBuilder = AggregationBuilders.terms( "ap_count" ).field( "apId" );
        TermsAggregationBuilder orgBuilder = AggregationBuilders.terms( "org_count" ).field( "orgId" );
        apBuilder.subAggregation( orgBuilder );
        sbuilder.addAggregation( apBuilder );
        SearchResponse response = sbuilder.execute( ).actionGet( );
        // Aggregations.get is generic, so no raw Map lookup or cast is needed.
        StringTerms apAgg = response.getAggregations( ).get( "ap_count" );
        for( Bucket apBucket : apAgg.getBuckets( ) ) {
            System.out.println( apBucket.getKeyAsString( ) + "--" + apBucket.getDocCount( ) );
            StringTerms orgAgg = apBucket.getAggregations( ).get( "org_count" );
            for( Bucket orgBucket : orgAgg.getBuckets( ) ) {
                System.out.println( orgBucket.getKeyAsString( ) + "--" + orgBucket.getDocCount( ) );
            }
        }
  }

6.数据查询
①get--根据index,type,id

// Fetches the document with id "1" from index "twitter", type "tweet".
GetResponse response = client.prepareGet("twitter", "tweet", "1").get();

可通过operationThreaded设置操作是否在另一个线程中执行,默认为true

// Same lookup, with operationThreaded explicitly set to false.
GetResponse response = client.prepareGet("twitter", "tweet", "1")
        .setOperationThreaded(false)
        .get();

②根据条件查询、分页

   /**
    * Searches index "bill", type "submit" for documents whose {@code orgId} term is
    * "5ds5", requests the first page (offset 0, 60 hits) and prints every field of
    * each hit's source as {@code key-value}.
    */
   public static void queryPage(){
        TransportClient client = ElasticsearchClient.getInstance( ).getClient( );
        SearchResponse response = client.prepareSearch("bill")
        .setTypes("submit")
        .setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
        .setQuery(QueryBuilders.termQuery("orgId", "5ds5"))                 // Query
        //.setPostFilter(QueryBuilders.rangeQuery("age").from(12).to(18))     // Filter
        .setFrom(0).setSize(60).setExplain(true)
        .get();
        SearchHits hits = response.getHits();
        for (SearchHit searchHit : hits) {
            Map<String, Object> source = searchHit.getSource();
            // Iterate entries directly rather than looking each key up again.
            for (Map.Entry<String, Object> field : source.entrySet()) {
                System.out.println(field.getKey() + "-" + field.getValue());
            }
        }
    }

你可能感兴趣的:(elasticsearch 5.4 JAVA API 使用)