java api访问elasticsearch下ik-pinyin分词基本使用

笔者在使用java访问es并创建ik分词索引的时候,查找相关api比较费时间,现将近期使用的api方法整理如下,若有更简单的写法欢迎拍砖。本实例代码仅用于功能测试,没有进行严格的性能测试,因此不适合直接用于生产环境。本代码使用es版本2.3.5,ik版本1.9.5,ik-pinyin版本1.8.5(注意将zip包中的es版本由2.4.5修改为2.3.5)。

1. java获取es的连接

/**
 * Builds a transport client for the Elasticsearch node at {@code 127.0.0.1:9300}.
 *
 * <p>The client is configured with the cluster name {@code "elasticsearch"}.
 * The caller owns the returned client.
 *
 * @return a transport client with the node address registered
 * @throws IOException if the host address cannot be resolved
 */
public static Client getElasticSearchClient() throws IOException {
        // Cluster name of the target Elasticsearch cluster (matters when several nodes are involved).
        Settings clusterSettings = Settings.settingsBuilder().put("cluster.name", "elasticsearch").build();

        // Transport address of the node to connect to.
        InetAddress host = InetAddress.getByName("127.0.0.1");
        InetSocketTransportAddress nodeAddress = new InetSocketTransportAddress(host, 9300);

        // Build the client first, then register the node address on it.
        TransportClient transportClient = TransportClient.builder().settings(clusterSettings).build();
        return transportClient.addTransportAddress(nodeAddress);
    }

2.java创建索引
/**
 * Creates the index {@code INDEX_NAME} with ik / pinyin analysis settings and then
 * puts the mapping for {@code TYPE_NAME} on it.
 *
 * <p>Prints "创建成功" or "创建失败" once for the index creation and once for the
 * mapping, depending on whether each request was acknowledged. The client is
 * always closed before the method returns, including when a request throws.
 *
 * @throws IOException if building the JSON content or obtaining the client fails
 */
public static void creatIndexAndType() throws IOException {
        Client client = getElasticSearchClient();
        try {
            // Index settings: shard/replica counts plus the analysis configuration.
            XContentBuilder source = XContentFactory.jsonBuilder()
                    .startObject()
                        .field("index.number_of_shards", 5)
                        .field("index.number_of_replicas", 1)
                        .startObject("analysis")
                            .startObject("analyzer")
                                // Default analyzer uses the ik_max_word tokenizer.
                                .startObject("default")
                                    .field("tokenizer", "ik_max_word")
                                .endObject()
                                // Pinyin analyzer built on the custom tokenizer declared below.
                                .startObject("pinyin_analyzer")
                                    .field("tokenizer", "my_pinyin")
                                    .field("filter", "word_delimiter")
                                .endObject()
                            .endObject()
                            .startObject("tokenizer")
                                .startObject("my_pinyin")
                                    .field("type", "pinyin")
                                    .field("keep_separate_first_letter", "false")
                                    // Written exactly once; a duplicate key would make the JSON ambiguous.
                                    .field("keep_full_pinyin", "true")
                                    .field("keep_original", "true")
                                    .field("limit_first_letter_length", "16")
                                    .field("lowercase", "true")
                                .endObject()
                            .endObject()
                        .endObject()
                    .endObject();

            // Create the index.
            CreateIndexResponse indexResponse =
                    client.admin().indices().prepareCreate(INDEX_NAME).setSource(source).execute().actionGet();
            if (indexResponse.isAcknowledged()) {
                System.out.println("创建成功");
            } else {
                System.out.println("创建失败");
            }

            // Type mapping: "name" is a multi_field with a pinyin-analyzed sub-field ("name")
            // and an ik-analyzed sub-field ("primitive").
            XContentBuilder mappings = XContentFactory.jsonBuilder()
                    .startObject()
                        .startObject("_all")
                            .field("analyzer", "ik_max_word")
                        .endObject()
                        .startObject("properties")
                            .startObject("name")
                                .field("type", "multi_field")
                                .startObject("fields")
                                    .startObject("name")
                                        .field("type", "string")
                                        .field("store", "no")
                                        .field("term_vector", "with_positions_offsets")
                                        .field("analyzer", "pinyin_analyzer")
                                        .field("boost", "10")
                                    .endObject()
                                    .startObject("primitive")
                                        .field("type", "string")
                                        .field("store", "yes")
                                        .field("analyzer", "ik_max_word")
                                    .endObject()
                                .endObject()
                            .endObject()
                        .endObject()
                    .endObject();

            // Put the mapping on the index.
            PutMappingRequest mapping = Requests.putMappingRequest(INDEX_NAME).type(TYPE_NAME).source(mappings);
            PutMappingResponse putResponse = client.admin().indices().putMapping(mapping).actionGet();
            if (putResponse.isAcknowledged()) {
                System.out.println("创建成功");
            } else {
                System.out.println("创建失败");
            }
        } finally {
            // Release the transport client even when a request fails.
            client.close();
        }
    }
3.批量插入数据
/**
 * Indexes four sample documents into {@code INDEX_NAME}/{@code TYPE_NAME} with one bulk request.
 *
 * <p>The documents get the ids "1" to "4" and each carries a single {@code name} field.
 * Prints "索引创建失败!" when the bulk response reports failures. The client is always
 * closed before the method returns.
 *
 * @throws IOException if building a document's JSON content or obtaining the client fails
 */
public static void insertIndexRecord() throws IOException {
        Client client = getElasticSearchClient();
        try {
            // Sample names; the document id is the 1-based position in this array.
            String[] names = {"张三", "李四", "王五", "赵六"};

            // Collect all index operations into one bulk request.
            BulkRequestBuilder bulkRequest = client.prepareBulk();
            for (int i = 0; i < names.length; i++) {
                String id = String.valueOf(i + 1);
                bulkRequest.add(client.prepareIndex(INDEX_NAME, TYPE_NAME, id)
                        .setSource(XContentFactory.jsonBuilder().startObject().field("name", names[i]).endObject()));
            }

            BulkResponse bulkResponse = bulkRequest.execute().actionGet();
            if (bulkResponse.hasFailures()) {
                System.out.println("索引创建失败!");
            }
        } finally {
            // Release the transport client even when the bulk request fails.
            client.close();
        }
    }
4.修改数据

/**
 * Updates the document with id "5" in {@code INDEX_NAME}/{@code TYPE_NAME}, creating it
 * when it does not exist yet, and prints whether the document was created.
 *
 * <p>The client is always closed before the method returns.
 *
 * @throws Exception if building the JSON content, obtaining the client or the update fails
 */
public static void updateIndexRecord() throws Exception {
        Client client = getElasticSearchClient();
        try {
            // A plain update fails when the id does not exist:
            // [medcl][[medcl][1]] DocumentMissingException[[folks][5]: document missing]
            // UpdateResponse updateResponse = client.prepareUpdate(INDEX_NAME, TYPE_NAME, "5")
            // .setDoc(XContentFactory.jsonBuilder().startObject().field("name", "张三1").endObject())
            // .execute().actionGet();
            // System.out.println(updateResponse.isCreated());

            // With setDocAsUpsert(true) the document is updated if present and created otherwise.
            UpdateResponse updateResponse = client.prepareUpdate(INDEX_NAME, TYPE_NAME, "5")
                    .setDoc(XContentFactory.jsonBuilder().startObject().field("name", "张三1").endObject())
                    .setDocAsUpsert(true)
                    .execute()
                    .actionGet();
            System.out.println(updateResponse.isCreated());
        } finally {
            // Release the transport client even when the update fails.
            client.close();
        }
    }
5.搜索查询数据

/**
 * Runs a match query on the {@code name} field of {@code INDEX_NAME}/{@code TYPE_NAME}
 * and prints the {@code name} of every hit on the first page (at most 10 hits).
 *
 * <p>The client is always closed before the method returns, including when the
 * search or the processing of a hit throws.
 *
 * @param key the text to match against the {@code name} field
 * @throws Exception if obtaining the client or executing the search fails
 */
public static void findIndexRecordByName(String key) throws Exception {
        Client client = getElasticSearchClient();
        try {
            // Build the query and the search request.
            QueryBuilder bq = QueryBuilders.matchQuery("name", key);
            SearchRequestBuilder searchRequest = client.prepareSearch(INDEX_NAME).setTypes(TYPE_NAME);

            // Apply the query and the paging parameters.
            int start = 0;
            int size = 10;
            searchRequest.setQuery(bq).setFrom(start).setSize(size);

            // Execute the search and print each hit's name.
            SearchResponse response = searchRequest.execute().actionGet();
            SearchHits shs = response.getHits();
            for (SearchHit hit : shs) {
                String name = (String) hit.getSource().get("name");
                System.out.println("[searchkey:" + key + "]" + name);
            }
        } finally {
            // Previously the close was skipped whenever the search threw.
            client.close();
        }
    }

注意:为了便于本地进行分词查询,可以直接使用http请求。

查询分词

http://127.0.0.1:9200/medcl/_analyze?text=张三&analyzer=ik

http://127.0.0.1:9200/medcl/_analyze?text=张三&analyzer=pinyin_analyzer

直接查询结果

http://127.0.0.1:9200/medcl/folks/_search?q=name:zhang

查询结果并查看打分情况

http://127.0.0.1:9200/medcl/folks/_search?q=name:san&explain

你可能感兴趣的:(技术)