Elasticsearch 带中文分词的全文检索(分页+高亮返回)

一.全文搜索介绍

Full text queries 全文搜索主要有以下几种类型:

1.1 匹配查询(match query)

// Match query against a single field.
String fieldName = "name";                  // field to search
String queryText = "kimchy elasticsearch";  // text to match
QueryBuilder qb = matchQuery(fieldName, queryText);

DSL 查询语句:

GET /_search
{
    "query": {
        "match" : {
            "message" : "this is a test"
        }
    }
}

1.2 多字段查询(multi_match query)

match query 的多字段版本:用同一个查询文本同时对多个字段进行匹配查询

// Multi-match query: one query text matched against several fields.
String[] fieldNames = {"user", "message"};  // fields to search
QueryBuilder qb = multiMatchQuery("kimchy elasticsearch", fieldNames);

DSL查询语句:

GET /_search
{
  "query": {
    "multi_match" : {
      "query":    "this is a test", 
      "fields": [ "subject", "message" ] 
    }
  }
}

1.3 常用术语查询(common_terms query)

对停用词处理方式的一种改进:优先匹配低频(较生僻)的词语,高频词只用于辅助评分,从而在不丢失高频词信息的前提下兼顾查询性能(注意:该查询自 Elasticsearch 7.3 起已被废弃)

// Common terms query on a single field.
String fieldName = "name";   // field to search
String value = "kimchy";     // value to look for
QueryBuilder qb = commonTermsQuery(fieldName, value);

DSL查询语句:

GET /_search
{
    "query": {
        "common": {
            "body": {
                "query": "this is bonsai cool",
                "cutoff_frequency": 0.001
            }
        }
    }
}

1.4 查询语句查询(query_string query)

       与lucene查询语句的语法结合的更加紧密的一种查询,允许你在一个查询语句中使用多个特殊条件关键字(如:AND|OR|NOT )对多个字段进行查询,这种查询仅限专家用户去使用。

// Query-string query: the text is written in Lucene query syntax.
String queryText = "+kimchy -elasticsearch";
QueryBuilder qb = queryStringQuery(queryText);

DSL查询语句:

GET /_search
{
    "query": {
        "query_string" : {
            "default_field" : "content",
            "query" : "this AND that OR thus"
        }
    }
}

        以上四种是全文搜索可以用到的查询方式,但是一般使用多字段查询(multi_match query)比较多,这里重点写下第二种方式的使用。

二.使用multi_match query的方式实现全文多字段的匹配查询

2.1 检索服务

实现一个关键字分词匹配多个字段,分页查询,命中字段高亮显示

/**
 * Runs a paged multi-field full-text search against one index and maps every hit to a DTO,
 * replacing the DTO's String fields with their highlighted text where a highlight was returned.
 *
 * <p>NOTE(review): the highlight pre/post tags are empty strings, so highlighted text is
 * indistinguishable from the plain value. Presumably the real markup was lost when this
 * listing was published; confirm and set the intended tags.
 *
 * @param shipQueryDto query parameters: search keywords, 1-based page number and page size
 * @param indexName    name of the index to search
 * @param clazz        DTO type the hits are deserialized into; must be registered in
 *                     {@code EsSmartIndexHelper.classMapMap}
 * @param <T>          DTO type
 * @return total hit count plus the DTOs of the requested page
 * @throws IOException              if the search request fails
 * @throws IllegalAccessException   if a highlighted field cannot be written reflectively
 * @throws IllegalArgumentException if no search fields are registered for {@code clazz}
 */
private <T> SearchDto<T> getResult(ShipQueryDto shipQueryDto, String indexName, Class<T> clazz) throws IOException, IllegalAccessException {
    // Field name -> boost for this DTO type; drives both the query and the highlighter.
    Map<String, Float> fieldBoosts = EsSmartIndexHelper.classMapMap.get(clazz);
    if (fieldBoosts == null) {
        throw new IllegalArgumentException("No search fields registered for " + clazz.getName());
    }

    SearchRequest searchRequest = new SearchRequest();
    searchRequest.indices(indexName);
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

    // Highlighting
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    // 0 fragments: the whole field content is highlighted and returned instead of snippets.
    highlightBuilder.numOfFragments(0);
    highlightBuilder.preTags("");
    highlightBuilder.postTags("");
    highlightBuilder.highlighterType("plain");
    // Builder-level setting, so it is applied once rather than per field.
    highlightBuilder.requireFieldMatch(false);
    for (String name : fieldBoosts.keySet()) {
        highlightBuilder.field(name);
    }
    sourceBuilder.highlighter(highlightBuilder);

    // Query: with no keywords the bool query stays empty.
    BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
    if (StringUtils.isNotEmpty(shipQueryDto.getKeys())) {
        boolQueryBuilder.must(
                QueryBuilders.multiMatchQuery(shipQueryDto.getKeys())
                        .fields(fieldBoosts)
                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
                        // Analyze the keywords with the finest-grained IK tokenizer.
                        .analyzer("ik_max_word")
                        .operator(Operator.OR));
    }
    sourceBuilder.query(boolQueryBuilder);

    // Paging: page numbers are 1-based; anything below 1 is treated as the first page.
    int pageSize = shipQueryDto.getPageSize();
    int from = Math.max(shipQueryDto.getPageNum() - 1, 0) * pageSize;
    sourceBuilder.from(from);
    sourceBuilder.size(pageSize);
    sourceBuilder.trackTotalHits(true);
    searchRequest.source(sourceBuilder);

    // Diagnostics only; this is not an error condition.
    if (log.isDebugEnabled()) {
        log.debug("查询的DSL语句: " + searchRequest.source());
    }
    SearchResponse searchRes = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    if (log.isDebugEnabled()) {
        log.debug("返回原始数据 : " + searchRes);
    }

    SearchHit[] hits = searchRes.getHits().getHits();
    List<T> results = new ArrayList<>(hits.length);
    for (SearchHit hit : hits) {
        T dto = JSONObject.parseObject(hit.getSourceAsString(), clazz);
        Map<String, HighlightField> highlightFields = hit.getHighlightFields();
        if (dto != null && CollectionUtil.isNotEmpty(highlightFields)) {
            applyHighlights(dto, clazz, highlightFields);
        }
        results.add(dto);
    }

    SearchDto<T> searchDto = new SearchDto<>();
    searchDto.setTotal(searchRes.getHits().getTotalHits().value);
    searchDto.setSearchShips(results);
    return searchDto;
}

/**
 * Copies highlighted text into the same-named String fields of {@code dto}, looking at the
 * fields declared by {@code clazz} and by each of its superclasses.
 */
private static void applyHighlights(Object dto, Class<?> clazz, Map<String, HighlightField> highlightFields) throws IllegalAccessException {
    for (Class<?> type = clazz; type != null && type != Object.class; type = type.getSuperclass()) {
        for (Field field : type.getDeclaredFields()) {
            HighlightField highlightField = highlightFields.get(field.getName());
            // Highlighted text can only be stored in a String field; writing it into a
            // numeric or date field would throw IllegalArgumentException.
            if (highlightField == null || field.getType() != String.class) {
                continue;
            }
            Text[] fragments = highlightField.fragments();
            if (fragments == null) {
                continue;
            }
            StringBuilder text = new StringBuilder();
            for (Text fragment : fragments) {
                text.append(fragment.toString());
            }
            field.setAccessible(true);
            field.set(dto, text.toString());
        }
    }
}



/**
 * Full-text search over the ship index.
 *
 * @param shipQueryDto search keywords and paging parameters
 * @return total hit count plus the ship DTOs of the requested page
 * @throws IOException            if the search request fails
 * @throws IllegalAccessException if a highlighted field cannot be written reflectively
 */
@Override
public SearchDto<SearchShipCbgkDto> searchShip(ShipQueryDto shipQueryDto) throws IOException, IllegalAccessException {
    return getResult(shipQueryDto, EsIndex.INDEX_SEAT_SEARCH_SHIP_CBGK.getStatus(), SearchShipCbgkDto.class);
}
import lombok.Data;

import java.util.List;

/**
 * Search result wrapper returned to the caller.
 *
 * @param <T> type of the DTO each hit is mapped to
 */
@Data
public class SearchDto<T> {

    /** Total number of matching documents in the searched index. */
    private Long total;

    /** DTOs of the hits returned for the searched index. */
    private List<T> searchShips;
}
import java.util.HashMap;
import java.util.Map;

/**
 * 全文搜索匹配的字段和权重
 */
public class EsSmartIndexHelper {

    public static Map shipCbgkfields = new HashMap();

    public static HashMap, Map> classMapMap = new HashMap, Map>();

    static {
        //船舶库
        classMapMap.put(SearchShipCbgkDto.class, shipCbgkfields);
        shipCbgkfields.put("shipName", 2.5f);
        //            "shipId",
        shipCbgkfields.put("shipRegistryPort", 1.8f);
        //            "shipOwnerId",
        shipCbgkfields.put("shipOwnerName", 1.5f);
        shipCbgkfields.put("shipOwnerSex", 1f);
        shipCbgkfields.put("shipOwnerTel", 1f);
        shipCbgkfields.put("shipOwnerIdNumber", 1.1f);
        shipCbgkfields.put("deptId", 1f);

        shipCbgkfields.put("createTime", 1f);
        shipCbgkfields.put("bdsTerminalNo", 1.3f);
        shipCbgkfields.put("mmsi", 1.3f);
    }
}

2.2 检索的DSL语句

Get       /index/queryShip?keys=琼海口渔&pageNum=1&pageSize=10
GET index_test_search_ship/_search
{
  "from": 0,
	"size": 20,
	"query": {
		"bool": {
			"must": [{
				"multi_match": {
					"query": "琼海口渔",
					"fields": ["bdsTerminalNo^1.3", "createTime^1.0", "deptId^1.0", "mmsi^1.3", "shipName^2.5", "shipOwnerIdNumber^1.1", "shipOwnerName^1.5", "shipOwnerSex^1.0", "shipOwnerTel^1.0", "shipRegistryPort^1.8"],
					"type": "cross_fields",
					"operator": "OR",
					"analyzer": "ik_max_word",
					"slop": 0,
					"prefix_length": 0,
					"max_expansions": 50,
					"zero_terms_query": "NONE",
					"auto_generate_synonyms_phrase_query": true,
					"fuzzy_transpositions": true,
					"boost": 1.0
				}
			}],
			"adjust_pure_negative": true,
			"boost": 1.0
		}
	},
	"track_total_hits": 2147483647,
	"highlight": {
		"pre_tags": [""],
		"post_tags": [""],
		"number_of_fragments": 0,
		"type": "plain",
		"require_field_match": false,
		"fields": {
			"shipOwnerName": {},
			"shipOwnerTel": {},
			"createTime": {},
			"mmsi": {},
			"bdsTerminalNo": {},
			"deptId": {},
			"shipName": {},
			"shipOwnerSex": {},
			"shipOwnerIdNumber": {},
			"shipRegistryPort": {}
		}
	}
}

2.3 返回的原始JSON数据

{
  "took" : 4,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 20,
      "relation" : "eq"
    },
    "max_score" : 7.7624564,
    "hits" : [
      {
        "_index" : "index_test_search_ship",
        "_type" : "_doc",
        "_id" : "bpMhcYQB4gQEvltnaqX-",
        "_score" : 7.7624564,
        "_source" : {
          "shipId" : "01",
          "shipName" : "琼海口渔",
          "shipOwnerName" : "李宁",
          "shipOwnerTel" : "15173934187",
          "shipOwnerIdNumber" : "430525199408136134",
          "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
        },
        "highlight" : {
          "shipName" : [
            "琼海口渔"
          ]
        }
      },
      {
        "_index" : "index_test_search_ship",
        "_type" : "_doc",
        "_id" : "b5MhcYQB4gQEvltnbaUM",
        "_score" : 7.7624564,
        "_source" : {
          "shipId" : "01",
          "shipName" : "琼海口渔",
          "shipOwnerName" : "李宁",
          "shipOwnerTel" : "15173934187",
          "shipOwnerIdNumber" : "430525199408136134",
          "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
        },
        "highlight" : {
          "shipName" : [
            "琼海口渔"
          ]
        }
      },
      {
        "_index" : "index_test_search_ship",
        "_type" : "_doc",
        "_id" : "U5PBb4QB4gQEvltnIKV-",
        "_score" : 7.0790462,
        "_source" : {
          "shipId" : "01",
          "shipName" : "013234琼海口渔",
          "shipOwnerName" : "李宁",
          "shipOwnerTel" : "15173934187",
          "shipOwnerIdNumber" : "430525199408136134",
          "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
        },
        "highlight" : {
          "shipName" : [
            "013234琼海口渔"
          ]
        }
      },
      {
        "_index" : "index_test_search_ship",
        "_type" : "_doc",
        "_id" : "VJPEb4QB4gQEvltnm6Uz",
        "_score" : 7.0790462,
        "_source" : {
          "shipId" : "01",
          "shipName" : "013913琼海口渔",
          "shipOwnerName" : "李宁",
          "shipOwnerTel" : "15173934187",
          "shipOwnerIdNumber" : "430525199408136134",
          "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
        },
        "highlight" : {
          "shipName" : [
            "013913琼海口渔"
          ]
        }
      },
      {
        "_index" : "index_test_search_ship",
        "_type" : "_doc",
        "_id" : "bZMhcYQB4gQEvltnQKVb",
        "_score" : 7.0790462,
        "_source" : {
          "shipId" : "01",
          "shipName" : "琼海口渔013",
          "shipOwnerName" : "",
          "shipOwnerTel" : "15173934187",
          "shipOwnerIdNumber" : "430525199408136134",
          "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
        },
        "highlight" : {
          "shipName" : [
            "琼海口渔013"
          ]
        }
      },
      {
        "_index" : "index_test_search_ship",
        "_type" : "_doc",
        "_id" : "a5MccYQB4gQEvltnY6Ur",
        "_score" : 7.0790462,
        "_source" : {
          "shipId" : "01",
          "shipName" : "琼海口渔013",
          "shipOwnerName" : "李宁",
          "shipOwnerTel" : "15173934187",
          "shipOwnerIdNumber" : "430525199408136134",
          "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
        },
        "highlight" : {
          "shipName" : [
            "琼海口渔013"
          ]
        }
      },
      {
        "_index" : "index_test_search_ship",
        "_type" : "_doc",
        "_id" : "bJMccYQB4gQEvltnZaV1",
        "_score" : 7.0790462,
        "_source" : {
          "shipId" : "01",
          "shipName" : "琼海口渔013",
          "shipOwnerName" : "李宁",
          "shipOwnerTel" : "15173934187",
          "shipOwnerIdNumber" : "430525199408136134",
          "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
        },
        "highlight" : {
          "shipName" : [
            "琼海口渔013"
          ]
        }
      },
      {
        "_index" : "index_test_search_ship",
        "_type" : "_doc",
        "_id" : "VZPbb4QB4gQEvltnraU6",
        "_score" : 6.506234,
        "_source" : {
          "shipId" : "01",
          "shipName" : "013913琼海口渔",
          "shipOwnerName" : "013913琼海口渔",
          "shipOwnerTel" : "15173934187",
          "shipOwnerIdNumber" : "430525199408136134",
          "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
        },
        "highlight" : {
          "shipOwnerName" : [
            "013913琼海口渔"
          ],
          "shipName" : [
            "013913琼海口渔"
          ]
        }
      },
      {
        "_index" : "index_test_search_ship",
        "_type" : "_doc",
        "_id" : "apMWcYQB4gQEvltnT6Vt",
        "_score" : 6.019184,
        "_source" : {
          "shipId" : "01",
          "shipName" : "琼海口渔013 李宁",
          "shipOwnerName" : "12341",
          "shipOwnerTel" : "15173934187",
          "shipOwnerIdNumber" : "430525199408136134",
          "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
        },
        "highlight" : {
          "shipName" : [
            "琼海口渔013 李宁"
          ]
        }
      },
      {
        "_index" : "index_test_search_ship",
        "_type" : "_doc",
        "_id" : "cpNRcYQB4gQEvltnQ6Xw",
        "_score" : 6.019184,
        "_source" : {
          "shipId" : "01",
          "shipName" : "琼海口渔013 李宁",
          "shipOwnerName" : "李宁",
          "shipOwnerTel" : "15173934187",
          "shipOwnerIdNumber" : "430525199408136134",
          "fullText" : "01 解决12345 时代 15173934187 430525199408136134"
        },
        "highlight" : {
          "shipName" : [
            "琼海口渔013 李宁"
          ]
        }
      }
    ]
  }
}

2.4 接收格式化后返回的接口数据

{
    "code": "SUCCESS",
    "businessCode": "0",
    "message": "操作成功",
    "data": {
        "total": 20,
        "searchShips": [
            {
                "shipId": "01",
                "shipName": "琼海口渔",
                "shipRegistryPort": null,
                "shipOwnerId": null,
                "shipOwnerName": "李宁",
                "shipOwnerSex": null,
                "shipOwnerTel": "15173934187",
                "shipOwnerIdNumber": "430525199408136134",
                "deptId": null,
                "createTime": null,
                "bdsTerminalNo": null,
                "mmsi": null
            },
            {
                "shipId": "01",
                "shipName": "琼海口渔",
                "shipRegistryPort": null,
                "shipOwnerId": null,
                "shipOwnerName": "李宁",
                "shipOwnerSex": null,
                "shipOwnerTel": "15173934187",
                "shipOwnerIdNumber": "430525199408136134",
                "deptId": null,
                "createTime": null,
                "bdsTerminalNo": null,
                "mmsi": null
            },
            {
                "shipId": "01",
                "shipName": "013234琼海口渔",
                "shipRegistryPort": null,
                "shipOwnerId": null,
                "shipOwnerName": "李宁",
                "shipOwnerSex": null,
                "shipOwnerTel": "15173934187",
                "shipOwnerIdNumber": "430525199408136134",
                "deptId": null,
                "createTime": null,
                "bdsTerminalNo": null,
                "mmsi": null
            },
            {
                "shipId": "01",
                "shipName": "013913琼海口渔",
                "shipRegistryPort": null,
                "shipOwnerId": null,
                "shipOwnerName": "李宁",
                "shipOwnerSex": null,
                "shipOwnerTel": "15173934187",
                "shipOwnerIdNumber": "430525199408136134",
                "deptId": null,
                "createTime": null,
                "bdsTerminalNo": null,
                "mmsi": null
            },
            {
                "shipId": "01",
                "shipName": "琼海口渔013",
                "shipRegistryPort": null,
                "shipOwnerId": null,
                "shipOwnerName": "",
                "shipOwnerSex": null,
                "shipOwnerTel": "15173934187",
                "shipOwnerIdNumber": "430525199408136134",
                "deptId": null,
                "createTime": null,
                "bdsTerminalNo": null,
                "mmsi": null
            },
            {
                "shipId": "01",
                "shipName": "琼海口渔013",
                "shipRegistryPort": null,
                "shipOwnerId": null,
                "shipOwnerName": "李宁",
                "shipOwnerSex": null,
                "shipOwnerTel": "15173934187",
                "shipOwnerIdNumber": "430525199408136134",
                "deptId": null,
                "createTime": null,
                "bdsTerminalNo": null,
                "mmsi": null
            },
            {
                "shipId": "01",
                "shipName": "琼海口渔013",
                "shipRegistryPort": null,
                "shipOwnerId": null,
                "shipOwnerName": "李宁",
                "shipOwnerSex": null,
                "shipOwnerTel": "15173934187",
                "shipOwnerIdNumber": "430525199408136134",
                "deptId": null,
                "createTime": null,
                "bdsTerminalNo": null,
                "mmsi": null
            },
            {
                "shipId": "01",
                "shipName": "013913琼海口渔",
                "shipRegistryPort": null,
                "shipOwnerId": null,
                "shipOwnerName": "013913琼海口渔",
                "shipOwnerSex": null,
                "shipOwnerTel": "15173934187",
                "shipOwnerIdNumber": "430525199408136134",
                "deptId": null,
                "createTime": null,
                "bdsTerminalNo": null,
                "mmsi": null
            },
            {
                "shipId": "01",
                "shipName": "琼海口渔013 李宁",
                "shipRegistryPort": null,
                "shipOwnerId": null,
                "shipOwnerName": "12341",
                "shipOwnerSex": null,
                "shipOwnerTel": "15173934187",
                "shipOwnerIdNumber": "430525199408136134",
                "deptId": null,
                "createTime": null,
                "bdsTerminalNo": null,
                "mmsi": null
            },
            {
                "shipId": "01",
                "shipName": "琼海口渔013 李宁",
                "shipRegistryPort": null,
                "shipOwnerId": null,
                "shipOwnerName": "李宁",
                "shipOwnerSex": null,
                "shipOwnerTel": "15173934187",
                "shipOwnerIdNumber": "430525199408136134",
                "deptId": null,
                "createTime": null,
                "bdsTerminalNo": null,
                "mmsi": null
            }
        ]
    }
}

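可以看出,返回的字段中,凡是命中关键词任意一个分词的,都会出现在 highlight 结果里并被回填到对应字段。(注:上面示例中高亮的前后标签 pre_tags/post_tags 为空字符串,所以高亮片段与原文看起来完全相同;实际使用时应设置为可见的标签,例如 <em> 和 </em>。)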

你可能感兴趣的:(全文检索相关,大数据开发,elasticsearch,全文检索,中文分词)