Elasticsearch 应用

Elasticsearch 的应用

本文使用的版本为:7.14.0
todo:前端部分

Kibana的开发工具

IK分词器粗粒度

# 请求,通过【ik_smart】最粗粒度划分
GET _analyze
{
  "analyzer": "ik_smart",
  "text":"中国共产党"
}

# 返回
{
  "tokens" : [
    {
      "token" : "中国共产党",
      "start_offset" : 0,
      "end_offset" : 5,
      "type" : "CN_WORD",
      "position" : 0
    }
  ]
}

IK分词器细粒度

# 请求,通过【ik_max_word】最细粒度划分
GET _analyze
{
  "analyzer": "ik_max_word",
  "text":"中国共产党"
}

# 返回
{
  "tokens" : [
    {
      "token" : "中国共产党",
      "start_offset" : 0,
      "end_offset" : 5,
      "type" : "CN_WORD",
      "position" : 0
    },
    {
      "token" : "中国",
      "start_offset" : 0,
      "end_offset" : 2,
      "type" : "CN_WORD",
      "position" : 1
    },
    {
      "token" : "国共",
      "start_offset" : 1,
      "end_offset" : 3,
      "type" : "CN_WORD",
      "position" : 2
    },
    {
      "token" : "共产党",
      "start_offset" : 2,
      "end_offset" : 5,
      "type" : "CN_WORD",
      "position" : 3
    },
    {
      "token" : "共产",
      "start_offset" : 2,
      "end_offset" : 4,
      "type" : "CN_WORD",
      "position" : 4
    },
    {
      "token" : "党",
      "start_offset" : 4,
      "end_offset" : 5,
      "type" : "CN_CHAR",
      "position" : 5
    }
  ]
}

Rest风格说明

method url地址 描述
PUT localhost:9200/索引名称/类型名称/文档id 创建文档(指定文档id)
POST localhost:9200/索引名称/类型名称 创建文档(随机文档id)
POST localhost:9200/索引名称/类型名称/文档id/_update 修改文档
DELETE localhost:9200/索引名称/类型名称/文档id 删除文档
GET localhost:9200/索引名称/类型名称/文档id 查询文档通过文档id
POST localhost:9200/索引名称/类型名称/_search 查询所有数据

关于索引的基本操作

# 创建索引并添加数据 反复提交则为覆盖修改
PUT bu/_doc/1
{
  "name":"张三",
  "age":"12"
}

# 返回值
{
  "_index" : "bu",
  "_type" : "_doc",
  "_id" : "1",
  "_version" : 1,		# 当多次提交后,版本信息则会随之改变
  "result" : "created",
  "_shards" : {
    "total" : 2,
    "successful" : 1,
    "failed" : 0
  },
  "_seq_no" : 6,
  "_primary_term" : 1
}
# 创建索引规则
PUT /test1/
{
  "mappings": {
    "properties": {
      "name": {
        "type": "text"
      },
      "age": {
        "type": "long"
      },
      "birthday": {
        "type": "date"
      }
    }
  }
}

# 返回值
{
  "acknowledged" : true,
  "shards_acknowledged" : true,
  "index" : "test1"
}
# 查询
GET /bu/

# 返回值
{
  "bu" : {
    "aliases" : { },
    "mappings" : {
      "properties" : {
        "age" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "name" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        }
      }
    },
    "settings" : {
      "index" : {
        "routing" : {
          "allocation" : {
            "include" : {
              "_tier_preference" : "data_content"
            }
          }
        },
        "number_of_shards" : "1",
        "provided_name" : "bu",
        "creation_date" : "1700188636398",
        "number_of_replicas" : "1",
        "uuid" : "m-y5rTaqRQSyKr-a_sAdXw",
        "version" : {
          "created" : "7140099"
        }
      }
    }
  }
}
# 通过_cat 获取ES更多信息
GET _cat/indices?v			# 索引
GET _cat/aliases			# 别名
GET _cat/allocation
GET _cat/count
GET _cat/fielddata
GET _cat/health
GET _cat/master
GET _cat/nodeattrs
GET _cat/nodes				# 查看节点信息,docker容器即为容器信息
GET _cat/pending_tasks
GET _cat/plugins			# 查看插件,如ik分词器
GET _cat/recovery
GET _cat/repositories
GET _cat/segments
GET _cat/shards
GET _cat/snapshots
GET _cat/tasks
GET _cat/templates
GET _cat/thread_pool
# 通过Post方法进行修改	【如果漏写字段,也不会把字段删除】
POST /test/_update/1/
{
  "doc":{
    "name":"李四"
  }
}

// 结果
{
  "_index" : "test",
  "_type" : "_doc",
  "_id" : "1",
  "_version" : 2,
  "result" : "noop",
  "_shards" : {
    "total" : 0,
    "successful" : 0,
    "failed" : 0
  },
  "_seq_no" : 1,
  "_primary_term" : 1
}
# 删除索引
DELETE test

关于文档的基本操作

# 通过条件查询
GET /test/user/_search?q=name:张三

# 通过条件查询
GET /test/user/_search
{
  "query":{
    "match": {
      "name": "张三"
    }
  }
}

# 返回值
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {				#  hits: 命中
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 0.36464313,
    "hits" : [
      {
        "_index" : "test",
        "_type" : "user",
        "_id" : "1",
        "_score" : 0.36464313,
        "_source" : {
          "name" : "张三",
          "age" : 10,
          "desc" : "说明",
          "tags" : [
            "1",
            "2",
            "3"
          ]
        }
      },
      {
        "_index" : "test",
        "_type" : "user",
        "_id" : "2",
        "_score" : 0.36464313,
        "_source" : {
          "name" : "张三",
          "age" : 10,
          "desc" : "说明",
          "tags" : [
            "1",
            "2",
            "3"
          ]
        }
      }
    ]
  }
}
# 过滤要查询的结果 【select name】
GET /test/_search
{
  "query":{
    "match": {
      "name": "张三"
    }
  }, 
  "_source": ["name"]		# 只显示name
}

# 返回结果
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 0.36464313,
    "hits" : [
      {
        "_index" : "test",
        "_type" : "user",
        "_id" : "1",
        "_score" : 0.36464313,
        "_source" : {
          "name" : "张三"
        }
      },
      {
        "_index" : "test",
        "_type" : "user",
        "_id" : "2",
        "_score" : 0.36464313,
        "_source" : {
          "name" : "张三"
        }
      }
    ]
  }
}

# 排序
GET /test/_search
{
  "query":{
    "match": {
      "name": "张三"
    }
  }, 
  "sort": [
    {
      "age": {
        "order": "desc"
      }
    }
  ]
}
# 分页
GET /test/_search
{
  "query":{
    "match": {
      "name": "张三"
    }
  }, 
  "sort": [
    {
      "age": {
        "order": "desc"
      }
    }
  ]
  , "from": 0
  , "size": 1
}
# 查询 name 必须【must】 是张三的
GET /test/user/_search
{
  "query":{
    "bool": {
      "must": [
        {"match": {
          "name": "张三"
        }}
      ]
    }
  }
}

# 查询 name 不能【must_not】 是张三的
GET /test/user/_search
{
  "query":{
    "bool": {
      "must_not": [
        {"match": {
          "name": "张三"
        }}
      ]
    }
  }
}

# 查询 name=张三 or age=10
GET /test/user/_search
{
  "query":{
    "bool": {
      "should": [
        {"match": {
          "name": "张三"
        }},
        {"match": {
          "age": "10"
        }}
      ]
    }
  }
}

# 查询 范围数据
"gt": 大于
"gte": 大于等于
"lt": 小于
"lte": 小于等于

GET /test/user/_search
{
  "query":{
    "bool": {
      "filter": [
        {
          "range": {
            "age": {
              "gte": 3,
              "lte": 10
            }
          }
        }
      ]
    }
  }
}
# 通过 标签查询,多个值用空格分隔
GET /test/user/_search
{
  "query":{
    "match": {
      "tags": "1 2"
    }
  }
}

term 查询是直接通过倒排索引指定的词条进行精确的查找

关于分词

  • term 直接精确查询
  • match 会使用分词器解析!!(先对查询内容进行分词,然后用分词得到的词条进行查询!!)

两个类型 text keyword

  • text 可分
  • keyword 不可再分
# 创建测试数据
PUT testdb
{
  "mappings": {
    "properties": {
      "name":{
        "type": "text"  
      },
      "desc":{
        "type": "keyword"
      }
    }
  }
}

PUT testdb/_doc/1
{
  "name":"测试",
  "desc":""
}

GET /testdb/_doc/1
# 通过keyword【关键字】进行分析 分析结果:测试
GET _analyze
{
  "analyzer": "keyword",
  "text": "测试"
}

# 通过standard【标准】进行分析 分析结果:测 试
GET _analyze
{
  "analyzer": "standard"
  , "text": "测试"
}
# 通过term查询
GET testdb/_search
{
  "query": {
    "term": {
      "desc": {
        "value": ""
      }
    }
  }
}

GET testdb/_search
{
  "query": {
    "term": {
      "name": {
        "value": "测"
      }
    }
  }
}
# 实现高亮查询
GET /test/user/_search
{
  "query":{
    "bool": {
      "should": [
        {"match": {
          "name": "张三"
        }},
        {"match": {
          "age": "10"
        }}
      ]
    }
  },
  "highlight":{
    "pre_tags": "<span style='color:red'>",
    "post_tags": "</span>",
    "fields": {
      "name": {}
    }
  }
}

# 结果
"_source" : {
    "name" : "张三",
    "age" : 10,
    "desc" : "说明",
    "tags" : [
        "1",
        "2",
        "3"
    ]
},
"highlight" : {
    "name" : [
        "<span style='color:red'>张</span><span style='color:red'>三</span>"
    ]
}

整合SpringBoot

ES 依赖版本

# 注意 ES 的依赖版本,需要与服务的版本保持一致
<properties>
  <java.version>1.8</java.version>
  
  <elasticsearch.version>7.14.0</elasticsearch.version>
</properties>

爬取数据存入ES

# 第一步配置依赖
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>
# 第二步编写方法
 /**
  * Fetches the JD search result page for {@code keyword} and turns every listed
  * product into a {@link Context}.
  *
  * @param keyword search term; it is URL-encoded before being placed in the query string
  * @return one {@code Context} per {@code <li>} found under the element with id
  *         {@code J_goodsList}, built from the image's {@code data-lazy-img} attribute,
  *         the {@code p-price} text and the {@code p-name} text; an empty list when
  *         the page contains no such element
  * @throws IOException if the page cannot be fetched or parsed (timeout argument: 3000)
  */
 public static List<Context> getGoodsList(String keyword) throws IOException {
    // Encode the keyword so that non-ASCII text, spaces, '&' or '#' cannot corrupt the
    // query string. The fully-qualified name avoids needing a new import.
    String url = "https://search.jd.com/Search?keyword="
        + java.net.URLEncoder.encode(keyword, "UTF-8") + "&enc=utf-8";
    Document document = Jsoup.parse(new URL(url), 3000);

    List<Context> goodsList = new ArrayList<>();
    Element j_goodsList = document.getElementById("J_goodsList");
    if (j_goodsList == null) {
      // The result container is missing (e.g. a different page was served);
      // report "no goods" instead of failing with a NullPointerException.
      return goodsList;
    }

    for (Element e : j_goodsList.getElementsByTag("li")) {
      // The image URL is read from "data-lazy-img" rather than "src".
      String img = e.getElementsByTag("img").eq(0).attr("data-lazy-img");
      String price = e.getElementsByClass("p-price").eq(0).text();
      String title = e.getElementsByClass("p-name").eq(0).text();

      goodsList.add(new Context(img,price,title));
    }
    return goodsList;
  }

ES Client配置类

package com.es.elasticsearch.config;


import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that publishes the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Creates the client used to talk to Elasticsearch over HTTP.
     *
     * @return a {@link RestHighLevelClient} built for a single node
     */
    @Bean
    public RestHighLevelClient restHighLevelClient(){
      // NOTE(review): host, port and scheme are hard-coded here; consider moving
      // them to application configuration.
      HttpHost node = new HttpHost("8.140.248.231", 9200, "http");
      return new RestHighLevelClient(RestClient.builder(node));
    }
}

业务实现类

package com.es.elasticsearch.service;

import com.alibaba.fastjson.JSON;
import com.es.elasticsearch.pojo.Context;
import com.es.elasticsearch.pojo.User;
import com.es.elasticsearch.util.HtmlParseUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Service;

/**
 * Service that stores scraped goods in the "jd" index and offers paged
 * searches on the {@code title} field, with or without highlighting.
 */
@Service
public class ContextService {

  /** Name of the index every method of this service reads from / writes to. */
  private static final String INDEX_NAME = "jd";

  /**
   * Markup wrapped around highlighted terms. The tags used to be empty strings,
   * which made the highlighted title indistinguishable from the plain one.
   */
  private static final String HIGHLIGHT_PRE_TAG = "<span style='color:red'>";
  private static final String HIGHLIGHT_POST_TAG = "</span>";

  @Autowired
  @Qualifier("restHighLevelClient")
  private RestHighLevelClient restHighLevelClient;

  /**
   * 1. Scrapes the goods for {@code keyword} via {@link HtmlParseUtil#getGoodsList}
   * and bulk-indexes them, creating the index first when it does not exist.
   *
   * @param keyword search term handed to the scraper
   * @return {@code true} when the bulk request reported no failures; {@code false}
   *         when it reported failures or when there was nothing to index
   * @throws IOException if scraping or any Elasticsearch call fails
   */
  public Boolean insert(String keyword) throws IOException {
    ensureIndexExists();

    List<Context> goodsList = HtmlParseUtil.getGoodsList(keyword);
    if (goodsList == null || goodsList.isEmpty()) {
      // A bulk request without any action is rejected by client-side validation
      // ("no requests added"), so report "nothing indexed" instead.
      return false;
    }

    BulkRequest bulkRequest = new BulkRequest();
    bulkRequest.timeout("30s");
    for (Context context : goodsList) {
      // No id is set, so Elasticsearch generates one for every document.
      bulkRequest.add(new IndexRequest(INDEX_NAME).source(JSON.toJSONString(context), XContentType.JSON));
    }
    BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
    return !bulkResponse.hasFailures();
  }


  /**
   * 3. Paged search on {@code title} whose hits carry the highlighted title.
   *
   * @param keyword  term to look up in {@code title}
   * @param pageNo   1-based page number; values below 1 are treated as 1
   * @param pageSize number of hits per page
   * @return the {@code _source} map of every hit; when a highlight for
   *         {@code title} exists, the map's {@code title} entry is replaced
   *         by the concatenated highlighted fragments
   * @throws IOException if the search request fails
   */
  public List<Map<String,Object>> searchPagehighLight(String keyword, int pageNo,int pageSize) throws IOException {
    SearchSourceBuilder builder = buildTitleSearch(keyword, pageNo, pageSize);

    // Ask Elasticsearch to highlight the title field.
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.field("title");
    highlightBuilder.requireFieldMatch(false);
    highlightBuilder.preTags(HIGHLIGHT_PRE_TAG);
    highlightBuilder.postTags(HIGHLIGHT_POST_TAG);
    builder.highlighter(highlightBuilder);

    SearchResponse searchResponse = execute(builder);

    List<Map<String,Object>> list = new ArrayList<>();
    for (SearchHit hit : searchResponse.getHits().getHits()) {
      Map<String,Object> sourceAsMap = hit.getSourceAsMap(); // the stored document
      HighlightField title = hit.getHighlightFields().get("title");
      if (title != null && sourceAsMap != null) {
        // Join all fragments and swap them in for the plain title.
        StringBuilder nTitle = new StringBuilder();
        for (Text fragment : title.fragments()) {
          nTitle.append(fragment);
        }
        // Store a String, not the StringBuilder, so consumers get a plain value.
        sourceAsMap.put("title", nTitle.toString());
      }
      list.add(sourceAsMap);
    }
    return list;
  }

  /**
   * 2. Paged search on {@code title} without highlighting.
   *
   * @param keyword  term to look up in {@code title}
   * @param pageNo   1-based page number; values below 1 are treated as 1
   * @param pageSize number of hits per page
   * @return the {@code _source} map of every hit on the requested page
   * @throws IOException if the search request fails
   */
  public List<Map<String,Object>> searchPage (String keyword, int pageNo,int pageSize) throws IOException {
    SearchResponse searchResponse = execute(buildTitleSearch(keyword, pageNo, pageSize));

    List<Map<String,Object>> list = new ArrayList<>();
    for (SearchHit hit : searchResponse.getHits().getHits()) {
      list.add(hit.getSourceAsMap());
    }
    return list;
  }

  /** Creates the index (without explicit mappings) when it does not exist yet. */
  private void ensureIndexExists() throws IOException {
    GetIndexRequest getIndexRequest = new GetIndexRequest(INDEX_NAME);
    boolean exists = restHighLevelClient.indices()
        .exists(getIndexRequest, RequestOptions.DEFAULT);
    if (!exists) {
      CreateIndexRequest createIndexRequest = new CreateIndexRequest(INDEX_NAME);
      restHighLevelClient.indices().create(createIndexRequest, RequestOptions.DEFAULT);
    }
  }

  /** Builds the paged term query on {@code title} shared by both search methods. */
  private SearchSourceBuilder buildTitleSearch(String keyword, int pageNo, int pageSize) {
    int page = Math.max(pageNo, 1);

    SearchSourceBuilder builder = new SearchSourceBuilder();
    // "from" is a zero-based document offset, not a page number: page 1 starts at 0.
    builder.from((page - 1) * pageSize);
    builder.size(pageSize);
    // NOTE(review): a term query does not analyze the keyword; if `title` is an
    // analyzed text field, multi-character keywords may not match. Confirm
    // against the index mapping.
    TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
    builder.query(termQueryBuilder);
    builder.timeout(new TimeValue(60, TimeUnit.SECONDS));
    return builder;
  }

  /** Runs {@code builder} against the index and returns the raw response. */
  private SearchResponse execute(SearchSourceBuilder builder) throws IOException {
    SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
    searchRequest.source(builder);
    return restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
  }
}

你可能感兴趣的:(elasticsearch,c#,大数据)