This article uses Elasticsearch 7.14.0.
TODO: front-end part
# Request: analyze with ik_smart (coarsest-grained segmentation)
GET _analyze
{
"analyzer": "ik_smart",
"text":"中国共产党"
}
# Response
{
"tokens" : [
{
"token" : "中国共产党",
"start_offset" : 0,
"end_offset" : 5,
"type" : "CN_WORD",
"position" : 0
}
]
}
# Request: analyze with ik_max_word (finest-grained segmentation)
GET _analyze
{
"analyzer": "ik_max_word",
"text":"中国共产党"
}
# Response
{
"tokens" : [
{
"token" : "中国共产党",
"start_offset" : 0,
"end_offset" : 5,
"type" : "CN_WORD",
"position" : 0
},
{
"token" : "中国",
"start_offset" : 0,
"end_offset" : 2,
"type" : "CN_WORD",
"position" : 1
},
{
"token" : "国共",
"start_offset" : 1,
"end_offset" : 3,
"type" : "CN_WORD",
"position" : 2
},
{
"token" : "共产党",
"start_offset" : 2,
"end_offset" : 5,
"type" : "CN_WORD",
"position" : 3
},
{
"token" : "共产",
"start_offset" : 2,
"end_offset" : 4,
"type" : "CN_WORD",
"position" : 4
},
{
"token" : "党",
"start_offset" : 4,
"end_offset" : 5,
"type" : "CN_CHAR",
"position" : 5
}
]
}
Method | URL | Description |
---|---|---|
PUT | localhost:9200/{index}/{type}/{id} | Create a document with the given id |
POST | localhost:9200/{index}/{type} | Create a document with an auto-generated id |
POST | localhost:9200/{index}/{type}/{id}/_update | Update a document |
DELETE | localhost:9200/{index}/{type}/{id} | Delete a document |
GET | localhost:9200/{index}/{type}/{id} | Get a document by id |
POST | localhost:9200/{index}/{type}/_search | Search all documents |
# Create an index and add a document; re-submitting the same request overwrites it (full update)
PUT bu/_doc/1
{
"name":"张三",
"age":"12"
}
# Response
{
"_index" : "bu",
"_type" : "_doc",
"_id" : "2",
"_version" : 1, # 当多次提交后,版本信息则会随之改变
"result" : "created",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 6,
"_primary_term" : 1
}
# Create an index with explicit field mapping rules
PUT /test1/
{
"mappings": {
"properties": {
"name": {
"type": "text"
},
"age": {
"type": "long"
},
"birthday": {
"type": "date"
}
}
}
}
# Response
{
"acknowledged" : true,
"shards_acknowledged" : true,
"index" : "test1"
}
# Query the index information
GET /bu/
# Response
{
"bu" : {
"aliases" : { },
"mappings" : {
"properties" : {
"age" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"settings" : {
"index" : {
"routing" : {
"allocation" : {
"include" : {
"_tier_preference" : "data_content"
}
}
},
"number_of_shards" : "1",
"provided_name" : "bu",
"creation_date" : "1700188636398",
"number_of_replicas" : "1",
"uuid" : "m-y5rTaqRQSyKr-a_sAdXw",
"version" : {
"created" : "7140099"
}
}
}
}
}
# Use the _cat APIs to get more information about the cluster
GET _cat/indices?v # indices
GET _cat/aliases # index aliases
GET _cat/allocation
GET _cat/count
GET _cat/fielddata
GET _cat/health
GET _cat/master
GET _cat/nodeattrs
GET _cat/nodes # node information (in a Docker deployment this shows the container)
GET _cat/pending_tasks
GET _cat/plugins # installed plugins, e.g. the IK analyzer
GET _cat/recovery
GET _cat/repositories
GET _cat/segments
GET _cat/shards
GET _cat/snapshots
GET _cat/tasks
GET _cat/templates
GET _cat/thread_pool
# Update via POST _update: fields omitted from the request are not deleted
POST /test/_update/1/
{
"doc":{
"name":"李四"
}
}
# Response ("noop" means the document already contained these values, so nothing was changed)
{
"_index" : "test",
"_type" : "_doc",
"_id" : "1",
"_version" : 2,
"result" : "noop",
"_shards" : {
"total" : 0,
"successful" : 0,
"failed" : 0
},
"_seq_no" : 1,
"_primary_term" : 1
}
# Delete an index
DELETE test
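# The document operations above can also be driven from Java through the RestHighLevelClient that is configured later in these notes. A minimal sketch follows; the class name, index name and field values are illustrative only.
import java.io.IOException;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentType;
public class DocumentCrudSketch {
    // Mirrors the console examples above against the "bu" index.
    public static void run(RestHighLevelClient client) throws IOException {
        // PUT /bu/_doc/1 : create (or overwrite) a document with a given id
        IndexRequest create = new IndexRequest("bu").id("1")
                .source(XContentType.JSON, "name", "张三", "age", "12");
        client.index(create, RequestOptions.DEFAULT);
        // GET /bu/_doc/1 : fetch the document by id
        GetRequest get = new GetRequest("bu", "1");
        System.out.println(client.get(get, RequestOptions.DEFAULT).getSourceAsString());
        // POST /bu/_update/1 : partial update; omitted fields are kept
        UpdateRequest update = new UpdateRequest("bu", "1")
                .doc(XContentType.JSON, "name", "李四");
        client.update(update, RequestOptions.DEFAULT);
        // DELETE /bu/_doc/1 : delete the document
        client.delete(new DeleteRequest("bu", "1"), RequestOptions.DEFAULT);
    }
}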
# Search with a query-string parameter
GET /test/user/_search?q=name:张三
# Search with a query DSL body (same query as above)
GET /test/user/_search
{
"query":{
"match": {
"name": "张三"
}
}
}
# Response
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : { # hits: 命中
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.36464313,
"hits" : [
{
"_index" : "test",
"_type" : "user",
"_id" : "1",
"_score" : 0.36464313,
"_source" : {
"name" : "张三",
"age" : 10,
"desc" : "说明",
"tags" : [
"1",
"2",
"3"
]
}
},
{
"_index" : "test",
"_type" : "user",
"_id" : "2",
"_score" : 0.36464313,
"_source" : {
"name" : "张三",
"age" : 10,
"desc" : "说明",
"tags" : [
"1",
"2",
"3"
]
}
}
]
}
}
# Restrict the returned fields, like SELECT name in SQL
GET /test/_search
{
"query":{
"match": {
"name": "张三"
}
},
"_source": ["name"] # 只显示name
}
# Response
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.36464313,
"hits" : [
{
"_index" : "test",
"_type" : "user",
"_id" : "1",
"_score" : 0.36464313,
"_source" : {
"name" : "张三"
}
},
{
"_index" : "test",
"_type" : "user",
"_id" : "2",
"_score" : 0.36464313,
"_source" : {
"name" : "张三"
}
}
]
}
}
# Sorting
GET /test/_search
{
"query":{
"match": {
"name": "张三"
}
},
"sort": [
{
"age": {
"order": "desc"
}
}
]
}
# Pagination with from and size
GET /test/_search
{
"query":{
"match": {
"name": "张三"
}
},
"sort": [
{
"age": {
"order": "desc"
}
}
  ],
  "from": 0,
  "size": 1
}
# Query where name must match 张三 (must, like AND)
GET /test/user/_search
{
"query":{
"bool": {
"must": [
{"match": {
"name": "张三"
}}
]
}
}
}
# Query where name must not match 张三 (must_not)
GET /test/user/_search
{
"query":{
"bool": {
"must_not": [
{"match": {
"name": "张三"
}}
]
}
}
}
# Query where name matches 张三 OR age is 10 (should, like OR)
GET /test/user/_search
{
"query":{
"bool": {
"should": [
{"match": {
"name": "张三"
}},
{"match": {
"age": "10"
}}
]
}
}
}
# Range queries (as a filter)
# "gt":  greater than
# "gte": greater than or equal to
# "lt":  less than
# "lte": less than or equal to
GET /test/user/_search
{
"query":{
"bool": {
"filter": [
{
"range": {
"age": {
"gte": 3,
"lte": 10
}
}
}
]
}
}
}
# Query by the tags field; multiple values separated by a space match any of them
GET /test/user/_search
{
"query":{
"match": {
"tags": "1 2"
}
}
}
A term query looks up the exact term directly in the inverted index; the query string is not analyzed.
About analysis: there are two string field types, text and keyword. A text field is run through an analyzer and split into terms, while a keyword field is indexed as a single, un-analyzed term.
# Create test data: name is text, desc is keyword
PUT testdb
{
"mappings": {
"properties": {
"name":{
"type": "text"
},
"desc":{
"type": "keyword"
}
}
}
}
PUT testdb/_doc/1
{
"name":"测试",
"desc":""
}
GET /testdb/_doc/1
# Analyze with the keyword analyzer; result: 测试 kept as a single token
GET _analyze
{
"analyzer": "keyword",
"text": "测试"
}
# Analyze with the standard analyzer; result: 测 and 试 (Chinese is split into single characters)
GET _analyze
{
"analyzer": "standard"
, "text": "测试"
}
# Term queries: the keyword field only matches its exact stored value, while the text field matches its analyzed terms
GET testdb/_search
{
"query": {
"term": {
"desc": {
"value": ""
}
}
}
}
GET testdb/_search
{
"query": {
"term": {
"name": {
"value": "测"
}
}
}
}
# Highlighted search
GET /test/user/_search
{
"query":{
"bool": {
"should": [
{"match": {
"name": "张三"
}},
{"match": {
"age": "10"
}}
]
}
},
"highlight":{
"pre_tags": "",
"post_tags": "",
"fields": {
"name": {}
}
}
}
# Result (only the relevant fragment of one hit is shown)
"_source" : {
"name" : "张三",
"age" : 10,
"desc" : "说明",
"tags" : [
"1",
"2",
"3"
]
},
"highlight" : {
"name" : [
"张三"
]
}
# Note: the Elasticsearch client dependency version must match the version of the server
<properties>
    <java.version>1.8</java.version>
    <elasticsearch.version>7.14.0</elasticsearch.version>
</properties>
# Step 1: add the Jsoup dependency
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>
# Step 2: write the parsing method (in HtmlParseUtil)
public static List<Context> getGoodsList(String keyword) throws IOException {
    // JD search page; "wd" is a placeholder that gets replaced by the real keyword
    String url = "https://search.jd.com/Search?keyword=wd&enc=utf-8";
    Document document = Jsoup.parse(new URL(url.replace("wd", keyword)), 3000);
    // every product card is an li element inside the container with id J_goodsList
    Element j_goodsList = document.getElementById("J_goodsList");
    Elements li = j_goodsList.getElementsByTag("li");
    List<Context> goodsList = new ArrayList<>();
    for (Element e : li) {
        // images are lazy-loaded, so the URL sits in data-lazy-img rather than src
        String img = e.getElementsByTag("img").eq(0).attr("data-lazy-img");
        String price = e.getElementsByClass("p-price").eq(0).text();
        String title = e.getElementsByClass("p-name").eq(0).text();
        goodsList.add(new Context(img, price, title));
    }
    return goodsList;
}
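# The Context POJO used by the crawler and by the service below is not shown in these notes. Here is a minimal sketch, assuming it only carries the three crawled fields; fastjson needs the getters when the object is serialized for indexing.
package com.es.elasticsearch.pojo;
public class Context {
    // hypothetical minimal fields, matching new Context(img, price, title) above
    private String img;
    private String price;
    private String title;
    public Context() {
    }
    public Context(String img, String price, String title) {
        this.img = img;
        this.price = price;
        this.title = title;
    }
    public String getImg() { return img; }
    public void setImg(String img) { this.img = img; }
    public String getPrice() { return price; }
    public void setPrice(String price) { this.price = price; }
    public String getTitle() { return title; }
    public void setTitle(String title) { this.title = title; }
}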
package com.es.elasticsearch.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class ElasticSearchClientConfig {
@Bean
public RestHighLevelClient restHighLevelClient(){
return new RestHighLevelClient(
RestClient.builder(
new HttpHost("8.140.248.231", 9200, "http")));
}
}
package com.es.elasticsearch.service;
import com.alibaba.fastjson.JSON;
import com.es.elasticsearch.pojo.Context;
import com.es.elasticsearch.util.HtmlParseUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Service;
@Service
public class ContextService {
@Autowired
@Qualifier("restHighLevelClient")
private RestHighLevelClient restHighLevelClient;
    // 1. Crawl the keyword with the utility class and bulk-insert the parsed results into the index
public Boolean insert(String keyword) throws IOException {
String index = "jd";
GetIndexRequest getIndexRequest = new GetIndexRequest(index);
boolean exists = restHighLevelClient.indices()
.exists(getIndexRequest, RequestOptions.DEFAULT);
if(!exists){
            // create the index if it does not exist yet
CreateIndexRequest createIndexRequest = new CreateIndexRequest(index);
restHighLevelClient.indices().create(createIndexRequest,RequestOptions.DEFAULT);
}
List<Context> goodsList = HtmlParseUtil.getGoodsList(keyword);
        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("30s");
        for (Context context : goodsList) {
            bulkRequest.add(new IndexRequest(index).source(JSON.toJSONString(context), XContentType.JSON));
        }
        BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        // return true only if every bulk item succeeded
return !bulkResponse.hasFailures();
}
    // 3. Search the indexed data, with highlighting
public List<Map<String,Object>> searchPagehighLight(String keyword, int pageNo,int pageSize) throws IOException {
if (pageNo <= 1)
pageNo = 1;
        // build the search request
SearchRequest searchRequest = new SearchRequest("jd");
SearchSourceBuilder builder = new SearchSourceBuilder();
        builder.from((pageNo - 1) * pageSize); // "from" is a document offset, so convert the page number
builder.size(pageSize);
        // exact match on the title field
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title",keyword);
builder.query(termQueryBuilder);
builder.timeout(new TimeValue(60, TimeUnit.SECONDS));
        // highlighting configuration
HighlightBuilder highlightBuilder = new HighlightBuilder();
highlightBuilder.field("title");
highlightBuilder.requireFieldMatch(false);
        highlightBuilder.preTags("<em>");   // markup wrapped around matches; any HTML tag or inline style works here
        highlightBuilder.postTags("</em>");
builder.highlighter(highlightBuilder);
        // execute the search
searchRequest.source(builder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        // parse the hits
ArrayList<Map<String,Object>> list= new ArrayList<>();
for (SearchHit hit: searchResponse.getHits().getHits()) {
            // pull out the highlighted fragments of this hit
Map<String, HighlightField> highlightFields = hit.getHighlightFields();
HighlightField title = highlightFields.get("title");
            Map<String,Object> sourceAsMap = hit.getSourceAsMap(); // the original _source
            // if the title was highlighted, replace the original value with the highlighted one
if (title != null) {
Text[] fragments = title.fragments();
StringBuilder nTitle = new StringBuilder();
for (Text text:fragments) {
nTitle.append(text);
}
                sourceAsMap.put("title", nTitle.toString());
            }
            list.add(sourceAsMap); // the title now holds the highlighted text when there was a match
}
return list;
}
    // 2. Search the indexed data, basic version without highlighting
public List<Map<String,Object>> searchPage (String keyword, int pageNo,int pageSize) throws IOException {
if (pageNo <= 1)
pageNo = 1;
        // build the search request
SearchRequest searchRequest = new SearchRequest("jd");
SearchSourceBuilder builder = new SearchSourceBuilder();
        builder.from((pageNo - 1) * pageSize); // "from" is a document offset, so convert the page number
builder.size(pageSize);
        // exact match on the title field
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title",keyword);
builder.query(termQueryBuilder);
builder.timeout(new TimeValue(60, TimeUnit.SECONDS));
        // execute the search
searchRequest.source(builder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        // parse the hits
ArrayList<Map<String,Object>> list= new ArrayList<>();
for (SearchHit hit: searchResponse.getHits().getHits()) {
            list.add(hit.getSourceAsMap()); // collect the raw _source of every hit
}
return list;
}
}
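# How these service methods are exposed over HTTP is not shown here (the front-end part is still a TODO at the top). A minimal controller sketch follows; the request paths and parameter layout are illustrative assumptions, not the project's actual routes.
package com.es.elasticsearch.controller;
import com.es.elasticsearch.service.ContextService;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;
@RestController
public class ContextController {
    @Autowired
    private ContextService contextService;
    // crawl JD for the keyword and bulk-index the results
    @GetMapping("/parse/{keyword}")
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        return contextService.insert(keyword);
    }
    // paged, highlighted search
    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    public List<Map<String, Object>> search(@PathVariable("keyword") String keyword,
                                            @PathVariable("pageNo") int pageNo,
                                            @PathVariable("pageSize") int pageSize) throws IOException {
        return contextService.searchPagehighLight(keyword, pageNo, pageSize);
    }
}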