官网地址: https://lucene.apache.org/
重要特征:
如果是根据id查询,那么直接走索引,查询速度非常快。
但如果是基于title做模糊查询,只能是逐行扫描数据,流程如下:
逐行扫描,也就是全表扫描,随着数据量增加,其查询效率也会越来越低。当数据量达到数百万时,就是一场灾难。
创建倒排索引是对正向索引的一种特殊处理,流程如下:
如图:
倒排索引的搜索流程如下(以搜索"华为手机"为例):
如图:
虽然要先查询倒排索引,再查询正向索引,但是无论是词条、还是文档 id 都建立了索引,查询速度非常快!无需全表扫描。
正向索引:
而Json文档中往往包含很多的字段(Field),类似于数据库中的列。
因此,我们可以把索引当做是数据库中的表。
| MySQL | Elasticsearch | 说明 |
| --- | --- | --- |
| Table | Index | 索引(index),就是文档的集合,类似数据库的表(table) |
| Row | Document | 文档(Document),就是一条条的数据,类似数据库中的行(Row),文档都是 JSON 格式 |
| Column | Field | 字段(Field),就是 JSON 文档中的字段,类似数据库中的列(Column) |
| Schema | Mapping | Mapping(映射)是索引中文档的约束,例如字段类型约束。类似数据库的表结构(Schema) |
| SQL | DSL | DSL 是 elasticsearch 提供的 JSON 风格的请求语句,用来操作 elasticsearch,实现 CRUD |
两者各有自己的擅长之处:
因此在企业中,往往是两者结合使用:
IK分词器有几种模式?
安装
下载:https://github.com/medcl/elasticsearch-analysis-ik/releases
{
"age": 18,
"weight": 70.2,
"isMarried": false,
"info": "apesourceJavaEE王讲师",
"email": "wang@example.com",
"score": [99.1, 99.5, 98.9],
"name": {
"firstName": "师傅",
"lastName": "王"
}
}
基本语法:
格式:
PUT /索引库名称
{
"mappings": {
"properties": {
"字段名":{
"type": "text",
"analyzer": "ik_smart"
},
"字段名2":{
"type": "keyword",
"index": "false"
},
"字段名3":{
"properties": {
"子字段": {
"type": "keyword"
}
}
},
// ...略
}
}
}
基本语法:
格式:GET /索引库名
PUT /索引库名/_mapping
{
"properties": {
"新字段名":{
"type": "integer"
}
}
}
语法:
格式:DELETE /索引库名
语法:
POST /索引库名/_doc/文档id
{
"字段1": "值1",
"字段2": "值2",
"字段3": {
"子属性1": "值3",
"子属性2": "值4"
},
// ...
}
响应:result:created
注意 :如果根据 id 删除时, id 不存在,第二步的新增也会执行,也就从修改变成了新增操作了。
PUT /{索引库名}/_doc/文档id
{
"字段1": "值1",
"字段2": "值2",
// ... 略
}
POST /{索引库名}/_update/文档id
{
"doc": {
"字段名": "新的值"
}
}
我们学习的是Java HighLevel Rest Client客户端API
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
</dependency>
2、因为SpringBoot默认的ES版本是7.6.2,所以我们需要覆盖默认的ES版本:
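<properties>
    <java.version>1.8</java.version>
    <elasticsearch.version>7.12.0</elasticsearch.version>
</properties>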
3、初始化RestHighLevelClient:
初始化的部分代码如下:
// Build a high-level REST client over a low-level client builder that targets
// the node at http://localhost:9200.
RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(
HttpHost.create("http://localhost:9200")
));
// Client shared by the tests in this class; assigned in setUp().
private RestHighLevelClient client;

/** Creates a new client for the node at http://localhost:9200 before each test. */
@BeforeEach
public void setUp() {
    HttpHost localNode = HttpHost.create("http://localhost:9200");
    this.client = new RestHighLevelClient(RestClient.builder(localNode));
}
/**
 * Closes the client after each test.
 *
 * <p>The null check keeps a failure in {@code setUp()} (client never assigned) from being
 * masked by a secondary NullPointerException raised here.
 *
 * @throws IOException if closing the client fails
 */
@AfterEach
void tearDown() throws IOException {
    if (this.client != null) {
        this.client.close();
    }
}
package com.itzhi.common;
/**
 * Holds the JSON body used to create the hotel index.
 *
 * <p>This is a constants-only holder: it is final and cannot be instantiated.
 *
 * @author lizhihui
 * @version 1.0
 * @since 2023/8/11
 */
public final class HotelConstants {

    /**
     * Index-creation body. Declares the mapping for every hotel field; {@code name},
     * {@code brand} and {@code city} are copied into the combined {@code all} text field,
     * while {@code address} and {@code pic} are stored with {@code "index": false}.
     */
    public static final String MAPPING_TEMPLATE = "{\n" +
            " \"mappings\": {\n" +
            " \"properties\": {\n" +
            " \"id\": {\n" +
            " \"type\": \"keyword\"\n" +
            " },\n" +
            " \"name\":{\n" +
            " \"type\": \"text\",\n" +
            " \"analyzer\": \"ik_max_word\",\n" +
            " \"copy_to\": \"all\"\n" +
            " },\n" +
            " \"address\":{\n" +
            " \"type\": \"keyword\",\n" +
            " \"index\": false\n" +
            " },\n" +
            " \"price\":{\n" +
            " \"type\": \"integer\"\n" +
            " },\n" +
            " \"score\":{\n" +
            " \"type\": \"integer\"\n" +
            " },\n" +
            " \"brand\":{\n" +
            " \"type\": \"keyword\",\n" +
            " \"copy_to\": \"all\"\n" +
            " },\n" +
            " \"city\":{\n" +
            " \"type\": \"keyword\",\n" +
            " \"copy_to\": \"all\"\n" +
            " },\n" +
            " \"starName\":{\n" +
            " \"type\": \"keyword\"\n" +
            " },\n" +
            " \"business\":{\n" +
            " \"type\": \"keyword\"\n" +
            " },\n" +
            " \"location\":{\n" +
            " \"type\": \"geo_point\"\n" +
            " },\n" +
            " \"pic\":{\n" +
            " \"type\": \"keyword\",\n" +
            " \"index\": false\n" +
            " },\n" +
            " \"all\":{\n" +
            " \"type\": \"text\",\n" +
            " \"analyzer\": \"ik_max_word\"\n" +
            " }\n" +
            " }\n" +
            " }\n" +
            "}";

    // Not meant to be instantiated: the class only exposes constants.
    private HotelConstants() {
    }
}
// Create the "hotels" index using the mapping template
@Test
public void createHotelIndex() throws IOException {
    CreateIndexRequest createRequest = new CreateIndexRequest("hotels");
    // The request body is the JSON mapping held in HotelConstants
    createRequest.source(HotelConstants.MAPPING_TEMPLATE, XContentType.JSON);
    client.indices().create(createRequest, RequestOptions.DEFAULT);
}
// Delete the "hotels" index
@Test
public void testDeleteHotelIndex() throws IOException {
    client.indices().delete(new DeleteIndexRequest("hotels"), RequestOptions.DEFAULT);
}
// Check whether the "hotels" index exists and report the answer on stderr
@Test
public void testExistsHotelIndex() throws IOException {
    GetIndexRequest lookup = new GetIndexRequest("hotels");
    String message;
    if (client.indices().exists(lookup, RequestOptions.DEFAULT)) {
        message = "索引库已经存在!";
    } else {
        message = "索引库不存在!";
    }
    System.err.println(message);
}
/**
 * Hotel record annotated with the {@code tb_hotel} table name; every field carries the
 * name of its column.
 * NOTE(review): the annotations look like MyBatis-Plus mappings — confirm against the imports.
 * Field order matters here: it determines the parameter order of the generated
 * all-args constructor.
 *
 * @author lizhihui
 * @version 1.0
 * @since 2023/8/11
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@ToString
@TableName("tb_hotel")
public class Hotel {
// Primary key column "id", declared with IdType.INPUT.
@TableId(value = "id",type = IdType.INPUT)
private Long id;
@TableField("name")
private String name;
@TableField("address")
private String address;
@TableField("price")
private Integer price;
@TableField("score")
private Integer score;
@TableField("brand")
private String brand;
@TableField("city")
private String city;
@TableField("starName")
private String starName;
@TableField("business")
private String business;
@TableField("longitude")
private String longitude;// longitude, held as text
@TableField("latitude")
private String latitude;// latitude, held as text
@TableField("pic")
private String pic;
}
/**
 * Document form of a {@link Hotel}: the same fields, except that latitude and longitude
 * are merged into a single {@code location} string.
 *
 * @author lizhihui
 * @version 1.0
 * @since 2023/8/11
 */
@Data
@NoArgsConstructor
@ToString
public class HotelDoc {
    private Long id;
    private String name;
    private String address;
    private Integer price;
    private Integer score;
    private String brand;
    private String city;
    private String starName;
    private String business;
    private String location;
    private String pic;

    /**
     * Copies the fields of the given hotel and builds {@code location} from its coordinates.
     *
     * @param hotel the hotel to copy from
     */
    public HotelDoc(Hotel hotel) {
        id = hotel.getId();
        name = hotel.getName();
        address = hotel.getAddress();
        price = hotel.getPrice();
        score = hotel.getScore();
        brand = hotel.getBrand();
        city = hotel.getCity();
        starName = hotel.getStarName();
        business = hotel.getBusiness();
        location = joinCoordinates(hotel.getLatitude(), hotel.getLongitude());
        pic = hotel.getPic();
    }

    // Renders the pair as "latitude, longitude" (latitude first).
    private static String joinCoordinates(String latitude, String longitude) {
        return latitude + ", " + longitude;
    }
}
// Index one hotel as a document in the "hotels" index
@Test
public void testAddDocument() throws IOException {
    // Fetch the hotel through the service and convert it to its document form
    HotelDoc doc = new HotelDoc(service.getById(197837109));
    // The document id is the hotel id rendered as a string
    String docId = doc.getId().toString();
    IndexRequest indexRequest = new IndexRequest("hotels");
    indexRequest.id(docId);
    indexRequest.source(JSON.toJSONString(doc), XContentType.JSON);
    client.index(indexRequest, RequestOptions.DEFAULT);
}
// Fetch one document by id and print it as a HotelDoc
@Test
public void testGetDocument() throws IOException {
    GetResponse found = client.get(new GetRequest("hotels", "197837109"), RequestOptions.DEFAULT);
    // Parse the document source (JSON text) back into a HotelDoc
    HotelDoc hotelDoc = JSON.parseObject(found.getSourceAsString(), HotelDoc.class);
    System.out.println(hotelDoc);
}
// Delete one document by id
@Test
public void testDeleteDocument() throws IOException {
    client.delete(new DeleteRequest("hotels", "197837109"), RequestOptions.DEFAULT);
}
在hotel-demo的HotelDocumentTest测试类中,编写单元测试:
// Partially update one document by id
@Test
public void testUpdateDocument() throws IOException {
    // Alternating field name / new value pairs
    Object[] changes = {
            "name", "W酒店",
            "city", "西安",
            "price", "2000",
            "starName", "五星级"
    };
    UpdateRequest updateRequest = new UpdateRequest("hotels", "197837109");
    updateRequest.doc(changes);
    client.update(updateRequest, RequestOptions.DEFAULT);
}
// Index every hotel returned by the service in a single bulk request
@Test
public void testBulkRequest() throws IOException {
    // Typed list: a raw List yields Object elements, which the loop below cannot bind to Hotel
    List<Hotel> list = service.list();
    BulkRequest request = new BulkRequest();
    for (Hotel hotel : list) {
        HotelDoc hotelDoc = new HotelDoc(hotel);
        // One index operation per hotel, keyed by the hotel id
        request.add(new IndexRequest("hotels")
                .id(hotelDoc.getId().toString())
                .source(JSON.toJSONString(hotelDoc), XContentType.JSON));
    }
    client.bulk(request, RequestOptions.DEFAULT);
}
elasticsearch的查询依然是基于JSON风格的DSL来实现的。
// Query every document in the "hotels" index
@Test
public void testMatchAll() throws IOException {
    SearchRequest searchRequest = new SearchRequest("hotels");
    searchRequest.source().query(QueryBuilders.matchAllQuery());
    show(client.search(searchRequest, RequestOptions.DEFAULT));
}
// Match query for "如家" against the "all" field
@Test
public void testMatch() throws IOException {
    SearchRequest searchRequest = new SearchRequest("hotels");
    searchRequest.source().query(QueryBuilders.matchQuery("all", "如家"));
    show(client.search(searchRequest, RequestOptions.DEFAULT));
}
// Multi-match query for "如家" against the "name" and "business" fields
@Test
void testMultiMatch() throws IOException {
    SearchRequest searchRequest = new SearchRequest("hotels");
    // First argument is the search text; the remaining arguments are the fields to search
    searchRequest.source().query(QueryBuilders.multiMatchQuery("如家", "name", "business"));
    // Send the request and print the parsed hits
    show(client.search(searchRequest, RequestOptions.DEFAULT));
}
// Term query: "city" equal to "上海"
@Test
public void testTermQuery() throws IOException {
    SearchRequest searchRequest = new SearchRequest("hotels");
    searchRequest.source().query(QueryBuilders.termQuery("city", "上海"));
    show(client.search(searchRequest, RequestOptions.DEFAULT));
}
// Range query: "price" between 100 and 150, both bounds inclusive
@Test
void testRangeQuery() throws IOException {
    SearchRequest searchRequest = new SearchRequest("hotels");
    searchRequest.source().query(
            QueryBuilders.rangeQuery("price")
                    .gte(100)
                    .lte(150));
    show(client.search(searchRequest, RequestOptions.DEFAULT));
}
// Bool query: "city" must equal "上海", filtered to "price" <= 260
@Test
void testBool() throws IOException {
    // A bool query combines one or more sub-queries:
    //   must     - every sub-query has to match ("and")
    //   should   - sub-queries match optionally ("or")
    //   must_not - must not match; does not contribute to the score ("not")
    //   filter   - has to match ("and"); does not contribute to the score
    // Rule of thumb from these notes: must for the search box, filter for selected conditions.
    SearchRequest searchRequest = new SearchRequest("hotels");

    // The same query can also be assembled step by step on a builder held in a local
    // variable; here it is written as one fluent expression.
    searchRequest.source().query(
            QueryBuilders.boolQuery()
                    .must(QueryBuilders.termQuery("city", "上海"))
                    .filter(QueryBuilders.rangeQuery("price").lte(260)));

    // Send the request and print the parsed hits
    show(client.search(searchRequest, RequestOptions.DEFAULT));
}
// Paged search on "name", sorted by "price" descending
@Test
void testPageAndSort() throws IOException {
    // Page number and page size
    int page = 1;
    int size = 20;
    // Search text; set to null to fall back to match-all
    String searchName = "如家";

    SearchRequest searchRequest = new SearchRequest("hotels");

    // Query: match on "name" when a search text is given, otherwise everything
    if (searchName != null) {
        searchRequest.source().query(QueryBuilders.matchQuery("name", searchName));
    } else {
        searchRequest.source().query(QueryBuilders.matchAllQuery());
    }

    // Paging: offset of the first hit on the requested page
    int offset = (page - 1) * size;
    searchRequest.source().from(offset).size(size);

    // Sorting
    searchRequest.source().sort("price", SortOrder.DESC);

    // Send the request and print the parsed hits
    show(client.search(searchRequest, RequestOptions.DEFAULT));
}
/**
 * Prints the total hit count of a search response, then each hit parsed into a HotelDoc.
 *
 * @param response the search response to print
 */
public void show(SearchResponse response) {
    SearchHits allHits = response.getHits();
    // Total number of matches reported by the response
    System.out.println("共搜到" + allHits.getTotalHits().value + "条数据");
    // Hits returned in this response
    SearchHit[] documents = allHits.getHits();
    for (int i = 0; i < documents.length; i++) {
        HotelDoc hotelDoc = JSON.parseObject(documents[i].getSourceAsString(), HotelDoc.class);
        System.out.println("HotelDoc = " + hotelDoc);
    }
}