Elaticsearch,简称为es,es是一个开源的高扩展的分布式全文检索引擎,它可以近乎实时的存储、检索数据 本身扩展性很好,可以扩展到上百台服务器,处理PB级别(大数据时代)的数据。
# 更改端口,防止冲突
server.port=9999
<properties>
<java.version>1.8</java.version>
<!-- 统一版本 -->
<elasticsearch.version>7.6.1</elasticsearch.version>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- jsoup解析页面 -->
<!-- 解析网页 爬视频可 研究tiko -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<!-- fastjson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.70</version>
</dependency>
<!-- ElasticSearch -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
<!-- thymeleaf -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<!-- web -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- devtools热部署 -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-devtools</artifactId>
<scope>runtime</scope>
<optional>true</optional>
</dependency>
<!-- -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
<optional>true</optional>
</dependency>
<!-- lombok 需要安装插件 -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
</dependencies>
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class ElasticSearchConfig {
// 注册 rest高级客户端
@Bean
public RestHighLevelClient restHighLevelClient(){
RestHighLevelClient client = new RestHighLevelClient(
RestClient.builder(
new HttpHost("127.0.0.1",9200,"http")
)
);
return client;
}
}
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.io.Serializable;
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Content implements Serializable {
private static final long serialVersionUID = -8049497962627482693L;
private String name;
private String img;
private String price;
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getImg() {
return img;
}
public void setImg(String img) {
this.img = img;
}
public String getPrice() {
return price;
}
public void setPrice(String price) {
this.price = price;
}
}
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.io.Serializable;
@Data
@NoArgsConstructor
@AllArgsConstructor
public class User implements Serializable {
private static final long serialVersionUID = -3843548915035470817L;
private String name;
private Integer age;
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public Integer getAge() {
return age;
}
public void setAge(Integer age) {
this.age = age;
}
}
1、测试增删查改部分
package com.zhang.springbootelasticsearch;
import com.alibaba.fastjson.JSON;
import com.zhang.springbootelasticsearch.pojo.User;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import java.io.IOException;
import java.util.ArrayList;
import java.util.concurrent.TimeUnit;
@SpringBootTest
class SpringbootElasticsearchApplicationTests {
@Autowired
public RestHighLevelClient restHighLevelClient;
@Test
void contextLoads() {
}
// 测试索引的创建, Request PUT liuyou_index
@Test
public void testCreateIndex() throws IOException {
CreateIndexRequest request = new CreateIndexRequest("liuyou_index1");
CreateIndexResponse response = restHighLevelClient.indices().create(request, RequestOptions.DEFAULT);
System.out.println(response.isAcknowledged());// 查看是否创建成功
System.out.println(response);// 查看返回对象
restHighLevelClient.close();
}
// 测试获取索引,并判断其是否存在
@Test
public void testIndexIsExists() throws IOException {
GetIndexRequest request = new GetIndexRequest("liuyou_index1");
boolean exists = restHighLevelClient.indices().exists(request, RequestOptions.DEFAULT);
System.out.println(exists);// 索引是否存在
restHighLevelClient.close();
}
// 测试索引删除
@Test
public void testDeleteIndex() throws IOException {
DeleteIndexRequest request = new DeleteIndexRequest("liuyou_index1");
AcknowledgedResponse response = restHighLevelClient.indices().delete(request, RequestOptions.DEFAULT);
System.out.println(response.isAcknowledged());// 是否删除成功
restHighLevelClient.close();
}
// 测试添加文档(先创建一个User实体类,添加fastjson依赖)
@Test
public void testAddDocument() throws IOException {
// 创建一个User对象
User liuyou = new User("liuyou爹", 40);
// 创建请求
IndexRequest request = new IndexRequest("liuyou_index");
// 制定规则 PUT /liuyou_index/_doc/1
request.id("4");// 设置文档ID
request.timeout(TimeValue.timeValueMillis(1000));// request.timeout("1s")
// 将我们的数据放入请求中
request.source(JSON.toJSONString(liuyou), XContentType.JSON);
// 客户端发送请求,获取响应的结果
IndexResponse response = restHighLevelClient.index(request, RequestOptions.DEFAULT);
System.out.println(response.status());// 获取建立索引的状态信息 CREATED
System.out.println(response);// 查看返回内容 IndexResponse[index=liuyou_index,type=_doc,id=1,version=1,result=created,seqNo=0,primaryTerm=1,shards={"total":2,"successful":1,"failed":0}]
}
// 测试获得文档信息
@Test
public void testGetDocument() throws IOException {
GetRequest request = new GetRequest("liuyou_index","1");
GetResponse response = restHighLevelClient.get(request, RequestOptions.DEFAULT);
System.out.println(response.getSourceAsString());// 打印文档内容
System.out.println(request);// 返回的全部内容和命令是一样的
restHighLevelClient.close();
}
// 获取文档,判断是否存在 get /liuyou_index/_doc/1
@Test
public void testDocumentIsExists() throws IOException {
GetRequest request = new GetRequest("liuyou_index", "1");
// 不获取返回的 _source的上下文了
request.fetchSourceContext(new FetchSourceContext(false));
request.storedFields("_none_");
boolean exists = restHighLevelClient.exists(request, RequestOptions.DEFAULT);
System.out.println(exists);
}
// 测试更新文档内容
@Test
public void testUpdateDocument() throws IOException {
UpdateRequest request = new UpdateRequest("liuyou_index", "1");
User user = new User("lmk",11);
request.doc(JSON.toJSONString(user),XContentType.JSON);
UpdateResponse response = restHighLevelClient.update(request, RequestOptions.DEFAULT);
System.out.println(response.status()); // OK
restHighLevelClient.close();
}
// 测试删除文档
@Test
public void testDeleteDocument() throws IOException {
DeleteRequest request = new DeleteRequest("liuyou_index", "1");
request.timeout("1s");
DeleteResponse response = restHighLevelClient.delete(request, RequestOptions.DEFAULT);
System.out.println(response.status());// OK
}
// 查询
// SearchRequest 搜索请求
// SearchSourceBuilder 条件构造
// HighlightBuilder 高亮
// TermQueryBuilder 精确查询
// MatchAllQueryBuilder
// xxxQueryBuilder ...
@Test
public void testSearch() throws IOException {
// 1.创建查询请求对象
SearchRequest searchRequest = new SearchRequest();
// 2.构建搜索条件
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
// (1)查询条件 使用QueryBuilders工具类创建
// 精确查询
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("name", "liuyou");
// 匹配查询
MatchAllQueryBuilder matchAllQueryBuilder = QueryBuilders.matchAllQuery();
// (2)其他<可有可无>:(可以参考 SearchSourceBuilder 的字段部分)
// 设置高亮
searchSourceBuilder.highlighter(new HighlightBuilder());
// // 分页
// searchSourceBuilder.from();
// searchSourceBuilder.size();
searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
// (3)条件投入
// searchSourceBuilder.query(termQueryBuilder);
searchSourceBuilder.query(matchAllQueryBuilder);
// 3.添加条件到请求
searchRequest.source(searchSourceBuilder);
// 4.客户端查询请求
SearchResponse search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
// 5.查看返回结果
SearchHits hits = search.getHits();
System.out.println(JSON.toJSONString(hits));
System.out.println("=======================");
for (SearchHit documentFields : hits.getHits()) {
System.out.println(documentFields.getSourceAsMap());
}
}
// 上面的这些api无法批量增加数据(只会保留最后一个source)
@Test
public void test() throws IOException {
IndexRequest request = new IndexRequest("bulk");// 没有id会自动生成一个随机ID
request.source(JSON.toJSONString(new User("liu",1)),XContentType.JSON);
request.source(JSON.toJSONString(new User("min",2)),XContentType.JSON);
request.source(JSON.toJSONString(new User("kai",3)),XContentType.JSON);
IndexResponse index = restHighLevelClient.index(request, RequestOptions.DEFAULT);
System.out.println(index.status());// created
}
// 特殊的,真的项目一般会 批量插入数据
@Test
public void testBulk() throws IOException {
BulkRequest bulkRequest = new BulkRequest();
bulkRequest.timeout("10s");
ArrayList<User> users = new ArrayList<>();
users.add(new User("liuyou-1",1));
users.add(new User("liuyou-2",2));
users.add(new User("liuyou-3",3));
users.add(new User("liuyou-4",4));
users.add(new User("liuyou-5",5));
users.add(new User("liuyou-6",6));
// 批量请求处理
for (int i = 0; i < users.size(); i++) {
bulkRequest.add(
// 这里是数据信息
new IndexRequest("bulk")
.id(""+(i + 1)) // 没有设置id 会自定生成一个随机id
.source(JSON.toJSONString(users.get(i)),XContentType.JSON)
);
}
BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
System.out.println(bulk.status());// ok
}
}
2、京东实战搜索(后端部分)
爬虫工具类:(需要将爬下来的数据放入到es)
import com.zhang.springbootelasticsearch.pojo.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
public class HtmlParseUtil {
public static void main(String[] args) throws IOException {
System.out.println(parseJD("java"));
}
public static List<Content> parseJD(String keyword) throws IOException {
/// 使用前需要联网
// 请求url
String url = "http://search.jd.com/search?keyword=" + keyword;
// 1.解析网页(jsoup 解析返回的对象是浏览器Document对象)
Document document = Jsoup.parse(new URL(url), 30000);
// 使用document可以使用在js对document的所有操作
// 2.获取元素(通过id)
Element j_goodsList = document.getElementById("J_goodsList");
// 3.获取J_goodsList ul 每一个 li
Elements lis = j_goodsList.getElementsByTag("li");
// System.out.println(lis);
// 4.获取li下的 img、price、name
// list存储所有li下的内容
List<Content> contents = new ArrayList<Content>();
for (Element li : lis) {
// 由于网站图片使用懒加载,将src属性替换为data-lazy-img
String img = li.getElementsByTag("img").eq(0).attr("data-lazy-img");// 获取li下 第一张图片
String name = li.getElementsByClass("p-name").eq(0).text();
String price = li.getElementsByClass("p-price").eq(0).text();
// 封装为对象
Content content = new Content(name,img,price);
// 添加到list中
contents.add(content);
}
// System.out.println(contents);
// 5.返回 list
return contents;
}
}
业务层实现:
import com.alibaba.fastjson.JSON;
import com.zhang.springbootelasticsearch.pojo.Content;
import com.zhang.springbootelasticsearch.util.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
@Service
public class ContentService {
@Autowired
private RestHighLevelClient restHighLevelClient;
// 1、解析数据放入 es 索引中
public Boolean parseContent(String keyword) throws IOException {
// 获取内容
List<Content> contents = HtmlParseUtil.parseJD(keyword);
// 内容放入 es 中
BulkRequest bulkRequest = new BulkRequest();
bulkRequest.timeout("2m"); // 可更具实际业务是指
for (int i = 0; i < contents.size(); i++) {
bulkRequest.add(
new IndexRequest("jd_goods")
.id(""+(i+1))
.source(JSON.toJSONString(contents.get(i)), XContentType.JSON)
);
}
BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
restHighLevelClient.close();
return !bulk.hasFailures();
}
// 2、根据keyword分页查询结果
public List<Map<String, Object>> search(String keyword, Integer pageIndex, Integer pageSize) throws IOException {
if (pageIndex < 0){
pageIndex = 0;
}
SearchRequest jd_goods = new SearchRequest("jd_goods");
// 创建搜索源建造者对象
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
// 条件采用:精确查询 通过keyword查字段name
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("name", keyword);
searchSourceBuilder.query(termQueryBuilder);
searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));// 60s
// 分页
searchSourceBuilder.from(pageIndex);
searchSourceBuilder.size(pageSize);
// 高亮
// ....
// 搜索源放入搜索请求中
jd_goods.source(searchSourceBuilder);
// 执行查询,返回结果
SearchResponse searchResponse = restHighLevelClient.search(jd_goods, RequestOptions.DEFAULT);
restHighLevelClient.close();
// 解析结果
SearchHits hits = searchResponse.getHits();
List<Map<String,Object>> results = new ArrayList<>();
for (SearchHit documentFields : hits.getHits()) {
Map<String, Object> sourceAsMap = documentFields.getSourceAsMap();
results.add(sourceAsMap);
}
// 返回查询的结果
return results;
}
// 3、 在2的基础上进行高亮查询
public List<Map<String, Object>> highlightSearch(String keyword, Integer pageIndex, Integer pageSize) throws IOException {
SearchRequest searchRequest = new SearchRequest("jd_goods");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
// 精确查询,添加查询条件
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("name", keyword);
searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
searchSourceBuilder.query(termQueryBuilder);
// 分页
searchSourceBuilder.from(pageIndex);
searchSourceBuilder.size(pageSize);
// 高亮 =========
HighlightBuilder highlightBuilder = new HighlightBuilder();
highlightBuilder.field("name");
highlightBuilder.preTags("");
highlightBuilder.postTags("");
searchSourceBuilder.highlighter(highlightBuilder);
// 执行查询
searchRequest.source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
// 解析结果 ==========
SearchHits hits = searchResponse.getHits();
List<Map<String, Object>> results = new ArrayList<>();
for (SearchHit documentFields : hits.getHits()) {
// 使用新的字段值(高亮),覆盖旧的字段值
Map<String, Object> sourceAsMap = documentFields.getSourceAsMap();
// 高亮字段
Map<String, HighlightField> highlightFields = documentFields.getHighlightFields();
HighlightField name = highlightFields.get("name");
// 替换
if (name != null){
Text[] fragments = name.fragments();
StringBuilder new_name = new StringBuilder();
for (Text text : fragments) {
new_name.append(text);
}
sourceAsMap.put("name",new_name.toString());
}
results.add(sourceAsMap);
}
return results;
}
}
controller层:
**
* @Author: zhangximing
* @Email: 530659058@qq.com
* @Date: 2022/7/10 11:32
* @Description: 查询内容
*/
@Controller
public class ContentController {
@Autowired
private ContentService contentService;
/**
* 爬虫获取并插入至es
* @param keyword
* @return
* @throws IOException
*/
@ResponseBody
@GetMapping("/parse/{keyword}")
public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
return contentService.parseContent(keyword);
}
/**
* 普通分页查询
* @param keyword
* @param pageIndex
* @param pageSize
* @return
* @throws IOException
*/
@ResponseBody
@GetMapping("/search/{keyword}/{pageIndex}/{pageSize}")
public List<Map<String, Object>> parse(@PathVariable("keyword") String keyword,
@PathVariable("pageIndex") Integer pageIndex,
@PathVariable("pageSize") Integer pageSize) throws IOException {
System.out.println("keyword:"+keyword);
System.out.println("pageIndex:"+pageIndex);
System.out.println("pageSize:"+pageSize);
return contentService.search(keyword,pageIndex,pageSize);
}
/**
* 高亮分页查询
* @param keyword
* @param pageIndex
* @param pageSize
* @return
* @throws IOException
*/
@ResponseBody
@GetMapping("/h_search/{keyword}/{pageIndex}/{pageSize}")
public List<Map<String, Object>> highlightParse(@PathVariable("keyword") String keyword,
@PathVariable("pageIndex") Integer pageIndex,
@PathVariable("pageSize") Integer pageSize) throws IOException {
System.out.println("keyword:"+keyword);
System.out.println("pageIndex:"+pageIndex);
System.out.println("pageSize:"+pageSize);
return contentService.highlightSearch(keyword,pageIndex,pageSize);
}
}
Elasticsearch学习视频:https://www.bilibili.com/video/BV17a4y1x7zq
Elasticsearch学习笔记:https://www.kuangstudy.com/bbs/1354069127022583809
两天学习完的,可能很多地方没有讲清楚,望谅解并提出建议哈