目录
1、ElasticSearch编程操作
1.1 依赖坐标
1.2 创建客户端
1.3 创建索引
1.4 查询索引
1.5 创建文档
1.6 修改文档
1.7 Bulk批量操作
1.8 根据ID查询文档
1.9 高级查询
2、Spring Data ElasticSearch
2.1 什么是Spring Data
2.2 什么是Spring Data ElasticSearch
2.3 Spring集成Spring Data ElasticSearch
3、聚合分析
3.1 ES聚合分析是什么
3.2 指标(metric)和 桶(bucket)
3.3 添加测试数据
3.4 最小、最大和平均
3.5 聚合多值
3.6 类型统计
3.7 分组统计
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>7.6.2</version>
</dependency>
/**
 * Builds the high-level REST client used by the examples.
 * The client talks plain HTTP to the node at 192.168.223.128:9200.
 */
@Before
public void before() {
    HttpHost esNode = new HttpHost("192.168.223.128", 9200, "http");
    client = new RestHighLevelClient(RestClient.builder(esNode));
}
// 1. Create the "blog1" index. For anything index-related, prefer the request
//    classes from the org.elasticsearch.client.indices package.
CreateIndexRequest request = new CreateIndexRequest("blog1");
request.settings(Settings.builder()
        .put("index.number_of_shards", 1)    // number of primary shards
        .put("index.number_of_replicas", 1)  // number of replicas
);

// Per-field mapping definitions. The maps are parameterized (instead of raw
// Map/HashMap) so the compiler checks what is put into them.
Map<String, Object> id = new HashMap<>();
id.put("type", "long");

Map<String, Object> title = new HashMap<>();
title.put("type", "text");
title.put("analyzer", "ik_max_word");

Map<String, Object> content = new HashMap<>();
content.put("type", "text");
content.put("analyzer", "ik_max_word");

Map<String, Object> salary = new HashMap<>();
salary.put("type", "long");

// Collect the field definitions under "properties".
Map<String, Object> properties = new HashMap<>();
properties.put("id", id);
properties.put("title", title);
properties.put("content", content);
properties.put("salary", salary);

Map<String, Object> mapping = new HashMap<>();
mapping.put("properties", properties);
request.mapping(mapping);

CreateIndexResponse createIndexResponse = client.indices().create(request, RequestOptions.DEFAULT);
// Prints the name of the index reported by the response.
System.out.println(createIndexResponse.index());
// 2. Check whether the "blog1" index exists and print the boolean answer.
GetIndexRequest existsRequest = new GetIndexRequest("blog1");
System.out.println(client.indices().exists(existsRequest, RequestOptions.DEFAULT));
// Index 30 sample documents into "blog1".
// When a class's documentation recommends the org.elasticsearch.client.Requests
// factory for creating requests, prefer that factory (as done here).
final String titleText = "ElasticSearch是一个基于Lucene的搜索服务器";
final String contentText = "它提供了一个分布式多用户能力的全文搜索引擎,基于RESTful web接口。"
        + "Elasticsearch是用Java 开发的,并作为Apache许可条款下的开放源码发布,"
        + "是当前流行的企业级搜索引擎。设计用于云计算中,"
        + "能够达到实时 搜索,稳定,可靠,快速,安装使用方便。";
Random salaryGenerator = new Random();
IndexRequest indexRequest = Requests.indexRequest("blog1");
for (int docNo = 0; docNo < 30; docNo++) {
    // The loop counter doubles as the unique document id.
    indexRequest.id(String.valueOf(docNo));
    // Key/value varargs form of source(); other overloads exist as well.
    indexRequest.source(
            "id", docNo,
            "title", docNo + titleText,
            "content", docNo + contentText,
            "salary", salaryGenerator.nextInt(100));
    IndexResponse indexed = client.index(indexRequest, RequestOptions.DEFAULT);
    System.out.println(indexed);
}
// Update document "1" of index "blog1", supplying new title and content values.
String newTitle = "(修改)ElasticSearch是一个基于Lucene的搜索服务器";
String newContent = "(修改)它提供了一个分布式多用户能力的全文搜索引擎,基于RESTful web接口。Elasticsearch是用Java 开发的,并作为Apache许可条款下的开放源码发布,是当前流行的企业级搜索引擎。设计用于云计算中,能够达到实时 搜索,稳定,可靠,快速,安装使用方便。";
// The request is addressed by index name and document id.
UpdateRequest updateRequest = new UpdateRequest("blog1", "1");
// Key/value varargs form of doc(); other overloads exist as well.
updateRequest.doc("title", newTitle, "content", newContent);
UpdateResponse updated = client.update(updateRequest, RequestOptions.DEFAULT);
System.out.println(updated);
// Bulk request: a delete, an update and an index operation sent in one call.
DeleteRequest deleteDoc1 = new DeleteRequest("blog1", "1");
UpdateRequest updateDoc2 = new UpdateRequest("blog1", "2")
        .doc(XContentType.JSON, "title", "哈哈哈");
IndexRequest indexDoc3 = new IndexRequest("blog1").id("3")
        .source(XContentType.JSON, "title", "呵呵呵");

BulkRequest bulk = new BulkRequest();
bulk.add(deleteDoc1);
bulk.add(updateDoc2);
bulk.add(indexDoc3);

BulkResponse bulkResult = client.bulk(bulk, RequestOptions.DEFAULT);
// hasFailures() is the failure flag: false means every operation succeeded.
System.out.println(bulkResult.hasFailures());
// Fetch document "2" from "blog1" by id and print its source as a string.
GetRequest byId = Requests.getRequest("blog1").id("2");
System.out.println(client.get(byId, RequestOptions.DEFAULT).getSourceAsString());
1.9.1 精确查询
/**
 * Exact-match query demo: runs a term query on the "title" field of "blog1"
 * and prints the total hit count followed by each hit's source.
 *
 * @throws IOException if the search request fails
 */
@Test
public void test71() throws IOException {
    // Single-term exact match; the search text itself is not analyzed.
    SearchSourceBuilder source = new SearchSourceBuilder()
            .query(QueryBuilders.termQuery("title", "服务器"));

    // Alternatives kept from the original for experimentation:
    //   several exact terms, search text not analyzed:
    //     source.query(QueryBuilders.termsQuery("title","服务器1","服务器2"));
    //   analyzed match on one field:
    //     source.query(QueryBuilders.matchQuery("title","服务2器1").analyzer("ik_smart"));
    //   analyzed match across several fields (union):
    //     source.query(QueryBuilders.multiMatchQuery("全文哈哈", "title", "content"));

    SearchRequest search = new SearchRequest("blog1").source(source);
    SearchHits found = client.search(search, RequestOptions.DEFAULT).getHits();

    System.out.println("共收到" + found.getTotalHits() + "条结果");
    for (SearchHit doc : found.getHits()) {
        System.out.println(doc.getSourceAsString());
    }
}
1.9.2 模糊查询
/**
 * Fuzzy-style query demo: runs a query_string query on the "title" field of
 * "blog1" (analyzed with ik_smart) and prints the hit count and each source.
 *
 * @throws IOException if the search request fails
 */
@Test
public void test72() throws IOException {
    // Analyzed query-string search restricted to the title field.
    SearchSourceBuilder source = new SearchSourceBuilder()
            .query(QueryBuilders.queryStringQuery("服务器").field("title").analyzer("ik_smart"));

    // Alternative kept from the original: wildcard query. '*' matches any string,
    // the pattern is not analyzed; comparable to SQL LIKE.
    //   source.query(QueryBuilders.wildcardQuery("title", "哈哈*"));

    SearchRequest search = new SearchRequest("blog1").source(source);
    SearchHits found = client.search(search, RequestOptions.DEFAULT).getHits();

    System.out.println("共收到" + found.getTotalHits() + "条结果");
    for (SearchHit doc : found.getHits()) {
        System.out.println(doc.getSourceAsString());
    }
}
1.9.3 范围查询
/**
 * Range query demo: selects documents of "blog1" with 2 <= id < 5 and prints
 * the hit count and each hit's source.
 *
 * @throws IOException if the search request fails
 */
@Test
public void test73() throws IOException {
    // Alternatives kept from the original (from/to form of the interval):
    //   source.query(QueryBuilders.rangeQuery("id").from(1).to(3));
    //   source.query(QueryBuilders.rangeQuery("id").from(1).to(3).includeUpper(false).includeLower(true));

    // Greater-than-or-equal / less-than form.
    SearchSourceBuilder source = new SearchSourceBuilder()
            .query(QueryBuilders.rangeQuery("id").gte(2).lt(5));

    SearchRequest search = new SearchRequest("blog1").source(source);
    SearchHits found = client.search(search, RequestOptions.DEFAULT).getHits();

    System.out.println("共收到" + found.getTotalHits() + "条结果");
    for (SearchHit doc : found.getHits()) {
        System.out.println(doc.getSourceAsString());
    }
}
1.9.4 组合查询
// Boolean compound query: must ~ AND, must_not ~ NOT, should ~ OR.
// Both must clauses below have to hold at the same time, so contradictory
// clauses yield no hits and overlapping ones reduce to the stricter bound
// (here effectively id > 15).
SearchSourceBuilder source = new SearchSourceBuilder();
source.query(
        QueryBuilders.boolQuery()
                .must(QueryBuilders.rangeQuery("id").gt(10))
                .must(QueryBuilders.rangeQuery("id").gt(15)));

SearchRequest search = new SearchRequest("blog1").source(source);
SearchHits found = client.search(search, RequestOptions.DEFAULT).getHits();

System.out.println("共收到" + found.getTotalHits() + "条结果");
for (SearchHit doc : found.getHits()) {
    System.out.println(doc.getSourceAsString());
}
1.9.5 分页查询
// Paged search over "blog1": match everything, return the first page of 5 hits.
SearchRequest request = new SearchRequest("blog1");
SearchSourceBuilder builder = new SearchSourceBuilder();
builder.query(QueryBuilders.matchAllQuery()); // no filter; without size() the default page holds 10 hits
builder.from(0).size(5);                      // offset 0, page size 5
request.source(builder);
try {
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    // Iterate with the element type spelled out: a raw Iterator only yields
    // Object, on which getSourceAsString() cannot be called.
    for (SearchHit hit : response.getHits()) {
        // The source is JSON text; convert it to an object if needed.
        System.out.println(hit.getSourceAsString());
    }
} finally {
    // Release the client even when the search throws.
    client.close();
}
1.9.6 查询结果高亮显示
/**
 * Highlighting demo: searches the "title" field of "blog1" and prints the
 * highlighted fragments of every hit.
 *
 * @throws IOException if the search request fails
 */
@Test
public void test8() throws IOException {
    SearchRequest request = new SearchRequest("blog1");
    SearchSourceBuilder builder = new SearchSourceBuilder();
    // The highlighter marks the terms matched by the query, so a query must be
    // set; without one there is nothing to highlight.
    builder.query(QueryBuilders.termQuery("title", "服务器"));

    HighlightBuilder highlightBuilder = new HighlightBuilder();
    // Non-empty tags so the matched text is actually distinguishable.
    // NOTE(review): the original tag strings were empty (probably lost when the
    // article was extracted); <em> is used here as a neutral choice.
    highlightBuilder.preTags("<em>");
    highlightBuilder.postTags("</em>");
    highlightBuilder.field("title");
    builder.highlighter(highlightBuilder);

    request.source(builder);
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    SearchHits hits = response.getHits();
    System.out.println("共收到" + hits.getTotalHits() + "条结果");
    for (SearchHit hit : hits) {
        System.out.println(hit.getHighlightFields());
    }
}
Spring Data是一个用于简化数据库访问,并支持云服务的开源框架。其主要目标是使得对数据的访问变得方便快捷,并支持map-reduce框架和云计算数据服务。Spring Data可以极大地简化JPA的写法,可以在几乎不用写实现的情况下,实现对数据的访问和操作。除了CRUD外,还包括如分页、排序等一些常用的功能。
Spring Data ElasticSearch 基于 Spring Data API 简化 Elasticsearch 操作,将原始操作 Elasticsearch 的客户端 API 进行封装。Spring Data 为 Elasticsearch 项目提供集成搜索引擎的能力。Spring Data Elasticsearch 的关键功能是以 POJO 为中心的模型,用于与 Elasticsearch 文档交互,并可轻松编写存储库(Repository)风格的数据访问层。
2.3.1 依赖坐标
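<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>7.6.2</version>
</dependency>
<dependency>
    <groupId>org.springframework.data</groupId>
    <artifactId>spring-data-elasticsearch</artifactId>
    <version>4.0.1.RELEASE</version>
</dependency>
<dependency>
    <groupId>org.springframework</groupId>
    <artifactId>spring-test</artifactId>
    <version>5.2.7.RELEASE</version>
</dependency>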
2.3.2 es实体对象类
package com.ydt.elasticsearch.domain;
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;
/*
 * Spring Data declares field mappings through annotations. Three are used here:
 *
 * - @Document (on the class): marks the entity as a document object. Attributes
 *   listed in the original note:
 *     indexName   - name of the index
 *     type        - type inside the index (the type concept was dropped in Elasticsearch 7.x)
 *     shards      - number of shards (original note: default 5 - verify for the library version in use)
 *     replicas    - number of replicas (original note: default 1)
 *     createIndex - set to false here; per the original note this avoids an
 *                   index being created with the default standard analyzer,
 *                   which would otherwise cause an exception
 * - @Id (on a field): marks the field as the document id.
 * - @Field (on a field): marks the field as a document field and sets its mapping:
 *     type           - field type, a FieldType enum constant
 *     index          - boolean, default true per the original note, which describes it as
 *                      "whether to analyze". NOTE(review): in Spring Data Elasticsearch
 *                      this appears to control whether the field is indexed - confirm
 *     store          - boolean, whether the field is stored (original note: default false)
 *     analyzer       - analyzer used when storing, e.g. ik_max_word
 *     searchAnalyzer - analyzer used when searching
 */
@Document(indexName = "blog2", type = "article", createIndex = false)
public class Article {

    /** Document id. */
    @Id
    @Field(store = false, index = false, type = FieldType.Integer)
    private Integer id;

    /** Title text, analyzed with ik_max_word for both storing and searching. */
    @Field(index = true, analyzer = "ik_max_word", searchAnalyzer = "ik_max_word", type = FieldType.Text)
    private String title;

    /** Body text, analyzed with ik_max_word for both storing and searching. */
    @Field(index = true, analyzer = "ik_max_word", searchAnalyzer = "ik_max_word", type = FieldType.Text)
    private String content;

    /** Salary value. */
    @Field(store = false, index = false, type = FieldType.Integer)
    private Integer salary;

    public Integer getId() {
        return this.id;
    }

    public void setId(Integer id) {
        this.id = id;
    }

    public String getTitle() {
        return this.title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getContent() {
        return this.content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public Integer getSalary() {
        return this.salary;
    }

    public void setSalary(Integer salary) {
        this.salary = salary;
    }

    /** Renders all four fields in the form {@code Article{id=..., title='...', content='...', salary=...}}. */
    @Override
    public String toString() {
        return String.format(
                "Article{id=%s, title='%s', content='%s', salary=%s}",
                id, title, content, salary);
    }
}
2.3.3 es 数据操作接口(dao)
package com.ydt.elasticsearch.dao;
import com.ydt.elasticsearch.domain.Article;
import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;
import org.springframework.stereotype.Repository;
/**
 * Repository for {@link Article} documents.
 *
 * The type arguments are required: the entity type is {@code Article} and the
 * id type is {@code Integer} (the type of {@code Article}'s {@code @Id} field).
 * With the raw {@code ElasticsearchRepository} type, callers get untyped
 * results (for example a raw {@code Page}) that cannot be iterated as
 * {@code Article}.
 */
@Repository
public interface ArticleRepository extends ElasticsearchRepository<Article, Integer> {
}
2.3.4 spring配置文件
2.3.5 增删改查测试
package elasticsearch;
import com.ydt.elasticsearch.dao.ArticleRepository;
import com.ydt.elasticsearch.domain.Article;
import java.util.Random;
import org.elasticsearch.client.transport.TransportClient;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
/**
 * Spring-driven tests exercising index management and basic CRUD for
 * {@link Article} through {@code ElasticsearchRestTemplate} and
 * {@code ArticleRepository}. The Spring context is loaded from
 * {@code classpath:applicationContext.xml}.
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = "classpath:applicationContext.xml")
public class SpringbootElasticsearchApplicationTests2 {

    @Autowired
    private ElasticsearchRestTemplate elasticsearchTemplate;

    @Autowired
    private ArticleRepository articleRepository;

    /** Creates the index for {@code Article} and puts its mapping. */
    @Test
    public void test1() {
        elasticsearchTemplate.createIndex(Article.class);
        elasticsearchTemplate.putMapping(Article.class);
        // Alternative via IndexOperations, kept from the original for reference:
        //   IndexOperations ops = elasticsearchTemplate.indexOps(Article.class);
        //   ops.create();
        //   ops.putMapping(ops.createMapping(Article.class));
    }

    /** Deletes the "blog2" index. */
    @Test
    public void test2() {
        elasticsearchTemplate.deleteIndex("blog2");
        // Alternative via IndexOperations, kept from the original for reference:
        //   elasticsearchTemplate.indexOps(Article.class).delete();
    }

    /** Saves 49 sample articles (ids 1..49), each with a random salary in [0, 100). */
    @Test
    public void test3() {
        Random random = new Random();
        for (int i = 1; i < 50; i++) {
            Article article = new Article();
            article.setId(i);
            article.setTitle(i + "hello");
            article.setContent(i + "hello es world");
            // setSalary takes an Integer; passing a Long does not compile.
            article.setSalary(random.nextInt(100));
            articleRepository.save(article);
        }
    }

    /** Reads the first page (page index 0, 3 entries) and prints each article. */
    @Test
    public void test4() {
        Pageable pageable = PageRequest.of(0, 3);
        // Typed page so its content can be iterated as Article.
        Page<Article> page = articleRepository.findAll(pageable);
        for (Article article : page.getContent()) {
            System.out.println(article);
        }
    }
}
2.3.6 常用查询命名规则
/**
 * Repository for {@link Article} documents (entity type {@code Article},
 * id type {@code Integer}), extended with a query method named after the
 * property it searches.
 */
@Repository
public interface ArticleRepository extends ElasticsearchRepository<Article, Integer> {

    /**
     * Finds articles by title.
     *
     * @param condition the title value to search for
     * @return the matching articles, typed so callers can iterate them as {@code Article}
     */
    List<Article> findByTitle(String condition);
}
/** Looks up articles whose title matches "5hello" and prints each one. */
@Test
public void test5() {
    // Typed list: a raw List only yields Object and cannot be iterated as Article.
    List<Article> articleList = articleRepository.findByTitle("5hello");
    for (Article article : articleList) {
        System.out.println(article);
    }
}
ElasticSearch虽然最强大的功能是全文索引,但是同时它作为一个数据库,也提供了像MySQL那样的聚合功能,比如:获取一组数据的 最大值(Max)、最小值(Min)。同样我们能够对这组数据进行 分组(Group)。
官方对聚合有四个关键字:Metric(指标)、Bucketing(桶)、Matrix(矩阵)、Pipeline(管道)。
Metric(指标): 指标分析类型,如计算最大值、最小值、平均值等等 (对桶内的文档进行聚合分析的操作)
Bucket(桶): 分桶类型,类似SQL中的GROUP BY语法 (满足特定条件的文档的集合)
Pipeline(管道): 管道分析类型,基于上一级的聚合分析结果进行再分析
Matrix(矩阵): 矩阵分析类型(聚合是一种面向数值型的聚合,用于计算一组文档字段中的统计信息)
虽然Elasticsearch有四种聚合方式,但在一般实际开发中,用到的比较多的就是Metric和Bucket。
(1) 桶(bucket)
a、简单来说桶就是满足特定条件的文档的集合。
b、当聚合开始被执行,每个文档里面的值通过计算来决定符合哪个桶的条件,如果匹配到,文档将放入相应的桶并接着开始聚合操作。
c、桶也可以被嵌套在其他桶里面。
(2)指标(metric)
a、桶能让我们划分文档到有意义的集合,但是最终我们需要的是对这些桶内的文档进行一些指标的计算。分桶是一种达到目的的手段:它提供了一种给文档分组的方法来让我们可以计算感兴趣的指标。
b、大多数指标是简单的数学运算(如:最小值、平均值、最大值、汇总),这些是通过文档的值来计算的。
因为我们只是做聚合分析测试,字段可以不需要进行分词(以下都是kibana上操作)
# Create the "employees" index with explicit mappings:
# age and salary are integers, gender and name are keywords,
# job is text with an additional keyword sub-field (job.keyword).
PUT /employees
{
"mappings": {
"properties": {
"age": {
"type": "integer"
},
"gender": {
"type": "keyword"
},
"job": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 50
}
}
},
"name": {
"type": "keyword"
},
"salary": {
"type": "integer"
}
}
}
}
# Bulk-insert 10 documents; each one carries name, age, job, gender and salary
PUT /employees/_bulk
{"index":{"_id":"1"}}
{"name":"Emma","age":32,"job":"Product Manager","gender":"female","salary":35000}
{"index":{"_id":"2"}}
{"name":"Underwood","age":41,"job":"Dev Manager","gender":"male","salary":50000}
{"index":{"_id":"3"}}
{"name":"Tran","age":25,"job":"Web Designer","gender":"male","salary":18000}
{"index":{"_id":"4"}}
{"name":"Rivera","age":26,"job":"Web Designer","gender":"female","salary":22000}
{"index":{"_id":"5"}}
{"name":"Rose","age":25,"job":"QA","gender":"female","salary":18000}
{"index":{"_id":"6"}}
{"name":"Lucy","age":31,"job":"QA","gender":"female","salary":25000}
{"index":{"_id":"7"}}
{"name":"Byrd","age":27,"job":"QA","gender":"male","salary":20000}
{"index":{"_id":"8"}}
{"name":"Foster","age":27,"job":"Java Programmer","gender":"male","salary":20000}
{"index":{"_id":"9"}}
{"name":"Gregory","age":32,"job":"Java Programmer","gender":"male","salary":22000}
{"index":{"_id":"10"}}
{"name":"Bryant","age":20,"job":"Java Programmer","gender":"male","salary":9000}
3.4.1 原生API
# Request max, min and avg aggregations over the salary field
# (size is 0, so no search hits are requested)
POST employees/_search
{
"size": 0,
"aggs": {
"max_salary": {
"max": {
"field": "salary"
}
},
"min_salary": {
"min": {
"field": "salary"
}
},
"avg_salary": {
"avg": {
"field": "salary"
}
}
}
}
3.4.2 高级API
// Max / min / average of the "salary" field over the "employees" index.
SearchSourceBuilder source = new SearchSourceBuilder();
source.aggregation(AggregationBuilders.max("max_salary").field("salary"));
source.aggregation(AggregationBuilders.min("min_salary").field("salary"));
source.aggregation(AggregationBuilders.avg("avg_salary").field("salary"));
source.size(0);

SearchRequest search = new SearchRequest("employees");
search.source(source);
Aggregations results = client.search(search, RequestOptions.DEFAULT).getAggregations();

// Look each aggregation up by the name it was registered under.
ParsedMax maxSalary = results.get("max_salary");
System.out.println(maxSalary.getName() + ":" + maxSalary.getValue());
ParsedMin minSalary = results.get("min_salary");
System.out.println(minSalary.getName() + ":" + minSalary.getValue());
ParsedAvg avgSalary = results.get("avg_salary");
System.out.println(avgSalary.getName() + ":" + avgSalary.getValue());
3.5.1 原生API
# Request a stats aggregation over the salary field
# (size is 0, so no search hits are requested)
POST employees/_search
{
"size": 0,
"aggs": {
"stats_salary": {
"stats": {
"field": "salary"
}
}
}
}
3.5.2 高级API
// Stats aggregation over "salary". This mirrors the console request of this
// section, so it targets the "employees" index and requests no hits (size 0).
SearchRequest request = new SearchRequest("employees");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
StatsAggregationBuilder statsAggregationBuilder = AggregationBuilders.stats("stats_salary").field("salary");
searchSourceBuilder.aggregation(statsAggregationBuilder);
searchSourceBuilder.size(0);
request.source(searchSourceBuilder);
SearchResponse searchResponse = client.search(request, RequestOptions.DEFAULT);
Aggregations aggregations = searchResponse.getAggregations();
ParsedStats stats = (ParsedStats) aggregations.getAsMap().get("stats_salary");
// Print every value of the multi-value aggregation, not just the count.
System.out.println("count:" + stats.getCount());
System.out.println("min:" + stats.getMin());
System.out.println("max:" + stats.getMax());
System.out.println("avg:" + stats.getAvg());
System.out.println("sum:" + stats.getSum());
3.6.1 原生API
# Request a cardinality aggregation over job.keyword
# (size is 0, so no search hits are requested)
POST employees/_search
{
"size": 0,
"aggs": {
"cardinate": {
"cardinality": {
"field": "job.keyword"
}
}
}
}
3.6.2 高级API
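(原文此处未给出示例代码,留给读者练习:可参照上文 3.4、3.5 的写法,使用 AggregationBuilders.cardinality("cardinate").field("job.keyword") 构建聚合,再从返回的聚合结果中按名称取出 ParsedCardinality 并读取去重后的数量。)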
比如我们统计每个工作类型薪资最高的数据
3.7.1 原生API
# Nested bucket aggregation: group by job.keyword, then by gender inside each
# job bucket, and compute the max salary inside each gender bucket
POST /employees/_search
{
"size": 0,
"aggs": {
"Job_gender_stats": {
"terms": {
"field": "job.keyword"
},
"aggs": {
"gender_stats": {
"terms": {
"field": "gender"
},
"aggs": {
"salary_stats": {
"max": {
"field": "salary"
}
}
}
}
}
}
}
}
3.7.2 高级API
@Test
public void test9() throws IOException {
SearchRequest request = new SearchRequest("employees");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
TermsAggregationBuilder termsAggregationBuilder
= AggregationBuilders.terms("job").field("job.keyword");
searchSourceBuilder.aggregation(termsAggregationBuilder);
searchSourceBuilder.size(0);
request.source(searchSourceBuilder);
SearchResponse searchResponse = client.search(request, RequestOptions.DEFAULT);
Aggregations aggregations = searchResponse.getAggregations();
Map asMap = aggregations.getAsMap();
Terms terms = (Terms) asMap.get("job");
List extends Terms.Bucket> buckets = terms.getBuckets();
for (Terms.Bucket bucket : buckets) {
System.out.println(bucket.getKeyAsString() + ":" + bucket.getDocCount());
}
}