<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.4.2</version>
</dependency>
还需要在pom中指定ES的版本:
<properties>
<elasticsearch.version>7.4.2</elasticsearch.version>
</properties>
/**
 * Configuration class that provides the Elasticsearch {@code RestHighLevelClient}
 * bean and the request options shared by all Elasticsearch calls.
 */
@Configuration
public class GreymallElasticSearchConfig {

    /** Request options passed along with every Elasticsearch request. */
    public static final RequestOptions COMMON_OPTIONS = buildCommonOptions();

    /**
     * Builds the shared request options, starting from {@code RequestOptions.DEFAULT}.
     *
     * @return the built options; currently identical to the defaults because the
     *         customizations below are commented out
     */
    private static RequestOptions buildCommonOptions() {
        RequestOptions.Builder optionsBuilder = RequestOptions.DEFAULT.toBuilder();
        // Optional customizations, left disabled:
        // optionsBuilder.addHeader("Authorization", "Bearer " + TOKEN);
        // optionsBuilder.setHttpAsyncResponseConsumerFactory(
        //         new HttpAsyncResponseConsumerFactory
        //                 .HeapBufferedResponseConsumerFactory(30 * 1024 * 1024 * 1024));
        // NOTE(review): 30 * 1024 * 1024 * 1024 overflows int arithmetic; re-check the
        // intended buffer limit before enabling the line above.
        return optionsBuilder.build();
    }

    /**
     * Creates the Elasticsearch {@code RestHighLevelClient}.
     *
     * @return a client that talks to the configured host over HTTP on port 9200
     */
    @Bean
    public RestHighLevelClient esRestClient() {
        // Placeholder host: replace with the address where Elasticsearch is deployed.
        HttpHost esNode = new HttpHost("ES部署的ip", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(esNode));
    }
}
// Test: save (index) a document into Elasticsearch.
@Test
void indexData() throws IOException {
    // Build the document to store.
    User sampleUser = new User();
    sampleUser.setUserName("wei-xhh");
    sampleUser.setAge(20);
    sampleUser.setGender("男");

    // Target index "users", document id "1"; the body is the user serialized as JSON.
    IndexRequest indexRequest = new IndexRequest("users")
            .id("1")
            .source(JSON.toJSONString(sampleUser), XContentType.JSON);

    IndexResponse response = client.index(indexRequest, GreymallElasticSearchConfig.COMMON_OPTIONS);
}
更多可以参考文档
// Test: query data, with aggregations, from the "bank" index.
@Test
void searchData2() throws IOException {
    // Equivalent query DSL:
    // GET /bank/_search
    // {
    //   "query": {
    //     "term": { "address": { "value": "mill" } }
    //   },
    //   "aggs": {
    //     "ageAvg":     { "avg":   { "field": "age" } },
    //     "balanceAgg": { "terms": { "field": "balance", "size": 10 } }
    //   }
    // }
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.query(QueryBuilders.termQuery("address", "mill"));
    // Distribution of documents by balance value.
    sourceBuilder.aggregation(AggregationBuilders.terms("balanceAgg").field("balance"));
    // Average age.
    sourceBuilder.aggregation(AggregationBuilders.avg("ageAvg").field("age"));

    SearchRequest bankSearch = new SearchRequest();
    bankSearch.indices("bank");
    bankSearch.source(sourceBuilder);
    System.out.println(sourceBuilder.toString());

    SearchResponse response = client.search(bankSearch, GreymallElasticSearchConfig.COMMON_OPTIONS);
    System.out.println(response.toString());

    // Map each hit's _source JSON onto a ResultData object.
    for (SearchHit hit : response.getHits().getHits()) {
        ResultData row = JSON.parseObject(hit.getSourceAsString(), ResultData.class);
        System.out.println(row);
    }

    // Read the aggregation results back by the names they were registered under.
    Aggregations aggs = response.getAggregations();

    Terms balanceBuckets = aggs.get("balanceAgg");
    for (Terms.Bucket bucket : balanceBuckets.getBuckets()) {
        System.out.println("薪资" + bucket.getKeyAsString());
    }

    Avg averageAge = aggs.get("ageAvg");
    System.out.println("平均年龄" + averageAge.getValue());
}
更多可以参考文档
保存的文档应该怎样呢?
情况1:
保存sku信息和属性信息(都在一个索引下)
如:
{
"skuId":1,
"skuTitle":"华为xx",
"price":998,
"saleCount":99,
"attrs":[
{
"尺寸":"5寸"},
{
"CPU":"高通945"},
{
"分辨率":"全高清"}
]
}
后果:产生冗余字段
假设
100万个产品,有20个属性(20个属性假设有2kb)
1000000 * 2kb = 2000MB = 2G 内存
情况2:
保存有用的信息(不同索引)
如:
sku索引
{
"skuId":1,
"spuId":11,
"xxx":"xxx"
}
attr索引, 可以看到属性只存了一次
{
"spuId":11,
"attrs":[
{
"尺寸":"5寸"},
{
"CPU":"高通945"},
{
"分辨率":"全高清"}
]
}
没有像第一种情况的冗余了。
但实际项目中需要根据attrs动态查询到sku:
如 搜索 小米;
假设带小米的商品有10000个,涉及到4000个spu
需要做分步查询:查出4000个spu对应的所有可能属性。
一个请求:
esClient : spuId:[4000个spuId] 4000 * 8个字节 = 32000字节 ≈ 32kb
10000个人同时请求:
32kb * 10000 = 320000kb ≈ 320MB(若并发达到100万,则约为32GB)
只能选第一个:空间换时间
商品的映射信息
在kibana中执行
PUT product
{
"mappings": {
"properties": {
"skuId": {
"type": "long"
},
"spuId": {
"type": "keyword"
},
"skuTitle": {
"type": "text",
"analyzer":"ik_smart"
},
"skuPrice":{
"type":"keyword"
},
"skuImg":{
"type":"keyword",
"index":false,
"doc_values":false
},
"saleCount":{
"type":"long"
},
"hasStock":{
"type":"boolean"
},
"hotScore":{
"type":"long"
},
"brandId":{
"type":"long"
},
"catelogId":{
"type":"long"
},
"brandName":{
"type":"keyword",
"index":false,
"doc_values":false
},
"brandImg":{
"type":"keyword",
"index":false,
"doc_values":false
},
"catalogName":{
"type":"keyword",
"index":false,
"doc_values":false
},
"attrs":{
"type":"nested",
"properties":{
"attrId":{
"type":"long"
},
"attrName":{
"type":"keyword",
"index":false,
"doc_values":false
},
"attrValue":{
"type":"keyword"
}
}
}
}
}
}
[https://www.elastic.co/guide/en/elasticsearch/reference/7.x/nested.html]
扁平化处理
如ES开发文档的例子
PUT my_index/_doc/1
{
"group" : "fans",
"user" : [
{
"first" : "John",
"last" : "Smith"
},
{
"first" : "Alice",
"last" : "White"
}
]
}
保存了上述数据后 ,会被扁平化处理
上述数据:存入了两个用户,有first和last属性
保存被扁平化成这样:
{
"group" : "fans",
"user.first" : [ "alice", "john" ],
"user.last" : [ "smith", "white" ]
}
假设检索 first 为 Alice 且 last 为 Smith 的用户:Alice 和 Smith 其实分别属于两个不同的用户,但由于被扁平化,这条文档仍然会被检索到
GET my_index/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"user.first": "Alice" }},
{
"match": {
"user.last": "Smith" }}
]
}
}
}
所以对象数组要使用nested(嵌套)类型,使用后数组中的每个对象会被独立索引,就不会出现扁平化。
/**
 * Model of a SKU document for Elasticsearch.
 *
 * <p>The field names here match the field names declared in the {@code product}
 * index mapping shown earlier in this document.
 *
 * <p>NOTE(review): {@code @Data} is presumably Lombok's, which would generate the
 * accessors for these private fields; confirm against the imports.
 */
@Data
public class SkuEsModel {
private Long skuId;
// NOTE(review): typed Long here, while the product mapping declares spuId as keyword.
private Long spuId;
private String skuTitle;
// NOTE(review): typed BigDecimal here, while the product mapping declares skuPrice as keyword.
private BigDecimal skuPrice;
private String skuImg;
private Long saleCount;
private Boolean hasStock;
private Long hotScore;
private Long brandId;
// Spelled "catelog" here but "catalog" in catalogName below; both spellings match the mapping.
private Long catelogId;
private String brandName;
private String brandImg;
private String catalogName;
// Corresponds to the "attrs" field, which the product mapping declares as type nested.
private List<Attrs> attrs;
/**
 * One attribute entry of a SKU; its fields match the properties of the
 * nested {@code attrs} field in the product mapping.
 */
@Data
public static class Attrs{
private Long attrId;
private String attrName;
private String attrValue;
}
}