{
"name" : "John",
"sex" : "Male",
"age" : 25,
"birthDate": "1990/05/01",
"about" : "I love to go rock climbing",
"interests": [ "sports", "music" ]
}
# 同时在三台机子上操作,注意node.name、network.host不能重复
[omm@bigdata01 ~]$ tar -zxf /opt/soft/elasticsearch-7.8.0-linux-x86_64.tar.gz -C /opt/module/
[omm@bigdata01 ~]$ ln -s /opt/module/elasticsearch-7.8.0 /opt/module/es
[omm@bigdata01 ~]$ cp /opt/module/es/config/elasticsearch.yml{,.bak}
[omm@bigdata01 ~]$ vi /opt/module/es/config/elasticsearch.yml
[omm@bigdata01 ~]$ cat /opt/module/es/config/elasticsearch.yml
# Cluster name; must be identical on all three nodes
cluster.name: cluster-es
# Node name; must be unique per node (node-1 / node-2 / node-3)
node.name: node-1
# Bind/publish address; must be unique per node (bigdata01 / bigdata02 / bigdata03)
network.host: bigdata01
# Whether this node is eligible to be elected master
node.master: true
node.data: true
http.port: 9200
# The head plugin requires these two CORS settings
http.cors.allow-origin: "*"
http.cors.enabled: true
http.max_content_length: 200mb
# New in ES 7.x: master-eligible nodes used to bootstrap a brand-new cluster
cluster.initial_master_nodes: ["node-1","node-2","node-3"]
# New in ES 7.x: seed hosts for node discovery
# (fixed: the third entry previously repeated bigdata01 instead of bigdata03,
#  so node-3 would never be discovered via this list)
discovery.seed_hosts: ["bigdata01:9300","bigdata02:9300","bigdata03:9300"]
gateway.recover_after_nodes: 2
network.tcp.keep_alive: true
network.tcp.no_delay: true
transport.tcp.compress: true
# Number of concurrent shard rebalance tasks cluster-wide (default 2)
cluster.routing.allocation.cluster_concurrent_rebalance: 16
# Concurrent recoveries per node when adding/removing nodes (default 4)
cluster.routing.allocation.node_concurrent_recoveries: 16
# Concurrent primary-shard recoveries per node during initial recovery (default 4)
cluster.routing.allocation.node_initial_primaries_recoveries: 16
[omm@bigdata01 ~]$
# 重启shell生效,否则会报“max file descriptors [4096] for elasticsearch process is too low”
[omm@bigdata01 ~]$ sudo cp /etc/security/limits.conf{,.bak}
[omm@bigdata01 ~]$ sudo vi /etc/security/limits.conf
[omm@bigdata01 ~]$ tail -2 /etc/security/limits.conf
omm soft nofile 65536
omm hard nofile 65536
[omm@bigdata01 ~]$ sudo cp /etc/security/limits.d/20-nproc.conf{,.bak}
[omm@bigdata01 ~]$ sudo vi /etc/security/limits.d/20-nproc.conf
[omm@bigdata01 ~]$ tail -3 /etc/security/limits.d/20-nproc.conf
omm soft nofile 65536
omm hard nofile 65536
* hard nproc 4096
[omm@bigdata01 ~]$ sudo cp /etc/sysctl.conf{,.bak}
[omm@bigdata01 ~]$ sudo vi /etc/sysctl.conf
[omm@bigdata01 ~]$ tail -1 /etc/sysctl.conf
vm.max_map_count=655360
[omm@bigdata01 ~]$ sudo sysctl -p
vm.max_map_count = 655360
[omm@bigdata01 ~]$ /opt/module/es/bin/elasticsearch -d
GET http://bigdata01:9200/_cluster/health
{
"cluster_name": "cluster-es",
"status": "green",
"timed_out": false,
"number_of_nodes": 3,
"number_of_data_nodes": 3,
"active_primary_shards": 0,
"active_shards": 0,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 0,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 0,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 0,
"active_shards_percent_as_number": 100.0
}
GET http://bigdata01:9200/_cat/nodes
192.168.1.102 14 46 0 0.10 0.21 0.14 dilmrt - node-2
192.168.1.101 15 35 0 0.11 0.23 0.15 dilmrt - node-1
192.168.1.103 7 47 0 0.17 0.27 0.18 dilmrt * node-3
PUT http://127.0.0.1:9200/shopping
{
"acknowledged": true,
"shards_acknowledged": true,
"index": "shopping"
}
GET http://127.0.0.1:9200/shopping
{
"shopping": {
"aliases": {},
"mappings": {},
"settings": {
"index": {
"creation_date": "1618233655684",
"number_of_shards": "1",
"number_of_replicas": "1",
"uuid": "UAVfEyfxR3yG5xKTJ6yFoQ",
"version": {
"created": "7080099"
},
"provided_name": "shopping"
}
}
}
}
v : verbose,打印索引的详细信息
GET http://127.0.0.1:9200/_cat/indices?v
health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
yellow open shopping UAVfEyfxR3yG5xKTJ6yFoQ 1 1 0 0 208b 208b
DELETE http://127.0.0.1:9200/shopping
{
"acknowledged": true
}
POST http://127.0.0.1:9200/shopping/_doc
BODY
{
"title":"小米手机",
"category":"小米",
"images":"http://www.gulixueyuan.com/xm.jpg",
"price":3999.00
}
RESULT
{
"_index": "shopping",
"_type": "_doc",
"_id": "aA5FxngBuOhsc8gNHj7x",
"_version": 1,
"result": "created",
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"_seq_no": 0,
"_primary_term": 1
}
POST http://127.0.0.1:9200/shopping/_doc/1001
BODY
{
"title":"小米手机",
"category":"小米",
"images":"http://www.gulixueyuan.com/xm.jpg",
"price":3999.00
}
RESULT
{
"_index": "shopping",
"_type": "_doc",
"_id": "1001",
"_version": 1,
"result": "created",
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"_seq_no": 1,
"_primary_term": 1
}
GET http://127.0.0.1:9200/shopping/_doc/1001
{
"_index": "shopping",
"_type": "_doc",
"_id": "1001",
"_version": 1,
"_seq_no": 1,
"_primary_term": 1,
"found": true,
"_source": {
"title": "小米手机",
"category": "小米",
"images": "http://www.gulixueyuan.com/xm.jpg",
"price": 3999.00
}
}
GET http://127.0.0.1:9200/shopping/_search
{
"took": 41,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "shopping",
"_type": "_doc",
"_id": "aA5FxngBuOhsc8gNHj7x",
"_score": 1.0,
"_source": {
"title": "小米手机",
"category": "小米",
"images": "http://www.gulixueyuan.com/xm.jpg",
"price": 3999.00
}
},
{
"_index": "shopping",
"_type": "_doc",
"_id": "1001",
"_score": 1.0,
"_source": {
"title": "小米手机",
"category": "小米",
"images": "http://www.gulixueyuan.com/xm.jpg",
"price": 3999.00
}
}
]
}
}
PUT http://127.0.0.1:9200/shopping/_doc/1001
BODY {
"title":"华为手机",
"category":"华为",
"images":"http://www.gulixueyuan.com/hw.jpg",
"price":3999.00
}
RESULT {
"_index": "shopping",
"_type": "_doc",
"_id": "1001",
"_version": 2,
"result": "updated",
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"_seq_no": 2,
"_primary_term": 1
}
POST http://127.0.0.1:9200/shopping/_update/1001
BODY {
"doc": {
"price": 4999.00
}
}
RESULT {
"_index": "shopping",
"_type": "_doc",
"_id": "1001",
"_version": 3,
"result": "updated",
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"_seq_no": 3,
"_primary_term": 1
}
DELETE http://127.0.0.1:9200/shopping/_doc/1001
{
"_index": "shopping",
"_type": "_doc",
"_id": "1001",
"_version": 4,
"result": "deleted",
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"_seq_no": 4,
"_primary_term": 1
}
PUT http://127.0.0.1:9200/student
PUT http://127.0.0.1:9200/student/_mapping
BODY {
"properties": {
"name":{
"type": "text",
"index": true
},
"sex":{
"type": "text",
"index": false
},
"age":{
"type": "long",
"index": false
}
}
}
RESULT {
"acknowledged": true
}
| 类型 | 子类型 | 说明 |
| --- | --- | --- |
| String | Text | 可分词 |
| String | keyword | 不可分词,数据会作为完整字段进行匹配 |
| Numerical | 基本数据类型 | long、integer、short、byte、double、float、half_float |
| Numerical | 浮点数的高精度类型 | scaled_float |
| Date | 日期类型 | |
| Array | 数组类型 | |
| Object | 对象 | |
index:是否索引,默认为true,也就是说你不进行任何配置,所有字段都会被索引。
2.1 true:字段会被索引,则可以用来进行搜索
2.2 false:字段不会被索引,不能用来搜索
store:是否将数据进行独立存储,默认为false
3.1 原始的文本会存储在_source里面,默认情况下其他提取出来的字段都不是独立存储的,是从_source里面提取出来的。
3.2 当然你也可以独立的存储某个字段,只要设置"store": true即可,获取独立存储的字段要比从_source中解析快得多,但是也会占用更多的空间,所以要根据实际业务需求来设置。
GET http://127.0.0.1:9200/student/_mapping
{
"student": {
"mappings": {
"properties": {
"age": {
"type": "long",
"index": false
},
"name": {
"type": "text"
},
"sex": {
"type": "text",
"index": false
}
}
}
}
}
数据准备
# POST /student/_doc/1001
{
"name":"zhangsan",
"nickname":"zhangsan",
"sex":"男",
"age":30
}
# POST /student/_doc/1002
{
"name":"lisi",
"nickname":"lisi",
"sex":"男",
"age":20
}
# POST /student/_doc/1003
{
"name":"wangwu",
"nickname":"wangwu",
"sex":"女",
"age":40
}
# POST /student/_doc/1004
{
"name":"zhangsan1",
"nickname":"zhangsan1",
"sex":"女",
"age":50
}
# POST /student/_doc/1005
{
"name":"zhangsan2",
"nickname":"zhangsan2",
"sex":"女",
"age":30
}
GET http://127.0.0.1:9200/student/_search
BODY {
"query": {
"match_all": {}
}
}
# "query":这里的query代表一个查询对象,里面可以有不同的查询属性
# "match_all":查询类型,例如:match_all(代表查询所有), match,term , range 等等
# {查询条件}:查询条件会根据类型的不同,写法也有差异
RESULT {
"took": 864,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 5,
"relation": "eq"
},
...
]
}
}
match匹配类型查询,会把查询条件进行分词,然后进行查询,多个词条之间是or的关系
GET http://127.0.0.1:9200/student/_search
BODY {
"query": {
"match": {
"name":"zhangsan"
}
}
}
multi_match与match类似,不同的是它可以在多个字段中查询。
{
"query": {
"multi_match": {
"query": "zhangsan",
"fields": ["name","nickname"]
}
}
}
term查询,精确的关键词匹配查询,不对查询条件进行分词。
{
"query": {
"term": {
"name": {
"value": "zhangsan"
}
}
}
}
terms 查询和 term 查询一样,但它允许你指定多值进行匹配。
{
"query": {
"terms": {
"name": ["zhangsan","lisi"]
}
}
}
默认情况下,Elasticsearch在搜索的结果中,会把文档中保存在_source的所有字段都返回。如果我们只想获取其中的部分字段,我们可以添加_source的过滤
{
"_source": ["name","nickname"],
"query": {
"terms": {
"nickname": ["zhangsan"]
}
}
}
可以通过 includes:来指定想要显示的字段,excludes:来指定不想要显示的字段
{
"_source": {
"includes": ["name","nickname"]
},
"query": {
"terms": {
"nickname": ["zhangsan"]
}
}
}
bool
把各种其它查询通过must
(必须 )、must_not
(必须不)、should
(应该)的方式进行组合
{
"query": {
"bool": {
"must": [
{
"match": {
"name": "zhangsan"
}
}
],
"must_not": [
{
"match": {
"age": "40"
}
}
],
"should": [
{
"match": {
"sex": "男"
}
}
]
}
}
}
range 查询找出那些落在指定区间内的数字或者时间。range查询允许以下字符
操作符 | 说明 |
---|---|
gt | 大于> |
gte | 大于等于>= |
lt | 小于< |
lte | 小于等于<= |
{
"query": {
"range": {
"age": {
"gte": 30,
"lte": 35
}
}
}
}
返回包含与搜索字词相似的字词的文档。
# 编辑距离是将一个术语转换为另一个术语所需的一个字符更改的次数。这些更改可以包括:
# > 更改字符(box → fox)
# > 删除字符(black → lack)
# > 插入字符(sic → sick)
# > 转置两个相邻字符(act → cat)
# 为了找到相似的术语,fuzzy查询会在指定的编辑距离内创建一组搜索词的所有可能的变体或扩展。然后查询返回每个扩展的完全匹配。
# 通过fuzziness修改编辑距离。一般使用默认值AUTO,根据术语的长度生成编辑距离。
{
"query": {
"fuzzy": {
"title": {
"value": "zhangsan"
}
}
}
}
sort 可以让我们按照不同的字段进行排序,并且通过order指定排序的方式。desc降序,asc升序。
{
"query": {
"match": {
"name":"zhangsan"
}
},
"sort": [{
"age": {
"order":"desc"
}
}]
}
假定我们想要结合使用 age和 _score进行查询,并且匹配的结果首先按照年龄排序,然后按照相关性得分排序
{
"query": {
"match_all": {}
},
"sort": [
{
"age": {
"order": "desc"
}
},
{
"_score":{
"order": "desc"
}
}
]
}
在进行关键字搜索时,搜索出的内容中的关键字会显示不同的颜色,称之为高亮。
# Elasticsearch可以对查询内容中的关键字部分,进行标签和样式(高亮)的设置。
# 在使用match查询的同时,加上一个highlight属性:
# >pre_tags:前置标签
# >post_tags:后置标签
# >fields:需要高亮的字段
# >title:这里声明title字段需要高亮,后面可以为这个字段设置特有配置,也可以空
{
"query": {
"match": {
"name": "zhangsan"
}
},
"highlight": {
"pre_tags": "",
"post_tags": "",
"fields": {
"name": {}
}
}
}
# from:当前页的起始索引,默认从0开始。 from = (pageNum - 1) * size
# size:每页显示多少条
{
"query": {
"match_all": {}
},
"sort": [
{
"age": {
"order": "desc"
}
}
],
"from": 0,
"size": 2
}
聚合允许使用者对es文档进行统计分析,类似与关系型数据库中的group by,当然还有很多其他的聚合,例如取最大值、平均值等等。
{
"aggs":{
"max_age":{
"max":{"field":"age"}
}
},
"size":0
}
桶聚和相当于sql中的group by语句:terms聚合,分组统计
{
"aggs":{
"age_groupby":{
"terms":{"field":"age"}
}
},
"size":0
}
Index Template 索引模板,顾名思义,就是创建索引的模具,其中可以定义一系列规则来帮助我们构建符合特定业务需求的索引的 mappings 和 settings,通过使用 Index Template 可以让索引具备可预知的一致性。
PUT _template/template_movie2020
{
"index_patterns": ["movie_test*"],
"settings": {
"number_of_shards": 1
},
"aliases" : {
"{index}-query": {},
"movie_test-query":{}
},
"mappings": {
"_doc": {
"properties": {
"id": {
"type": "keyword"
},
"movie_name": {
"type": "text",
"analyzer": "ik_smart"
}
}
}
}
}
# 查看所有模板
GET _cat/templates
# 查看某个具体的模板
GET _template/template_movie2020
# 查看某些具体的模板
GET _template/template_movie*
# 模拟旧索引
PUT shopping
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 1
}
}
GET shopping
{
"shopping" : {
"aliases" : { },
"mappings" : { },
"settings" : {
"index" : {
"creation_date" : "1624169939562",
"number_of_shards" : "1",
"number_of_replicas" : "1",
"uuid" : "M3fgqN8oSieMffRD2V8xlg",
"version" : {
"created" : "7080099"
},
"provided_name" : "shopping"
}
}
}
}
# 模拟旧索引插入数据
POST shopping/_doc
{
"type": "trousers",
"brand": "levis",
"price": "1299"
}
POST shopping/_doc
{
"type": "shirt",
"brand": "patagonia",
"price": "399"
}
POST shopping/_doc
{
"type": "shoes",
"brand": "crispi",
"price": "2499"
}
# 检查数据
GET shopping/_search
{
"took" : 228,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "shopping",
"_type" : "_doc",
"_id" : "MsUTKHoBp2xPwhRDOsQa",
"_score" : 1.0,
"_source" : {
"type" : "trousers",
"brand" : "levis",
"price" : "1299"
}
},
{
"_index" : "shopping",
"_type" : "_doc",
"_id" : "M8UTKHoBp2xPwhRDYsSq",
"_score" : 1.0,
"_source" : {
"type" : "shirt",
"brand" : "patagonia",
"price" : "399"
}
},
{
"_index" : "shopping",
"_type" : "_doc",
"_id" : "NcUTKHoBp2xPwhRDbsR-",
"_score" : 1.0,
"_source" : {
"type" : "shoes",
"brand" : "crispi",
"price" : "2499"
}
}
]
}
}
# 模拟新索引
PUT new_shopping
{
"settings": {
"number_of_shards": 2
, "number_of_replicas": 2
}
}
GET new_shopping
{
"new_shopping" : {
"aliases" : { },
"mappings" : { },
"settings" : {
"index" : {
"creation_date" : "1624170079551",
"number_of_shards" : "2",
"number_of_replicas" : "2",
"uuid" : "NMz6hoe7ReayzjjvsKn2Zg",
"version" : {
"created" : "7080099"
},
"provided_name" : "new_shopping"
}
}
}
}
# 重建索引
POST _reindex
{
"source": {
"index": "shopping"
},
"dest": {
"index": "new_shopping"
}
}
# 新索引查看数据
GET new_shopping/_search
{
"took" : 278,
"timed_out" : false,
"_shards" : {
"total" : 2,
"successful" : 2,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "new_shopping",
"_type" : "_doc",
"_id" : "MsUTKHoBp2xPwhRDOsQa",
"_score" : 1.0,
"_source" : {
"type" : "trousers",
"brand" : "levis",
"price" : "1299"
}
},
{
"_index" : "new_shopping",
"_type" : "_doc",
"_id" : "M8UTKHoBp2xPwhRDYsSq",
"_score" : 1.0,
"_source" : {
"type" : "shirt",
"brand" : "patagonia",
"price" : "399"
}
},
{
"_index" : "new_shopping",
"_type" : "_doc",
"_id" : "NcUTKHoBp2xPwhRDbsR-",
"_score" : 1.0,
"_source" : {
"type" : "shoes",
"brand" : "crispi",
"price" : "2499"
}
}
]
}
}
# 环境还原
DELETE shopping
DELETE new_shopping
<properties>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
</properties>
<dependencies>
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch</artifactId>
        <version>7.8.0</version>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>elasticsearch-rest-high-level-client</artifactId>
        <version>7.8.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-api</artifactId>
        <version>2.8.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.8.2</version>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-databind</artifactId>
        <version>2.9.9</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>
</dependencies>
package com.simwor.bigdata.es;
import org.apache.http.HttpHost;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.client.indices.GetIndexResponse;
import java.io.IOException;
/**
 * Demonstrates index-level operations (create / query / delete) on the
 * "user" index using the Elasticsearch high-level REST client.
 */
public class ESClient {
    public static void main(String[] args) throws IOException {
        // Client for the node listening on localhost:9200.
        RestHighLevelClient client = new RestHighLevelClient(
                RestClient.builder(new HttpHost("localhost", 9200)));
        createIndex(client);
        queryIndex(client);
        deleteIndex(client);
        client.close();
    }

    /** Creates the "user" index and reports whether the cluster acknowledged it. */
    private static void createIndex(RestHighLevelClient client) throws IOException {
        CreateIndexResponse response =
                client.indices().create(new CreateIndexRequest("user"), RequestOptions.DEFAULT);
        System.out.println("创建索引是否成功:" + response.isAcknowledged());
    }

    /** Prints the aliases, mappings and settings of the "user" index. */
    private static void queryIndex(RestHighLevelClient client) throws IOException {
        GetIndexResponse response =
                client.indices().get(new GetIndexRequest("user"), RequestOptions.DEFAULT);
        System.out.println(response.getAliases());
        System.out.println(response.getMappings());
        System.out.println(response.getSettings());
    }

    /** Deletes the "user" index and reports whether the cluster acknowledged it. */
    private static void deleteIndex(RestHighLevelClient client) throws IOException {
        AcknowledgedResponse response =
                client.indices().delete(new DeleteIndexRequest("user"), RequestOptions.DEFAULT);
        System.out.println("删除索引是否成功:" + response.isAcknowledged());
    }
}
创建索引是否成功:true
{user=[]}
{user=org.elasticsearch.cluster.metadata.MappingMetadata@91416359}
{user={"index.creation_date":"1618280034576","index.number_of_replicas":"1","index.number_of_shards":"1","index.provided_name":"user","index.uuid":"Dus3O-NJTiekoE_Gr2Rchg","index.version.created":"7080099"}}
删除索引是否成功:true
package com.simwor.bigdata.es;
/**
 * Plain data holder (POJO) for a user record: name, sex and age.
 * Instances are serialized to JSON by Jackson before being indexed.
 */
public class User {
    private String name;
    private String sex;
    private Integer age;

    /** Builds a fully-populated user. */
    public User(String name, String sex, Integer age) {
        this.name = name;
        this.sex = sex;
        this.age = age;
    }

    public String getName() { return name; }

    public void setName(String name) { this.name = name; }

    public String getSex() { return sex; }

    public void setSex(String sex) { this.sex = sex; }

    public Integer getAge() { return age; }

    public void setAge(Integer age) { this.age = age; }
}
package com.simwor.bigdata.es;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.http.HttpHost;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentType;
import java.io.IOException;
/**
 * Demonstrates single-document and bulk document operations against the
 * "user" index with the Elasticsearch high-level REST client.
 */
public class ESClient {
    public static void main(String[] args) throws IOException {
        RestHighLevelClient client = new RestHighLevelClient(
                RestClient.builder(new HttpHost("localhost", 9200)));
        insertDoc(client);
        updateDoc(client);
        getDoc(client);
        deleteDoc(client);
        batchInsertDoc(client);
        batchDeleteDoc(client);
        client.close();
    }

    /** Indexes one User document under id 1001 (JSON produced by Jackson). */
    private static void insertDoc(RestHighLevelClient client) throws IOException {
        String json = new ObjectMapper().writeValueAsString(new User("rayslee", "male", 18));
        IndexRequest request = new IndexRequest().index("user").id("1001")
                .source(json, XContentType.JSON);
        IndexResponse response = client.index(request, RequestOptions.DEFAULT);
        System.out.println("插入文档结果:" + response.getResult());
    }

    /** Partially updates document 1001, overwriting its "sex" field. */
    private static void updateDoc(RestHighLevelClient client) throws IOException {
        UpdateRequest request = new UpdateRequest();
        request.index("user").id("1001");
        request.doc(XContentType.JSON, "sex", "男");
        UpdateResponse response = client.update(request, RequestOptions.DEFAULT);
        System.out.println("更新文档结果:" + response.getResult());
    }

    /** Fetches document 1001 and prints its _source JSON. */
    private static void getDoc(RestHighLevelClient client) throws IOException {
        GetRequest request = new GetRequest();
        request.index("user").id("1001");
        GetResponse response = client.get(request, RequestOptions.DEFAULT);
        System.out.println(response.getSourceAsString());
    }

    /** Deletes document 1001 and prints the full delete response. */
    private static void deleteDoc(RestHighLevelClient client) throws IOException {
        DeleteRequest request = new DeleteRequest();
        request.index("user").id("1001");
        DeleteResponse response = client.delete(request, RequestOptions.DEFAULT);
        System.out.println(response);
    }

    /** Bulk-indexes three User documents (ids 1001-1003) in one request. */
    private static void batchInsertDoc(RestHighLevelClient client) throws IOException {
        ObjectMapper mapper = new ObjectMapper();
        BulkRequest bulk = new BulkRequest();
        bulk.add(new IndexRequest().index("user").id("1001")
                .source(mapper.writeValueAsString(new User("rayslee", "male", 18)), XContentType.JSON));
        bulk.add(new IndexRequest().index("user").id("1002")
                .source(mapper.writeValueAsString(new User("rachel", "female", 19)), XContentType.JSON));
        bulk.add(new IndexRequest().index("user").id("1003")
                .source(mapper.writeValueAsString(new User("chandler", "male", 20)), XContentType.JSON));
        BulkResponse response = client.bulk(bulk, RequestOptions.DEFAULT);
        System.out.println("批量插入耗费时间:" + response.getTook());
    }

    /** Bulk-deletes the three documents created by batchInsertDoc. */
    private static void batchDeleteDoc(RestHighLevelClient client) throws IOException {
        BulkRequest bulk = new BulkRequest();
        bulk.add(new DeleteRequest().index("user").id("1001"));
        bulk.add(new DeleteRequest().index("user").id("1002"));
        bulk.add(new DeleteRequest().index("user").id("1003"));
        BulkResponse response = client.bulk(bulk, RequestOptions.DEFAULT);
        System.out.println("批量删除耗费时间:" + response.getTook());
    }
}
插入文档结果:CREATED
更新文档结果:UPDATED
{"name":"rayslee","sex":"男","age":18}
DeleteResponse[index=user,type=_doc,id=1001,version=45,result=deleted,shards=ShardInfo{total=2, successful=1, failures=[]}]
批量插入耗费时间:6ms
批量删除耗费时间:5ms
package com.simwor.bigdata.es;
import org.apache.http.HttpHost;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import java.io.IOException;
/**
 * Demonstrates search requests against the "user" index: match-all, term,
 * paging, sorting, source filtering, bool, range, fuzzy, highlight and
 * aggregation queries. Variants 1-10 are kept as commented-out samples;
 * only variant 11 (terms aggregation) is active.
 */
public class ESClient {
    public static void main(String[] args) throws Exception {
        RestHighLevelClient esClient = new RestHighLevelClient(
                RestClient.builder(new HttpHost("localhost", 9200, "http")));
        // 0. Prepare sample data (run once, then keep commented out)
        //batchInsertDoc(esClient);
        SearchRequest request = new SearchRequest();
        request.indices("user");
        // 1. Query every document in the index
        // request.source(new SearchSourceBuilder().query(QueryBuilders.matchAllQuery()));
        // 2. Conditional query: termQuery
        // request.source(new SearchSourceBuilder().query(QueryBuilders.termQuery("age", 30)));
        // 3. Paged query
        // SearchSourceBuilder builder = new SearchSourceBuilder().query(QueryBuilders.matchAllQuery());
        // builder.from(2);
        // builder.size(2);
        // request.source(builder);
        // 4. Sorted query
        // SearchSourceBuilder builder = new SearchSourceBuilder().query(QueryBuilders.matchAllQuery());
        // builder.sort("age", SortOrder.DESC);
        // request.source(builder);
        // 5. Source field filtering
        // SearchSourceBuilder builder = new SearchSourceBuilder().query(QueryBuilders.matchAllQuery());
        // String[] excludes = {"age"};
        // String[] includes = {};
        // builder.fetchSource(includes, excludes);
        // request.source(builder);
        // 6. Compound (bool) query
        // SearchSourceBuilder builder = new SearchSourceBuilder();
        // BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        // //boolQueryBuilder.must(QueryBuilders.matchQuery("age", 30));
        // //boolQueryBuilder.must(QueryBuilders.matchQuery("sex", "男"));
        // //boolQueryBuilder.mustNot(QueryBuilders.matchQuery("sex", "男"));
        // boolQueryBuilder.should(QueryBuilders.matchQuery("age", 30));
        // boolQueryBuilder.should(QueryBuilders.matchQuery("age", 40));
        // builder.query(boolQueryBuilder);
        // request.source(builder);
        // 7. Range query
        // SearchSourceBuilder builder = new SearchSourceBuilder();
        // RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery("age");
        // rangeQuery.gte(30);
        // rangeQuery.lt(50);
        // builder.query(rangeQuery);
        // request.source(builder);
        // 8. Fuzzy query
        // SearchSourceBuilder builder = new SearchSourceBuilder();
        // //builder.query(QueryBuilders.fuzzyQuery("name", "wangwu").fuzziness(Fuzziness.ONE));
        // builder.query(QueryBuilders.fuzzyQuery("name", "wangwu").fuzziness(Fuzziness.TWO));
        // request.source(builder);
        // 9. Highlight query
        // SearchSourceBuilder builder = new SearchSourceBuilder();
        // TermsQueryBuilder termsQueryBuilder = QueryBuilders.termsQuery("name", "zhangsan");
        // HighlightBuilder highlightBuilder = new HighlightBuilder();
        //
        // highlightBuilder.preTags("");
        // highlightBuilder.postTags("");
        // highlightBuilder.field("name");
        //
        // builder.highlighter(highlightBuilder);
        // builder.query(termsQueryBuilder);
        // request.source(builder);
        // 10. Aggregation query (max of "age")
        // SearchSourceBuilder builder = new SearchSourceBuilder();
        // AggregationBuilder aggregationBuilder = AggregationBuilders.max("maxAge").field("age");
        // builder.aggregation(aggregationBuilder);
        // request.source(builder);
        // 11. Group-by query: terms aggregation on "age"
        SearchSourceBuilder builder = new SearchSourceBuilder();
        AggregationBuilder aggregationBuilder = AggregationBuilders.terms("ageGroup").field("age");
        builder.aggregation(aggregationBuilder);
        request.source(builder);
        SearchResponse response = esClient.search(request, RequestOptions.DEFAULT);
        // Print the _source of every hit (the aggregation result itself would be
        // read from response.getAggregations(), not from the hits).
        SearchHits hits = response.getHits();
        for ( SearchHit hit : hits ) {
            System.out.println(hit.getSourceAsString());
        }
        esClient.close();
    }
    /** Bulk-loads nine sample user documents (ids 1001-1009). */
    private static void batchInsertDoc(RestHighLevelClient esClient) throws IOException {
        BulkRequest request = new BulkRequest();
        request.add(new IndexRequest().index("user").id("1001").source(XContentType.JSON, "name", "zhangsan", "age",30,"sex","男"));
        request.add(new IndexRequest().index("user").id("1002").source(XContentType.JSON, "name", "lisi", "age",30,"sex","女"));
        request.add(new IndexRequest().index("user").id("1003").source(XContentType.JSON, "name", "wangwu", "age",40,"sex","男"));
        request.add(new IndexRequest().index("user").id("1004").source(XContentType.JSON, "name", "wangwu1", "age",40,"sex","女"));
        request.add(new IndexRequest().index("user").id("1005").source(XContentType.JSON, "name", "wangwu2", "age",50,"sex","男"));
        request.add(new IndexRequest().index("user").id("1006").source(XContentType.JSON, "name", "wangwu3", "age",50,"sex","男"));
        request.add(new IndexRequest().index("user").id("1007").source(XContentType.JSON, "name", "wangwu44", "age",60,"sex","男"));
        request.add(new IndexRequest().index("user").id("1008").source(XContentType.JSON, "name", "wangwu555", "age",60,"sex","男"));
        request.add(new IndexRequest().index("user").id("1009").source(XContentType.JSON, "name", "wangwu66666", "age",60,"sex","男"));
        // Response intentionally unused in this demo.
        BulkResponse response = esClient.bulk(request, RequestOptions.DEFAULT);
    }
}
# 索引
一个索引就是一个拥有几分相似特征的文档的集合。
# 文档
一个文档是一个可被索引的基础信息单元,也就是一条数据。
# 字段
字段相当于是数据表的字段,对文档数据根据不同属性进行的分类标识。
# 映射
映射(Mapping)是处理数据的方式和规则方面做一些限制
# 分片
1. 一个索引可以存储超出单个节点硬件限制的大量数据,Elasticsearch提供了将索引划分成多份的能力,每一份就称之为分片。
2. 每个分片本身也是一个功能完善并且独立的“索引”,这个“索引”可以被放置到集群中的任何节点上。
3. 分片很重要,主要有两方面的原因:
3.1)允许你水平分割 / 扩展你的内容容量。
3.2)允许你在分片之上进行分布式的、并行的操作,进而提高性能/吞吐量。
4. 一个 Lucene 索引 我们在 Elasticsearch 称作 分片 。
5. 一个 Elasticsearch 索引 是分片的集合。
6. 当 Elasticsearch 在索引中搜索的时候, 他发送查询到每一个属于索引的分片(Lucene 索引),然后合并每个分片的结果到一个全局的结果集。
# 副本
1. Elasticsearch允许你创建分片的一份或多份拷贝,这些拷贝叫做复制分片(副本)。
2. 复制分片可以扩展你的搜索量/吞吐量,因为搜索可以在所有的副本上并行运行。
# 分配
1. 分配(Allocation)是将分片分配给某个节点的过程,包括分配主分片或者副本。
2. 如果是副本,还包含从主分片复制数据的过程。这个过程是由master节点完成的。
1. 一个运行中的 Elasticsearch 实例称为一个节点,而集群是由一个或者多个拥有相同 cluster.name 配置的节点组成, 它们共同承担数据和负载的压力。
2. 当有节点加入集群中或者从集群中移除节点时,集群将会重新平均分布所有的数据。
3. 当一个节点被选举成为主节点时, 它将负责管理集群范围内的所有变更,例如增加、删除索引,或者增加、删除节点等。
4. 而主节点并不需要涉及到文档级别的变更和搜索等操作,所以当集群只拥有一个主节点的情况下,即使流量的增加它也不会成为瓶颈。
5. 任何节点都可以成为主节点。
6. 作为用户,我们可以将请求发送到集群中的任何节点 ,包括主节点。
7. 每个节点都知道任意文档所处的位置,并且能够将我们的请求直接转发到存储我们所需文档的节点。
8. 无论我们将请求发送到哪个节点,它都能负责从各个包含我们所需文档的节点收集回数据,并将最终结果返回給客户端。
PUT http://bigdata01:9200/users
BODY {
"settings" : {
"number_of_shards" : 3,
"number_of_replicas" : 1
}
}
RESULT {
"acknowledged": true,
"shards_acknowledged": true,
"index": "users"
}
Chrome 安装 “ElasticSearch Head” 插件
当索引一个文档的时候,文档会被存储到一个主分片中。 Elasticsearch 如何知道一个文档应该存放到哪个分片中呢?
在处理读取请求时,协调结点在每次请求的时候都会通过轮询所有的副本分片来达到负载均衡。
所谓的正向索引,就是搜索引擎会将待搜索的文件都对应一个文件ID,搜索时将这个ID和搜索关键字进行对应,形成K-V对,然后对关键字进行统计计数。
倒排索引,即把文件ID对应到关键词的映射转换为关键词到文件ID的映射,每个关键词都对应着一系列的文件,这些文件中都出现这个关键词。
如何在保留不变性的前提下实现倒排索引的更新?
/users/_refresh
进行实时刷新。{
"settings": {
"refresh_interval": "30s"
}
}
POST http://127.0.0.1:9200/shopping/_doc/1000
BODY
{
"title":"小米手机",
"category":"小米",
"images":"http://www.gulixueyuan.com/xm.jpg",
"price":3999.00
}
RESULT
{
"_index": "shopping",
"_type": "_doc",
"_id": "1000",
"_version": 1,
"result": "created",
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"_seq_no": 6,
"_primary_term": 3
}
默认情况下,并发更新同一个文档会互相覆盖,通过添加版本号来乐观尝试更新、冲突报错。
POST http://127.0.0.1:9200/shopping/_doc/1000?if_seq_no=1&if_primary_term=1
RESULT {
"error": {
"root_cause": [
{
"type": "version_conflict_engine_exception",
"reason": "[1000]: version conflict, required seqNo [1], primary term [1]. current document has seqNo [6] and primary term [3]",
"index_uuid": "odvP_kiVSKuBWb4_zjrjsA",
"shard": "0",
"index": "shopping"
}
],
"type": "version_conflict_engine_exception",
"reason": "[1000]: version conflict, required seqNo [1], primary term [1]. current document has seqNo [6] and primary term [3]",
"index_uuid": "odvP_kiVSKuBWb4_zjrjsA",
"shard": "0",
"index": "shopping"
},
"status": 409
}
POST http://127.0.0.1:9200/shopping/_doc/1000?if_seq_no=6&if_primary_term=3
{
"_index": "shopping",
"_type": "_doc",
"_id": "1000",
"_version": 2,
"result": "updated",
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"_seq_no": 7,
"_primary_term": 3
}
# es服务地址
elasticsearch.host=127.0.0.1
# es服务端口
elasticsearch.port=9200
# 配置日志级别,开启debug日志
logging.level.com.simwor.bigdata.es=debug
<parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>2.3.6.RELEASE</version>
    <relativePath/>
</parent>
<properties>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
</properties>
<dependencies>
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-devtools</artifactId>
        <scope>runtime</scope>
        <optional>true</optional>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-test</artifactId>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework</groupId>
        <artifactId>spring-test</artifactId>
    </dependency>
</dependencies>
package com.simwor.bigdata.es;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
@SpringBootApplication
public class SpringDataElasticSearchMainApplication {
    // Boots the Spring context; the Elasticsearch client is supplied by the
    // ElasticsearchConfig @Configuration class in this package.
    public static void main(String[] args) {
        SpringApplication.run(SpringDataElasticSearchMainApplication.class,args);
    }
}
package com.simwor.bigdata.es;

import lombok.Data;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.elasticsearch.config.AbstractElasticsearchConfiguration;
/**
 * Builds the RestHighLevelClient used by Spring Data Elasticsearch from the
 * "elasticsearch.host" / "elasticsearch.port" application properties.
 */
@ConfigurationProperties(prefix = "elasticsearch")
@Configuration
@Data
public class ElasticsearchConfig extends AbstractElasticsearchConfiguration {
    // Bound by Spring from elasticsearch.host / elasticsearch.port.
    private String host;
    private Integer port;

    @Override
    public RestHighLevelClient elasticsearchClient() {
        // Wrap a low-level REST client aimed at the configured endpoint.
        return new RestHighLevelClient(RestClient.builder(new HttpHost(host, port)));
    }
}
package com.simwor.bigdata.es;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.ToString;
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;
/**
 * Document entity mapped to the "product" index (3 shards, 1 replica).
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@ToString
@Document(indexName = "product", shards = 3, replicas = 1)
public class Product {
    // Mandatory unique identifier; maps to the Elasticsearch "_id" field.
    @Id
    private Long id;

    /*
     * Field mapping notes:
     *   type     - field data type
     *   analyzer - analyzer used for the field
     *   index    - whether the field is indexed (default: true)
     *   Keyword  - exact value, not analyzed into terms
     */

    // Product name, full-text analyzed.
    @Field(type = FieldType.Text)
    private String title;

    // Category name, matched exactly.
    @Field(type = FieldType.Keyword)
    private String category;

    // Product price.
    @Field(type = FieldType.Double)
    private Double price;

    // Image URL; stored but not searchable.
    @Field(type = FieldType.Keyword, index = false)
    private String images;
}
package com.simwor.bigdata.es;
import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;
import org.springframework.stereotype.Repository;
/**
 * Spring Data repository for {@link Product} documents keyed by {@link Long} id.
 * CRUD, bulk, paging and search operations are inherited from the base interface.
 */
@Repository
public interface ProductDao extends ElasticsearchRepository<Product,Long> {
}
package com.simwor.bigdata.es;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.IndexOperations;
import org.springframework.test.context.junit4.SpringRunner;
/**
 * Index lifecycle tests. Spring Data creates the index and its mappings
 * automatically on startup from the {@link Product} annotations.
 */
@RunWith(SpringRunner.class)
@SpringBootTest
public class SpringDataESIndexTest {

    // Template giving direct access to index-level operations.
    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;

    /** Checks that the "product" index was auto-created on startup. */
    @Test
    public void createIndex() {
        IndexOperations ops = elasticsearchRestTemplate.indexOps(Product.class);
        boolean isExist = ops.exists();
        System.out.println("自动创建创建索引" + isExist);
    }

    /** Drops the "product" index. */
    @Test
    public void deleteIndex(){
        IndexOperations ops = elasticsearchRestTemplate.indexOps(Product.class);
        boolean deleted = ops.delete();
        System.out.println("删除索引 = " + deleted);
    }
}
package com.simwor.bigdata.es;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Sort;
import org.springframework.test.context.junit4.SpringRunner;
import java.util.ArrayList;
import java.util.List;
/**
 * CRUD, bulk and paging tests for {@link ProductDao}.
 */
@RunWith(SpringRunner.class)
@SpringBootTest
public class SpringDataESProductDaoTest {

    @Autowired
    private ProductDao productDao;

    /** Inserts one document. */
    @Test
    public void save(){
        Product huawei = new Product();
        huawei.setId(2L);
        huawei.setTitle("华为手机");
        huawei.setCategory("手机");
        huawei.setPrice(2999.0);
        huawei.setImages("http://www.atguigu/hw.jpg");
        productDao.save(huawei);
    }

    /** Updates by saving a document that reuses an existing id. */
    @Test
    public void update(){
        Product xiaomi = new Product();
        xiaomi.setId(1L);
        xiaomi.setTitle("小米2手机");
        xiaomi.setCategory("手机");
        xiaomi.setPrice(9999.0);
        xiaomi.setImages("http://www.atguigu/xm.jpg");
        productDao.save(xiaomi);
    }

    /** Looks up a single document by id. */
    @Test
    public void findById(){
        Product found = productDao.findById(1L).get();
        System.out.println(found);
    }

    /** Prints every document in the index. */
    @Test
    public void findAll(){
        for (Product p : productDao.findAll()) {
            System.out.println(p);
        }
    }

    /** Deletes the document whose id is 1. */
    @Test
    public void delete(){
        Product target = new Product();
        target.setId(1L);
        productDao.delete(target);
    }

    /** Bulk-inserts ten documents with ids 0..9. */
    @Test
    public void saveAll(){
        List<Product> batch = new ArrayList<>();
        for (int i = 0; i < 10; i++) {
            Product p = new Product();
            p.setId(Long.valueOf(i));
            p.setTitle("["+i+"]小米手机");
            p.setCategory("手机");
            p.setPrice(1999.0+i);
            p.setImages("http://www.atguigu/xm.jpg");
            batch.add(p);
        }
        productDao.saveAll(batch);
    }

    /** Pages through the index, five documents per page, id descending. */
    @Test
    public void findByPageable(){
        // Sort specification: descending by the "id" field.
        Sort sort = Sort.by(Sort.Direction.DESC,"id");
        int currentPage = 0; // zero-based: 0 is the first page, 1 the second
        int pageSize = 5;    // documents per page
        PageRequest pageRequest = PageRequest.of(currentPage, pageSize, sort);
        Page<Product> productPage = productDao.findAll(pageRequest);
        for (Product p : productPage.getContent()) {
            System.out.println(p);
        }
    }
}
package com.simwor.bigdata.es;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.data.domain.PageRequest;
import org.springframework.test.context.junit4.SpringRunner;
/**
 * Term-query tests executed through {@code ProductDao.search(...)}.
 */
@RunWith(SpringRunner.class)
@SpringBootTest
public class SpringDataESSearchTest {

    @Autowired
    private ProductDao productDao;

    /**
     * Exact term query on the "title" field; search(queryBuilder) runs the
     * query and returns the matching documents.
     */
    @Test
    public void termQuery(){
        TermQueryBuilder query = QueryBuilders.termQuery("title", "小米");
        for (Product product : productDao.search(query)) {
            System.out.println(product);
        }
    }

    /** Same term query, restricted to the first page of five hits. */
    @Test
    public void termQueryByPage(){
        int currentPage = 0;
        int pageSize = 5;
        PageRequest pageRequest = PageRequest.of(currentPage, pageSize);
        TermQueryBuilder query = QueryBuilders.termQuery("title", "小米");
        for (Product product : productDao.search(query, pageRequest)) {
            System.out.println(product);
        }
    }
}
<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.12</artifactId>
        <version>3.0.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_2.12</artifactId>
        <version>3.0.0</version>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch</artifactId>
        <version>7.8.0</version>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>elasticsearch-rest-high-level-client</artifactId>
        <version>7.8.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-api</artifactId>
        <version>2.8.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.8.2</version>
    </dependency>
</dependencies>
package com.simwor.bigdata
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.elasticsearch.action.index.IndexRequest
import org.elasticsearch.client.{RequestOptions, RestClient, RestHighLevelClient}
import org.elasticsearch.common.xcontent.XContentType
import java.util.Date
object Spark2ESTest {
  /**
   * Reads whitespace-separated "id value" records from a local socket with
   * Spark Streaming and indexes each record into the "sparkstreaming" index.
   */
  def main(args: Array[String]): Unit = {
    // FIX: HttpHost is used below but was never imported at the top of the
    // file; a function-scope import keeps the file compilable.
    import org.apache.http.HttpHost

    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("ESTest")
    val ssc = new StreamingContext(sparkConf, Seconds(3))
    val ds: ReceiverInputDStream[String] = ssc.socketTextStream("localhost", 9999)
    ds.foreachRDD(
      rdd => {
        println("*************** " + new Date())
        rdd.foreach(
          data => {
            // The client is built inside the closure because it is created on
            // the executors; NOTE(review): foreachPartition would amortize the
            // connection cost across records — confirm before changing.
            val client = new RestHighLevelClient(
              RestClient.builder(new HttpHost("localhost", 9200, "http"))
            )
            // Index request: target index "sparkstreaming", first token as id.
            val request = new IndexRequest()
            val ss = data.split(" ")
            println("ss = " + ss.mkString(","))
            request.index("sparkstreaming").id(ss(0))
            // Document payload: second token wrapped in a JSON object.
            val productJson =
              s"""
                 | { "data":"${ss(1)}" }
                 |""".stripMargin
            request.source(productJson, XContentType.JSON)
            // Send the request and log the response metadata.
            val response = client.index(request, RequestOptions.DEFAULT)
            System.out.println("_index:" + response.getIndex())
            System.out.println("_id:" + response.getId())
            System.out.println("_result:" + response.getResult())
            client.close()
          }
        )
      }
    )
    ssc.start()
    ssc.awaitTermination()
  }
}
<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-scala_2.12</artifactId>
        <version>1.12.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-scala_2.12</artifactId>
        <version>1.12.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_2.12</artifactId>
        <version>1.12.0</version>
    </dependency>
    <!-- Scala suffix unified to 2.12 to match the other Flink artifacts -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-elasticsearch7_2.12</artifactId>
        <version>1.12.0</version>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-core</artifactId>
        <version>2.11.1</version>
    </dependency>
</dependencies>
package com.simwor.bigdata;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch7.ElasticsearchSink;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Streams lines from a local socket into Elasticsearch through the
 * Flink Elasticsearch 7 sink connector.
 */
public class FlinkElasticsearchSinkTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> source = env.socketTextStream("localhost", 9999);

        // Cluster endpoints the sink connects to.
        List<HttpHost> httpHosts = new ArrayList<>();
        httpHosts.add(new HttpHost("127.0.0.1", 9200, "http"));

        ElasticsearchSink.Builder<String> esSinkBuilder = new ElasticsearchSink.Builder<>(
                httpHosts,
                new ElasticsearchSinkFunction<String>() {
                    @Override
                    public void process(String element, RuntimeContext ctx, RequestIndexer indexer) {
                        indexer.add(createIndexRequest(element));
                    }

                    // Wraps one input line as {"data": line} targeting "my-index".
                    public IndexRequest createIndexRequest(String line) {
                        Map<String, String> payload = new HashMap<>();
                        payload.put("data", line);
                        return Requests.indexRequest().index("my-index").source(payload);
                    }
                }
        );

        // Flush after every single record so results are visible immediately.
        esSinkBuilder.setBulkFlushMaxActions(1);
        source.addSink(esSinkBuilder.build());
        env.execute("flink-es");
    }
}
节点数 <= 主分片数 *(副本数+1)
PUT /_all/_settings
{
"settings": {
"index.unassigned.node_left.delayed_timeout": "5m"
}
}
index.translog.flush_threshold_size:默认 512MB,达到该大小或每 30 分钟时,会触发一次 Flush。
index.number_of_replicas: 0 关闭副本;在写入完成后,将 Replica 修改回正常的状态。

参数名 | 参数值 | 说明 |
---|---|---|
cluster.name | elasticsearch | 配置 ES 的集群名称,默认值是ES,建议改成与所存数据相关的名称,ES 会自动发现在同一网段下的集群名称相同的节点 |
node.name | node-1 | 集群中的节点名,在同一个集群中不能重复。节点的名称一旦设置,就不能再改变了。当然,也可以设置成服务器的主机名称,例如 node.name:${HOSTNAME}。 |
node.master | true | 指定该节点是否有资格被选举成为 Master 节点,默认是 True,如果被设置为 True,则只是有资格成为 Master 节点,具体能否成为 Master 节点,需要通过选举产生。 |
node.data | true | 指定该节点是否存储索引数据,默认为 True。数据的增、删、改、查都是在 Data 节点完成的。 |
index.number_of_shards | 1 | 设置都索引分片个数,默认是 1 片。也可以在创建索引时设置该值,具体设置为多大都值要根据数据量的大小来定。如果数据量不大,则设置成 1 时效率最高 |
index.number_of_replicas | 1 | 设置默认的索引副本个数,默认为 1 个。副本数越多,集群的可用性越好,但是写索引时需要同步的数据越多。 |
transport.tcp.compress | true | 设置在节点间传输数据时是否压缩,默认为 False,不压缩 |
discovery.zen.minimum_master_nodes | 1 | 设置在选举 Master 节点时需要参与的最少的候选主节点数,默认为 1。如果使用默认值,则当网络不稳定时有可能会出现脑裂合理的数值为(master_eligible_nodes/2)+1,其中 master_eligible_nodes 表示集群中的候选主节点数 |
discovery.zen.ping.timeout | 3s | 设置在集群中自动发现其他节点时 Ping 连接的超时时间,默认为 3 秒。在较差的网络环境下需要设置得大一点,防止因误判该节点的存活状态而导致分片的转移 |
index.refresh_interval | 1s | 即持续写入时每秒都会强制生成1个新的segments文件,增大索引刷新时间可以生成更大的segments文件,有效降低IO并降低segments merge 的压力;副作用:当数据添加到索引后并不能马上被查询到,等到索引刷新后才会被查询到。 |
# 直接设置
curl –XPUT --tlsv1.2 --negotiate -k -u : "https://ip:httpport/index/_settings?pretty" -H 'Content-Type: application/json' -d'
{
"refresh_interval" : "60s"
}'
# 通过索引模板设置
curl -XPUT --tlsv1.2 --negotiate -k -u : "https://127.0.0.1:24100/_template/template_1" -H 'Content-Type: application/json' -d'
{
"template": "index*",
"settings": {
"refresh_interval": "60s"
}
}'
discovery.zen.ping_timeout
节点状态的响应时间,默认为 3s,可以适当调大;如果 master 在该响应时间的范围内没有做出响应应答,则判断该节点已经挂掉了。调大该参数(如 6s,discovery.zen.ping_timeout:6),可适当减少误判。
discovery.zen.minimum_master_nodes:1
该参数用于控制选举行为发生的最小集群主节点数量。当备选主节点的个数大于等于该参数的值,且备选主节点中有该参数个节点认为主节点挂了,才进行选举。官方建议为 (n/2)+1,n 为主节点个数(即有资格成为主节点的节点个数)。