参考:https://blog.csdn.net/msllws/article/details/102807605?depth_1-utm_source=distribute.pc_relevant.none-task&utm_source=distribute.pc_relevant.none-task
中间报了几次错,网上都可以查到解决方案
参考:Elasticsearch 7.x 最详细安装及配置
注意:需要把之前安装时在配置文件中添加的配置去掉,恢复成默认配置
配置文件如下:
# Docker Compose stack: a three-node Elasticsearch 7.1.0 cluster whose nodes
# are tagged hot / warm / cold through node.attr.box_type, plus Kibana and
# Cerebro. Every service is attached to the single bridge network hwc_es7net.
version: '2.2'
services:
  # Cerebro is pointed at the hot node through its Compose service name.
  cerebro:
    image: lmenezes/cerebro:0.8.3
    container_name: hwc_cerebro
    ports:
      - "9000:9000"
    command:
      - -Dhosts.0.host=http://elasticsearch:9200
    networks:
      - hwc_es7net

  # NOTE(review): no Elasticsearch URL is configured here, so Kibana relies on
  # its image default; confirm that it resolves to the `elasticsearch` service.
  kibana:
    image: docker.elastic.co/kibana/kibana:7.1.0
    container_name: hwc_kibana7
    environment:
      # - I18N_LOCALE=zh-CN
      - XPACK_GRAPH_ENABLED=true
      - TIMELION_ENABLED=true
      # In list form everything after `=` is passed through verbatim, so the
      # value must not be wrapped in quotes.
      - XPACK_MONITORING_COLLECTION_ENABLED=true
    ports:
      - "5601:5601"
    networks:
      - hwc_es7net

  # Hot node: the only Elasticsearch node that publishes port 9200 to the host.
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.1.0
    container_name: es7_hot
    environment:
      - cluster.name=geektime-hwc
      - node.name=es7_hot
      - node.attr.box_type=hot
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      # Seed hosts and initial masters are listed by container name, so all
      # three containers have to share one network.
      - discovery.seed_hosts=es7_hot,es7_warm,es7_cold
      - cluster.initial_master_nodes=es7_hot,es7_warm,es7_cold
    # Unlimited memlock accompanies bootstrap.memory_lock=true above.
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - hwc_es7data_hot:/usr/share/elasticsearch/data
    ports:
      - "9200:9200"
    networks:
      - hwc_es7net

  # Warm node.
  elasticsearch2:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.1.0
    container_name: es7_warm
    environment:
      - cluster.name=geektime-hwc
      - node.name=es7_warm
      - node.attr.box_type=warm
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - discovery.seed_hosts=es7_hot,es7_warm,es7_cold
      - cluster.initial_master_nodes=es7_hot,es7_warm,es7_cold
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - hwc_es7data_warm:/usr/share/elasticsearch/data
    # Must be the network declared at the bottom of this file; an undeclared
    # network name makes Compose reject the whole file.
    networks:
      - hwc_es7net

  # Cold node.
  elasticsearch3:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.1.0
    container_name: es7_cold
    environment:
      - cluster.name=geektime-hwc
      - node.name=es7_cold
      - node.attr.box_type=cold
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - discovery.seed_hosts=es7_hot,es7_warm,es7_cold
      - cluster.initial_master_nodes=es7_hot,es7_warm,es7_cold
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - hwc_es7data_cold:/usr/share/elasticsearch/data
    networks:
      - hwc_es7net

# One named local volume per Elasticsearch node for its data directory.
volumes:
  hwc_es7data_hot:
    driver: local
  hwc_es7data_warm:
    driver: local
  hwc_es7data_cold:
    driver: local

# Single bridge network shared by every service above.
networks:
  hwc_es7net:
    driver: bridge
这里坑太多:首先是无法从官方镜像库下载elasticsearch镜像;配置了阿里云镜像之后,不知道为什么还是从官方库下载。后来先手动从指定的镜像库下载才解决:
docker pull docker.mirrors.ustc.edu.cn/library/elasticsearch:7.1.0
不是我写的,教程里的老师写的
# Logstash pipeline: reads movies.csv, splits each row into id / title / year /
# genre, and indexes the result into the "movies" index on a local
# Elasticsearch, echoing every event to stdout as well.
input {
  file {
    path => "/目录xx/movies.csv"
    # Read the file from its first line and keep the read position in
    # /dev/null.
    start_position => "beginning"
    sincedb_path => "/dev/null"
  }
}

filter {
  # Parse each CSV line into the three named columns.
  csv {
    separator => ","
    columns => ["id","content","genre"]
  }

  # Turn the "|"-separated genre string into an array and drop bookkeeping
  # fields.
  mutate {
    split => { "genre" => "|" }
    remove_field => ["path", "host","@timestamp","message"]
  }

  # Split "content" on "(" and copy the two pieces into "title" and "year".
  mutate {
    split => ["content", "("]
    add_field => { "title" => "%{[content][0]}"}
    add_field => { "year" => "%{[content][1]}"}
  }

  # Convert "year" to an integer, trim "title", and drop the temporary
  # "content" field.
  mutate {
    convert => {
      "year" => "integer"
    }
    strip => ["title"]
    remove_field => ["path", "host","@timestamp","message","content"]
  }
}

output {
  # The CSV id is used as the document id.
  elasticsearch {
    hosts => "http://localhost:9200"
    index => "movies"
    document_id => "%{id}"
  }
  stdout {}
}
./bin/logstash -f logstash.conf
Elasticsearch是面向文档的,文档是所有可搜索数据的最小单位
文档会被序列化成JSON格式,保存在Elasticsearch 中
一条记录
格式灵活,不需要预先定义格式
(字段的类型可以指定或者通过Elasticsearch自动推算\支持数组/支持嵌套)结构和mongodb很像
元数据,用于标注文档的相关信息
原始Json数据
相关性打分
Index - 索引是文档的容器,是一类文档的集合
逻辑空间的概念:每个索引都有自己的Mapping定义,用于定义包含的文档的字段名和字段类型
物理空间的概念:索引中的数据分散在Shard上
索引的Mapping与Settings
Mapping定义文档字段的类型
Setting定义不同的数据分布
Elasticsearch的分布式架构的好处
水平扩容
可用性:部分节点停止服务,整个集群的服务不受影响
Elasticsearch的分布式架构
集群名可以在启动时通过 -E cluster.name=xxx 进行设定
一个集群可以有一个或者多个节点
节点是一个Elasticsearch的实例
节点名可以通过配置文件配置,或者启动时候通过 -E node.name=node1 指定
节点的UID保存在data目录下
参加选主流程,成为Master节点
节点都保存了集群的状态,只有Master节点才能修改集群的状态信息
集群状态信息:节点信息、所有的索引和其相关的Mapping与Setting信息、分片的路由信息
Data Node
可以保存数据的节点,叫做Data Node。负责保存分片数据。在数据扩展上起到了至关重要的作用
Coordinating Node
请求分发到合适的节点,最终把结果汇集到一起
默认都起到了Coordinating Node的职责
主分片,用以解决数据水平扩展的问题。通过主分片,可以将数据分布到集群内的所有节点之上
Lucene的实例(一种搜索引擎)
索引创建时指定
,后续不允许修改,除非Reindex
副本,用以解决数据高可用的问题。分片是主分片的拷贝
对于生产环境中分片的设定,需要提前做好容量规划!!!
如果分片数设置过小
无法增加节点实现水平扩展
重新分配耗时(前面提过reindex)
如果分片数设置过大,7.0开始,默认主分片设置成1, 解决了over-sharding的问题
影响相关性打分,影响统计结果的准确性
get _cluster/health
get _cat/nodes
get _cat/shards
GET /_cat/indices?v
GET /_mapping?pretty=true
# 查看movies的index
GET movies/_mapping?pretty=true
PUT users2
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 0
},
"mappings": {
"properties": {
"age": {
"type": "long"
},
"name": {
"type": "keyword"
},
"desc": {
"type": "text"
},
"hobby": {
"type": "text"
}
}
}
}
注意,这里第一次创建文档时,其实还没有创建过mapping,但es会自动根据字段值创建
没指定id,自动创建id
POST users/_doc
{
"user" : "Mike",
"post_date" : "2019-04-15T14:12:12",
"message" : "trying out Kibana"
}
指定id创建
此时,如果id已经存在,则会报错
POST users/_doc/1?op_type=create
{
"user" : "ZYC",
"post_date" : "2016-04-15T14:12:12",
"message" : "i have id"
}
#PUT也可以,这里的_create是关键字
put users/_create/2
{
"user" : "11dian",
"post_date" : "2016-04-15T14:12:12",
"message" : "i have id by put"
}
找到文档,返回200
GET users/_doc/1
{
"_index" : "users",
"_type" : "_doc",
"_id" : "1",
"_version" : 1,
"_seq_no" : 1,
"_primary_term" : 1,
"found" : true,
"_source" : {
"user" : "ZYC",
"post_date" : "2016-04-15T14:12:12",
"message" : "i have id"
}
}
找不到文档,返回404
GET users/_doc/100
{
"_index" : "users",
"_type" : "_doc",
"_id" : "100",
"found" : false
}
index和create不一样的地方:如果文档不存在,则索引新的文档;否则,旧文档会被删除,新的文档被索引,版本信息+1
PUT users/_doc/1
{
"user":"zyc1"
}
{
"_index" : "users",
"_type" : "_doc",
"_id" : "1",
"_version" : 2,//版本增加了
"_seq_no" : 3,
"_primary_term" : 1,
"found" : true,
"_source" : {
"user" : "zyc1"
}
}
真正的更新
POST
需要用到doc属性
POST users/_update/1/
{
"doc":{
"post_date" : "2019-05-15T14:12:12",
"message" : "trying out Elasticsearch"
}
}
{
"_index" : "users",
"_type" : "_doc",
"_id" : "1",
"_version" : 3,
"_seq_no" : 4,
"_primary_term" : 1,
"found" : true,
"_source" : {
"user" : "zyc1",
"post_date" : "2019-05-15T14:12:12",
"message" : "trying out Elasticsearch"
}
}
POST _bulk
{ "index" : { "_index" : "test", "_id" : "1" } }
{ "field1" : "value1" }
{ "delete" : { "_index" : "test", "_id" : "2" } }
{ "create" : { "_index" : "test2", "_id" : "3" } }
{ "field1" : "value3" }
{ "update" : {"_id" : "1", "_index" : "test"} }
{ "doc" : {"field1" : "value1_new"} }
结果
{
"took" : 317,
"errors" : true,
"items" : [
{
"index" : {
"_index" : "test",
"_type" : "_doc",
"_id" : "1",
"_version" : 1,
"result" : "created",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 0,
"_primary_term" : 1,
"status" : 201
}
},
{
"delete" : {
"_index" : "test",
"_type" : "_doc",
"_id" : "2",
"_version" : 1,
"result" : "not_found",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 1,
"_primary_term" : 1,
"status" : 404
}
},
{
"create" : {
"_index" : "test2",
"_type" : "_doc",
"_id" : "3",
"status" : 409,
"error" : {
"type" : "version_conflict_engine_exception",
"reason" : "[3]: version conflict, document already exists (current version [1])",
"index_uuid" : "BsuXOUxpS_eVxmbgjF9iVQ",
"shard" : "0",
"index" : "test2"
}
}
},
{
"update" : {
"_index" : "test",
"_type" : "_doc",
"_id" : "1",
"_version" : 2,
"result" : "updated",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 2,
"_primary_term" : 1,
"status" : 200
}
}
]
}
批量操作,降低了请求次数和网络开销
GET /_mget
{
"docs" : [
{
"_index" : "test",
"_id" : "1"
},
{
"_index" : "test",
"_id" : "2"
}
]
}
{
"docs" : [
{
"_index" : "test",
"_type" : "_doc",
"_id" : "1",
"_version" : 2,
"_seq_no" : 2,
"_primary_term" : 1,
"found" : true,
"_source" : {
"field1" : "value1_new"
}
},
{
"_index" : "test",
"_type" : "_doc",
"_id" : "2",
"found" : false
}
]
}
uri中指定
GET /test/_mget
{
"docs" : [
{
"_id" : "1"
},
{
"_id" : "2"
}
]
}
POST users/_msearch
{}
{"query" : {"match_all" : {}},"size":1}
{"index" : "test"}
{"query" : {"match_all" : {}},"size":2}
结果
{
"took" : 2,
"responses" : [
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "users",
"_type" : "_doc",
"_id" : "zOMveXEBcLw4fF5IGKcV",
"_score" : 1.0,
"_source" : {
"user" : "Mike",
"post_date" : "2019-04-15T14:12:12",
"message" : "trying out Kibana"
}
}
]
},
"status" : 200
},
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "test",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"field1" : "value1_new"
}
}
]
},
"status" : 200
}
]
}