- 拉镜像
docker pull elasticsearch:6.5.4
6.5.4: Pulling from library/elasticsearch
a02a4930cb5d: Downloading [===================> ] 30MB/75.17MB
dd8a94cca3f9: Downloading [=> ] 6.421MB/188.1MB
bd73f551dee4: Download complete
70de352c4efc: Downloading [===================> ] 2.637MB/6.859MB
0b5ae4c7310f: Waiting
489d9f8b18f1: Waiting
8ba96caf5951: Waiting
f1df04f27c5f: Waiting
- 查看镜像
docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
elasticsearch 6.5.4 93109ce1d590 5 weeks ago 774MB
- 启动一个容器
elasticsearch/jvm.options 默认配置 -Xms2g -Xmx2g 来指定内存 我使用的是1G内存 所以需要指定-Xms -Xmx 大小
内存够大就使用默认-Xmx 启动容器如下:
docker run -d --name elasticsearch --net somenetwork -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" elasticsearch:6.5.4
d2953375ec7ea5eef1f84d9d39f3f0678a17274d7698716456034c1563aab864
内存比较小比如我1g 就需要指定-Xms -Xmx 大小
docker run -d --name elasticsearch --net somenetwork -p 9200:9200 -p 9300:9300 -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" -e "discovery.type=single-node" elasticsearch:6.5.4
ed40afba226b0ca3a148f41d142d195529b902726b0019742a83a8d595ed5583
9300端口: ES节点之间通讯使用
9200端口: ES节点 和 外部 通讯使用
- 查看启动容器
docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
d2953375ec7e elasticsearch:6.5.4 "/usr/local/bin/dock…" 37 seconds ago Exited (1) 36 seconds ago elasticsearch
curl -v 127.0.0.1:9200
* Rebuilt URL to: 127.0.0.1:9200/
* Trying 127.0.0.1...
* Connected to 127.0.0.1 (127.0.0.1) port 9200 (#0)
> GET / HTTP/1.1
> Host: 127.0.0.1:9200
> User-Agent: curl/7.47.0
> Accept: */*
>
< HTTP/1.1 200 OK
< content-type: application/json; charset=UTF-8
< content-length: 494
<
{
"name" : "JFvwCOs",
"cluster_name" : "docker-cluster",
"cluster_uuid" : "gFw-ERtCRs-5vc-zEMBbIg",
"version" : {
"number" : "6.5.4",
"build_flavor" : "default",
"build_type" : "tar",
"build_hash" : "d2ef93d",
"build_date" : "2018-12-17T21:17:40.758843Z",
"build_snapshot" : false,
"lucene_version" : "7.5.0",
"minimum_wire_compatibility_version" : "5.6.0",
"minimum_index_compatibility_version" : "5.0.0"
},
"tagline" : "You Know, for Search"
}
* Connection #0 to host 127.0.0.1 left intact
- 安装head插件
最简单方式可以直接安装谷歌插件的elasticsearch-head-chrome,也可以在Chrome网上应用店上找到
下面是通过docker 安装方式
docker pull mobz/elasticsearch-head:5
* Pulling from mobz/elasticsearch-head
75a822cd7888: Pulling fs layer
57de64c72267: Pulling fs layer
4306be1e8943: Pulling fs layer
871436ab7225: Waiting
0110c26a367a: Waiting
1f04fe713f1b: Waiting
723bac39028e: Waiting
7d8cb47f1c60: Waiting
7328dcf65c42: Waiting
b451f2ccfb9a: Waiting
304d5c28a4cf: Waiting
4cf804850db1: Waiting
启动head
docker run -d -p 9100:9100 --name elasticsearch-head mobz/elasticsearch-head:5
a31c966d1eec8c83fceefd0515df2f9e91986f08315d0a0d07b9ae261086d7d4
-
然后浏览器访问 127.0.0.1:9100
出现这个界面表示 elasticsearch-head 安装成功
但是发现“集群健康值:未连接”，说明没有和 elasticsearch 连接成功，需要对 elasticsearch 配置跨域
- elasticsearch 跨域配置
1.进入elasticsearch容器
docker exec -it 9d53699397a8 /bin/bash
[root@9d53699397a8 elasticsearch]#
2.安装vim
[root@9d53699397a8 elasticsearch]# yum install -y vim
3.修改/usr/share/elasticsearch/config/elasticsearch.yml
vim elasticsearch.yml
cluster.name: "docker-cluster"
network.host: 0.0.0.0
# minimum_master_nodes need to be explicitly set when bound on a public IP
# set to 1 to allow single node clusters
# Details: https://github.com/elastic/elasticsearch/pull/17288
discovery.zen.minimum_master_nodes: 1
# head插件设置
http.cors.enabled: true
http.cors.allow-origin: "*"
4.重启容器
docker restart 9d53699397a8
- 使用 Logstash 将mysql 数据库数据同步到 elasticsearch
1.下载
wget https://artifacts.elastic.co/downloads/logstash/logstash-6.5.4.tar.gz
2.解压
tar -zvxf logstash-6.5.4.tar.gz
3.修改jvm
jvm.options 默认
-Xms1g
-Xmx1g
我机器内存很小所以需要修改
/opt/logstash-6.5.4/config# vim jvm.options
-Xms512m
-Xmx512m
4.运行
/opt/logstash-6.5.4/bin#./logstash -e 'input { stdin { } } output { stdout {} }'
5.安装 jdbc 和 elasticsearch 插件
/opt/logstash-6.5.4# bin/logstash-plugin install logstash-input-jdbc
Validating logstash-input-jdbc
Installing logstash-input-jdbc
Installation successful
/opt/logstash-6.5.4# bin/logstash-plugin install logstash-output-elasticsearch
Validating logstash-output-elasticsearch
Installing logstash-output-elasticsearch
Installation successful
6.下载mysql-connector-java
7.编写配置文件 sync_table.conf
注意:数据库中删除的数据无法同步到ES中,只能同步insert update 数据
/opt/logstash-6.5.4/config# vim sync_table.conf
input {
jdbc {
# mysql相关jdbc配置
jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf-8&useSSL=false"
jdbc_user => "root"
jdbc_password => "123456"
# jdbc连接mysql驱动的文件 此处路径一定要正确 否则会报com.mysql.cj.jdbc.Driver could not be loaded
jdbc_driver_library => "/opt/logstash-6.5.4/sync_config/mysql-connector-java-8.0.12.jar"
# the name of the driver class for mysql
jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
jdbc_paging_enabled => true
jdbc_page_size => "50000"
jdbc_default_timezone =>"Asia/Shanghai"
# mysql文件, 也可以直接写SQL语句在此处,如下:
# 如果要使字段和实体类的驼峰命名法一致 则需要这样写sql select d_name as dName, c_id as cId from area where update_time >= :sql_last_value order by update_time asc
statement => "select * from area where update_time >= :sql_last_value order by update_time asc"
# statement_filepath => "./config/jdbc.sql"
# 这里类似crontab,可以定制定时操作,比如每分钟执行一次同步(分 时 天 月 年)
schedule => "* * * * *"
#type => "jdbc"
# 是否记录上次执行结果, 如果为真,将会把上次执行到的 tracking_column 字段的值记录下来,保存到 last_run_metadata_path 指定的文件中
#record_last_run => true
# 是否需要记录某个column 的值,如果record_last_run为真,可以自定义我们需要 track 的 column 名称,此时该参数就要为 true. 否则默认 track 的是 timestamp 的值.
use_column_value => true
# 如果 use_column_value 为真,需配置此参数. track 的数据库 column 名,该 column 必须是递增的. 一般是mysql主键
tracking_column => "update_time"
tracking_column_type => "timestamp"
last_run_metadata_path => "area_logstash_capital_bill_last_id"
# 是否清除 last_run_metadata_path 的记录,如果为真那么每次都相当于从头开始查询所有的数据库记录
clean_run => false
#是否将 字段(column) 名称转小写
#lowercase_column_names => false
}
}
filter {
date {
match => [ "update_time", "yyyy-MM-dd HH:mm:ss" ]
timezone => "Asia/Shanghai"
}
}
output {
elasticsearch {
hosts => ["127.0.0.1:9200"]
# index名 自定义 相当于数据库 对于实体类上@Document(indexName = "sys_core", type = "area")indexName
index => "sys_core"
#索引的类型 相当于数据库里面的表 对于实体类上@Document(indexName = "sys_core", type = "area")type
document_type => "area"
#需要关联的数据库中有有一个id字段,对应索引的id号
document_id => "%{id}"
template_overwrite => true
}
# 这里输出调试,正式运行时可以注释掉
stdout {
codec => json_lines
}
}
- 启动
/opt/logstash-6.5.4# bin/logstash -f config/sync_table.conf
8.配置同步多张表
比如想同步tableA tableB tableC 3张表 则需要创建3个 sync_table.conf 文件 sync_tableA.conf sync_tableB.conf sync_tableC.conf
只是修改里面的sql语句和索引名
sync_table.conf 文件创建好后最后在 /opt/logstash-6.5.4/config/pipelines.yml 配置
- pipeline.id: table1
path.config: "/opt/logstash-6.5.4/sync_config/sync_tableA.conf"
- pipeline.id: table2
path.config: "/opt/logstash-6.5.4/sync_config/sync_tableB.conf"
- pipeline.id: table3
path.config: "/opt/logstash-6.5.4/sync_config/sync_tableC.conf"
然后启动
/opt/logstash-6.5.4# bin/logstash
最后成功同步数据
[2019-01-24T22:40:00,333][INFO ][logstash.inputs.jdbc ] (0.013511s) SELECT version()
[2019-01-24T22:40:00,340][INFO ][logstash.inputs.jdbc ] (0.002856s) SELECT version()
[2019-01-24T22:40:00,349][INFO ][logstash.inputs.jdbc ] (0.009841s) SELECT version()
[2019-01-24T22:40:00,408][INFO ][logstash.inputs.jdbc ] (0.005667s) SELECT count(*) AS `count` FROM (select * from area where update_time >= '2019-01-23 22:36:24' order by update_time asc) AS `t1` LIMIT 1
[2019-01-24T22:40:00,410][INFO ][logstash.inputs.jdbc ] (0.002467s) SELECT count(*) AS `count` FROM (select * from dictionaries where update_time >= '2019-01-24 06:52:53' order by update_time asc) AS `t1` LIMIT 1
[2019-01-24T22:41:00,361][INFO ][logstash.inputs.jdbc ] (0.000663s) SELECT version()
9.单机版(只有一个节点) 集群状态为yellow 和索引为Unassigned
这里解释一下为什么集群状态为yellow
由于我们是单节点部署elasticsearch,而默认的分片副本数目配置为1,而相同的分片不能在一个节点上,所以就存在副本分片指定不明确的问题,所以显示为yellow,我们可以通过在elasticsearch集群上添加一个节点来解决问题,如果你不想这么做,你可以删除那些指定不明确的副本分片(当然这不是一个好办法)但是作为测试和解决办法还是可以尝试的,下面我们试一下删除副本分片的办法
删除副本分片 即可解决
curl -H "Content-Type: application/json" -X PUT http://localhost:9200/_settings -d '{"number_of_replicas":0}'
{"acknowledged":true}
curl -v http://localhost:9200/_cluster/health?pretty
* Trying 127.0.0.1...
* Connected to localhost (127.0.0.1) port 9200 (#0)
> GET /_cluster/health?pretty HTTP/1.1
> Host: localhost:9200
> User-Agent: curl/7.47.0
> Accept: */*
>
< HTTP/1.1 200 OK
< content-type: application/json; charset=UTF-8
< content-length: 470
<
{
"cluster_name" : "docker-cluster",
"status" : "green",
"timed_out" : false,
"number_of_nodes" : 1,
"number_of_data_nodes" : 1,
"active_primary_shards" : 10,
"active_shards" : 10,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 0,
"delayed_unassigned_shards" : 0,
"number_of_pending_tasks" : 0,
"number_of_in_flight_fetch" : 0,
"task_max_waiting_in_queue_millis" : 0,
"active_shards_percent_as_number" : 100.0
}
-
Elasticsearch设置最大返回条数
解决异常
Caused by: org.elasticsearch.search.query.QueryPhaseExecutionException: Result window is too large, from + size must be less than or equal to: [10000] but was [100000]. See the scroll api for a more efficient way to request large data sets. This limit can be set by changing the [index.max_result_window] index level setting.
curl -H "Content-Type: application/json" -X PUT http://localhost:9200/_settings -d '{"max_result_window":2147483647}'
注意:
1.size的大小不能超过index.max_result_window这个参数的设置,默认为10,000。
2.需要搜索分页,可以通过from size组合来进行。from表示从第几行开始,size表示查询多少条文档。from默认为0,size默认为10;
通过页面设置方法参考:https://blog.csdn.net/chenhq_/article/details/77507956