Preface: this article walks through installing the ELK stack (Elasticsearch, Kibana, and Logstash) on Linux.
Run the following commands to install Elasticsearch from the RPM package and prepare its data and log directories:

```bash
rpm -ivh elasticsearch-7.17.4-aarch64.rpm
mkdir -p /home/elasticsearch/data
mkdir -p /home/elasticsearch/logs
# The directories must be owned by the elasticsearch user, or startup will fail
chown -R elasticsearch:elasticsearch /home/elasticsearch
```
Edit the configuration file:

```bash
vi /etc/elasticsearch/elasticsearch.yml
```

```yaml
cluster.name: "my-application"
node.name: node-1
path.data: /home/elasticsearch/data   # data directory; must be owned by the elasticsearch user, or startup fails
path.logs: /home/elasticsearch/logs   # log directory; must be owned by the elasticsearch user, or startup fails
network.host: "localhost"             # bind address
cluster.initial_master_nodes: ["node-1"]
```
The full configuration file:

```yaml
# ======================== Elasticsearch Configuration =========================
#
# NOTE: Elasticsearch comes with reasonable defaults for most settings.
# Before you set out to tweak and tune the configuration, make sure you
# understand what are you trying to accomplish and the consequences.
#
# The primary way of configuring a node is via this file. This template lists
# the most important settings you may want to configure for a production cluster.
#
# Please consult the documentation for further information on configuration options:
# https://www.elastic.co/guide/en/elasticsearch/reference/index.html
#
# ---------------------------------- Cluster -----------------------------------
#
# Use a descriptive name for your cluster:
#
cluster.name: my-application
#
# ------------------------------------ Node ------------------------------------
#
# Use a descriptive name for the node:
#
node.name: node-1
#
# Add custom attributes to the node:
#
#node.attr.rack: r1
#
# ----------------------------------- Paths ------------------------------------
#
# Path to directory where to store the data (separate multiple locations by comma):
path.data: /home/elasticsearch/data
# Path to log files:
path.logs: /home/elasticsearch/logs
# Snapshot repository path (optional)
# path.repo: /home/elasticsearch/backup
#
# ----------------------------------- Memory -----------------------------------
#
# Lock the memory on startup:
#
#bootstrap.memory_lock: true
#
# Make sure that the heap size is set to about half the memory available
# on the system and that the owner of the process is allowed to use this
# limit.
#
# Elasticsearch performs poorly when the system is swapping the memory.
#
# ---------------------------------- Network -----------------------------------
#
# By default Elasticsearch is only accessible on localhost. Set a different
# address here to expose this node on the network:
#
#network.host: 0.0.0.0
# Bind address of this node; 0.0.0.0 listens on all interfaces
network.host: 0.0.0.0
#
# By default Elasticsearch listens for HTTP traffic on the first free port it
# finds starting at 9200. Set a specific HTTP port here:
#
#http.port: 9200
http.port: 9200
transport.port: 9300
#
# For more information, consult the network module documentation.
#
# --------------------------------- Discovery ----------------------------------
#
# Pass an initial list of hosts to perform discovery when this node is started:
# The default list of hosts is ["127.0.0.1", "[::1]"]
#
#discovery.seed_hosts: ["host1", "host2"]
# Nodes in the cluster discover each other and elect a master
discovery.seed_hosts: ["localhost"]
#
#
# Bootstrap the cluster using an initial set of master-eligible nodes:
#
cluster.initial_master_nodes: ["node-1"]
#
# For more information, consult the discovery and cluster formation module documentation.
#
# ---------------------------------- Various -----------------------------------
#
# Require explicit names when deleting indices:
#
#action.destructive_requires_name: true
# Allow cross-origin requests (CORS)
http.cors.enabled: true
http.cors.allow-origin: "*"
# Security settings
# xpack.security.enabled: true
#
# ---------------------------------- Security ----------------------------------
#
# *** WARNING ***
#
# Elasticsearch security features are not enabled by default.
# These features are free, but require configuration changes to enable them.
# This means that users don’t have to provide credentials and can get full access
# to the cluster. Network connections are also not encrypted.
#
# To protect your data, we strongly encourage you to enable the Elasticsearch security features.
# Refer to the following documentation for instructions.
#
# https://www.elastic.co/guide/en/elasticsearch/reference/7.16/configuring-stack-security.html
```
Enable and manage the service with systemd:

```bash
systemctl enable elasticsearch
systemctl start elasticsearch    # start
systemctl stop elasticsearch     # stop
systemctl status elasticsearch   # check status
```
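Once the service is running, a quick sanity check (assuming the default HTTP port 9200 and the `localhost` bind address configured above):

```bash
# Should return a JSON document with the node name, cluster name, and version
curl http://localhost:9200
```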
IK Analysis for Elasticsearch
=============================
The IK Analysis plugin integrates the Lucene IK analyzer (http://code.google.com/p/ik-analyzer/) into elasticsearch and supports customized dictionaries.

Analyzers: `ik_smart`, `ik_max_word`; Tokenizers: `ik_smart`, `ik_max_word`
Versions
--------

| IK version | ES version |
|---|---|
| master | 7.x -> master |
| 6.x | 6.x |
| 5.x | 5.x |
| 1.10.6 | 2.4.6 |
| 1.9.5 | 2.3.5 |
| 1.8.1 | 2.2.1 |
| 1.7.0 | 2.1.1 |
| 1.5.0 | 2.0.0 |
| 1.2.6 | 1.0.0 |
| 1.2.5 | 0.90.x |
| 1.1.3 | 0.20.x |
| 1.0.0 | 0.16.2 -> 0.19.0 |
Download elasticsearch-analysis-ik from the releases page (https://github.com/medcl/elasticsearch-analysis-ik/releases), create an `ik` directory under the es plugins directory, and unzip the package into it.

Plugins directory: /usr/share/elasticsearch/plugins
Unzip command: unzip filename.zip
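Concretely, for the 7.17.4 install in this article that might look like the following (the zip path and filename are assumptions; use whichever release you downloaded):

```bash
cd /usr/share/elasticsearch/plugins
mkdir ik
# hypothetical filename matching the ES version used in this article
unzip /tmp/elasticsearch-analysis-ik-7.17.4.zip -d ik
chown -R elasticsearch:elasticsearch ik
```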
The upstream README describes two install options:

* Option 1 - download a pre-built package from here: https://github.com/medcl/elasticsearch-analysis-ik/releases

create plugin folder `cd your-es-root/plugins/ && mkdir ik`

unzip plugin to folder `your-es-root/plugins/ik`

* Option 2 - use elasticsearch-plugin to install (supported from version v5.5.1):

```bash
./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.3.0/elasticsearch-analysis-ik-6.3.0.zip
```

NOTE: replace `6.3.0` with your own elasticsearch version
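For the ES 7.17.4 install used in this article, the command would presumably be (assuming a matching v7.17.4 release exists on the releases page):

```bash
/usr/share/elasticsearch/bin/elasticsearch-plugin install \
  https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.17.4/elasticsearch-analysis-ik-7.17.4.zip
```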
1. Create an index

```bash
curl -XPUT http://localhost:9200/index
```
2. Create a mapping

```bash
curl -XPOST http://localhost:9200/index/_mapping -H 'Content-Type:application/json' -d'
{
    "properties": {
        "content": {
            "type": "text",
            "analyzer": "ik_max_word",
            "search_analyzer": "ik_smart"
        }
    }
}'
```
3. Index some docs

```bash
curl -XPOST http://localhost:9200/index/_create/1 -H 'Content-Type:application/json' -d'
{"content":"美国留给伊拉克的是个烂摊子吗"}
'
curl -XPOST http://localhost:9200/index/_create/2 -H 'Content-Type:application/json' -d'
{"content":"公安部:各地校车将享最高路权"}
'
curl -XPOST http://localhost:9200/index/_create/3 -H 'Content-Type:application/json' -d'
{"content":"中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"}
'
curl -XPOST http://localhost:9200/index/_create/4 -H 'Content-Type:application/json' -d'
{"content":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"}
'
```
4. Query with highlighting

```bash
curl -XPOST http://localhost:9200/index/_search -H 'Content-Type:application/json' -d'
{
    "query" : { "match" : { "content" : "中国" }},
    "highlight" : {
        "pre_tags" : ["<tag1>", "<tag2>"],
        "post_tags" : ["</tag1>", "</tag2>"],
        "fields" : {
            "content" : {}
        }
    }
}
'
```
Result
```json
{
"took": 14,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 2,
"hits": [
{
"_index": "index",
"_type": "fulltext",
"_id": "4",
"_score": 2,
"_source": {
"content": "中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"
},
"highlight": {
"content": [
"中国 驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首 "
]
}
},
{
"_index": "index",
"_type": "fulltext",
"_id": "3",
"_score": 2,
"_source": {
"content": "中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"
},
"highlight": {
"content": [
"均每天扣1艘中国 渔船 "
]
}
}
]
}
}
```
IKAnalyzer.cfg.xml can be located at {conf}/analysis-ik/config/IKAnalyzer.cfg.xml or {plugins}/elasticsearch-analysis-ik-*/config/IKAnalyzer.cfg.xml:

```xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer extension configuration</comment>
    <entry key="ext_dict">custom/mydict.dic;custom/single_word_low_freq.dic</entry>
    <entry key="ext_stopwords">custom/ext_stopword.dic</entry>
    <entry key="remote_ext_dict">location</entry>
    <entry key="remote_ext_stopwords">http://xxx.com/xxx.dic</entry>
</properties>
```
The plugin currently supports hot-reloading the IK dictionary through the following settings from the config file above:

```xml
<entry key="remote_ext_dict">location</entry>
<entry key="remote_ext_stopwords">location</entry>
```
Here `location` is a URL, e.g. http://yoursite.com/getCustomDict. A request to it only needs to satisfy two requirements to enable hot dictionary updates:

1. The HTTP response must return two headers, `Last-Modified` and `ETag`. Both are strings; whenever either one changes, the plugin fetches the list again and updates the dictionary.
2. The response body must contain one word per line, with `\n` as the line separator.

If both requirements are met, hot updates work without restarting the ES instance.

In practice, you can put the words that need automatic updating into a UTF-8 encoded .txt file served by nginx or any other simple HTTP server; when the .txt file changes, the server returns the corresponding Last-Modified and ETag automatically on the next client request. A separate tool can extract the relevant vocabulary from your business system and update the .txt file.
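A quick way to sanity-check the two header requirements against your dictionary URL (the URL below is the hypothetical example from above):

```bash
# Inspect the response headers; Last-Modified and ETag must both be present
curl -I http://yoursite.com/getCustomDict
```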
1. Why doesn't my custom dictionary take effect?

Make sure your extension dictionary files are encoded as UTF-8.

2. How do I install the plugin manually?
```bash
git clone https://github.com/medcl/elasticsearch-analysis-ik
cd elasticsearch-analysis-ik
git checkout tags/{version}
mvn clean
mvn compile
mvn package
```
Copy and unzip the release file #{project_path}/elasticsearch-analysis-ik/target/releases/elasticsearch-analysis-ik-*.zip into your elasticsearch plugins directory, e.g. plugins/ik, then restart elasticsearch.
3. The analyzer test fails

Test by calling the analyze API on a specific index rather than calling the analyze API directly. For example (`my_ik` is a custom tokenizer assumed to be defined in that index's settings):

```bash
curl -XGET "http://localhost:9200/your_index/_analyze" -H 'Content-Type: application/json' -d'
{
   "text":"中华人民共和国MN","tokenizer": "my_ik"
}'
```
ik_max_word: splits the text at the finest granularity; for example, "中华人民共和国国歌" is split into "中华人民共和国,中华人民,中华,华人,人民共和国,人民,人,民,共和国,共和,和,国国,国歌", exhausting every possible combination. Suited to Term queries.

ik_smart: performs the coarsest-grained split; for example, "中华人民共和国国歌" is split into "中华人民共和国,国歌". Suited to Phrase queries.

As of v5.0.0, use `ik_smart` and `ik_max_word` respectively as the ik analyzer and tokenizer names.
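To compare the two on a live cluster, the standard `_analyze` API can be called with each analyzer (assuming the IK plugin is installed and ES was restarted afterwards):

```bash
# Finest-grained segmentation
curl -XGET "http://localhost:9200/_analyze" -H 'Content-Type: application/json' -d'
{"analyzer": "ik_max_word", "text": "中华人民共和国国歌"}'

# Coarsest-grained segmentation
curl -XGET "http://localhost:9200/_analyze" -H 'Content-Type: application/json' -d'
{"analyzer": "ik_smart", "text": "中华人民共和国国歌"}'
```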
Note: if the install and startup succeed but the process stops shortly afterwards, the cause may be a missing C++ runtime dependency on the system.
Next, install Kibana. The Kibana version should match Elasticsearch (7.17.4 in this article):

```bash
wget https://artifacts.elastic.co/downloads/kibana/kibana-7.17.4-x86_64.rpm
rpm -ivh kibana-7.17.4-x86_64.rpm
```
Edit the configuration file:

```bash
vim /etc/kibana/kibana.yml
```

```yaml
server.port: 5601
server.host: "localhost"
elasticsearch.hosts: ["http://localhost:9200"]
i18n.locale: "zh-CN"
```
```bash
sudo systemctl daemon-reload
sudo systemctl enable kibana
sudo systemctl start kibana            # start
sudo systemctl stop kibana             # stop
sudo systemctl status kibana.service   # check status
sudo systemctl cat kibana
```
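Once Kibana is up (it can take a minute to start), a quick check against its status API (standard endpoint, assuming the default port 5601):

```bash
curl -s http://localhost:5601/api/status
```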
```bash
rpm -qa | grep kibana   # check whether kibana is installed
rpm -e package          # uninstall (replace `package` with the installed package name)
```
To run Kibana under Docker instead, prepare the host directories:

```bash
mkdir -p /home/kibana/config
mkdir -p /home/kibana/data
mkdir -p /home/kibana/plugins
```
Edit the configuration file vim /home/kibana/config/kibana.yml:

```yaml
server.name: kibana
# server host address ("0" binds all interfaces)
server.host: "0"
# elasticsearch host address
elasticsearch.hosts: [ "http://localhost:9200" ]
xpack.monitoring.ui.container.elasticsearch.enabled: true
i18n.locale: "zh-CN"
```

```bash
sudo chown -R elasticsearch:elasticsearch /home/kibana
```
```bash
docker run -it -d --privileged --name kibana --restart=always -p 5601:5601 \
  -v /home/kibana/config:/usr/share/kibana/config \
  kibana:7.17.4
```

Or, passing the settings as environment variables:

```bash
docker run -it -d --privileged --name kibana -p 5601:5601 \
  -e ELASTICSEARCH_URL=http://172.25.18.243:9200 \
  -e ELASTICSEARCH_HOSTS=http://172.25.18.243:9200 \
  -e XPACK_SECURITY_ENABLED=false \
  -e XPACK_MONITORING_ENABLED=true \
  -e XPACK_MONITORING_UI_CONTAINER_ELASTICSEARCH_ENABLED=true \
  -v /home/kibana/config:/usr/share/kibana/config \
  kibana:7.17.4
```
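If the container does not come up, its logs are the first place to look (standard Docker command):

```bash
docker logs -f kibana
```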
Install Logstash from the RPM package:

```bash
rpm -ivh logstash-7.17.4.rpm
```
| Path | Description |
|---|---|
| /etc/logstash/logstash.yml | Service configuration file (log and data directories) |
| /etc/logstash/logstash-sample.conf | Sample Logstash pipeline configuration |
| /etc/logstash/conf.d/ | Directory for your Logstash pipeline configuration files |
| /etc/logstash/jvm.options | Logstash JVM settings; the heap can be lowered to 512m here |
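For the 512m heap mentioned above, the relevant lines in /etc/logstash/jvm.options would look like this (standard JVM heap flags):

```
-Xms512m
-Xmx512m
```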
Error 1 (missing permissions):

```
unable to load ./jdbc-driver/postgresql-42.2.5.jre7.jar from :jdbc_driver_library, file not readable (please check user and group permissions for the path)
```

Fix: grant the logstash user ownership of the jdbc-driver directory and its files:

```bash
chown -R logstash:logstash /etc/logstash/conf.d
```

Error 2 (relative path): change the JDBC driver path to an absolute path:

```
"jdbc_driver_library" => "/etc/logstash/conf.d/jdbc-driver/postgresql-42.5.2.jar"
```
Create a configuration file (logstash.conf) in the /etc/logstash/conf.d/ directory. Per the official docs (the `type` document type is deprecated in es 7.17.4; multiple pipeline files, *.conf, can be configured), you can create several .conf files and run them all when starting logstash.
```conf
# Sample Logstash configuration for creating a simple
# Beats -> Logstash -> Elasticsearch pipeline.
# logstash config
input {
  # Read from PostgreSQL over JDBC
  jdbc {
    jdbc_driver_class => "org.postgresql.Driver"
    jdbc_driver_library => "/etc/logstash/conf.d/jdbc-driver/postgresql-42.5.2.jar"
    # Connection string with automatic reconnection enabled
    jdbc_connection_string => "jdbc:postgresql://localhost:5433/poi?autoReconnect=true&characterEncoding=utf8&useSSL=false&serverTimezone=UTC&rewriteBatchedStatements=true"
    # Database credentials
    jdbc_user => "postgres"
    jdbc_password => "pg123456"
    # Time zone setting
    # jdbc_default_timezone => "Asia/Shanghai"
    # Geometry query statements
    # ------------- these cannot convert the geometry type, and the gid field must be listed explicitly -------------
    # statement => "select *, st_asgeojson(poi_geom) as geometry from poi2"
    # statement => "select gid,name,adress,telephone,category,st_asewkt(poi_geom) as geometry from poi2"
    # statement => "select gid,name,adress,telephone,category,st_asgeojson(poi_geom) as geom from poi2"
    # ------------- the following approaches do work -------------
    # Query parameters
    # parameters => { "is_deleted" => "1" }
    # Works: fetch the coordinates as a string; the types must be converted later, and the string puts latitude first, longitude second
    # statement => "select id,name,address,telephone,category,concat_ws(',',st_y(geom),st_x(geom)) as geometry from synchronous"
    # Works: fetch the coordinates as an array
    statement => "select gid,bsm,ysdm,zjnr,bz,st_asgeojson(poi_geom)::json->>'coordinates' as location from poi"
    # Enable tracking; when true, tracking_column must name a database column, used for incremental sync
    use_column_value => true
    # Column to track
    tracking_column => "gid"
    # Enable paged queries
    jdbc_paging_enabled => true
    # JDBC fetch size
    jdbc_fetch_size => "10000"
    # Rows per page
    jdbc_page_size => "500"
    # Sync schedule: run once per minute (cron syntax)
    schedule => "* * * * *"
    # Avoid garbled Chinese characters
    codec => plain { charset => "UTF-8" }
    # Event type
    type => "poi"
  }
}
# Field filtering
filter {
  json {
    # Skip invalid JSON
    # skip_on_invalid_json => true
    # Field holding the JSON string
    source => "location"
    # Target field for the parsed JSON
    target => "geometry"
    # remove_field => ["@timestamp","@version","geom"]
    # remove_tag => ["@timestamp","@version"]
  }
  mutate {
    # Convert the coordinates
    # convert => {"x" => "float"}
    # convert => {"y" => "float"}
    # Convert the geometry array elements to float
    # convert => {
    #   "[geometry][0]" => "float"
    #   "[geometry][1]" => "float"
    # }
    # Remove fields
    # remove_field => "@timestamp"
    remove_field => "@version"
  }
}
output {
  elasticsearch {
    hosts => ["http://localhost:9200"]
    index => "poi"
    # Important: a unique document id must be specified
    document_id => "%{gid}"
    # Index mapping template file, matching the data being synced
    # template => "D:\ElasticSearch\logstash-7.13.4-windows-x86_64\template.json"
    # Set to false for finer control over dynamic templates
    # manage_template => false
    # Overwrite the template
    template_overwrite => true
    # template_name => "test"
    # user => "elastic"
    # password => "changeme"
  }
  stdout {
    codec => json_lines
  }
}
```
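With the file in place, the pipeline can be validated and the service started; the sketch below assumes the default RPM install paths (`-t` is Logstash's config-test flag):

```bash
# Validate the configuration without starting a pipeline
sudo -u logstash /usr/share/logstash/bin/logstash --path.settings /etc/logstash -t

# Run Logstash as a service; it picks up every *.conf under /etc/logstash/conf.d/
sudo systemctl start logstash
```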
Configuration reference: https://www.elastic.co/guide/en/logstash/current/plugins-inputs-jdbc.html

PostgreSQL JDBC driver download: https://jdbc.postgresql.org/