ELK环境搭建

ELK环境搭建

因csdn语法支持与github不太一样,欢迎访问本文github版:https://github.com/JimXiongGM/KnowledgeBasedSearch/blob/master/ELK环境搭建.md

目录

  • 准备
  • mysql数据准备
  • 安装配置elasticsearch 7.3
  • 安装配置Logstash
  • 更新mysql数据
  • 查询效果
  • 使用ik分词器
  • 安装Kibana

准备

本文参考网络资料,搭建Elasticsearch 7.3 + logstash 7.3 + kibana7.3环境,并使用ik分词器从mysql8.0中通过logstash导入数据到es中进行搜索。

将如下文件放入/root/xiazai/。点击可进入文件下载页面。

  1. elasticsearch-7.3.0-linux-x86_64.tar.gz
  2. elasticsearch-analysis-ik-7.3.0.zip
  3. logstash-7.3.0.tar.gz
  4. mysql-connector-java-8.0.16.jar 下拉列表中选择Platform Independent,解压.tar.gz可得到该jar
  5. kibana-7.3.0-linux-x86_64.tar.gz

mysql数据准备

mysql环境搭建可参考MySQL8.0环境搭建。

进入mysql:mysql -u root -p,创建测试用表。

-- Scratch database/table used as the Logstash JDBC source.
CREATE DATABASE IF NOT EXISTS es_db;
USE es_db;

DROP TABLE IF EXISTS es_table;
CREATE TABLE es_table (
  -- Surrogate key; also reused as the Elasticsearch document _id by the pipeline.
  id BIGINT UNSIGNED NOT NULL,
  client_name VARCHAR(256) NOT NULL,
  -- modification_time drives Logstash's incremental sync (:sql_last_value).
  modification_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  insertion_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (id)
  -- NOTE: the original also declared UNIQUE KEY unique_id (id); dropped here
  -- because it duplicates the index MySQL already builds for the PRIMARY KEY.
);

-- Seed rows (测试数据)
INSERT INTO es_table (id, client_name) VALUES
  (1, 'Jim Carrey'),
  (2, 'Mike Myers'),
  (3, 'Bryan Adams');

安装配置elasticsearch 7.3

运行如下命令配置elasticsearch。首先需要配置好JAVA 12,可以参考这里。

将下载好的文件放到/root/xiazai/文件夹下。

cd /root/xiazai/;
tar -zxvf elasticsearch-7.3.0-linux-x86_64.tar.gz -C /opt/

# Elasticsearch refuses to run as root, so create a dedicated user.
# NOTE: the original used `useradd -p elasticsearch`; -p expects an already
# ENCRYPTED hash, so the literal string never became the real password.
# Set the password explicitly with chpasswd instead.
groupadd elsearch;
useradd elsearch -g elsearch;
echo 'elsearch:elasticsearch' | chpasswd;
chown -R elsearch:elsearch  /opt/elasticsearch-7.3.0;
cd /opt/elasticsearch-7.3.0;

# Cluster/node naming; uncomment network.host to allow remote access.
echo '
# allow all hosts
# network.host: 0.0.0.0

# set master node
cluster.name: xgm_Cluster
node.name: xgm_Node
cluster.initial_master_nodes: xgm_Node
' >> ./config/elasticsearch.yml;

# Point the launcher at Java 12 (Java 8 also works, but logs a warning at startup).
sed -i 's/JAVA_HOME/JAVA12_HOME/g' ./bin/elasticsearch-env;

# Install the ik Chinese analyzer from the locally downloaded archive.
./bin/elasticsearch-plugin install file:///root/xiazai/elasticsearch-analysis-ik-7.3.0.zip

# Heap size. NOTE(review): Elasticsearch docs recommend Xms == Xmx to avoid
# heap-resize pauses; 2g/10g is kept from the original — consider equal values.
sed -i 's/-Xms1g/-Xms2g/g' ./config/jvm.options;
sed -i 's/-Xmx1g/-Xmx10g/g' ./config/jvm.options;

# Start in the background as the unprivileged user.
su elsearch
cd /opt/elasticsearch-7.3.0 && nohup ./bin/elasticsearch >> ./elasticsearch.log &
exit

# Foreground alternative:
# su elsearch
# cd /opt/elasticsearch-7.3.0 && ./bin/elasticsearch 

# Check the process is up.
ps aux|grep elasticsearch;

# Verify the node answers.
curl -X GET "localhost:9200"

安装配置Logstash

必须先启动elasticsearch。

参考资料为这个和这个。

此外,插件logstash-input-jdbc的用法在这里

cd /root/xiazai;
tar -xzvf logstash-7.3.0.tar.gz -C /opt/;
# Optional smoke test:
# cd /opt/logstash-7.3.0;
# ./bin/logstash -e 'input { stdin { } } output { stdout {} }';
# CTRL-D  to exit
cd /opt/logstash-7.3.0/;

# Heap settings. NOTE(review): 2g/10g kept from the original; equal Xms/Xmx
# is the usual recommendation to avoid heap-resize pauses.
sed -i 's/-Xms1g/-Xms2g/g' ./config/jvm.options;
sed -i 's/-Xmx1g/-Xmx10g/g' ./config/jvm.options;

# Install the JDBC input plugin.
./bin/logstash-plugin install logstash-input-jdbc;

# Stage the MySQL driver jar and write the pipeline definition.
cp /root/xiazai/mysql-connector-java-8.0.16.jar /opt/logstash-7.3.0/lib/;
echo 'input {
  jdbc {
    jdbc_driver_library => "/opt/logstash-7.3.0/lib/mysql-connector-java-8.0.16.jar"
    # Connector/J 8.x driver class; the old com.mysql.jdbc.Driver name is
    # deprecated and only resolves through a compatibility shim.
    jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/es_db?characterEncoding=utf8&useSSL=false"
    jdbc_user => "root"
    jdbc_password => "xiong"
    jdbc_paging_enabled => true
    tracking_column => "unix_ts_in_secs"
    use_column_value => true
    tracking_column_type => "numeric"
    schedule => "*/10 * * * * *"    # 10 seconds
    last_run_metadata_path => "/opt/logstash-7.3.0/data/mysql_test_1"
    # Incremental sync: only rows modified since the last run, and strictly
    # before NOW() so rows updated mid-poll are not skipped.
    statement => "SELECT *, UNIX_TIMESTAMP(modification_time) AS unix_ts_in_secs FROM es_table WHERE (UNIX_TIMESTAMP(modification_time) > :sql_last_value AND modification_time < NOW()) ORDER BY modification_time ASC"
  }
}
filter {
  mutate {
    # Use the MySQL id as the ES document _id, then drop bookkeeping fields.
    copy => { "id" => "[@metadata][_id]"}
    remove_field => ["id", "@version", "unix_ts_in_secs"]
  }
}
output {
  # stdout { codec =>  "rubydebug"}
  elasticsearch {
      index => "mysql_test_1"
      document_id => "%{[@metadata][_id]}"
  }
}
' > ./config/mysql_test_1.conf;

# Start in the foreground with automatic config reload.
./bin/logstash -f ./config/mysql_test_1.conf --config.reload.automatic

# Background alternative (make sure /logs exists first):
# nohup ./bin/logstash -f ./config/mysql_test_1.conf --config.reload.automatic >> /logs/logstash.log &

开启新的终端,查看es中是否有新的index。

# List all indices, then run a match_all query against the new one.
curl --request GET "localhost:9200/_cat/indices?v"

curl --request GET "localhost:9200/mysql_test_1/_search?pretty" \
  --header 'Content-Type: application/json' \
  --data '
{
  "query": { "match_all": {} }
}
'

更新mysql数据

保持logstash终端开启,开启新终端,向mysql中插入新的数据。

USE es_db;
-- Two new rows; modification_time defaults to now, so the next Logstash
-- poll will pick them up.
INSERT INTO es_table (id, client_name) VALUES
  (4, '中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首'),
  (5, '领馆分为总领事馆,领事馆,副领事馆和领事代理处,负责管理当地本国侨民和其它领事事务。');
exit

继续使用命令确认更新。

# Re-check index stats and document contents after the insert.
curl --request GET "localhost:9200/_cat/indices?v"

curl --request GET "localhost:9200/mysql_test_1/_search?pretty" \
  --header 'Content-Type: application/json' \
  --data '
{
  "query": { "match_all": {} }
}
'

查询效果

查询index:mysql_test_1

# Compare scores for the real term vs. its scrambled characters.
curl --request GET "localhost:9200/mysql_test_1/_search?pretty" \
  --header 'Content-Type: application/json' \
  --data '
{
  "query": { "match": { "client_name": "领事馆" } },
  "size" : 1
}
'
curl --request GET "localhost:9200/mysql_test_1/_search?pretty" \
  --header 'Content-Type: application/json' \
  --data '
{
  "query": { "match": { "client_name": "事馆领" } },
  "size" : 1
}
'

可以看到,“领事馆”查询结果的最高分为3.6995318,“事馆领”查询结果的最高分同样为3.6995318。

使用ik分词器

因为ES默认将中文按字切分,因此“领事馆”和“事馆领”的查询结果是相同的。接下来使用ik分词器,对logstash传入数据进行自动分词。

注意,不少网络资料显示应修改logstash的模板文件,并使用template_overwrite等关键字指定文件路径,但根据笔者实验,并不需要。只需要提前建立好索引,指定mapping值即可。对比过程可以参考笔者的另一篇文档logstash_template_对比.md

另外,我们需要手动增加索引,并指定分词器。

# Create the index first, then attach a mapping so "client_name" is analyzed
# with ik_max_word at index time and ik_smart at query time.
curl --request PUT http://localhost:9200/mysql_test_2?pretty
curl --request POST http://localhost:9200/mysql_test_2/_mapping?pretty \
  --header 'Content-Type:application/json' \
  --data '
{
  "properties": {
      "client_name": {
          "type": "text",
          "analyzer": "ik_max_word",
          "search_analyzer": "ik_smart"
      }
  }
}'
curl --request GET "localhost:9200/_cat/indices?v"

cd /opt/logstash-7.3.0/;
# Second pipeline: same source table, but writes into the ik-mapped index.
echo 'input {
  jdbc {
    jdbc_driver_library => "/opt/logstash-7.3.0/lib/mysql-connector-java-8.0.16.jar"
    # Connector/J 8.x driver class; the old com.mysql.jdbc.Driver name is
    # deprecated and only resolves through a compatibility shim.
    jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/es_db?characterEncoding=utf8&useSSL=false"
    jdbc_user => "root"
    jdbc_password => "xiong"
    jdbc_paging_enabled => true
    tracking_column => "unix_ts_in_secs"
    use_column_value => true
    tracking_column_type => "numeric"
    schedule => "*/10 * * * * *"    # 10 seconds
    # Separate tracking file so this pipeline re-imports from scratch.
    last_run_metadata_path => "/opt/logstash-7.3.0/data/mysql_test_2"
    statement => "SELECT *, UNIX_TIMESTAMP(modification_time) AS unix_ts_in_secs FROM es_table WHERE (UNIX_TIMESTAMP(modification_time) > :sql_last_value AND modification_time < NOW()) ORDER BY modification_time ASC"
  }
}
filter {
  mutate {
    # Use the MySQL id as the ES document _id, then drop bookkeeping fields.
    copy => { "id" => "[@metadata][_id]"}
    remove_field => ["id", "@version", "unix_ts_in_secs"]
  }
}
output {
  # stdout { codec =>  "rubydebug"}
  elasticsearch {
        index => "mysql_test_2"
        document_id => "%{[@metadata][_id]}"
  }
}
' > ./config/mysql_test_2.conf;

# Start in the foreground with automatic config reload.
./bin/logstash -f ./config/mysql_test_2.conf --config.reload.automatic

新开一个终端。

# Same two queries against the ik-analyzed index; scores should now differ.
curl --request GET "localhost:9200/_cat/indices?v"
curl --request GET "localhost:9200/mysql_test_2/_search?pretty" \
  --header 'Content-Type: application/json' \
  --data '
{
  "query": { "match": { "client_name": "领事馆" } },
  "size" : 1
}
'
curl --request GET "localhost:9200/mysql_test_2/_search?pretty" \
  --header 'Content-Type: application/json' \
  --data '
{
  "query": { "match": { "client_name": "事馆领" } },
  "size" : 1
}
'

可以看到,“领事馆”查询结果的最高分为0.95738393,而“事馆领”查询结果的最高分为1.850861,两者不再相同。

安装Kibana

必须先启动elasticsearch,再启动kibana。

# Kibana 7.3 setup; Elasticsearch must already be running on :9200.
cd /root/xiazai;
tar -xzvf kibana-7.3.0-linux-x86_64.tar.gz -C /opt/;
cd /opt/kibana-7.3.0-linux-x86_64;
# Configuration.
# NOTE(review): this append is not idempotent — re-running the script
# duplicates these keys in kibana.yml.
echo '
# set ES host
elasticsearch.hosts: ["http://localhost:9200"]
# allow all hosts
server.host: "0.0.0.0"
# set language
i18n.locale: "zh-CN"
' >> ./config/kibana.yml;
# Start in the background.
# NOTE(review): --allow-root is only needed because this runs as root;
# prefer running Kibana under an unprivileged user.
nohup bin/kibana --allow-root &

访问http://localhost:5601即可。

ES 7.3语法备忘

ES 7.3语法备忘.md

你可能感兴趣的:(Search)