服务器环境:CentOS 7
ELK流量分析流程
1.通过hive对昨日的流量日志数据,进行离线批处理,按维度将一些指标预先聚合出来,将结果写入mysql,默认有一些预先处理好的数据已经存在mysql
2.手动准备一些样例数据,然后写入mysql中,装一个mysql,模拟成是hive导入mysql的一份数据
3.通过logstash,将mysql中的数据导入es中
4.通过kibana+各种es聚合语法,生成各种各样的报表出来
[root@elasticsearch01 ~]# mkdir -p /data/server
[root@elasticsearch01 ~]# chown -R elasticsearch:elasticsearch /data/server/
tar -zxvf logstash-5.6.3.tar.gz -C /data/server/
mv logstash-5.6.3/ logstash
cd logstash
logstash pipeline有两个必需的组成部分:input和output,也可以包含一个可选的filter
input plugin负责从数据源中获取数据
filter plugin负责对数据进行定制化的处理和修改
output plugin负责将数据写入目的地中
./bin/logstash -e 'input { stdin { } } output { stdout {} }'
-e:直接在命令行对logstash进行配置,从命令行接受输入,将输出写入命令行
输入:hello world,可以看到输出,logstash会补充timestamp和ip地址
用ctrl-d可以结束这个pipeline
wget http://dev.mysql.com/get/mysql-community-release-el7-5.noarch.rpm
rpm -ivh mysql-community-release-el7-5.noarch.rpm
yum install -y mysql-community-server
重启mysql服务
service mysqld restart
设置mysql密码
mysql -u root
mysql> set password for 'root'@'localhost' =password('123456');
mysql> quit
登录mysql
mysql -u root -p123456
创建数据库
mysql> create database patpat_demo;
使用数据库
use patpat_demo;
datekey cookie section userid province city pv is_return_visit is_bounce_visit visit_time visit_page_cnt
日期 cookie 版块 用户id 省份 城市 pv 是否老用户回访 是否跳出 访问时间 访问页面数量
创建表
create table user_access_log_aggr (
datekey varchar(255),
cookie varchar(255),
section varchar(255),
userid int,
province varchar(255),
city varchar(255),
pv int,
is_return_visit int,
is_bounce_visit int,
visit_time int,
visit_page_cnt int
);
插入数据
insert into user_access_log_aggr values('20171001', 'dasjfkaksdfj33', 'game', 1, 'beijing', 'beijing', 10, 0, 1, 600000, 3);
insert into user_access_log_aggr values('20171001', 'dasjadfssdfj33', 'game', 2, 'jiangsu', 'nanjing', 5, 0, 0, 700000, 5);
insert into user_access_log_aggr values('20171001', 'dasjffffksfj33', 'sport', 1, 'beijing', 'beijing', 8, 1, 0, 800000, 6);
insert into user_access_log_aggr values('20171001', 'dasjdddksdfj33', 'sport', 2, 'jiangsu', 'nanjing', 20, 0, 1, 900000, 7);
insert into user_access_log_aggr values('20171001', 'dasjeeeksdfj33', 'sport', 3, 'jiangsu', 'nanjing', 30, 1, 0, 600000, 10);
insert into user_access_log_aggr values('20171001', 'dasrrrrksdfj33', 'news', 3, 'jiangsu', 'nanjing', 40, 0, 0, 600000, 12);
insert into user_access_log_aggr values('20171001', 'dasjtttttdfj33', 'news', 4, 'shenzhen', 'shenzhen', 50, 0, 1, 500000, 4);
insert into user_access_log_aggr values('20171001', 'dasjfkakkkfj33', 'game', 4, 'shenzhen', 'shenzhen', 20, 1, 0, 400000, 3);
insert into user_access_log_aggr values('20171001', 'dasjyyyysdfj33', 'sport', 5, 'guangdong', 'guangzhou', 10, 0, 0, 300000, 1);
insert into user_access_log_aggr values('20171001', 'dasjqqqksdfj33', 'news', 5, 'guangdong', 'guangzhou', 9, 0, 1, 200000, 2);
tar -zxvf elasticsearch-5.6.3.tar.gz -C /data/server/
[root@elasticsearch01 server]# mv elasticsearch-5.6.3/ elasticsearch
[root@elasticsearch01 server]# chown -R elasticsearch:elasticsearch /data/server/elasticsearch
cd /data/server/elasticsearch
编辑jvm.options,设置JVM堆内存大小
[elasticsearch@elasticsearch01 elasticsearch]$ vi config/jvm.options
-Xms512m
-Xmx512m
编辑elasticsearch.yml,设置数据目录、日志目录和绑定地址
[elasticsearch@elasticsearch01 elasticsearch]$ vi config/elasticsearch.yml
path.data: /data/server/elasticsearch/data
path.logs: /data/server/elasticsearch/logs
# 设置绑定地址:0.0.0.0表示监听所有网卡(不是回环地址),允许其他机器访问
network.host: 0.0.0.0
cd /data/server/elasticsearch
./bin/elasticsearch -d
[elasticsearch@elasticsearch01 ~]$ curl -XGET localhost:9200
cd /data/server/elasticsearch
tail -f elasticsearch.log
yum install -y gem
gem sources --add https://ruby.taobao.org/ --remove https://rubygems.org/
gem sources -l
gem install bundler
bundle config mirror.https://rubygems.org https://ruby.taobao.org
在logstash目录下,vi Gemfile,修改source的值为: "https://ruby.taobao.org"
[elasticsearch@elasticsearch01 logstash]$ vi Gemfile
[elasticsearch@elasticsearch01 logstash]$ ./bin/logstash-plugin install logstash-input-jdbc
wget https://github.com/logstash-plugins/logstash-input-jdbc/archive/v4.2.4.zip
unzip v4.2.4.zip
cd logstash-input-jdbc-4.2.4
修改source的值
修改source的值为: "https://ruby.taobao.org"
vi Gemfile
vi logstash-input-jdbc.gemspec
找到
s.files = `git ls-files`.split($\)
改为:
s.files = [".gitignore", "CHANGELOG.md", "Gemfile", "LICENSE", "NOTICE.TXT", "README.md", "Rakefile", "lib/logstash/inputs/jdbc.rb", "lib/logstash/plugin_mixins/jdbc.rb", "logstash-input-jdbc.gemspec", "spec/inputs/jdbc_spec.rb"]
gem build logstash-input-jdbc.gemspec
mv logstash-input-jdbc-4.2.4.gem /data/server/logstash/
[elasticsearch@elasticsearch01 logstash-input-jdbc-4.2.4]$ cd /data/server/logstash
[elasticsearch@elasticsearch01 logstash]$ ./bin/logstash-plugin install logstash-input-jdbc-4.2.4.gem
在logstash目录中创建一份pipeline配置文件
vi conf/user-access-log-pipeline.conf
input {
jdbc {
jdbc_driver_library => "/data/server/mysql-connector-java-5.1.36-bin.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://localhost:3306/patpat_demo"
jdbc_user => "root"
jdbc_password => "123456"
schedule => "* * * * *"
statement => "SELECT * from user_access_log_aggr"
}
}
output {
elasticsearch {
hosts => [ "localhost:9200" ]
}
}
检查配置文件语法是否正确
[elasticsearch@elasticsearch01 logstash]$ ./bin/logstash -f conf/user-access-log-pipeline.conf --config.test_and_exit
--config.reload.automatic,会自动重新加载配置文件的内容
./bin/logstash -f conf/user-access-log-pipeline.conf --config.reload.automatic
[elasticsearch@elasticsearch01 ~]$ curl -XGET localhost:9200/_cat/indices
[elasticsearch@elasticsearch01 ~]$ curl -XGET localhost:9200/_cat/health
[elasticsearch@elasticsearch01 ~]$ curl -XGET localhost:9200/logstash-2017.12.21
[elasticsearch@elasticsearch01 ~]$ curl -XGET localhost:9200/logstash-2017.12.21/_search?pretty
tar -zxvf kibana-5.6.3-linux-x86_64.tar.gz -C /data/server/
[root@elasticsearch01 server]# mv kibana-5.6.3-linux-x86_64/ kibana
[root@elasticsearch01 kibana]# vi config/kibana.yml
server.port: 5601
server.host: "0.0.0.0"
elasticsearch.url: "http://localhost:9200"
[root@elasticsearch01 kibana]# ./bin/kibana
在浏览器地址栏访问:http://192.168.31.180:5601/
在kibana配置一个index pattern来匹配es中的索引名称,默认是logstash-*,匹配logstash写入es中的数据
同时还要配置一个time-field name,即选择一个timestamp类型的field,kibana用它来按照时间进行过滤;kibana会自动加载出可选的field给我们选择
对指定的版块进行查询,然后统计出如下指标的汇总
pv: 所有人的pv相加
uv: 对userid进行去重
return_visit_uv: 回访uv
total_visit_time: 总访问时长
bounce_visit_uv: 跳出uv(发生跳出的用户按userid去重后的数量)
curl -XGET 'http://localhost:9200/logstash-2017.12.21/logs/_search?q=section:news&pretty' -d '
{
"size": 0,
"aggs": {
"pv": {"sum": {"field": "pv"}},
"uv": {"cardinality": {"field": "userid", "precision_threshold": 40000}},
"total_visit_time": {"sum": {"field": "visit_time"}},
"return_visit_uv": {
"filter": {"term": {"is_return_visit": 1}},
"aggs": {
"total_return_visit_uv": {"cardinality": {"field": "userid", "precision_threshold": 40000}}
}
},
"bounce_visit_uv": {
"filter": {"term": {"is_bounce_visit": 1}},
"aggs": {
"total_bounce_visit_uv": {"cardinality": {"field": "userid", "precision_threshold": 40000}}
}
}
}
}'
[elasticsearch@elasticsearch01 logstash]$ bin/logstash-plugin list
grok:数据结构化转换工具
match:匹配条件格式,将nginx日志作为message变量,并应用grok模式(例如自定义的NGINXACCESS;本文后面的配置使用的是内置的COMBINEDAPACHELOG)进行转换
[elasticsearch@elasticsearch01 logstash]$ ./bin/logstash-plugin install logstash-filter-grok
geoip:该过滤器从geoip中匹配ip字段,显示该ip的地理位置
[elasticsearch@elasticsearch01 logstash]$ ./bin/logstash-plugin install logstash-filter-geoip
FileBeat有四种输出方式,可以输出到:
Elasticsearch,logstash,file和console
解压压缩文件
tar -zxvf filebeat-5.6.3-linux-x86_64.tar.gz -C /data/server/
目录重命名
mv filebeat-5.6.3-linux-x86_64/ filebeat
编辑配置文件
vim conf/filebeat-to-logstash.yml
filebeat.prospectors:
# 采集nginx访问日志
- input_type: log
paths: /data/server/logs/access.log
document_type: "nginx_access"
# 指定注册表文件,用于记录上次读取的位置,默认位于filebeat下的data目录
registry_file: /data/server/filebeat/data/registry
output.logstash:
hosts: ["192.168.31.180:5044"]
修改配置文件权限(去掉group和other的写权限)
chmod go-w /data/server/filebeat/conf/filebeat-to-logstash.yml
启动
[patpat_bi@node01 filebeat]$ ./filebeat -e -c conf/filebeat-to-logstash.yml
vim conf/filebeat-logstash.conf
input {
beats {
port => 5044
}
}
output {
stdout {
codec => rubydebug # 直接将数据输出到控制台
}
}
启动Logstash
./bin/logstash -f conf/filebeat-logstash.conf
vim conf/logstash-kafka.conf
input {
beats {
port => 5044
}
}
output {
kafka {
bootstrap_servers => "node01:9092,node02:9092,node03:9092"
topic_id => "patpat_demo"
}
}
启动Logstash
./bin/logstash -f conf/logstash-kafka.conf
[elasticsearch@elasticsearch01 logstash]$ vim conf/first-pipeline.conf
input {
file {
path => "/data/server/logs/access.log"
start_position => beginning
}
}
filter {
grok {
match => { "message" => "%{COMBINEDAPACHELOG}"}
}
geoip {
source => "clientip"
}
}
output {
stdout {
codec => rubydebug # 直接将数据输出到控制台
}
}
启动logstash
[elasticsearch@elasticsearch01 logstash]$ ./bin/logstash -f conf/first-pipeline.conf
[elasticsearch@elasticsearch01 logstash]$ vim conf/first-pipeline-to-es.conf
input {
file {
path => "/data/server/logs/access.log"
start_position => beginning
}
}
filter {
grok {
match => { "message" => "%{COMBINEDAPACHELOG}"}
}
geoip {
source => "clientip"
}
}
output {
elasticsearch {
hosts => [ "192.168.31.180:9200" ]
index => "logstash-nginx-access-%{+YYYY.MM.dd}"
}
stdout {codec => rubydebug}
}
启动logstash
[elasticsearch@elasticsearch01 logstash]$ ./bin/logstash -f conf/first-pipeline-to-es.conf
查看ES索引
[elasticsearch@elasticsearch01 logs]$ curl -XGET localhost:9200/_cat/indices
[elasticsearch@elasticsearch01 logs]$ curl -XGET localhost:9200/logstash-nginx-access-2017.12.24/_search?pretty
启动Kibana
[elasticsearch@elasticsearch01 kibana]$ ./bin/kibana
在浏览器地址栏访问:http://192.168.31.180:5601/