一、链路追踪
1、涉及的组件及版本:elasticsearch:7.0.0、skywalking-oap-server:8.9.1、skywalking-ui:8.9.1、skywalking-agent:8.7.0
2、原理
3、实现
3.1、docker-compose yml文件配置
# Docker Compose stack for SkyWalking tracing:
# Elasticsearch (storage) -> OAP server -> web UI.
version: '3'

services:
  # Storage backend; the OAP server connects to it on port 9200.
  elasticsearch:
    image: elasticsearch:7.0.0
    container_name: elasticsearch
    restart: always
    ports:
      - "9200:9200"
      - "9300:9300"
    environment:
      discovery.type: single-node
      TZ: Asia/Shanghai
    networks:
      - monitor

  # SkyWalking OAP backend.
  skywalking-server:
    image: apache/skywalking-oap-server:8.9.1
    container_name: skywalking-server
    depends_on:
      - elasticsearch
    links:
      - elasticsearch
    restart: always
    ports:
      # 11800 is the address the Java agents report to;
      # 12800 is the address the UI queries (see SW_OAP_ADDRESS below).
      - "11800:11800"
      - "12800:12800"
    environment:
      TZ: Asia/Shanghai
      SW_STORAGE: elasticsearch
      SW_STORAGE_ES_CLUSTER_NODES: elasticsearch:9200
    volumes:
      # Replace the image's alarm rules with the local file.
      - ./skywalking/alarm-settings.yml:/skywalking/config/alarm-settings.yml
    networks:
      - monitor

  # SkyWalking web UI, published on host port 8081.
  skywalking-ui:
    image: apache/skywalking-ui:8.9.1
    container_name: skywalking-ui
    depends_on:
      - skywalking-server
    links:
      - skywalking-server
    restart: always
    ports:
      - "8081:8080"
    environment:
      SW_OAP_ADDRESS: http://skywalking-server:12800
      TZ: Asia/Shanghai
    networks:
      - monitor

# Shared bridge network; every service above joins it.
networks:
  monitor:
    driver: bridge
3.2、alarm-settings.yml 默认配置
# SkyWalking alarm rules.
# Each rule compares the metric named by `metrics-name` against `threshold`
# using `op`; the `message` text states the intended trigger condition.
rules:
  service_resp_time_rule:
    metrics-name: service_resp_time
    op: ">"
    threshold: 1000
    period: 10
    count: 3
    silence-period: 5
    message: Response time of service {name} is more than 1000ms in 3 minutes of last 10 minutes.

  service_sla_rule:
    metrics-name: service_sla
    op: "<"
    # The message below describes this threshold (8000) as 80%.
    threshold: 8000
    period: 10
    count: 2
    silence-period: 3
    message: Successful rate of service {name} is lower than 80% in 2 minutes of last 10 minutes

  service_resp_time_percentile_rule:
    metrics-name: service_percentile
    op: ">"
    # One threshold per percentile, in the order listed in the message:
    # p50, p75, p90, p95, p99.
    threshold: 1000,1000,1000,1000,1000
    period: 10
    count: 3
    silence-period: 5
    message: >-
      Percentile response time of service {name} alarm in 3 minutes of last 10 minutes,
      due to more than one condition of p50 > 1000, p75 > 1000, p90 > 1000,
      p95 > 1000, p99 > 1000

  service_instance_resp_time_rule:
    metrics-name: service_instance_resp_time
    op: ">"
    threshold: 1000
    period: 10
    count: 2
    silence-period: 5
    message: Response time of service instance {name} is more than 1000ms in 2 minutes of last 10 minutes

  database_access_resp_time_rule:
    metrics-name: database_access_resp_time
    threshold: 1000
    op: ">"
    period: 10
    count: 2
    message: Response time of database access {name} is more than 1000ms in 2 minutes of last 10 minutes

  endpoint_relation_resp_time_rule:
    metrics-name: endpoint_relation_resp_time
    threshold: 1000
    op: ">"
    period: 10
    count: 2
    message: Response time of endpoint relation {name} is more than 1000ms in 2 minutes of last 10 minutes

# No webhook receivers configured; add callback URLs as list items here.
webhooks: []
3.3、idea添加vm参数配置启动
-javaagent:H:\space\apache-skywalking-apm-bin-es7\agent\skywalking-agent.jar
-DSW_AGENT_NAME=film-server
-DSW_AGENT_COLLECTOR_BACKEND_SERVICES=192.168.1.9:11800
3.4、效果:链路、vm等查看
二、elk标准日志收集
1、涉及的技术栈:elasticsearch、logstash、kibana、kafka、zookeeper
2、原理
3、服务器实现
3.1、docker-compose的yml配置
# Docker Compose stack for log collection:
# Elasticsearch + Kibana + Logstash, plus Kafka/ZooKeeper.
version: '3'

services:
  elasticsearch:
    image: elasticsearch:7.0.0
    container_name: elasticsearch
    restart: always
    ports:
      - "9200:9200"
      - "9300:9300"
    environment:
      discovery.type: single-node
      TZ: Asia/Shanghai
    networks:
      - monitor

  kibana:
    image: kibana:7.0.0
    container_name: kibana
    ports:
      - "5601:5601"
    depends_on:
      - elasticsearch
    links:
      # Makes Elasticsearch reachable under the alias "es".
      - elasticsearch:es
    environment:
      # The Kibana image maps settings from upper-case, underscore-separated
      # variable names, so the dotted form "elasticsearch.hosts" is not used.
      ELASTICSEARCH_HOSTS: "http://es:9200"
    restart: always
    networks:
      - monitor

  logstash:
    # NOTE(review): 6.4.0 differs from the 7.0.0 used for Elasticsearch and
    # Kibana - confirm compatibility or align the versions.
    image: logstash:6.4.0
    container_name: logstash
    volumes:
      # Pipeline definition (inputs/outputs) supplied from the host.
      - ./logstash/logstash.conf:/usr/share/logstash/pipeline/logstash.conf
    depends_on:
      - elasticsearch
    links:
      # logstash.conf addresses Elasticsearch as "es:9200".
      - elasticsearch:es
    ports:
      - "4560:4560"
    networks:
      - monitor

  kafka:
    image: wurstmeister/kafka
    depends_on:
      - zookeeper
    # Exactly one `ports` key: a duplicate key would override this mapping
    # and leave 9092 unpublished.
    ports:
      - "9092:9092"
    environment:
      # Address advertised to clients outside the compose network.
      KAFKA_ADVERTISED_HOST_NAME: 192.168.1.9
      KAFKA_CREATE_TOPICS: "test:1:1"
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    networks:
      - monitor

  zookeeper:
    image: zookeeper:3.5
    ports:
      - "2181:2181"
    volumes:
      # NOTE(review): verify these container paths match the directories the
      # zookeeper image actually uses for data and configuration.
      - ./zookeeper/data:/zk/data
      - ./zookeeper/conf:/zk/conf
    networks:
      - monitor

# Shared bridge network; every service above joins it.
networks:
  monitor:
    driver: bridge
3.2、logstash.conf基本配置
# TCP port on which Logstash receives log events directly.
input {
  tcp {
    mode  => "server"
    host  => "0.0.0.0"
    port  => 4560
    codec => json_lines
  }
}

# Send the collected events to Elasticsearch for storage
# (the index name carries the event date).
output {
  elasticsearch {
    hosts => "es:9200"
    index => "springboot-logstash-%{+YYYY.MM.dd}"
  }
}
3.3、logstash安装插件
[root@anonymous dev]# docker exec -it logstash /bin/bash
bash-4.2$ cd /bin/
bash-4.2$ logstash-plugin install logstash-codec-json_lines
bash-4.2$ exit
[root@anonymous dev]# docker restart logstash
4、微服务配置
4.1、导入pom依赖
<dependency>
    <groupId>net.logstash.logback</groupId>
    <artifactId>logstash-logback-encoder</artifactId>
    <version>5.3</version>
</dependency>
4.2、在resources新增xml配置并重启微服务
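<!-- 说明:原文的XML标签已丢失,以下按保留下来的取值还原;APP_NAME、LOG_FILE_PATH、appender名称请按实际项目调整 -->
<configuration>
    <include resource="org/springframework/boot/logging/logback/defaults.xml"/>
    <include resource="org/springframework/boot/logging/logback/console-appender.xml"/>
    <!-- 应用名称 -->
    <property name="APP_NAME" value="film-server"/>
    <!-- 日志文件保存路径 -->
    <property name="LOG_FILE_PATH" value="${LOG_PATH:-${java.io.tmpdir:-/tmp}}/logs"/>
    <contextName>${APP_NAME}</contextName>
    <!-- 按天滚动记录日志到文件 -->
    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <fileNamePattern>${LOG_FILE_PATH}/${APP_NAME}-%d{yyyy-MM-dd}.log</fileNamePattern>
            <maxHistory>30</maxHistory>
        </rollingPolicy>
        <encoder>
            <pattern>${FILE_LOG_PATTERN}</pattern>
        </encoder>
    </appender>
    <!-- 输出到logstash的appender,地址为logstash的tcp收集端口 -->
    <appender name="LOGSTASH" class="net.logstash.logback.appender.LogstashTcpSocketAppender">
        <destination>192.168.1.9:4560</destination>
        <encoder charset="UTF-8" class="net.logstash.logback.encoder.LogstashEncoder"/>
    </appender>
    <root level="INFO">
        <appender-ref ref="CONSOLE"/>
        <appender-ref ref="FILE"/>
        <appender-ref ref="LOGSTASH"/>
    </root>
</configuration>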
4.3、打开kibana(我的本地地址是 http://192.168.1.9:5601/ ),创建索引模式(index pattern),时间字段选择@timestamp完成配置
4.4、利用sentinel限流异常制造点日志
三、高并发场景日志收集升级
1、将服务器logstash.conf中的input的tcp修改为
# Kafka input that replaces the tcp input in logstash.conf.
kafka {
  # Arbitrary identifier for this input.
  id                => "kafka_input_9527"
  bootstrap_servers => "192.168.1.9:9092"
  topics            => ["test"]
  auto_offset_reset => "latest"
}
2、修改微服务配置
2.1、新增pom依赖
<dependency>
    <groupId>com.github.danielwegener</groupId>
    <artifactId>logback-kafka-appender</artifactId>
    <version>0.2.0-RC2</version>
</dependency>
2.2、将resources-->xml 中 logstash的appender替换成kafka的appender
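<!-- 说明:原文的XML标签已丢失,以下按保留下来的取值还原;appender名称可自定,并在root中引用 -->
<appender name="KAFKA" class="com.github.danielwegener.logback.kafka.KafkaAppender">
    <encoder>
        <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
    </encoder>
    <!-- 与logstash kafka input中的topics保持一致 -->
    <topic>test</topic>
    <producerConfig>bootstrap.servers=192.168.1.9:9092</producerConfig>
    <producerConfig>acks=0</producerConfig>
</appender>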
2.3、改后的效果参考