为了保障线上服务的稳定性和可用性,我们引入 skywalking 对 trace、metric 进行可视化,并使用 EFK 做日志可视化。基于业务层现有的可视化信息,我们利用 skywalking 的服务度量信息和日志系统中的日志进行监控,从而掌握部署环境中各实例的运行状况。paas-alert 用于提供多种可灵活配置的告警通知方式,并对告警信息进行统一管理。
数据库一共有两张表:paas_alert_info(存储告警信息);paas_alert_rule_config(存储告警规则配置)
paas_alert_info字段设计:
-- paas_alert_info: stores the alert records handled by paas-alert.
SET NAMES utf8mb4;
-- Foreign-key checks are switched off for the drop/recreate and restored at the end.
SET FOREIGN_KEY_CHECKS = 0;

DROP TABLE IF EXISTS `paas_alert_info`;

CREATE TABLE `paas_alert_info` (
    -- Primary key of the alert record.
    `id`                   varchar(128)  COLLATE utf8mb4_general_ci NOT NULL,
    -- Id of the alert rule configuration this alert belongs to.
    `alert_rule_config_id` varchar(128)  COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '告警规则配置id',
    -- Name of the alert rule.
    `alert_rule_name`      varchar(64)   COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '告警规则名',
    -- Name of the service that raised the alert.
    `alert_service_name`   varchar(255)  COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '发生告警的服务名',
    -- Business type, e.g. service, endpoint, call relation.
    `alert_scope`          varchar(16)   COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '业务类型,如服务、接口、调用关系',
    -- Third-party extension id; records the source alert's id when it has one.
    `alert_extent_id`      varchar(64)   COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '告警第三方扩展id,用于记录有id的源告警id',
    -- Alert message text.
    `alert_message`        varchar(1024) COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '告警信息',
    -- List of people handling the alert.
    `alert_handlers`       varchar(255)  COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '告警处理人列表',
    -- Environment the service runs in.
    `alert_env`            varchar(16)   COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '服务所在环境',
    -- Source of the alert.
    `alert_source`         varchar(32)   COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '告警来源',
    -- Notification type: dingDingRobot, email.
    `alert_notify_type`    varchar(128)  COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '告警通知类型 dingDingRobot email',
    -- Time the alert was triggered.
    `alert_trigger_time`   datetime      DEFAULT NULL COMMENT '告警触发时间',
    -- NOTE(review): no COMMENT in the original; presumably a validity flag -- confirm.
    `valid`                int           DEFAULT NULL,
    -- Notification delivery status: 0 = failed, 1 = succeeded.
    `alert_status`         bigint        DEFAULT '1' COMMENT '告警通知发送状态 0失败 1成功',
    `creator`              varchar(64)   COLLATE utf8mb4_general_ci DEFAULT NULL,
    `creation_time`        datetime      DEFAULT NULL,
    PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;

SET FOREIGN_KEY_CHECKS = 1;
paas_alert_rule_config字段设计:
-- paas_alert_rule_config: stores the alert rule configuration used by paas-alert.
SET NAMES utf8mb4;
-- Foreign-key checks are switched off for the drop/recreate and restored at the end.
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for paas_alert_rule_config
-- ----------------------------
DROP TABLE IF EXISTS `paas_alert_rule_config`;

CREATE TABLE `paas_alert_rule_config` (
    -- Primary key. Declared NOT NULL with an explicit PRIMARY KEY so that rows
    -- can be identified unambiguously (paas_alert_info.alert_rule_config_id
    -- stores this id) and InnoDB does not fall back to a hidden row id.
    `id`                     varchar(128) COLLATE utf8mb4_general_ci NOT NULL,
    `alert_rule_name`        varchar(128) COLLATE utf8mb4_general_ci DEFAULT NULL,
    -- Alert environment.
    `alert_env`              varchar(16)  COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '告警环境',
    -- Alert source: skywalking or es_watcher.
    `alert_source`           varchar(32)  COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT 'skywalking、es_watcher',
    -- Comma-separated notification types (dingDingRobot, email). When a type is
    -- set, the matching alert_handlers_email / alert_handlers_phone data must be set too.
    `alert_notify_type`      varchar(128) COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '通知类型 dingDingRobot、email 。以逗号分开,如果设置对应通知类型,就要再alert_handlers_email和alert_handlers_phone设置对应数据',
    -- E-mail addresses of the alert handlers.
    `alert_handlers_email`   varchar(255) COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '告警处理人邮箱',
    -- Phone numbers of the alert handlers.
    `alert_handlers_phone`   varchar(255) COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '告警处理人手机号',
    -- URL used to send the notification, e.g. a DingTalk custom robot webhook.
    `alert_url`              varchar(255) COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '支持告警类型中发送通知的url,比如钉钉自定义机器人发送通知',
    -- Rule description.
    `alert_rule_description` varchar(255) COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '规则描述',
    -- Alert rule status.
    `valid`                  bigint       DEFAULT '1' COMMENT '告警规则状态',
    `creation_time`          datetime     DEFAULT NULL,
    `creator`                varchar(64)  COLLATE utf8mb4_general_ci DEFAULT NULL,
    `modified_time`          datetime     DEFAULT NULL,
    `modifier`               varchar(64)  COLLATE utf8mb4_general_ci DEFAULT NULL,
    PRIMARY KEY (`id`),
    -- A rule name may appear only once per alert source.
    UNIQUE KEY `ruleName_source_index` (`alert_source`,`alert_rule_name`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;

-- ----------------------------
-- Records of paas_alert_rule_config
-- ----------------------------

-- Restore foreign-key checking, mirroring the paas_alert_info script.
SET FOREIGN_KEY_CHECKS = 1;
特别说明:目前 paas_alert_rule_config 的信息是直接录入到数据库的,没有对应的可视化控制台。
告警信息格式:
环境名:dev
告警规则名:123456
告警服务名:service nn
告警信息:this is alert message
告警字段映射表
告警信息字段 | skywalking对应字段 | es watcher 对应字段 |
---|---|---|
环境名 | skywalking对应的是告警配置中的tags env字段 | watcher的metadata env字段 |
告警规则名 | 对应skywalking中alarm.yml中以_rule结尾的告警规则名 | 对应es watcher日志告警的watch id |
告警服务名 | skywalking webhook上报的service字段 | watcher的metadata serviceName |
告警信息 | 对应skywalking告警配置中的信息 | es watcher的webhook请求body中的字段alertMessage |
适配skywalking告警详细见官网文档中的webhook模块。
示例:
es watcher 如下:
# Example watch (Kibana Console syntax). Every minute it counts WARN-level log
# entries from the last 50 minutes and, when more than 5 are found, POSTs an
# alert to the paas-alert es watcher endpoint.
PUT _watcher/watch/123456
{
  "metadata" : { # raw data forwarded to paas-alert
    "env" : "dev",
    "serviceName" : "consumer",
    "scope":"service"
  },
  "trigger": {
    "schedule": {
      "interval": "1m"
    }
  },
  "input": {
    "search": {
      "request": {
        "body": {
          "size": 0,
          "query": {
            "bool": {
              "filter": {
                "range": {
                  "@timestamp": {
                    "gte": "{{ctx.trigger.scheduled_time}}||-50m",
                    "lte": "{{ctx.trigger.scheduled_time}}",
                    "format": "strict_date_optional_time||epoch_millis"
                  }
                }
              },
              "must": [{"term":{"loglevel":"WARN"}}]
            }
          }
        },
        # NOTE(review): the index name is empty here -- presumably a placeholder
        # to be replaced with the log index to search; confirm before use.
        "indices": [
          ""
        ]
      }
    }
  },
  "condition" : {
    "compare" : { "ctx.payload.hits.total" : { "gt" : 5 }}
  },
  "actions" : {
    "my_webhook" : {
      "throttle_period" : "1m",
      "webhook" : {
        "method" : "POST",
        "scheme" : "http",
        "host" : "127.0.0.1", # paas-alert host
        "port" : 9910, # paas-alert port
        "path": "/paas/alert/es/watcher", # paas-alert endpoint for es watcher alerts
        "headers" : {
          "Content-Type" : "application/json"
        },
        # env, alertServiceName and scope are filled from the watch metadata above.
        # Keep comments outside the triple-quoted string: everything inside it is
        # sent verbatim as the request body, so a comment there would break the
        # JSON that paas-alert receives.
        "body" : """{
          "env":"{{ctx.metadata.env}}",
          "alertName":"{{ctx.watch_id}}",
          "alertServiceName":"{{ctx.metadata.serviceName}}",
          "scope":"{{ctx.metadata.scope}}",
          "startTime":"{{ctx.trigger.triggered_time}}",
          "alertMessage":"There are {{ctx.payload.hits.total}} documents in your index. Threshold is 5 in last 50 minutes"
        }"""
      }
    }
  }
}