在上一文中,讲述了某国企互联网公司是如何利用Thanos + Prometheus + Grafana + AlertManager + Dingtalk完善自己的监控告警体系。
文章见《大厂都在用的监控高可用方案,小公司还不赶紧学起来?》
虽然利用 Prometheus 能够更方便地对相关组件进行监控,但是 Prometheus + AlertManager 只能借助 Dingtalk 组件对接钉钉进行消息报警,无法实现短信、电话等告警升级功能。
整体痛点如下:
解决方案:针对dingTalk告警组件进行二次开发。集成钉钉、短信、电话,并开放统一的API,方便直接调用
此文对prometheus-webhook-dingtalk 其他功能不做过多介绍,仅详细描述如何通过对源代码的修改,增添短信、电话功能。
prometheus-webhook-dingtalk 使用 Go 语言编写,能够对接 Alertmanager 将告警信息发送至钉钉群,但无法发送短信、电话等告警。
代码地址:https://github.com/timonwong/prometheus-webhook-dingtalk
prometheus-webhook-dingtalk 是利用 chi 路由框架(go-chi)开发的一款 webhook 组件。
入口:cmd/prometheus-webhook-dingtalk/main.go
webHandler := web.New() // 启动web组件
跟进到 func New()
继续向下查看
此时,便找到了dingtalk对外提供的webhook接口,例如:
router.Post("/{name}/send", api.serveSend)
即,所有的 ip:8060/*/send请求都会转发到serveSend这个方法。
在 web/dingtalk 目录下新增 sms.go、call.go,并且在路由中增加 smsSend、callSend 方法。
router.Use(middleware.Recoverer)
router.Post("/{name}/send", api.serveSend)
// SMS alert endpoint.
router.Post("/{name}/sms", api.smsSend)
// Phone-call alert endpoint.
router.Post("/{name}/call", api.callSend)
// smsSend handles POST /{name}/sms. It looks up the target named in the URL,
// decodes the Alertmanager webhook payload from the request body and hands one
// SMS per (mobile, alert) pair to api.sendContent.
//
// It answers 404 when the target is unknown and 400 when the body cannot be
// decoded as JSON; otherwise it writes "OK" after all messages were handed off.
func (api *API) smsSend(w http.ResponseWriter, r *http.Request) {
	// Read the current target table under the read lock.
	api.mtx.RLock()
	targets := api.targets
	api.mtx.RUnlock()

	targetName := chi.URLParam(r, "name")
	logger := log1.With(api.logger, "target", targetName)

	target, ok := targets[targetName]
	if !ok {
		level.Warn(logger).Log("msg", "target not found")
		http.NotFound(w, r)
		return
	}

	// Decode the alert payload.
	var promMessage models.WebhookMessage
	if err := json.NewDecoder(r.Body).Decode(&promMessage); err != nil {
		level.Error(logger).Log("msg", "Cannot decode prometheus webhook JSON request", "err", err)
		http.Error(w, "Bad Request", http.StatusBadRequest)
		return
	}

	// Recipients come from the target's mention configuration.
	// NOTE(review): if Mention is a pointer that may be unset for a target,
	// this dereference needs a nil guard — confirm against the config type.
	mobiles := target.Mention.Mobiles

	// Alert times are rendered in UTC+8. Converting with In keeps the instant
	// and only changes the zone used for formatting; adding eight hours to the
	// timestamp is only correct when StartsAt happens to be expressed in UTC.
	utc8 := time.FixedZone("UTC+8", 8*60*60)

	// Format every alert once; the text does not depend on the recipient.
	contents := make([]string, 0, len(promMessage.Alerts))
	for _, alert := range promMessage.Alerts {
		status := ""
		switch alert.Status {
		case "firing":
			status = "告警触发"
		case "resolved":
			status = "告警恢复"
		}

		startsAt := alert.StartsAt.In(utc8).Format("2006-01-02 15:04:05")
		level.Debug(logger).Log("msg", "formatted alert start time", "startsAt", startsAt)

		description := alert.Annotations["description"]
		contents = append(contents, fmt.Sprintf("status:%s告警时间:%s\n告警内容:%s",
			status,
			startsAt,
			description))
	}

	// Send every alert to every mobile number, recipient by recipient.
	for _, mob := range mobiles {
		for _, content := range contents {
			api.sendContent(content, mob)
		}
	}

	if _, err := io.WriteString(w, "OK"); err != nil {
		level.Error(logger).Log("msg", "Cannot write response", "err", err)
	}
}
此处需要添加自己的短信appKey、appSecret、templateID(阿里云、腾讯云均可购买)
// smsHTTPClient is shared by all SMS requests. The timeout keeps a stalled
// gateway from blocking the calling handler indefinitely.
var smsHTTPClient = &http.Client{Timeout: 10 * time.Second}

// sendContent posts content as a template SMS for mobile to the message
// gateway. The request is signed with the upper-case hex MD5 of the parameter
// string that ends with the app secret.
//
// Failures are logged and the function returns; it never terminates the
// process, so one failed SMS cannot take down the webhook server.
func (api *API) sendContent(content string, mobile string) {
	// Replace the endpoint, appKey, appSecret and templateID with your own.
	endpoint := "http://inside-mp.01zhuanche.com/api/v1/message/template/send"
	appKey := "x"
	appSecret := "x"
	templateID := 1111

	// Millisecond Unix timestamp.
	timestamp := time.Now().UnixNano() / 1e6

	// params is rendered as a one-element bracketed list holding the
	// ASCII-quoted content, e.g. ["text"].
	params := "[" + strconv.QuoteToASCII(content) + "]"

	// The signed string uses the raw (unescaped) params value. It contains
	// the app secret and therefore must not be logged.
	signContent := fmt.Sprintf("appkey=%s&mobile=%s&params=%s&templetId=%d&timestamp=%d&appsecret=%s",
		appKey, mobile, params, templateID, timestamp, appSecret)
	sum := md5.Sum([]byte(signContent))
	sign := strings.ToUpper(hex.EncodeToString(sum[:]))

	// In the form body params is query-escaped.
	data := fmt.Sprintf("appkey=%s&mobile=%s&templetId=%d&params=%s&timestamp=%d&sign=%s",
		appKey, mobile, templateID, url2.QueryEscape(params), timestamp, sign)

	resp, err := smsHTTPClient.Post(endpoint, "application/x-www-form-urlencoded", bytes.NewBufferString(data))
	if err != nil {
		log.Printf("sms request for %s failed: %v", mobile, err)
		return
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Printf("reading sms response for %s failed: %v", mobile, err)
		return
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		log.Printf("sms gateway returned status %d for %s", resp.StatusCode, mobile)
	}

	log.Printf("发送内容:%s, 手机号:%s\n", content, mobile)
	log.Printf("%s\n", string(body))
}
需要将代码中的阿里云 ALIBABA_CLOUD_ACCESS_KEY_ID、ACCESS_KEY_SECRET 替换为自己的 AccessKey。
// callSend handles POST /{name}/call. It resolves the target named in the URL,
// decodes the Alertmanager webhook payload from the request body and passes the
// "description" annotation of every alert to api.sendCall for every mobile
// number listed in the target's mention configuration.
//
// Unknown targets get a 404, undecodable bodies a 400; otherwise the handler
// writes "OK".
func (api *API) callSend(w http.ResponseWriter, r *http.Request) {
	// Read the current target table under the read lock.
	api.mtx.RLock()
	current := api.targets
	api.mtx.RUnlock()

	name := chi.URLParam(r, "name")
	reqLogger := log1.With(api.logger, "target", name)

	tgt, found := current[name]
	if !found {
		level.Warn(reqLogger).Log("msg", "target not found")
		http.NotFound(w, r)
		return
	}

	// Decode the alert payload.
	var payload models.WebhookMessage
	decodeErr := json.NewDecoder(r.Body).Decode(&payload)
	if decodeErr != nil {
		level.Error(reqLogger).Log("msg", "Cannot decode prometheus webhook JSON request", "err", decodeErr)
		http.Error(w, "Bad Request", http.StatusBadRequest)
		return
	}

	// One call per (mobile, alert) pair, recipient by recipient.
	numbers := tgt.Mention.Mobiles
	for _, number := range numbers {
		for _, firing := range payload.Alerts {
			// TODO: filter the description before it is read out.
			text := firing.Annotations["description"]
			api.sendCall(text, number)
		}
	}

	io.WriteString(w, "OK")
}
// sendCall places a text-to-speech voice call to mobile through the Alibaba
// Cloud voice service, passing content as the "details" template parameter.
//
// Failures (client creation, parameter encoding, the API call itself) are
// logged and the function returns; it never terminates the process, so one
// failed call cannot take down the webhook server.
func (api *API) sendCall(content string, mobile string) {
	// Replace these placeholders with your own AccessKey pair. A leaked
	// AccessKey endangers every resource of the account; prefer the STS
	// approach, see https://help.aliyun.com/document_detail/378661.html
	ALIBABA_CLOUD_ACCESS_KEY_ID := "xx"
	ACCESS_KEY_SECRET := "xx"

	client, err := api.createClient(tea.String(ALIBABA_CLOUD_ACCESS_KEY_ID), tea.String(ACCESS_KEY_SECRET))
	if err != nil {
		log.Printf("creating voice client failed: %v", err)
		return
	}

	// Encode the template parameter with encoding/json so that quotes,
	// backslashes or newlines in the alert text cannot produce invalid JSON.
	ttsParam, err := json.Marshal(map[string]string{"details": content})
	if err != nil {
		log.Printf("encoding tts param failed: %v", err)
		return
	}

	singleCallByTtsRequest := &dyvmsapi20170525.SingleCallByTtsRequest{}
	singleCallByTtsRequest.SetCalledShowNumber("057100000575")
	singleCallByTtsRequest.SetCalledNumber(mobile)
	singleCallByTtsRequest.SetTtsCode("TTS_186598765")
	singleCallByTtsRequest.SetTtsParam(string(ttsParam))
	log.Println(content)

	runtime := &util.RuntimeOptions{}
	tryErr := func() (_e error) {
		// Pass any recovered panic value through tea.Recover and return the
		// resulting error, if any.
		defer func() {
			if r := tea.Recover(recover()); r != nil {
				_e = r
			}
		}()
		resp, _err := client.SingleCallByTtsWithOptions(singleCallByTtsRequest, runtime)
		if _err != nil {
			return _err
		}
		fmt.Println(resp.Body.String())
		return nil
	}()
	if tryErr != nil {
		log.Printf("voice call to %s failed: %v", mobile, tryErr)
	}
}
// createClient builds a dyvmsapi20170525 client for the endpoint
// dyvmsapi.aliyuncs.com using the given AccessKey ID and secret. It returns
// whatever dyvmsapi20170525.NewClient returns.
//
// Endpoint reference: https://api.aliyun.com/product/Dyvmsapi
func (api *API) createClient(accessKeyId *string, accessKeySecret *string) (_result *dyvmsapi20170525.Client, _err error) {
	config := &openapi.Config{
		// Required: your AccessKey ID.
		AccessKeyId: accessKeyId,
		// Required: your AccessKey Secret.
		AccessKeySecret: accessKeySecret,
		Endpoint:        tea.String("dyvmsapi.aliyuncs.com"),
	}
	return dyvmsapi20170525.NewClient(config)
}
执行go run cmd/prometheus-webhook-dingtalk/main.go
可见:
此时我们新增加的短信、电话 webhook 已经启动,对接 Alertmanager 调用即可。
同时,为了更方便地统一管理其他脚本的告警代码,可在 sms.go 中新增 smsapi,以提供更便利的短信调用。
// smsapi is a generic SMS endpoint. It decodes a SendSmsMessage from the JSON
// request body and passes its Msg to api.sendContent once for every entry in
// its Phone list. A body that cannot be decoded is answered with 400.
func (api *API) smsapi(w http.ResponseWriter, r *http.Request) {
	var req SendSmsMessage
	if decodeErr := json.NewDecoder(r.Body).Decode(&req); decodeErr != nil {
		http.Error(w, "Failed to decode JSON", http.StatusBadRequest)
		return
	}

	for _, phone := range req.Phone {
		api.sendContent(req.Msg, phone)
	}
}
call.go 同理
// callapi is a generic phone-call endpoint. It decodes a SendSmsMessage from
// the JSON request body and passes its Msg to api.sendCall once for every
// entry in its Phone list. A body that cannot be decoded is answered with 400.
func (api *API) callapi(w http.ResponseWriter, r *http.Request) {
	var req SendSmsMessage
	if decodeErr := json.NewDecoder(r.Body).Decode(&req); decodeErr != nil {
		http.Error(w, "Failed to decode JSON", http.StatusBadRequest)
		return
	}

	for _, phone := range req.Phone {
		api.sendCall(req.Msg, phone)
	}
}