prometheus本身报警规则及服务发现策略基于文件配置很不方便,对于非K8S服务监控经常需要操作配置文件,不利于管理系统平台化建设。
实现思路:将相关配置信息存储在MySQL里,并加入新的逻辑:在保留文件加载配置的同时,加载MySQL中的信息,动态生成static_config及alert_rule,从而实现报警及监控目标的配置UI化。
使用以下环境变量定义MySQL元信息
MYSQL_HOST #主机名/ip
MYSQL_PORT #端口
MYSQL_USER #用户名
MYSQL_PWD #密码
MYSQL_DB #数据库名
因为使用gorm实现,对于代码需要引入依赖
"database/sql/driver"
"encoding/json"
"fmt"
"os"
"time"

_ "github.com/go-sql-driver/mysql"
"github.com/jinzhu/gorm"
// AlertRule is the gorm model for one alerting rule stored in MySQL.
type AlertRule struct {
	ID        uint `gorm:"primary_key"`
	CreatedAt time.Time
	UpdatedAt time.Time

	// Group is the name of the rule group the alert belongs to.
	Group string `json:"group,omitempty"`
	// Alert is the name of the alerting rule.
	Alert string `json:"alert,omitempty"`
	// Expr is the alerting expression.
	Expr string `json:"expr,omitempty"`
	// For is the delay (hold time) configured for the alert.
	For string `json:"for,omitempty"`
	// Labels holds the rule labels, stored in a JSON column.
	Labels JSON `sql:"type:json" json:"labels,omitempty"`
	// Annotations holds the rule annotations, stored in a JSON column.
	Annotations JSON `sql:"type:json" json:"annotations,omitempty"`
}
// TableName returns the name of the table that stores AlertRule rows.
func (r *AlertRule) TableName() string {
	const table = "alert_rule"
	return table
}
// JSON is a list of string-to-string maps that is persisted as a JSON
// document in a single database column.
type JSON []map[string]string

// Value implements driver.Valuer by encoding c as a JSON string.
func (c JSON) Value() (driver.Value, error) {
	b, err := json.Marshal(c)
	if err != nil {
		return nil, err
	}
	return string(b), nil
}

// Scan implements sql.Scanner by decoding a JSON document into c.
//
// The column value may arrive as []byte or string depending on the driver,
// and as nil for SQL NULL; NULL resets c to nil. Any other source type is
// reported as an error instead of panicking on a failed type assertion.
func (c *JSON) Scan(input interface{}) error {
	switch v := input.(type) {
	case nil:
		*c = nil
		return nil
	case []byte:
		return json.Unmarshal(v, c)
	case string:
		return json.Unmarshal([]byte(v), c)
	default:
		return fmt.Errorf("scanning JSON column: unsupported source type %T", input)
	}
}
github.com/prometheus/prometheus/rules/manager.go LoadGroups方法 返回前加入以下代码
//出事连接
var rules []AlertRule
db_url:=fmt.Sprintf("%s:%s@tcp(%s:%s)/%s?charset=utf8mb4&parseTime=true&loc=Local",os.Getenv("MYSQL_USER"),os.Getenv("MYSQL_PWD"),os.Getenv("MYSQL_HOST"),os.Getenv("MYSQL_PORT"),os.Getenv("MYSQL_DB"))
db,err:=gorm.Open("mysql", fmt)
if err!=nil {
level.Error(m.logger).Log( "init db error: ",err)
}else {
level.Info(m.logger).Log("get db connection success")
}
//获取报警条目
err=db.Model(&AlertRule{}).Scan(&rules).Error
if err!=nil {
level.Error(m.logger).Log(err)
}else {
level.Info(m.logger).Log("get rules success")
}
defer func(db *gorm.DB) {
err:=db.Close()
if err!=nil {
level.Error(m.logger).Log("close db error: ",err)
}
}(db)
arules := make(map[string][]Rule)
for _,v:=range rules {
expr, err := promql.ParseExpr(v.Expr)
if err != nil {
return nil, []error{err}
}
var lbs=labels.Labels{labels.Label{
Name: "product",
Value: v.Group,
}}
for _,v:=range v.Labels {
lbs=append(lbs,labels.Label{
v["key"],v["value"],
})
}
var ans=labels.Labels{}
for _,v:=range v.Annotations {
ans=append(ans,labels.Label{
v["key"],v["value"],
})
}
arules[v.Group]= append(arules[v.Group], NewAlertingRule(v.Alert,expr,interval,lbs,ans,nil,shouldRestore,log.With(m.logger, "alert", v.Alert)))
}
level.Info(m.logger).Log(arules)
for k,v:=range arules {
rules:=make([]Rule,0,len(v))
rules=v[0:len(v)]
groups[groupKey(k,"rule.yaml")]=NewGroup(k,"rule.yaml",interval, rules,shouldRestore,m.opts)
}
从数据库获取信息生成static config配置
// AlertSdRule is the gorm model for one scrape target stored in MySQL.
type AlertSdRule struct {
	ID        uint      `gorm:"primary_key"`
	CreatedAt time.Time `json:"create_at"`
	UpdatedAt time.Time `json:"update_at"`

	// Target is the address of the scrape target.
	Target string `gorm:"type:varchar(200);not null" json:"target"`
	// Labels holds the target's own labels, stored in a JSON column.
	// NOTE(review): the JSON key is "type" although the field holds labels;
	// AlertSdGroup uses "labels" — confirm whether this is intentional.
	Labels JSON `sql:"type:json" json:"type"`
	// GroupId is the ID of the AlertSdGroup this target belongs to.
	GroupId int `gorm:"type:integer;not null" json:"group_id"`
	// Describe is a free-text description of the target.
	Describe string `gorm:"type:varchar(200);not null" json:"describe"`
}
// TableName returns the name of the table that stores AlertSdRule rows.
func (r *AlertSdRule) TableName() string {
	const table = "alert_sd_rule"
	return table
}
// AlertSdGroup is the gorm model for one scrape job stored in MySQL.
type AlertSdGroup struct {
	ID        uint      `gorm:"primary_key"`
	CreatedAt time.Time `json:"create_at"`
	UpdatedAt time.Time `json:"update_at"`

	// Name is the job name.
	Name string `gorm:"type:varchar(200);not null" json:"name"`
	// MetricsPath is the HTTP path the metrics are scraped from.
	MetricsPath string `gorm:"type:varchar(200);not null" json:"metrics_path"`
	// Labels holds labels shared by the group, stored in a JSON column.
	Labels JSON `sql:"type:json" json:"labels"`
	// Scheme is the URL scheme used for scraping.
	Scheme string `gorm:"type:varchar(200);not null" json:"scheme"`
	// Interval is the scrape interval in seconds.
	Interval int `gorm:"type:integer;not null" json:"interval"`
	// Timeout is the scrape timeout in seconds.
	Timeout int `gorm:"type:integer;not null" json:"timeout"`
	// Describe is a free-text description of the group.
	Describe string `gorm:"type:varchar(200);not null" json:"describe"`
	// AuthUser and AuthPassword are the basic-auth credentials, if any.
	AuthUser     string `gorm:"type:varchar(200)" json:"auth_user"`
	AuthPassword string `gorm:"type:varchar(200)" json:"auth_password"`
	// AuthToken is the bearer token, if any.
	AuthToken string `gorm:"type:varchar(200)" json:"auth_token"`
}
// TableName returns the name of the table that stores AlertSdGroup rows.
func (g *AlertSdGroup) TableName() string {
	const table = "alert_sd_group"
	return table
}
// JSON is a list of string-to-string maps that is persisted as a JSON
// document in a single database column.
type JSON []map[string]string

// Value implements driver.Valuer by encoding c as a JSON string.
func (c JSON) Value() (driver.Value, error) {
	b, err := json.Marshal(c)
	if err != nil {
		return nil, err
	}
	return string(b), nil
}

// Scan implements sql.Scanner by decoding a JSON document into c.
//
// The column value may arrive as []byte or string depending on the driver,
// and as nil for SQL NULL; NULL resets c to nil. Any other source type is
// reported as an error instead of panicking on a failed type assertion.
func (c *JSON) Scan(input interface{}) error {
	switch v := input.(type) {
	case nil:
		*c = nil
		return nil
	case []byte:
		return json.Unmarshal(v, c)
	case string:
		return json.Unmarshal([]byte(v), c)
	default:
		return fmt.Errorf("scanning JSON column: unsupported source type %T", input)
	}
}
在 main.go 的 reloadConfig 函数中(conf 加载完成之后)加入以下代码
//初始化数据库连接
db_url:=fmt.Sprintf("%s:%s@tcp(%s:%s)/%s?charset=utf8mb4&parseTime=true&loc=Local",os.Getenv("MYSQL_USER"),os.Getenv("MYSQL_PWD"),os.Getenv("MYSQL_HOST"),os.Getenv("MYSQL_PORT"),os.Getenv("MYSQL_DB"))
conn, err := gorm.Open("mysql", db_url)
if err != nil {
panic(err)
}
var groups []AlertSdGroup
// 获取所有的JOB
err = conn.Model(&AlertSdGroup{}).Scan(&groups).Error
if err != nil {
panic(err)
}
for _, group := range groups {
var rules []AlertSdRule
// 获取指定job缩包含的targets
err = conn.Model(&AlertSdRule{}).Where("group_id = ?", group.ID).Scan(&rules).Error
if err != nil {
panic(err)
}
var targetgroups = []*targetgroup.Group{}
for _, rule := range rules {
var labels model.LabelSet = map[model.LabelName]model.LabelValue{}
for _, label := range group.Labels {
key := model.LabelName(label["key"])
value := model.LabelValue(label["value"])
labels[key] = value
}
for _, label := range rule.Labels {
key := model.LabelName(label["key"])
value := model.LabelValue(label["value"])
labels[key] = value
}
targetgroups = append(targetgroups, &targetgroup.Group{
Targets: []model.LabelSet{
model.LabelSet{
"__address__": model.LabelValue(rule.Target),
},
},
Labels: labels,
Source: fmt.Sprint(rule.ID),
})
}
//生成对应组的scrapeConfig
scrapeConfig := &config.ScrapeConfig{
JobName: group.Name,
Scheme: group.Scheme,
ScrapeInterval: model.Duration(time.Duration(group.Interval) * time.Second),
ScrapeTimeout: model.Duration(time.Duration(group.Timeout) * time.Second),
MetricsPath: group.MetricsPath,
HTTPClientConfig: config_util.HTTPClientConfig{
TLSConfig: config_util.TLSConfig{
InsecureSkipVerify: true,
},
},
ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{
StaticConfigs: targetgroups,
},
}
//生成对应组的scrapeConfig的认证方式
if group.AuthUser != "" && group.AuthPassword != "" {
scrapeConfig.HTTPClientConfig.BasicAuth = &config_util.BasicAuth{
Username: group.AuthUser,
Password: config_util.Secret(group.AuthPassword),
}
} else if group.AuthToken != "" {
scrapeConfig.HTTPClientConfig.BearerToken = config_util.Secret(group.AuthToken)
}
conf.ScrapeConfigs = append(conf.ScrapeConfigs, scrapeConfig)
}