修改prometheus实现数据库存储报警规则和收集目标

从数据库获取报警规则和服务目标组

prometheus本身报警规则及服务发现策略基于文件配置很不方便,对于非K8S服务监控经常需要操作配置文件,不利于管理系统平台化建设。
实现思路:将相关配置信息存储在MySQL里,加入新的逻辑,实现保留文件加载配置的同时,加载MySQL中的信息,
动态生成static_config和alert_rule,从而实现报警及监控目标的配置UI化。

MySQL配置

使用以下环境变量定义MySQL元信息

MYSQL_HOST #主机名/ip
MYSQL_PORT #端口
MYSQL_USER #用户名
MYSQL_PWD #密码
MYSQL_DB #数据库名

因为使用gorm实现,对于代码需要引入依赖

	"github.com/jinzhu/gorm"
	_ "github.com/go-sql-driver/mysql"

表结构定义

报警规则表结构定义

// AlertRule is the database model for a single alerting rule.
type AlertRule struct {
	ID        uint `gorm:"primary_key"`
	CreatedAt time.Time
	UpdatedAt time.Time

	// Group is the name of the rule group this alert belongs to.
	Group string `json:"group,omitempty"`

	// Alert is the name of the alerting rule.
	Alert string `json:"alert,omitempty"`

	// Expr is the expression the rule evaluates.
	Expr string `json:"expr,omitempty"`

	// For is the delay before the alert fires.
	For string `json:"for,omitempty"`

	// Labels holds the labels attached to the rule, stored as a JSON column.
	Labels JSON `sql:"type:json" json:"labels,omitempty"`

	// Annotations holds the annotations of the rule, stored as a JSON column.
	Annotations JSON `sql:"type:json" json:"annotations,omitempty"`
}

// TableName returns the name of the table that stores AlertRule rows.
func (r *AlertRule) TableName() string {
	const table = "alert_rule"
	return table
}

// JSON is a list of string-to-string maps that is persisted in a single
// JSON database column.
type JSON []map[string]string

// Value implements driver.Valuer by encoding the list as JSON text.
func (c JSON) Value() (driver.Value, error) {
	b, err := json.Marshal(c)
	if err != nil {
		return nil, err
	}
	return string(b), nil
}

// Scan implements sql.Scanner by decoding JSON text into the receiver.
//
// A NULL column (nil input) resets the receiver to nil. Both []byte and
// string inputs are accepted because drivers may hand back either form;
// any other type yields an error instead of a failed type-assertion panic.
func (c *JSON) Scan(input interface{}) error {
	switch v := input.(type) {
	case nil:
		*c = nil
		return nil
	case []byte:
		return json.Unmarshal(v, c)
	case string:
		return json.Unmarshal([]byte(v), c)
	default:
		return fmt.Errorf("unsupported type %T for JSON column", input)
	}
}

代码逻辑

github.com/prometheus/prometheus/rules/manager.go LoadGroups方法 返回前加入以下代码

    //出事连接
    var rules []AlertRule
    db_url:=fmt.Sprintf("%s:%s@tcp(%s:%s)/%s?charset=utf8mb4&parseTime=true&loc=Local",os.Getenv("MYSQL_USER"),os.Getenv("MYSQL_PWD"),os.Getenv("MYSQL_HOST"),os.Getenv("MYSQL_PORT"),os.Getenv("MYSQL_DB"))
	db,err:=gorm.Open("mysql", fmt)
	if err!=nil {
		level.Error(m.logger).Log( "init db error: ",err)
	}else {
		level.Info(m.logger).Log("get db connection success")
	}
    //获取报警条目
	err=db.Model(&AlertRule{}).Scan(&rules).Error
	if err!=nil {
		level.Error(m.logger).Log(err)
	}else {
		level.Info(m.logger).Log("get rules success")
	}
	defer func(db *gorm.DB) {
		err:=db.Close()
		if err!=nil {
			level.Error(m.logger).Log("close db error: ",err)
		}
	}(db)
	arules := make(map[string][]Rule)
	for _,v:=range rules {
		expr, err := promql.ParseExpr(v.Expr)
		if err != nil {
			return nil, []error{err}
		}
		var lbs=labels.Labels{labels.Label{
			Name: "product",
			Value: v.Group,
		}}
		for _,v:=range v.Labels {
			lbs=append(lbs,labels.Label{
				v["key"],v["value"],
			})
		}

		var ans=labels.Labels{}
		for _,v:=range v.Annotations {
			ans=append(ans,labels.Label{
				v["key"],v["value"],
			})
		}
		arules[v.Group]= append(arules[v.Group], NewAlertingRule(v.Alert,expr,interval,lbs,ans,nil,shouldRestore,log.With(m.logger, "alert", v.Alert)))
	}
	level.Info(m.logger).Log(arules)
	for k,v:=range arules {
		rules:=make([]Rule,0,len(v))
		rules=v[0:len(v)]
		groups[groupKey(k,"rule.yaml")]=NewGroup(k,"rule.yaml",interval, rules,shouldRestore,m.opts)
	}

job及target代码

从数据库获取信息生成static config配置

结构定义

// AlertSdRule is the database model for a single scrape target.
type AlertSdRule struct {
	ID        uint      `gorm:"primary_key"`
	CreatedAt time.Time `json:"create_at"`
	UpdatedAt time.Time `json:"update_at"`

	// Target is the address of the target to scrape.
	Target string `gorm:"type:varchar(200);not null" json:"target"`

	// Labels holds the target's own labels, stored as a JSON column.
	// NOTE(review): the JSON key is "type" rather than "labels" — confirm this is intended.
	Labels JSON `sql:"type:json" json:"type"`

	// GroupId references the AlertSdGroup this target belongs to.
	GroupId int `gorm:"type:integer;not null" json:"group_id"`

	// Describe is a free-form description of the target.
	Describe string `gorm:"type:varchar(200);not null" json:"describe"`
}

// TableName returns the name of the table that stores AlertSdRule rows.
func (r *AlertSdRule) TableName() string {
	const table = "alert_sd_rule"
	return table
}

// AlertSdGroup is the database model for a scrape job and the settings
// shared by all of its targets.
type AlertSdGroup struct {
	ID        uint      `gorm:"primary_key"`
	CreatedAt time.Time `json:"create_at"`
	UpdatedAt time.Time `json:"update_at"`

	// Name is the job name.
	Name string `gorm:"type:varchar(200);not null" json:"name"`

	// MetricsPath is the HTTP path metrics are fetched from.
	MetricsPath string `gorm:"type:varchar(200);not null" json:"metrics_path"`

	// Labels holds labels applied to every target of the job, stored as a JSON column.
	Labels JSON `sql:"type:json" json:"labels"`

	// Scheme is the protocol scheme used for scraping.
	Scheme string `gorm:"type:varchar(200);not null" json:"scheme"`

	// Interval is the scrape interval in seconds.
	Interval int `gorm:"type:integer;not null" json:"interval"`

	// Timeout is the scrape timeout in seconds.
	Timeout int `gorm:"type:integer;not null" json:"timeout"`

	// Describe is a free-form description of the job.
	Describe string `gorm:"type:varchar(200);not null" json:"describe"`

	// AuthUser and AuthPassword are the optional basic-auth credentials.
	AuthUser string `gorm:"type:varchar(200)" json:"auth_user"`

	AuthPassword string `gorm:"type:varchar(200)" json:"auth_password"`

	// AuthToken is the optional bearer token.
	AuthToken string `gorm:"type:varchar(200)" json:"auth_token"`
}

// TableName returns the name of the table that stores AlertSdGroup rows.
func (g *AlertSdGroup) TableName() string {
	const table = "alert_sd_group"
	return table
}

// JSON is a list of string-to-string maps that is persisted in a single
// JSON database column.
type JSON []map[string]string

// Value implements driver.Valuer by encoding the list as JSON text.
func (c JSON) Value() (driver.Value, error) {
	b, err := json.Marshal(c)
	if err != nil {
		return nil, err
	}
	return string(b), nil
}

// Scan implements sql.Scanner by decoding JSON text into the receiver.
//
// A NULL column (nil input) resets the receiver to nil. Both []byte and
// string inputs are accepted because drivers may hand back either form;
// any other type yields an error instead of a failed type-assertion panic.
func (c *JSON) Scan(input interface{}) error {
	switch v := input.(type) {
	case nil:
		*c = nil
		return nil
	case []byte:
		return json.Unmarshal(v, c)
	case string:
		return json.Unmarshal([]byte(v), c)
	default:
		return fmt.Errorf("unsupported type %T for JSON column", input)
	}
}

代码逻辑

main.go reloadConfig中

    //初始化数据库连接
    db_url:=fmt.Sprintf("%s:%s@tcp(%s:%s)/%s?charset=utf8mb4&parseTime=true&loc=Local",os.Getenv("MYSQL_USER"),os.Getenv("MYSQL_PWD"),os.Getenv("MYSQL_HOST"),os.Getenv("MYSQL_PORT"),os.Getenv("MYSQL_DB"))
	conn, err := gorm.Open("mysql", db_url)
	if err != nil {
		panic(err)
	}
	var groups []AlertSdGroup
    // 获取所有的JOB
	err = conn.Model(&AlertSdGroup{}).Scan(&groups).Error
	if err != nil {
		panic(err)
	}
	for _, group := range groups {
		var rules []AlertSdRule
        // 获取指定job缩包含的targets
		err = conn.Model(&AlertSdRule{}).Where("group_id = ?", group.ID).Scan(&rules).Error
		if err != nil {
			panic(err)
		}
		var targetgroups = []*targetgroup.Group{}
		for _, rule := range rules {
			var labels model.LabelSet = map[model.LabelName]model.LabelValue{}
			for _, label := range group.Labels {
				key := model.LabelName(label["key"])
				value := model.LabelValue(label["value"])
				labels[key] = value
			}
			for _, label := range rule.Labels {
				key := model.LabelName(label["key"])
				value := model.LabelValue(label["value"])
				labels[key] = value
			}
			targetgroups = append(targetgroups, &targetgroup.Group{
				Targets: []model.LabelSet{
					model.LabelSet{
						"__address__": model.LabelValue(rule.Target),
					},
				},
				Labels: labels,
				Source: fmt.Sprint(rule.ID),
			})
		}
        //生成对应组的scrapeConfig
		scrapeConfig := &config.ScrapeConfig{
			JobName:        group.Name,
			Scheme:         group.Scheme,
			ScrapeInterval: model.Duration(time.Duration(group.Interval) * time.Second),
			ScrapeTimeout:  model.Duration(time.Duration(group.Timeout) * time.Second),
			MetricsPath:    group.MetricsPath,
			HTTPClientConfig: config_util.HTTPClientConfig{
				TLSConfig: config_util.TLSConfig{
					InsecureSkipVerify: true,
				},
			},
			ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{
				StaticConfigs: targetgroups,
			},
		}
        //生成对应组的scrapeConfig的认证方式
		if group.AuthUser != "" && group.AuthPassword != "" {
			scrapeConfig.HTTPClientConfig.BasicAuth = &config_util.BasicAuth{
				Username: group.AuthUser,
				Password: config_util.Secret(group.AuthPassword),
			}
		} else if group.AuthToken != "" {
			scrapeConfig.HTTPClientConfig.BearerToken = config_util.Secret(group.AuthToken)
		}
		conf.ScrapeConfigs = append(conf.ScrapeConfigs, scrapeConfig)
	}

你可能感兴趣的:(the,way,to,kubernetes)