go-prometheus业务监控指标实战(二)

本文主要是对上文的补充,关于 Prometheus 及 grafana 的安装配置请参考上文 go-prometheus业务监控指标实战 。本文主要讲解的是 Histogram和Summary的案例,并且结合案例配置自动告警机制。

案例代码

案例 Histogram 统计班级人数的考试分数(value),每个人有subject(学科)和age(年龄)两个属性(label)

代码

main.go

package main

import (
	"net/http"

	qzPro "gitee.com/qzcsu/go-web-study/service/prometheus"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// main registers the demo collectors with the default Prometheus registry,
// wires up the HTTP endpoints that generate sample data, and serves the
// collected metrics on /metrics at port 8090.
func main() {
	prometheus.MustRegister(qzPro.CommonCounter, qzPro.FuncCounter, qzPro.VecCounter, qzPro.CommonGauge, qzPro.FuncGauge, qzPro.VecGauge,
		qzPro.CommonHistogram, qzPro.VecHistogram, qzPro.CommonSummary, qzPro.VecSummary)
	http.HandleFunc("/common_counter", qzPro.DealCommCounter)
	http.HandleFunc("/vec_counter", qzPro.DealVecCounter)
	http.HandleFunc("/common_gauge", qzPro.DealCommGauge)
	http.HandleFunc("/vec_gauge", qzPro.DealVecGauge)
	http.HandleFunc("/common_histogram", qzPro.DealCommHistogram)
	http.HandleFunc("/vec_histogram", qzPro.DealVecHistogram)
	http.Handle("/metrics", promhttp.Handler()) // expose the metrics endpoint

	// ListenAndServe only returns when serving failed (for example because the
	// port is already taken). Surface that error instead of exiting silently.
	if err := http.ListenAndServe(":8090", nil); err != nil {
		panic(err)
	}
}

service/prometheus/constants.go

package prometheus

import (
	"fmt"
	"net/http"
	"strconv"

	"gitee.com/qzcsu/go-web-study/utils/randomutil"
)

// Name parts shared by the demo metrics of this package.
const (
	nameSpace  = "prometheus_demo"       // passed as the metric Namespace
	subSys     = "client_golang"         // passed as the metric Subsystem
	namePrefix = "the_number_of_student" // leading part of every metric Name
)

// Sample data pools from which the demo handlers draw random label values.
var (
	names    = []string{"小明", "小红", "小花"}
	ages     = []int64{20, 21, 22, 23, 24, 25}
	subjects = []string{"语文", "数学", "体育"}
)

// GetParamNum reads the "num" form/query parameter of req and returns it as a
// base-10 int64. It prints a diagnostic and returns 0 when the form cannot be
// parsed or when "num" is absent or not a valid integer.
func GetParamNum(req *http.Request) int64 {
	if err := req.ParseForm(); err != nil {
		fmt.Println("parse form err")
		return 0
	}

	raw := req.Form.Get("num")
	fmt.Printf("numStr:[%v]\n", raw)

	parsed, err := strconv.ParseInt(raw, 10, 64)
	if err != nil {
		fmt.Printf("parse int err :%v\n", err)
		return 0
	}
	return parsed
}

// getCurRandomStrMap picks one element of randData, using an index obtained
// from randomutil.RandomNum(0, len(randData)), increments that element's
// tally in countStrMap, and returns the element.
func getCurRandomStrMap(countStrMap map[string]int64, randData []string) string {
	picked := randData[randomutil.RandomNum(0, int64(len(randData)))]
	countStrMap[picked]++
	return picked
}

// getCurRandomIntMap picks one element of randData, using an index obtained
// from randomutil.RandomNum(0, len(randData)), increments that element's
// tally in countIntMap, and returns the element formatted as a decimal string.
func getCurRandomIntMap(countIntMap map[int64]int64, randData []int64) string {
	picked := randData[randomutil.RandomNum(0, int64(len(randData)))]
	countIntMap[picked]++
	return fmt.Sprintf("%d", picked)
}

service/prometheus/histogram.go

package prometheus

import (
	"fmt"
	"net/http"
	"sort"
	"sync/atomic"
	"time"

	"github.com/prometheus/client_golang/prometheus"

	"gitee.com/qzcsu/go-web-study/utils/randomutil"
)

// HistogramNamePrefix is the metric-name prefix shared by the histogram demos.
const HistogramNamePrefix = namePrefix + "_histogram"

// Bookkeeping for the plain histogram demo: commonHistogramTotalCount is
// incremented once per observation and totalHistogramCommonPoint collects
// every observed value.
var (
	commonHistogramTotalCount int64
	totalHistogramCommonPoint []float64
)

// CommonHistogram is the plain (label-less) demo histogram.
var CommonHistogram = prometheus.NewHistogram(
	prometheus.HistogramOpts{
		Namespace: nameSpace,
		Subsystem: subSys,
		Name:      HistogramNamePrefix + ":common",
		Help:      "desc the metric",
		// LinearBuckets(start, width, count) returns count upper bounds, so
		// (60, 10, 5) gives 60, 70, 80, 90, 100. Each bucket counts the
		// observations less than or equal to its bound (cumulative), and the
		// +Inf bucket is added implicitly.
		// prometheus.DefBuckets (.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10)
		// are the library defaults.
		Buckets: prometheus.LinearBuckets(60, 10, 5),
	},
)

// ConstHistogram is a histogram with fixed, pre-computed contents: 4711
// observations summing to 403.34, with cumulative counts keyed by bucket
// upper bound, and the variable label values code="200", method="get".
//
// NOTE(review): the error returned by NewConstHistogram is discarded, so a
// failed construction goes unnoticed here. In particular, confirm that the
// non-ASCII const label name "头衔" is accepted by the client version in use.
var ConstHistogram, _ = prometheus.NewConstHistogram(
	prometheus.NewDesc(
		HistogramNamePrefix+":const",
		"A histogram of the HTTP request durations.",
		[]string{"code", "method"},
		prometheus.Labels{"owner": "example", "头衔": "将军"},
	),
	4711,
	403.34,
	map[float64]uint64{
		25:  121,
		50:  2403,
		100: 3221,
		200: 4233,
	},
	"200", "get",
)

// Bookkeeping for the labelled histogram demo: vecHistogramTotalCount is
// incremented once per observation and totalVecHistogramPoint collects every
// observed value.
var vecHistogramTotalCount int64
var totalVecHistogramPoint []float64

// VecHistogram is the demo histogram partitioned by the "subject" and "age"
// labels. It uses the same bucket upper bounds as CommonHistogram
// (60, 70, 80, 90, 100, plus the implicit +Inf).
var VecHistogram = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Subsystem: subSys,
		Namespace: nameSpace,
		// Help is set so the exposed metric carries a description, matching
		// CommonHistogram.
		Help:    "desc the metric",
		Name:    fmt.Sprintf("%s:%s", HistogramNamePrefix, "vec"),
		Buckets: prometheus.LinearBuckets(60, 10, 5),
	}, []string{"subject", "age"})

// DealCommHistogram handles /common_histogram?num=N. It replies immediately
// and, in a background goroutine, feeds N random scores into CommonHistogram,
// one every 3 seconds. When the last score has been observed it prints every
// score recorded so far in ascending order.
//
// A missing, unparsable or non-positive "num" is rejected with 400 Bad
// Request: GetParamNum reports such input as 0, and starting the generator
// with that count would keep it running forever.
//
// NOTE(review): totalHistogramCommonPoint is appended to and sorted without
// synchronization, so overlapping requests race on it.
func DealCommHistogram(w http.ResponseWriter, req *http.Request) {
	dealCount := GetParamNum(req)
	if dealCount <= 0 {
		http.Error(w, "num must be a positive integer", http.StatusBadRequest)
		return
	}
	go func() {
		ticker := time.NewTicker(3 * time.Second)
		defer ticker.Stop() // release the ticker once the batch is done
		for curDealCount := int64(1); curDealCount <= dealCount; curDealCount++ {
			<-ticker.C
			// Use the value returned by the atomic add rather than re-reading
			// the shared counter non-atomically.
			total := atomic.AddInt64(&commonHistogramTotalCount, 1)
			// Random score of one student.
			// NOTE(review): whether the bounds 55 and 110 are inclusive
			// depends on randomutil.RandomNum; confirm.
			thisVal := float64(randomutil.RandomNum(55, 110))
			totalHistogramCommonPoint = append(totalHistogramCommonPoint, thisVal)
			CommonHistogram.Observe(thisVal)
			fmt.Printf("commonHistogramTotalCount:%v,curDealCount:%v\n", total, curDealCount)
		}
		sort.Float64s(totalHistogramCommonPoint)
		fmt.Printf("DealHistogram结束 totalHistogramCommonPoint:%v\n", totalHistogramCommonPoint)
	}()
	fmt.Fprintf(w, "DealCommHistogram done v !!!")
}

// DealVecHistogram handles /vec_histogram?num=N. It replies immediately and,
// in a background goroutine, feeds N random scores into VecHistogram, one
// every 3 seconds, each tagged with a randomly chosen subject and age. When
// the last score has been observed it prints every score recorded so far in
// ascending order.
//
// A missing, unparsable or non-positive "num" is rejected with 400 Bad
// Request: GetParamNum reports such input as 0, and starting the generator
// with that count would keep it running forever.
//
// NOTE(review): totalVecHistogramPoint is appended to and sorted without
// synchronization, so overlapping requests race on it.
func DealVecHistogram(w http.ResponseWriter, req *http.Request) {
	dealCount := GetParamNum(req)
	if dealCount <= 0 {
		http.Error(w, "num must be a positive integer", http.StatusBadRequest)
		return
	}
	go func() {
		ticker := time.NewTicker(3 * time.Second)
		defer ticker.Stop() // release the ticker once the batch is done
		// Per-request tallies of how often each subject / age was drawn.
		thisSubjectMap := make(map[string]int64)
		thisAgeMap := make(map[int64]int64)
		for curDealCount := int64(1); curDealCount <= dealCount; curDealCount++ {
			<-ticker.C
			subjectStr := getCurRandomStrMap(thisSubjectMap, subjects)
			ageStr := getCurRandomIntMap(thisAgeMap, ages)
			// Random score of one student.
			// NOTE(review): whether the bounds 55 and 110 are inclusive
			// depends on randomutil.RandomNum; confirm.
			thisVal := float64(randomutil.RandomNum(55, 110))
			totalVecHistogramPoint = append(totalVecHistogramPoint, thisVal)
			VecHistogram.With(prometheus.Labels{"subject": subjectStr, "age": ageStr}).Observe(thisVal)
			// Use the value returned by the atomic add rather than re-reading
			// the shared counter non-atomically.
			total := atomic.AddInt64(&vecHistogramTotalCount, 1)
			fmt.Printf("vecHistogramTotalCount:%v,curDealCount:%v, subjectMap:%v, ageMap:%v\n", total, curDealCount, thisSubjectMap, thisAgeMap)
		}
		sort.Float64s(totalVecHistogramPoint)
		fmt.Printf("DealVecHistogram结束 totalVecHistogramPoint:%v\n", totalVecHistogramPoint)
	}()
	fmt.Fprintf(w, "DealVecHistogram done !!!")
}

grafana 图像

普通 histogram (不带label) 名称 common_histogram

每个学生只有一个总分,学生不带属性(label),样本学生一共一百个

模拟操作 
curl http://127.0.0.1:8090/common_histogram?num=100
模拟结果
[55 55 55 57 57 58 59 60 60 61 61 61 62 62 63 64 66 66 66 67 67 67 67 67 67 68 69 69 69 70 71 71 72 72 72 73 73 74 74 75 75 75 75 76 77 78 78  80 80 81 82 82 82 82 83 83 83 84 84 85 87 88 89 89 89 90 90 90 92 93 93 93 94 94 96 97 97 98 100 100 101 101 102 102 103 103 104 104 104 105 105 106 106 106 107 107 108 108 108]

计数指标 (学生的数量)

  • prometheus_demo_client_golang_the_number_of_student_histogram:common_count
    go-prometheus业务监控指标实战(二)_第1张图片

总分指标 (全部学生的分数总和)

  • prometheus_demo_client_golang_the_number_of_student_histogram:common_sum
    go-prometheus业务监控指标实战(二)_第2张图片

直方图分组 (全部学生分数分布)

prometheus_demo_client_golang_the_number_of_student_histogram:common_bucket{le="60"} 分数小于或者等于60分的学生个数 => 测试结果为10
prometheus_demo_client_golang_the_number_of_student_histogram:common_bucket{le="70"} 分数小于或者等于70分的学生个数 => 测试结果为29
prometheus_demo_client_golang_the_number_of_student_histogram:common_bucket{le="80"} 分数小于或者等于80分的学生个数 => 测试结果为48
prometheus_demo_client_golang_the_number_of_student_histogram:common_bucket{le="90"} 分数小于或者等于90分的学生个数 => 测试结果为61
prometheus_demo_client_golang_the_number_of_student_histogram:common_bucket{le="100"} 分数小于或者等于100分的学生个数 => 测试结果为79
prometheus_demo_client_golang_the_number_of_student_histogram:common_bucket{le="+Inf"} 所有学生的个数,也就是样本集总和 => 测试结果为100
go-prometheus业务监控指标实战(二)_第3张图片

总览图

go-prometheus业务监控指标实战(二)_第4张图片

带label的histogram 名称 vec_histogram

每个学生只有一个总分,学生带属性(label),年龄age,学生试卷学科(subject),样本学生一共一百个

模拟操作 
curl http://127.0.0.1:8090/vec_histogram?num=100
模拟结果
[55 55 56 57 57 58 58 59 60 60 61 61 61 61 62 62 62 63 63 63 63 63 64 65 65 67 67 68 68 69 69 71 71 71 72 72 73 74 75 75 76 77 77 77 77 77 79  80 80 80 82 82 83 83 84 84 85 85 86 86 86 86 87 88 88 90 91 92 92 93 94 94 96 96 96 96 97 98 98 98 98 98 99 99 99 99 100 101 101 101 101 102 104 105 106 107 108 109 109]
subjectMap:map[体育:28 数学:38 语文:34], ageMap:map[20:18 21:15 22:17 23:19 24:10 25:21]

计数指标

  • prometheus_demo_client_golang_the_number_of_student_histogram:vec_count
(全体学生交上来的试卷,按照学科分组,每个学科各收上来多少张)

go-prometheus业务监控指标实战(二)_第5张图片
(全体学生交上来的试卷,按照年龄分组,每个年龄各收上来多少张)

go-prometheus业务监控指标实战(二)_第6张图片

总数指标

  • prometheus_demo_client_golang_the_number_of_student_histogram:vec_sum
(全体学生交上来的试卷,按照学科分组,每个学科总分多少)

go-prometheus业务监控指标实战(二)_第7张图片

(全体学生交上来的试卷,按照年龄分组,每个年龄总分多少)

go-prometheus业务监控指标实战(二)_第8张图片

直方图分组 (全部学生分数分布)

  • prometheus_demo_client_golang_the_number_of_student_histogram:vec_bucket

所有试卷的分数分布图
go-prometheus业务监控指标实战(二)_第9张图片
所有交上来的体育试卷分数分布
go-prometheus业务监控指标实战(二)_第10张图片
总览图
go-prometheus业务监控指标实战(二)_第11张图片

案例 Summary 统计班级人数的考试分数(value),每个人有subject(学科)和age(年龄)两个属性(label)

代码

main.go

package main

import (
	"net/http"

	qzPro "gitee.com/qzcsu/go-web-study/service/prometheus"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// main registers the demo collectors with the default Prometheus registry,
// wires up the HTTP endpoints that generate sample data (including the two
// summary endpoints), and serves the collected metrics on /metrics at port 8090.
func main() {
	prometheus.MustRegister(qzPro.CommonCounter, qzPro.FuncCounter, qzPro.VecCounter, qzPro.CommonGauge, qzPro.FuncGauge, qzPro.VecGauge,
		qzPro.CommonHistogram, qzPro.VecHistogram, qzPro.CommonSummary, qzPro.VecSummary)
	http.HandleFunc("/common_counter", qzPro.DealCommCounter)
	http.HandleFunc("/vec_counter", qzPro.DealVecCounter)
	http.HandleFunc("/common_gauge", qzPro.DealCommGauge)
	http.HandleFunc("/vec_gauge", qzPro.DealVecGauge)
	http.HandleFunc("/common_histogram", qzPro.DealCommHistogram)
	http.HandleFunc("/vec_histogram", qzPro.DealVecHistogram)
	http.HandleFunc("/common_summary", qzPro.DealCommSummary)
	http.HandleFunc("/vec_summary", qzPro.DealVecSummary)
	http.Handle("/metrics", promhttp.Handler()) // expose the metrics endpoint

	// ListenAndServe only returns when serving failed (for example because the
	// port is already taken). Surface that error instead of exiting silently.
	if err := http.ListenAndServe(":8090", nil); err != nil {
		panic(err)
	}
}

service/prometheus/constants.go

见上

service/prometheus/summary.go

package prometheus

import (
	"fmt"
	"net/http"
	"sort"
	"strings"
	"sync/atomic"
	"time"

	"github.com/prometheus/client_golang/prometheus"

	"gitee.com/qzcsu/go-web-study/utils/randomutil"
)

// SummaryNamePrefix is the metric-name prefix shared by the summary demos.
const SummaryNamePrefix = namePrefix + "_summary"

// Bookkeeping for the plain summary demo: commonSummaryTotalCount is
// incremented once per observation and totalSummaryCommonPoint collects every
// observed value.
var (
	commonSummaryTotalCount int64
	totalSummaryCommonPoint []float64
)

// CommonSummary is the plain (label-less) demo summary.
var CommonSummary = prometheus.NewSummary(
	prometheus.SummaryOpts{
		Namespace: nameSpace,
		Subsystem: subSys,
		Name:      SummaryNamePrefix + ":common",
		Help:      "desc the metric",
		// Objectives maps each target quantile to its tolerated error. For
		// example 0.5: 0.05 asks for the median and accepts any value whose
		// rank lies between the 45th and the 55th percentile.
		Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
	},
)

// ConstSummary is a summary with fixed, pre-computed contents: 4711
// observations summing to 403.34, with the variable label values code="200",
// method="get".
//
// The map passed to NewConstSummary goes from quantile rank (a value between
// 0 and 1) to the observed value at that rank. The previous keys
// (25, 50, 100, 200) were histogram bucket bounds and the values were error
// margins, which would expose meaningless quantile labels. The values below
// follow the example in the client library documentation.
//
// NOTE(review): the error returned by NewConstSummary is discarded, so a
// failed construction goes unnoticed here. In particular, confirm that the
// non-ASCII const label name "头衔" is accepted by the client version in use.
var ConstSummary, _ = prometheus.NewConstSummary(
	prometheus.NewDesc(
		fmt.Sprintf("%s:%s", SummaryNamePrefix, "const"),
		"A Summary of the HTTP request durations.",
		[]string{"code", "method"},
		prometheus.Labels{"owner": "example", "头衔": "将军"},
	),
	4711, 403.34,
	map[float64]float64{0.5: 42.3, 0.9: 323.3},
	"200", "get",
)

// Bookkeeping for the labelled summary demo: vecSummaryTotalCount is
// incremented once per observation and totalVecPoint collects every observed
// value.
var vecSummaryTotalCount int64
var totalVecPoint []float64

// VecSummary is the demo summary partitioned by the "subject" and "age"
// labels.
var VecSummary = prometheus.NewSummaryVec(
	prometheus.SummaryOpts{
		Subsystem: subSys,
		Namespace: nameSpace,
		// Help is set so the exposed metric carries a description, matching
		// CommonSummary.
		Help: "desc the metric",
		Name: fmt.Sprintf("%s:%s", SummaryNamePrefix, "vec"),
		// Objectives maps each target quantile to its tolerated error. For
		// example 0.5: 0.05 asks for the median and accepts any value whose
		// rank lies between the 45th and the 55th percentile.
		Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
	}, []string{"subject", "age"})

// DealCommSummary handles /common_summary?num=N. It replies immediately and,
// in a background goroutine, feeds N random scores into CommonSummary, one
// per second. When the last score has been observed it prints every score
// recorded so far in ascending order, followed by a manual cross-check of the
// three configured quantiles.
//
// A missing, unparsable or non-positive "num" is rejected with 400 Bad
// Request: GetParamNum reports such input as 0, and starting the generator
// with that count would keep it running forever.
//
// NOTE(review): totalSummaryCommonPoint is appended to and sorted without
// synchronization, so overlapping requests race on it.
func DealCommSummary(w http.ResponseWriter, req *http.Request) {
	dealCount := GetParamNum(req)
	if dealCount <= 0 {
		http.Error(w, "num must be a positive integer", http.StatusBadRequest)
		return
	}
	go func() {
		ticker := time.NewTicker(1 * time.Second)
		defer ticker.Stop() // release the ticker once the batch is done
		for curDealCount := int64(1); curDealCount <= dealCount; curDealCount++ {
			<-ticker.C
			// Use the value returned by the atomic add rather than re-reading
			// the shared counter non-atomically.
			total := atomic.AddInt64(&commonSummaryTotalCount, 1)
			// Random score of one student.
			// NOTE(review): whether the bounds 55 and 110 are inclusive
			// depends on randomutil.RandomNum; confirm.
			thisVal := float64(randomutil.RandomNum(55, 110))
			totalSummaryCommonPoint = append(totalSummaryCommonPoint, thisVal)
			fmt.Printf("commonSummaryTotalCount:%v,curDealCount:%v,thisVal:%v \n", total, curDealCount, thisVal)
			CommonSummary.Observe(thisVal)
		}

		sort.Float64s(totalSummaryCommonPoint)
		fmt.Printf("DealSummary结束 totalSummaryCommonPoint:%v\n", totalSummaryCommonPoint)

		// Cross-check: for each objective (0.5±0.05, 0.9±0.01, 0.99±0.001)
		// take the sorted samples whose rank lies inside the tolerated band,
		// then print each band and its middle value(s) so they can be compared
		// with the quantiles the summary reports.
		totalLen := float64(len(totalSummaryCommonPoint))
		sl1 := getSE(totalLen*0.45, totalLen*0.55, totalSummaryCommonPoint)
		sl2 := getSE(totalLen*0.89, totalLen*0.91, totalSummaryCommonPoint)
		sl3 := getSE(totalLen*0.989, totalLen*0.991, totalSummaryCommonPoint)
		fmt.Printf("sl1:%v, sl2:%v, sl3:%v\n", sl1, sl2, sl3)
		fmt.Printf("sv1:%v, sv2:%v, sv3:%v\n", getSV(sl1), getSV(sl2), getSV(sl3))
	}()
	fmt.Fprintf(w, "DealCommSummary done v !!!")
}

// getSE returns the sub-slice of tolS that covers the 1-based rank range
// [st, en], after converting both ranks to 0-based indices. It prints the
// requested ranks and the resulting indices.
//
// A start rank whose default formatting contains no "." is treated as a whole
// number and shifted down by one (unless that would go below zero); a
// fractional start rank is simply truncated. The end rank is always reduced
// by one and truncated. The end index is then forced to lie after the start
// index and clamped to the last element of tolS.
func getSE(st float64, en float64, tolS []float64) []float64 {
	fmt.Printf("getSE ss:%v, ee:%v\n", st, en)

	isWhole := !strings.Contains(fmt.Sprintf("%v", st), ".")
	if isWhole && st-1 >= 0 {
		st--
	}

	lo := int(st)
	hi := int(en - 1)
	if lo > hi {
		hi = lo + 1
	}
	if last := len(tolS) - 1; hi > last {
		hi = last
	}

	fmt.Printf("getSE s:%v, e:%v\n", lo, hi)
	return tolS[lo : hi+1]
}

// getSV returns the middle value(s) of tolS: the single middle element when
// the length is odd, the two elements around the centre when it is even, and
// nil when tolS is empty (indexing an empty slice would panic).
func getSV(tolS []float64) []float64 {
	tsLen := len(tolS)
	if tsLen == 0 {
		return nil
	}

	mid := tsLen / 2
	if tsLen%2 == 1 {
		return []float64{tolS[mid]}
	}
	return []float64{tolS[mid-1], tolS[mid]}
}

// DealVecSummary handles /vec_summary?num=N. It replies immediately and, in a
// background goroutine, feeds N random scores into VecSummary, one every 3
// seconds, each tagged with a randomly chosen subject and age. When the last
// score has been observed it prints every score recorded so far in ascending
// order.
//
// A missing, unparsable or non-positive "num" is rejected with 400 Bad
// Request: GetParamNum reports such input as 0, and starting the generator
// with that count would keep it running forever.
//
// NOTE(review): totalVecPoint is appended to and sorted without
// synchronization, so overlapping requests race on it.
func DealVecSummary(w http.ResponseWriter, req *http.Request) {
	dealCount := GetParamNum(req)
	if dealCount <= 0 {
		http.Error(w, "num must be a positive integer", http.StatusBadRequest)
		return
	}
	go func() {
		ticker := time.NewTicker(3 * time.Second)
		defer ticker.Stop() // release the ticker once the batch is done
		// Per-request tallies of how often each subject / age was drawn.
		thisSubjectMap := make(map[string]int64)
		thisAgeMap := make(map[int64]int64)
		for curDealCount := int64(1); curDealCount <= dealCount; curDealCount++ {
			<-ticker.C
			subjectStr := getCurRandomStrMap(thisSubjectMap, subjects)
			ageStr := getCurRandomIntMap(thisAgeMap, ages)
			// Random score of one student.
			// NOTE(review): whether the bounds 55 and 110 are inclusive
			// depends on randomutil.RandomNum; confirm.
			thisVal := float64(randomutil.RandomNum(55, 110))
			totalVecPoint = append(totalVecPoint, thisVal)
			VecSummary.With(prometheus.Labels{"subject": subjectStr, "age": ageStr}).Observe(thisVal)
			// Use the value returned by the atomic add rather than re-reading
			// the shared counter non-atomically.
			total := atomic.AddInt64(&vecSummaryTotalCount, 1)
			fmt.Printf("vecSummaryTotalCount:%v,curDealCount:%v, subjectMap:%v, ageMap:%v\n", total, curDealCount, thisSubjectMap, thisAgeMap)
		}
		sort.Float64s(totalVecPoint)
		// The label names the slice actually printed; it previously carried
		// the histogram variable's name.
		fmt.Printf("DealVecSummary结束 totalVecPoint:%v\n", totalVecPoint)
	}()
	fmt.Fprintf(w, "DealVecSummary done !!!")
}

grafana图像

普通summary (不带label)名称 common_summary

每个学生只有一个总分,学生不带属性(label),样本学生一共一百个

模拟操作 
curl http://127.0.0.1:8090/common_summary?num=100
模拟结果
[57 57 57 57 58 59 59 59 60 61 62 62 62 62 62 63 63 63 63 64 64 64 64 64 65 65 65 66 66 66 67 67 67 69 69 69 70 71 71 71 72 74 76 77 77 77 77 77 77 78 79 79 80 80 80 80 80 81 82 83 83 84 85 85 86 86 87 87 88 88 88 89 89 89 90 90 91 91 91 92 92 92 93 94 95 95 95 95 96 97 99 100 102 103 104 106 107 108 108 109]
sl1:[77 77 77 77 77 78 79 79 80 80 80], sl2:[96 97 99], sl3:[108]
sv1:[78], sv2:[97], sv3:[108]

计数指标 (学生的数量)

  • prometheus_demo_client_golang_the_number_of_student_summary:common_count
    go-prometheus业务监控指标实战(二)_第12张图片

总分指标 (学生的总分)

  • prometheus_demo_client_golang_the_number_of_student_summary:common_sum
    go-prometheus业务监控指标实战(二)_第13张图片

分位数图 (学生的分数分位图)

prometheus_demo_client_golang_the_number_of_student_summary:common{quantile="0.5"} 有50%人的分数小于或者等于的值 => 测试结果为79,即50%的人分数小于等于78分
prometheus_demo_client_golang_the_number_of_student_summary:common{quantile="0.9"} 有90%人的分数小于或者等于的值 => 测试结果为104,即90%的人分数小于等于97分
prometheus_demo_client_golang_the_number_of_student_summary:common{quantile="0.99"} 有99%人的分数小于或者等于的值 => 测试结果为108,即99%的人分数小于等于108分
go-prometheus业务监控指标实战(二)_第14张图片

总览图

go-prometheus业务监控指标实战(二)_第15张图片

带label的summary 名称 vec_summary

每个学生只有一个总分,学生带属性(label),年龄age,学生试卷学科(subject),样本学生一共一百个

模拟操作 
curl http://127.0.0.1:8090/vec_summary?num=100
模拟结果
[55 55 56 57 57 58 58 59 60 60 61 61 61 61 62 62 62 63 63 63 63 63 64 65 65 67 67 68 68 69 69 71 71 71 72 72 73 74 75 75 76 77 77 77 77 77 79  80 80 80 82 82 83 83 84 84 85 85 86 86 86 86 87 88 88 90 91 92 92 93 94 94 96 96 96 96 97 98 98 98 98 98 99 99 99 99 100 101 101 101 101 102 104 105 106 107 108 109 109]
subjectMap:map[体育:28 数学:38 语文:34], ageMap:map[20:18 21:15 22:17 23:19 24:10 25:21]

计数指标

  • prometheus_demo_client_golang_the_number_of_student_summary:vec_count
试卷总数按照年龄分组(学生试卷数量图按年龄分类)

go-prometheus业务监控指标实战(二)_第16张图片

试卷总数按照学科分组(学生试卷数量按学科分类)

go-prometheus业务监控指标实战(二)_第17张图片

总分指标

  • prometheus_demo_client_golang_the_number_of_student_summary:vec_sum
总分按照年龄分组(学生总分数按年龄分组)

go-prometheus业务监控指标实战(二)_第18张图片

总分按照学科分组(学生总分数按学科分组)

go-prometheus业务监控指标实战(二)_第19张图片

一个分位图样例 (年龄是22岁的学生,三科分数的0.5分位图 )

go-prometheus业务监控指标实战(二)_第20张图片

总览图

go-prometheus业务监控指标实战(二)_第21张图片

告警机制配置

假设场景:设置学生人数超过50 就触发告警
用来测试的指标:prometheus_demo_client_golang_the_number_of_student_summary:common_count

配置告警通道

go-prometheus业务监控指标实战(二)_第22张图片

选择告警的协议

告警通道协议有很多种,目前飞书不能原生支持,需要二次开发。因为飞书机器人的 webhook 发送的数据格式和grafana 发送告警的格式不一致。这里仅仅做一个演示不实际触发
go-prometheus业务监控指标实战(二)_第23张图片

创建告警策略

go-prometheus业务监控指标实战(二)_第24张图片
go-prometheus业务监控指标实战(二)_第25张图片

触发告警策略

  1. 刚开始
    go-prometheus业务监控指标实战(二)_第26张图片
  2. 随后达到告警阈值
    go-prometheus业务监控指标实战(二)_第27张图片
  3. 触发告警

go-prometheus业务监控指标实战(二)_第28张图片

总结

本文主要演示了 Histogram和Summary两个案例及其grafana面板的配置。随后针对一个指标,演示了配置自动告警的过程。

你可能感兴趣的:(golang,实战演练,golang,prometheus,grafana,业务告警,监控告警)