Prometheus有很多exporter,本文先自己动手写一个exporter,再介绍一个官方提供的node exporter。
其实只要返回Prometheus能够识别的标准文本格式,Prometheus就能抓取,这就是统一接口的好处。
首先需要提供一个HTTP server来暴露数据。本文主要介绍一个通过HTTP接口提供数据的探测服务,它可以通过http、tcp等协议对目标服务做监控。
// Register the /probe endpoint on the default mux. Each request copies the
// current config pointer while holding sc's lock and releases the lock
// before the probe runs, so the probe itself never executes under the lock.
http.Handle("/probe", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
	sc.Lock()
	current := sc.C
	sc.Unlock()
	probeHandler(w, r, current, logger, rh)
}))

// A nil handler means the default mux is served. ListenAndServe only returns
// with a non-nil error, which is logged before the process exits.
if listenErr := http.ListenAndServe(*listenAddress, nil); listenErr != nil {
	level.Error(logger).Log("msg", "Error starting HTTP server", "err", listenErr)
	os.Exit(1)
}
这样就有了一个服务端。下面是/probe接口的处理函数probeHandler的具体实现;这里预先定义了一些module,可以通过URL参数module指定使用哪一个(未指定时默认为http_2xx)。
// probeHandler serves one /probe request.
//
// Query parameters:
//   - module: name of the module in c.Modules to use; defaults to "http_2xx".
//   - target: what to probe; required.
//   - debug:  when "true", the debug output is returned as text/plain
//     instead of the metrics.
//
// The probe runs with a timeout taken from the
// X-Prometheus-Scrape-Timeout-Seconds header (10s when absent or zero),
// capped by the module's own timeout when that is positive and smaller, and
// then reduced by *timeoutOffset. The outcome is exposed through the
// probe_success and probe_duration_seconds gauges, registered in a registry
// created for this request, and is also recorded in rh.
//
// Responds with 400 for an unknown module, a missing target or an unknown
// prober, and with 500 when the timeout header cannot be parsed.
func probeHandler(w http.ResponseWriter, r *http.Request, c *config.Config, logger log.Logger, rh *resultHistory) {
	// Parse the query string once and reuse it for every parameter.
	params := r.URL.Query()

	moduleName := params.Get("module")
	if moduleName == "" {
		moduleName = "http_2xx"
	}
	module, ok := c.Modules[moduleName]
	if !ok {
		http.Error(w, fmt.Sprintf("Unknown module %q", moduleName), http.StatusBadRequest)
		return
	}

	// If Prometheus told us its scrape timeout, honour it on our side too.
	var timeoutSeconds float64
	if v := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds"); v != "" {
		var err error
		timeoutSeconds, err = strconv.ParseFloat(v, 64)
		if err != nil {
			http.Error(w, fmt.Sprintf("Failed to parse timeout from Prometheus header: %s", err), http.StatusInternalServerError)
			return
		}
	}
	if timeoutSeconds == 0 {
		timeoutSeconds = 10
	}
	if module.Timeout.Seconds() < timeoutSeconds && module.Timeout.Seconds() > 0 {
		timeoutSeconds = module.Timeout.Seconds()
	}
	timeoutSeconds -= *timeoutOffset

	// Validate the remaining inputs before allocating anything per-probe.
	target := params.Get("target")
	if target == "" {
		http.Error(w, "Target parameter is missing", http.StatusBadRequest)
		return
	}
	prober, ok := Probers[module.Prober]
	if !ok {
		http.Error(w, fmt.Sprintf("Unknown prober %q", module.Prober), http.StatusBadRequest)
		return
	}

	// Derive the probe context from the request so that a cancelled or
	// disconnected scrape stops the probe instead of letting it run until
	// the timeout expires.
	ctx, cancel := context.WithTimeout(r.Context(), time.Duration(timeoutSeconds*float64(time.Second)))
	defer cancel()
	r = r.WithContext(ctx)

	// Gauge reporting whether the probe succeeded: 1 on success, 0 on failure.
	probeSuccessGauge := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "probe_success",
		Help: "Displays whether or not the probe was a success",
	})
	// Gauge reporting how long the probe took.
	probeDurationGauge := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "probe_duration_seconds",
		Help: "Returns how long the probe took to complete in seconds",
	})

	sl := newScrapeLogger(logger, moduleName, target)
	level.Info(sl).Log("msg", "Beginning probe", "probe", module.Prober, "timeout_seconds", timeoutSeconds)

	start := time.Now()
	// A fresh registry per request: the response contains only this probe's
	// metrics.
	registry := prometheus.NewRegistry()
	registry.MustRegister(probeSuccessGauge)
	registry.MustRegister(probeDurationGauge)

	success := prober(ctx, target, module, registry, sl)
	duration := time.Since(start).Seconds()
	probeDurationGauge.Set(duration)
	if success {
		probeSuccessGauge.Set(1)
		level.Info(sl).Log("msg", "Probe succeeded", "duration_seconds", duration)
	} else {
		// probeSuccessGauge keeps its zero value, which reports failure.
		level.Error(sl).Log("msg", "Probe failed", "duration_seconds", duration)
	}

	debugOutput := DebugOutput(&module, &sl.buffer, registry)
	rh.Add(moduleName, target, debugOutput, success)

	if params.Get("debug") == "true" {
		w.Header().Set("Content-Type", "text/plain")
		if _, err := w.Write([]byte(debugOutput)); err != nil {
			level.Warn(sl).Log("msg", "Error writing debug output", "err", err)
		}
		return
	}

	promhttp.HandlerFor(registry, promhttp.HandlerOpts{}).ServeHTTP(w, r)
}
上面的Probers是所有探测器的集合。
// Probers maps a prober name to the function that carries out that kind of
// probe. probeHandler looks up a module's Prober value in this table.
Probers = map[string]prober.ProbeFn{
	"dns":  prober.ProbeDNS,
	"http": prober.ProbeHTTP,
	"icmp": prober.ProbeICMP,
	"tcp":  prober.ProbeTCP,
}
这里定义了四种探测方法。程序启动时加载module配置,每个module通过prober字段指定使用哪种探测器:
# Module definitions, keyed by module name. "http_2xx" is the module the
# probe handler falls back to when the request names none.
modules:
  http_2xx:
    # Name of the prober to use; must be a key of the Probers table.
    prober: http
    # Settings specific to the http prober.
    http:
      preferred_ip_protocol: "ip4"
上面的prober: http表示使用http探测器。下面以最简单的ICMP探测器为例:
// ProbeICMP writes the packet wb to the target over an ICMP socket and waits
// for a reply whose bytes equal wb. It returns true only when such a reply is
// read; every other exit path uses a bare return, leaving the named result
// success at its zero value (false).
//
// This is an abridged excerpt: lines consisting of "..." mark code omitted
// by the article, so several names used below (wb, socket, replyType) are
// defined in the omitted parts.
func ProbeICMP(ctx context.Context, target string, module config.Module, registry *prometheus.Registry, logger log.Logger) (success bool) {
...
// The "ok" result of ctx.Deadline is discarded here.
// NOTE(review): if ctx carries no deadline, timeoutDeadline is the zero
// time — confirm that callers always pass a context with a deadline.
timeoutDeadline, _ := ctx.Deadline()
// NOTE(review): Now + (deadline - Now) looks equivalent to using
// timeoutDeadline directly, apart from clock drift between the two calls.
deadline := time.Now().Add(timeoutDeadline.Sub(time.Now()))
// Presumably resolves target to an address using the module's preferred IP
// protocol — verify against chooseProtocol. err is not checked in the
// visible part of this excerpt.
ip, _, err := chooseProtocol(module.ICMP.PreferredIPProtocol, target, registry, logger)
level.Info(logger).Log("msg", "Creating socket")
...
// ICMPv6 socket listening on the IPv6 wildcard address.
socket, err = icmp.ListenPacket("ip6:ipv6-icmp", "::")
...
// ICMPv4 socket listening on the IPv4 wildcard address.
socket, err = icmp.ListenPacket("ip4:icmp", "0.0.0.0")
....
// Alternative IPv4 path: a raw IPv4 connection wrapped in dfConn.
// NOTE(review): presumably used to control the don't-fragment bit — the
// condition selecting this path is in the omitted code; confirm.
s, err := net.ListenPacket("ip4:icmp", "0.0.0.0")
rc, err := ipv4.NewRawConn(s)
socket = &dfConn{c: rc}
}
}
defer socket.Close()
level.Info(logger).Log("msg", "Writing out packet")
if _, err = socket.WriteTo(wb, ip); err != nil {
level.Warn(logger).Log("msg", "Error writing to socket", "err", err)
return
}
// Receive buffer for reply packets.
rb := make([]byte, 65536)
// Bound the reads below by the deadline computed above, so the receive loop
// ends with a timeout error once it has passed.
if err := socket.SetReadDeadline(deadline); err != nil {
level.Error(logger).Log("msg", "Error setting socket deadline", "err", err)
return
}
level.Info(logger).Log("msg", "Waiting for reply packets")
// Keep reading until a matching reply arrives or a read times out.
for {
n, peer, err := socket.ReadFrom(rb)
if err != nil {
// A timeout ends the probe as a failure.
if nerr, ok := err.(net.Error); ok && nerr.Timeout() {
level.Warn(logger).Log("msg", "Timeout reading from socket", "err", err)
return
}
// Any other read error is logged and the read is retried.
// NOTE(review): a persistent non-timeout error would keep this loop
// spinning — confirm whether that can happen.
level.Error(logger).Log("msg", "Error reading from socket", "err", err)
continue
}
// Ignore packets that did not come from the probed address.
if peer.String() != ip.String() {
continue
}
if replyType == ipv6.ICMPTypeEchoReply {
// Clear checksum to make comparison succeed.
rb[2] = 0
rb[3] = 0
}
// A reply matches when its bytes are identical to the packet that was sent.
if bytes.Compare(rb[:n], wb) == 0 {
level.Info(logger).Log("msg", "Found matching reply packet")
return true
}
}
}
篇幅有限,只保留了部分源代码。这样就可以给Prometheus提供标准格式的指标数据了。