给 Prometheus 加一个 exporter

Prometheus 有很多 exporter,本文先自己写一个 exporter,再介绍一个官方提供的 node exporter。
其实只要返回prometheus能够识别的标准的文本,prometheus就能兼容,这个就是接口的好处。
你首先得提供一个http的server去提供数据,本篇文章主要介绍一个http的监控,可以通过http、tcp等协议做服务监控。

    // Register the /probe endpoint. http.HandleFunc installs the handler on the
    // default mux, which ListenAndServe uses below because its handler is nil.
    http.HandleFunc("/probe", func(w http.ResponseWriter, r *http.Request) {
        // Read the config pointer while holding sc's lock, then release the
        // lock before probing so the probe itself does not run under it.
        // NOTE(review): presumably sc.C can be swapped by a config reload
        // elsewhere — confirm against the code that writes sc.C.
        sc.Lock()
        conf := sc.C
        sc.Unlock()
        probeHandler(w, r, conf, logger, rh)
    })
    // ListenAndServe only returns on failure; log the error and exit non-zero.
    if err := http.ListenAndServe(*listenAddress, nil); err != nil {
        level.Error(logger).Log("msg", "Error starting HTTP server", "err", err)
        os.Exit(1)
    }

这样你就能提供一个服务端了。下面是处理函数 probeHandler 的具体实现。我这里定义了一些 module,可以通过 URL 参数指定要使用的 module。

// probeHandler runs one probe and writes its result to w.
//
// Query parameters:
//   - module: name of the module in c.Modules to use; defaults to "http_2xx".
//   - target: what to probe; required.
//   - debug:  when "true", the debug output is returned as text/plain instead
//     of the metrics exposition.
//
// The probe timeout starts from the X-Prometheus-Scrape-Timeout-Seconds header
// (10 seconds when the header is absent or zero), is lowered to the module's
// timeout when that is positive and smaller, and is then reduced by
// *timeoutOffset.
//
// Responses: 400 for an unknown module, an unknown prober, a missing target or
// an unusable timeout header value; 500 when the timeout header is not a
// number. Otherwise the probe runs and its metrics (probe_success,
// probe_duration_seconds and whatever the prober registers) are served from a
// registry created for this request. Every completed probe is also recorded in
// rh.
func probeHandler(w http.ResponseWriter, r *http.Request, c *config.Config, logger log.Logger, rh *resultHistory) {
    // Largest number of seconds that still fits in a time.Duration; anything
    // at or above it would overflow the float-to-Duration conversion below.
    const maxTimeoutSeconds = float64(1<<63-1) / float64(time.Second)

    // Parse the query string once; module, target and debug all come from it.
    params := r.URL.Query()

    moduleName := params.Get("module")
    if moduleName == "" {
        moduleName = "http_2xx"
    }
    module, ok := c.Modules[moduleName]
    if !ok {
        http.Error(w, fmt.Sprintf("Unknown module %q", moduleName), http.StatusBadRequest)
        return
    }

    // If Prometheus sent its scrape timeout, use it as the starting point for
    // the probe timeout.
    var timeoutSeconds float64
    if v := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds"); v != "" {
        var err error
        timeoutSeconds, err = strconv.ParseFloat(v, 64)
        if err != nil {
            http.Error(w, fmt.Sprintf("Failed to parse timeout from Prometheus header: %s", err), http.StatusInternalServerError)
            return
        }
        // ParseFloat also accepts "NaN", "Inf" and negative numbers. None of
        // them can form a usable deadline, so reject them here. The negated
        // comparison is deliberate: it is also true for NaN.
        if !(timeoutSeconds >= 0) || timeoutSeconds >= maxTimeoutSeconds {
            http.Error(w, fmt.Sprintf("Invalid timeout in Prometheus header: %q", v), http.StatusBadRequest)
            return
        }
    }
    if timeoutSeconds == 0 {
        timeoutSeconds = 10
    }

    // A positive module timeout that is shorter than the scrape timeout wins.
    if module.Timeout.Seconds() < timeoutSeconds && module.Timeout.Seconds() > 0 {
        timeoutSeconds = module.Timeout.Seconds()
    }
    timeoutSeconds -= *timeoutOffset
    ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeoutSeconds*float64(time.Second)))
    defer cancel()
    r = r.WithContext(ctx)

    // Gauge reporting whether the probe succeeded: 1 for success, 0 for failure.
    probeSuccessGauge := prometheus.NewGauge(prometheus.GaugeOpts{
        Name: "probe_success",
        Help: "Displays whether or not the probe was a success",
    })
    // Gauge reporting how long the probe took.
    probeDurationGauge := prometheus.NewGauge(prometheus.GaugeOpts{
        Name: "probe_duration_seconds",
        Help: "Returns how long the probe took to complete in seconds",
    })

    target := params.Get("target")
    if target == "" {
        http.Error(w, "Target parameter is missing", http.StatusBadRequest)
        return
    }

    prober, ok := Probers[module.Prober]
    if !ok {
        http.Error(w, fmt.Sprintf("Unknown prober %q", module.Prober), http.StatusBadRequest)
        return
    }

    sl := newScrapeLogger(logger, moduleName, target)
    level.Info(sl).Log("msg", "Beginning probe", "probe", module.Prober, "timeout_seconds", timeoutSeconds)

    start := time.Now()
    // A fresh registry per request, so the response only contains this
    // probe's metrics.
    registry := prometheus.NewRegistry()
    registry.MustRegister(probeSuccessGauge)
    registry.MustRegister(probeDurationGauge)
    success := prober(ctx, target, module, registry, sl)
    duration := time.Since(start).Seconds()
    probeDurationGauge.Set(duration)
    if success {
        probeSuccessGauge.Set(1)
        level.Info(sl).Log("msg", "Probe succeeded", "duration_seconds", duration)
    } else {
        // The gauge keeps its zero value, which is the failure signal.
        level.Error(sl).Log("msg", "Probe failed", "duration_seconds", duration)
    }

    debugOutput := DebugOutput(&module, &sl.buffer, registry)
    rh.Add(moduleName, target, debugOutput, success)

    if params.Get("debug") == "true" {
        w.Header().Set("Content-Type", "text/plain")
        if _, err := w.Write([]byte(debugOutput)); err != nil {
            level.Warn(sl).Log("msg", "Error writing debug output", "err", err)
        }
        return
    }

    h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{})
    h.ServeHTTP(w, r)
}

上面的 Probers 是所有探测器的集合。

// Probers maps the prober name configured on a module (module.Prober) to the
// function that performs the probe. probeHandler looks names up here and
// answers 400 for a name that is not present.
Probers = map[string]prober.ProbeFn{
        "http": prober.ProbeHTTP,
        "tcp":  prober.ProbeTCP,
        "icmp": prober.ProbeICMP,
        "dns":  prober.ProbeDNS,
    }

这里面定义了四种探测方法。程序启动时加载 module,每个 module 里面指定了要使用的 prober。

# Module definitions. probeHandler looks a module up by the name given in the
# "module" URL parameter.
modules:
  # "http_2xx" is also the name probeHandler falls back to when the request
  # carries no module parameter.
  http_2xx:
    # Name of the prober to run; must be a key of the Probers map
    # ("http", "tcp", "icmp" or "dns").
    prober: http
    http:
      # NOTE(review): presumably makes the prober prefer IPv4 when resolving
      # the target — confirm against the exporter's configuration docs.
      preferred_ip_protocol: "ip4"

上面的http代表使用http的探针。以最简单的ICMP为例,

// ProbeICMP sends a packet to target over an ICMP socket and waits for a reply
// whose bytes match what was sent. It returns true once a matching reply is
// read; the bare returns on the error and timeout paths hand back the named
// result success as it stands at that point.
//
// This listing is abridged: lines consisting of "..." mark code that was left
// out, including the construction of the outgoing packet wb, the declaration
// of socket and replyType, and the conditions that choose between the socket
// variants below.
func ProbeICMP(ctx context.Context, target string, module config.Module, registry *prometheus.Registry, logger log.Logger) (success bool) {
    ...
    // Turn the context deadline into the absolute time used as the socket
    // read deadline. The "ok" result of ctx.Deadline is discarded here.
    timeoutDeadline, _ := ctx.Deadline()
    deadline := time.Now().Add(timeoutDeadline.Sub(time.Now()))

    // Resolve target to the address the packet is sent to, passing the
    // module's preferred IP protocol.
    ip, _, err := chooseProtocol(module.ICMP.PreferredIPProtocol, target, registry, logger)

    level.Info(logger).Log("msg", "Creating socket")
        ...
        // IPv6 ICMP listener on the unspecified address.
        socket, err = icmp.ListenPacket("ip6:ipv6-icmp", "::")
        ...
        // IPv4 ICMP listener on the unspecified address.
        socket, err = icmp.ListenPacket("ip4:icmp", "0.0.0.0")
        ....
        // Variant that builds a raw IPv4 connection and wraps it in dfConn.
        s, err := net.ListenPacket("ip4:icmp", "0.0.0.0")

            rc, err := ipv4.NewRawConn(s)

            socket = &dfConn{c: rc}
        }
    }
    defer socket.Close()

    level.Info(logger).Log("msg", "Writing out packet")
    if _, err = socket.WriteTo(wb, ip); err != nil {
        level.Warn(logger).Log("msg", "Error writing to socket", "err", err)
        return
    }

    // Receive buffer for reply packets.
    rb := make([]byte, 65536)
    if err := socket.SetReadDeadline(deadline); err != nil {
        level.Error(logger).Log("msg", "Error setting socket deadline", "err", err)
        return
    }
    level.Info(logger).Log("msg", "Waiting for reply packets")
    // Keep reading until a matching reply arrives or the read deadline hits.
    for {
        n, peer, err := socket.ReadFrom(rb)
        if err != nil {
            // A timeout ends the probe; any other read error is logged and
            // the read is retried.
            if nerr, ok := err.(net.Error); ok && nerr.Timeout() {
                level.Warn(logger).Log("msg", "Timeout reading from socket", "err", err)
                return
            }
            level.Error(logger).Log("msg", "Error reading from socket", "err", err)
            continue
        }
        // Ignore packets that did not come from the probed address.
        if peer.String() != ip.String() {
            continue
        }
        if replyType == ipv6.ICMPTypeEchoReply {
            // Clear checksum to make comparison succeed.
            rb[2] = 0
            rb[3] = 0
        }
        // The reply counts as a match only if it is byte-for-byte equal to wb.
        if bytes.Compare(rb[:n], wb) == 0 {
            level.Info(logger).Log("msg", "Found matching reply packet")
            return true
        }
    }
}

篇幅有限,只保留部分源代码。这样就可以给 Prometheus 提供标准接口数据了。

你可能感兴趣的:(prometheus,云计算)