使用event_control监听memory cgroup的内存使用率

前言

如果要在日常使用中监控一个应用程序的内存使用率,并在达到某个阈值时触发报警,通常会怎么做?
1.监控系统定时拉取agent的指标
2.agent定时轮询并主动上报指标
以上两种方式虽然都能触发报警,但总感觉差了点什么:什么时候发现达到阈值,取决于定时拉取或轮询上报的频率。那能不能做到一旦达到阈值就立刻发现问题呢?Linux 的 cgroup v1 提供了 cgroup.event_control 接口,通过 epoll 监听 eventfd 就能满足实时事件通知的需求。

kubelet中的实现

kubelet的驱逐功能会在内存达到用户配置的阈值后,触发驱逐节点上pod的操作,该通知机制就是基于内核的 cgroups Memory thresholds来实现的。

// NewCgroupNotifier arms a threshold notification on the file path/attribute
// by writing "<eventfd> <watchfd> <threshold>" to path/cgroup.event_control,
// and returns a notifier holding the eventfd and a fresh epoll descriptor.
//
// The watch and control descriptors are only needed while registering and are
// closed before returning. The eventfd and epoll descriptors are handed to the
// returned notifier on success and are closed here on any failure, in which
// case a nil notifier and the failing call's error are returned.
func NewCgroupNotifier(path, attribute string, threshold int64) (_ CgroupNotifier, err error) {
	// err is a named result so the deferred cleanups below observe the value
	// that is actually being returned.
	watchfd, err := unix.Open(fmt.Sprintf("%s/%s", path, attribute), unix.O_RDONLY|unix.O_CLOEXEC, 0)
	if err != nil {
		return nil, err
	}
	defer unix.Close(watchfd)

	controlfd, err := unix.Open(fmt.Sprintf("%s/cgroup.event_control", path), unix.O_WRONLY|unix.O_CLOEXEC, 0)
	if err != nil {
		return nil, err
	}
	defer unix.Close(controlfd)

	eventfd, err := unix.Eventfd(0, unix.EFD_CLOEXEC)
	if err != nil {
		return nil, err
	}
	defer func() {
		// Close eventfd if we get an error later in initialization.
		if err != nil {
			unix.Close(eventfd)
		}
	}()

	epfd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
	if err != nil {
		return nil, err
	}
	defer func() {
		// Close epfd if we get an error later in initialization.
		if err != nil {
			unix.Close(epfd)
		}
	}()

	// Registration line: the eventfd to signal, the file being watched, and
	// the threshold value.
	config := fmt.Sprintf("%d %d %d", eventfd, watchfd, threshold)
	if _, err = unix.Write(controlfd, []byte(config)); err != nil {
		return nil, err
	}

	return &linuxCgroupNotifier{
		eventfd: eventfd,
		epfd:    epfd,
		stop:    make(chan struct{}),
	}, nil
}
// Start registers the notifier's eventfd with its epoll instance and then
// loops, sending an empty struct on eventCh each time an event is read from
// the eventfd.
//
// It returns when the stop channel becomes readable (checked once per
// iteration, before waiting), or when registering, waiting, or reading fails;
// failures are logged rather than returned. The send on eventCh blocks until
// the receiver takes the value.
func (n *linuxCgroupNotifier) Start(eventCh chan<- struct{}) {
	err := unix.EpollCtl(n.epfd, unix.EPOLL_CTL_ADD, n.eventfd, &unix.EpollEvent{
		Fd:     int32(n.eventfd),
		Events: unix.EPOLLIN,
	})
	if err != nil {
		// Without the registration the wait below can never report an event.
		klog.InfoS("Eviction manager: error adding epoll eventfd", "err", err)
		return
	}

	// One scratch buffer is enough: its contents are discarded after each read.
	buf := make([]byte, eventSize)
	for {
		select {
		case <-n.stop:
			return
		default:
		}

		event, err := wait(n.epfd, n.eventfd, notifierRefreshInterval)
		if err != nil {
			klog.InfoS("Eviction manager: error while waiting for memcg events", "err", err)
			return
		}
		if !event {
			// Timeout on wait.  This is expected if the threshold was not crossed
			continue
		}

		// Consume the event from the eventfd
		if _, err := unix.Read(n.eventfd, buf); err != nil {
			klog.InfoS("Eviction manager: error reading memcg events", "err", err)
			return
		}
		eventCh <- struct{}{}
	}
}
demo实现

利用 cgroup 的 Go 库(示例中的 cgroup1 包)可以很方便地实现该通知机制(基于 cgroup v1)

// EventFD wraps a file descriptor that is read as an eventfd counter.
type EventFD struct {
	fd uintptr
	// valid is set to true by FromFd; no method in this block consults it.
	valid bool
}

// FromFd wraps an already-open descriptor. It does not duplicate or validate
// fd, and the returned EventFD does not close it.
func FromFd(fd uintptr) *EventFD {
	return &EventFD{
		fd:    fd,
		valid: true,
	}
}

// ReadEvents performs one read on the descriptor and returns the 64-bit
// counter value it delivered.
//
// An eventfd read yields a fixed-width 8-byte unsigned integer in host byte
// order, so the buffer is decoded with the native byte order rather than as a
// variable-length varint (which only happens to agree for values below 128).
// An error is returned if the read fails or delivers fewer than 8 bytes.
func (e *EventFD) ReadEvents() (uint64, error) {
	var buf [8]byte
	n, err := syscall.Read(int(e.fd), buf[:])
	if err != nil {
		return 0, err
	}
	if n != len(buf) {
		return 0, fmt.Errorf("could not read for eventfd")
	}
	return binary.NativeEndian.Uint64(buf[:]), nil
}

// main loads the v1 cgroup at the static path "/test", prints its state,
// registers a 50 MiB memory threshold event on it, and then logs every counter
// value read from the returned eventfd until a read fails.
func main() {
	cg, err := cgroup1.Load(cgroup1.StaticPath("/test"))
	if err != nil {
		panic(err)
	}
	fmt.Println(cg.State())

	// NOTE(review): the second argument (false) presumably selects plain memory
	// rather than memory+swap accounting — confirm against the cgroup1 package.
	const thresholdBytes = 50 * 1024 * 1024
	fd, err := cg.RegisterMemoryEvent(cgroup1.MemoryThresholdEvent(thresholdBytes, false))
	if err != nil {
		panic(err)
	}

	notifier := FromFd(fd)
	for {
		count, readErr := notifier.ReadEvents()
		if readErr != nil {
			log.Printf("error while reading from eventfd: %v", readErr)
			return
		}
		log.Printf("got threshold event: %v", count)
	}
}
测试验证
[root@kind-node test]# mkdir /sys/fs/cgroup/memory/test
[root@kind-node test]# echo $$ > cgroup.procs
[root@kind-node test]# cat memory.usage_in_bytes 
2871296
[root@kind-node test]# dd if=/dev/zero of=/root/testfile bs=101M count=1
[root@kind-node test]# go run memory_pressure.go 
thawed
2023/03/07 01:38:44 got threshold event: 1
2023/03/07 01:38:44 got threshold event: 1
参考:
  1. https://cloud.tencent.com/developer/article/1882019?from=15425&areaSource=102001.7&traceId=XmEvMEE3Hcp6sTjKLtCi7
  2. https://www.jianshu.com/p/f2403e33c766

你可能感兴趣的:(go,linux,linux,unix,服务器)