Kafka3.x监控

kafka从2012年11月加入Apache,转眼已经过去了十个年头。
0.8版本,引入了副本机制,弃用了旧版Scala客户端,诞生了消息格式v0
0.10版本,消息格式升至v1
0.11版本,新增了事务能力,消息格式升至v2
1.0版本,新增对 Java 9 的支持
2.0版本,放弃了对 Java 7 的支持,并删除了之前已弃用的 Scala 生产者和消费者。
2.1版本,Java 11 支持
2.8版本,引入自带的 KRaft 协议以替换 ZooKeeper
3.0版本,不再支持 Java 8 和 Scala 2.12,弃用消息格式 v0 和 v1
3.1版本,Apache Kafka 支持 Java 17

虽然现在大家对kafka的使用依然会选择对zookeeper强依赖,但是如果要使用Raft协议,那么kafka-manager(后改名为CMAK)已经无法兼容。如果想要对kafka运维监控,并且集成自有告警系统,需要自研。

获取主题和消费者元数据

    /**
     * Logs every topic in the cluster together with its partition layout.
     *
     * @param adminClient Kafka AdminClient used to query cluster metadata
     * @throws ExecutionException   if the underlying metadata request fails
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    private static void getAllTopics(AdminClient adminClient) throws ExecutionException, InterruptedException {
        Set<String> topicNames = adminClient.listTopics().names().get();
        // topicNameValues()/allTopicNames() replaced the deprecated values() in Kafka 3.1+
        Map<String, TopicDescription> descriptions = adminClient.describeTopics(topicNames).allTopicNames().get();
        for (Map.Entry<String, TopicDescription> entry : descriptions.entrySet()) {
            for (TopicPartitionInfo partition : entry.getValue().partitions()) {
                log.info("topicName:{}, partition:{}", entry.getKey(), partition.toString());
            }
        }
    }

    /**
     * Returns the ids of every consumer group known to the cluster,
     * logging each id exactly once.
     *
     * @param adminClient Kafka AdminClient
     * @return set of consumer-group ids
     * @throws ExecutionException   if the group listing request fails
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    private static Set<String> getAllGroups(AdminClient adminClient) throws ExecutionException, InterruptedException {
        ListConsumerGroupsResult groupList = adminClient.listConsumerGroups();
        // No cast to ArrayList: all() only guarantees a Collection
        Collection<ConsumerGroupListing> consumerGroupListings = groupList.all().get();
        Set<String> groupIds = consumerGroupListings.stream()
                .map(ConsumerGroupListing::groupId)
                .collect(Collectors.toSet());
        // Original logged every id twice (forEach + map); log once here
        groupIds.forEach(log::info);
        return groupIds;
    }

获取消费延迟Lag元数据

    /**
     * Collects and logs consumer-lag information for one consumer group:
     * rows for partitions assigned to live members, plus rows for partitions
     * that have committed offsets but no live consumer.
     *
     * @param group consumer-group id
     * @throws ExecutionException   if any admin request fails
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    private static void collectByGroup(String group) throws ExecutionException, InterruptedException {
        // Fetch consumer-group metadata
        DescribeConsumerGroupsResult groupsResult = KafkaUtils.getAdminClient()
                .describeConsumerGroups(Collections.singleton(group));
        // Group description (members, coordinator, state)
        ConsumerGroupDescription groupDescription = groupsResult.all().get().get(group);
        // Print the group state (ConsumerGroupState)
        log.info("group name:{} state:{}", group, groupDescription.state().toString());
        List<TopicPartition> assignedTps = new ArrayList<>();
        List<PartitionAssignmentState> rowWithConsumer = new ArrayList<>();
        // Descriptions of the group's live members
        Collection<MemberDescription> members = groupDescription.members();
        if (members != null) {
            // Committed consumer offsets (OffsetFetchRequest under the hood)
            ListConsumerGroupOffsetsResult offsetsResult = KafkaUtils.getAdminClient().listConsumerGroupOffsets(group);
            Map<TopicPartition, OffsetAndMetadata> offsets = offsetsResult.partitionsToOffsetAndMetadata().get();
            if (offsets != null && !offsets.isEmpty()) {
                // Only print lag rows when the group is healthy (STABLE);
                // compare enum values directly instead of their toString() forms
                if (groupDescription.state() == ConsumerGroupState.STABLE) {
                    rowWithConsumer = getRowWithConsumer(groupDescription, offsets, members, assignedTps, group);
                }
            }
            if (offsets == null) {
                offsets = new HashMap<>();
            }
            // Lag rows for partitions with committed offsets but no live consumer
            List<PartitionAssignmentState> rowWithoutConsumer = getRowWithoutConsumer(groupDescription, offsets, assignedTps, group);
            if (!CollectionUtils.isEmpty(rowWithoutConsumer)) {
                rowWithConsumer.addAll(rowWithoutConsumer);
            }
        }
        rowWithConsumer.forEach(x -> log.info(x.toString()));
    }

    /**
     * Builds lag rows for partitions currently assigned to live group members.
     *
     * @param description group description (supplies the coordinator)
     * @param offsets     committed offsets per partition
     * @param members     live members of the group
     * @param assignedTps OUT parameter — accumulates every partition seen assigned,
     *                    so the caller can later compute the unassigned set
     * @param group       consumer-group id
     * @return one row per assigned partition (or per member with no assignment)
     */
    private static List<PartitionAssignmentState> getRowWithConsumer(ConsumerGroupDescription description,
                                                              Map<TopicPartition, OffsetAndMetadata> offsets,
                                                              Collection<MemberDescription> members,
                                                              List<TopicPartition> assignedTps, String group) {
        List<PartitionAssignmentState> rowWithConsumer = new ArrayList<>();
        // Walk every live member of the group
        for (MemberDescription member : members) {
            MemberAssignment assignment = member.assignment();
            if (assignment == null) {
                continue;
            }
            // Partitions assigned to this member
            Set<TopicPartition> tpSet = assignment.topicPartitions();
            if (tpSet.isEmpty()) {
                // Member has no assigned partitions: emit a row with identity only
                PartitionAssignmentState p = PartitionAssignmentState.builder()
                        .group(group)
                        .coordinator(description.coordinator())
                        .consumerId(member.consumerId())
                        .host(member.host())
                        .clientId(member.clientId())
                        .build();
                rowWithConsumer.add(p);
            } else {
                // Fetch log-end offsets (LEO) for the member's partitions.
                // Without transactions: lag = LEO - committed offset.
                // With transactions: lag = LSO - committed offset (ListOffsetRequest returns LSO).
                Map<TopicPartition, Long> logSizes = KafkaUtils.getConsumer().endOffsets(tpSet);
                assignedTps.addAll(tpSet);
                List<PartitionAssignmentState> tempList = tpSet.stream()
                        .sorted(Comparator.comparing(TopicPartition::partition))
                        .map(tp -> getPasWithConsumer(description, offsets, member, group, logSizes, tp))
                        .collect(Collectors.toList());
                rowWithConsumer.addAll(tempList);
            }
        }
        return rowWithConsumer;
    }

    /**
     * Builds one lag row for a single assigned partition.
     * The offset/lag fields are only populated when a committed offset exists;
     * the two original builder branches shared five identical calls, collapsed here.
     *
     * @param description group description (supplies the coordinator)
     * @param offsets     committed offsets per partition
     * @param member      the member this partition is assigned to
     * @param group       consumer-group id
     * @param logSizes    log-end offsets per partition (must contain {@code tp})
     * @param tp          the partition to report on
     * @return populated row for this partition
     */
    private static PartitionAssignmentState getPasWithConsumer(ConsumerGroupDescription description,
                                                        Map<TopicPartition, OffsetAndMetadata> offsets,
                                                        MemberDescription member,
                                                        String group,
                                                        Map<TopicPartition, Long> logSizes,
                                                        TopicPartition tp) {
        // NOTE(review): NPEs if tp is absent from logSizes — callers always pass the
        // endOffsets() result for exactly these partitions, so this holds today.
        long logSize = logSizes.get(tp);
        PartitionAssignmentState.PartitionAssignmentStateBuilder builder = PartitionAssignmentState.builder()
                .group(group).coordinator(description.coordinator())
                .topic(tp.topic()).partition(tp.partition())
                .consumerId(member.consumerId()).host(member.host())
                .clientId(member.clientId()).logSize(logSize);
        if (offsets.containsKey(tp)) {
            // Committed offset for this partition
            long offset = offsets.get(tp).offset();
            // backlog = total messages - consumed, clamped at zero
            builder.offset(offset).lag(Math.max(0, logSize - offset));
        }
        return builder.build();
    }

    /**
     * Builds lag rows for partitions that have committed offsets but are not
     * assigned to any live consumer, sorted by partition number.
     *
     * @param description group description (supplies the coordinator)
     * @param offsets     committed offsets per partition
     * @param assignedTps partitions already covered by live members (excluded here)
     * @param group       consumer-group id
     * @return rows for the unassigned partitions, possibly empty
     */
    private static List<PartitionAssignmentState> getRowWithoutConsumer(ConsumerGroupDescription description,
                                                                 Map<TopicPartition, OffsetAndMetadata> offsets,
                                                                 List<TopicPartition> assignedTps,
                                                                 String group) {
        // Filter out partitions that already belong to a live consumer
        List<TopicPartition> unassigned = offsets.keySet().stream()
                .filter(tp -> !assignedTps.contains(tp))
                .collect(Collectors.toList());
        if (unassigned.isEmpty()) {
            return new ArrayList<>();
        }
        // One batched end-offset lookup instead of one broker round-trip per partition
        Map<TopicPartition, Long> endOffsets = KafkaUtils.getConsumer().endOffsets(unassigned);
        return unassigned.stream()
                .map(tp -> {
                    // Log-end offset (logSize) and committed offset, defaulting to 0
                    long logSize = endOffsets.getOrDefault(tp, 0L);
                    OffsetAndMetadata committed = offsets.get(tp);
                    long offset = committed != null ? committed.offset() : 0;
                    return PartitionAssignmentState.builder()
                            .group(group)
                            .coordinator(description.coordinator())
                            .topic(tp.topic())
                            .partition(tp.partition())
                            .logSize(logSize)
                            .lag(Math.max(0, logSize - offset))
                            .offset(offset).build();
                })
                // Sort by partition number for stable output
                .sorted(Comparator.comparing(PartitionAssignmentState::getPartition))
                .collect(Collectors.toList());
    }
}

Kafka自身提供的指标,需要开启JMX

public static void main(String[] args) {
        // Connect to the broker's JMX endpoint and read one metric attribute:
        // the one-minute moving-average rate of incoming messages.
        MBeanServerConnection connection = init("192.168.1.201:9999");
        String mbeanName = "kafka.server:type=BrokerTopicMetrics,name=MessagesInPerSec";
        String attributeName = "OneMinuteRate";
        try {
            Object value = connection.getAttribute(new ObjectName(mbeanName), attributeName);
            System.out.println(value.toString());
        } catch (MalformedObjectNameException | MBeanException | AttributeNotFoundException |
                 InstanceNotFoundException | ReflectionException | IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Opens a JMX-over-RMI connection to the given broker endpoint.
     *
     * @param ipAndPort broker JMX address as "host:port"
     * @return an MBean server connection for reading broker MBean attributes
     * @throws RuntimeException wrapping any IOException raised while connecting
     */
    public static MBeanServerConnection init(String ipAndPort) {
        // Standard JMX RMI service-URL format
        String jmxUrl = "service:jmx:rmi:///jndi/rmi://" + ipAndPort+ "/jmxrmi";
        try {
            JMXServiceURL jmxServiceURL = new JMXServiceURL(jmxUrl);
            // NOTE(review): the JMXConnector created here is never closed — only the
            // connection is returned, so the connector (and its socket) leaks.
            // Consider returning the connector, or closing it when done.
            return JMXConnectorFactory.connect(jmxServiceURL, null).getMBeanServerConnection();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
kafka-Metrics-MessagesInPerSec.png

你可能感兴趣的:(Kafka3.x监控)