我们的服务和实例数量比较多,有时部分实例会因为资源竞争或者OOM等问题而假死,或者心跳已经停止,但由于监控平台不完善,没有及时通知到对应的应用负责人,导致服务异常,甚至引发连锁崩溃。为了解决这个问题,先写了一个简易版本的服务上下线通知,随后又对这个简易版本做了优化。
<dependency>
    <groupId>com.alibaba.cloud</groupId>
    <artifactId>spring-cloud-alibaba-dependencies</artifactId>
    <version>2021.0.5.0</version>
    <type>pom</type>
    <scope>import</scope>
</dependency>
<dependency>
    <groupId>com.alibaba.cloud</groupId>
    <artifactId>spring-cloud-starter-alibaba-nacos-config</artifactId>
</dependency>
<dependency>
    <groupId>com.alibaba.cloud</groupId>
    <artifactId>spring-cloud-starter-alibaba-nacos-discovery</artifactId>
</dependency>
本来想在nacos客户端中找一个现成的服务发现监听类或者其他工具类,用来接收nacos所有服务的变化通知,结果没有找到,只能使用NamingService的下面这个方法按服务名进行订阅:
void subscribe(String serviceName, EventListener listener)
该方法用来订阅服务变化:第一个参数为服务名,第二个参数为事件监听器,用于回调。
import com.alibaba.cloud.nacos.NacosServiceManager;
import com.alibaba.fastjson.JSON;
import com.alibaba.nacos.api.exception.NacosException;
import com.alibaba.nacos.api.naming.NamingService;
import com.alibaba.nacos.api.naming.listener.EventListener;
import com.alibaba.nacos.api.naming.pojo.Instance;
import com.alibaba.nacos.client.naming.event.InstancesChangeEvent;
import com.alibaba.nacos.common.notify.Event;
import com.alibaba.nacos.common.notify.NotifyCenter;
import com.alibaba.nacos.common.notify.listener.Subscriber;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import javax.annotation.Resource;
import java.util.List;
/**
 * Simple version of a Nacos client listener for service instance changes.
 *
 * <p>On startup the bean registers itself with the Nacos {@link NotifyCenter} as a subscriber of
 * {@link InstancesChangeEvent} and additionally subscribes a logging {@link EventListener} to one
 * hard-coded service name.
 *
 * @author 冰点
 * @version 1.0.0
 * @date 2023/11/28 14:09
 */
@Slf4j
@Component
public class NacosServiceInstanceChangeNotifier extends Subscriber<InstancesChangeEvent> {

    @Resource
    private NacosServiceManager nacosServiceManager;

    /**
     * Registers this subscriber with the notify center and subscribes a listener that logs every
     * naming event of the configured service as JSON.
     */
    @PostConstruct
    public void registerToNotifyCenter() {
        NotifyCenter.registerSubscriber(this);
        NamingService naming = nacosServiceManager.getNamingService();
        // The listener only dumps the received event; the service name is a placeholder to fill in.
        EventListener loggingListener =
                event -> log.info("监听nacos的服务实例变化情况: {}", JSON.toJSONString(event));
        try {
            naming.subscribe("填写你要监听的服务名", loggingListener);
        } catch (NacosException e) {
            log.error("监听nacos的服务实例变化情况失败", e);
        }
    }

    /**
     * Handles an instance change event (type
     * {@code com.alibaba.nacos.client.naming.event.InstancesChangeEvent}).
     *
     * <p>According to the author's testing, the event describes all instances of the affected
     * service rather than the single instance that changed, so online/offline transitions cannot be
     * derived from the event alone. This simple version therefore lists every service known to the
     * server and logs each instance as online or offline based on its current health flag.
     *
     * @param instancesChangeEvent the received instance change event
     */
    @Override
    public void onEvent(InstancesChangeEvent instancesChangeEvent) {
        log.info("监听nacos的服务实例变化情况: {}", JSON.toJSONString(instancesChangeEvent));
        try {
            NamingService naming = nacosServiceManager.getNamingService();
            // Walk every service name, then every instance of that service.
            for (String service : naming.getServicesOfServer(1, Integer.MAX_VALUE).getData()) {
                for (Instance candidate : naming.getAllInstances(service)) {
                    // Healthy instances are reported as online, all others as offline.
                    String template = candidate.isHealthy()
                            ? "服务上线 - 服务名: {}, 实例: {}:{}"
                            : "服务下线 - 服务名: {}, 实例: {}:{}";
                    log.info(template, service, candidate.getIp(), candidate.getPort());
                }
            }
        } catch (NacosException e) {
            log.error("获取服务实例失败", e);
        }
    }

    /**
     * Declares the event type this subscriber wants to receive.
     *
     * @return {@code InstancesChangeEvent.class}
     */
    @Override
    public Class<? extends Event> subscribeType() {
        return InstancesChangeEvent.class;
    }
}
下面是优化了上下线判断逻辑之后的版本:
package com.icepip.devops.admin.listener;
import com.alibaba.cloud.nacos.NacosServiceManager;
import com.alibaba.fastjson.JSON;
import com.alibaba.nacos.api.exception.NacosException;
import com.alibaba.nacos.api.naming.NamingService;
import com.alibaba.nacos.api.naming.listener.EventListener;
import com.alibaba.nacos.api.naming.listener.NamingEvent;
import com.alibaba.nacos.api.naming.pojo.Instance;
import com.alibaba.nacos.api.naming.pojo.ServiceInfo;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.autoconfigure.condition.ConditionalOnExpression;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
/**
* nacos 客户端 服务监听变化。当服务下线和上线的时候能够收到通知
* @author 冰点
* @Date 2023/11/27 12:00
*/
@Component
@Slf4j
public class NacosDiscoveryListener {
private final Set<String> subscribedServices = ConcurrentHashMap.newKeySet();
@Resource
private NacosServiceManager nacosServiceManager;
private static Map<String, Map<String, Boolean>> instanceHealthStatus = new ConcurrentHashMap<>();
@Autowired
private ApplicationNotice applicationNotice;
// 由devops或者运维平台在执行流水线的时候传进来,给通知使用
@Value("${project.name: xx项目}")
private String projectName;
@Value("${project.env: 开发环境}")
private String projectEnv;
/**
* 构造一个事件监听器,主要作用是监听服务实例变化
*
* @return EventListener
*/
private EventListener buildEventListener() {
return event -> {
if (event instanceof NamingEvent) {
NamingEvent namingEvent = (NamingEvent) event;
log.trace("服务实例变化:{}", JSON.toJSONString(namingEvent));
String serviceName = namingEvent.getServiceName();
if (!instanceHealthStatus.containsKey(serviceName)) {
ConcurrentHashMap<String, Boolean> instanceMap = new ConcurrentHashMap<>();
instanceHealthStatus.put(serviceName, instanceMap);
List<Instance> newInstance = namingEvent.getInstances();
newInstance.forEach(instance -> {
String instanceKey = instance.getIp() + ":" + instance.getPort();
instanceMap.put(instanceKey, instance.isHealthy());
log.trace("服务首次上线: {} -> {}", serviceName, instanceKey);
});
return;
}
List<ServiceInfo> allServiceInstances = getAllServiceInstances();
int instanceTotal = allServiceInstances.stream()
.mapToInt(serviceInfo -> Integer.parseInt(serviceInfo.getClusters()))
.sum();
Map<String, Boolean> serviceMap = instanceHealthStatus.computeIfAbsent(serviceName, k -> new ConcurrentHashMap<>());
Set<String> oldInstanceKeys = new HashSet<>(serviceMap.keySet());
List<Instance> newInstance = namingEvent.getInstances();
Set<String> newInstanceKeys = newInstance.stream()
.map(instance -> instance.getIp() + ":" + instance.getPort())
.collect(Collectors.toSet());
int oldSize = serviceMap.size();
int newSize = namingEvent.getInstances().size();
// 服务实例没有增减,只是状态变化
if (oldSize == newSize) {
newInstance.forEach((instance) -> {
String instanceKey = instance.getIp() + ":" + instance.getPort();
if (instance.isHealthy() != serviceMap.get(instanceKey)) {
if (instance.isHealthy()) {
log.trace("服务上线: {} -> {}", serviceName, instanceKey);
applicationNotice.sendNacosServiceMessage(serviceName+"服务上线通知",projectName, projectEnv, allServiceInstances.size(), instanceTotal, allServiceInstances);
} else {
log.trace("服务下线: {} -> {}", serviceName, instanceKey);
applicationNotice.sendNacosServiceMessage(serviceName+"服务下线通知",projectName, projectEnv, allServiceInstances.size(), instanceTotal, allServiceInstances);
}
serviceMap.put(instanceKey, instance.isHealthy());
}
});
}
// 下线实例
if (oldSize > newSize) {
newInstanceKeys.forEach(oldInstanceKeys::remove);
oldInstanceKeys.forEach(instanceKey -> log.info("服务下线: {} -> {}", serviceName, instanceKey));
applicationNotice.sendNacosServiceMessage(serviceName+"服务下线通知",projectName, projectEnv, allServiceInstances.size(), instanceTotal, allServiceInstances);
} else {
// 上线实例
newInstanceKeys.removeAll(oldInstanceKeys);
StringBuffer noticeTitle = new StringBuffer("服务上线通知");
newInstanceKeys.forEach(instanceKey -> {
String message = String.format("[%s-%s]", serviceName, instanceKey);
log.info(message);
noticeTitle.append(message).append(",");
});
applicationNotice.sendNacosServiceMessage(serviceName+"服务上线通知"+noticeTitle,projectName, projectEnv, allServiceInstances.size(), instanceTotal, allServiceInstances);
}
}
};
}
/**
* 定时获取服务列表,然后根据获取到的服务名,进行订阅,nacos客户端目前不能订阅所有服务,只能手动的订阅
* 也可以不用定时需要的时候通过getAllServiceInstances获取
*/
@Scheduled(fixedDelay = 10000)
public void reportServices() {
List<String> services = null;
try {
NamingService namingService = nacosServiceManager.getNamingService();
services = namingService.getServicesOfServer(1, Integer.MAX_VALUE).getData();
services.forEach(serviceName -> {
if (!subscribedServices.contains(serviceName)) {
try {
namingService.subscribe(serviceName, buildEventListener());
subscribedServices.add(serviceName);
} catch (NacosException e) {
log.error("订阅服务失败", e);
}
}
});
} catch (NacosException e) {
log.error("获取服务列表失败", e);
}
}
/**
* 获取所有服务实例
* @return 服务实例列表
*/
public List<ServiceInfo> getAllServiceInstances() {
List<ServiceInfo> serviceInfos = new ArrayList<>();
try {
NamingService namingService = nacosServiceManager.getNamingService();
List<String> services = namingService.getServicesOfServer(1, Integer.MAX_VALUE).getData();
for (String serviceName : services) {
List<Instance> onlineInstances = namingService.selectInstances(serviceName, true);
// 下线服务暂时不用关注
List<Instance> offlineInstances = namingService.selectInstances(serviceName, false);
serviceInfos.add(new ServiceInfo(serviceName, String.valueOf(onlineInstances.size())));
}
} catch (NacosException e) {
e.printStackTrace();
}
return serviceInfos;
}
}
最终结果:服务上下线时会收到飞书通知。
package com.alibaba.nacos.example;
import com.alibaba.nacos.api.exception.NacosException;
import com.alibaba.nacos.api.naming.NamingFactory;
import com.alibaba.nacos.api.naming.NamingService;
import com.alibaba.nacos.api.naming.listener.AbstractEventListener;
import com.alibaba.nacos.api.naming.listener.Event;
import com.alibaba.nacos.api.naming.listener.NamingEvent;
import java.util.Properties;
import java.util.concurrent.Executor;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
/**
 * Nacos naming example.
 *
 * <p>Registers one instance, subscribes to its service, deregisters the instance again and prints
 * the instance list after each step. Add the JVM parameters to run the example:
 * {@code -DserverAddr=${nacos.server.ip}:${nacos.server.port} -Dnamespace=${namespaceId}}
 *
 * @author nkorange
 */
public class NamingExample {

    private static final String INSTANCE_SERVICE_NAME = "nacos.test.3";

    private static final String INSTANCE_IP = "11.11.11.11";

    private static final int INSTANCE_PORT = 8888;

    private static final String INSTANCE_CLUSTER_NAME = "TEST1";

    /**
     * Runs the register / subscribe / deregister sequence against the configured Nacos server.
     *
     * @param args unused
     * @throws NacosException       if a call to the naming service fails
     * @throws InterruptedException if the thread is interrupted while sleeping
     */
    public static void main(String[] args) throws NacosException, InterruptedException {
        NamingService naming = NamingFactory.createNamingService(clientProperties());

        naming.registerInstance(INSTANCE_SERVICE_NAME, INSTANCE_IP, INSTANCE_PORT, INSTANCE_CLUSTER_NAME);
        System.out.println("[instances after register] " + naming.getAllInstances(INSTANCE_SERVICE_NAME));

        Executor callbackExecutor = singleThreadExecutor();
        AbstractEventListener printingListener = new AbstractEventListener() {
            // EventListener.onEvent is handled synchronously; a slow onEvent may block other onEvent
            // callbacks. Overriding getExecutor() lets the event be handled asynchronously instead.
            @Override
            public Executor getExecutor() {
                return callbackExecutor;
            }

            @Override
            public void onEvent(Event event) {
                NamingEvent namingEvent = (NamingEvent) event;
                System.out.println("[serviceName] " + namingEvent.getServiceName());
                System.out.println("[instances from event] " + namingEvent.getInstances());
            }
        };
        naming.subscribe(INSTANCE_SERVICE_NAME, printingListener);

        naming.deregisterInstance(INSTANCE_SERVICE_NAME, INSTANCE_IP, INSTANCE_PORT, INSTANCE_CLUSTER_NAME);
        Thread.sleep(1000);
        System.out.println("[instances after deregister] " + naming.getAllInstances(INSTANCE_SERVICE_NAME));
        Thread.sleep(1000);
    }

    /**
     * Reads the server address and namespace from system properties.
     *
     * @return client properties with {@code serverAddr} (default {@code localhost}) and
     *     {@code namespace} (default {@code public})
     */
    private static Properties clientProperties() {
        Properties clientConfig = new Properties();
        clientConfig.setProperty("serverAddr", System.getProperty("serverAddr", "localhost"));
        clientConfig.setProperty("namespace", System.getProperty("namespace", "public"));
        return clientConfig;
    }

    /**
     * Creates the executor used for listener callbacks.
     *
     * @return a pool with exactly one thread, named {@code test-thread}, and an unbounded queue
     */
    private static Executor singleThreadExecutor() {
        return new ThreadPoolExecutor(1, 1, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>(),
                task -> {
                    Thread worker = new Thread(task);
                    worker.setName("test-thread");
                    return worker;
                });
    }
}
https://github.com/nacos-group/nacos-spring-boot-project
https://github.com/alibaba/nacos
https://blog.csdn.net/yang131peng/article/details/129931838