最近在搞admin监控时,遇到一个坑。背景是这样的:使用的是nacos做注册中心,开发了admin server服务以后,服务的offline、down状态可以正常推送到钉钉,但是up状态一直监控不到。网上查了一些资料,相关说明较少,有一篇文章说要复制NacosWatch.java,在本地实现一下,因为nacos没有监听上线通知。
按说明操作了,但是还是不好使。相关代码如下:
/**
* @ClassName: NacosWatch
* @Author: shuyu.wang
* @Description:
* @Date: 2020/6/1 14:02
* @Version: 1.0
*/
@Slf4j
public class NacosWatch implements ApplicationEventPublisherAware, SmartLifecycle {
/**
* watch delay,duration to pull new service from nacos server.
*/
private long watchDelay = 30000;
private final NacosDiscoveryProperties properties;
private final TaskScheduler taskScheduler;
private final AtomicLong nacosWatchIndex;
private final AtomicBoolean running;
private ApplicationEventPublisher publisher;
private ScheduledFuture<?> watchFuture;
private Set<String> cacheServices;
private HashMap<String, EventListener> subscribeListeners;
public NacosWatch(NacosDiscoveryProperties properties) {
this(properties, getTaskScheduler());
}
public NacosWatch(NacosDiscoveryProperties properties, TaskScheduler taskScheduler) {
this.nacosWatchIndex = new AtomicLong(0L);
this.running = new AtomicBoolean(false);
this.cacheServices = new HashSet();
this.subscribeListeners = new HashMap();
this.properties = properties;
this.taskScheduler = taskScheduler;
}
private static ThreadPoolTaskScheduler getTaskScheduler() {
ThreadPoolTaskScheduler taskScheduler = new ThreadPoolTaskScheduler();
taskScheduler.initialize();
return taskScheduler;
}
@Override
public void setApplicationEventPublisher(ApplicationEventPublisher publisher) {
this.publisher = publisher;
}
@Override
public boolean isAutoStartup() {
return true;
}
@Override
public void stop(Runnable callback) {
this.stop();
callback.run();
}
@Override
public void start() {
if (this.running.compareAndSet(false, true)) {
this.watchFuture =
this.taskScheduler.scheduleWithFixedDelay(this::nacosServicesWatch, watchDelay);
}
}
@Override
public void stop() {
if (this.running.compareAndSet(true, false) && this.watchFuture != null) {
this.watchFuture.cancel(true);
}
}
@Override
public boolean isRunning() {
return false;
}
@Override
public int getPhase() {
return 0;
}
public void nacosServicesWatch() {
try {
boolean changed = false;
NamingService namingService = this.properties.namingServiceInstance();
ListView<String>
listView = this.properties.namingServiceInstance().getServicesOfServer(1, 2147483647);
List<String> serviceList = listView.getData();
Set<String> currentServices = new HashSet(serviceList);
currentServices.removeAll(this.cacheServices);
if (currentServices.size() > 0) {
changed = true;
}
Iterator var6;
String serviceName;
if (this.cacheServices.removeAll(new HashSet(serviceList)) && this.cacheServices.size() > 0) {
changed = true;
var6 = this.cacheServices.iterator();
while (var6.hasNext()) {
serviceName = (String) var6.next();
namingService
.unsubscribe(serviceName, (EventListener) this.subscribeListeners.get(serviceName));
this.subscribeListeners.remove(serviceName);
}
}
this.cacheServices = new HashSet(serviceList);
var6 = this.cacheServices.iterator();
while (var6.hasNext()) {
serviceName = (String) var6.next();
if (!this.subscribeListeners.containsKey(serviceName)) {
EventListener eventListener = (event) -> {
this.publisher
.publishEvent(new HeartbeatEvent(this, this.nacosWatchIndex.getAndIncrement()));
};
this.subscribeListeners.put(serviceName, eventListener);
namingService.subscribe(serviceName, eventListener);
}
}
if (changed) {
this.publisher
.publishEvent(new HeartbeatEvent(this, this.nacosWatchIndex.getAndIncrement()));
}
} catch (Exception var9) {
log.error("Error watching Nacos Service change", var9);
}
}
}
/**
 * Spring configuration that contributes the locally implemented {@link NacosWatch} bean.
 */
@Configuration
public class NacosWatchAutoConfiguration {

    /** Property that switches the watch on or off; a missing property counts as enabled. */
    private static final String WATCH_ENABLED_PROPERTY = "spring.cloud.nacos.discovery.watch.enabled";

    /**
     * Builds the watcher unless a {@link NacosWatch} bean already exists or the watch
     * property disables it.
     *
     * @param nacosDiscoveryProperties discovery properties handed to the watcher
     * @return a new watcher using its default scheduler
     */
    @Bean
    @ConditionalOnMissingBean
    @ConditionalOnProperty(value = WATCH_ENABLED_PROPERTY, matchIfMissing = true)
    public NacosWatch nacosWatchDeepBlue(NacosDiscoveryProperties nacosDiscoveryProperties) {
        NacosWatch watch = new NacosWatch(nacosDiscoveryProperties);
        return watch;
    }
}
添加上这两个类以后,一顿debug,但是还是一直没有上线状态的提醒,最后发现自定义通知类继承的AbstractStatusChangeNotifier.java需要重写两个方法。
正常情况下我们一般只会重写 doNotify(InstanceEvent event, Instance instance) 这个方法,但实际上还要重写 shouldNotify(InstanceEvent event, Instance instance) 方法,因为父类默认会忽略 UNKNOWN:UP 的状态变化,不会推送。
最终自定义通知类如下:
DingTalkNotifier.java
/**
 * Status-change notifier that forwards instance status changes to DingTalk as markdown
 * messages.
 *
 * <p>Notifications are sent only when {@code nacosConfigService.getIsopen()} is true and
 * the instance's registration name occurs in the configured watch list.
 */
@Slf4j
public class DingTalkNotifier extends AbstractStatusChangeNotifier {

    /**
     * Message template. Arguments in order: title, service name, instance id, status,
     * status description, service URL, status details as JSON.
     */
    private static final String template = "<<<%s>>> \n 【服务名】: %s(%s) \n 【状态】: %s(%s) \n 【服务ip】: %s \n 【详情】: %s";

    @Autowired
    private AlarmDingTalkRobotClient alarmDingTalkRobotClient;
    @Autowired
    private NacosConfigService nacosConfigService;

    /** Title used for DOWN / OFFLINE / UNKNOWN messages. */
    private String titleAlarm = "系统告警";
    /** Title used for UP messages. */
    private String titleNotice = "系统通知";

    /**
     * Status transitions ("from:to") that must NOT trigger a notification.
     *
     * <p>This list used to be {"UNKNOWN:UP","DOWN:UP","OFFLINE:UP"} and was queried with
     * {@code Arrays.binarySearch}, which requires a sorted array. On that unsorted array
     * the search found "DOWN:UP" and "OFFLINE:UP" but never "UNKNOWN:UP", so UNKNOWN-to-UP
     * was in fact notified. The list below states that effective behavior explicitly and is
     * looked up in an order-independent way.
     *
     * <p>NOTE(review): confirm whether DOWN:UP and OFFLINE:UP (recovery) should really stay
     * silent; remove them here if recovery notices are wanted.
     */
    private String[] ignoreChanges = new String[]{"DOWN:UP", "OFFLINE:UP"};

    public DingTalkNotifier(InstanceRepository repository) {
        super(repository);
    }

    /**
     * Decides whether an event is worth notifying: only status-change events whose
     * transition is not listed in {@link #ignoreChanges}. A {@code *} on either side of an
     * ignore entry acts as a wildcard for that side.
     *
     * @param event    the event to check
     * @param instance the instance the event refers to
     * @return {@code true} if a notification should be sent
     */
    @Override
    protected boolean shouldNotify(InstanceEvent event, Instance instance) {
        if (!(event instanceof InstanceStatusChangedEvent)) {
            return false;
        }
        InstanceStatusChangedEvent statusChange = (InstanceStatusChangedEvent) event;
        String from = this.getLastStatus(event.getInstance());
        String to = statusChange.getStatusInfo().getStatus();
        return !isIgnored(from + ":" + to) && !isIgnored("*:" + to) && !isIgnored(from + ":*");
    }

    /** Returns whether the given "from:to" transition is listed in {@link #ignoreChanges}. */
    private boolean isIgnored(String transition) {
        // Linear lookup: correct regardless of the order in which entries are declared.
        return Arrays.asList(this.ignoreChanges).contains(transition);
    }

    /**
     * Builds the DingTalk message for a status change and sends it. Statuses other than
     * DOWN, OFFLINE, UP and UNKNOWN are ignored; non-status events are only logged.
     *
     * @param event    the event that triggered the notification
     * @param instance the instance the event refers to
     * @return a {@code Mono} that performs the send when subscribed
     */
    @Override
    protected Mono<Void> doNotify(InstanceEvent event, Instance instance) {
        return Mono.fromRunnable(() -> {
            if (!nacosConfigService.getIsopen()) {
                return;
            }
            String serviceName = instance.getRegistration().getName();
            String watchapplications = nacosConfigService.getWatchapplications();
            // A missing watch list is treated as "nothing is watched" instead of failing with
            // a NullPointerException. The check itself is a substring match on the raw value.
            if (watchapplications == null || !watchapplications.contains(serviceName)) {
                return;
            }
            if (!(event instanceof InstanceStatusChangedEvent)) {
                log.info("Instance {} ({}) {}", serviceName, event.getInstance(), event.getType());
                return;
            }

            String status = ((InstanceStatusChangedEvent) event).getStatusInfo().getStatus();
            log.info("Instance {} ({}) is {}", serviceName, event.getInstance(), status);

            String title;
            String description;
            switch (status) {
                // health check failed
                case "DOWN":
                    title = titleAlarm;
                    description = "健康检查没通过";
                    break;
                // service went offline
                case "OFFLINE":
                    title = titleAlarm;
                    description = "服务离线";
                    break;
                // service came online
                case "UP":
                    title = titleNotice;
                    description = "服务上线";
                    break;
                // unknown failure
                case "UNKNOWN":
                    title = titleAlarm;
                    description = "服务未知异常";
                    break;
                default:
                    return;
            }

            log.info("发送 {} 的通知!", description);
            String messageText = String.format(template, title, serviceName, event.getInstance(),
                    status, description, instance.getRegistration().getServiceUrl(),
                    JSONObject.toJSONString(instance.getStatusInfo().getDetails()));
            alarmDingTalkRobotClient.sendMarkdownMessage(title, messageText, true);
        });
    }
}
其中AlarmDingTalkRobotClient类是自己封装的钉钉报警类,这里不做详细描述,大家可以根据自己业务需求,自行封装。