紧接上篇:服务变更事件发布之后,接下来进入该事件的处理流程。
Notifier#handle:
/**
 * Dispatches one queued notification to every listener registered for its key.
 *
 * <p>An exception thrown by one listener is logged and does not prevent the
 * remaining listeners from being notified. Any other failure is logged as well,
 * so this method never propagates an exception to its caller.
 *
 * @param pair the datum key (value0) together with the operation to notify (value1)
 */
private void handle(Pair<String, DataOperation> pair) {
    try {
        // Key identifying the datum (service) this task refers to.
        String datumKey = pair.getValue0();
        // Kind of operation to report to the listeners.
        DataOperation action = pair.getValue1();

        // Remove the key from the services map; the article describes this as
        // marking the task as handled.
        services.remove(datumKey);

        int count = 0;

        if (!listeners.containsKey(datumKey)) {
            return;
        }

        // NOTE(review): per the article there is usually a single listener per key
        // (the Service object, which implements RecordListener) - confirm.
        for (RecordListener listener : listeners.get(datumKey)) {
            count++;
            try {
                if (action == DataOperation.CHANGE) {
                    // Hand the current value from the data store to the listener.
                    listener.onChange(datumKey, dataStore.get(datumKey).value);
                    continue;
                }
                if (action == DataOperation.DELETE) {
                    listener.onDelete(datumKey);
                    continue;
                }
            } catch (Throwable e) {
                Loggers.DISTRO.error("[NACOS-DISTRO] error while notifying listener of key: {}", datumKey, e);
            }
        }

        if (Loggers.DISTRO.isDebugEnabled()) {
            Loggers.DISTRO
                    .debug("[NACOS-DISTRO] datum change notified, key: {}, listener count: {}, action: {}",
                            datumKey, count, action.name());
        }
    } catch (Throwable e) {
        Loggers.DISTRO.error("[NACOS-DISTRO] Error while handling notifying task", e);
    }
}
service的onChange方法:
/**
 * Handles a changed instance list for the given key.
 *
 * <p>Each instance's weight is clamped first: values above 10000 become 10000 and
 * values strictly between 0 and 0.01 become 0.01. The instance list is then passed
 * to {@code updateIPs} and the checksum is recalculated.
 *
 * @param key   the datum key whose value changed
 * @param value the new instance list for that key
 * @throws RuntimeException if the list contains a {@code null} instance
 * @throws Exception        declared by the signature; not thrown directly by this body
 */
public void onChange(String key, Instances value) throws Exception {
    Loggers.SRV_LOG.info("[NACOS-RAFT] datum is changed, key: {}, value: {}", key, value);

    for (Instance candidate : value.getInstanceList()) {
        if (candidate == null) {
            // A null entry means the whole list is abnormal; refuse to apply it.
            throw new RuntimeException("got null instance " + key);
        }

        // Clamp the weight into the accepted range; a weight of exactly 0 is left as is.
        final double weight = candidate.getWeight();
        if (weight > 10000.0D) {
            candidate.setWeight(10000.0D);
        } else if (weight > 0.0D && weight < 0.01D) {
            candidate.setWeight(0.01D);
        }
    }

    // Apply the new list; whether it is ephemeral is derived from the key.
    boolean ephemeral = KeyBuilder.matchEphemeralInstanceListKey(key);
    updateIPs(value.getInstanceList(), ephemeral);

    recalculateChecksum();
}
public void updateIPs(Collection instances, boolean ephemeral) {
//新创建一个clusterMap
Map> ipMap = new HashMap<>(clusterMap.size());
for (String clusterName : clusterMap.keySet()) {
ipMap.put(clusterName, new ArrayList<>());
}
for (Instance instance : instances) {
try {
if (instance == null) {
Loggers.SRV_LOG.error("[NACOS-DOM] received malformed ip: null");
continue;
}
if (StringUtils.isEmpty(instance.getClusterName())) {
//没有集群信息就各个默认的集群名DEFAULT
instance.setClusterName(UtilsAndCommons.DEFAULT_CLUSTER_NAME);
}
if (!clusterMap.containsKey(instance.getClusterName())) {
//原来的clusterMap不存在该实例的集群就创建一个
Loggers.SRV_LOG
.warn("cluster: {} not found, ip: {}, will create new cluster with default configuration.",
instance.getClusterName(), instance.toJson());
Cluster cluster = new Cluster(instance.getClusterName(), this);
cluster.init();
getClusterMap().put(instance.getClusterName(), cluster);
}
//获取集群下的所有实例的集合
List clusterIPs = ipMap.get(instance.getClusterName());
if (clusterIPs == null) {
clusterIPs = new LinkedList<>();
ipMap.put(instance.getClusterName(), clusterIPs);
}
//将要注册的实例加入clusterIPs
clusterIPs.add(instance);
} catch (Exception e) {
Loggers.SRV_LOG.error("[NACOS-DOM] failed to process ip: " + instance, e);
}
}
for (Map.Entry> entry : ipMap.entrySet()) {
//make every ip mine
List entryIPs = entry.getValue();
//更新该集群下的实例列表
clusterMap.get(entry.getKey()).updateIps(entryIPs, ephemeral);
}
setLastModifiedMillis(System.currentTimeMillis());
//发布服务列表change事件,这里会向客户端以udp的方式推送服务变更信息
getPushService().serviceChanged(this);
StringBuilder stringBuilder = new StringBuilder();
for (Instance instance : allIPs()) {
stringBuilder.append(instance.toIpAddr()).append("_").append(instance.isHealthy()).append(",");
}
Loggers.EVT_LOG.info("[IP-UPDATED] namespace: {}, service: {}, ips: {}", getNamespaceId(), getName(),
stringBuilder.toString());
}
关键方法是clusterMap.get(entry.getKey()).updateIps(entryIPs, ephemeral),nacos注册表最小更新单位是service对象中的cluster:
/**
 * Replaces this cluster's ephemeral or persistent instance set with the given list.
 *
 * <p>Before the swap the new list is compared with the current set in order to
 * carry over health status for changed instances, reset health-check status for
 * new instances, remove health-check status for instances that disappeared, and
 * log each of these changes.
 *
 * @param ips       the complete new instance list for this cluster
 * @param ephemeral {@code true} to replace the ephemeral set, {@code false} for the persistent set
 */
public void updateIps(List<Instance> ips, boolean ephemeral) {
    // Current instances of the selected kind, indexed by datum key.
    Set<Instance> toUpdateInstances = ephemeral ? ephemeralInstances : persistentInstances;

    HashMap<String, Instance> oldIpMap = new HashMap<>(toUpdateInstances.size());
    for (Instance ip : toUpdateInstances) {
        oldIpMap.put(ip.getDatumKey(), ip);
    }

    // Instances from the new list that also exist in the old set but differ from it.
    // These are objects from 'ips', so changes made to them here end up in the new set.
    List<Instance> updatedIPs = updatedIps(ips, oldIpMap.values());
    if (!updatedIPs.isEmpty()) {
        for (Instance ip : updatedIPs) {
            Instance oldIP = oldIpMap.get(ip.getDatumKey());

            // Unless the instance is marked, keep the health status the old instance
            // already has instead of the incoming one; the original comment gives the
            // reason that the checker has the most precise result.
            if (!ip.isMarked()) {
                ip.setHealthy(oldIP.isHealthy());
            }

            if (ip.isHealthy() != oldIP.isHealthy()) {
                // Health status changed.
                Loggers.EVT_LOG.info("{} {SYNC} IP-{} {}:{}@{}", getService().getName(),
                        (ip.isHealthy() ? "ENABLED" : "DISABLED"), ip.getIp(), ip.getPort(), getName());
            }

            if (ip.getWeight() != oldIP.getWeight()) {
                // Weight changed.
                Loggers.EVT_LOG.info("{} {SYNC} {IP-UPDATED} {}->{}", getService().getName(), oldIP.toString(),
                        ip.toString());
            }
        }
    }

    // Instances present in the new list but not in the old set.
    List<Instance> newIPs = subtract(ips, oldIpMap.values());
    if (!newIPs.isEmpty()) {
        Loggers.EVT_LOG
                .info("{} {SYNC} {IP-NEW} cluster: {}, new ips size: {}, content: {}", getService().getName(),
                        getName(), newIPs.size(), newIPs.toString());

        for (Instance ip : newIPs) {
            HealthCheckStatus.reset(ip);
        }
    }

    // Instances present in the old set but missing from the new list.
    List<Instance> deadIPs = subtract(oldIpMap.values(), ips);
    if (!deadIPs.isEmpty()) {
        Loggers.EVT_LOG
                .info("{} {SYNC} {IP-DEAD} cluster: {}, dead ips size: {}, content: {}", getService().getName(),
                        getName(), deadIPs.size(), deadIPs.toString());

        for (Instance ip : deadIPs) {
            HealthCheckStatus.remv(ip);
        }
    }

    // Build a fresh set from the new list and swap the field reference; this
    // assignment is where the cluster's instance set actually changes.
    toUpdateInstances = new HashSet<>(ips);

    if (ephemeral) {
        ephemeralInstances = toUpdateInstances;
    } else {
        persistentInstances = toUpdateInstances;
    }
}
里面的updatedIps方法有点复杂:它找出新列表中与旧实例ip:port相同、但其它信息(权重、健康状态、是否标记等)发生了变化的实例。注意它返回的是新列表中的实例对象,updateIps会把旧实例的健康状态回写到其中未被标记的实例上,并打印变更日志,因此它并不只是为了打印日志:
/**
 * Finds the instances of the new collection that also exist in the old collection
 * but whose string form differs from the old one.
 *
 * <p>Membership in both collections is decided by {@code CollectionUtils.intersection}
 * and then tracked by {@code ip:port}; "differs" is decided by comparing
 * {@code toString()} values.
 *
 * @param newInstance the incoming instances
 * @param oldInstance the instances currently held
 * @return the changed instances, taken from {@code newInstance}; empty if none changed
 */
private List<Instance> updatedIps(Collection<Instance> newInstance, Collection<Instance> oldInstance) {

    // Instances present in both collections.
    // NOTE(review): the cast assumes intersection(...) returns a List - confirm for
    // the CollectionUtils version in use.
    List<Instance> intersects = (List<Instance>) CollectionUtils.intersection(newInstance, oldInstance);
    Map<String, Instance> stringIpAddressMap = new ConcurrentHashMap<>(intersects.size());

    for (Instance instance : intersects) {
        stringIpAddressMap.put(instance.getIp() + ":" + instance.getPort(), instance);
    }

    // toString() -> how many of the two collections contain exactly that string form.
    Map<String, Integer> intersectMap = new ConcurrentHashMap<>(newInstance.size() + oldInstance.size());
    Map<String, Instance> updatedInstancesMap = new ConcurrentHashMap<>(newInstance.size());
    Map<String, Instance> newInstancesMap = new ConcurrentHashMap<>(newInstance.size());

    for (Instance instance : oldInstance) {
        if (stringIpAddressMap.containsKey(instance.getIp() + ":" + instance.getPort())) {
            // Old instance whose ip:port is in the intersection: seen once so far.
            intersectMap.put(instance.toString(), 1);
        }
    }

    for (Instance instance : newInstance) {
        if (stringIpAddressMap.containsKey(instance.getIp() + ":" + instance.getPort())) {
            if (intersectMap.containsKey(instance.toString())) {
                // Same string form as an old instance: unchanged, seen twice.
                intersectMap.put(instance.toString(), 2);
            } else {
                // Same ip:port but a different string form: seen once, on the new side.
                intersectMap.put(instance.toString(), 1);
            }
        }

        // Index every new instance by its string form.
        newInstancesMap.put(instance.toString(), instance);
    }

    for (Map.Entry<String, Integer> entry : intersectMap.entrySet()) {
        String key = entry.getKey();
        Integer value = entry.getValue();

        if (value == 1) {
            // Seen once and present on the new side: a new-list instance that shares
            // ip:port with an old instance but differs in other details.
            if (newInstancesMap.containsKey(key)) {
                updatedInstancesMap.put(key, newInstancesMap.get(key));
            }
        }
    }

    return new ArrayList<>(updatedInstancesMap.values());
}
其实最后更新注册表是比较简单粗暴的:
// Excerpt from updateIps: point the matching field at the freshly built set.
if (ephemeral) {
ephemeralInstances = toUpdateInstances;
} else {
persistentInstances = toUpdateInstances;
}
这里用的是直接赋值,借鉴了写时复制(Copy-On-Write)的思想:更新时不在原集合上修改,而是先用新的实例列表构建一个新的集合,构建完成后直接替换原来的引用。更新完成后会发布一个服务变更事件,服务端以udp的方式向客户端推送变更;此外还会基于多级异步队列向集群其它节点同步数据。由于是ap模式,同步失败时只会重试,属于最大努力通知。
Nacos AP模式的源码分析到此结束。1.4.x版本在CP架构上自行实现了一个简易版本的raft协议;2.x版本则换成了JRaft,CP架构更加完善,不过笔者还没有详细阅读。此外,2.x版本的注册表结构好像也变了,具体等有时间再去研究吧。