邻居项主要的状态转移如下(省略邻居项垃圾回收及转移原因,更权威详细的状态转移图参看《深入理解LINUX网络技术内幕》P648 "图26-13: NUD状态间的转换"):
邻居子系统调用neigh_alloc创建邻居项的时候,邻居项初始化为NONE状态;网络层发送报文时,需要查找目的IP地址对应的邻居项,ip_finish_output2调用__ipv4_neigh_lookup_noref查找邻居项,如果没有找到,就调用__neigh_create、neigh_alloc创建新的邻居项,如下是ping发送报文的调用栈:
neigh_alloc实现代码如下:
/*
 * Allocate a neighbour entry for table @tbl on device @dev and fill in the
 * protocol-independent defaults.
 *
 * The table's entry counter is bumped up front; when the table is over its
 * gc thresholds a forced garbage collection is attempted first.  On any
 * failure the counter is rolled back and NULL is returned.
 *
 * The new entry starts in NUD_NONE with a reference count of 1 and is marked
 * dead; its timer is set up to call neigh_timer_handler() with the entry as
 * argument.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *neigh;
	unsigned long stamp = jiffies;
	int count;

	/* Number of entries that existed before this one was accounted. */
	count = atomic_inc_return(&tbl->entries) - 1;

	/* Over the hard limit, or over the soft limit with a flush older than 5s. */
	if (count >= tbl->gc_thresh3 ||
	    (count >= tbl->gc_thresh2 &&
	     time_after(stamp, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) && count >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			atomic_dec(&tbl->entries);
			return NULL;
		}
	}

	neigh = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!neigh) {
		atomic_dec(&tbl->entries);
		return NULL;
	}

	__skb_queue_head_init(&neigh->arp_queue);
	rwlock_init(&neigh->lock);
	seqlock_init(&neigh->ha_lock);

	/* Both timestamps start at "now"; later state transitions compare against them. */
	neigh->updated = stamp;
	neigh->used = stamp;

	/* Initial state; output goes to neigh_blackhole until a constructor replaces it. */
	neigh->nud_state = NUD_NONE;
	neigh->output = neigh_blackhole;

	seqlock_init(&neigh->hh.hh_lock);
	neigh->parms = neigh_parms_clone(&tbl->parms);

	/* Timer callback is neigh_timer_handler(), invoked with this entry as its argument. */
	setup_timer(&neigh->timer, neigh_timer_handler, (unsigned long)neigh);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	neigh->tbl = tbl;
	atomic_set(&neigh->refcnt, 1);
	neigh->dead = 1;

	return neigh;
}
neigh_alloc主要是创建一个邻居项,设置邻居项状态为NONE,初始化更新/使用时间,初始化邻居项定时器的回调函数及回调函数的参数;neigh_alloc只初始化了部分通用的数据,更多的设置在__neigh_create函数里面,__neigh_create设置邻居项的输出网卡设备,调用arp_constructor初始化邻居项ARP相关数据(设置邻居项输出函数指针为neigh_resolve_output,不同的链路使用不同的协议,输出函数指针也不同,并初始化其他参数),__neigh_create实现代码如下:
/*
 * Create a neighbour entry for key @pkey on device @dev and insert it into
 * @tbl's hash table, unless an entry with the same device and key is already
 * present, in which case the freshly allocated entry is released and the
 * existing one is returned.
 *
 * Returns the neighbour (with an extra reference when @want_ref is true) or
 * an ERR_PTR(): -ENOBUFS when neigh_alloc() fails, -EINVAL when the entry's
 * parms are marked dead, or the negative value returned by one of the
 * constructor/setup hooks.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref)
{
u32 hash_val;
int key_len = tbl->key_len;
int error;
struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
struct neigh_hash_table *nht;
if (!n) {
rc = ERR_PTR(-ENOBUFS);
goto out;
}
memcpy(n->primary_key, pkey, key_len);
n->dev = dev; // output device of this neighbour entry
dev_hold(dev);
/* Protocol specific setup. */
if (tbl->constructor && (error = tbl->constructor(n)) < 0) { // table-specific constructor; NOTE(review): for the ARP table this is presumably arp_constructor, which sets n->ops/n->output -- not visible here
rc = ERR_PTR(error);
goto out_neigh_release;
}
if (dev->netdev_ops->ndo_neigh_construct) {
error = dev->netdev_ops->ndo_neigh_construct(n);
if (error < 0) {
rc = ERR_PTR(error);
goto out_neigh_release;
}
}
/* Device specific setup. */
if (n->parms->neigh_setup &&
(error = n->parms->neigh_setup(n)) < 0) {
rc = ERR_PTR(error);
goto out_neigh_release;
}
n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1); // back-date the confirmation time by 2 * BASE_REACHABLE_TIME so the new entry does not look recently confirmed
write_lock_bh(&tbl->lock);
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
// grow the hash table once the entry count exceeds the number of buckets
if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
if (n->parms->dead) {
rc = ERR_PTR(-EINVAL);
goto out_tbl_unlock;
}
// look for an existing entry with the same device and key in this bucket
for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
lockdep_is_held(&tbl->lock));
n1 != NULL;
n1 = rcu_dereference_protected(n1->next,
lockdep_is_held(&tbl->lock))) {
if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
if (want_ref)
neigh_hold(n1);
rc = n1; // return the existing entry; the new one is released at out_neigh_release
goto out_tbl_unlock;
}
}
n->dead = 0; // the entry is about to become reachable through the hash table
if (want_ref)
neigh_hold(n);
// link the new entry at the head of its bucket
rcu_assign_pointer(n->next,
rcu_dereference_protected(nht->hash_buckets[hash_val],
lockdep_is_held(&tbl->lock)));
rcu_assign_pointer(nht->hash_buckets[hash_val], n);
write_unlock_bh(&tbl->lock);
neigh_dbg(2, "neigh %p is created\n", n);
rc = n;
out:
return rc;
out_tbl_unlock:
write_unlock_bh(&tbl->lock);
out_neigh_release:
neigh_release(n);
goto out;
}
ip_finish_output2查找到或者创建邻居项之后,调用dst_neigh_output,发送数据到链路层,dst_neigh_output在邻居项非连接状态,调用n->output发送报文,n->output在创建邻居项时设置为neigh_resolve_output,dst_neigh_output实现代码如下:
/*
 * Hand @skb to the link layer through neighbour @n.
 *
 * If @dst has a pending confirmation it is consumed here and copied into
 * n->confirmed.  A neighbour in a NUD_CONNECTED state with a cached hardware
 * header takes the neigh_hh_output() fast path; every other case goes
 * through the neighbour's own output hook.
 */
static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
				   struct sk_buff *skb)
{
	const struct hh_cache *cached_hdr = &n->hh;

	if (dst->pending_confirm) {
		unsigned long stamp = jiffies;

		dst->pending_confirm = 0;
		/* Write only on change, to avoid dirtying the neighbour. */
		if (n->confirmed != stamp)
			n->confirmed = stamp;
	}

	/* Not connected, or no cached header yet: use n->output. */
	if (!(n->nud_state & NUD_CONNECTED) || !cached_hdr->hh_len)
		return n->output(n, skb);

	return neigh_hh_output(cached_hdr, skb);
}
neigh_resolve_output最终调用neigh_event_send,neigh_event_send更新邻居项的使用时间,非连接、延迟、探测状态,调用__neigh_event_send发送报文,连接、延迟、探测等状态下,没发送ARP请求的必要,已经连接的状态,邻居的状态是可达的,已经获取到了目的IP地址的MAC地址或者链路层不需要MAC地址,延迟状态情况下,定时器超时会进入探测状态,探测状态会发送ARP请求去获取目的IP地址的MAC地址,延迟状态需要等待超时再发送ARP请求,探测阶段需要等待ARP应答,因此延迟、探测状态都不需要立即发送ARP请求,避免发送过多的ARP请求到网络上,neigh_event_send实现代码如下:
/*
 * Record that @neigh is being used and, if its state requires it, run the
 * slow path.
 *
 * Returns 0 straight away for entries in a CONNECTED, DELAY or PROBE state;
 * otherwise returns whatever __neigh_event_send() returns.
 */
static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	const unsigned long stamp = jiffies;

	/* Refresh the last-used time, writing only when it actually changes. */
	if (neigh->used != stamp)
		neigh->used = stamp;

	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		return 0;

	return __neigh_event_send(neigh, skb);
}
__neigh_event_send检查邻居项当前状态,检查是否需要发送探测报文,根据当前的状态设置相关的定时器,NONE状态转移到INCOMPLETE状态并启动超时定时器,有数据要发送的话,缓存当前要发送的数据到ARP缓存队列(这样邻居一旦可达就可以立即发送数据,不需要等待TCP等超时重传;缓存不够的话,替换最早缓存的数据),调用neigh_probe发送相关的探测报文,__neigh_event_send实现代码如下:
/*
 * Slow path of neigh_event_send(): advance the neighbour state machine when
 * a packet wants to use an entry.
 *
 * Returns 0 when nothing was done with @skb (the caller may go on and
 * transmit it), and 1 when @skb was either queued on neigh->arp_queue or
 * freed here, or when the entry is INCOMPLETE and no @skb was supplied.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
int rc;
bool immediate_probe = false;
write_lock_bh(&neigh->lock);
rc = 0;
if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) // CONNECTED, DELAY and PROBE need no action here; just unlock and return 0
goto out_unlock_bh;
if (neigh->dead)
goto out_dead;
if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { // neither STALE nor INCOMPLETE (given the checks above, e.g. NONE or FAILED)
if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
NEIGH_VAR(neigh->parms, APP_PROBES)) { // at least one multicast/application probe is configured
unsigned long next, now = jiffies;
atomic_set(&neigh->probes,
NEIGH_VAR(neigh->parms, UCAST_PROBES)); // start the probe counter at UCAST_PROBES
neigh->nud_state = NUD_INCOMPLETE; // move the entry to INCOMPLETE
neigh->updated = now; // record the time of this state change
next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
HZ/2); // next timer expiry: RETRANS_TIME from now, but at least HZ/2
neigh_add_timer(neigh, next); // arm the neighbour timer
immediate_probe = true; // send a probe right away, before returning
} else {
// no probes configured: resolution cannot be attempted, fail and drop the packet
neigh->nud_state = NUD_FAILED;
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock);
kfree_skb(skb);
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
// a STALE entry that gets used moves to DELAY; rc stays 0
neigh_dbg(2, "neigh %p is delayed\n", neigh);
neigh->nud_state = NUD_DELAY;
neigh->updated = jiffies;
neigh_add_timer(neigh, jiffies +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
}
if (neigh->nud_state == NUD_INCOMPLETE) { // entry is INCOMPLETE (just entered above, or already was)
if (skb) { // there is a packet to send: park it on the queue until the entry is updated
while (neigh->arp_queue_len_bytes + skb->truesize >
NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) { // queued bytes + this packet exceed the limit: evict queued packets
struct sk_buff *buff;
buff = __skb_dequeue(&neigh->arp_queue); // take the oldest queued packet
if (!buff) // queue is empty, nothing more to evict
break;
neigh->arp_queue_len_bytes -= buff->truesize; // account for the evicted packet
kfree_skb(buff); // drop the evicted packet
NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
skb_dst_force(skb);
__skb_queue_tail(&neigh->arp_queue, skb); // append the current packet to the tail of the queue
neigh->arp_queue_len_bytes += skb->truesize; // update the queued byte count
}
rc = 1;
}
out_unlock_bh:
if (immediate_probe) // only set when the entry just entered INCOMPLETE above
neigh_probe(neigh); // NOTE(review): neigh_probe() is expected to drop neigh->lock itself, since the write_unlock() below is skipped on this path
else
write_unlock(&neigh->lock);
local_bh_enable();
return rc;
out_dead:
if (neigh->nud_state & NUD_STALE)
goto out_unlock_bh;
write_unlock_bh(&neigh->lock);
kfree_skb(skb);
return 1;
}
对于ARP协议,neigh_probe函数调用arp_solicit构建ARP请求报文并发送请求到邻居项输出网卡设备,arp_solicit计算源IP地址、增加发送次数,调用arp_send_dst构建ARP报文发送到网卡设备,实现代码如下:
/*
 * Build and send an ARP request for the address held in @neigh.
 *
 * @skb, when non-NULL, is a packet waiting on this neighbour; its IP source
 * address may be used as the ARP sender address, depending on the device's
 * arp_announce setting.  The number of probes already recorded in
 * neigh->probes selects between probing the cached hardware address,
 * notifying user space via neigh_app_ns(), and sending with no target
 * hardware address.
 */
static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
__be32 saddr = 0;
u8 dst_ha[MAX_ADDR_LEN], *dst_hw = NULL;
struct net_device *dev = neigh->dev; // output device of the neighbour entry
__be32 target = *(__be32 *)neigh->primary_key; // IP address being resolved
int probes = atomic_read(&neigh->probes); // current probe counter (read only; it is not incremented here)
struct in_device *in_dev;
struct dst_entry *dst = NULL;
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (!in_dev) {
rcu_read_unlock();
return;
}
switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
default:
case 0: /* By default announce any local IP */
if (skb && inet_addr_type_dev_table(dev_net(dev), dev,
ip_hdr(skb)->saddr) == RTN_LOCAL) // the packet's source address is a local address
saddr = ip_hdr(skb)->saddr; // use it as the ARP sender IP
break;
case 1: /* Restrict announcements of saddr in same subnet */
if (!skb)
break;
saddr = ip_hdr(skb)->saddr;
if (inet_addr_type_dev_table(dev_net(dev), dev,
saddr) == RTN_LOCAL) {
/* saddr should be known to target */
if (inet_addr_onlink(in_dev, target, saddr))
break;
}
saddr = 0;
break;
case 2: /* Avoid secondary IPs, get a primary/preferred one */
break;
}
rcu_read_unlock();
// no source chosen above: let inet_select_addr() pick one for this device/target
if (!saddr)
saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
if (probes < 0) {
// still within the unicast probe budget: address the request to the cached hardware address
if (!(neigh->nud_state & NUD_VALID))
pr_debug("trying to ucast probe in NUD_INVALID\n");
neigh_ha_snapshot(dst_ha, neigh, dev);
dst_hw = dst_ha;
} else {
probes -= NEIGH_VAR(neigh->parms, APP_PROBES);
if (probes < 0) {
// within the application probe budget: hand off to neigh_app_ns() and send nothing
neigh_app_ns(neigh);
return;
}
}
if (skb && !(dev->priv_flags & IFF_XMIT_DST_RELEASE))
dst = skb_dst(skb);
arp_send_dst(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
dst_hw, dev->dev_addr, NULL, dst); // send the request: op ARPOP_REQUEST, type ETH_P_ARP, target IP `target`, sender IP `saddr`, out device `dev`; dst_hw is NULL unless set above
}
内核定时器调用栈如下:
硬件tick定时器超时,产生IRQ中断,定时器相关函数检查定时器的wheel,如果有定时器超时,那么触发一个软中断,中断返回时,检查到有触发软中断,那么内核就去处理相关的软中断,run_timer_softirq处理所有超时的定时器,调用对应的定时器回调函数,定时器回调调用代码如下:
1177 trace_timer_expire_entry(timer);
1178 fn(data);
1179 trace_timer_expire_exit(timer);
对于邻居项超时定时器,上面的fn就是neigh_timer_handler,data就是邻居项指针,前面邻居创建的时候已经设置好了这几个参数。
neigh_timer_handler检查邻居项当前的状态,对于INCOMPLETE状态的邻居项,计算下一次超时重传的时间,检查重传次数是否超过了最大重传次数,如果超过了最大重传次数就进入FAILED状态并清理发送缓存等,如果没有超过最大重传次数,那么再次设置超时定时器,调用邻居探测函数接口发送探测报文并增加发送次数,neigh_timer_handler实现代码如下:
/*
 * Timer callback for a neighbour entry; @arg is the struct neighbour pointer
 * that was registered together with the timer.
 *
 * Depending on the state found under neigh->lock it keeps the entry
 * REACHABLE, moves it REACHABLE -> DELAY/STALE or DELAY -> REACHABLE/PROBE,
 * or, for PROBE/INCOMPLETE, either sends another probe or moves the entry to
 * FAILED once the probe counter has reached neigh_max_probes().  The
 * reference taken for the timer is dropped at the end.
 */
static void neigh_timer_handler(unsigned long arg)
{
unsigned long now, next;
struct neighbour *neigh = (struct neighbour *)arg;
unsigned int state;
int notify = 0;
write_lock(&neigh->lock);
state = neigh->nud_state;
now = jiffies;
next = now + HZ;
if (!(state & NUD_IN_TIMER)) // the entry is no longer in a timer-driven state (it changed state without the timer being stopped): nothing to do
goto out;
if (state & NUD_REACHABLE) {
if (time_before_eq(now,
neigh->confirmed + neigh->parms->reachable_time)) {
neigh_dbg(2, "neigh %p is still alive\n", neigh);
next = neigh->confirmed + neigh->parms->reachable_time;
} else if (time_before_eq(now,
neigh->used +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
neigh->nud_state = NUD_DELAY;
neigh->updated = jiffies;
neigh_suspect(neigh);
next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
} else {
neigh_dbg(2, "neigh %p is suspected\n", neigh);
neigh->nud_state = NUD_STALE;
neigh->updated = jiffies;
neigh_suspect(neigh);
notify = 1;
}
} else if (state & NUD_DELAY) {
if (time_before_eq(now,
neigh->confirmed +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is now reachable\n", neigh);
neigh->nud_state = NUD_REACHABLE;
neigh->updated = jiffies;
neigh_connect(neigh);
notify = 1;
next = neigh->confirmed + neigh->parms->reachable_time;
} else {
neigh_dbg(2, "neigh %p is probed\n", neigh);
neigh->nud_state = NUD_PROBE;
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0);
notify = 1;
next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
}
} else {
/* NUD_PROBE|NUD_INCOMPLETE */
next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); // next retransmission time; PROBE and INCOMPLETE both retransmit at the RETRANS_TIME interval
}
if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { // timed out in INCOMPLETE/PROBE with the probe counter at or above the maximum
neigh->nud_state = NUD_FAILED; // give up: move to FAILED
notify = 1;
neigh_invalidate(neigh); // FAILED handling; NOTE(review): presumably drops the queued packets -- not visible here
goto out;
}
if (neigh->nud_state & NUD_IN_TIMER) { // still in a timer-driven state: re-arm the timer
if (time_before(next, jiffies + HZ/2))
next = jiffies + HZ/2;
if (!mod_timer(&neigh->timer, next))
neigh_hold(neigh);
}
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { // INCOMPLETE/PROBE timed out: send another probe; NOTE(review): neigh_probe() is expected to drop neigh->lock and bump the probe counter
neigh_probe(neigh);
} else { // other states send no probe; just release the lock
out:
write_unlock(&neigh->lock);
}
if (notify)
neigh_update_notify(neigh);
neigh_release(neigh);
}
INCOMPLETE只有在收到邻居的ARP报文的时候,才会转换为REACHABLE状态,在arp_process函数里面转换;arp_process检查ARP报文输入网卡的IP、协议类型、硬件类型等是否有效,无效就释放报文,更新相应的邻居项,ARP应答报文,更新邻居项的状态为REACHABLE,更具体的解释可以参考机械工业出版社《Linux内核源码剖析:TCP/IP实现(上册)》"18.1.2 arp_process",虽然版本比较旧,但是实现基本一样。arp_process实现代码如下:
/*
 * Process one received ARP packet @skb.
 *
 * Validates the hardware/protocol types and opcode against the receiving
 * device, extracts the sender/target addresses, answers requests addressed
 * to a local or proxied address, and finally updates the neighbour entry for
 * the sender (REACHABLE for a unicast reply, STALE otherwise).
 * Always returns 0; @skb is consumed unless it was handed to
 * pneigh_enqueue().
 */
static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb->dev; // device the packet was received on
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct arphdr *arp;
unsigned char *arp_ptr;
struct rtable *rt;
unsigned char *sha;
__be32 sip, tip;
u16 dev_type = dev->type;
int addr_type;
struct neighbour *n;
struct dst_entry *reply_dst = NULL;
bool is_garp = false;
/* arp_rcv below verifies the ARP header and verifies the device
* is ARP'able.
*/
if (!in_dev)
goto out;
arp = arp_hdr(skb); // ARP header of the packet
switch (dev_type) {
default:
if (arp->ar_pro != htons(ETH_P_IP) ||
htons(dev_type) != arp->ar_hrd)
goto out;
break;
case ARPHRD_ETHER:
case ARPHRD_FDDI:
case ARPHRD_IEEE802:
/*
* ETHERNET, and Fibre Channel (which are IEEE 802
* devices, according to RFC 2625) devices will accept ARP
* hardware types of either 1 (Ethernet) or 6 (IEEE 802.2).
* This is the case also of FDDI, where the RFC 1390 says that
* FDDI devices should accept ARP hardware of (1) Ethernet,
* however, to be more robust, we'll accept both 1 (Ethernet)
* or 6 (IEEE 802.2)
*/
if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
arp->ar_pro != htons(ETH_P_IP)) // hardware type is neither Ethernet nor IEEE 802, or protocol is not IP: ignore the packet
goto out;
break;
case ARPHRD_AX25:
if (arp->ar_pro != htons(AX25_P_IP) ||
arp->ar_hrd != htons(ARPHRD_AX25))
goto out;
break;
case ARPHRD_NETROM:
if (arp->ar_pro != htons(AX25_P_IP) ||
arp->ar_hrd != htons(ARPHRD_NETROM))
goto out;
break;
}
/* Understand only these message types */
if (arp->ar_op != htons(ARPOP_REPLY) && // not a reply ...
arp->ar_op != htons(ARPOP_REQUEST)) // ... and not a request either: only REQUEST/REPLY are handled
goto out;
/*
* Extract fields
*/
arp_ptr = (unsigned char *)(arp + 1); // first byte after the fixed ARP header, i.e. the sender hardware address field
sha = arp_ptr; // sender hardware address
arp_ptr += dev->addr_len; // skip the sender hardware address; now at the sender IP address
memcpy(&sip, arp_ptr, 4); // copy the sender IP address into sip
arp_ptr += 4; // skip the sender IP; now at the target hardware address field
switch (dev_type) {
#if IS_ENABLED(CONFIG_FIREWIRE_NET)
case ARPHRD_IEEE1394:
break;
#endif
default:
arp_ptr += dev->addr_len; // skip the target hardware address; now at the target IP address
}
memcpy(&tip, arp_ptr, 4); // copy the target IP address into tip
/*
* Check for bad requests for 127.x.x.x and requests for multicast
* addresses. If this is one such, delete it.
*/
if (ipv4_is_multicast(tip) || // drop packets whose target IP is multicast
(!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip))) // drop loopback targets unless ROUTE_LOCALNET is set on the input device
goto out;
/*
* Special case: We must set Frame Relay source Q.922 address
*/
if (dev_type == ARPHRD_DLCI)
sha = dev->broadcast;
/*
* Process entry. The idea here is we want to send a reply if it is a
* request for us or if it is a request for someone else that we hold
* a proxy for. We want to add an entry to our cache if it is a reply
* to us or if it is a request for our address.
* (The assumption for this last is that if someone is requesting our
* address, they are probably intending to talk to us, so it saves time
* if we cache their address. Their address is also probably not in
* our cache, since ours is not in their cache.)
*
* Putting this another way, we only care about replies if they are to
* us, in which case we add them to the cache. For requests, we care
* about those for us and those for our proxies. We reply to both,
* and in the case of requests for us we add the requester to the arp
* cache.
*/
if (arp->ar_op == htons(ARPOP_REQUEST) && skb_metadata_dst(skb))
reply_dst = (struct dst_entry *)
iptunnel_metadata_reply(skb_metadata_dst(skb),
GFP_ATOMIC);
/* Special case: IPv4 duplicate address detection packet (RFC2131) */
if (sip == 0) { // sender IP of 0: address-conflict detection probe
if (arp->ar_op == htons(ARPOP_REQUEST) &&
inet_addr_type_dev_table(net, dev, tip) == RTN_LOCAL &&
!arp_ignore(in_dev, sip, tip)) // the probed address is one of our local addresses
arp_send_dst(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip,
sha, dev->dev_addr, sha, reply_dst); // answer with an ARP reply
goto out; // nothing else to do for this packet
}
if (arp->ar_op == htons(ARPOP_REQUEST) &&
ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { // request, and an input route for the target IP exists
rt = skb_rtable(skb);
addr_type = rt->rt_type;
if (addr_type == RTN_LOCAL) { // the target is a local address
int dont_send;
dont_send = arp_ignore(in_dev, sip, tip); // should this request be ignored?
if (!dont_send && IN_DEV_ARPFILTER(in_dev))
dont_send = arp_filter(sip, tip, dev); // should this request be filtered?
if (!dont_send) { // neither ignored nor filtered
n = neigh_event_ns(&arp_tbl, sha, &sip, dev); // update the neighbour entry for the requester
if (n) {
arp_send_dst(ARPOP_REPLY, ETH_P_ARP,
sip, dev, tip, sha,
dev->dev_addr, sha,
reply_dst); // send the ARP reply
neigh_release(n);
}
}
goto out;
} else if (IN_DEV_FORWARD(in_dev)) { // forwarding enabled: proxy ARP handling
if (addr_type == RTN_UNICAST &&
(arp_fwd_proxy(in_dev, dev, rt) ||
arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
(rt->dst.dev != dev &&
pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) {
n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
if (n)
neigh_release(n);
if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
skb->pkt_type == PACKET_HOST ||
NEIGH_VAR(in_dev->arp_parms, PROXY_DELAY) == 0) {
arp_send_dst(ARPOP_REPLY, ETH_P_ARP,
sip, dev, tip, sha,
dev->dev_addr, sha,
reply_dst);
} else {
// the packet is handed to pneigh_enqueue(), so skip consume_skb() at "out"
pneigh_enqueue(&arp_tbl,
in_dev->arp_parms, skb);
goto out_free_dst;
}
goto out;
}
}
}
/* Update our ARP tables */
n = __neigh_lookup(&arp_tbl, &sip, dev, 0); // look up (without creating) the sender's entry; reached for replies and for requests not handled above
if (IN_DEV_ARP_ACCEPT(in_dev)) {
unsigned int addr_type = inet_addr_type_dev_table(net, dev, sip);
/* Unsolicited ARP is not accepted by default.
It is possible, that this option should be enabled for some
devices (strip is candidate)
*/
is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip &&
addr_type == RTN_UNICAST; // Gratuitous ARP
if (!n && // no entry yet for the sender
((arp->ar_op == htons(ARPOP_REPLY) && // it is an ARP reply ...
addr_type == RTN_UNICAST) || is_garp)) // ... from a unicast address, or a gratuitous ARP, and arp_accept is on
n = __neigh_lookup(&arp_tbl, &sip, dev, 1); // create the entry
}
if (n) { // we have an entry to update
int state = NUD_REACHABLE; // default new state is REACHABLE
int override;
/* If several different ARP replies follows back-to-back,
use the FIRST one. It is possible, if several proxy
agents are active. Taking the first reply prevents
arp trashing and chooses the fastest router.
*/
override = time_after(jiffies,
n->updated +
NEIGH_VAR(n->parms, LOCKTIME)) || // the entry was last updated more than LOCKTIME ago
is_garp;
/* Broadcast replies and request packets
do not assert neighbour reachability.
*/
if (arp->ar_op != htons(ARPOP_REPLY) ||
skb->pkt_type != PACKET_HOST) // anything other than a reply addressed to this host only yields STALE
state = NUD_STALE;
neigh_update(n, sha, state,
override ? NEIGH_UPDATE_F_OVERRIDE : 0); // apply the new state and hardware address
neigh_release(n);
}
out:
consume_skb(skb);
out_free_dst:
dst_release(reply_dst);
return 0;
}
收到ARP报文,arp_process调用neigh_update更新邻居项的状态,如果更新为连接状态,那么更新最后一次确认邻居可达的时间confirmed,更新邻居项的更新时间,如果新收到的地址不一样,需要判断是否更新邻居的缓存的地址,状态更新之后,新的状态如果要启动超时定时器,那么启动超时定时器,新状态为连接状态时,更新邻居输出函数指针(ARP协议,更新前后都是neigh_resolve_output),如果邻居项状态从不可用状态变更为可用状态,那么把ARP发送缓存队列的数据都发送出去。
neigh_update实现代码如下:
/*
 * Update neighbour @neigh to state @new, optionally with a new link-layer
 * address @lladdr (may be NULL), under control of the NEIGH_UPDATE_F_* bits
 * in @flags.
 *
 * Returns 0 on success, -EPERM when a NOARP/PERMANENT entry is touched
 * without NEIGH_UPDATE_F_ADMIN (also the value returned for a dead entry),
 * or -EINVAL when no address is supplied and none is cached.  When the entry
 * goes from a non-VALID to a VALID state, packets parked on neigh->arp_queue
 * are sent through the neighbour output hook.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
u32 flags)
{
u8 old;
int err;
int notify = 0;
struct net_device *dev;
int update_isrouter = 0;
write_lock_bh(&neigh->lock);
dev = neigh->dev; // output device of the neighbour entry
old = neigh->nud_state; // state before this update
err = -EPERM;
if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
(old & (NUD_NOARP | NUD_PERMANENT)))
goto out;
if (neigh->dead)
goto out;
if (!(new & NUD_VALID)) { // the requested state is not one of the VALID states
neigh_del_timer(neigh);
if (old & NUD_CONNECTED)
neigh_suspect(neigh);
neigh->nud_state = new;
err = 0;
notify = old & NUD_VALID;
if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
(new & NUD_FAILED)) {
neigh_invalidate(neigh);
notify = 1;
}
goto out;
}
/* Compare new lladdr with cached one */
if (!dev->addr_len) {
/* First case: device needs no address. */
lladdr = neigh->ha;
} else if (lladdr) {
/* The second case: if something is already cached
and a new address is proposed:
- compare new & old
- if they are different, check override flag
*/
if ((old & NUD_VALID) &&
!memcmp(lladdr, neigh->ha, dev->addr_len))
lladdr = neigh->ha;
} else {
/* No address is supplied; if we know something,
use it, otherwise discard the request.
*/
err = -EINVAL;
if (!(old & NUD_VALID))
goto out;
lladdr = neigh->ha;
}
// from here on, "lladdr == neigh->ha" (pointer equality) means "address unchanged"
if (new & NUD_CONNECTED) // the new state is a CONNECTED state
neigh->confirmed = jiffies; // record the time reachability was last confirmed
neigh->updated = jiffies; // record the time of this update
/* If entry was valid and address is not changed,
do not change entry state, if new one is STALE.
*/
err = 0;
update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
if (old & NUD_VALID) { // the old state was VALID, so an address is already cached; the proposed address is not necessarily the same as the cached one
if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) { // address differs and overriding was not requested
update_isrouter = 0;
if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
(old & NUD_CONNECTED)) { // weak override on a CONNECTED entry: keep the cached address but demote the entry to STALE
lladdr = neigh->ha; // keep using the cached address
new = NUD_STALE; // new state becomes STALE
} else
goto out;
} else {
if (lladdr == neigh->ha && new == NUD_STALE && // address unchanged, STALE requested, and weak override set or old state CONNECTED: keep the old state
((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
(old & NUD_CONNECTED))
)
new = old;
}
}
if (new != old) { // the state actually changes
neigh_del_timer(neigh); // stop the timer of the old state
if (new & NUD_PROBE) // entering PROBE
atomic_set(&neigh->probes, 0); // restart the probe counter
if (new & NUD_IN_TIMER) // the new state is timer-driven
neigh_add_timer(neigh, (jiffies +
((new & NUD_REACHABLE) ?
neigh->parms->reachable_time :
0))); // expiry: reachable_time from now for REACHABLE, otherwise "now"
neigh->nud_state = new; // commit the new state
notify = 1;
}
if (lladdr != neigh->ha) { // the address changed: store it (cases where it must be kept already reset lladdr to neigh->ha above)
write_seqlock(&neigh->ha_lock);
memcpy(&neigh->ha, lladdr, dev->addr_len); // overwrite the cached hardware address
write_sequnlock(&neigh->ha_lock);
neigh_update_hhs(neigh);
if (!(new & NUD_CONNECTED)) // not CONNECTED: back-date confirmed by 2 * BASE_REACHABLE_TIME so the timer handler does not treat the entry as recently confirmed
neigh->confirmed = jiffies -
(NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
notify = 1;
}
if (new == old) // state unchanged: done
goto out;
if (new & NUD_CONNECTED) // new state is CONNECTED
neigh_connect(neigh); // NOTE(review): presumably switches the entry's output hook to the connected variant -- not visible here
else
neigh_suspect(neigh);
if (!(old & NUD_VALID)) { // the old state was not VALID, so packets may be waiting on arp_queue
struct sk_buff *skb;
/* Again: avoid dead loop if something went wrong */
while (neigh->nud_state & NUD_VALID && // entry is (still) VALID
(skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { // and a queued packet is available
struct dst_entry *dst = skb_dst(skb);
struct neighbour *n2, *n1 = neigh;
write_unlock_bh(&neigh->lock);
rcu_read_lock();
/* Why not just use 'neigh' as-is? The problem is that
* things such as shaper, eql, and sch_teql can end up
* using alternative, different, neigh objects to output
* the packet in the output path. So what we need to do
* here is re-lookup the top-level neigh in the path so
* we can reinject the packet there.
*/
n2 = NULL;
if (dst) {
n2 = dst_neigh_lookup_skb(dst, skb);
if (n2)
n1 = n2;
}
n1->output(n1, skb); // send the queued packet through the neighbour's output hook (called with neigh->lock released)
if (n2)
neigh_release(n2);
rcu_read_unlock();
write_lock_bh(&neigh->lock);
}
__skb_queue_purge(&neigh->arp_queue); // drop whatever is left on the queue
neigh->arp_queue_len_bytes = 0; // reset the queued byte count
}
out:
if (update_isrouter) {
neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
(neigh->flags | NTF_ROUTER) :
(neigh->flags & ~NTF_ROUTER);
}
write_unlock_bh(&neigh->lock);
if (notify)
neigh_update_notify(neigh);
return err;
}
邻居可用时,neigh_update调用neigh_resolve_output发送ARP缓存队列数据,neigh_resolve_output调用neigh_event_send,非连接等状态调用发送ARP请求的函数,连接状态则直接返回0,也就是可以直接发送数据报文,neigh_resolve_output就调用dev_queue_xmit发送数据报文到网卡里面。neigh_resolve_output实现代码如下:
/*
 * Output hook for neighbours that may still need resolving.
 *
 * neigh_event_send() decides whether @skb can go out now.  When it returns
 * non-zero this function returns 0 without transmitting.  Otherwise the
 * link-layer header is built from neigh->ha (retried under ha_lock if the
 * address changes concurrently) and the packet is passed to
 * dev_queue_xmit().
 *
 * Returns the dev_queue_xmit() result, 0 when the packet was not transmitted
 * here, or -EINVAL (after freeing @skb) when the header could not be built.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *out_dev;
	unsigned int seq;
	int hdr_rc;

	if (neigh_event_send(neigh, skb))
		return 0;

	out_dev = neigh->dev;

	/* Populate the cached hardware header if the device supports one. */
	if (out_dev->header_ops->cache && !neigh->hh.hh_len)
		neigh_hh_init(neigh);

	/* Rebuild the header until a consistent snapshot of neigh->ha was used. */
	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		hdr_rc = dev_hard_header(skb, out_dev, ntohs(skb->protocol),
					 neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (hdr_rc < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return dev_queue_xmit(skb);
}
网卡收到数据产生中断,内核处理网卡中断,smsc911x_poll调用网卡驱动接收网卡数据,调用netif_receive_skb处理收到的报文,__netif_receive_skb_core解析报文的类型,一级一级调用,最后调用ARP协议的处理函数arp_process,更新状态之后,调用neigh_resolve_output、dev_queue_xmit、__dev_queue_xmit发送ARP缓存的队列数据。
REACHABLE状态会启动一个定时器,在定时器超时时间内,都认为邻居是可达的,超过一定时间没有确认邻居是否可达的情况下,邻居项缓存的数据已经不可靠了,在arp_process收到ARP报文时,更新邻居项状态时,对于REACHABLE的邻居项都会更新最后一次确认时间confirmed,那么从confirmed时间开始的一段时间内认为邻居也是可达的,REACHABLE定时器超时之后,并不能直接更新邻居项为过期状态,还得检查启动定时器之后是否有更新确认时间;REACHABLE超时实现代码如下:
/*
 * Timer callback for a neighbour entry; @arg is the struct neighbour pointer
 * registered with the timer.  This listing is annotated for the REACHABLE
 * timeout path: the entry stays REACHABLE if it was confirmed recently,
 * moves to DELAY if it was used recently, and to STALE otherwise.
 */
static void neigh_timer_handler(unsigned long arg)
{
unsigned long now, next;
struct neighbour *neigh = (struct neighbour *)arg;
unsigned int state;
int notify = 0;
write_lock(&neigh->lock);
state = neigh->nud_state;
now = jiffies;
next = now + HZ;
if (!(state & NUD_IN_TIMER))
goto out;
if (state & NUD_REACHABLE) {
if (time_before_eq(now,
neigh->confirmed + neigh->parms->reachable_time)) { // confirmed may have been refreshed after the timer was armed; as long as now <= confirmed + reachable_time the neighbour still counts as reachable
neigh_dbg(2, "neigh %p is still alive\n", neigh);
next = neigh->confirmed + neigh->parms->reachable_time; // re-arm for the end of the window that started at the last confirmation (e.g. armed at 9:00 with a 2h window and confirmed again at 10:00: the 11:00 expiry just re-arms for 12:00)
} else if (time_before_eq(now,
neigh->used +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { // reachability window expired, but the entry was used within the last DELAY_PROBE_TIME: wait in DELAY before probing
neigh_dbg(2, "neigh %p is delayed\n", neigh);
neigh->nud_state = NUD_DELAY; // move to DELAY; neigh_event_send() returns 0 for DELAY/PROBE, so packets keep going out with the cached address
neigh->updated = jiffies; // record the time of the state change
neigh_suspect(neigh);
next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME); // DELAY expires after DELAY_PROBE_TIME, at which point the entry may move on to PROBE
} else { // window expired and the entry has not been used recently: move to STALE; the next send on a STALE entry moves it to DELAY (see __neigh_event_send)
neigh_dbg(2, "neigh %p is suspected\n", neigh);
neigh->nud_state = NUD_STALE; // move to STALE: the cached data is no longer considered confirmed; no timer is re-armed below since STALE is not tested by NUD_IN_TIMER -- NOTE(review): confirm against the NUD_IN_TIMER definition
neigh->updated = jiffies;
neigh_suspect(neigh);
notify = 1;
}
} else if (state & NUD_DELAY) {
if (time_before_eq(now,
neigh->confirmed +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is now reachable\n", neigh);
neigh->nud_state = NUD_REACHABLE;
neigh->updated = jiffies;
neigh_connect(neigh);
notify = 1;
next = neigh->confirmed + neigh->parms->reachable_time;
} else {
neigh_dbg(2, "neigh %p is probed\n", neigh);
neigh->nud_state = NUD_PROBE;
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0);
notify = 1;
next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
}
} else {
/* NUD_PROBE|NUD_INCOMPLETE */
next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
}
if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
neigh->nud_state = NUD_FAILED;
notify = 1;
neigh_invalidate(neigh);
goto out;
}
if (neigh->nud_state & NUD_IN_TIMER) { // the (possibly new) state is timer-driven
if (time_before(next, jiffies + HZ/2))
next = jiffies + HZ/2;
if (!mod_timer(&neigh->timer, next)) // (re)arm the timer; NOTE(review): the extra reference is presumably taken because the timer was not pending -- confirm against the mod_timer() contract
neigh_hold(neigh);
}
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
neigh_probe(neigh);
} else {
out:
write_unlock(&neigh->lock);
}
if (notify)
neigh_update_notify(neigh);
neigh_release(neigh);
}
REACHABLE超时并且最近有使用邻居项的情况下,邻居项会转换为DELAY状态,DELAY时间内不会发送探测邻居状态报文,这段时间内可能有应答报文等可以确认邻居可达,更新confirmed时间;DELAY状态超时处理比较简单,neigh_timer_handler检查当前时间是否小于等于最后一次确认邻居可达时间+DELAY_PROBE_TIME,如果小于等于,也就是最近不久前就确认过邻居可达了,那么直接转换为REACHABLE状态,如果大于,那么就转换为PROBE状态,主动去探测邻居的状态,更新邻居项的相关时间;DELAY状态超时回调处理函数实现代码如下:
/*
 * Timer callback for a neighbour entry; @arg is the struct neighbour pointer
 * registered with the timer.  This listing is annotated for the DELAY
 * timeout path: the entry returns to REACHABLE if reachability was confirmed
 * within the last DELAY_PROBE_TIME, otherwise it moves to PROBE and probing
 * starts.
 */
static void neigh_timer_handler(unsigned long arg)
{
unsigned long now, next;
struct neighbour *neigh = (struct neighbour *)arg;
unsigned int state;
int notify = 0;
write_lock(&neigh->lock);
state = neigh->nud_state;
now = jiffies;
next = now + HZ;
if (!(state & NUD_IN_TIMER))
goto out;
if (state & NUD_REACHABLE) {
if (time_before_eq(now,
neigh->confirmed + neigh->parms->reachable_time)) {
neigh_dbg(2, "neigh %p is still alive\n", neigh);
next = neigh->confirmed + neigh->parms->reachable_time;
} else if (time_before_eq(now,
neigh->used +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
neigh->nud_state = NUD_DELAY;
neigh->updated = jiffies;
neigh_suspect(neigh);
next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
} else {
neigh_dbg(2, "neigh %p is suspected\n", neigh);
neigh->nud_state = NUD_STALE;
neigh->updated = jiffies;
neigh_suspect(neigh);
notify = 1;
}
} else if (state & NUD_DELAY) { // DELAY timed out
if (time_before_eq(now,
neigh->confirmed +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { // reachability was confirmed within the last DELAY_PROBE_TIME: treat the neighbour as reachable
neigh_dbg(2, "neigh %p is now reachable\n", neigh);
neigh->nud_state = NUD_REACHABLE; // move back to REACHABLE
neigh->updated = jiffies;
neigh_connect(neigh); // NOTE(review): presumably switches the output hook to the connected variant -- not visible here
notify = 1;
next = neigh->confirmed + neigh->parms->reachable_time; // REACHABLE expiry, measured from the last confirmation
} else { // no recent confirmation: start actively probing
neigh_dbg(2, "neigh %p is probed\n", neigh);
neigh->nud_state = NUD_PROBE; // move to PROBE
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0); // restart the probe counter
notify = 1;
next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); // PROBE expiry, i.e. when the next probe is due
}
} else {
/* NUD_PROBE|NUD_INCOMPLETE */
next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
}
if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
neigh->nud_state = NUD_FAILED;
notify = 1;
neigh_invalidate(neigh);
goto out;
}
if (neigh->nud_state & NUD_IN_TIMER) {
if (time_before(next, jiffies + HZ/2))
next = jiffies + HZ/2;
if (!mod_timer(&neigh->timer, next))
neigh_hold(neigh);
}
// an entry that just entered PROBE above gets its first probe sent here
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
neigh_probe(neigh);
} else {
out:
write_unlock(&neigh->lock);
}
if (notify)
neigh_update_notify(neigh);
neigh_release(neigh);
}
DELAY状态超时并且最近没有确认邻居可达会转换为PROBE状态,同时会发送邻居探测报文,PROBE状态只可能转换为FAILED或者REACHABLE状态,前面ARP报文的arp_process收到ARP请求的应答时,会将邻居项状态转换为REACHABLE状态,PROBE状态的定时器超时只是为了重复ARP请求,PROBE定时器超时处理与INCOMPLETE状态走一样的代码,实现代码如下:
/*
 * Called when a timer expires for a neighbour entry; @arg is the struct
 * neighbour pointer registered with the timer.  This listing is annotated
 * for the PROBE/INCOMPLETE timeout path, which both states share: schedule
 * the next retransmission, fail the entry once the probe counter has reached
 * neigh_max_probes(), otherwise send another probe.
 */
static void neigh_timer_handler(unsigned long arg)
{
unsigned long now, next;
struct neighbour *neigh = (struct neighbour *)arg;
unsigned int state;
int notify = 0;
write_lock(&neigh->lock);
state = neigh->nud_state;
now = jiffies;
next = now + HZ;
if (!(state & NUD_IN_TIMER))
goto out;
if (state & NUD_REACHABLE) {
if (time_before_eq(now,
neigh->confirmed + neigh->parms->reachable_time)) {
neigh_dbg(2, "neigh %p is still alive\n", neigh);
next = neigh->confirmed + neigh->parms->reachable_time;
} else if (time_before_eq(now,
neigh->used +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
neigh->nud_state = NUD_DELAY;
neigh->updated = jiffies;
neigh_suspect(neigh);
next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
} else {
neigh_dbg(2, "neigh %p is suspected\n", neigh);
neigh->nud_state = NUD_STALE;
neigh->updated = jiffies;
neigh_suspect(neigh);
notify = 1;
}
} else if (state & NUD_DELAY) {
if (time_before_eq(now,
neigh->confirmed +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is now reachable\n", neigh);
neigh->nud_state = NUD_REACHABLE;
neigh->updated = jiffies;
neigh_connect(neigh);
notify = 1;
next = neigh->confirmed + neigh->parms->reachable_time;
} else {
neigh_dbg(2, "neigh %p is probed\n", neigh);
neigh->nud_state = NUD_PROBE;
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0);
notify = 1;
next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
}
} else { // PROBE or INCOMPLETE timed out
/* NUD_PROBE|NUD_INCOMPLETE */
next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); // time of the next retransmission
}
if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { // PROBE/INCOMPLETE with the probe counter at or above the maximum: move to FAILED
neigh->nud_state = NUD_FAILED;
notify = 1;
neigh_invalidate(neigh);
goto out;
}
if (neigh->nud_state & NUD_IN_TIMER) {
if (time_before(next, jiffies + HZ/2))
next = jiffies + HZ/2;
if (!mod_timer(&neigh->timer, next)) // arm the timer for the next expiry
neigh_hold(neigh);
}
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { // still PROBE/INCOMPLETE
neigh_probe(neigh); // send another probe; NOTE(review): neigh_probe() is expected to drop neigh->lock, since the unlock below is skipped on this path
} else {
out:
write_unlock(&neigh->lock);
}
if (notify)
neigh_update_notify(neigh);
neigh_release(neigh);
}
《Linux内核源码剖析:TCP/IP实现(上册)》机械工业出版社
《深入理解LINUX网络技术内幕》中国电力出版社