ARP(地址解析协议)的理论部分参见前篇《ARP与RARP》,这里则通过源码(Linux kernel 1.2.13;net/inet/arp.c)来剖析其内部原理及实现过程。
一、ARP表项
/* * This structure defines the ARP mapping cache. As long as we make changes * in this structure, we keep interrupts of. But normally we can copy the * hardware address and the device pointer in a local variable and then make * any "long calls" to send a packet out. */ //ARP缓存中的每一个由arp_table结构表示,将这些表项串联起来构成链表,就构成了ARP缓存 //每个字段英文注释已经说的很清楚了 struct arp_table { struct arp_table *next; /* Linked entry list */ unsigned long last_used; /* For expiry */ unsigned int flags; /* Control status */ unsigned long ip; /* ip address of entry */ unsigned long mask; /* netmask - used for generalised proxy arps (tridge) */ unsigned char ha[MAX_ADDR_LEN]; /* Hardware address */ unsigned char hlen; /* Length of hardware address */ unsigned short htype; /* Type of hardware in use */ struct device *dev; /* Device the entry is tied to */ /* * The following entries are only used for unresolved hw addresses. */ struct timer_list timer; /* expire timer */ int retries; /* remaining retries */ struct sk_buff_head skb; /* list of queued packets */ };值得说明的是,ARP缓存其实是一个链式哈希表,其数据结果和前面文章我们介绍的 array_sock 是一样的结构。该哈希表的索引关键字就是目的 ip 地址,arp 地址解析就是已知对方ip地址,要获得其硬件地址,以完成链路层首部的创建。
哈希表查找效率高(平均时间复杂度为O(1)),因此在内核中应用广泛。
二、从arp缓存中清除过期的arp表项——arp_check_expire 函数
有限的空间不能老是让一些人占着,得常换换血液。
/* * Check if there are too old entries and remove them. If the ATF_PERM * flag is set, they are always left in the arp cache (permanent entry). * Note: Only fully resolved entries, which don't have any packets in * the queue, can be deleted, since ARP_TIMEOUT is much greater than * ARP_MAX_TRIES*ARP_RES_TIME. */ //从ARP缓存中清除过期的ARP表项 static void arp_check_expire(unsigned long dummy) { int i; unsigned long now = jiffies; unsigned long flags; save_flags(flags); cli(); //遍历整个ARP缓存表,该缓存表实则是一个链式哈希表(数组,然后数组元素为arp结构链表) for (i = 0; i < FULL_ARP_TABLE_SIZE; i++) { struct arp_table *entry; struct arp_table **pentry = &arp_tables[i];//获得arp表项 while ((entry = *pentry) != NULL) { //对每个表项上次使用时间标志进行检查,如果使用时间在ARP_TIMEOUT之前, //表示这是一个过期的表项,如果该表项不是一个永久表项,则需要清除 if ((now - entry->last_used) > ARP_TIMEOUT && !(entry->flags & ATF_PERM)) { *pentry = entry->next; /* remove from list */ del_timer(&entry->timer); /* 停止该表项设置的定时器 */ kfree_s(entry, sizeof(struct arp_table));//清除占有的内存空间 } else pentry = &entry->next;//切换到下一个表项 /* go to next entry */ } } restore_flags(flags); /* * Set the timer again. */ //剖析了那么多源码,tcp内核协议栈重置的操作一般都是:清除现有的,然后新建一个再插入到链表中 //而不是直接修改现有的,直接修改会造成内核状态的不一致 del_timer(&arp_timer);//清除已有定时器 arp_timer.expires = ARP_CHECK_INTERVAL;//重新设置一个定时器 add_timer(&arp_timer);//添加到定时器链表中 }三、释放一个arp表项——arp_release_entry 函数
/* * Release all linked skb's and the memory for this entry. */ //该函数用于释放一个ARP缓存表项,参数指向要释放的表项 //不仅要释放表项,还要释放其可能缓存的待发送的数据包 static void arp_release_entry(struct arp_table *entry) { struct sk_buff *skb; unsigned long flags; save_flags(flags); cli(); /* Release the list of `skb' pointers. */ //找到缓存在该表项中的待发送的数据包,并释放 while ((skb = skb_dequeue(&entry->skb)) != NULL) { skb_device_lock(skb); restore_flags(flags); dev_kfree_skb(skb, FREE_WRITE);//释放数据包 } restore_flags(flags); del_timer(&entry->timer);//将定时器从系统中删除,不然定时器到期了,会访问到已经不存在的资源 kfree_s(entry, sizeof(struct arp_table));//释放空间 return; }四、系统事件处理,即处理皮之不存事故——arp_device_event 函数
/* * Purge a device from the ARP queue */ //对系统事件做出响应,主要指网络设备停止工作,即NETDEV_DOWN事件 //因为每个arp表项都绑定在一个网络设备上,如果对应的网络设备不再工作, //那么这些被绑定的arp表项就不可再被使用 int arp_device_event(unsigned long event, void *ptr) { struct device *dev=ptr;//对应网络设备 int i; unsigned long flags; if(event!=NETDEV_DOWN)//本函数只处理网络设备停止工作这一事件 return NOTIFY_DONE; /* * This is a bit OTT - maybe we need some arp semaphores instead. */ save_flags(flags); cli(); //下面原理很简单,遍历arp缓存,找到对应网络设备dev的arp表项,并清除 for (i = 0; i < FULL_ARP_TABLE_SIZE; i++) { struct arp_table *entry; struct arp_table **pentry = &arp_tables[i]; while ((entry = *pentry) != NULL) { if(entry->dev==dev) { *pentry = entry->next; /* remove from list */ del_timer(&entry->timer); /* Paranoia */ kfree_s(entry, sizeof(struct arp_table)); } else pentry = &entry->next; /* go to next entry */ } } restore_flags(flags); return NOTIFY_DONE; }
好,神秘的arp地址解析过程就要开始揭开序幕了,看过之后,其实也不怎么神秘。
五、创建一个arp报文并发送出去——arp_send 函数
/*
 *	Create and send an arp packet. If (dest_hw == NULL), we create a broadcast
 *	message.
 */
/*
 * Build one ARP packet (request or reply, according to `type`) and hand it
 * to the device queue.
 *
 * type:    ARP opcode, stored in ar_op (callers in this file pass
 *          ARPOP_REQUEST or ARPOP_REPLY).
 * ptype:   protocol type passed to the device's hard_header hook.
 * dest_ip: target protocol address written into the packet.
 * dev:     device to send on; nothing is sent if it has IFF_NOARP set.
 * src_ip:  sender protocol address written into the packet.
 * dest_hw: target hardware address; NULL means use dev->broadcast in the
 *          link-layer header and zeros in the ARP body.
 * src_hw:  sender hardware address.
 *          NOTE(review): the header call tolerates NULL here, but src_hw is
 *          copied unconditionally into the ARP body below, so callers
 *          effectively must pass a valid address.
 */
void arp_send(int type, int ptype, unsigned long dest_ip,
	      struct device *dev, unsigned long src_ip,
	      unsigned char *dest_hw, unsigned char *src_hw)
{
	struct sk_buff *skb;
	struct arphdr *arp;
	unsigned char *arp_ptr;

	/*
	 *	No arp on this interface.
	 */

	if(dev->flags&IFF_NOARP)
		return;

	/*
	 *	Allocate a buffer
	 */

	/*
	 * The ARP packet is itself an sk_buff. Size = ARP header
	 * + 2 * (hardware address length + 4-byte IP address), for sender and
	 * target, + link-layer header length.
	 */
	skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4)
				+ dev->hard_header_len, GFP_ATOMIC);
	if (skb == NULL)
	{
		printk("ARP: no memory to send an arp packet\n");
		return;
	}
	skb->len = sizeof(struct arphdr) + dev->hard_header_len + 2*(dev->addr_len+4);
	skb->arp = 1;	/* link-layer header is complete (same flag arp_find sets on success) */
	skb->dev = dev;	/* bind the packet to the outgoing device */
	skb->free = 1;	/* per the original annotation: free the packet once it has been sent - TODO confirm */

	/*
	 *	Fill the device header for the ARP frame
	 */

	/*
	 * Build the link-layer header through the device's hard_header hook
	 * (eth_header for Ethernet, per the earlier article "connect函数剖析(二)").
	 */
	dev->hard_header(skb->data,dev,ptype,dest_hw?dest_hw:dev->broadcast,src_hw?src_hw:NULL,skb->len,skb);

	/* Fill out the arp protocol part. */
	/* Packet layout: link-layer header | ARP header | address block. */
	arp = (struct arphdr *) (skb->data + dev->hard_header_len);
	arp->ar_hrd = htons(dev->type);
#ifdef CONFIG_AX25
	arp->ar_pro = (dev->type != ARPHRD_AX25)? htons(ETH_P_IP) : htons(AX25_P_IP);
#else
	arp->ar_pro = htons(ETH_P_IP);
#endif
	arp->ar_hln = dev->addr_len;
	arp->ar_pln = 4;
	arp->ar_op = htons(type);

	/*
	 * Step past the ARP header to the address block, laid out as:
	 * sender hw address | sender IP | target hw address | target IP.
	 */
	arp_ptr=(unsigned char *)(arp+1);

	/* sender hardware address */
	memcpy(arp_ptr, src_hw, dev->addr_len);
	arp_ptr+=dev->addr_len;
	/* sender IP address (fixed 4 bytes) */
	memcpy(arp_ptr, &src_ip,4);
	arp_ptr+=4;
	/* target hardware address, or zeros when the caller passed none */
	if (dest_hw != NULL)
		memcpy(arp_ptr, dest_hw, dev->addr_len);
	else
		memset(arp_ptr, 0, dev->addr_len);
	arp_ptr+=dev->addr_len;
	/* target IP address */
	memcpy(arp_ptr, &dest_ip, 4);

	/*
	 * Hand the packet to the device queue for transmission (arguments are
	 * explained in the earlier article "数据包发送").
	 */
	dev_queue_xmit(skb, dev, 0);
}
六、arp报文接收处理——arp_rcv 函数
/*
 *	Receive an arp request by the device layer. Maybe I rewrite it, to
 *	use the incoming packet for the reply. The time for the current
 *	"overhead" isn't that high...
 */
/*
 * Entry point for every received ARP packet, request or reply.
 *
 * skb: the received ARP packet; it is freed on every path.
 * dev: device the packet arrived on.
 * pt:  packet_type of the ARP protocol (not referenced in the body).
 * Returns 0 on every path.
 *
 * Summary of what the body does:
 *   - drops packets that fail the hardware/protocol sanity checks;
 *   - drops replies that are not addressed to this host;
 *   - answers requests for dev->pa_addr, and requests matching a proxy
 *     entry (the proxy path returns right after replying);
 *   - for replies to us and requests for us, updates or creates the cache
 *     entry for the sender, flushing queued packets if the entry had been
 *     incomplete.
 */
int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
/*
 *	We shouldn't use this type conversion. Check later.
 */

	struct arphdr *arp = (struct arphdr *)skb->h.raw;	/* ARP header */
	unsigned char *arp_ptr= (unsigned char *)(arp+1);	/* address block after the header */
	struct arp_table *entry;
	struct arp_table *proxy_entry;
	int addr_hint,hlen,htype;
	unsigned long hash;
	unsigned char ha[MAX_ADDR_LEN];	/* So we can enable ints again. */
	long sip,tip;
	unsigned char *sha,*tha;

/*
 *	The hardware length of the packet should match the hardware length
 *	of the device.  Similarly, the hardware types should match.  The
 *	device should be ARP-able.  Also, if pln is not 4, then the lookup
 *	is not from an IP number.  We can't currently handle this, so toss
 *	it.
 */
	if (arp->ar_hln != dev->addr_len    ||
     		dev->type != ntohs(arp->ar_hrd) ||
		dev->flags & IFF_NOARP          ||
		arp->ar_pln != 4)
	{
		kfree_skb(skb, FREE_READ);
		return 0;
	}

/*
 *	Another test.
 *	The logic here is that the protocol being looked up by arp should
 *	match the protocol the device speaks.  If it doesn't, there is a
 *	problem, so toss the packet.
 */
	/* The expected ar_pro value depends on the device's hardware type. */
  	switch(dev->type)
  	{
#ifdef CONFIG_AX25
		case ARPHRD_AX25:
			if(arp->ar_pro != htons(AX25_P_IP))
			{
				kfree_skb(skb, FREE_READ);
				return 0;
			}
			break;
#endif
		case ARPHRD_ETHER:
		case ARPHRD_ARCNET:
			if(arp->ar_pro != htons(ETH_P_IP))
			{
				kfree_skb(skb, FREE_READ);
				return 0;
			}
			break;

		default:
			printk("ARP: dev->type mangled!\n");
			kfree_skb(skb, FREE_READ);
			return 0;
	}

/*
 *	Extract fields
 */

	hlen  = dev->addr_len;
	htype = dev->type;

	sha=arp_ptr;			/* sender hardware address */
	arp_ptr+=hlen;
	memcpy(&sip,arp_ptr,4);		/* sender IP address */
	arp_ptr+=4;
	tha=arp_ptr;			/* target hardware address */
	arp_ptr+=hlen;
	memcpy(&tip,arp_ptr,4);		/* target IP address */

/*
 *	Check for bad requests for 127.0.0.1.  If this is one such, delete it.
 */
	/*
	 * NOTE(review): tip is copied straight out of the packet; whether
	 * INADDR_LOOPBACK is expressed in the same byte order is not visible
	 * in this excerpt - confirm.
	 */
	if(tip == INADDR_LOOPBACK)
	{
		kfree_skb(skb, FREE_READ);
		return 0;
	}

/*
 *	Process entry.  The idea here is we want to send a reply if it is a
 *	request for us or if it is a request for someone else that we hold
 *	a proxy for.  We want to add an entry to our cache if it is a reply
 *	to us or if it is a request for our address.
 *	(The assumption for this last is that if someone is requesting our
 *	address, they are probably intending to talk to us, so it saves time
 *	if we cache their address.  Their address is also probably not in
 *	our cache, since ours is not in their cache.)
 *
 *	Putting this another way, we only care about replies if they are to
 *	us, in which case we add them to the cache.  For requests, we care
 *	about those for us and those for our proxies.  We reply to both,
 *	and in the case of requests for us we add the requester to the arp
 *	cache.
 */

	/* Classify the target IP; IS_MYADDR means the packet is addressed to this host. */
	addr_hint = ip_chk_addr(tip);

	if(arp->ar_op == htons(ARPOP_REPLY))	/* ARP reply */
	{
		if(addr_hint!=IS_MYADDR)	/* a reply, but not for this host */
		{
/*
 *	Replies to other machines get tossed.
 */
			kfree_skb(skb, FREE_READ);
			return 0;
		}
/*
 *	Fall through to code below that adds sender to cache.
 */
	}
	else
	/*
	 * ARP request. Two cases: a request for this device's own address, or a
	 * request for a host this machine proxies for.
	 */
	{
/*
 * 	It is now an arp request
 */
/*
 * Only reply for the real device address or when it's in our proxy tables
 */
		if(tip!=dev->pa_addr)
		{	/* not our own address: candidate proxy request */
/*
 * 	To get in here, it is a request for someone else.  We need to
 * 	check if that someone else is one of our proxies.  If it isn't,
 * 	we can toss it.
 */
			cli();
			/* Proxy entries live on the chain at arp_tables[PROXY_HASH]. */
			for(proxy_entry=arp_tables[PROXY_HASH];
			    proxy_entry;
			    proxy_entry = proxy_entry->next)
			{
			  /* we will respond to a proxy arp request
			     if the masked arp table ip matches the masked
			     tip. This allows a single proxy arp table
			     entry to be used on a gateway machine to handle
			     all requests for a whole network, rather than
			     having to use a huge number of proxy arp entries
			     and having to keep them uptodate.
			     */
			  /*
			   * Match condition: the proxy entry belongs to a device
			   * OTHER than the one the request arrived on, the hardware
			   * type is equal, and the IPs agree under the entry's mask.
			   */
			  if (proxy_entry->dev != dev &&
			      proxy_entry->htype == htype &&
			      !((proxy_entry->ip^tip)&proxy_entry->mask))
			    break;

			}
			if (proxy_entry)	/* proxy entry found */
			{
				/*
				 * Copy the entry's hardware address to the local buffer
				 * so interrupts can be re-enabled, then reply with it.
				 */
				memcpy(ha, proxy_entry->ha, hlen);
				sti();
				arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,ha);
				kfree_skb(skb, FREE_READ);
				return 0;	/* proxy replies do not update the cache */
			}
			else	/* no proxy entry: drop */
			{
				sti();
				kfree_skb(skb, FREE_READ);
				return 0;
			}
		}
		else	/* request for this device's own address */
		{
/*
 * 	To get here, it must be an arp request for us.  We need to reply.
 */
			arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr);
			/* no return here: continue below and cache the requester */
		}
	}

	/*
	 * A host asking for our hardware address will most likely talk to us
	 * next, and we will need to send packets back to it. Caching the
	 * sender's mapping now avoids a later resolution round trip.
	 */

/*
 * Now all replies are handled.  Next, anything that falls through to here
 * needs to be added to the arp cache, or have its entry updated if it is
 * there.
 */

	hash = HASH(sip);	/* chain index derived from the sender's IP */
	cli();
	/*
	 * Look for an existing entry for the sender (same IP and hardware type).
	 * It may be an incomplete entry that was waiting for this reply.
	 */
	for(entry=arp_tables[hash];entry;entry=entry->next)
		if(entry->ip==sip && entry->htype==htype)
			break;

	if(entry)
	{
/*
 *	Entry found; update it.
 */
		memcpy(entry->ha, sha, hlen);	/* record the sender's hardware address */
		entry->hlen = hlen;
		entry->last_used = jiffies;	/* refresh the stamp used by arp_check_expire */
		if (!(entry->flags & ATF_COM))	/* entry was incomplete */
		{
/*
 *	This entry was incomplete.  Delete the retransmit timer
 *	and switch to complete status.
 */
			del_timer(&entry->timer);
			entry->flags |= ATF_COM;
			sti();
/*
 *	Send out waiting packets. We might have problems, if someone is
 *	manually removing entries right now -- entry might become invalid
 *	underneath us.
 */
			/*
			 * arp_find queues packets on an incomplete entry while the
			 * hardware address is unknown. Now that the address is
			 * filled in, arp_send_q is called to send those packets
			 * (arp_send_q itself is not shown in this article).
			 */
			arp_send_q(entry, sha);
		}
		else	/* entry was already complete: nothing is queued on it */
		{
			sti();
		}
	}
	else	/* no entry yet: create one */
	{
/*
 * 	No entry found.  Need to add a new entry to the arp table.
 */
		entry = (struct arp_table *)kmalloc(sizeof(struct arp_table),GFP_ATOMIC);
		if(entry == NULL)
		{
			sti();
			printk("ARP: no memory for new arp entry\n");

			kfree_skb(skb, FREE_READ);
			return 0;
		}

		/* Fill in the new entry; it is complete from the start. */
		entry->mask = DEF_ARP_NETMASK;
		entry->ip = sip;
		entry->hlen = hlen;
		entry->htype = htype;
		entry->flags = ATF_COM;
		init_timer(&entry->timer);
		memcpy(entry->ha, sha, hlen);
		entry->last_used = jiffies;
		entry->dev = skb->dev;
		skb_queue_head_init(&entry->skb);	/* initialise the entry's packet queue; nothing is queued here */
		/* Insert at the head of the chain for this hash slot. */
		entry->next = arp_tables[hash];
		arp_tables[hash] = entry;
		sti();
	}

/*
 *	Replies have been sent, and entries have been added.  All done.
 */
	/* The received ARP packet has served its purpose; free it. */
	kfree_skb(skb, FREE_READ);
	return 0;
}
七、arp地址查询——arp_find 函数
/*
 *	Find an arp mapping in the cache. If not found, post a request.
 */
/*
 * Resolve a destination IP address to a hardware address so the caller can
 * finish building the link-layer header.
 *
 * haddr: output buffer for the hardware address.
 * paddr: destination IP address to resolve.
 * dev:   outgoing device.
 * saddr: source IP address placed in any ARP request that is sent.
 * skb:   packet waiting for the address.
 *        NOTE(review): the IS_MYADDR and IS_BROADCAST cases dereference skb
 *        without the NULL check that the later paths use - confirm callers
 *        never pass NULL there.
 *
 * Returns 0 when haddr has been filled in. Returns 1 when the address is
 * not known yet: skb is queued on the cache entry, or freed (only if
 * skb->free is set) when no entry could be allocated, and for a newly
 * missing address an ARP request is sent.
 */
int arp_find(unsigned char *haddr, unsigned long paddr, struct device *dev,
	   unsigned long saddr, struct sk_buff *skb)
{
	struct arp_table *entry;
	unsigned long hash;
#ifdef CONFIG_IP_MULTICAST
	unsigned long taddr;
#endif

	/* Special address classes are answered without touching the cache. */
	switch (ip_chk_addr(paddr))
	{
		case IS_MYADDR:		/* our own address */
			printk("ARP: arp called for own IP address\n");
			memcpy(haddr, dev->dev_addr, dev->addr_len);
			skb->arp = 1;
			return 0;
#ifdef CONFIG_IP_MULTICAST
		case IS_MULTICAST:	/* multicast address */
			if(dev->type==ARPHRD_ETHER || dev->type==ARPHRD_IEEE802)
			{
				/*
				 * Hardware address = 01:00:5e followed by the low 23 bits
				 * of the IP address.
				 * NOTE(review): unlike the other two cases, skb->arp is
				 * not set on this path.
				 */
				haddr[0]=0x01;
				haddr[1]=0x00;
				haddr[2]=0x5e;
				taddr=ntohl(paddr);
				haddr[5]=taddr&0xff;
				taddr=taddr>>8;
				haddr[4]=taddr&0xff;
				taddr=taddr>>8;
				haddr[3]=taddr&0x7f;
				return 0;
			}
		/*
		 *	If a device does not support multicast broadcast the stuff (eg AX.25 for now)
		 */
#endif

		case IS_BROADCAST:	/* broadcast address */
			memcpy(haddr, dev->broadcast, dev->addr_len);
			skb->arp = 1;
			return 0;
	}

	hash = HASH(paddr);	/* chain index, used below when inserting a new entry */
	cli();

	/*
	 *	Find an entry
	 */
	/* Non-proxy lookup of the cache entry for paddr. */
	entry = arp_lookup(paddr, PROXY_NONE);

	if (entry != NULL) 	/* It exists */
	{
	        if (!(entry->flags & ATF_COM))
	        {
			/*
			 *	A request was already sent, but no reply yet. Thus
			 *	queue the packet with the previous attempt
			 */

			if (skb != NULL)
			{
				/* Park the packet on the entry's queue until the address is known. */
				skb_queue_tail(&entry->skb, skb);
				skb_device_unlock(skb);
			}
			sti();
			return 1;
		}

		/*
		 *	Update the record
		 */

		entry->last_used = jiffies;
		memcpy(haddr, entry->ha, dev->addr_len);	/* hand back the cached hardware address */
		if (skb)
			skb->arp = 1;	/* hardware address resolved for this packet */
		sti();
		return 0;
	}

	/*
	 *	Create a new unresolved entry.
	 */

	entry = (struct arp_table *) kmalloc(sizeof(struct arp_table),
					GFP_ATOMIC);
	if (entry != NULL)
	{
		/* New entry starts incomplete: flags 0, hardware address zeroed. */
		entry->mask = DEF_ARP_NETMASK;
		entry->ip = paddr;
		entry->hlen = dev->addr_len;
		entry->htype = dev->type;
		entry->flags = 0;
		memset(entry->ha, 0, dev->addr_len);
		entry->dev = dev;
		entry->last_used = jiffies;
		/* Per-entry timer: arp_expire_request is called with this entry as data. */
		init_timer(&entry->timer);
		entry->timer.function = arp_expire_request;
		entry->timer.data = (unsigned long)entry;
		entry->timer.expires = ARP_RES_TIME;
		/* Insert at the head of the chain for this hash slot. */
		entry->next = arp_tables[hash];
		arp_tables[hash] = entry;
		add_timer(&entry->timer);
		entry->retries = ARP_MAX_TRIES;
		skb_queue_head_init(&entry->skb);
		if (skb != NULL)
		{
			/* Park the packet on the new entry until the reply arrives. */
			skb_queue_tail(&entry->skb, skb);
			skb_device_unlock(skb);
		}
	}
	else
	{
		/* No memory for an entry: the packet cannot be queued. */
		if (skb != NULL && skb->free)
			kfree_skb(skb, FREE_WRITE);
  	}
	sti();

	/*
	 *	If we didn't find an entry, we will try to send an ARP packet.
	 */

	/*
	 * Broadcast an ARP request for paddr (dest_hw == NULL). This sends only
	 * the request; the packets queued above are sent later, from arp_rcv
	 * via arp_send_q, once the reply completes the entry.
	 */
	arp_send(ARPOP_REQUEST, ETH_P_ARP, paddr, dev, saddr, NULL,
		 dev->dev_addr);

	return 1;
}
八、arp匹配表项查询——arp_lookup 函数
/* * This will find an entry in the ARP table by looking at the IP address. * If proxy is PROXY_EXACT then only exact IP matches will be allowed * for proxy entries, otherwise the netmask will be used */ //完成具体的arp匹配表项查询工作 static struct arp_table *arp_lookup(unsigned long paddr, enum proxy proxy) { struct arp_table *entry; unsigned long hash = HASH(paddr);//根据目的ip地址确定哈希表索引 //查找arp缓存对应索引位置的arp表项链表 for (entry = arp_tables[hash]; entry != NULL; entry = entry->next) if (entry->ip == paddr) break; /* it's possibly a proxy entry (with a netmask) */ //代理arp表项情况,不加大括号,这编码风格... if (!entry && proxy != PROXY_NONE) //定位到代理表项,arp缓存的最后一个位置 for (entry=arp_tables[PROXY_HASH]; entry != NULL; entry = entry->next) //如果是精确匹配,则全32位网络地址匹配 //如果不是则只要网络部分地址相同就行 if ((proxy==PROXY_EXACT) ? (entry->ip==paddr) : !((entry->ip^paddr)&entry->mask)) break; return entry;//不管匹配成功与否都返回,没成功entry==null }建议结合 ARP地址解析过程理论部分理解。
通过上面分析我们知道:
ARP缓存其实就是一个链式哈希表(本质是一个数组,但其每个数组元素又是一个链表),链表元素为 ARP 表项;
每个ARP表项由一个 arp_table 结构表示,该结构中含有ip地址与硬件地址的映射关系、表项状态标识以及其他辅助字段(其中最重要的是数据包暂存队列);
对ARP缓存的操作,具体的就是对每个 arp_table 结构的操作
本人能力有限,对于arp地址解析过程难免有理解偏差或没理解的地方,欢迎指正,交流进步。
参考书籍:《Linux 内核网络栈源代码情景分析》、Linux kernel 1.2.13