/*
 * 参见《Linux内核网络栈源代码情景分析》一书。
 * ARP协议的实现围绕一个"数组+链表"(哈希表)的数据结构展开,包括对表项的增、删、改、查,以及若干回调函数的设置。
 * 相关数据结构:
 */
/*
 * One ARP cache entry, mapping an IP address to a hardware (MAC) address.
 * Entries are chained through `next` into the per-bucket lists of arp_tables[].
 */
struct arp_table
{
struct arp_table *next; /* Linked entry list */
/* Time of last use; arp_check_expire() compares it with ARP_TIMEOUT to decide whether the entry has expired. */
unsigned long last_used; /* For expiry */
/* ATF_* status bits; e.g. an entry with ATF_PERM set is never expired and stays in the table. */
unsigned int flags; /* Control status */
unsigned long ip; /* ip address of entry */
unsigned long mask; /* netmask - used for generalised proxy arps (tridge) */
unsigned char ha[MAX_ADDR_LEN]; /* Hardware address */
unsigned char hlen; /* Length of hardware address */
unsigned short htype; /* Type of hardware in use */
struct device *dev; /* Device the entry is tied to */
/*
 * The following entries are only used for unresolved hw addresses.
 */
/* Retransmission timer for the outstanding request. */
struct timer_list timer; /* expire timer */
/* Number of tries left before the request is given up. */
int retries; /* remaining retries */
/* Packets that cannot be sent yet because the IP-to-MAC mapping is not resolved; they are sent once it is. */
struct sk_buff_head skb; /* list of queued packets */
};
/*
 * Configurable Parameters (don't touch unless you know what you are doing)
 */
/*
 * If an arp request is sent, ARP_RES_TIME is the timeout value until the
 * next request is sent.
 */
/* If no reply has arrived ARP_RES_TIME after a request went out, the request is sent again. */
#define ARP_RES_TIME (250*(HZ/10))
/*
 * The number of times an arp request is sent, until the host is
 * considered unreachable.
 */
/* Initial value of the per-entry retry counter (see arp_find / arp_expire_request). */
#define ARP_MAX_TRIES 3
/*
 * After that time, an unused entry is deleted from the arp table.
 */
/* Maximum time an entry may stay unused before arp_check_expire() removes it. */
#define ARP_TIMEOUT (600*HZ)
/*
 * How often is the function 'arp_check_expire' called.
 * An entry is invalidated in the time between ARP_TIMEOUT and
 * (ARP_TIMEOUT+ARP_CHECK_INTERVAL).
 */
/* Interval between two scans of the table for expired entries (expressed in units of HZ, not plain seconds). */
#define ARP_CHECK_INTERVAL (60 * HZ)
/*
 * Selects how arp_lookup() treats the proxy list:
 *   PROXY_EXACT - proxy entries must match the IP address exactly,
 *   PROXY_ANY   - proxy entries match through their netmask,
 *   PROXY_NONE  - the proxy list is not searched at all.
 */
enum proxy {
	PROXY_EXACT = 0,
	PROXY_ANY   = 1,
	PROXY_NONE  = 2
};
/* Forward declarations. */
static void arp_check_expire (unsigned long);
static struct arp_table *arp_lookup(unsigned long paddr, enum proxy proxy);
/*
 * Timer that periodically runs arp_check_expire(). It is started in
 * arp_init() and re-armed at the end of every arp_check_expire() run.
 * The initializer is positional; ARP_CHECK_INTERVAL is presumably the
 * `expires` field and the last member the handler (struct timer_list is
 * declared elsewhere - confirm against its definition).
 */
static struct timer_list arp_timer =
{ NULL, NULL, ARP_CHECK_INTERVAL, 0L, &arp_check_expire };
/*
 * The default arp netmask is just 255.255.255.255 which means it's
 * a single machine entry. Only proxy entries can have other netmasks
 *
 */
/* Default netmask: all bits set. */
#define DEF_ARP_NETMASK (~0)
/*
 * The size of the hash table. Must be a power of two.
 * Maybe we should remove hashing in the future for arp and concentrate
 * on Patrick Schaaf's Host-Cache-Lookup...
 */
/* Number of hash buckets for ordinary entries (the proxy list is not counted). */
#define ARP_TABLE_SIZE 16
/* The ugly +1 here is to cater for proxy entries. They are put in their
own list for efficiency of lookup. If you don't want to find a proxy
entry then don't look in the last entry, otherwise do
*/
/* Total number of list heads: the hash buckets plus the proxy list. */
#define FULL_ARP_TABLE_SIZE (ARP_TABLE_SIZE+1)
/* The ARP cache itself: an array of singly linked lists, all initially empty. */
struct arp_table *arp_tables[FULL_ARP_TABLE_SIZE] =
{
NULL,
};
/*
 * The last bits in the IP address are used for the cache lookup.
 * A special entry is used for proxy arp entries
 */
/* Bucket index for an IP address; the mask relies on ARP_TABLE_SIZE being a power of two. */
#define HASH(paddr) (htonl(paddr) & (ARP_TABLE_SIZE - 1))
/* Index of the proxy list inside arp_tables[]. */
#define PROXY_HASH ARP_TABLE_SIZE
/*
 * Periodic sweep of the whole ARP cache (timer handler of arp_timer).
 *
 * Every entry whose last use lies more than ARP_TIMEOUT in the past is
 * unlinked and freed, unless it carries ATF_PERM (permanent entry).
 * Note: Only fully resolved entries, which don't have any packets in
 * the queue, can be deleted, since ARP_TIMEOUT is much greater than
 * ARP_MAX_TRIES*ARP_RES_TIME.
 *
 * The argument is the timer's data word and is not used.
 */
static void arp_check_expire(unsigned long dummy)
{
	unsigned long stamp = jiffies;	/* "now" for the whole sweep */
	unsigned long irq_state;
	int bucket = 0;

	save_flags(irq_state);
	cli();

	while (bucket < FULL_ARP_TABLE_SIZE)
	{
		/* link always points at the pointer that references cur */
		struct arp_table **link = &arp_tables[bucket++];
		struct arp_table *cur;

		for (cur = *link; cur != NULL; cur = *link)
		{
			int stale = (stamp - cur->last_used) > ARP_TIMEOUT;

			if (!stale || (cur->flags & ATF_PERM))
			{
				/* keep it, step to the next entry */
				link = &cur->next;
				continue;
			}

			/* unlink, stop its timer (paranoia) and free it */
			*link = cur->next;
			del_timer(&cur->timer);
			kfree_s(cur, sizeof(struct arp_table));
		}
	}

	restore_flags(irq_state);

	/*
	 * Re-arm the periodic timer. Only the interval is stored in
	 * `expires` here, exactly as the static initializer does.
	 */
	del_timer(&arp_timer);
	arp_timer.expires = ARP_CHECK_INTERVAL;
	add_timer(&arp_timer);
}
/*
 * Release everything owned by an ARP entry that has already been taken
 * off its list:
 *   1. every packet still waiting on the entry's queue,
 *   2. the entry's timer,
 *   3. the entry structure itself.
 */
static void arp_release_entry(struct arp_table *entry)
{
	unsigned long irq_state;
	struct sk_buff *pending;

	save_flags(irq_state);
	cli();

	/* Drain the queue of packets that never got a resolved address. */
	for (;;)
	{
		pending = skb_dequeue(&entry->skb);
		if (pending == NULL)
			break;

		skb_device_lock(pending);
		/* the caller's interrupt state is put back before the free */
		restore_flags(irq_state);
		dev_kfree_skb(pending, FREE_WRITE);
	}
	restore_flags(irq_state);

	del_timer(&entry->timer);
	kfree_s(entry, sizeof(struct arp_table));
}
/*
 * Purge a device from the ARP queue
 *
 * Device notifier callback (registered through arp_dev_notifier). Only
 * NETDEV_DOWN is acted upon: every cache entry tied to the device is
 * unlinked and released. Always returns NOTIFY_DONE.
 *
 * Entries are released through arp_release_entry() so that packets still
 * queued on an unresolved entry are freed together with it instead of
 * being leaked by a bare kfree_s() of the entry.
 */
int arp_device_event(unsigned long event, void *ptr)
{
	struct device *dev = ptr;
	unsigned long flags;
	int i;

	if (event != NETDEV_DOWN)
		return NOTIFY_DONE;

	/*
	 * This is a bit OTT - maybe we need some arp semaphores instead.
	 */
	save_flags(flags);
	cli();

	/* Walk every list, including the proxy list. */
	for (i = 0; i < FULL_ARP_TABLE_SIZE; i++)
	{
		struct arp_table *entry;
		struct arp_table **pentry = &arp_tables[i];

		while ((entry = *pentry) != NULL)
		{
			if (entry->dev == dev)
			{
				*pentry = entry->next;	/* remove from list */
				/*
				 * Frees the queued packets, stops the timer and
				 * frees the entry. It restores the interrupt state
				 * it was entered with, i.e. the state set by the
				 * cli() above.
				 */
				arp_release_entry(entry);
			}
			else
				pentry = &entry->next;	/* go to next entry */
		}
	}

	restore_flags(flags);
	return NOTIFY_DONE;
}
/*
 * Build one ARP packet and hand it to the link layer.
 *
 *   type     - ARP opcode stored in ar_op
 *   ptype    - protocol type passed to the device's hard_header()
 *   dest_ip  - target protocol address
 *   dev      - device to send on; nothing is sent if it has IFF_NOARP
 *   src_ip   - sender protocol address
 *   dest_hw  - target hardware address; NULL means the frame goes to the
 *              device broadcast address and the target field is zeroed
 *   src_hw   - sender hardware address (copied unconditionally)
 */
void arp_send(int type, int ptype, unsigned long dest_ip, struct device *dev, unsigned long src_ip, unsigned char *dest_hw, unsigned char *src_hw)
{
	struct sk_buff *skb;
	struct arphdr *arp;
	unsigned char *cursor;
	unsigned char *link_dest;

	/* No arp on this interface. */
	if (dev->flags & IFF_NOARP)
		return;

	/*
	 * Room for the link-level header, the fixed ARP header and two
	 * (hardware address, 4-byte IP address) pairs.
	 */
	skb = alloc_skb(sizeof(struct arphdr) + 2*(dev->addr_len+4)
				+ dev->hard_header_len, GFP_ATOMIC);
	if (skb == NULL)
	{
		printk("ARP: no memory to send an arp packet\n");
		return;
	}

	skb->len = sizeof(struct arphdr) + dev->hard_header_len + 2*(dev->addr_len+4);
	skb->arp = 1;
	skb->dev = dev;
	skb->free = 1;

	/* Link-level header: broadcast when no target hardware address is given. */
	link_dest = dest_hw ? dest_hw : dev->broadcast;
	dev->hard_header(skb->data, dev, ptype, link_dest, src_hw, skb->len, skb);

	/* Fixed part of the ARP header. */
	arp = (struct arphdr *) (skb->data + dev->hard_header_len);
	arp->ar_hrd = htons(dev->type);
#ifdef CONFIG_AX25
	arp->ar_pro = (dev->type != ARPHRD_AX25)? htons(ETH_P_IP) : htons(AX25_P_IP);
#else
	arp->ar_pro = htons(ETH_P_IP);
#endif
	arp->ar_hln = dev->addr_len;
	arp->ar_pln = 4;
	arp->ar_op = htons(type);

	/* Variable part: sender hw, sender ip, target hw, target ip. */
	cursor = (unsigned char *)(arp + 1);

	memcpy(cursor, src_hw, dev->addr_len);
	cursor += dev->addr_len;

	memcpy(cursor, &src_ip, 4);
	cursor += 4;

	if (dest_hw == NULL)
		memset(cursor, 0, dev->addr_len);
	else
		memcpy(cursor, dest_hw, dev->addr_len);
	cursor += dev->addr_len;

	memcpy(cursor, &dest_ip, 4);

	/* Pass the finished frame down to the link layer. */
	dev_queue_xmit(skb, dev, 0);
}
/*
 * This function is called, if an entry is not resolved in ARP_RES_TIME.
 * Either resend a request, or give it up and free the entry.
 *
 * Timer handler of an unresolved entry; `arg` is the entry pointer that
 * arp_find() stored in timer.data.
 */
static void arp_expire_request (unsigned long arg)
{
struct arp_table *entry = (struct arp_table *) arg;
struct arp_table **pentry;
unsigned long hash;
unsigned long flags;
save_flags(flags);
cli();
/*
 * Since all timeouts are handled with interrupts enabled, there is a
 * small chance, that this entry has just been resolved by an incoming
 * packet. This is the only race condition, but it is handled...
 */
/* ATF_COM set: the mapping has been completed in the meantime, nothing to do. */
if (entry->flags & ATF_COM)
{
restore_flags(flags);
return;
}
/* Tries are left after this one: re-arm the timer and send another request. */
if (--entry->retries > 0)
{
/* Copy what arp_send() needs before interrupts are re-enabled. */
unsigned long ip = entry->ip;
struct device *dev = entry->dev;
/* Restart the retry timer; if ARP_RES_TIME passes without a reply this handler runs again. */
/* Set new timer. */
del_timer(&entry->timer);
entry->timer.expires = ARP_RES_TIME;
add_timer(&entry->timer);
restore_flags(flags);
/* Send the ARP request (broadcast, since the target hardware address is NULL). */
arp_send(ARPOP_REQUEST, ETH_P_ARP, ip, dev, dev->pa_addr,
NULL, dev->dev_addr);
return;
}
/*
 * Arp request timed out. Delete entry and all waiting packets.
 * If we give each entry a pointer to itself, we don't have to
 * loop through everything again. Maybe hash is good enough, but
 * I will look at it later.
 */
/* Bucket the entry's IP address hashes to. */
hash = HASH(entry->ip);
/* proxy entries shouldn't really time out so this is really
only here for completeness
*/
if (entry->flags & ATF_PUBL)
pentry = &arp_tables[PROXY_HASH];
else
pentry = &arp_tables[hash];
/* Find the entry on its list, unlink it and release it together with its queued packets. */
while (*pentry != NULL)
{
if (*pentry == entry)
{
*pentry = entry->next; /* delete from linked list */
del_timer(&entry->timer);
restore_flags(flags);
arp_release_entry(entry);
return;
}
pentry = &(*pentry)->next;
}
restore_flags(flags);
printk("Possible ARP queue corruption.\n");
/*
 * We should never arrive here.
 */
}
/*
 * Flush the packets queued on an entry once its address is resolved.
 *
 * Each queued packet gets its link-level header rebuilt through the
 * device's rebuild_header(); on success (zero return) it is marked as
 * resolved and transmitted. Nothing is done, apart from a log message,
 * when the entry is not complete (ATF_COM clear). hw_dest is not used.
 */
static void arp_send_q(struct arp_table *entry, unsigned char *hw_dest)
{
	unsigned long irq_state;
	struct sk_buff *pkt;

	/* An unresolved entry cannot have its packets sent. */
	if ((entry->flags & ATF_COM) == 0)
	{
		printk("arp_send_q: incomplete entry for %s\n",
				in_ntoa(entry->ip));
		return;
	}

	save_flags(irq_state);
	cli();

	/* Empty the entire queue, building each packet up ready to send. */
	for (pkt = skb_dequeue(&entry->skb); pkt != NULL; pkt = skb_dequeue(&entry->skb))
	{
		IS_SKB(pkt);
		skb_device_lock(pkt);
		restore_flags(irq_state);

		/*
		 * The packet was queued without a usable link-level header
		 * because the mapping was missing; build it now.
		 */
		if (pkt->dev->rebuild_header(pkt->data, pkt->dev, pkt->raddr, pkt))
		{
			/* This routine is only ever called when 'entry' is
			   complete. Thus this can't fail. */
			printk("arp_send_q: The impossible occurred. Please notify Alan.\n");
			printk("arp_send_q: active entity %s\n",in_ntoa(entry->ip));
			printk("arp_send_q: failed to find %s\n",in_ntoa(pkt->raddr));
			continue;
		}

		/* Header is complete: mark as resolved and transmit. */
		pkt->arp = 1;
		if (pkt->sk != NULL)
			dev_queue_xmit(pkt, pkt->dev, pkt->sk->priority);
		else
			dev_queue_xmit(pkt, pkt->dev, 0);
	}

	restore_flags(irq_state);
}
/*
 * Delete an ARP mapping entry in the cache.
 *
 *   ip_addr - address whose entries are removed; both the hash bucket of
 *             the address and the proxy list are searched
 *   force   - when zero, a permanent entry (ATF_PERM) is left alone and
 *             the function returns as soon as one is found
 *
 * Fixes over the previous version:
 *   - the early return for a permanent entry left interrupts disabled
 *     (cli() without a matching sti());
 *   - checked_proxies was never reset on restart, so after deleting one
 *     entry the proxy list could be skipped, and it was scanned twice
 *     when the hash bucket was empty.
 */
void arp_destroy(unsigned long ip_addr, int force)
{
	int checked_proxies;
	struct arp_table *entry;
	struct arp_table **pentry;
	unsigned long hash = HASH(ip_addr);

	/*
	 * The scan restarts from here after every deletion, because the
	 * lists may change while interrupts are enabled for the release.
	 */
ugly:
	cli();
	checked_proxies = 0;
	pentry = &arp_tables[hash];
	if (! *pentry)	/* bucket empty: go straight to the proxy entries */
	{
		checked_proxies = 1;
		pentry = &arp_tables[PROXY_HASH];
	}

	while ((entry = *pentry) != NULL)
	{
		if (entry->ip == ip_addr)
		{
			/* Permanent entries are only removed when forced. */
			if ((entry->flags & ATF_PERM) && !force)
			{
				sti();
				return;
			}
			*pentry = entry->next;
			del_timer(&entry->timer);
			sti();
			arp_release_entry(entry);
			/* this would have to be cleaned up */
			goto ugly;
		}
		pentry = &entry->next;
		/* End of the hash bucket reached: continue with the proxy list. */
		if (!checked_proxies && ! *pentry)
		{
			checked_proxies = 1;
			pentry = &arp_tables[PROXY_HASH];
		}
	}
	sti();
}
/*
 * Receive an arp request by the device layer. Maybe I rewrite it, to
 * use the incoming packet for the reply. The time for the current
 * "overhead" isn't that high...
 *
 * Packet handler for ARP frames (installed through arp_packet_type).
 * The packet is validated, requests for our own address or for a proxied
 * address are answered, and the sender's mapping is added to or updated
 * in the cache. The skb is always freed here and 0 is always returned.
 *
 * Fix: the loopback check compared `tip`, which is copied straight out of
 * the packet (network byte order), with the host-order constant
 * INADDR_LOOPBACK, so it could never match on a little-endian machine.
 * The constant is now converted with htonl().
 */
int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
	/*
	 *	We shouldn't use this type conversion. Check later.
	 */
	struct arphdr *arp = (struct arphdr *)skb->h.raw;
	unsigned char *arp_ptr= (unsigned char *)(arp+1);
	struct arp_table *entry;
	struct arp_table *proxy_entry;
	int addr_hint,hlen,htype;
	unsigned long hash;
	unsigned char ha[MAX_ADDR_LEN];	/* So we can enable ints again. */
	long sip,tip;
	unsigned char *sha,*tha;

	/*
	 *	The hardware length of the packet should match the hardware length
	 *	of the device.  Similarly, the hardware types should match.  The
	 *	device should be ARP-able.  Also, if pln is not 4, then the lookup
	 *	is not from an IP number.  We can't currently handle this, so toss
	 *	it.
	 */
	if (arp->ar_hln != dev->addr_len ||
		dev->type != ntohs(arp->ar_hrd) ||
		dev->flags & IFF_NOARP ||
		arp->ar_pln != 4)
	{
		kfree_skb(skb, FREE_READ);
		return 0;
	}

	/*
	 *	Another test.
	 *	The logic here is that the protocol being looked up by arp should
	 *	match the protocol the device speaks.  If it doesn't, there is a
	 *	problem, so toss the packet.
	 */
	switch(dev->type)
	{
#ifdef CONFIG_AX25
		case ARPHRD_AX25:
			if(arp->ar_pro != htons(AX25_P_IP))
			{
				kfree_skb(skb, FREE_READ);
				return 0;
			}
			break;
#endif
		case ARPHRD_ETHER:
		case ARPHRD_ARCNET:
			if(arp->ar_pro != htons(ETH_P_IP))
			{
				kfree_skb(skb, FREE_READ);
				return 0;
			}
			break;

		default:
			printk("ARP: dev->type mangled!\n");
			kfree_skb(skb, FREE_READ);
			return 0;
	}

	/*
	 *	Extract fields. The variable part of the packet is laid out as
	 *	sender hw address, sender ip, target hw address, target ip.
	 */
	hlen  = dev->addr_len;
	htype = dev->type;

	sha=arp_ptr;			/* sender hardware address */
	arp_ptr+=hlen;
	memcpy(&sip,arp_ptr,4);		/* sender ip */
	arp_ptr+=4;
	tha=arp_ptr;			/* target hardware address */
	arp_ptr+=hlen;
	memcpy(&tip,arp_ptr,4);		/* target ip */

	/*
	 *	Check for bad requests for 127.0.0.1.  If this is one such, delete it.
	 *	tip is in network byte order, so the constant must be converted.
	 */
	if(tip == htonl(INADDR_LOOPBACK))
	{
		kfree_skb(skb, FREE_READ);
		return 0;
	}

	/*
	 *	Process entry.  The idea here is we want to send a reply if it is a
	 *	request for us or if it is a request for someone else that we hold
	 *	a proxy for.  We want to add an entry to our cache if it is a reply
	 *	to us or if it is a request for our address.
	 *	(The assumption for this last is that if someone is requesting our
	 *	address, they are probably intending to talk to us, so it saves time
	 *	if we cache their address.  Their address is also probably not in
	 *	our cache, since ours is not in their cache.)
	 *
	 *	Putting this another way, we only care about replies if they are to
	 *	us, in which case we add them to the cache.  For requests, we care
	 *	about those for us and those for our proxies.  We reply to both,
	 *	and in the case of requests for us we add the requester to the arp
	 *	cache.
	 */

	/* Classify the target ip address. */
	addr_hint = ip_chk_addr(tip);

	if(arp->ar_op == htons(ARPOP_REPLY))
	{
		if(addr_hint!=IS_MYADDR)
		{
			/*
			 *	Replies to other machines get tossed.
			 */
			kfree_skb(skb, FREE_READ);
			return 0;
		}
		/*
		 *	Fall through to code below that adds sender to cache.
		 */
	}
	else
	{
		/*
		 *	It is now an arp request
		 */
		/*
		 *	Only reply for the real device address or when it's in our proxy tables
		 */
		if(tip!=dev->pa_addr)
		{
			/*
			 *	To get in here, it is a request for someone else.  We need to
			 *	check if that someone else is one of our proxies.  If it isn't,
			 *	we can toss it.
			 */
			cli();
			for(proxy_entry=arp_tables[PROXY_HASH];
				proxy_entry;
				proxy_entry = proxy_entry->next)
			{
				/* we will respond to a proxy arp request
				   if the masked arp table ip matches the masked
				   tip. This allows a single proxy arp table
				   entry to be used on a gateway machine to handle
				   all requests for a whole network, rather than
				   having to use a huge number of proxy arp entries
				   and having to keep them uptodate.
				 */
				if (proxy_entry->dev != dev && proxy_entry->htype == htype &&
					!((proxy_entry->ip^tip)&proxy_entry->mask))
					break;
			}
			if (proxy_entry)
			{
				/* Copy the address so the reply can be sent with interrupts enabled. */
				memcpy(ha, proxy_entry->ha, hlen);
				sti();
				arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,ha);
				kfree_skb(skb, FREE_READ);
				return 0;
			}
			else
			{
				/* Not one of our proxies: drop the packet. */
				sti();
				kfree_skb(skb, FREE_READ);
				return 0;
			}
		}
		else
		{
			/*
			 *	To get here, it must be an arp request for us.  We need to reply.
			 */
			arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr);
		}
	}

	/*
	 *	Now all replies are handled.  Next, anything that falls through to here
	 *	needs to be added to the arp cache, or have its entry updated if it is
	 *	there.
	 */
	hash = HASH(sip);
	cli();
	for(entry=arp_tables[hash];entry;entry=entry->next)
		if(entry->ip==sip && entry->htype==htype)
			break;

	if(entry)
	{
		/*
		 *	Entry found; update it.
		 */
		memcpy(entry->ha, sha, hlen);
		entry->hlen = hlen;
		entry->last_used = jiffies;
		if (!(entry->flags & ATF_COM))
		{
			/*
			 *	This entry was incomplete.  Delete the retransmit timer
			 *	and switch to complete status.
			 */
			del_timer(&entry->timer);
			entry->flags |= ATF_COM;
			sti();
			/*
			 *	Send out waiting packets. We might have problems, if someone is
			 *	manually removing entries right now -- entry might become invalid
			 *	underneath us.
			 */
			arp_send_q(entry, sha);
		}
		else
		{
			sti();
		}
	}
	else
	{
		/*
		 *	No entry found.  Need to add a new entry to the arp table.
		 */
		entry = (struct arp_table *)kmalloc(sizeof(struct arp_table),GFP_ATOMIC);
		if(entry == NULL)
		{
			sti();
			printk("ARP: no memory for new arp entry\n");
			kfree_skb(skb, FREE_READ);
			return 0;
		}

		entry->mask = DEF_ARP_NETMASK;
		entry->ip = sip;
		entry->hlen = hlen;
		entry->htype = htype;
		entry->flags = ATF_COM;
		init_timer(&entry->timer);
		memcpy(entry->ha, sha, hlen);
		entry->last_used = jiffies;
		entry->dev = skb->dev;
		skb_queue_head_init(&entry->skb);
		/* Insert at the head of the bucket's list. */
		entry->next = arp_tables[hash];
		arp_tables[hash] = entry;
		sti();
	}

	/*
	 *	Replies have been sent, and entries have been added.  All done.
	 */
	kfree_skb(skb, FREE_READ);
	return 0;
}
/*
 * Find an arp mapping in the cache. If not found, post a request.
 *
 *   haddr - buffer that receives the hardware address
 *   paddr - IP address to resolve
 *   dev   - device the packet is going out on
 *   saddr - source IP address used if an ARP request has to be sent
 *   skb   - packet waiting for the address; may be NULL
 *
 * Returns 0 when haddr has been filled in, 1 when the address is not
 * (yet) resolved. In the latter case a non-NULL skb has been queued on
 * the cache entry, or released if no entry could be allocated and the
 * skb has its `free` flag set.
 *
 * Fix: the own-address and broadcast cases dereferenced skb
 * unconditionally although every other path treats a NULL skb as valid;
 * they are now guarded the same way.
 */
int arp_find(unsigned char *haddr, unsigned long paddr, struct device *dev,
	   unsigned long saddr, struct sk_buff *skb)
{
	struct arp_table *entry;
	unsigned long hash;
#ifdef CONFIG_IP_MULTICAST
	unsigned long taddr;
#endif

	switch (ip_chk_addr(paddr))
	{
		/* Asked for our own address: answer with the device's hardware address. */
		case IS_MYADDR:
			printk("ARP: arp called for own IP address\n");
			memcpy(haddr, dev->dev_addr, dev->addr_len);
			if (skb != NULL)
				skb->arp = 1;
			return 0;
#ifdef CONFIG_IP_MULTICAST
		/*
		 * Multicast: the hardware address is computed, not looked up.
		 * The first three bytes are 01:00:5e and the low 23 bits of the
		 * IP address fill the rest.
		 */
		case IS_MULTICAST:
			if(dev->type==ARPHRD_ETHER || dev->type==ARPHRD_IEEE802)
			{
				haddr[0]=0x01;
				haddr[1]=0x00;
				haddr[2]=0x5e;
				taddr=ntohl(paddr);
				haddr[5]=taddr&0xff;	/* lowest 8 bits */
				taddr=taddr>>8;
				haddr[4]=taddr&0xff;	/* next 8 bits */
				taddr=taddr>>8;
				haddr[3]=taddr&0x7f;	/* remaining 7 bits */
				return 0;
			}
		/*
		 *	If a device does not support multicast broadcast the stuff (eg AX.25 for now)
		 */
#endif
		/* Broadcast: use the device's broadcast hardware address. */
		case IS_BROADCAST:
			memcpy(haddr, dev->broadcast, dev->addr_len);
			if (skb != NULL)
				skb->arp = 1;
			return 0;
	}

	hash = HASH(paddr);
	cli();

	/*
	 *	Find an entry (the proxy list is deliberately not searched).
	 */
	entry = arp_lookup(paddr, PROXY_NONE);

	if (entry != NULL) 	/* It exists */
	{
		if (!(entry->flags & ATF_COM))
		{
			/*
			 *	A request was already send, but no reply yet. Thus
			 *	queue the packet with the previous attempt
			 */
			if (skb != NULL)
			{
				skb_queue_tail(&entry->skb, skb);
				skb_device_unlock(skb);
			}
			sti();
			return 1;
		}

		/*
		 *	Resolved entry: update the record and hand back the address.
		 */
		entry->last_used = jiffies;
		memcpy(haddr, entry->ha, dev->addr_len);
		if (skb)
			skb->arp = 1;
		sti();
		return 0;
	}

	/*
	 *	Create a new unresolved entry.
	 */
	entry = (struct arp_table *) kmalloc(sizeof(struct arp_table),
					GFP_ATOMIC);
	if (entry != NULL)
	{
		entry->mask = DEF_ARP_NETMASK;
		entry->ip = paddr;
		entry->hlen = dev->addr_len;
		entry->htype = dev->type;
		entry->flags = 0;	/* ATF_COM clear: not resolved yet */
		memset(entry->ha, 0, dev->addr_len);
		entry->dev = dev;
		entry->last_used = jiffies;
		/* Retry timer: arp_expire_request() gets the entry as its argument. */
		init_timer(&entry->timer);
		entry->timer.function = arp_expire_request;
		entry->timer.data = (unsigned long)entry;
		entry->timer.expires = ARP_RES_TIME;
		/* Insert at the head of the bucket's list. */
		entry->next = arp_tables[hash];
		arp_tables[hash] = entry;
		add_timer(&entry->timer);
		entry->retries = ARP_MAX_TRIES;
		skb_queue_head_init(&entry->skb);
		if (skb != NULL)
		{
			skb_queue_tail(&entry->skb, skb);
			skb_device_unlock(skb);
		}
	}
	else
	{
		/* No memory for an entry: the packet cannot be queued. */
		if (skb != NULL && skb->free)
			kfree_skb(skb, FREE_WRITE);
	}
	sti();

	/*
	 *	If we didn't find an entry, we will try to send an ARP packet.
	 */
	arp_send(ARPOP_REQUEST, ETH_P_ARP, paddr, dev, saddr, NULL,
		 dev->dev_addr);

	return 1;
}
/*
 * Write the contents of the ARP cache to a PROCfs file.
 *
 *   buffer - output buffer
 *   start  - set to the first byte of the requested window inside buffer
 *   offset - offset of the requested window within the full listing
 *   length - size of the requested window
 *
 * Returns the number of valid bytes at *start, at most `length`.
 *
 * Fixes: the two loop headers and the `pos < offset` block had been
 * mangled (text following a '<' was lost) and are restored; the hardware
 * address formatter no longer writes to hbuffer[-1] when an entry has a
 * zero-length hardware address.
 */
#define HBUFFERLEN 30

int arp_get_info(char *buffer, char **start, off_t offset, int length)
{
	int len=0;
	off_t begin=0;
	off_t pos=0;
	int size;
	struct arp_table *entry;
	char hbuffer[HBUFFERLEN];
	int i,j,k;
	const char hexbuf[] = "0123456789ABCDEF";

	size = sprintf(buffer,"IP address HW type Flags HW address Mask\n");
	pos+=size;
	len+=size;

	cli();
	for(i=0; i<FULL_ARP_TABLE_SIZE; i++)
	{
		for(entry=arp_tables[i]; entry!=NULL; entry=entry->next)
		{
			/*
			 *	Convert hardware address to XX:XX:XX:XX ... form.
			 */
#ifdef CONFIG_AX25
			if(entry->htype==ARPHRD_AX25)
				strcpy(hbuffer,ax2asc((ax25_address *)entry->ha));
			else {
#endif
			/* three characters per byte; stop while a full group still fits */
			for(k=0,j=0;k<HBUFFERLEN-3 && j<entry->hlen;j++)
			{
				hbuffer[k++]=hexbuf[ (entry->ha[j]>>4)&15 ];
				hbuffer[k++]=hexbuf[ entry->ha[j]&15 ];
				hbuffer[k++]=':';
			}
			/* overwrite the trailing ':' (if any) with the terminator */
			if (k > 0)
				k--;
			hbuffer[k]=0;
#ifdef CONFIG_AX25
			}
#endif
			size = sprintf(buffer+len,
				"%-17s0x%-10x0x%-10x%s",
				in_ntoa(entry->ip),
				(unsigned int)entry->htype,
				entry->flags,
				hbuffer);
			size += sprintf(buffer+len+size,
				" %-17s\n",
				entry->mask==DEF_ARP_NETMASK?
				"*":in_ntoa(entry->mask));

			len+=size;
			pos=begin+len;

			/* Everything so far lies before the window: discard it. */
			if(pos<offset)
			{
				len=0;
				begin=pos;
			}
			if(pos>offset+length)
				break;
		}
	}
	sti();

	*start=buffer+(offset-begin);	/* Start of wanted data */
	len-=(offset-begin);		/* Start slop */
	if(len>length)
		len=length;		/* Ending slop */
	return len;
}
/*
 * Look an IP address up in the ARP cache; returns the entry or NULL.
 *
 * The hash bucket of the address is searched first for an exact match.
 * Only if that fails, and proxy is not PROXY_NONE, the proxy list is
 * searched as well:
 *   PROXY_EXACT - a proxy entry matches if its ip equals paddr;
 *   otherwise   - it matches if ip and paddr agree in every bit that is
 *                 set in the entry's netmask, i.e.
 *                 ((entry->ip ^ paddr) & entry->mask) == 0.
 */
static struct arp_table *arp_lookup(unsigned long paddr, enum proxy proxy)
{
	struct arp_table *hit = arp_tables[HASH(paddr)];

	/* ordinary entries: exact match within the hash bucket */
	while (hit != NULL && hit->ip != paddr)
		hit = hit->next;

	if (hit != NULL || proxy == PROXY_NONE)
		return hit;

	/* it's possibly a proxy entry (with a netmask) */
	for (hit = arp_tables[PROXY_HASH]; hit != NULL; hit = hit->next)
	{
		if (proxy == PROXY_EXACT)
		{
			if (hit->ip == paddr)
				break;
		}
		else if (((hit->ip ^ paddr) & hit->mask) == 0)
			break;
	}
	return hit;
}
/*
 * Set (create) an ARP cache entry from a user-space struct arpreq.
 *
 * Returns 0 on success, or
 *   -EPFNOSUPPORT  protocol address is not AF_INET or the hardware type
 *                  is not one handled below,
 *   -EINVAL        protocol address is 0.0.0.0,
 *   -ENETUNREACH   no route to the address,
 *   -ENOMEM        no memory for a new entry.
 *
 * An existing entry is updated in place, unless its ATF_PUBL bit differs
 * from the request; then it is destroyed and a new entry is created on
 * the proper list (hash bucket or proxy list).
 */
static int arp_req_set(struct arpreq *req)
{
	struct arpreq r;
	struct arp_table *entry;
	struct rtable *rt;
	unsigned long ip;
	int htype, hlen;

	memcpy_fromfs(&r, req, sizeof(r));

	/* We only understand about IP addresses... */
	if (r.arp_pa.sa_family != AF_INET)
		return -EPFNOSUPPORT;

	/* Hardware type decides the hardware address length. */
	htype = r.arp_ha.sa_family;
	if (htype == ARPHRD_ETHER)
		hlen = ETH_ALEN;
	else if (htype == ARPHRD_ARCNET)
		hlen = 1;	/* length of arcnet addresses */
#ifdef CONFIG_AX25
	else if (htype == ARPHRD_AX25)
		hlen = 7;
#endif
	else
		return -EPFNOSUPPORT;

	ip = ((struct sockaddr_in *) &r.arp_pa)->sin_addr.s_addr;
	if (ip == 0)
	{
		printk("ARP: SETARP: requested PA is 0.0.0.0 !\n");
		return -EINVAL;
	}

	/* Is it reachable directly ? Addresses without a route are refused. */
	rt = ip_rt_route(ip, NULL, NULL);
	if (rt == NULL)
		return -ENETUNREACH;

	/* Is there an existing entry for this address? (exact proxy match) */
	cli();
	entry = arp_lookup(ip, PROXY_EXACT);
	if (entry != NULL && (entry->flags & ATF_PUBL) != (r.arp_flags & ATF_PUBL))
	{
		/* It sits on the wrong list: remove it and start afresh. */
		sti();
		arp_destroy(ip,1);
		cli();
		entry = NULL;
	}

	/* Do we need to create a new entry */
	if (entry == NULL)
	{
		/* published (proxy) entries live on their own list */
		unsigned long slot = (r.arp_flags & ATF_PUBL) ? PROXY_HASH : HASH(ip);

		entry = (struct arp_table *) kmalloc(sizeof(struct arp_table),
					GFP_ATOMIC);
		if (entry == NULL)
		{
			sti();
			return -ENOMEM;
		}
		entry->ip = ip;
		entry->hlen = hlen;
		entry->htype = htype;
		init_timer(&entry->timer);
		/* insert at the head of the list */
		entry->next = arp_tables[slot];
		arp_tables[slot] = entry;
		skb_queue_head_init(&entry->skb);
	}

	/*
	 * We now have a pointer to an ARP entry (new or pre-existing).
	 * Update it! The requested flags are taken over with ATF_COM OR-ed
	 * in, so the entry counts as resolved.
	 */
	memcpy(&entry->ha, &r.arp_ha.sa_data, hlen);
	entry->last_used = jiffies;
	entry->flags = r.arp_flags | ATF_COM;

	/* Only a published entry with ATF_NETMASK takes the supplied mask. */
	entry->mask = DEF_ARP_NETMASK;
	if ((entry->flags & ATF_PUBL) && (entry->flags & ATF_NETMASK))
		entry->mask = ((struct sockaddr_in *) &r.arp_netmask)->sin_addr.s_addr;

	entry->dev = rt->rt_dev;
	sti();
	return 0;
}
/*
* Get an ARP cache entry.
*/
// 获取arp缓存项
static int arp_req_get(struct arpreq *req)
{
struct arpreq r;
struct arp_table *entry;
struct sockaddr_in *si;
/*
* We only understand about IP addresses...
*/
memcpy_fromfs(&r, req, sizeof(r));
// 只支持AF_INET协议簇
if (r.arp_pa.sa_family != AF_INET)
return -EPFNOSUPPORT;
/*
* Is there an existing entry for this address?
*/
si = (struct sockaddr_in *) &r.arp_pa;
cli();
// 通过ip找arp缓存项
entry = arp_lookup(si->sin_addr.s_addr,PROXY_ANY);
if (entry == NULL)
{
sti();
return -ENXIO;
}
/*
* We found it; copy into structure.
*/
// 赋值硬件信息
memcpy(r.arp_ha.sa_data, &entry->ha, entry->hlen);
r.arp_ha.sa_family = entry->htype;
// arp缓存项标记位赋值
r.arp_flags = entry->flags;
sti();
/*
* Copy the information back
*/
memcpy_tofs(req, &r, sizeof(r));
return 0;
}
/*
 * Handle an ARP layer I/O control request.
 *
 *   SIOCDARP - delete the entry for the given address (superuser only)
 *   SIOCGARP - read an entry back into the user's struct arpreq
 *   SIOCSARP - create or modify an entry (superuser only)
 *
 * Any other command yields -EINVAL. `arg` is the user-space address of a
 * struct arpreq and is verified before it is accessed.
 */
int arp_ioctl(unsigned int cmd, void *arg)
{
	struct arpreq r;
	struct sockaddr_in *si;
	int err;

	/* Query: needs no privilege, but the user buffer must be writable. */
	if (cmd == SIOCGARP)
	{
		err = verify_area(VERIFY_WRITE, arg, sizeof(struct arpreq));
		if (err)
			return err;
		return arp_req_get((struct arpreq *)arg);
	}

	if (cmd != SIOCDARP && cmd != SIOCSARP)
		return -EINVAL;

	/* Delete and set share the privilege check and the read check. */
	if (!suser())
		return -EPERM;
	err = verify_area(VERIFY_READ, arg, sizeof(struct arpreq));
	if (err)
		return err;

	if (cmd == SIOCSARP)
		return arp_req_set((struct arpreq *)arg);

	/* SIOCDARP: forced delete of the entry for the given IP address. */
	memcpy_fromfs(&r, arg, sizeof(r));
	if (r.arp_pa.sa_family != AF_INET)
		return -EPFNOSUPPORT;
	si = (struct sockaddr_in *) &r.arp_pa;
	arp_destroy(si->sin_addr.s_addr, 1);
	return 0;
}
/*
 * Called once on startup.
 */
/*
 * Packet-type record that arp_init() registers with dev_add_pack(); it
 * names arp_rcv() as the handler for ARP frames. The type field is left 0
 * here and filled in by arp_init().
 */
static struct packet_type arp_packet_type =
{
0, /* Should be: __constant_htons(ETH_P_ARP) - but this _doesn't_ come out constant! */
NULL, /* All devices */
arp_rcv,
NULL,
NULL
};
/* Notifier record registered in arp_init(); device events are delivered to arp_device_event(). */
static struct notifier_block arp_dev_notifier={
arp_device_event,
NULL,
0
};
/*
 * Initialise the ARP layer: register the packet handler, start the
 * periodic expiry timer and subscribe to device events.
 */
void arp_init (void)
{
/* Register the packet type */
/* The type is set here at run time, then arp_rcv() is hooked in as the handler for ARP frames. */
arp_packet_type.type=htons(ETH_P_ARP);
dev_add_pack(&arp_packet_type);
/* Start with the regular checks for expired arp entries. */
/* From now on arp_check_expire() runs periodically and re-arms this timer itself. */
add_timer(&arp_timer);
/* Register for device down reports */
/* Cache entries are tied to a device, so arp_device_event() must be told when a device goes down. */
register_netdevice_notifier(&arp_dev_notifier);
}