Linux内核-arp协议

从ip_finish_output2到dev_queue_xmit路径:

Linux内核-arp协议_第1张图片

arping命令:

http://www.bluestep.cc/linux%e5%91%bd%e4%bb%a4arping-%e7%bd%91%e7%bb%9c%e7%ae%a1%e7%90%86-%e9%80%9a%e8%bf%87%e5%8f%91%e9%80%81arp%e5%8d%8f%e8%ae%ae%e6%8a%a5%e6%96%87%e6%b5%8b%e8%af%95%e7%bd%91%e7%bb%9c/

arp协议:

Linux内核-arp协议_第2张图片

Linux内核-arp协议_第3张图片

 

Linux内核-arp协议_第4张图片

Linux内核-arp协议_第5张图片

 arp报文结构:

Linux内核-arp协议_第6张图片

(1).硬件类型:

硬件地址类型,该字段值一般为ARPHRD_ETHER,表示以太网。

F:\company\Linux\linux-4.1.45\linux-4.1.45\include\uapi\linux\if_arp.h
/* ARP protocol HARDWARE identifiers. */
#define ARPHRD_NETROM	0		/* from KA9Q: NET/ROM pseudo	*/
#define ARPHRD_ETHER 	1		/* Ethernet 10Mbps		*/
#define	ARPHRD_EETHER	2		/* Experimental Ethernet	*/
#define	ARPHRD_AX25	3		/* AX.25 Level 2		*/
#define	ARPHRD_PRONET	4		/* PROnet token ring		*/
#define	ARPHRD_CHAOS	5		/* Chaosnet			*/
#define	ARPHRD_IEEE802	6		/* IEEE 802.2 Ethernet/TR/TB	*/
#define	ARPHRD_ARCNET	7		/* ARCnet			*/
#define	ARPHRD_APPLETLK	8		/* APPLEtalk			*/
#define ARPHRD_DLCI	15		/* Frame Relay DLCI		*/
#define ARPHRD_ATM	19		/* ATM 				*/
...

 (2).协议类型:

表示三层地址使用的协议,该字段值一般为ETH_P_IP,表示IP协议

F:\company\Linux\linux-4.1.45\linux-4.1.45\include\uapi\linux\if_ether.h
#define ETH_P_LOOP	0x0060		/* Ethernet Loopback packet	*/
#define ETH_P_PUP	0x0200		/* Xerox PUP packet		*/
#define ETH_P_PUPAT	0x0201		/* Xerox PUP Addr Trans packet	*/
#define ETH_P_IP	0x0800		/* Internet Protocol packet	*/
#define ETH_P_X25	0x0805		/* CCITT X.25			*/
#define ETH_P_ARP	0x0806		/* Address Resolution packet	*/
...

(3)硬件地址长度,以太网MAC地址就是6;

(4)协议地址长度,IP地址就是4;

(5)操作码

常见的有四种,arp请求,arp相应,rarp请求,rarp相应。

F:\company\Linux\linux-4.1.45\linux-4.1.45\include\uapi\linux\if_arp.h
/* ARP protocol opcodes. */
#define	ARPOP_REQUEST	1		/* ARP request			*/
#define	ARPOP_REPLY	2		/* ARP reply			*/
#define	ARPOP_RREQUEST	3		/* RARP request			*/
#define	ARPOP_RREPLY	4		/* RARP reply			*/
#define	ARPOP_InREQUEST	8		/* InARP request		*/
#define	ARPOP_InREPLY	9		/* InARP reply			*/
#define	ARPOP_NAK	10		/* (ATM)ARP NAK			*/

(6)发送方硬件地址与IP地址,(7)目标硬件地址与目标IP地址。

arp头数据结构:

F:\company\Linux\linux-4.1.45\linux-4.1.45\include\uapi\linux\if_arp.h
/*
 *	This structure defines an ethernet arp header.
 */

struct arphdr {
	__be16		ar_hrd;		/* format of hardware address	*/
	__be16		ar_pro;		/* format of protocol address	*/
	unsigned char	ar_hln;		/* length of hardware address	*/
	unsigned char	ar_pln;		/* length of protocol address	*/
	__be16		ar_op;		/* ARP opcode (command)		*/

#if 0
	 /*
	  *	 Ethernet looks like this : This bit is variable sized however...
	  */
	unsigned char		ar_sha[ETH_ALEN];	/* sender hardware address	*/
	unsigned char		ar_sip[4];		/* sender IP address		*/
	unsigned char		ar_tha[ETH_ALEN];	/* target hardware address	*/
	unsigned char		ar_tip[4];		/* target IP address		*/
#endif

};

1. arp_init()

arp模块的初始化函数为arp_init(),这个函数在ipv4协议栈的初始化函数inet_init()中被调用。
1.初始化arp表arp_tbl;
2.注册arp协议类型;
3.建立arp相关proc文件,/proc/net/arp;
4.注册通知事件

F:\company\Linux\linux-4.1.45\linux-4.1.45\net\ipv4\arp.c
void __init arp_init(void)
{
	neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl);//初始化arp协议的邻居表

	dev_add_pack(&arp_packet_type);//在协议栈中注册arp协议
	arp_proc_init();//建立proc对象
#ifdef CONFIG_SYSCTL
	neigh_sysctl_register(NULL, &arp_tbl.parms, NULL);
#endif
	register_netdevice_notifier(&arp_netdev_notifier);//注册通知事件
}

arp邻居项函数指针表:

F:\company\Linux\linux-4.1.45\linux-4.1.45\net\ipv4\arp.c
static const struct neigh_ops arp_generic_ops = {
	.family =		AF_INET,
	.solicit =		arp_solicit,
	.error_report =		arp_error_report,
	.output =		neigh_resolve_output,
	.connected_output =	neigh_connected_output,
};

static const struct neigh_ops arp_hh_ops = {
	.family =		AF_INET,
	.solicit =		arp_solicit,
	.error_report =		arp_error_report,
	.output =		neigh_resolve_output,
	.connected_output =	neigh_resolve_output,
};

static const struct neigh_ops arp_direct_ops = {
	.family =		AF_INET,
	.output =		neigh_direct_output,
	.connected_output =	neigh_direct_output,
};

neigh_table:

一个neigh_table对应一种邻居协议,IPv4就是arp协议。用来存储于邻居协议相关的参数、功能函数、邻居项散列表等。

struct neigh_table {
	int			family;/*地址族,arp为AF_INET*/
	/*邻居项结构大小:sizeof(neighbour+4),因为neighbour结构最后一个成员0长数组,用于存储4字节长IP地址。*/
	int			entry_size;
	/*hash函数所使用的键值长度,就是IP地址长度,为4*/
	int			key_len;
	/*ETH_P_IP*/
	__be16			protocol;
	/*hash函数,arp_hash*/
	__u32			(*hash)(const void *pkey,
					const struct net_device *dev,
					__u32 *hash_rnd);
	bool			(*key_eq)(const struct neighbour *, const void *pkey);
	/*邻居表项初始化函数,用于初始化neighbour结构实例,即arp_constructor,在neigh_create中被调用*/
	int			(*constructor)(struct neighbour *);
	/*创建和释放一个代理项时被调用,代理先不管*/
	int			(*pconstructor)(struct pneigh_entry *);
	void			(*pdestructor)(struct pneigh_entry *);
	/*用来处理在proxy_queue缓存队列中的代理arp报文*/
	void			(*proxy_redo)(struct sk_buff *skb);
	/*用来分配neighbour结构实例的缓存区名,即arp_cache。*/
	char			*id;
	/*存储一些与协议相关的可调节参数,如超时重传时间,proxy_queue队列长度等*/
	struct neigh_parms	parms;
	struct list_head	parms_list;
	int			gc_interval;
	int			gc_thresh1;
	int			gc_thresh2;
	int			gc_thresh3;
	unsigned long		last_flush;
	struct delayed_work	gc_work;
	/*处理proxy_queue的定时器*/
	struct timer_list 	proxy_timer;
	/*对于接收到的需要进行代理的arp报文,先缓存到proxy_queue,在定时器处理函数中再对其进行处理。*/
	struct sk_buff_head	proxy_queue;
	/*邻居项条目数,在neigh_alloc()、neigh_destroy()中更新*/
	atomic_t		entries;
	rwlock_t		lock;
	unsigned long		last_rand;
	/*记录邻居表中有关邻居项的各类统计数据*/
	struct neigh_statistics	__percpu *stats;
	/*存储邻居项的散列表:hash表,用来存储邻居项*/
	struct neigh_hash_table __rcu *nht;
	/*存储arp代理三层协议地址的散列表*/
	struct pneigh_entry	**phash_buckets;
};

 neighbour

一个neighbour对应一个邻居项,就是一个arp条目

struct neighbour {
	struct neighbour __rcu	*next;
	struct neigh_table	*tbl;/*指向arp_tbl*/
	struct neigh_parms	*parms;
	unsigned long		confirmed;
	unsigned long		updated;
	rwlock_t		lock;
	atomic_t		refcnt;/*引用计数*/
	struct sk_buff_head	arp_queue;/*用来缓存待发送的报文*/
	unsigned int		arp_queue_len_bytes;
	struct timer_list	timer;/*定时器*/
	unsigned long		used;
	atomic_t		probes;
	__u8			flags;
	__u8			nud_state;/*邻居项状态*/
	__u8			type;/*邻居地址类型,例如单播、组播、广播等*/
	/*生存标志,为1时,表示该邻居项正在被删除,最终通过垃圾回收将其删除*/
	__u8			dead;
	seqlock_t		ha_lock;
	/*邻居项MAC地址*/
	unsigned char		ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
	/*缓存二层报头,包括目的MAC地址*/
	struct hh_cache		hh;
	/*输出函数,用来将报文输出到该邻居*/
	int			(*output)(struct neighbour *, struct sk_buff *);
	/*邻居项函数指针*/
	const struct neigh_ops	*ops;
	struct rcu_head		rcu;
	struct net_device	*dev;/*通过该设备访问邻居项*/
	u8			primary_key[0];/*存储IP地址*/
};

邻居项函数指针表,实现三层和二层的dev_queue_xmit()之间的跳转。

struct neigh_ops {
	int			family;//AF_INET
	/*发送arp报文*/
	void			(*solicit)(struct neighbour *, struct sk_buff *);
	/*向三层报告错误*/
	void			(*error_report)(struct neighbour *, struct sk_buff *);
	/*通用的输出函数,实现了完整的输出过程,存在较多的操作。*/
	int			(*output)(struct neighbour *, struct sk_buff *);
	/*确定邻居可达,即状态为NUD_CONNETCTE时的输出函数,由于所有输出所需要的信息都已具备,
	该函数只是简单地添加二层首部,发送*/
	int			(*connected_output)(struct neighbour *, struct sk_buff *);
};

neigh_statistics

用来存储统计信息,一个结构实例对应一个网络设备上的一种邻居协议。

struct neigh_statistics {
	unsigned long allocs;		/* number of allocated neighs */
	unsigned long destroys;		/* number of destroyed neighs */
	unsigned long hash_grows;	/* number of hash resizes */

	unsigned long res_failed;	/* number of failed resolutions */

	unsigned long lookups;		/* number of lookups */
	unsigned long hits;		/* number of hits (among lookups) */

	unsigned long rcv_probes_mcast;	/* number of received mcast ipv6 */
	unsigned long rcv_probes_ucast; /* number of received ucast ipv6 */

	unsigned long periodic_gc_runs;	/* number of periodic GC runs */
	unsigned long forced_gc_runs;	/* number of forced GC runs */

	unsigned long unres_discards;	/* number of unresolved drops */
};

arp表结构:arp_tbl

F:\company\Linux\linux-4.1.45\linux-4.1.45\net\ipv4\arp.c
struct neigh_table arp_tbl = {
	.family		= AF_INET,
	.key_len	= 4,
	.protocol	= cpu_to_be16(ETH_P_IP),
	.hash		= arp_hash,
	.key_eq		= arp_key_eq,
	.constructor	= arp_constructor,
	.proxy_redo	= parp_redo,
	.id		= "arp_cache",
	.parms		= {
		.tbl			= &arp_tbl,
		.reachable_time		= 30 * HZ,
		.data	= {
			[NEIGH_VAR_MCAST_PROBES] = 3,
			[NEIGH_VAR_UCAST_PROBES] = 3,
			[NEIGH_VAR_RETRANS_TIME] = 1 * HZ,
			[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
			[NEIGH_VAR_PROXY_QLEN] = 64,
			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
			[NEIGH_VAR_PROXY_DELAY]	= (8 * HZ) / 10,
			[NEIGH_VAR_LOCKTIME] = 1 * HZ,
		},
	},
	.gc_interval	= 30 * HZ,
	.gc_thresh1	= 128,
	.gc_thresh2	= 512,
	.gc_thresh3	= 1024,
};

dev_add_pack()

注册arp报文类型:dev_add_pack(&arp_packet_type);

就是把arp_packet_type添加到ptype_base哈希表中。

void dev_add_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);

	spin_lock(&ptype_lock);
	list_add_rcu(&pt->list, head);
	spin_unlock(&ptype_lock);
}

static inline struct list_head *ptype_head(const struct packet_type *pt)
{
	if (pt->type == htons(ETH_P_ALL))
		return pt->dev ? &pt->dev->ptype_all : &ptype_all;
	else
		return pt->dev ? &pt->dev->ptype_specific :
				 &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}

struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;

static struct packet_type arp_packet_type __read_mostly = {
	.type =	cpu_to_be16(ETH_P_ARP),
	.func =	arp_rcv,
};

struct packet_type {
	__be16			type;	/* This is really htons(ether_type). */
	struct net_device	*dev;	/* NULL is wildcarded here	     */
	int			(*func) (struct sk_buff *,
					 struct net_device *,
					 struct packet_type *,
					 struct net_device *);
	bool			(*id_match)(struct packet_type *ptype,
					    struct sock *sk);
	void			*af_packet_priv;
	struct list_head	list;
};

 

register_netdevice_notifier 

注册新通知事件的时候,在已经注册和UP的设备上,会调用一次这个通知事件。

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 * 	When registered all registration and up events are replayed
 *	to the new notifier to allow device to have a race free
 *	view of the network device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	/*新事件注册到netdev_chain通知链上*/
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			/*在已经注册的设备上调用该事件*/
			err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;
			/*在UP的设备上调用该事件*/
			call_netdevice_notifier(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				goto outroll;

			if (dev->flags & IFF_UP) {
				call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
							dev);
				call_netdevice_notifier(nb, NETDEV_DOWN, dev);
			}
			call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
		}
	}

outroll:
	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
static RAW_NOTIFIER_HEAD(netdev_chain);
#define RAW_NOTIFIER_HEAD(name)					\
	struct raw_notifier_head name =				\
		RAW_NOTIFIER_INIT(name)
		
struct raw_notifier_head {
	struct notifier_block __rcu *head;
};
		
搞了半天就是:
struct raw_notifier_head netdev_chain = {.head = NULL }

 设备事件类型:

/* netdevice notifier chain. Please remember to update the rtnetlink
 * notification exclusion list in rtnetlink_event() when adding new
 * types.
 */
#define NETDEV_UP	0x0001	/* For now you can't veto a device up/down */
#define NETDEV_DOWN	0x0002
#define NETDEV_REBOOT	0x0003	/* Tell a protocol stack a network interface
				   detected a hardware crash and restarted
				   - we can use this eg to kick tcp sessions
				   once done */
#define NETDEV_CHANGE	0x0004	/* Notify device state change */
#define NETDEV_REGISTER 0x0005
#define NETDEV_UNREGISTER	0x0006
#define NETDEV_CHANGEMTU	0x0007 /* notify after mtu change happened */
#define NETDEV_CHANGEADDR	0x0008
#define NETDEV_GOING_DOWN	0x0009
#define NETDEV_CHANGENAME	0x000A
#define NETDEV_FEAT_CHANGE	0x000B
#define NETDEV_BONDING_FAILOVER 0x000C
#define NETDEV_PRE_UP		0x000D
#define NETDEV_PRE_TYPE_CHANGE	0x000E
#define NETDEV_POST_TYPE_CHANGE	0x000F
#define NETDEV_POST_INIT	0x0010
#define NETDEV_UNREGISTER_FINAL 0x0011
#define NETDEV_RELEASE		0x0012
#define NETDEV_NOTIFY_PEERS	0x0013
#define NETDEV_JOIN		0x0014
#define NETDEV_CHANGEUPPER	0x0015
#define NETDEV_RESEND_IGMP	0x0016
#define NETDEV_PRECHANGEMTU	0x0017 /* notify before mtu change happened */
#define NETDEV_CHANGEINFODATA	0x0018
#define NETDEV_BONDING_INFO	0x0019

Linux内核-arp协议_第7张图片

__neigh_create:

创建一个邻居项,并将其添加到散列表上,返回指向该邻居项的指针。
tbl:待创建的邻居项所属的邻居表,即arp_tbl;
pkey:三层协议地址(IP地址)
dev:输出设备
want_ref:??

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	/*调用neigh_alloc创建邻居项*/
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	/*设置邻居项的三层协议地址、输出设备*/
	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	
	/*增加设备引用计数*/
	dev_hold(dev);

	/* Protocol specific setup. */
	/*执行邻居表的邻居项初始化函数,arp为arp_constructor完成*/
	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/*指向设备的邻居项初始化函数*/
	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	/*以太网设备neigh_setup为NULL*/
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/*初始化邻居项的确认时间*/
	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*hash扩容*/
	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	/*计算hash值*/
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	/*邻居项正在被删除*/
	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/*邻居项已经存在,递增其引用计数,释放新创建的邻居项*/
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				/*增加引用计数,#define neigh_hold(n)	atomic_inc(&(n)->refcnt)*/
				neigh_hold(n1); 
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	/*不存在,添加邻居项到hash表中*/
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}

neigh_alloc

创建邻居项

static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	/*递增邻居表中邻居项的条目,然后返回当前条目(递增前)*/
	entries = atomic_inc_return(&tbl->entries) - 1;
	/*数目>=gc_thresh3,或者 >=gc_thresh2并且已超过5s未刷新,则必须立即刷新并强制垃圾回收*/
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		/*如果垃圾回收失败,并且数目>=gc_thresh3,则不分配邻居项*/
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	/*分配邻居项*/
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	/*初始化neighbour成员*/
	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	/*parms初始化为tbl->parms*/
	n->parms	  = neigh_parms_clone(&tbl->parms);
	/*设置定时器*/
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;/*刚创建neighbour时,n->dead为1,在__neigh_create中被设置为0*/
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}

arp_constructor

1.设置邻居项的类型
2.设置邻居项的ops指针
3.设置邻居项的output函数指针

static int arp_constructor(struct neighbour *neigh)
{
	__be32 addr = *(__be32 *)neigh->primary_key;
	struct net_device *dev = neigh->dev;
	struct in_device *in_dev;
	struct neigh_parms *parms;

	rcu_read_lock();
	/*获取IP配置块*/
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev) {
		rcu_read_unlock();
		return -EINVAL;
	}

	/*获取邻居项的类型*/
	neigh->type = inet_addr_type(dev_net(dev), addr);

	/*neigh->parms在neigh_alloc函数中初始化为tbl->parms,在这里初始化为in_dev->arp_parms*/
	parms = in_dev->arp_parms;
	__neigh_parms_put(neigh->parms);
	neigh->parms = neigh_parms_clone(parms);
	rcu_read_unlock();

	/*对于以太网设备,其dev->header_ops为eth_header_ops*/
	if (!dev->header_ops) {
		neigh->nud_state = NUD_NOARP;
		neigh->ops = &arp_direct_ops;
		neigh->output = neigh_direct_output;
	} else {
		/* Good devices (checked by reading texts, but only Ethernet is
		   tested)

		   ARPHRD_ETHER: (ethernet, apfddi)
		   ARPHRD_FDDI: (fddi)
		   ARPHRD_IEEE802: (tr)
		   ARPHRD_METRICOM: (strip)
		   ARPHRD_ARCNET:
		   etc. etc. etc.

		   ARPHRD_IPDDP will also work, if author repairs it.
		   I did not it, because this driver does not work even
		   in old paradigm.
		 */

		if (neigh->type == RTN_MULTICAST) {
			neigh->nud_state = NUD_NOARP;
			arp_mc_map(addr, neigh->ha, dev, 1);
		} else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) {
			neigh->nud_state = NUD_NOARP;
			memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
		} else if (neigh->type == RTN_BROADCAST ||
			   (dev->flags & IFF_POINTOPOINT)) {
			neigh->nud_state = NUD_NOARP;
			memcpy(neigh->ha, dev->broadcast, dev->addr_len);
		}

		/*对于以太网设备,其header_ops->cache为eth_header_cache,所以对于以太网设备其neighbour->ops为arp_hh_ops*/
		if (dev->header_ops->cache)
			neigh->ops = &arp_hh_ops;
		else
			neigh->ops = &arp_generic_ops;

		/*对于邻居项状态为有效状态时,则将neigh->output设置为neigh->ops->connected_output*/
		if (neigh->nud_state & NUD_VALID)
			neigh->output = neigh->ops->connected_output;
		else
			neigh->output = neigh->ops->output;
	}
	return 0;
}

 dst_neigh_output

static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
				   struct sk_buff *skb)
{
	const struct hh_cache *hh;

	if (dst->pending_confirm) {
		unsigned long now = jiffies;

		dst->pending_confirm = 0;
		/* avoid dirtying neighbour */
		if (n->confirmed != now)
			n->confirmed = now;
	}

	hh = &n->hh;
	if ((n->nud_state & NUD_CONNECTED) && hh->hh_len)
		return neigh_hh_output(hh, skb);
	else
		return n->output(n, skb);
}

neigh_resolve_output

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			/*sbk添加二层头*/
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			/*发送skb*/
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}

 neigh_event_send

static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	unsigned long now = jiffies;
	
	/*更新最近一次使用时间*/
	if (neigh->used != now)
		neigh->used = now;
	/*此时状态为NUD_NONE*/
	if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE)))
		return __neigh_event_send(neigh, skb);
	return 0;
}

__neigh_event_send

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			/*设置邻居状态为NUD_INCOMPLETE*/
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			/*添加定时器*/
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	/*如果队列满了,把arp队列中前面几个skb删除*/
	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			/*队列添加到arp队列*/
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}

 neigh_timer_handler

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	/*发送报文请求次数大于上限*/
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	/*发送arp请求报文*/
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}

neigh_probe

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	/*调用arp_solicit发送arp请求报文*/
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}

 

arp_error_report()

调用dst_link_failure()函数向三层报告错误,当邻居项缓存中还有未发送的报文,而该邻居却无法访问时被调用。不懂。


static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
	dst_link_failure(skb);
	kfree_skb(skb);
}

arp_solicit()

用来发送arp请求,在邻居项状态定时器处理函数中被调用。

neigh:arp请求的目的邻居项

skb:缓存在该邻居项中的待发送报文,用来获取该skb的源ip地址。


static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
	__be32 saddr = 0;
	u8 dst_ha[MAX_ADDR_LEN], *dst_hw = NULL;
	struct net_device *dev = neigh->dev;
	__be32 target = *(__be32 *)neigh->primary_key;
	int probes = atomic_read(&neigh->probes);
	struct in_device *in_dev;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev) {
		rcu_read_unlock();
		return;
	}
	switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
	default:
	case 0:		/* By default announce any local IP */
		if (skb && inet_addr_type(dev_net(dev),
					  ip_hdr(skb)->saddr) == RTN_LOCAL)
			saddr = ip_hdr(skb)->saddr;
		break;
	case 1:		/* Restrict announcements of saddr in same subnet */
		if (!skb)
			break;
		saddr = ip_hdr(skb)->saddr;
		if (inet_addr_type(dev_net(dev), saddr) == RTN_LOCAL) {
			/* saddr should be known to target */
			if (inet_addr_onlink(in_dev, target, saddr))
				break;
		}
		saddr = 0;
		break;
	case 2:		/* Avoid secondary IPs, get a primary/preferred one */
		break;
	}
	rcu_read_unlock();

	if (!saddr)
		saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);

	probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
	if (probes < 0) {
		if (!(neigh->nud_state & NUD_VALID))
			pr_debug("trying to ucast probe in NUD_INVALID\n");
		neigh_ha_snapshot(dst_ha, neigh, dev);
		dst_hw = dst_ha;
	} else {
		probes -= NEIGH_VAR(neigh->parms, APP_PROBES);
		if (probes < 0) {
			neigh_app_ns(neigh);
			return;
		}
	}

	arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
		 dst_hw, dev->dev_addr, NULL);
}

 将得到的硬件源、目的地址,IP源、目的地址等作为参数,调用arp_send()函数创建一个arp报文并将其输出。

 

arp_send()

创建及发送arp报文

/*
 *	Create and send an arp packet.
 */
void arp_send(int type, int ptype, __be32 dest_ip,
	      struct net_device *dev, __be32 src_ip,
	      const unsigned char *dest_hw, const unsigned char *src_hw,
	      const unsigned char *target_hw)
{
	struct sk_buff *skb;

	/*
	 *	No arp on this interface.
	 */

	if (dev->flags&IFF_NOARP)
		return;

	skb = arp_create(type, ptype, dest_ip, dev, src_ip,
			 dest_hw, src_hw, target_hw);
	if (!skb)
		return;

	arp_xmit(skb);
}

arp_create()

创建arp报文,填充字段。


/*
 *	Interface to link layer: send routine and receive handler.
 */

/*
 *	Create an arp packet. If dest_hw is not set, we create a broadcast
 *	message.
 */
struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
			   struct net_device *dev, __be32 src_ip,
			   const unsigned char *dest_hw,
			   const unsigned char *src_hw,
			   const unsigned char *target_hw)
{
	struct sk_buff *skb;
	struct arphdr *arp;
	unsigned char *arp_ptr;
	int hlen = LL_RESERVED_SPACE(dev);
	int tlen = dev->needed_tailroom;

	/*
	 *	Allocate a buffer
	 */

	skb = alloc_skb(arp_hdr_len(dev) + hlen + tlen, GFP_ATOMIC);
	if (!skb)
		return NULL;

	skb_reserve(skb, hlen);
	skb_reset_network_header(skb);
	arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev));
	skb->dev = dev;
	skb->protocol = htons(ETH_P_ARP);
	if (!src_hw)
		src_hw = dev->dev_addr;
        /*目的MAC未知时,置1*/
	if (!dest_hw)
		dest_hw = dev->broadcast;

	/*
	 *	Fill the device header for the ARP frame
	 */
	if (dev_hard_header(skb, dev, ptype, dest_hw, src_hw, skb->len) < 0)
		goto out;

	/*
	 * Fill out the arp protocol part.
	 *
	 * The arp hardware type should match the device type, except for FDDI,
	 * which (according to RFC 1390) should always equal 1 (Ethernet).
	 */
	/*
	 *	Exceptions everywhere. AX.25 uses the AX.25 PID value not the
	 *	DIX code for the protocol. Make these device structure fields.
	 */
	switch (dev->type) {
	default:
		arp->ar_hrd = htons(dev->type);
		arp->ar_pro = htons(ETH_P_IP);
		break;

#if IS_ENABLED(CONFIG_AX25)
	case ARPHRD_AX25:
		arp->ar_hrd = htons(ARPHRD_AX25);
		arp->ar_pro = htons(AX25_P_IP);
		break;

#if IS_ENABLED(CONFIG_NETROM)
	case ARPHRD_NETROM:
		arp->ar_hrd = htons(ARPHRD_NETROM);
		arp->ar_pro = htons(AX25_P_IP);
		break;
#endif
#endif

#if IS_ENABLED(CONFIG_FDDI)
	case ARPHRD_FDDI:
		arp->ar_hrd = htons(ARPHRD_ETHER);
		arp->ar_pro = htons(ETH_P_IP);
		break;
#endif
	}

	arp->ar_hln = dev->addr_len;
	arp->ar_pln = 4;
	arp->ar_op = htons(type);

	arp_ptr = (unsigned char *)(arp + 1);

	memcpy(arp_ptr, src_hw, dev->addr_len);
	arp_ptr += dev->addr_len;
	memcpy(arp_ptr, &src_ip, 4);
	arp_ptr += 4;

	switch (dev->type) {
#if IS_ENABLED(CONFIG_FIREWIRE_NET)
	case ARPHRD_IEEE1394:
		break;
#endif
	default:
		if (target_hw)
			memcpy(arp_ptr, target_hw, dev->addr_len);
		else
			memset(arp_ptr, 0, dev->addr_len);
		arp_ptr += dev->addr_len;
	}
	memcpy(arp_ptr, &dest_ip, 4);

	return skb;

out:
	kfree_skb(skb);
	return NULL;
}

arp_xmit()

发送arp报文

/*
 *	Send an arp packet.
 */
void arp_xmit(struct sk_buff *skb)
{
	/* Send it off, maybe filter it using firewalling first.  */
	NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, NULL, skb,
		NULL, skb->dev, dev_queue_xmit_sk);
}

arp报文的输入:

arp_rcv()

用来从二层接收并处理一个arp报文。这个函数中就是做了一些参数检查,然后调用arp_process()函数。

Linux内核-arp协议_第8张图片

/*
 *	Receive an arp request from the device layer.
 */

static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
		   struct packet_type *pt, struct net_device *orig_dev)
{
	const struct arphdr *arp;

	/* do not tweak dropwatch on an ARP we will ignore */
	if (dev->flags & IFF_NOARP || //网络设备知否支持arp协议
	    skb->pkt_type == PACKET_OTHERHOST || //arp报文是否是转发的包,表示这个包不应该由自己接收
	    skb->pkt_type == PACKET_LOOPBACK) //arp报文来自回环接口
		goto consumeskb;

	skb = skb_share_check(skb, GFP_ATOMIC);//如果skb是共享的,就复制一份
	if (!skb)
		goto out_of_mem;

	/* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
        /*检测arp报文的完整性,其长度是否等于一个arp头部长度+两个硬件地址长度+两个IP地址长度*/
	if (!pskb_may_pull(skb, arp_hdr_len(dev)))
		goto freeskb;

	arp = arp_hdr(skb);
        /*arp报文的硬件地址长度与网络设备的硬件地址长度是否匹配,arp报文的协议地址长度是否为4*/
	if (arp->ar_hln != dev->addr_len || arp->ar_pln != 4)
		goto freeskb;

	memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); //#define NEIGH_CB(skb)	((struct neighbour_cb *)(skb)->cb)cb[]数组中存放的是每一协议层都可以自由使用的一段空间,一般用来存放控制指令和控制数据
       

	return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, NULL, skb, //netfilter arp hook函数
		       dev, NULL, arp_process);

consumeskb:
	consume_skb(skb);
	return 0;
freeskb:
	kfree_skb(skb);
out_of_mem:
	return 0;
}

arp_process()


/*
 *	Process an arp request.
 */

static int arp_process(struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct arphdr *arp;
	unsigned char *arp_ptr;
	struct rtable *rt;
	unsigned char *sha;
	__be32 sip, tip;
	u16 dev_type = dev->type;
	int addr_type;
	struct neighbour *n;
	struct net *net = dev_net(dev);
	bool is_garp = false;

	/* arp_rcv below verifies the ARP header and verifies the device
	 * is ARP'able.
	 */

	if (!in_dev)//输入网络设备的IP配置块
		goto out;

	arp = arp_hdr(skb);
        
        //根据网络设备类型,检测arp报文中硬件类型与协议类型的有效性
	switch (dev_type) {
	default:
		if (arp->ar_pro != htons(ETH_P_IP) ||
		    htons(dev_type) != arp->ar_hrd)
			goto out;
		break;
	case ARPHRD_ETHER:
	case ARPHRD_FDDI:
	case ARPHRD_IEEE802:
		/*
		 * ETHERNET, and Fibre Channel (which are IEEE 802
		 * devices, according to RFC 2625) devices will accept ARP
		 * hardware types of either 1 (Ethernet) or 6 (IEEE 802.2).
		 * This is the case also of FDDI, where the RFC 1390 says that
		 * FDDI devices should accept ARP hardware of (1) Ethernet,
		 * however, to be more robust, we'll accept both 1 (Ethernet)
		 * or 6 (IEEE 802.2)
		 */
		if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
		     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
		    arp->ar_pro != htons(ETH_P_IP))
			goto out;
		break;
	case ARPHRD_AX25:
		if (arp->ar_pro != htons(AX25_P_IP) ||
		    arp->ar_hrd != htons(ARPHRD_AX25))
			goto out;
		break;
	case ARPHRD_NETROM:
		if (arp->ar_pro != htons(AX25_P_IP) ||
		    arp->ar_hrd != htons(ARPHRD_NETROM))
			goto out;
		break;
	}

	/* Understand only these message types */

	if (arp->ar_op != htons(ARPOP_REPLY) &&
	    arp->ar_op != htons(ARPOP_REQUEST))
		goto out;

/*
 *	Extract fields
 */     //从arp报文中解析发送方MAC,IP,目的target IP。
	arp_ptr = (unsigned char *)(arp + 1);
	sha	= arp_ptr;//发送方硬件地址
	arp_ptr += dev->addr_len;
	memcpy(&sip, arp_ptr, 4);//解析源IP
	arp_ptr += 4;
	switch (dev_type) {
#if IS_ENABLED(CONFIG_FIREWIRE_NET)
	case ARPHRD_IEEE1394:
		break;
#endif
	default:
		arp_ptr += dev->addr_len;
	}
	memcpy(&tip, arp_ptr, 4);//解析目的IP
/*
 *	Check for bad requests for 127.x.x.x and requests for multicast
 *	addresses.  If this is one such, delete it.
 */
        //丢弃ip地址为组播或互换地址的arp报文
	if (ipv4_is_multicast(tip) ||
	    (!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip)))
		goto out;

/*
 *     Special case: We must set Frame Relay source Q.922 address
 */    //如果硬件类型为Q.922,则arp应答报文中目的硬件地址设置为网路设备的广播地址
	if (dev_type == ARPHRD_DLCI)
		sha = dev->broadcast;

/*
 *  Process entry.  The idea here is we want to send a reply if it is a
 *  request for us or if it is a request for someone else that we hold
 *  a proxy for.  We want to add an entry to our cache if it is a reply
 *  to us or if it is a request for our address.
 *  (The assumption for this last is that if someone is requesting our
 *  address, they are probably intending to talk to us, so it saves time
 *  if we cache their address.  Their address is also probably not in
 *  our cache, since ours is not in their cache.)
 *
 *  Putting this another way, we only care about replies if they are to
 *  us, in which case we add them to the cache.  For requests, we care
 *  about those for us and those for our proxies.  We reply to both,
 *  and in the case of requests for us we add the requester to the arp
 *  cache.
 */

	/* Special case: IPv4 duplicate address detection packet (RFC2131) */
        //如果请求报文的源ip为0,则该arp报文是用来检测ipv4地址冲突的,因此在确定请求报文的目的IP为本地IP地址后,以该IP地址为源地址及目的地址发送arp响应报文。
	if (sip == 0) {
		if (arp->ar_op == htons(ARPOP_REQUEST) &&
		    inet_addr_type(net, tip) == RTN_LOCAL &&
		    !arp_ignore(in_dev, sip, tip))
			arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
				 dev->dev_addr, sha);
		goto out;
	}

	if (arp->ar_op == htons(ARPOP_REQUEST) &&
	    ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { //找目的IP对应的路由

		rt = skb_rtable(skb);
		addr_type = rt->rt_type;
            /*如果arp报文是发送给本机的,调用neigh_event_ns更新对应的邻居项,然后根据系统设置来决定是否过滤和丢弃arp报文,最后发送arp响应报文。*/
		if (addr_type == RTN_LOCAL) {
			int dont_send;

			dont_send = arp_ignore(in_dev, sip, tip);
			if (!dont_send && IN_DEV_ARPFILTER(in_dev))
				dont_send = arp_filter(sip, tip, dev);
			if (!dont_send) {
				n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
				if (n) {
					arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
						 dev, tip, sha, dev->dev_addr,
						 sha);
					neigh_release(n);
				}
			}
			goto out;
		} 
        /*对于不是发送给本机的arp请求报文,根据系统参数确定是否进行arp代理。*/
                else if (IN_DEV_FORWARD(in_dev)) {
			if (addr_type == RTN_UNICAST  &&
			    (arp_fwd_proxy(in_dev, dev, rt) ||
			     arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
			     (rt->dst.dev != dev &&
			      pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) {
				n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
				if (n)
					neigh_release(n);

				if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
				    skb->pkt_type == PACKET_HOST ||
				    NEIGH_VAR(in_dev->arp_parms, PROXY_DELAY) == 0) {
					arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
						 dev, tip, sha, dev->dev_addr,
						 sha);
				} else {
					pneigh_enqueue(&arp_tbl,
						       in_dev->arp_parms, skb);
					return 0;
				}
				goto out;
			}
		}
	}

	/* Update our ARP tables */
        /*对于arp应答消息,或未处理的代理请求,则需要更新邻居表,因此现在邻居表中根据sip找到对应的邻居项*/
	n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
        /*对于那些并非由arp请求而接收到的arp应答,在系统允许接收的情况下,创建相应的邻居项*/
	if (IN_DEV_ARP_ACCEPT(in_dev)) {
		/* Unsolicited ARP is not accepted by default.
		   It is possible, that this option should be enabled for some
		   devices (strip is candidate)
		 */
		is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip &&
			  inet_addr_type(net, sip) == RTN_UNICAST;

		if (!n &&
		    ((arp->ar_op == htons(ARPOP_REPLY)  &&
		      inet_addr_type(net, sip) == RTN_UNICAST) || is_garp))
			n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
	}
/*更新或创建新的邻居项,首先确定邻居项的新状态,如果是发送给本机的arp应答报文,则对应邻居项状态应转变为NUD_REACHABLE,否则转到NUD_STALE。然后调用neigh_update更新邻居项,如果其更新时间已超过locktime,则用覆盖的方式进行更新。*/
	if (n) {
		int state = NUD_REACHABLE;
		int override;

		/* If several different ARP replies follows back-to-back,
		   use the FIRST one. It is possible, if several proxy
		   agents are active. Taking the first reply prevents
		   arp trashing and chooses the fastest router.
		 */
		override = time_after(jiffies,
				      n->updated +
				      NEIGH_VAR(n->parms, LOCKTIME)) ||
			   is_garp;

		/* Broadcast replies and request packets
		   do not assert neighbour reachability.
		 */
		if (arp->ar_op != htons(ARPOP_REPLY) ||
		    skb->pkt_type != PACKET_HOST)
			state = NUD_STALE;
		neigh_update(n, sha, state,
			     override ? NEIGH_UPDATE_F_OVERRIDE : 0);
		neigh_release(n);
	}

out:
	consume_skb(skb);
	return 0;
}

neigh_event_ns 

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	/*创建neighbour*/
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE);
	return neigh;
}

 neigh_update

这个函数的作用就是更新邻居项硬件地址和状态。分支比较多。

/* Generic update routine.
   -- lladdr is new lladdr or NULL, if it is not supplied.
   -- new    is new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows to retain current state
				if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Caller MUST hold reference count on the entry.
 */
/*new:邻居项新状态,lladdr:邻居项新硬件地址*/
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;//邻居项老的状态
	int err;
	int notify = 0;//是否通知该事件
	struct net_device *dev;
	int update_isrouter = 0;//邻居项是否为路由器

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/*case1:邻居项原先状态为NUD_NOARP或者NUD_PERMANENT时,标志位必须包含NEIGH_UPDATE_F_ADMIN
	才允许更新,表示是管理员进行的操作,否则不允许更新。*/
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	/*case2:新状态为无效状态,删除其定时器。如果原先状态为NUD_CONNECTED状态,调用neigh_suspect
	更新其输出函数;如果老状态为NUD_INCOMPLETE或者NUD_PROBE,新状态为NUD_FAILED,执行neigh_invalidate操作。*/
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		/*前后地址不相同,并且没有NEIGH_UPDATE_F_OVERRIDE标志*/
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			/*带有NEIGH_UPDATE_F_WEAK_OVERRIDE标志,并且老状态为NUD_CONNECTED*/
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	/*前后地址不相同,更新地址*/
	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);//更新output函数
	else
		neigh_suspect(neigh);//更新output函数
	/*如果邻居项是无效状态变为有效状态,则便利arp_queue,将缓存在队列中的报文逐个发出。*/
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	/*发送通知事件*/
	if (notify)
		neigh_update_notify(neigh);

	return err;
}

neigh_update_notify 

static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}

 

arp_ignore()


static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
{
	struct net *net = dev_net(in_dev->dev);
	int scope;

	switch (IN_DEV_ARP_IGNORE(in_dev)) {
	case 0:	/* Reply, the tip is already validated */
		return 0;
	case 1:	/* Reply only if tip is configured on the incoming interface */
		sip = 0;
		scope = RT_SCOPE_HOST;
		break;
	case 2:	/*
		 * Reply only if tip is configured on the incoming interface
		 * and is in same subnet as sip
		 */
		scope = RT_SCOPE_HOST;
		break;
	case 3:	/* Do not reply for scope host addresses */
		sip = 0;
		scope = RT_SCOPE_LINK;
		in_dev = NULL;
		break;
	case 4:	/* Reserved */
	case 5:
	case 6:
	case 7:
		return 0;
	case 8:	/* Do not reply */
		return 1;
	default:
		return 0;
	}
	return !inet_confirm_addr(net, in_dev, sip, tip, scope);
}

arp_filter


static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
{
	struct rtable *rt;
	int flag = 0;
	/*unsigned long now; */
	struct net *net = dev_net(dev);

	rt = ip_route_output(net, sip, tip, 0, 0);
	if (IS_ERR(rt))
		return 1;
	if (rt->dst.dev != dev) {
		NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
		flag = 1;
	}
	ip_rt_put(rt);
	return flag;
}

 

arp代理

代理arp(proxy arp),通常像路由器这样的设备才使用,用来代替处于另一个网段的主机回答本网段主机的arp请求。

感觉代码ARP好像没啥用呀。

网络主机发包的一般过程:

1.当目的IP和自己在同一网段时,直接arp请求该目的IP的MAC。

2.当目的IP和自己不再同一网段时,arp请求默认网关的MAC。

https://www.cnblogs.com/taitai139/p/12336554.html

https://www.cnblogs.com/Widesky/p/10489514.html

当主机没有默认网关的时候,arp请求别的网段的报文,到达路由器后,本来路由器是要隔离广播的,把这个arp请求报文给丢弃,这样就没法通信了。当路由器开启arp proxy后,路由器发现请求的目的IP在其他网段,就自己给主机回复一个arp响应报文,这样源主机就把路由器的MAC当成目的IP主机对应的MAC,可以通信了。这样可能会造成主机arp表中,多个IP地址都对应于路由器的同一个MAC地址。

可以使用arping命令发送指定IP的arp请求报文。

Linux内核-arp协议_第9张图片

写完了发现这个老妹写的arp代理文章蛮好的,不过她好像是转载的。

https://www.cnblogs.com/csguo/p/7527812.html#commentform

https://blog.51cto.com/chenxinjie/1959191

https://edu.51cto.com/course/10839.html

你可能感兴趣的:(Linux内核网络,计算机网络)