__netif_receive_skb 是一个很关键的函数,可以看成 L2 与 L3 的分水岭(如果该协议需要上送到 L3 的话)
net_rx_action 做完了之后基本上
struct sk_buff
{
//... ... ...
unsigned short protocol;
// ... ... ...
}; 里的 protocol 字段就已经被设置了
在看 __netif_receive_skb 之前 先看一下这几个东西
这是网络协议解包的主要注册结构体
/*
 * Registration record for one protocol's handlers; pointers to these
 * records are kept in the inet_protos[] table.
 *
 * NOTE(review): the per-field roles below are inferred from the field
 * names only -- confirm against the kernel version being studied.
 */
struct net_protocol {
	/* Presumably the receive entry point for an incoming skb. */
	int (*handler)(struct sk_buff *skb);

	/* Presumably the error-notification callback; `info` is opaque here. */
	void (*err_handler)(struct sk_buff *skb, u32 info);

	/* GSO-related hooks. */
	int (*gso_send_check)(struct sk_buff *skb);
	struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features);

	/* GRO-related hooks. */
	struct sk_buff **(*gro_receive)(struct sk_buff **head,
					struct sk_buff *skb);
	int (*gro_complete)(struct sk_buff *skb);

	/* Single-bit flags. */
	unsigned int no_policy:1;
	unsigned int netns_ok:1;
};
而 L2 到 L3 的分发依靠的是 struct packet_type:它们按协议类型做 hash 后,挂在下面这个链表数组里(struct net_protocol 则存放在后面会看到的 inet_protos[] 数组中)
/* Number of buckets in the packet-type hash table. */
#define PTYPE_HASH_SIZE (16)
/* Mask applied to a host-order protocol number to pick a bucket. */
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
/* Bucket list heads, indexed by ntohs(type) & PTYPE_HASH_MASK. */
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
还有一个可调文件系统接口
netdev_tstamp_prequeue
----------------------
If set to 0, RX packet timestamps can be sampled after RPS processing, when
the target CPU processes packets. It might give some delay on timestamps, but
permit to distribute the load on several cpus.
If set to 1 (default), timestamps are sampled as soon as possible, before
queueing.
/proc/sys/net/core/netdev_tstamp_prequeue
由于这个函数涉及很多特殊协议的处理,vlan实现我也只知道皮毛,这里只简单介绍一下
static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct net_device *orig_dev; struct net_device *master; struct net_device *null_or_orig; struct net_device *orig_or_bond; int ret = NET_RX_DROP; __be16 type; /*如果设置了 可能会有一些timestamps的延迟,默认是!1 ,不启用*/ if (!netdev_tstamp_prequeue) net_timestamp_check(skb); trace_netif_receive_skb(skb); /*netpoll 需要处理这个帧吗,要的话用netpoll_rx 处理*/ if (netpoll_receive_skb(skb)) return NET_RX_DROP; /*赋值设备的接口序号*/ if (!skb->skb_iif) skb->skb_iif = skb->dev->ifindex; /*决定包的命运和走向*/ null_or_orig = NULL; orig_dev = skb->dev; master = ACCESS_ONCE(orig_dev->master); if (skb->deliver_no_wcard) null_or_orig = orig_dev; else if (master) { if (skb_bond_should_drop(skb, master)) { skb->deliver_no_wcard = 1; null_or_orig = orig_dev; /* deliver only exact match */ } else skb->dev = master; } /*为L3的处理,校准相应的指针和赋值*/ __this_cpu_inc(softnet_data.processed); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->mac_len = skb->network_header - skb->mac_header; //... /* 处理 bridge or macvlan 的情况*/ //... /*这里就是主要的根据 注册了的协议处理函数 去调用处理 deliver_skb()*/ type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { if (ptype->type == type && (ptype->dev == null_or_orig || ptype->dev == skb->dev || ptype->dev == orig_dev || ptype->dev == orig_or_bond)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } } if (pt_prev) { ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); ret = NET_RX_DROP; } out: rcu_read_unlock(); return ret; }
INET 是TCP/IP协议实现的linux版本
INET is implemented using the BSD Socket interface as the means of
communication with the user level.
在INET里面可以看到
/*
 * Registers inet_init() with the initcall mechanism.
 * NOTE(review): the exact boot ordering implied by fs_initcall is not
 * shown in these notes -- confirm against the kernel's init.h.
 */
fs_initcall(inet_init);
include/net/protocol.h:
/* Number of slots in inet_protos[]; defined before its use as the array bound. */
#define MAX_INET_PROTOS 256
/* Table of registered protocol handlers; filled by inet_add_protocol(). */
struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS];
static int __init inet_init(void) { struct sk_buff *dummy_skb; struct inet_protosw *q; struct list_head *r; int rc = -EINVAL; /*保存端口的位图结构 *Inet_connection_sock.c : unsigned long *sysctl_local_reserved_ports;*/ sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); if (!sysctl_local_reserved_ports) goto out; /*socket 层 到 transport 层 接口的注册<struct proto >,都是BSD接口 *<accept,setsockopt,recvmsg......>*/ rc = proto_register(&tcp_prot, 1); if (rc) goto out_free_reserved_ports; rc = proto_register(&udp_prot, 1); if (rc) goto out_unregister_tcp_proto; rc = proto_register(&raw_prot, 1); if (rc) goto out_unregister_udp_proto; /*PF_INET 协议族的注册<struct net_proto_family> */ (void)sock_register(&inet_family_ops); #ifdef CONFIG_SYSCTL /*文件系统接口*/ ip_static_sysctl_init(); #endif /* * Add all the base protocols. */ /*直接用了cmpxchg 指令来把协议设置到 inet_protos[256]对应位中*/ if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0) printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n"); if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0) printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n"); if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0) printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n"); #ifdef CONFIG_IP_MULTICAST if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0) printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n"); #endif /* Register the socket-side information for inet_create. */ for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r) INIT_LIST_HEAD(r); for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q) inet_register_protosw(q); /* * Set the ARP module up */ arp_init(); /* * Set the IP module up */ ip_init(); tcp_v4_init(); /* Setup TCP slab cache for open requests. 
*/ tcp_init(); /* Setup UDP memory threshold */ udp_init(); /* Add UDP-Lite (RFC 3828) */ udplite4_register(); /* * Set the ICMP layer up */ if (icmp_init() < 0) panic("Failed to create the ICMP control socket.\n"); /* * Initialise the multicast router */ #if defined(CONFIG_IP_MROUTE) if (ip_mr_init()) printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n"); #endif /* * Initialise per-cpu ipv4 mibs */ if (init_ipv4_mibs()) printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ipv4_proc_init(); ipfrag_init(); /*这里用 ptype_head() 在 ptype_base[16] (如果设置了ETH_P_ALL 就在 *ptype_all)里面设置相应位 */ dev_add_pack(&ip_packet_type); rc = 0; out: return rc; out_unregister_udp_proto: proto_unregister(&udp_prot); out_unregister_tcp_proto: proto_unregister(&tcp_prot); out_free_reserved_ports: kfree(sysctl_local_reserved_ports); goto out; } static inline struct list_head *ptype_head(const struct packet_type *pt) { if (pt->type == htons(ETH_P_ALL)) return &ptype_all; else return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; }