在分析 AF_PACKET raw socket 的实现时,我们是从创建 socket 入手的;本篇分析 AF_INET raw socket,则从收包流程入手。在分析协议栈 IP 层的报文接收时,我们已经知道,IP 层把报文交给 raw socket 的入口函数是 raw_local_deliver。下面就从这个函数开始,看看一个报文是如何提交给 raw socket 的。
1、raw_local_deliver函数
/*
 * raw_local_deliver - offer a received IPv4 packet to raw sockets.
 * @skb:      the received packet
 * @protocol: per the article, the protocol number from the IP header
 *
 * Returns non-zero only when the hash bucket selected by @protocol has a
 * head sock AND raw_v4_input() reported a delivery; returns zero otherwise.
 */
int raw_local_deliver(struct sk_buff *skb, int protocol)
{
	int hash;
	struct sock *raw_sk;

	/*
	 * Bucket index is the protocol number masked by the table size.
	 * The article states the table has 256 buckets, so distinct IP
	 * protocol numbers never share one; RAW_HTABLE_SIZE itself is not
	 * shown here - TODO confirm.
	 */
	hash = protocol & (RAW_HTABLE_SIZE - 1);
	/*
	 * Head sock of that bucket. Socks reach this table through
	 * raw_hash_sk(), shown later in this article.
	 */
	raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);

	/* If there maybe a raw socket we must check - if not we
	 * don't care less
	 */
	/*
	 * Bucket is non-empty: hand the packet to raw_v4_input(). If it
	 * reports nothing delivered, clear raw_sk so that 0 is returned.
	 */
	if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
		raw_sk = NULL;

	return raw_sk != NULL;

}
2、raw_v4_input函数
/*
 * raw_v4_input - clone a packet to every raw sock returned by the lookup.
 * @skb:  the received packet
 * @iph:  IP header of @skb
 * @hash: bucket index into raw_v4_hashinfo.ht
 *
 * Runs entirely under read_lock(&raw_v4_hashinfo.lock).
 * Returns 1 if __raw_v4_lookup() returned at least one sock, 0 otherwise.
 * Note that 1 is returned even when the filters rejected the packet or
 * skb_clone() failed for every such sock.
 */
static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
{
	struct sock *sk;
	struct hlist_head *head;
	int delivered = 0;
	struct net *net;

	read_lock(&raw_v4_hashinfo.lock);
	/* Chain of socks that share this hash value. */
	head = &raw_v4_hashinfo.ht[hash];
	if (hlist_empty(head))
		goto out;

	net = dev_net(skb->dev);
	/*
	 * Find a sock able to receive this packet; the lookup is keyed on
	 * protocol, source address, destination address and ifindex.
	 */
	sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
			     iph->saddr, iph->daddr,
			     skb->dev->ifindex);

	/*
	 * NOTE(review): the original annotation said that if the first sock
	 * does not match, later ones are not processed. The visible code does
	 * not show that: the loop runs once per sock returned by successive
	 * __raw_v4_lookup() calls, each resuming at sk_next(sk). Whether a
	 * non-matching sock is skipped or ends the walk is decided inside
	 * __raw_v4_lookup(), which is not shown here (mainline skips
	 * non-matching entries - confirm for the kernel version discussed).
	 */
	while (sk) {
		delivered = 1;
		/*
		 * Deliver when the packet is not ICMP or icmp_filter()
		 * returned zero for this sock, and ip_mc_sf_allow() accepts
		 * the (daddr, saddr, ifindex) triple - per the article, that
		 * means non-multicast or multicast that is allowed through.
		 */
		if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
		    ip_mc_sf_allow(sk, iph->daddr, iph->saddr,
				   skb->dev->ifindex)) {
			/* Each receiving sock gets its own clone of the skb. */
			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

			/* Not releasing hash table! */
			if (clone)
				raw_rcv(sk, clone);	/* hand the clone to the sock */
		}
		/* Continue the lookup from the next sock in the chain. */
		sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
				     iph->saddr, iph->daddr,
				     skb->dev->ifindex);
	}
out:
	read_unlock(&raw_v4_hashinfo.lock);
	return delivered;
}
3、raw_rcv函数
/*
 * raw_rcv - policy-check a packet and queue it on a raw sock.
 * @sk:  receiving raw sock
 * @skb: packet (a clone made by raw_v4_input())
 *
 * Returns NET_RX_DROP when the policy check fails (the skb is freed and
 * sk->sk_drops is incremented); otherwise returns 0. The result of
 * raw_rcv_skb() is not propagated.
 */
int raw_rcv(struct sock *sk, struct sk_buff *skb)
{
	/*
	 * Inbound XFRM policy check (XFRM_POLICY_IN). The original note
	 * called this an "ipset" check; presumably IPsec was meant.
	 */
	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
		atomic_inc(&sk->sk_drops);
		kfree_skb(skb);
		return NET_RX_DROP;
	}
	nf_reset(skb);

	/*
	 * Move skb->data back to the network header, so the data the user
	 * reads from the socket starts with the IP header.
	 */
	skb_push(skb, skb->data - skb_network_header(skb));

	/* Queue the packet on the sock. */
	raw_rcv_skb(sk, skb);
	return 0;
}
4、raw_rcv_skb函数
/*
 * raw_rcv_skb - put a packet on the sock's receive queue.
 * @sk:  receiving raw sock
 * @skb: packet to queue
 *
 * Returns NET_RX_SUCCESS when sock_queue_rcv_skb() accepts the packet.
 * When it returns a negative value the skb is freed here and NET_RX_DROP
 * is returned.
 */
static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	/* Charge it to the socket. */

	ipv4_pktinfo_prepare(sk, skb);
	/*
	 * Per the article: places the skb on the sock's receive queue and
	 * wakes up any waiting process.
	 */
	if (sock_queue_rcv_skb(sk, skb) < 0) {
		kfree_skb(skb);
		return NET_RX_DROP;
	}

	return NET_RX_SUCCESS;
}
到这里我们知道,把报文提交给raw socket的关键是raw_v4_hashinfo全局变量,那么要看看是否在创建AF_INET raw socket时把sock对象保存到该全局变量中。socket创建流程请参考 AF_PACKET raw socket分析。AF_INET raw socket的create函数是inet_create,我们来看一看该函数的实现。
inet_create函数
/*
 * inet_create - create and initialise the sock behind an AF_INET socket.
 * @net:      network namespace
 * @sock:     socket being created; sock->type selects the inetsw[] list
 * @protocol: requested protocol; IPPROTO_IP acts as a wildcard
 * @kern:     non-zero skips the CAP_NET_RAW check for SOCK_RAW
 *
 * Returns 0 on success, or a negative errno:
 *   -ESOCKTNOSUPPORT  the inetsw[] list for this type is empty
 *   -EPROTONOSUPPORT  no entry matched, even after two module-load attempts
 *   -EPERM            SOCK_RAW requested without CAP_NET_RAW (and !kern)
 *   -ENOBUFS          sk_alloc() failed
 *   or whatever sk->sk_prot->init() returns on failure.
 */
static int inet_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	struct sock *sk;
	struct inet_protosw *answer;
	struct inet_sock *inet;
	struct proto *answer_prot;
	unsigned char answer_flags;
	int try_loading_module = 0;
	int err;

	sock->state = SS_UNCONNECTED;

	/* Look for the requested type/protocol pair. */
lookup_protocol:
	err = -ESOCKTNOSUPPORT;
	rcu_read_lock();
	/* Search inetsw[] for the inet_protosw entry matching this request. */
	list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {

		err = 0;
		/* Check the non-wild match. */
		if (protocol == answer->protocol) {
			if (protocol != IPPROTO_IP)
				break;
		} else {
			/* Check for the two wild cases. */
			if (IPPROTO_IP == protocol) {
				protocol = answer->protocol;
				break;
			}
			if (IPPROTO_IP == answer->protocol)
				break;
		}
		err = -EPROTONOSUPPORT;
	}

	if (unlikely(err)) {
		/* At most two module-load attempts, then give up. */
		if (try_loading_module < 2) {
			rcu_read_unlock();
			/*
			 * Be more specific, e.g. net-pf-2-proto-132-type-1
			 * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
			 */
			if (++try_loading_module == 1)
				request_module("net-pf-%d-proto-%d-type-%d",
					       PF_INET, protocol, sock->type);
			/*
			 * Fall back to generic, e.g. net-pf-2-proto-132
			 * (net-pf-PF_INET-proto-IPPROTO_SCTP)
			 */
			else
				request_module("net-pf-%d-proto-%d",
					       PF_INET, protocol);
			goto lookup_protocol;
		} else
			goto out_rcu_unlock;
	}

	err = -EPERM;
	if (sock->type == SOCK_RAW && !kern &&
	    !ns_capable(net->user_ns, CAP_NET_RAW))
		goto out_rcu_unlock;

	sock->ops = answer->ops;
	/* Per the article, for a raw socket this is raw_prot (not shown here). */
	answer_prot = answer->prot;
	answer_flags = answer->flags;
	rcu_read_unlock();

	WARN_ON(!answer_prot->slab);

	err = -ENOBUFS;
	/* Allocate the sock object. */
	sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);
	if (!sk)
		goto out;

	err = 0;
	if (INET_PROTOSW_REUSE & answer_flags)
		sk->sk_reuse = SK_CAN_REUSE;

	inet = inet_sk(sk);
	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;

	inet->nodefrag = 0;

	/* Raw socket: inet_num carries the protocol number. */
	if (SOCK_RAW == sock->type) {
		inet->inet_num = protocol;
		if (IPPROTO_RAW == protocol)
			inet->hdrincl = 1;
	}

	if (net->ipv4.sysctl_ip_no_pmtu_disc)
		inet->pmtudisc = IP_PMTUDISC_DONT;
	else
		inet->pmtudisc = IP_PMTUDISC_WANT;

	inet->inet_id = 0;

	/* Generic sock initialisation. */
	sock_init_data(sock, sk);

	sk->sk_destruct	   = inet_sock_destruct;
	sk->sk_protocol	   = protocol;
	sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;

	inet->uc_ttl	= -1;
	inet->mc_loop	= 1;
	inet->mc_ttl	= 1;
	inet->mc_all	= 1;
	inet->mc_index	= 0;
	inet->mc_list	= NULL;
	inet->rcv_tos	= 0;

	sk_refcnt_debug_inc(sk);

	/*
	 * For a raw socket inet_num was set to protocol above, so this
	 * branch is taken whenever protocol is non-zero.
	 */
	if (inet->inet_num) {
		/* It assumes that any protocol which allows
		 * the user to assign a number at socket
		 * creation time automatically
		 * shares.
		 */
		inet->inet_sport = htons(inet->inet_num);
		/* Add to protocol hash chains. */
		/* Per the article, for raw sockets hash() is raw_hash_sk(). */
		sk->sk_prot->hash(sk);
	}

	if (sk->sk_prot->init) {
		/* Per the article, for raw sockets init() is raw_init(). */
		err = sk->sk_prot->init(sk);
		if (err)
			sk_common_release(sk);
	}
out:
	return err;
out_rcu_unlock:
	rcu_read_unlock();
	goto out;
}
raw_hash_sk函数
/*
 * raw_hash_sk - insert a raw sock into its protocol's hash table.
 * @sk: sock to insert; the bucket is chosen from inet_sk(sk)->inet_num
 *
 * The insertion and the in-use counter update are both done under
 * write_lock_bh(&h->lock).
 */
void raw_hash_sk(struct sock *sk)
{
	/*
	 * Per the article, h.raw_hash is raw_v4_hashinfo, the same global
	 * table the receive path reads (the assignment is not shown here).
	 */
	struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
	struct hlist_head *head;

	/*
	 * Bucket chosen from inet_num, which inet_create() set to the
	 * protocol number for raw sockets.
	 */
	head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];

	write_lock_bh(&h->lock);
	/* Link the sock into the bucket. */
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);
}
raw_init函数
/*
 * raw_init - per-sock initialisation for a raw sock.
 * @sk: sock being initialised
 *
 * Always returns 0.
 */
static int raw_init(struct sock *sk)
{
	struct raw_sock *rp = raw_sk(sk);

	/* For an ICMP raw socket, start with an all-zero filter. */
	if (inet_sk(sk)->inet_num == IPPROTO_ICMP)
		memset(&rp->filter, 0, sizeof(rp->filter));
	return 0;
}