UDP报文的处理入口是udp_rcv函数,该函数是在ip_local_deliver_finish函数中被调用的。
1、udp_rcv函数
int udp_rcv(struct sk_buff *skb) { return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); }2、__udp4_lib_rcv函数
int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { struct sock *sk; struct udphdr *uh; unsigned short ulen; struct rtable *rt = skb_rtable(skb); //得到路由表项信息 __be32 saddr, daddr; struct net *net = dev_net(skb->dev); /* * Validate the packet. */ if (!pskb_may_pull(skb, sizeof(struct udphdr))) //检测报文长度 goto drop; /* No space for header. */ uh = udp_hdr(skb); ulen = ntohs(uh->len); saddr = ip_hdr(skb)->saddr; daddr = ip_hdr(skb)->daddr; if (ulen > skb->len) //skb报文长度必须UDP头设定的长度,skb可能存在pad goto short_packet; if (proto == IPPROTO_UDP) { /* UDP validates ulen. */ if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) //裁剪skb报文 goto short_packet; uh = udp_hdr(skb); } if (udp4_csum_init(skb, uh, proto)) //csum值校验,如果非零则表示csum错误 goto csum_error; sk = skb_steal_sock(skb); //IP层调用UDP的early_demux函数设置的sk if (sk) { struct dst_entry *dst = skb_dst(skb); int ret; if (unlikely(sk->sk_rx_dst != dst)) udp_sk_rx_dst_set(sk, dst); ret = udp_queue_rcv_skb(sk, skb); //sock处理skb报文 sock_put(sk); /* a return value > 0 means to resubmit the input, but * it wants the return to be -protocol, or 0 */ if (ret > 0) return -ret; return 0; } if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return __udp4_lib_mcast_deliver(net, skb, uh, //组播报文上送 saddr, daddr, udptable, proto); sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); //全量检索sock if (sk) { int ret; if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, //如果skb->ip_summed == CHECKSUM_NONE,重置csum inet_compute_pseudo); ret = udp_queue_rcv_skb(sk, skb); //sock处理skb报文 sock_put(sk); /* a return value > 0 means to resubmit the input, but * it wants the return to be -protocol, or 0 */ if (ret > 0) return -ret; return 0; } if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) //ipset安全检测 goto drop; nf_reset(skb); /* No socket. Drop packet silently, if checksum is wrong */ if (udp_lib_checksum_complete(skb)) //没有socket,如果checksum出错则直接丢弃 goto csum_error; UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); //发送icmp,通知发送方目的不可达 /* * Hmm. We got an UDP packet to a port to which we * don't wanna listen. Ignore it. */ kfree_skb(skb); return 0; short_packet: net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), ulen, skb->len, &daddr, ntohs(uh->dest)); goto drop; csum_error: /* * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), ulen); UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); //UDP csum错误统计 drop: UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); //UDP丢包统计 kfree_skb(skb); return 0; }3、udp_queue_rcv_skb函数
int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int rc; int is_udplite = IS_UDPLITE(sk); /* * Charge it to the socket, dropping if the queue is full. */ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) //ipset检测 goto drop; nf_reset(skb); if (static_key_false(&udp_encap_needed) && up->encap_type) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); /* * This is an encapsulation socket so pass the skb to * the socket's udp_encap_rcv() hook. Otherwise, just * fall through and pass this up the UDP socket. * up->encap_rcv() returns the following value: * =0 if skb was successfully passed to the encap * handler or was discarded by it. * >0 if skb should be passed on to UDP. * <0 if skb should be resubmitted as proto -N */ /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); if (skb->len > sizeof(struct udphdr) && encap_rcv) { int ret; /* Verify checksum before giving to encap */ if (udp_lib_checksum_complete(skb)) goto csum_error; ret = encap_rcv(sk, skb); //报文处理,例如vxlan报文就是通过此方式实现 if (ret <= 0) { UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); return -ret; } } /* FALLTHROUGH -- it's a UDP Packet */ } /* * UDP-Lite specific tests, ignored on UDP sockets */ if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { /* * MIB statistics other than incrementing the error count are * disabled for the following two types of errors: these depend * on the application settings, not on the functioning of the * protocol stack as such. * * RFC 3828 here recommends (sec 3.3): "There should also be a * way ... to ... at least let the receiving application block * delivery of packets with coverage values less than a value * provided by the application." */ if (up->pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n", UDP_SKB_CB(skb)->cscov, skb->len); goto drop; } /* The next case involves violating the min. coverage requested * by the receiver. This is subtle: if receiver wants x and x is * greater than the buffersize/MTU then receiver will complain * that it wants x while sender emits packets of smaller size y. * Therefore the above ...()->partial_cov statement is essential. */ if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n", UDP_SKB_CB(skb)->cscov, up->pcrlen); goto drop; } } if (rcu_access_pointer(sk->sk_filter) && udp_lib_checksum_complete(skb)) //报文checksum检测 goto csum_error; if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); goto drop; } rc = 0; ipv4_pktinfo_prepare(sk, skb); bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); //报文保存到sk的接收队列,并通知用户进程 else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { bh_unlock_sock(sk); goto drop; } bh_unlock_sock(sk); return rc; csum_error: UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); kfree_skb(skb); return -1; }UDP有两条处理路径:1)放到sk的接收队列,通知等待进程;2)调用udp_sock的encap_rcv函数,类似vxlan的实现,可以直接在内核中进行处理。UDP处理中,checksum校验未展开分析,待后续单独分析。