【Linux4.1.12源码分析】二层报文发送之报文GSO分段(skb_segment)

skb_segment是实现封装报文GSO分段的基础,下面直接给出代码。

/**
 *	skb_segment - Perform protocol segmentation on skb.
 *	@head_skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function performs segmentation on the given skb.  It returns
 *	a pointer to the first in a list of new skbs for the segments.
 *	In case of error it returns ERR_PTR(err).
 *
 *	Every segment starts with a copy of the header bytes found between
 *	the MAC header and @head_skb->data on entry, followed by at most
 *	gso_size bytes of payload.  The payload is taken, in order, from the
 *	linear area of @head_skb, its page frags, and the skbs on its
 *	frag_list.
 *
 *	Side effects on @head_skb: its data pointer is pushed back by the
 *	header length, and when its destructor is sock_wfree its truesize,
 *	destructor and sk are swapped with those of the last segment.
 *	@head_skb is not freed by this function.
 *
 *	Errors: ERR_PTR(-EINVAL) when skb_network_protocol() yields no
 *	protocol; every "goto err" path frees the segments built so far and
 *	returns ERR_PTR(-ENOMEM), because err is never reassigned.
 */
struct sk_buff *skb_segment(struct sk_buff *head_skb,
			    netdev_features_t features)
{
	/* Head and last element of the list of segments built so far. */
	struct sk_buff *segs = NULL;
	struct sk_buff *tail = NULL;
	/* Next not-yet-consumed skb on head_skb's frag_list. */
	struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
	/* Cursor into the page-frag array currently being walked. */
	skb_frag_t *frag = skb_shinfo(head_skb)->frags;	
	/* Upper bound on the payload bytes placed in one segment. */
	unsigned int mss = skb_shinfo(head_skb)->gso_size;
	/*
	 * Number of bytes between the MAC header and the current data
	 * pointer; these header bytes are replicated into every segment.
	 */
	unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
	/* The skb that owns the frag array 'frag' points into. */
	struct sk_buff *frag_skb = head_skb;
	/*
	 * Read position of the next payload byte, measured from the MAC
	 * header (head_skb->data after the push below).
	 */
	unsigned int offset = doffset;
	/*
	 * Presumably the length of the outer (tunnel) headers that precede
	 * the MAC header, 0 for non-encapsulated packets - the helper is not
	 * shown here, TODO confirm.  It is what lets encapsulated packets be
	 * segmented by this function.
	 */
	unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
	unsigned int headroom;
	unsigned int len;
	__be16 proto;
	bool csum;
	/* Non-zero when the output path advertises NETIF_F_SG. */
	int sg = !!(features & NETIF_F_SG);
	/* Frag count of the skb currently being walked, and index into it. */
	int nfrags = skb_shinfo(head_skb)->nr_frags;
	int err = -ENOMEM;
	int i = 0;
	/* Byte position (same origin as 'offset') where *frag starts. */
	int pos;
	/* Receives the second output of skb_network_protocol(); unused. */
	int dummy;

	/* Rewind data by doffset so that it points at the MAC header. */
	__skb_push(head_skb, doffset);
	/* Protocol type of the packet. */
	proto = skb_network_protocol(head_skb, &dummy);
	/*
	 * NOTE(review): head_skb is left pushed on this early return -
	 * confirm that callers tolerate that.
	 */
	if (unlikely(!proto))
		return ERR_PTR(-EINVAL);

	/*
	 * True when encap_hdr_csum is clear and can_checksum_protocol()
	 * accepts this protocol for the given features.  When false, the
	 * checksum of each segment is computed in software further down.
	 */
	csum = !head_skb->encap_hdr_csum &&
	    !!can_checksum_protocol(features, proto);

	/* Headroom of the original skb; each segment gets at least as much. */
	headroom = skb_headroom(head_skb);
	/* End of head_skb's linear area, i.e. where its first frag begins. */
	pos = skb_headlen(head_skb);

	/* One iteration builds one segment. */
	do {
		struct sk_buff *nskb;
		skb_frag_t *nskb_frag;
		int hsize;
		int size;

		/* Payload bytes for this segment: what is left, capped at mss. */
		len = head_skb->len - offset;
		if (len > mss)			
			len = mss;

		/*
		 * Bytes of this segment that come from head_skb's linear area,
		 * clamped to [0, len].  Without SG the whole segment is linear.
		 */
		hsize = skb_headlen(head_skb) - offset;
		if (hsize < 0)
			hsize = 0;
		if (hsize > len || !sg)
			hsize = len;

		/*
		 * Clone path: nothing linear is left in head_skb, the frags of
		 * the current skb are used up, and the next frag_list skb has a
		 * linear part that either is exactly len bytes or can be
		 * complemented by frags (SG).
		 * NOTE(review): list_skb is dereferenced here without a visible
		 * NULL check - confirm it cannot be NULL at this point.
		 */
		if (!hsize && i >= nfrags && skb_headlen(list_skb) &&
		    (skb_headlen(list_skb) == len || sg)) {
			/* The linear part of the frag_list skb must fit in len. */
			BUG_ON(skb_headlen(list_skb) > len);

			/* Switch the frag cursor over to list_skb. */
			i = 0;
			nfrags = skb_shinfo(list_skb)->nr_frags;
			frag = skb_shinfo(list_skb)->frags;
			frag_skb = list_skb;
			/* Account for list_skb's linear bytes. */
			pos += skb_headlen(list_skb);

			/*
			 * Step over every frag that fits entirely inside this
			 * segment; stop at the first one that would cross its end.
			 */
			while (pos < offset + len) {
				BUG_ON(i >= nfrags);

				size = skb_frag_size(frag);
				if (pos + size > offset + len)
					break;

				i++;
				pos += size;
				frag++;
			}

			/*
			 * The clone still describes all of list_skb's data; it is
			 * trimmed to len below.  list_skb is advanced before the
			 * allocation result is checked.
			 */
			nskb = skb_clone(list_skb, GFP_ATOMIC);
			list_skb = list_skb->next;

			if (unlikely(!nskb))
				goto err;

			/* Cut the clone down to len bytes. */
			if (unlikely(pskb_trim(nskb, len))) {
				kfree_skb(nskb);
				goto err;
			}

			/*
			 * hsize is reused as scratch here: remember the end offset
			 * so that any change made by skb_cow_head() can be added to
			 * truesize afterwards.
			 */
			hsize = skb_end_offset(nskb);
			/* Make sure there is room in front for headers + headroom. */
			if (skb_cow_head(nskb, doffset + headroom)) {
				kfree_skb(nskb);
				goto err;
			}

			nskb->truesize += skb_end_offset(nskb) - hsize;
			skb_release_head_state(nskb);
			/* Open up doffset bytes in front for the header copy. */
			__skb_push(nskb, doffset);
		} else {
			/*
			 * Copy path: allocate a fresh skb large enough for the
			 * headroom, the headers and the linear part of the payload.
			 */
			nskb = __alloc_skb(hsize + doffset + headroom,
					   GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
					   NUMA_NO_NODE);

			if (unlikely(!nskb))
				goto err;

			/* Reserve the headroom, then make space for the headers. */
			skb_reserve(nskb, headroom);
			__skb_put(nskb, doffset);
		}

		/* Append the new segment to the output list. */
		if (segs)
			tail->next = nskb;
		else
			segs = nskb;
		tail = nskb;

		/* Copy the skb metadata, header offsets included. */
		__copy_skb_header(nskb, head_skb);

		/* Shift the header offsets by the difference in headroom. */
		skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
		skb_reset_mac_len(nskb);

		/*
		 * Copy doffset + tnl_hlen header bytes, starting tnl_hlen bytes
		 * before head_skb's data, to the same relative position in nskb
		 * (the outer headers too when the packet is encapsulated).
		 */
		skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
						 nskb->data - tnl_hlen,
						 doffset + tnl_hlen);

		/*
		 * A cloned frag_list segment already holds its len payload
		 * bytes plus the headers, so it skips the payload copy; a
		 * freshly allocated skb holds only the headers so far.
		 */
		if (nskb->len == len + doffset)
			goto perform_csum_check;

		/*
		 * No SG (and no remcsum offload): copy the whole payload into
		 * the linear area, computing its checksum on the way.  The
		 * 'continue' jumps to the loop condition, which advances offset.
		 */
		if (!sg && !nskb->remcsum_offload) {
			nskb->ip_summed = CHECKSUM_NONE;
			nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
							    skb_put(nskb, len),
							    len, 0);
			SKB_GSO_CB(nskb)->csum_start =
			    skb_headroom(nskb) + doffset;
			continue;
		}

		nskb_frag = skb_shinfo(nskb)->frags;

		/* Copy the linear share (hsize bytes) of the payload. */
		skb_copy_from_linear_data_offset(head_skb, offset,
						 skb_put(nskb, hsize), hsize);

		/* Only the SKBTX_SHARED_FRAG bit is inherited from head_skb. */
		skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags &
			SKBTX_SHARED_FRAG;

		/*
		 * Attach page frags (by reference, no data copy) until the
		 * segment is full.  The same loop serves head_skb's own frags
		 * and those of frag_list skbs.
		 */
		while (pos < offset + len) {	
			if (i >= nfrags) {
				/*
				 * Current skb is exhausted: continue with the next
				 * frag_list skb, which must be frags-only here.
				 */
				BUG_ON(skb_headlen(list_skb));

				i = 0;
				nfrags = skb_shinfo(list_skb)->nr_frags;
				frag = skb_shinfo(list_skb)->frags;
				frag_skb = list_skb;

				BUG_ON(!nfrags);

				list_skb = list_skb->next;
			}

			if (unlikely(skb_shinfo(nskb)->nr_frags >=
				     MAX_SKB_FRAGS)) {
				net_warn_ratelimited(
					"skb_segment: too many frags: %u %u\n",
					pos, mss);
				goto err;
			}

			if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
				goto err;

			/* Share the page: copy the descriptor and take a reference. */
			*nskb_frag = *frag;
			__skb_frag_ref(nskb_frag);
			size = skb_frag_size(nskb_frag);

			/*
			 * The frag starts before this segment (its head went into
			 * the previous one): skip the part already consumed.
			 */
			if (pos < offset) {
				nskb_frag->page_offset += offset - pos;
				skb_frag_size_sub(nskb_frag, offset - pos);
			}

			skb_shinfo(nskb)->nr_frags++;

			if (pos + size <= offset + len) {
				/* Frag ends inside this segment: it is fully used. */
				i++;
				frag++;
				pos += size;
			} else {
				/*
				 * Frag crosses the segment end: drop the excess from
				 * this copy and leave the cursor on the same frag so
				 * the next segment picks up the remainder.
				 */
				skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
				goto skip_fraglist;
			}

			nskb_frag++;
		}

skip_fraglist:
		/* Account for the payload bytes that live in page frags. */
		nskb->data_len = len - hsize;
		nskb->len += nskb->data_len;
		nskb->truesize += nskb->data_len;

perform_csum_check:
		/*
		 * No checksum offload for this protocol (and no remcsum
		 * offload): compute the checksum of everything after the
		 * headers in software.
		 */
		if (!csum && !nskb->remcsum_offload) {
			nskb->csum = skb_checksum(nskb, doffset,
						  nskb->len - doffset, 0);
			nskb->ip_summed = CHECKSUM_NONE;
			SKB_GSO_CB(nskb)->csum_start =
			    skb_headroom(nskb) + doffset;
		}
	} while ((offset += len) < head_skb->len);

	/* Some callers want to get the end of the list.
	 * Put it in segs->prev to avoid walking the list.
	 * (see validate_xmit_skb_list() for example)
	 */
	segs->prev = tail;

	/* Following permits correct backpressure, for protocols
	 * using skb_set_owner_w().
	 * Idea is to transfer ownership from head_skb to last segment.
	 */
	if (head_skb->destructor == sock_wfree) {
		swap(tail->truesize, head_skb->truesize);
		swap(tail->destructor, head_skb->destructor);
		swap(tail->sk, head_skb->sk);
	}
	return segs;

err:
	/* Free every segment built so far; err is still -ENOMEM here. */
	kfree_skb_list(segs);
	return ERR_PTR(err);
}


你可能感兴趣的:(Linux4.1.12源码分析)