TCP的URG标志和内核实现之二:发送的实现

Linux内核在默认情况下,把urgent data实现为OOB数据。

发送URG数据的接口

在内核态,使用kernel_sendmsg/kernel_sendpage完成发送,只不过需要加上MSG_OOB标志,表示要发送的是URG数据。


URG数据发送接口的实现

分片主要在kernel_sendmsg中完成(对于TCP套接字,最终调用的是下面列出的tcp_sendmsg),在OOB数据的处理上,它和kernel_sendpage是一致的。


/*
 * tcp_sendmsg - excerpt of the TCP sendmsg path, as quoted by the article.
 *
 * Walks an iovec (iov/iovlen) and copies each chunk of user data either onto
 * the tail skb of the write queue or into a freshly allocated skb, advancing
 * tp->write_seq and the skb's end_seq by the number of bytes copied.
 *
 * Returns the number of bytes copied; if nothing was copied and an error
 * occurred, returns the value produced by sk_stream_error().
 *
 * NOTE(review): the lines of full-width dots mark code the article elided.
 * Locals such as iov, iovlen, flags, tp, sg, timeo, copied, err, skb,
 * mss_now and size_goal are presumably declared/initialised there - confirm
 * against the kernel source.
 */
int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		size_t size)
{
	。。。。。。。。。。。。。。
	/* With MSG_OOB set in flags, tcp_send_mss computes size_goal with
	 * large_allowed == 0, i.e. TSO-sized segments are effectively disabled.
	 */
	mss_now = tcp_send_mss(sk, &size_goal, flags);
	。。。。。。。。。。。。。。
	/* Outer loop: one iteration per iovec element. */
	while (--iovlen >= 0) {
		size_t seglen = iov->iov_len;
		unsigned char __user *from = iov->iov_base;

		iov++;

		/* Inner loop: consume this iovec element piece by piece. */
		while (seglen > 0) {
			int copy = 0;
			int max = size_goal;

			/* When tcp_send_head(sk) is non-NULL, compute the room
			 * left in the tail skb; for a CHECKSUM_NONE skb the
			 * limit is mss_now instead of size_goal.
			 */
			skb = tcp_write_queue_tail(sk);
			if (tcp_send_head(sk)) {
				if (skb->ip_summed == CHECKSUM_NONE)
					max = mss_now;
				copy = max - skb->len;
			}

			/* No room in the tail skb (or nothing usable queued):
			 * start a new one.
			 */
			if (copy <= 0) {
new_segment:
				/* Allocate new segment. If the interface is SG,
				 * allocate skb fitting to single page.
				 */
				if (!sk_stream_memory_free(sk))
					goto wait_for_sndbuf;

				skb = sk_stream_alloc_skb(sk,
							  select_size(sk, sg),
							  sk->sk_allocation);
				if (!skb)
					goto wait_for_memory;

				/*
				 * Check whether we can use HW checksum.
				 */
				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
					skb->ip_summed = CHECKSUM_PARTIAL;

				skb_entail(sk, skb);
				copy = size_goal;
				max = size_goal;
			}

			/* Try to append data to the end of skb. */
			if (copy > seglen)
				copy = seglen;

			/* Where to copy to? */
			if (skb_availroom(skb) > 0) {
				/* We have some space in skb head. Superb! */
				copy = min_t(int, copy, skb_availroom(skb));
				err = skb_add_data_nocache(sk, skb, from, copy);
				if (err)
					goto do_fault;
			} else {
				/* No linear room: copy into a page fragment,
				 * using the socket's cached page
				 * (sk_sndmsg_page / sk_sndmsg_off) when set.
				 */
				int merge = 0;
				int i = skb_shinfo(skb)->nr_frags;
				struct page *page = sk->sk_sndmsg_page;
				int off;

				/* A page_count() of 1 restarts the cached page
				 * at offset 0.
				 */
				if (page && page_count(page) == 1)
					sk->sk_sndmsg_off = 0;

				off = sk->sk_sndmsg_off;

				if (skb_can_coalesce(skb, i, page, off) &&
				    off != PAGE_SIZE) {
					/* We can extend the last page
					 * fragment. */
					merge = 1;
				} else if (i == MAX_SKB_FRAGS || !sg) {
					/* Need to add new fragment and cannot
					 * do this because interface is non-SG,
					 * or because all the page slots are
					 * busy. */
					tcp_mark_push(tp, skb);
					goto new_segment;
				} else if (page) {
					/* Cached page is full: drop it so a new
					 * one is allocated below.
					 */
					if (off == PAGE_SIZE) {
						put_page(page);
						sk->sk_sndmsg_page = page = NULL;
						off = 0;
					}
				} else
					off = 0;

				/* Never copy past the end of the page. */
				if (copy > PAGE_SIZE - off)
					copy = PAGE_SIZE - off;
				if (!sk_wmem_schedule(sk, copy))
					goto wait_for_memory;

				if (!page) {
					/* Allocate new cache page. */
					if (!(page = sk_stream_alloc_page(sk)))
						goto wait_for_memory;
				}

				/* Time to copy data. We are close to
				 * the end! */
				err = skb_copy_to_page_nocache(sk, from, skb,
							       page, off, copy);
				if (err) {
					/* If this page was new, give it to the
					 * socket so it does not get leaked.
					 */
					if (!sk->sk_sndmsg_page) {
						sk->sk_sndmsg_page = page;
						sk->sk_sndmsg_off = 0;
					}
					goto do_error;
				}

				/* Update the skb. */
				if (merge) {
					skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
				} else {
					skb_fill_page_desc(skb, i, page, off, copy);
					if (sk->sk_sndmsg_page) {
						get_page(page);
					} else if (off + copy < PAGE_SIZE) {
						get_page(page);
						sk->sk_sndmsg_page = page;
					}
				}

				sk->sk_sndmsg_off = off + copy;
			}

			/* Nothing copied before this chunk: clear PSH on the skb. */
			if (!copied)
				TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;

			/* Account for the bytes just queued. */
			tp->write_seq += copy;
			TCP_SKB_CB(skb)->end_seq += copy;
			skb_shinfo(skb)->gso_segs = 0;

			from += copy;
			copied += copy;
			/* Last iovec element fully consumed: finish up. */
			if ((seglen -= copy) == 0 && iovlen == 0)
				goto out;
			/* For OOB data, even when a segment has been filled up,
			 * keep accumulating segments as long as there is send
			 * buffer space and OOB data left, instead of pushing.
			 */
			if (skb->len < max || (flags & MSG_OOB))
				continue;

			/* Non-OOB data with a full skb: push it out now. */
			if (forced_push(tp)) {
				tcp_mark_push(tp, skb);
				__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
			} else if (skb == tcp_send_head(sk))
				tcp_push_one(sk, mss_now);
			continue;

wait_for_sndbuf:
			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
			/* Push whatever has been copied so far, then wait for
			 * memory; MSG_MORE is masked out for this push.
			 */
			if (copied)
				tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);

			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
				goto do_error;

			/* Recompute mss_now and size_goal after waiting. */
			mss_now = tcp_send_mss(sk, &size_goal, flags);
		}
	}

out:
	/* Final push; this call receives the caller's flags unmasked,
	 * MSG_OOB included.
	 */
	if (copied)
		tcp_push(sk, flags, mss_now, tp->nonagle);
	release_sock(sk);
	return copied;

do_fault:
	/* The copy into a still-empty skb failed: take it off the queue. */
	if (!skb->len) {
		tcp_unlink_write_queue(skb, sk);
		/* It is the one place in all of TCP, except connection
		 * reset, where we can be unlinking the send_head.
		 */
		tcp_check_send_head(sk, skb);
		sk_wmem_free_skb(sk, skb);
	}

do_error:
	/* A partial send is reported as success with the byte count. */
	if (copied)
		goto out;
out_err:
	err = sk_stream_error(sk, flags, err);
	release_sock(sk);
	return err;
}

tcp_sendmsg中,涉及对OOB数据的处理主要有:

1、在调用tcp_send_mss确定分片大小的时候:

/*
 * tcp_send_mss - compute the current MSS and the size goal for new skbs.
 * @sk:        socket being sent on
 * @size_goal: out parameter, receives the result of tcp_xmit_size_goal()
 * @flags:     sendmsg flags; only MSG_OOB is examined here
 *
 * Returns the value of tcp_current_mss(sk).
 */
static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
{
	int mss_now;

	mss_now = tcp_current_mss(sk);

	/* For OOB data the third argument (large_allowed) is 0, which
	 * turns TSO off for this send.
	 */
	*size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));

	return mss_now;
}

如果是OOB数据,其实是关闭了TSO功能,这样做的原因是:无法确定各种网卡芯片在执行分片(TSO)的时候会如何处理TCP报头中的URG标志和urgent pointer。

2、在确定何时开始执行分片的发送的时候:

如果是OOB数据,即使当前已经积累了一整个分片,也不会像普通的数据一样执行发送(tcp_push),而是继续积累直到用户下发的数据全部分片或者snd_buf/内存用尽。

3、执行tcp_push的时候:

在用户下发的数据全部分片或者snd_buf/内存用尽后,进入tcp_push执行发送操作(所有的OOB数据,都会通过这个接口来执行发送)

static inline void tcp_push(struct sock*sk, int flags, int mss_now,

                         int nonagle)

{

       if(tcp_send_head(sk)) {

              structtcp_sock *tp = tcp_sk(sk);

              if(!(flags & MSG_MORE) || forced_push(tp))

                     tcp_mark_push(tp,tcp_write_queue_tail(sk));          

                    *tcp_mark_urg设置tp->snd_up,标识进入OOB数据发送模式,设置urgent point

                    指向urgentdata接受后的第一个字符*/

              tcp_mark_urg(tp,flags);

              __tcp_push_pending_frames(sk,mss_now,

                                     (flags & MSG_MORE) ? TCP_NAGLE_CORK :nonagle);

       }

}

发送处理

使用struct tcp_sock中的snd_up来标识当前的urgent pointer,同时也使用该字段来判断当前是否处于urgent data发送模式:在普通数据的发送模式中,tcp_sock::snd_up总是和tcp_sock::snd_una相等;只有在有urgent data发送的时候,才在tcp_push--->tcp_mark_urg中把它设置为urgent pointer,进入到urgent data的处理模式。

tcp_transmit_skb中的以下代码段负责urgent data相关的处理:

/* Excerpt from tcp_transmit_skb: decide whether this segment carries URG.
 * Only considered when tcp_urg_mode(tp) holds and the segment's start
 * sequence lies before tp->snd_up.
 * NOTE(review): before()/after() are presumably wrap-safe sequence-number
 * comparisons - confirm against the kernel headers.
 */
if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
	if (before(tp->snd_up, tcb->seq + 0x10000)) {
		/* snd_up is less than 0x10000 past seq, so the offset fits
		 * in the 16-bit urg_ptr field.
		 */
		th->urg_ptr = htons(tp->snd_up - tcb->seq);
		th->urg = 1;
	} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
		/* Offset too large for 16 bits: clamp to 0xFFFF, but only
		 * when seq + 0xFFFF is after snd_nxt; otherwise URG is left
		 * unset on this segment.
		 */
		th->urg_ptr = htons(0xFFFF);
		th->urg = 1;
	}
}

只要当前待发送的skb的seq在tcp_sock记录的urgent pointer(snd_up)前面,就需要考虑在报头中对URG标志置位:如果tcp_sock记录的urgent pointer与该报文seq的距离在16位能表示的范围内,就把这个距离填入TCP报头的urgent pointer并置URG标志;如果距离大于16位能表示的最大值,并且seq + 0xFFFF在snd_nxt之后,就置TCP报头中的urgent pointer为65535并置URG标志。

切换回普通模式:

在收到对方ACK的处理流程tcp_ack--->tcp_clean_rtx_queue中:

/* Excerpt from tcp_clean_rtx_queue: when between(snd_up, prior_snd_una,
 * snd_una) holds, reset snd_up to snd_una. Per the article, snd_up equal
 * to snd_una is the normal (non-urgent) send mode.
 * NOTE(review): between() is presumably an inclusive, wrap-safe range
 * check on sequence numbers - confirm against the kernel headers.
 */
if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
	tp->snd_up = tp->snd_una;

报文体现

根据对发送代码的分析,可以看到:如果用户使用MSG_OOB标志发送一段比较长(若干个MSS)的数据,那么线路上的报文应该是分成了若干组,每组由若干个长度为MSS的报文构成,组内的每个报文有一样的urgent pointer,指向下一组报文的起始seq,每一组的长度最长为65535。





你可能感兴趣的:(TCP协议和linux实现)