TCP三次握手源代码跟踪分析

1.前言
1.1TCP基本概念

TCP协议:面向连接的、可靠的字节流服务

SYN
同步序列编号(Synchronize Sequence Numbers)。是TCP/IP建立连接时使用的握手信号。

三次握手
客户机首先发出一个SYN消息,服务器使用SYN+ACK应答表示接收到了这个消息,最后客户机再以ACK消息响应。

TCP是进程到进程间的传输协议,主机使用端口来区分不同的进程

1.2环境

内核环境是Linux 3.18.6

以TCP/IP为例(net/ipv4目录下)

2. TCP协议栈从上到下提供的接口
client
server
server
函数指针
函数指针
接口函数
接口函数
触发TCP数据发送过程
触发TCP数据接收过程
挂载TCP模块
TCP三次握手
socket API
sys_connect
sys_accept
sys_socket
sock->ops->connect
sock->ops->accept
tcp_v4_connect
inet_csk_accept
tcp_transmit_skb
tcp_v4_rcv
sock_create
__sock_create
pf->create
inet_create
2.1创建socket
  • 创建TCP socket调用接口
    TCP三次握手源代码跟踪分析_第1张图片
  • 在创建socket套接字描述符时,sys_socket内核函数会根据指定的协议(例如socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP))挂载对应的协议处理函数
250 static int inet_create(struct net *net, struct socket *sock, int protocol,int kern)
251{
...
262     /* Look for the requested type/protocol pair. */
263     lookup_protocol:
264     err = -ESOCKTNOSUPPORT;
265     rcu_read_lock();

           // TCP套接字、UDP套接字、原始套接字的inet_protosw实例都在inetsw_array数组中定义,
           //这些实例会调inet_register_protosw()注册到inetsw中
          //根据protocol查找要创建的套接字对应的四层传输协议。
266     list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
268           ...
283     }
284
           //如果没有找到,则调用request_module()来尝试加载协议所属的模块,正常情况下不会发生。
285     if (unlikely(err)) {
286             if (try_loading_module < 2) {
287                     rcu_read_unlock();
...
}

参考文档:网络编程常用接口的内核实现sys_socket()

2.2 三次握手
  • 结构体变量struct proto tcp_prot指定了TCP协议栈的访问接口函数
    TCP三次握手源代码跟踪分析_第2张图片
2.2.1 首先客户端发送SYN报文

tcp_v4_connect函数

设置TCP_SYN_SENT
构造SYN并发送
tcp_v4_connect
调用IP层服务
发送SYN
ip_route_connect
ip_route_newports
...
tcp_set_state
tcp_connect

140/* This will initiate an outgoing connection. */
141int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
142{
...
171    rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
172                          RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
173                          IPPROTO_TCP,
174                          orig_sport, orig_dport, sk);
...
214
215    /* Socket identity is still unknown (sport may be zero).
216     * However we set state to SYN-SENT and not releasing socket
217     * lock select source port, enter ourselves into the hash tables and
218     * complete initialization after this.
219     */
220   tcp_set_state(sk, TCP_SYN_SENT);
...
227    rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
228                           inet->inet_sport, inet->inet_dport, sk);
...
246    err = tcp_connect(sk);
...
}
265EXPORT_SYMBOL(tcp_v4_connect);

tcp_connect函数

构造一个携带SYN标志位的TCP头
发送带有SYN的TCP报文
设置计时器超时重发
调用函数指针
触发IP协议栈发送数据
tcp_connect
tcp_init_nondata_skb
tcp_transmit_skb
inet_csk_reset_xmit_timer
icsk->icsk_af_ops->queue_xmit
ip_queue_xmit

3090/* Build a SYN and send it off. */
3091int tcp_connect(struct sock *sk)
3092{
...
3108       /* Reserve space for headers. */
3109       skb_reserve(buff, MAX_TCP_HEADER);
3110
3111       tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3112       tp->retrans_stamp = tcp_time_stamp;
3113       tcp_connect_queue_skb(sk, buff);
3114       tcp_ecn_send_syn(sk, buff);
3115
3116       /* Send off SYN; include data in Fast Open. */
3117       err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
3118             tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
...
3129       /* Timer for repeating the SYN until an answer. */
3130      inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3131                                 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
...
}
3134EXPORT_SYMBOL(tcp_connect);

tcp_transmit_skb函数

876/* This routine actually transmits TCP packets queued in by
877 * tcp_do_sendmsg().  This is used by both the initial
878 * transmission and possible later retransmissions.
879 * All SKB's seen here are completely headerless.  It is our
880 * job to build the TCP header, and pass the packet down to
881 * IP so it can do the same plus pass the packet off to the
882 * device.
883 *
884 * We are working here with either a clone of the original
885 * SKB, or a fresh unique copy made by the retransmit engine.
886 */

887static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
888                      gfp_t gfp_mask)
889{
...
1012      err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
...
1020}

ip_queue_xmit函数

//这部分代码暂不分析
2.2.2 另一头服务端accept等待连接请求

inet_csk_accept函数

获取连接请求
Y
N
inet_csk_accept
reqsk_queue_empty
queue->rskq_accept_head == NULL
inet_csk_wait_for_connect
接收数据放入accept队列
289/*
290 * This will accept the next outstanding connection.
291 */
292struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
293{


302       /* We need to make sure that this socket is listening,
303         * and that it has something pending.
304         */
305        error = -EINVAL;
306        if (sk->sk_state != TCP_LISTEN)
307                goto out_err;
308
309        /* Find already established connection */
310       if (reqsk_queue_empty(queue)) {
311                long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
...
318                error = inet_csk_wait_for_connect(sk, timeo);
...
350}
351EXPORT_SYMBOL(inet_csk_accept);

reqsk_queue_empty函数

183static inline int reqsk_queue_empty(struct request_sock_queue *queue)
184{
           //判断请求是否为空
185     return queue->rskq_accept_head == NULL;
186}

inet_csk_wait_for_connect函数


241/*
242 * Wait for an incoming connection, avoid race conditions. This must be called
243 * with the socket locked.
244 */
245static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
246{
...           //无限for循环,一旦有请求则跳出
265        for (;;) {
266                prepare_to_wait_exclusive(sk_sleep(sk), &wait,
267                                          TASK_INTERRUPTIBLE);
...
287}
2.2.3 三次握手中携带SYN/ACK的TCP头数据的发送和接收
  • 连接建立成功后,接收数据放入accept队列

跟踪这部分代码的思路:

网卡接收到数据需要通知上层协议来接收并处理数据,那么应该有TCP协议的接收数据的函数被底层网络驱动callback方式进行调用,针对这个思路我们需要回头来看TCP/IP协议栈的初始化过程是不是有将recv的函数指针发布给网络底层代码

  • TCP/IP协议栈初始化
    inet_init函数
添加所有基础协议
inet_init
inet_add_protocol
icmp_protocol
udp_protocol
tcp_protocol
other...
.handler
tcp_v4_rcv
处理SYN/ACK标志的入口

1498static const struct net_protocol tcp_protocol = {
1499  .early_demux      =       tcp_v4_early_demux,
1500 .handler      =       tcp_v4_rcv,
1501  .err_handler      =       tcp_v4_err,
1502  .no_policy  =       1,
1503  .netns_ok    =       1,
1504  .icmp_strict_tag_validation = 1,
1505};
...
1674static int __init inet_init(void)
1675{
...
1708 /*
1709   *      Add all the base protocols.
1710  */
1711
1712  if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
1713          pr_crit("%s: Cannot add ICMP protocol\n", __func__);
1714  if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
1715          pr_crit("%s: Cannot add UDP protocol\n", __func__);
1716  if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
1717          pr_crit("%s: Cannot add TCP protocol\n", __func__);
1718#ifdef CONFIG_IP_MULTICAST
1719  if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
1720         pr_crit("%s: Cannot add IGMP protocol\n", __func__);
1721#endif
...
1795}
2.2.4 服务端接收客户端发来的SYN,发送SYN+ACK

tcp_v4_do_rcv函数

检查是否处于半连接状态
返回值为空
是SYN报文
是ACK报文
tcp_v4_do_rcv
tcp_v4_hnd_req
discard
tcp_rcv_state_process
return 0
接收SYN报文
并发送SYN+ACK报文
同时向syn_table中插入一项表明此次连接的sk

1419/* The socket must have it's spinlock held when we get
1420 * here.
1421 *
1422 * We have a potential double-lock case here, so even when
1423 * doing backlog processing we use the BH locking scheme.
1424 * This is because we cannot sleep with the original spinlock
1425 * held.
1426 */
1427int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1428{
...
1449 if (sk->sk_state == TCP_LISTEN) {
1450                struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1451         if (!nsk)
1452                 goto discard;
1453
1454         if (nsk != sk) {
...
1460                        return 0;
1461         }
1462 } else
1463         sock_rps_save_rxhash(sk, skb);
1464
1465 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1466         rsk = sk;
1467         goto reset;
1468 }
...
1486}
1487EXPORT_SYMBOL(tcp_v4_do_rcv);
2.2.5 客户端收到服务端的SYN+ACK,发送ACK

tcp_rcv_synsent_state_process函数

5354static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5355                                         const struct tcphdr *th, unsigned int len)
5356{
5357     
...
5487           tcp_send_ack(sk);
...
}
  • 到这里我们已经从linux网络核心的角度从架构上整体理解了三次握手,即携带SYN/ACK标志的数据收发过程

TCP三次握手源代码跟踪分析_第3张图片

2.2.6 gdb调试

TCP三次握手源代码跟踪分析_第4张图片TCP三次握手源代码跟踪分析_第5张图片

2.2.7 剩下的工作

1.进一步深入三次握手过程中携带SYN/ACK标志的数据收发过程(tcp_transmit_skb和tcp_v4_rcv)
2.连接建立成功后放到accept队列的代码
3.正常数据的收发过程和关闭连接的过程

你可能感兴趣的:(庖丁解牛Linux网络核心)