TCP协议:面向连接的、可靠的字节流服务
SYN
同步序列编号(Synchronize Sequence Numbers)。是TCP/IP建立连接时使用的握手信号。
三次握手
客户机首先发出一个SYN消息,服务器使用SYN+ACK应答表示接收到了这个消息,最后客户机再以ACK消息响应。
TCP是进程到进程间的传输协议,主机使用端口来区分不同的进程
内核环境是Linux 3.18.6
以TCP/IP为例(net/ipv4目录下)
250 static int inet_create(struct net *net, struct socket *sock, int protocol,int kern)
251{
...
262 /* Look for the requested type/protocol pair. */
263 lookup_protocol:
264 err = -ESOCKTNOSUPPORT;
265 rcu_read_lock();
// TCP套接字、UDP套接字、原始套接字的inet_protosw实例都在inetsw_array数组中定义,
//这些实例会调inet_register_protosw()注册到inetsw中
//根据protocol查找要创建的套接字对应的四层传输协议。
266 list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
268 ...
283 }
284
//如果没有找到,则调用request_module()来尝试加载协议所属的模块,正常情况下不会发生。
285 if (unlikely(err)) {
286 if (try_loading_module < 2) {
287 rcu_read_unlock();
...
}
参考文档:网络编程常用接口的内核实现sys_socket()
tcp_v4_connect函数
140/* This will initiate an outgoing connection. */
141int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
142{
...
171 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
172 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
173 IPPROTO_TCP,
174 orig_sport, orig_dport, sk);
...
214
215 /* Socket identity is still unknown (sport may be zero).
216 * However we set state to SYN-SENT and not releasing socket
217 * lock select source port, enter ourselves into the hash tables and
218 * complete initialization after this.
219 */
220 tcp_set_state(sk, TCP_SYN_SENT);
...
227 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
228 inet->inet_sport, inet->inet_dport, sk);
...
246 err = tcp_connect(sk);
...
}
265EXPORT_SYMBOL(tcp_v4_connect);
tcp_connect函数
3090/* Build a SYN and send it off. */
3091int tcp_connect(struct sock *sk)
3092{
...
3108 /* Reserve space for headers. */
3109 skb_reserve(buff, MAX_TCP_HEADER);
3110
3111 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3112 tp->retrans_stamp = tcp_time_stamp;
3113 tcp_connect_queue_skb(sk, buff);
3114 tcp_ecn_send_syn(sk, buff);
3115
3116 /* Send off SYN; include data in Fast Open. */
3117 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
3118 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
...
3129 /* Timer for repeating the SYN until an answer. */
3130 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3131 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
...
}
3134EXPORT_SYMBOL(tcp_connect);
tcp_transmit_skb函数
876/* This routine actually transmits TCP packets queued in by
877 * tcp_do_sendmsg(). This is used by both the initial
878 * transmission and possible later retransmissions.
879 * All SKB's seen here are completely headerless. It is our
880 * job to build the TCP header, and pass the packet down to
881 * IP so it can do the same plus pass the packet off to the
882 * device.
883 *
884 * We are working here with either a clone of the original
885 * SKB, or a fresh unique copy made by the retransmit engine.
886 */
887static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
888 gfp_t gfp_mask)
889{
...
1012 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
...
1020}
ip_queue_xmit函数
//这部分代码暂不分析
inet_csk_accept函数
289/*
290 * This will accept the next outstanding connection.
291 */
292struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
293{
302 /* We need to make sure that this socket is listening,
303 * and that it has something pending.
304 */
305 error = -EINVAL;
306 if (sk->sk_state != TCP_LISTEN)
307 goto out_err;
308
309 /* Find already established connection */
310 if (reqsk_queue_empty(queue)) {
311 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
...
318 error = inet_csk_wait_for_connect(sk, timeo);
...
350}
351EXPORT_SYMBOL(inet_csk_accept);
reqsk_queue_empty函数
183static inline int reqsk_queue_empty(struct request_sock_queue *queue)
184{
//判断accept队列是否为空(rskq_accept_head为NULL表示队列中没有等待被accept的连接)
185 return queue->rskq_accept_head == NULL;
186}
inet_csk_wait_for_connect函数
241/*
242 * Wait for an incoming connection, avoid race conditions. This must be called
243 * with the socket locked.
244 */
245static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
246{
... //无限for循环,一旦有请求则跳出
265 for (;;) {
266 prepare_to_wait_exclusive(sk_sleep(sk), &wait,
267 TASK_INTERRUPTIBLE);
...
287}
跟踪这部分代码的思路:
网卡接收到数据后,需要通知上层协议来接收并处理数据,因此TCP协议应该有一个接收数据的函数,由底层网络代码以回调(callback)的方式调用。按照这个思路,我们回头看TCP/IP协议栈的初始化过程(inet_init),看它是否把接收函数的指针注册给了网络底层代码:
1498static const struct net_protocol tcp_protocol = {
1499 .early_demux = tcp_v4_early_demux,
1500 .handler = tcp_v4_rcv,
1501 .err_handler = tcp_v4_err,
1502 .no_policy = 1,
1503 .netns_ok = 1,
1504 .icmp_strict_tag_validation = 1,
1505};
...
1674static int __init inet_init(void)
1675{
...
1708 /*
1709 * Add all the base protocols.
1710 */
1711
1712 if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
1713 pr_crit("%s: Cannot add ICMP protocol\n", __func__);
1714 if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
1715 pr_crit("%s: Cannot add UDP protocol\n", __func__);
1716 if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
1717 pr_crit("%s: Cannot add TCP protocol\n", __func__);
1718#ifdef CONFIG_IP_MULTICAST
1719 if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
1720 pr_crit("%s: Cannot add IGMP protocol\n", __func__);
1721#endif
...
1795}
tcp_v4_do_rcv函数
1419/* The socket must have it's spinlock held when we get
1420 * here.
1421 *
1422 * We have a potential double-lock case here, so even when
1423 * doing backlog processing we use the BH locking scheme.
1424 * This is because we cannot sleep with the original spinlock
1425 * held.
1426 */
1427int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1428{
...
1449 if (sk->sk_state == TCP_LISTEN) {
1450 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1451 if (!nsk)
1452 goto discard;
1453
1454 if (nsk != sk) {
...
1460 return 0;
1461 }
1462 } else
1463 sock_rps_save_rxhash(sk, skb);
1464
1465 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1466 rsk = sk;
1467 goto reset;
1468 }
...
1486}
1487EXPORT_SYMBOL(tcp_v4_do_rcv);
tcp_rcv_synsent_state_process函数
5354static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5355 const struct tcphdr *th, unsigned int len)
5356{
5357
...
5487 tcp_send_ack(sk);
...
}
1.进一步深入三次握手过程中携带SYN/ACK标志的数据收发过程(tcp_transmit_skb和tcp_v4_rcv)
2.连接建立成功后放到accept队列的代码
3.正常数据的收发过程和关闭连接的过程