tcp_v4_connect函数初始化一个对外的连接请求:创建一个SYN包并发送出去,把套接字的状态从CLOSE切换到SYN_SENT,并初始化TCP的部分选项,如数据包序列号、窗口大小、MSS、套接字传送超时等。下面分析tcp_v4_connect函数。
1、初始化工作
输入参数
sk:套接字指针。
uaddr:对端ip和端口。
addr_len:套接字地址长度。
检查地址长度和协议族;如果设置了源路由选项而且数据包目的地址不为空,则从用户给定的源路由列表中取一个IP地址(inet->opt->faddr)作为下一跳地址;如果目的地址为空则返回-EINVAL。
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
//指向高速缓冲区的路由
struct rtable *rt;
__be32 daddr, nexthop;
int tmp;
int err;
//地址长度检查
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
//协议族检查
if (usin->sin_family != AF_INET)
return -EAFNOSUPPORT;
//是否设置源路由选项
nexthop = daddr = usin->sin_addr.s_addr;
if (inet->opt && inet->opt->srr) {
if (!daddr)
return -EINVAL;
nexthop = inet->opt->faddr;
}
...
}
2、选路由
根据目的ip、目的端口、网络设备接口调用ip_route_connect选路由,选路结果通过rt返回(rt->rt_dst为该路由的目的地址),实际调用的函数是ip_route_output_flow;如果路由指向广播地址或组播地址,则释放路由并返回-ENETUNREACH错误。
...
//选路由,路由保存在rt->rt_dst中
tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
IPPROTO_TCP,
inet->inet_sport, usin->sin_port, sk, 1);
if (tmp < 0) {
if (tmp == -ENETUNREACH)
IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
return tmp;
}
//组传送地址、广播地址则返回错误
if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
ip_rt_put(rt);
return -ENETUNREACH;
}
//如果没有设置源路由ip选项,就使用路由表寻址的路由
if (!inet->opt || !inet->opt->srr)
daddr = rt->rt_dst;
if (!inet->inet_saddr)
inet->inet_saddr = rt->rt_src;
inet->inet_rcv_saddr = inet->inet_saddr;
if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
/* Reset inherited state */
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
tp->write_seq = 0;
}
...
3、连接状态切换为SYN_SENT
调用tcp_set_state设置套接字状态为TCP_SYN_SENT,并把套接字sk加入到连接管理哈希链表中,为连接分配一个临时端口。
...
//设置套接字状态为TCP_SYN_SENT
tcp_set_state(sk, TCP_SYN_SENT);
//将套接字sk放入TCP连接管理哈希链表中
err = inet_hash_connect(&tcp_death_row, sk);
if (err)
goto failure;
//为连接分配一个临时端口
err = ip_route_newports(&rt, IPPROTO_TCP,
inet->inet_sport, inet->inet_dport, sk);
if (err)
goto failure;
...
4、发送连接请求
初始化第一个序列号,调用tcp_connect函数完成建立连接,包括发送SYN:tcp_connect将创建好的SYN数据段加入到套接字发送队列,最后调用tcp_transmit_skb将数据包发送到IP层。
...
if (!tp->write_seq)
//初始化TCP数据段序列号
tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
inet->inet_daddr,
inet->inet_sport,
usin->sin_port);
inet->inet_id = tp->write_seq ^ jiffies;
//构建SYN包调用tcp_transmit_skb发送到IP层
err = tcp_connect(sk);
rt = NULL;
if (err)
goto failure;
...
5、连接建立失败
如果连接建立失败,就将TCP状态切换回CLOSE,将套接字从连接管理哈希表中移除,释放本地端口。
...
failure:
/*
* This unhashes the socket and releases the local port,
* if necessary.
*/
//失败设置套接字状态为CLOSED
tcp_set_state(sk, TCP_CLOSE);
ip_rt_put(rt);
sk->sk_route_caps = 0;
inet->inet_dport = 0;
...
tcp_v4_connect完整代码:
/*
 * tcp_v4_connect - start an outgoing IPv4 TCP connection on a socket.
 * @sk:       socket to connect
 * @uaddr:    destination address; it is read as a struct sockaddr_in
 * @addr_len: length of @uaddr in bytes
 *
 * Validates the address, looks up a route, records the destination on the
 * socket, moves the socket to TCP_SYN_SENT, hashes it with
 * inet_hash_connect(), chooses an initial write_seq when none is set and
 * finally calls tcp_connect().
 *
 * Returns 0 on success. Returns -EINVAL when @addr_len is too short or when
 * source routing is set and the destination address is zero, -EAFNOSUPPORT
 * when the family is not AF_INET, -ENETUNREACH when the route is multicast
 * or broadcast, and otherwise the error reported by ip_route_connect(),
 * inet_hash_connect(), ip_route_newports() or tcp_connect().
 */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
// Route entry filled in by ip_route_connect() below.
struct rtable *rt;
__be32 daddr, nexthop;
int tmp;
int err;
// The caller must supply at least a full struct sockaddr_in.
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
// Only AF_INET addresses are accepted.
if (usin->sin_family != AF_INET)
return -EAFNOSUPPORT;
// By default route straight to the destination. When the source-route
// option (srr) is set, a zero destination is rejected and the lookup is
// done towards inet->opt->faddr instead.
nexthop = daddr = usin->sin_addr.s_addr;
if (inet->opt && inet->opt->srr) {
if (!daddr)
return -EINVAL;
nexthop = inet->opt->faddr;
}
// Route lookup; the resulting route is returned through rt.
tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
IPPROTO_TCP,
inet->inet_sport, usin->sin_port, sk, 1);
if (tmp < 0) {
// Account for "no route" failures in the IP statistics.
if (tmp == -ENETUNREACH)
IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
return tmp;
}
// Multicast and broadcast routes are refused; the route is handed back
// through ip_rt_put() before returning.
if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
ip_rt_put(rt);
return -ENETUNREACH;
}
// Without a source-route option, take the destination from the route.
if (!inet->opt || !inet->opt->srr)
daddr = rt->rt_dst;
// If the socket has no source address yet, adopt the route's source.
if (!inet->inet_saddr)
inet->inet_saddr = rt->rt_src;
inet->inet_rcv_saddr = inet->inet_saddr;
if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
/* Reset inherited state */
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
tp->write_seq = 0;
}
// With tw_recycle enabled and no timestamp state held, try to seed the
// timestamp state from the peer entry attached to this route.
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
struct inet_peer *peer = rt_get_peer(rt);
/*
 * VJ's idea. We save last timestamp seen from
 * the destination in peer table, when entering state
 * TIME-WAIT and initialize rx_opt.ts_recent from it,
 * when trying new connection.
 */
// Only use the saved stamp if it is at most TCP_PAWS_MSL seconds old.
if (peer != NULL &&
(u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
tp->rx_opt.ts_recent = peer->tcp_ts;
}
}
// Record the destination port and address on the socket.
inet->inet_dport = usin->sin_port;
inet->inet_daddr = daddr;
// Extension header length is the IP options length, or 0 without options.
inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet->opt)
inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
/* Socket identity is still unknown (sport may be zero).
 * However we set state to SYN-SENT and not releasing socket
 * lock select source port, enter ourselves into the hash tables and
 * complete initialization after this.
 */
// Move the socket to TCP_SYN_SENT.
tcp_set_state(sk, TCP_SYN_SENT);
// Enter the socket into the TCP connection hash tables.
err = inet_hash_connect(&tcp_death_row, sk);
if (err)
goto failure;
// Refresh the route for the source/destination ports now on the socket.
// NOTE(review): the original note described this call as allocating an
// ephemeral port; the comment above suggests the source port is selected
// during hashing instead -- confirm against ip_route_newports().
err = ip_route_newports(&rt, IPPROTO_TCP,
inet->inet_sport, inet->inet_dport, sk);
if (err)
goto failure;
/* OK, now commit destination to socket. */
sk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(sk, &rt->u.dst);
// Choose an initial sequence number unless one is already set.
if (!tp->write_seq)
tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
inet->inet_daddr,
inet->inet_sport,
usin->sin_port);
// Seed the IP identification value from the sequence number and jiffies.
inet->inet_id = tp->write_seq ^ jiffies;
// Hand off to tcp_connect() to send the connection request.
err = tcp_connect(sk);
// rt is cleared here, so on failure ip_rt_put() below is passed NULL
// rather than the route (presumably because the route is owned by the
// socket after sk_setup_caps() -- TODO confirm).
rt = NULL;
if (err)
goto failure;
return 0;
failure:
/*
 * This unhashes the socket and releases the local port,
 * if necessary.
 */
// On failure, put the socket back into TCP_CLOSE and undo the
// destination setup.
tcp_set_state(sk, TCP_CLOSE);
ip_rt_put(rt);
sk->sk_route_caps = 0;
inet->inet_dport = 0;
return err;
}