网络篇之socket
网络篇之bind
网络篇之listen
网络篇之accept
网络篇之connect
一 服务端发送SYN+ACK
客户端调用connect发起连接请求时,发送了第一次握手(SYN包) 。
我们先回到服务端的角度,看下服务端接收第一次握手包时,都有哪些处理。此时的服务端,处于LISTEN状态。
数据包经过数据链路层,进入网络层,再经过netif_rx_action -> ip_rcv -> ip_local_deliver -> ip_local_deliver_finish到达传输层的tcp_v4_rcv。
int tcp_v4_rcv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
int sdif = inet_sdif(skb);
const struct iphdr *iph;
const struct tcphdr *th;
bool refcounted;
struct sock *sk;
int ret;
// 不是本机的数据包
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
/* Count it even if it's bad */
__TCP_INC_STATS(net, TCP_MIB_INSEGS);
// 检查tcp协议头的正确性
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
th = (const struct tcphdr *)skb->data;
// 头部长度检查
if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
goto bad_packet;
if (!pskb_may_pull(skb, th->doff * 4))
goto discard_it;
/* An explanation is required here, I think.
* Packet length and doff are validated by header prediction,
* provided case of th->doff==0 is eliminated.
* So, we defer the checks. */
// 检查校验和
if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
goto csum_error;
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
lookup:
// 查找匹配的套接字
sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
th->dest, sdif, &refcounted);
if (!sk)
// 丢弃
goto no_tcp_socket;
process:
// 套接字状态是time_wait就调转到超时处理
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
if (sk->sk_state == TCP_NEW_SYN_RECV) {
... ...
}
if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto discard_and_relse;
}
// ipsec策略检查处理
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
if (tcp_v4_inbound_md5_hash(sk, skb))
goto discard_and_relse;
nf_reset(skb);
if (tcp_filter(sk, skb))
goto discard_and_relse;
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
skb->dev = NULL;
if (sk->sk_state == TCP_LISTEN) {
ret = tcp_v4_do_rcv(sk, skb);
goto put_and_return;
}
... ...
put_and_return:
if (refcounted)
sock_put(sk); // 应用计数减1
return ret;
no_tcp_socket:
... ...
discard_it:
/* Discard frame. */
kfree_skb(skb);
return 0;
discard_and_relse:
sk_drops_add(sk, skb);
if (refcounted)
sock_put(sk);
goto discard_it;
do_time_wait:
... ...
}
tcp_v4_rcv中对数据包进行了一系列的检查,检查完成后,调用了tcp_v4_do_rcv -> tcp_rcv_state_process继续处理。
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcphdr *th = tcp_hdr(skb);
struct request_sock *req;
int queued = 0;
bool acceptable;
switch (sk->sk_state) {
case TCP_CLOSE:
goto discard;
case TCP_LISTEN:
if (th->ack)
return 1;
if (th->rst)
goto discard;
if (th->syn) {
if (th->fin)
goto discard;
/* It is possible that we process SYN packets from backlog,
* so we need to make sure to disable BH right there.
*/
local_bh_disable();
acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
local_bh_enable();
if (!acceptable)
return 1;
consume_skb(skb);
return 0;
}
goto discard;
case TCP_SYN_SENT:
... ...
}
tcp_mstamp_refresh(tp);
tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk;
if (req) {
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
if (!tcp_check_req(sk, skb, req, true))
goto discard;
}
if (!th->ack && !th->rst && !th->syn)
goto discard;
if (!tcp_validate_incoming(sk, skb, th, 0))
return 0;
/* step 5: check the ACK field */
acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
FLAG_UPDATE_TS_RECENT |
FLAG_NO_CHALLENGE_ACK) > 0;
if (!acceptable) {
if (sk->sk_state == TCP_SYN_RECV)
return 1; /* send one RST */
tcp_send_challenge_ack(sk, skb);
goto discard;
}
switch (sk->sk_state) {
case TCP_SYN_RECV:
... ...
case TCP_FIN_WAIT1: {
... ...
break;
}
case TCP_CLOSING:
if (tp->snd_una == tp->write_seq) {
tcp_time_wait(sk, TCP_TIME_WAIT, 0);
goto discard;
}
break;
case TCP_LAST_ACK:
if (tp->snd_una == tp->write_seq) {
tcp_update_metrics(sk);
tcp_done(sk);
goto discard;
}
break;
}
/* step 6: check the URG bit */
tcp_urg(sk, skb, th);
/* step 7: process the segment text */
switch (sk->sk_state) {
case TCP_CLOSE_WAIT:
case TCP_CLOSING:
case TCP_LAST_ACK:
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
/* fall through */
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
... ...
case TCP_ESTABLISHED:
tcp_data_queue(sk, skb);
queued = 1;
break;
}
/* tcp_data could move socket to TIME-WAIT */
if (sk->sk_state != TCP_CLOSE) {
tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
}
if (!queued) {
discard:
tcp_drop(sk, skb);
}
return 0;
}
tcp_rcv_state_process方法中,会根据连接不同的状态进行不同的处理。
此时服务端处于LISTEN状态,因此switch语句走到 TCP_LISTEN分支。
1. 客户端构造SYN包时,ack字段设置的是0,因此ack非0,则返回错误;
2. 如果syn为0,则数据包异常,丢弃此包;
3. 其他情况,调用icsk->icsk_af_ops->conn_request,实际调用的是tcp_v4_conn_request。
int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb)
{
struct tcp_fastopen_cookie foc = { .len = -1 };
__u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
struct tcp_options_received tmp_opt;
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
struct sock *fastopen_sk = NULL;
struct request_sock *req;
bool want_cookie = false;
struct dst_entry *dst;
struct flowi fl;
... ...
if (sk_acceptq_is_full(sk)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
goto drop;
}
req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
if (!req)
goto drop;
tcp_rsk(req)->af_specific = af_ops;
tcp_rsk(req)->ts_off = 0;
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = af_ops->mss_clamp;
tmp_opt.user_mss = tp->rx_opt.user_mss;
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
want_cookie ? NULL : &foc);
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb, sk);
inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
/* Note: tcp_v6_init_req() might override ir_iif for link locals */
inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
af_ops->init_req(req, sk, skb);
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
if (tmp_opt.tstamp_ok)
tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
dst = af_ops->route_req(sk, &fl, req);
if (!dst)
goto drop_and_free;
... ...
tcp_rsk(req)->snt_isn = isn;
tcp_rsk(req)->txhash = net_tx_rndhash();
tcp_openreq_init_rwin(req, sk, dst);
if (!want_cookie) {
tcp_reqsk_record_syn(sk, req, skb);
fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
}
if (fastopen_sk) {
af_ops->send_synack(fastopen_sk, dst, &fl, req,
&foc, TCP_SYNACK_FASTOPEN);
/* Add the child socket directly into the accept queue */
inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
sk->sk_data_ready(sk);
bh_unlock_sock(fastopen_sk);
sock_put(fastopen_sk);
} else {
tcp_rsk(req)->tfo_listener = false;
if (!want_cookie)
inet_csk_reqsk_queue_hash_add(sk, req,
tcp_timeout_init((struct sock *)req));
af_ops->send_synack(sk, dst, &fl, req, &foc,
!want_cookie ? TCP_SYNACK_NORMAL :
TCP_SYNACK_COOKIE);
if (want_cookie) {
reqsk_free(req);
return 0;
}
}
reqsk_put(req);
return 0;
drop_and_release:
dst_release(dst);
drop_and_free:
reqsk_free(req);
drop:
tcp_listendrop(sk);
return 0;
}
1. inet_reqsk_alloc申请一个struct request_sock类型的结构,并将其状态改成SYN_RECV:
struct inet_request_sock *ireq = inet_rsk(req);
ireq->ireq_state = TCP_NEW_SYN_RECV;
2. 调用tcp_openreq_init设置源/目标端口
ireq->ir_rmt_port = tcp_hdr(skb)->source; // 目的端口,取skb的源端口
ireq->ir_num = ntohs(tcp_hdr(skb)->dest); // 本地端口,取skb的目的端口
3. 调用af_ops->init_req (tcp_v4_init_req)设置源/目标地址
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
4. 最后调用af_ops->send_synack (tcp_v4_send_synack)
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
enum tcp_synack_type synack_type)
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct flowi4 fl4;
int err = -1;
struct sk_buff *skb;
/* First, grab a route. */
if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
return -1;
skb = tcp_make_synack(sk, dst, req, foc, synack_type);
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
ireq_opt_deref(ireq));
err = net_xmit_eval(err);
}
return err;
}
1. 调用tcp_make_synack根据请求参数req组装发送包skb,并设置tcp头信息
设置syn和ack标志
th->syn = 1; // SYN和ACK标志置1
th->ack = 1;
设置源/目标端口
th->source = htons(ireq->ir_num);
th->dest = ireq->ir_rmt_port;
2. 调用ip_build_and_send_pkt设置ip头信息,并通过ip_local_out发送数据包