TCP/IP tcp.c

0. tcp proto

 

struct proto tcp_prot = { sock_wmalloc, sock_rmalloc, sock_wfree, sock_rfree, sock_rspace, sock_wspace, tcp_close, tcp_read, tcp_write, tcp_sendto, tcp_recvfrom, ip_build_header, tcp_connect, tcp_accept, ip_queue_xmit, tcp_retransmit, tcp_write_wakeup, tcp_read_wakeup, tcp_rcv, tcp_select, tcp_ioctl, NULL, tcp_shutdown, tcp_setsockopt, tcp_getsockopt, 128, 0, {NULL,}, "TCP", 0, 0 };

 

1. tcp_mib => snmp

 

/*
 * TCP statistics block exported for SNMP.
 *
 * Twelve unsigned long counters, one per line. The member names appear to
 * follow the TCP group of MIB-II (RFC 1213) -- consult that document for
 * the exact meaning of each object. Member order is part of the layout;
 * do not reorder.
 */
struct tcp_mib {
	unsigned long TcpRtoAlgorithm;
	unsigned long TcpRtoMin;
	unsigned long TcpRtoMax;
	unsigned long TcpMaxConn;
	unsigned long TcpActiveOpens;
	unsigned long TcpPassiveOpens;
	unsigned long TcpAttemptFails;
	unsigned long TcpEstabResets;
	unsigned long TcpCurrEstab;
	unsigned long TcpInSegs;
	unsigned long TcpOutSegs;
	unsigned long TcpRetransSegs;
};

 

2. tcp_select_window

 

/* * This routine picks a TCP windows for a socket based on * the following constraints * * 1. The window can never be shrunk once it is offered (RFC 793) * 2. We limit memory per socket * * For now we use NET2E3's heuristic of offering half the memory * we have handy. All is not as bad as this seems however because * of two things. Firstly we will bin packets even within the window * in order to get the data we are waiting for into the memory limit. * Secondly we bin common duplicate forms at receive time * Better heuristics welcome */ int tcp_select_window(struct sock *sk) { int new_window = sk->prot->rspace(sk); if(sk->window_clamp) new_window=min(sk->window_clamp,new_window); /* * Two things are going on here. First, we don't ever offer a * window less than min(sk->mss, MAX_WINDOW/2). This is the * receiver side of SWS as specified in RFC1122. * Second, we always give them at least the window they * had before, in order to avoid retracting window. This * is technically allowed, but RFC1122 advises against it and * in practice it causes trouble. * * Fixme: This doesn't correctly handle the case where * new_window > sk->window but not by enough to allow for the * shift in sequence space. */ if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window) return(sk->window); return(new_window); }

 

3. 对于重发的数据包,其 IP 首部中用于标识数据包的 id 字段会被赋予新值,以区别于之前发送的数据包。只要 TCP 首部中的本地序列号不变,它所表示的数据就仍然是原先发送的数据,因此不会在本地人为地造成数据包乱序。

 

/*
 *	A socket has timed out on its send queue and wants to do a
 *	little retransmitting. Currently this means TCP.
 *
 *	NOTE: this is an excerpt. Each "..." below stands for code that was
 *	left out of the listing, including the declarations of skb, dev,
 *	iph, th and size.
 */
void tcp_do_retransmit(struct sock *sk, int all)
{
	...
	/*
	 * In general it's OK just to use the old packet. However we
	 * need to use the current ack and window fields. Urg and
	 * urg_ptr could possibly stand to be updated as well, but we
	 * don't keep the necessary data. That shouldn't be a problem,
	 * if the other end is doing the right thing. Since we're
	 * changing the packet, we have to issue a new IP identifier.
	 */

	/* The IP header sits right after the link-level header in the buffer. */
	iph = (struct iphdr *)(skb->data + dev->hard_header_len);
	/* The TCP header follows the IP header; ihl is scaled by 4 to get bytes. */
	th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
	/* Bytes from the start of the TCP header to the end of the buffer. */
	size = skb->len - (((unsigned char *) th) - skb->data);

	/*
	 * Note: We ought to check for window limits here but
	 * currently this is done (less efficiently) elsewhere.
	 * We do need to check for a route change but can't handle
	 * that until we have the new 1.3.x buffers in.
	 */

	/*
	 * Give the resent packet a fresh IP id, then call ip_send_check()
	 * (presumably to recompute the IP header checksum -- confirm in ip.c).
	 * The TCP sequence number is not touched here.
	 */
	iph->id = htons(ip_id_count++);
	ip_send_check(iph);
	...
}

 

4. 发送数据包

 

/* * This is the main buffer sending routine. We queue the buffer * having checked it is sane seeming. */ static void tcp_send_skb(struct sock *sk, struct sk_buff *skb) { int size; struct tcphdr * th = skb->h.th; /* * length of packet (not counting length of pre-tcp headers) */ size = skb->len - ((unsigned char *) th - skb->data); /* * Sanity check it.. */ if (size < sizeof(struct tcphdr) || size > skb->len) { printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)/n", skb, skb->data, th, skb->len); kfree_skb(skb, FREE_WRITE); return; } /* * If we have queued a header size packet.. (these crash a few * tcp stacks if ack is not set) */ if (size == sizeof(struct tcphdr)) { /* If it's got a syn or fin it's notionally included in the size..*/ if(!th->syn && !th->fin) { printk("tcp_send_skb: attempt to queue a bogon./n"); kfree_skb(skb,FREE_WRITE); return; } } /* * Actual processing. */ tcp_statistics.TcpOutSegs++; skb->h.seq = ntohl(th->seq) + size - 4*th->doff; /* * We must queue if * * a) The right edge of this frame exceeds the window * b) We are retransmitting (Nagle's rule) * c) We have too many packets 'in flight' */ if (after(skb->h.seq, sk->window_seq) || (sk->retransmits && sk->ip_xmit_timeout == TIME_WRITE) || sk->packets_out >= sk->cong_window) { /* checksum will be supplied by tcp_write_xmit. So * we shouldn't need to set it at all. I'm being paranoid */ th->check = 0; if (skb->next != NULL) { printk("tcp_send_partial: next != NULL/n"); skb_unlink(skb); } skb_queue_tail(&sk->write_queue, skb); /* * If we don't fit we have to start the zero window * probes. This is broken - we really need to do a partial * send _first_ (This is what causes the Cisco and PC/TCP * grief). 
*/ if (before(sk->window_seq, sk->write_queue.next->h.seq) && sk->send_head == NULL && sk->ack_backlog == 0) reset_xmit_timer(sk, TIME_PROBE0, sk->rto); } else { /* * This is going straight out */ th->ack_seq = ntohl(sk->acked_seq); th->window = ntohs(tcp_select_window(sk)); tcp_send_check(th, sk->saddr, sk->daddr, size, sk); sk->sent_seq = sk->write_seq; /* * This is mad. The tcp retransmit queue is put together * by the ip layer. This causes half the problems with * unroutable FIN's and other things. */ sk->prot->queue_xmit(sk, skb->dev, skb, 0); /* * Set for next retransmit based on expected ACK time. * FIXME: We set this every time which means our * retransmits are really about a window behind. */ reset_xmit_timer(sk, TIME_WRITE, sk->rto); } }

 

发送用户数据

 

/*
 *	Copy data from a user buffer into a socket and start the
 *	transmit system.
 *
 *	Only the prototype is shown here; see the definition for what each
 *	argument means and what the return value is.
 */
static int tcp_write(struct sock *sk,
		     unsigned char *from,
		     int len,
		     int nonblock,
		     unsigned flags);

 

发送过程中的两个队列:
1)写队列:对应 sock 结构中 write_queue 字段指向的队列,是一个双向队列。该队列接收应用层发送的数据(传输层将数据封装为数据包后,挂接到 write_queue 队列中),队列中的数据包尚未发送出去。
2)重发队列:对应 sock 结构中 send_head、send_tail 字段指向的队列,是一个单向队列,send_head 指向队列头部,send_tail 指向队列尾部。传输层(实际上是网络层)将数据包发送出去以后,会把数据包缓存到该队列中,以便在数据包丢失时进行重发。

5. 接收数据

/*
 *	Copy data from a sock struct into the user buffer.
 *
 *	Only the prototype is shown here; see the definition for what each
 *	argument means and what the return value is.
 */
static int tcp_read(struct sock *sk,
		    unsigned char *to,
		    int len,
		    int nonblock,
		    unsigned flags);

 

6. 半关闭

 

/*
 *	Shut down the sending side of a connection.
 *
 *	Much like close, except that the receive side is not shut down and
 *	sk->dead is not set to 1.
 *
 *	Only the prototype is shown here; see the definition for how the
 *	"how" argument is interpreted.
 */
void tcp_shutdown(struct sock *sk,
		  int how);

 

7. 处理一个新的连接请求

 

/*
 *	This routine handles a connection request.
 *	It should make sure we haven't already responded.
 *	Because of the way BSD works, we have to send a syn/ack now.
 *	This also means it will be harder to close a socket which is
 *	listening.
 *
 *	Only the prototype is shown here (the body is not part of this
 *	listing); see the definition for what each argument means.
 */
static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
			     unsigned long daddr, unsigned long saddr,
			     struct options *opt, struct device *dev,
			     unsigned long seq);

 

 

 

你可能感兴趣的:(TCP/IP tcp.c)