在TCP/IP协议栈中,3层对应的是IP层,4层对应TCP层,在这里,从3层到4层转化主要做了两件重要的事情:IP分片重组和TCP会话重组。本篇先分析其中TCP会话重组的部分(自顶向下嘛,哈哈)。
OK,先看下重要的数据结构,在tcp.h中:
/*
 * One buffered TCP segment.
 *
 * next/prev make this a doubly linked queue.  Every TCP session
 * (ip:port <-> ip:port) keeps two such queues, one per direction.
 * Each skbuff corresponds to one IP packet seen on the wire; a TCP
 * stream is simply one IP packet after another.
 */
struct skbuff {
    struct skbuff *next;
    struct skbuff *prev;
    void *data;         /* heap copy of the segment payload (filled by tcp_queue) */
    u_int len;          /* payload length in bytes */
    u_int truesize;     /* size charged against half_stream.rmem_alloc */
    u_int urg_ptr;      /* urgent pointer taken from the TCP header */
    char fin;           /* non-zero when the segment carried TH_FIN */
    char urg;           /* non-zero when the segment carried TH_URG */
    u_int seq;          /* sequence number of the segment, host byte order */
    u_int ack;          /* NOTE(review): not written by the code shown here */
};
这个结构体就是模仿的内核中的sk_buff结构体,只不过比内核中的要小很多(你懂的,因为这里只做会话重组)。
下面是在nids.h中的
/*
 * Four-tuple identifying one TCP connection.
 * NOTE(review): byte order of the fields is not visible here -- confirm
 * against the code that fills them in.
 */
struct tuple4 {
    u_short source;     /* presumably the source port */
    u_short dest;       /* presumably the destination port */
    u_int saddr;        /* presumably the source IPv4 address */
    u_int daddr;        /* presumably the destination IPv4 address */
};
这是用来表示一个TCP连接的,不解释。
struct half_stream { char state; char collect; char collect_urg; char *data; //这里存放着已经按顺序集齐排列好的数据 int offset; int count; //这里存放data中数据的字节数 int count_new; //这里存放data中还没回调过的数据的字节数 int bufsize; int rmem_alloc; int urg_count; u_int acked; u_int seq; u_int ack_seq; u_int first_data_seq; u_char urgdata; u_char count_new_urg; u_char urg_seen; u_int urg_ptr; u_short window; u_char ts_on; //tcp时间戳选项是否打开 u_char wscale_on; //窗口扩展选项是否打开 u_int curr_ts; u_int wscale; //下面是ip包缓冲区 struct skbuff *list; struct skbuff *listtail; }
这个是用来表示“半个TCP会话”,其实就是一个方向上的TCP流。
还有
/*
 * A complete TCP session: the address four-tuple plus one half_stream
 * for each direction, together with the links used by the library's
 * internal bookkeeping lists.
 */
struct tcp_stream {
    struct tuple4 addr;             /* endpoints of the connection */
    char nids_state;                /* NIDS_JUST_EST / NIDS_DATA / NIDS_RESET / NIDS_CLOSE ... */
    struct lurker_node *listeners;  /* callbacks registered for this session */
    struct half_stream client;      /* state of the side that sent the first SYN */
    struct half_stream server;      /* state of the side that answered it */
    struct tcp_stream *next_node;   /* presumably the hash-bucket chain -- confirm */
    struct tcp_stream *prev_node;
    int hash_index;                 /* presumably the slot in tcp_stream_table -- confirm */
    struct tcp_stream *next_time;   /* NOTE(review): usage not shown in this excerpt */
    struct tcp_stream *prev_time;
    int read;
    struct tcp_stream *next_free;   /* link in the free list built by tcp_init */
    void *user;
};
显然,这是用来表示一个完整的TCP会话的。最后是 `static struct tcp_stream **tcp_stream_table;`,它是一个TCP会话指针的数组,其实就是哈希表。
下面来看处理过程,先是初始化:
int tcp_init(int size) { ... //初始化全局tcp会话哈希表 tcp_stream_table_size = size; tcp_stream_table = calloc(tcp_stream_table_size, sizeof(char *)); if (!tcp_stream_table) { nids_params.no_mem("tcp_init"); return -1; } //设置最大会话数,为了哈希的效率,哈希表的元素个数上限设为3/4表大小 max_stream = 3 * tcp_stream_table_size / 4; //先将max_stream个tcp会话结构体申请好,放着(避免后面陆陆续续申请浪费时间)。 streams_pool = (struct tcp_stream *) malloc((max_stream + 1) * sizeof(struct tcp_stream)); if (!streams_pool) { nids_params.no_mem("tcp_init"); return -1; } //ok,将这个数组初始化成链表 for (i = 0; i < max_stream; i++) streams_pool[i].next_free = &(streams_pool[i + 1]); streams_pool[max_stream].next_free = 0; free_streams = streams_pool; ... return 0; }
很简单,做了两件事:1.初始化tcp会话哈希表。2.初始化会话池。这个初始化函数只在库初始化时执行一次。
初始化完成之后,程序就进入pcap_loop了。nids注册的回调函数是nids_pcap_handler,它先做IP分片重组(留到下篇再说),如果重组出来的是TCP包,便进入process_tcp函数,TCP会话重组从这里开始。来看看。
/*
 * Handle one TCP segment: create or look up its session, follow the
 * three-way handshake, notify the registered callbacks about
 * establishment / reset / close, and hand payload and FIN segments to
 * tcp_queue() for reassembly.
 *
 * data:   start of the IP header of the packet.
 * skblen: passed through unchanged to tcp_queue().
 *
 * Declarations and sanity checks are elided ("...") in this excerpt.
 */
void process_tcp(u_char * data, int skblen){
    /* Locate the IP header and, from its header length, the TCP header. */
    struct ip *this_iphdr = (struct ip *)data;
    struct tcphdr *this_tcphdr = (struct tcphdr *)(data + 4 * this_iphdr->ip_hl);
    ... /* sanity-check code omitted here */

    /* Look the session up; if it is unknown, decide whether to create it. */
    if (!(a_tcp = find_stream(this_tcphdr, this_iphdr, &from_client))) {
        /*
         * A SYN without ACK and without RST is the first packet of a
         * session (the client's opening SYN): create a new session.
         */
        if ((this_tcphdr->th_flags & TH_SYN) && !(this_tcphdr->th_flags & TH_ACK) && !(this_tcphdr->th_flags & TH_RST))
            add_new_tcp(this_tcphdr, this_iphdr);
        /* Anything else for an unknown session is ignored. */
        return;
    }

    /* Pick sender (snd) and receiver (rcv) according to the direction. */
    if (from_client) {
        snd = &a_tcp->client;
        rcv = &a_tcp->server;
    } else {
        rcv = &a_tcp->client;
        snd = &a_tcp->server;
    }

    /* A SYN on an already known session. */
    if ((this_tcphdr->th_flags & TH_SYN)) {
        /*
         * The client's bare SYN was handled above, so the only SYN
         * accepted here is the server's SYN+ACK while the client is in
         * TCP_SYN_SENT and the server still in TCP_CLOSE.
         */
        if (from_client || a_tcp->client.state != TCP_SYN_SENT || a_tcp->server.state != TCP_CLOSE || !(this_tcphdr->th_flags & TH_ACK))
            return;
        /* Ignore a SYN+ACK that acknowledges the wrong sequence number. */
        if (a_tcp->client.seq != ntohl(this_tcphdr->th_ack))
            return;
        /*
         * Second handshake packet: initialise the server half
         * (state, sequence numbers, window).
         */
        a_tcp->server.state = TCP_SYN_RECV;
        a_tcp->server.seq = ntohl(this_tcphdr->th_seq) + 1;
        a_tcp->server.first_data_seq = a_tcp->server.seq;
        a_tcp->server.ack_seq = ntohl(this_tcphdr->th_ack);
        a_tcp->server.window = ntohs(this_tcphdr->th_win);
        /*
         * TCP options.  Timestamps: stay enabled only if both sides
         * sent the option.
         */
        if (a_tcp->client.ts_on) {
            a_tcp->server.ts_on = get_ts(this_tcphdr, &a_tcp->server.curr_ts);
            if (!a_tcp->server.ts_on)
                a_tcp->client.ts_on = 0;
        } else
            a_tcp->server.ts_on = 0;
        /*
         * Window scaling: likewise; when it is off both factors are
         * reset to 1 so that window * wscale is the plain window.
         */
        if (a_tcp->client.wscale_on) {
            a_tcp->server.wscale_on = get_wscale(this_tcphdr, &a_tcp->server.wscale);
            if (!a_tcp->server.wscale_on) {
                a_tcp->client.wscale_on = 0;
                a_tcp->client.wscale = 1;
                a_tcp->server.wscale = 1;
            }
        } else {
            a_tcp->server.wscale_on = 0;
            a_tcp->server.wscale = 1;
        }
        /* SYN handled. */
        return;
    }

    /*
     * Window check.  A segment is dropped when it is NOT an empty
     * segment sitting exactly at rcv->ack_seq, AND it either starts at
     * or beyond the right edge of the window or ends before the left
     * edge.
     */
    if ( ! 
( !datalen && ntohl(this_tcphdr->th_seq) == rcv->ack_seq ) /* not "empty and exactly in sequence" */
        &&
        ( !before(ntohl(this_tcphdr->th_seq), rcv->ack_seq + rcv->window*rcv->wscale) || /* starts at/after the right edge */
          before(ntohl(this_tcphdr->th_seq) + datalen, rcv->ack_seq) /* ends before the left edge */
        ) )
        /* outside the window: give up on this packet */
        return;

    /*
     * RST: tell the listeners (only if the session had reached
     * NIDS_DATA) and then destroy the session.
     */
    if ((this_tcphdr->th_flags & TH_RST)) {
        if (a_tcp->nids_state == NIDS_DATA) {
            struct lurker_node *i;
            a_tcp->nids_state = NIDS_RESET;
            /* invoke every registered hook */
            for (i = a_tcp->listeners; i; i = i->next)
                (i->item) (a_tcp, &i->data);
        }
        nids_free_tcp_stream(a_tcp);
        return;
    }

    /*
     * PAWS check: drop the segment if it carries a timestamp that lies
     * before the last timestamp recorded for the sender.
     */
    if (rcv->ts_on && get_ts(this_tcphdr, &tmp_ts) && before(tmp_ts, snd->curr_ts))
        return;

    /* ACK, part 1: completion of the handshake. */
    if ((this_tcphdr->th_flags & TH_ACK)) {
        /* From the client while both sides are still mid-handshake. */
        if (from_client && a_tcp->client.state == TCP_SYN_SENT && a_tcp->server.state == TCP_SYN_RECV) {
            /* Correct ack number: third handshake packet, connection is up. */
            if (ntohl(this_tcphdr->th_ack) == a_tcp->server.seq) {
                a_tcp->client.state = TCP_ESTABLISHED; /* update client state */
                a_tcp->client.ack_seq = ntohl(this_tcphdr->th_ack); /* update ack number */
                {
                    struct proc_node *i;
                    struct lurker_node *j;
                    /*
                     * NOTE(review): this local shadows the function
                     * parameter `data` and is not initialised here; the
                     * callback is presumably expected to fill it in.
                     */
                    void *data;
                    a_tcp->server.state = TCP_ESTABLISHED; /* update server state */
                    a_tcp->nids_state = NIDS_JUST_EST; /* state the callbacks see during this notification */
                    /*
                     * Announce the new connection to every registered
                     * processor.  A processor signals interest by
                     * raising one of the collect flags; the flags are
                     * compared before/after the call to find out which.
                     */
                    for (i = tcp_procs; i; i = i->next) {
                        char whatto = 0;
                        char cc = a_tcp->client.collect;
                        char sc = a_tcp->server.collect;
                        char ccu = a_tcp->client.collect_urg;
                        char scu = a_tcp->server.collect_urg;
                        (i->item) (a_tcp, &data); /* the callback */
                        if (cc < a_tcp->client.collect)
                            whatto |= COLLECT_cc;
                        if (ccu < a_tcp->client.collect_urg)
                            whatto |= COLLECT_ccu;
                        if (sc < a_tcp->server.collect)
                            whatto |= COLLECT_sc;
                        if (scu < a_tcp->server.collect_urg)
                            whatto |= COLLECT_scu;
                        /*
                         * With one_loop_less set, a collect value that
                         * has reached 2 or more is put back to its
                         * previous value and the matching bit cleared.
                         */
                        if (nids_params.one_loop_less) {
                            if (a_tcp->client.collect >=2) {
                                a_tcp->client.collect=cc;
                                whatto&=~COLLECT_cc;
                            }
                            if (a_tcp->server.collect >=2 ) {
                                a_tcp->server.collect=sc;
                                whatto&=~COLLECT_sc;
                            }
                        }
                        /* Interested processors become listeners of this session. */
                        if (whatto) {
                            j = mknew(struct lurker_node);
                            j->item = i->item;
                            j->data = 
data;
                            j->whatto = whatto;
                            j->next = a_tcp->listeners;
                            a_tcp->listeners = j;
                        }
                    }
                    /* Nobody is interested: drop the session right away. */
                    if (!a_tcp->listeners) {
                        nids_free_tcp_stream(a_tcp);
                        return;
                    }
                    a_tcp->nids_state = NIDS_DATA;
                }
            }
            // return;
        }
    }
    /* Handshake handling ends here. */

    /* ACK, part 2: acknowledgement bookkeeping and connection teardown. */
    if ((this_tcphdr->th_flags & TH_ACK)) {
        /* let handle_ack process the acknowledgement number first */
        handle_ack(snd, ntohl(this_tcphdr->th_ack));
        /*
         * Advance the FIN state; once both directions are
         * FIN_CONFIRMED, notify the listeners and free the session.
         */
        if (rcv->state == FIN_SENT)
            rcv->state = FIN_CONFIRMED;
        if (rcv->state == FIN_CONFIRMED && snd->state == FIN_CONFIRMED) {
            struct lurker_node *i;
            a_tcp->nids_state = NIDS_CLOSE;
            for (i = a_tcp->listeners; i; i = i->next)
                (i->item) (a_tcp, &i->data);
            nids_free_tcp_stream(a_tcp);
            return;
        }
    }

    /* Segments that carry payload and/or a FIN go to the reassembly queue. */
    if (datalen + (this_tcphdr->th_flags & TH_FIN) > 0)
        /* payload starts th_off 32-bit words after the TCP header start */
        tcp_queue(a_tcp, this_tcphdr, snd, rcv, (char *) (this_tcphdr) + 4 * this_tcphdr->th_off, datalen, skblen);
    /* remember the window advertised by the sender */
    snd->window = ntohs(this_tcphdr->th_win);
    /*
     * More than 65535 bytes are charged to the receiver's out-of-order
     * queue: prune it (prune_queue is defined elsewhere).
     */
    if (rcv->rmem_alloc > 65535)
        prune_queue(rcv, this_tcphdr);
    if (!a_tcp->listeners)
        nids_free_tcp_stream(a_tcp);
}
好了,tcp包的基本处理流程就这些了,主要做了连接的建立、释放、状态迁移这些工作,下面看看连接的缓冲区是如何维护的(主要就是如何更新的)。来看tcp_queue函数:
static void tcp_queue(struct tcp_stream * a_tcp, struct tcphdr * this_tcphdr, struct half_stream * snd, struct half_stream * rcv, char *data, int datalen, int skblen ) { u_int this_seq = ntohl(this_tcphdr->th_seq); struct skbuff *pakiet, *tmp; /* * Did we get anything new to ack? */ //EXP_SEQ是目前已集齐的数据流水号,我们希望收到从这里开始的数据 //先判断数据是不是在EXP_SEQ之前开始 if (!after(this_seq, EXP_SEQ)) { //再判断数据长度是不是在EXP_SEQ之后,如果是,说明有新数据,否则是重发的包,无视之 if (after(this_seq + datalen + (this_tcphdr->th_flags & TH_FIN), EXP_SEQ)) { /* the packet straddles our window end */ get_ts(this_tcphdr, &snd->curr_ts); //ok,更新集齐的数据区,值得一提的是add_from_skb函数一旦发现集齐了一段数据之后 //便立刻调用notify函数,在notify函数里面将数据推给回调方 add_from_skb(a_tcp, rcv, snd, (u_char *)data, datalen, this_seq, (this_tcphdr->th_flags & TH_FIN), (this_tcphdr->th_flags & TH_URG), ntohs(this_tcphdr->th_urp) + this_seq - 1); /* * Do we have any old packets to ack that the above * made visible? (Go forward from skb) */ //此时EXP_SEQ有了变化了,看看缓冲区里的包有没有符合条件能用同样的方法处理掉的 //有就处理掉,然后释放 pakiet = rcv->list; while (pakiet) { if (after(pakiet->seq, EXP_SEQ)) break; if (after(pakiet->seq + pakiet->len + pakiet->fin, EXP_SEQ)) { add_from_skb(a_tcp, rcv, snd, pakiet->data, pakiet->len, pakiet->seq, pakiet->fin, pakiet->urg, pakiet->urg_ptr + pakiet->seq - 1); } rcv->rmem_alloc -= pakiet->truesize; if (pakiet->prev) pakiet->prev->next = pakiet->next; else rcv->list = pakiet->next; if (pakiet->next) pakiet->next->prev = pakiet->prev; else rcv->listtail = pakiet->prev; tmp = pakiet->next; free(pakiet->data); free(pakiet); pakiet = tmp; } } else return; } //这里说明现在这个包是个乱序到达的(数据开始点超过了EXP_SEQ),放到缓冲区等待处理,注意保持缓冲区有序 else { struct skbuff *p = rcv->listtail; pakiet = mknew(struct skbuff); pakiet->truesize = skblen; rcv->rmem_alloc += pakiet->truesize; pakiet->len = datalen; pakiet->data = malloc(datalen); if (!pakiet->data) nids_params.no_mem("tcp_queue"); memcpy(pakiet->data, data, datalen); pakiet->fin = (this_tcphdr->th_flags & TH_FIN); /* Some Cisco - at least - hardware accept to close a 
TCP connection * even though packets were lost before the first TCP FIN packet and * never retransmitted; this violates RFC 793, but since it really * happens, it has to be dealt with... The idea is to introduce a 10s * timeout after TCP FIN packets were sent by both sides so that * corresponding libnids resources can be released instead of waiting * for retransmissions which will never happen. -- Sebastien Raveau */ if (pakiet->fin) { snd->state = TCP_CLOSING; if (rcv->state == FIN_SENT || rcv->state == FIN_CONFIRMED) add_tcp_closing_timeout(a_tcp); } pakiet->seq = this_seq; pakiet->urg = (this_tcphdr->th_flags & TH_URG); pakiet->urg_ptr = ntohs(this_tcphdr->th_urp); for (;;) { if (!p || !after(p->seq, this_seq)) break; p = p->prev; } if (!p) { pakiet->prev = 0; pakiet->next = rcv->list; if (rcv->list) rcv->list->prev = pakiet; rcv->list = pakiet; if (!rcv->listtail) rcv->listtail = pakiet; } else { pakiet->next = p->next; p->next = pakiet; pakiet->prev = p; if (pakiet->next) pakiet->next->prev = pakiet; else rcv->listtail = pakiet; } } }