在TCP/IP协议栈中,3层对应的是IP层,4层对应TCP层。在libnids中,从3层到4层的转化主要做了两件重要的事情:IP分片重组和TCP会话重组。本篇先分析其中TCP会话重组的部分(自顶向下嘛,哈哈)。
OK,先看下重要的数据结构,在tcp.h中:
/* One buffered TCP segment, kept as a node of a doubly linked queue. */
struct skbuff
{
// next/prev make this a doubly linked queue.
// Every TCP session (ip:port <-> ip:port) maintains two such queues, one per direction.
// Each node holds one captured packet of that direction's stream.
struct skbuff *next;
struct skbuff *prev;
void *data; // heap copy of the segment payload (allocated and filled in tcp_queue)
u_int len; // payload length in bytes (tcp_queue stores datalen here)
u_int truesize; // tcp_queue stores skblen here; it is what gets charged to half_stream.rmem_alloc
u_int urg_ptr; // urgent pointer from the TCP header, converted to host byte order
char fin; // non-zero when the segment carried TH_FIN
char urg; // non-zero when the segment carried TH_URG
u_int seq; // sequence number of the segment in host byte order; the queue is kept ordered by it
u_int ack; // NOTE(review): not written by the code visible in this excerpt
};
这个结构体模仿的是内核中的sk_buff结构体,只不过比内核中的要小很多(你懂的,因为这里只做会话重组)。
下面是在nids.h中的
/* The four values that identify one TCP connection: two ports and two addresses. */
struct tuple4
{
u_short source; // port of one endpoint — presumably the connection initiator; confirm against the code that fills it in
u_short dest; // port of the other endpoint
u_int saddr; // address paired with `source` — NOTE(review): byte order is not visible here
u_int daddr; // address paired with `dest`
};
这是用来表示一个TCP连接的,不解释。
/*
 * State of one endpoint of a TCP connection ("half" of a session); a
 * tcp_stream holds two of these, one for the client and one for the server.
 */
struct half_stream
{
  char state;              /* TCP state of this endpoint (TCP_SYN_SENT, TCP_ESTABLISHED, FIN_SENT, ...) */
  char collect;            /* raised by callbacks that want this side's data */
  char collect_urg;        /* raised by callbacks that want this side's urgent data */
  char *data;              /* data that has already been put in order */
  int offset;
  int count;               /* number of bytes stored in data */
  int count_new;           /* number of bytes in data not yet passed to the callbacks */
  int bufsize;
  int rmem_alloc;          /* sum of skbuff.truesize over the segments queued in list */
  int urg_count;
  u_int acked;
  u_int seq;               /* sequence number tracked for this endpoint (initial sequence number + 1 after the SYN) */
  u_int ack_seq;           /* last acknowledgement number recorded for this endpoint; used as left window edge */
  u_int first_data_seq;    /* sequence number of the first data byte */
  u_char urgdata;
  u_char count_new_urg;
  u_char urg_seen;
  u_int urg_ptr;
  u_short window;          /* window advertised by this endpoint, host byte order */
  u_char ts_on;            /* non-zero when the TCP timestamp option is in use */
  u_char wscale_on;        /* non-zero when the window-scale option is in use */
  u_int curr_ts;           /* most recent timestamp value recorded for this endpoint */
  u_int wscale;            /* multiplier applied to window (1 when scaling is off) */
  /* queue of out-of-order segments waiting to be reassembled */
  struct skbuff *list;     /* head: lowest sequence number */
  struct skbuff *listtail; /* tail: highest sequence number */
};
这个是用来表示“半个TCP会话”,其实就是一个方向上的TCP流。
还有
/* One tracked TCP connection: its identifying 4-tuple plus the state of both endpoints. */
struct tcp_stream
{
struct tuple4 addr; // ports and addresses identifying the connection
char nids_state; // state reported to listeners; process_tcp sets NIDS_JUST_EST, NIDS_DATA, NIDS_RESET and NIDS_CLOSE
struct lurker_node *listeners; // singly linked list of callbacks attached to this connection
struct half_stream client; // state of the client endpoint
struct half_stream server; // state of the server endpoint
struct tcp_stream *next_node; // NOTE(review): next_node/prev_node presumably chain streams inside one hash bucket — confirm
struct tcp_stream *prev_node;
int hash_index; // NOTE(review): presumably this stream's slot in tcp_stream_table — confirm
struct tcp_stream *next_time; // NOTE(review): use of next_time/prev_time is not visible in this excerpt
struct tcp_stream *prev_time;
int read;
struct tcp_stream *next_free; // next unused entry of the pre-allocated pool (chained together in tcp_init)
void *user;
};
显然,这是用来表示一个完整的TCP会话的。最后是 static struct tcp_stream **tcp_stream_table; 它是一个TCP会话指针的数组,其实就是哈希表。
下面来看处理过程,先是初始化:
/*
 * Set up the TCP reassembly module: allocate the session hash table with
 * `size` buckets and pre-allocate the pool of tcp_stream objects.
 *
 * Returns 0 on success, or -1 after reporting through nids_params.no_mem()
 * when an allocation fails. On failure nothing allocated here stays behind.
 * ("..." marks code elided in this excerpt.)
 */
int tcp_init(int size)
{
  ...
  /* Global session hash table; calloc leaves every bucket empty. */
  tcp_stream_table_size = size;
  tcp_stream_table = calloc(tcp_stream_table_size, sizeof *tcp_stream_table);
  if (!tcp_stream_table) {
    nids_params.no_mem("tcp_init");
    return -1;
  }
  /* Cap the number of sessions at 3/4 of the table size to keep hashing efficient. */
  max_stream = 3 * tcp_stream_table_size / 4;
  /* Allocate all session objects up front instead of one by one later on. */
  streams_pool = malloc((max_stream + 1) * sizeof *streams_pool);
  if (!streams_pool) {
    /* Do not leak the table that was allocated just above. */
    free(tcp_stream_table);
    tcp_stream_table = NULL;
    nids_params.no_mem("tcp_init");
    return -1;
  }
  /* Thread the pool into a singly linked free list. */
  for (i = 0; i < max_stream; i++)
    streams_pool[i].next_free = &streams_pool[i + 1];
  streams_pool[max_stream].next_free = NULL;
  free_streams = streams_pool;
  ...
  return 0;
}
很简单,做了两件事:1.初始化tcp会话哈希表。2.初始化会话池。这个初始化函数只在库初始化时执行一次。
初始化完成之后,就进入了pcap_loop。nids中的回调函数是nids_pcap_handler,这个函数先做IP分片重组(等下篇再说),之后如果是TCP包,便来到了process_tcp函数,TCP会话重组从这里开始。来看看。
// Process one TCP segment: create or look up its session, drive the
// handshake / reset / teardown state changes, and pass payload to tcp_queue.
// data points at the IP header of the packet; skblen is forwarded unchanged
// to tcp_queue. ("..." marks code elided in this excerpt, including the
// declarations of a_tcp, snd, rcv, from_client, datalen and tmp_ts.)
void process_tcp(u_char * data, int skblen){
// Locate the IP header and, ip_hl 32-bit words further on, the TCP header.
struct ip *this_iphdr = (struct ip *)data;
struct tcphdr *this_tcphdr = (struct tcphdr *)(data + 4 * this_iphdr->ip_hl);
...// sanity-check code omitted here
// Look the session up in the hash table; if it is unknown, decide whether to create it.
if (!(a_tcp = find_stream(this_tcphdr, this_iphdr, &from_client))) {
// A SYN without ACK and without RST is the client's opening packet of a
// new connection: register a new session for it.
if ((this_tcphdr->th_flags & TH_SYN) &&
!(this_tcphdr->th_flags & TH_ACK) &&
!(this_tcphdr->th_flags & TH_RST))
add_new_tcp(this_tcphdr, this_iphdr);
// Either way, nothing more is done for a packet that had no session yet.
return;
}
// Session found: choose the sender (snd) and receiver (rcv) halves from the packet direction.
if (from_client) {
snd = &a_tcp->client;
rcv = &a_tcp->server;
}
else {
rcv = &a_tcp->client;
snd = &a_tcp->server;
}
// SYN flag set on a packet that belongs to a known session.
if ((this_tcphdr->th_flags & TH_SYN)) {
// Only the server's SYN+ACK reply is accepted here: the packet must come from the server,
// the client must be in TCP_SYN_SENT, the server in TCP_CLOSE, and ACK must be set.
if (from_client || a_tcp->client.state != TCP_SYN_SENT ||
a_tcp->server.state != TCP_CLOSE || !(this_tcphdr->th_flags & TH_ACK))
return;
// Drop the packet if it does not acknowledge the client's recorded sequence number.
if (a_tcp->client.seq != ntohl(this_tcphdr->th_ack))
return;
// This is the second handshake packet: initialise the server half (state, sequence numbers, window).
a_tcp->server.state = TCP_SYN_RECV;
a_tcp->server.seq = ntohl(this_tcphdr->th_seq) + 1;
a_tcp->server.first_data_seq = a_tcp->server.seq;
a_tcp->server.ack_seq = ntohl(this_tcphdr->th_ack);
a_tcp->server.window = ntohs(this_tcphdr->th_win);
// TCP options follow.
// Timestamps: looked at only if the client enabled them; if the server did not, they are switched off for the client too.
if (a_tcp->client.ts_on) {
a_tcp->server.ts_on = get_ts(this_tcphdr, &a_tcp->server.curr_ts);
if (!a_tcp->server.ts_on)
a_tcp->client.ts_on = 0;
} else a_tcp->server.ts_on = 0;
// Window scaling: same negotiation; a scale value of 1 is stored whenever scaling is off.
if (a_tcp->client.wscale_on) {
a_tcp->server.wscale_on = get_wscale(this_tcphdr, &a_tcp->server.wscale);
if (!a_tcp->server.wscale_on) {
a_tcp->client.wscale_on = 0;
a_tcp->client.wscale = 1;
a_tcp->server.wscale = 1;
}
} else {
a_tcp->server.wscale_on = 0;
a_tcp->server.wscale = 1;
}
// SYN handling is complete.
return;
}
if (
! ( !datalen && ntohl(this_tcphdr->th_seq) == rcv->ack_seq )/* not an empty segment sitting exactly at ack_seq */
&&// and the segment lies outside the current window
( !before(ntohl(this_tcphdr->th_seq), rcv->ack_seq + rcv->window*rcv->wscale) || // starts at or beyond the right window edge
before(ntohl(this_tcphdr->th_seq) + datalen, rcv->ack_seq) // ends before the left window edge
)
)
// Out-of-window segment: drop it.
return;
// RST: if the session is in NIDS_DATA, report NIDS_RESET to every listener,
// then free the session.
if ((this_tcphdr->th_flags & TH_RST)) {
if (a_tcp->nids_state == NIDS_DATA) {
struct lurker_node *i;
a_tcp->nids_state = NIDS_RESET;
// invoke every registered listener callback
for (i = a_tcp->listeners; i; i = i->next)
(i->item) (a_tcp, &i->data);
}
nids_free_tcp_stream(a_tcp);
return;
}
/* PAWS (protection against wrapped sequence numbers) check: drop the segment if its timestamp is before snd->curr_ts */
if (rcv->ts_on && get_ts(this_tcphdr, &tmp_ts) &&
before(tmp_ts, snd->curr_ts))
return;
// ACK flag set.
if ((this_tcphdr->th_flags & TH_ACK)) {
// Sent by the client while the client is still in TCP_SYN_SENT and the server in TCP_SYN_RECV.
if (from_client && a_tcp->client.state == TCP_SYN_SENT &&
a_tcp->server.state == TCP_SYN_RECV) {
// The ACK matches the server's sequence number: this is the third handshake packet, the connection is established.
if (ntohl(this_tcphdr->th_ack) == a_tcp->server.seq) {
a_tcp->client.state = TCP_ESTABLISHED;// update the client state
a_tcp->client.ack_seq = ntohl(this_tcphdr->th_ack);// record the acknowledgement number
{
struct proc_node *i;
struct lurker_node *j;
void *data;
a_tcp->server.state = TCP_ESTABLISHED;// update the server state
a_tcp->nids_state = NIDS_JUST_EST;// stream state the callbacks below get to see
// Offer the new connection to every registered callback in tcp_procs.
for (i = tcp_procs; i; i = i->next) {
char whatto = 0;
// Snapshot the collect counters so changes made by the callback can be detected.
char cc = a_tcp->client.collect;
char sc = a_tcp->server.collect;
char ccu = a_tcp->client.collect_urg;
char scu = a_tcp->server.collect_urg;
(i->item) (a_tcp, &data);// invoke the callback
// A counter that grew means the callback asked for that kind of data.
if (cc < a_tcp->client.collect)
whatto |= COLLECT_cc;
if (ccu < a_tcp->client.collect_urg)
whatto |= COLLECT_ccu;
if (sc < a_tcp->server.collect)
whatto |= COLLECT_sc;
if (scu < a_tcp->server.collect_urg)
whatto |= COLLECT_scu;
// With one_loop_less set, a collect value of 2 or more is put back to its
// previous value and is not counted as a request.
if (nids_params.one_loop_less) {
if (a_tcp->client.collect >=2) {
a_tcp->client.collect=cc;
whatto&=~COLLECT_cc;
}
if (a_tcp->server.collect >=2 ) {
a_tcp->server.collect=sc;
whatto&=~COLLECT_sc;
}
}
// The callback asked for something: prepend a listener node for it to the session.
if (whatto) {
j = mknew(struct lurker_node);
j->item = i->item;
j->data = data;
j->whatto = whatto;
j->next = a_tcp->listeners;
a_tcp->listeners = j;
}
}
// No callback is interested in this session: free it.
if (!a_tcp->listeners) {
nids_free_tcp_stream(a_tcp);
return;
}
a_tcp->nids_state = NIDS_DATA;
}
}
// return;
}
}
// Handshake handling ends here.
// Next: ACKs that take part in connection teardown.
if ((this_tcphdr->th_flags & TH_ACK)) {
// Pass the acknowledgement number to handle_ack for the sender half.
handle_ack(snd, ntohl(this_tcphdr->th_ack));
// Advance the FIN states; once both halves are FIN_CONFIRMED, report
// NIDS_CLOSE to the listeners and free the session.
if (rcv->state == FIN_SENT)
rcv->state = FIN_CONFIRMED;
if (rcv->state == FIN_CONFIRMED && snd->state == FIN_CONFIRMED) {
struct lurker_node *i;
a_tcp->nids_state = NIDS_CLOSE;
for (i = a_tcp->listeners; i; i = i->next)
(i->item) (a_tcp, &i->data);
nids_free_tcp_stream(a_tcp);
return;
}
}
// The segment carries payload and/or a FIN.
if (datalen + (this_tcphdr->th_flags & TH_FIN) > 0)
// hand it to tcp_queue; the payload starts th_off 32-bit words after the TCP header start
tcp_queue(a_tcp, this_tcphdr, snd, rcv,
(char *) (this_tcphdr) + 4 * this_tcphdr->th_off,
datalen, skblen);
// Record the window advertised by the sender.
snd->window = ntohs(this_tcphdr->th_win);
// When rcv->rmem_alloc exceeds 65535, call prune_queue (not shown in this excerpt).
if (rcv->rmem_alloc > 65535)
prune_queue(rcv, this_tcphdr);
// Free the session when it has no listeners.
if (!a_tcp->listeners)
nids_free_tcp_stream(a_tcp);
}
好了,tcp包的基本处理流程就这些了,主要做了连接的建立、释放、状态迁移这些工作,下面看看连接的缓冲区是如何维护的(主要就是如何更新的)。来看tcp_queue函数:
/*
 * Feed one TCP segment into the reassembly state of the receiving half.
 *
 *   a_tcp        session the segment belongs to
 *   this_tcphdr  TCP header of the segment
 *   snd, rcv     sending and receiving halves for this direction
 *   data         start of the payload
 *   datalen      payload length in bytes
 *   skblen       stored as skbuff.truesize and charged to rcv->rmem_alloc
 *
 * A segment that starts at or before EXP_SEQ and reaches past it is handed
 * to add_from_skb immediately, after which queued segments that have become
 * usable are drained. A segment that starts after EXP_SEQ is copied and
 * inserted into rcv->list, which is kept ordered by sequence number.
 * A pure retransmission is ignored.
 */
static void
tcp_queue(struct tcp_stream * a_tcp, struct tcphdr * this_tcphdr,
	  struct half_stream * snd, struct half_stream * rcv,
	  char *data, int datalen, int skblen
	  )
{
  u_int this_seq = ntohl(this_tcphdr->th_seq);
  struct skbuff *pakiet, *tmp;

  /*
   * Did we get anything new to ack?
   */
  /* EXP_SEQ is the next sequence number wanted; does the segment start at or before it? */
  if (!after(this_seq, EXP_SEQ)) {
    /* Does it also extend past EXP_SEQ? If not it is a retransmission and is ignored. */
    if (after(this_seq + datalen + (this_tcphdr->th_flags & TH_FIN), EXP_SEQ)) {
      /* the packet straddles our window end */
      get_ts(this_tcphdr, &snd->curr_ts);
      /* Append the new data to the in-order stream. */
      add_from_skb(a_tcp, rcv, snd, (u_char *)data, datalen, this_seq,
		   (this_tcphdr->th_flags & TH_FIN),
		   (this_tcphdr->th_flags & TH_URG),
		   ntohs(this_tcphdr->th_urp) + this_seq - 1);
      /*
       * Do we have any old packets to ack that the above
       * made visible? (Go forward from skb)
       */
      /* Drain queued segments that no longer start after EXP_SEQ; each one is
       * unlinked and freed whether or not it contributed new data. */
      pakiet = rcv->list;
      while (pakiet) {
	if (after(pakiet->seq, EXP_SEQ))
	  break;
	if (after(pakiet->seq + pakiet->len + pakiet->fin, EXP_SEQ)) {
	  add_from_skb(a_tcp, rcv, snd, pakiet->data,
		       pakiet->len, pakiet->seq, pakiet->fin, pakiet->urg,
		       pakiet->urg_ptr + pakiet->seq - 1);
	}
	rcv->rmem_alloc -= pakiet->truesize;
	if (pakiet->prev)
	  pakiet->prev->next = pakiet->next;
	else
	  rcv->list = pakiet->next;
	if (pakiet->next)
	  pakiet->next->prev = pakiet->prev;
	else
	  rcv->listtail = pakiet->prev;
	tmp = pakiet->next;
	free(pakiet->data);
	free(pakiet);
	pakiet = tmp;
      }
    }
    else
      return;
  }
  /* The segment starts after EXP_SEQ, i.e. it arrived out of order: keep a
   * copy in the queue, preserving the ordering by sequence number. */
  else {
    struct skbuff *p = rcv->listtail;

    pakiet = mknew(struct skbuff);
    pakiet->truesize = skblen;
    rcv->rmem_alloc += pakiet->truesize;
    pakiet->len = datalen;
    pakiet->data = malloc(datalen);
    /* malloc(0) may legitimately return NULL (a bare out-of-order FIN has no
     * payload), so NULL only means failure when bytes were requested. */
    if (!pakiet->data && datalen > 0) {
      nids_params.no_mem("tcp_queue");
      /* The handler may return: undo the accounting and drop the segment
       * instead of copying through a null pointer. */
      rcv->rmem_alloc -= pakiet->truesize;
      free(pakiet);
      return;
    }
    if (datalen > 0)
      memcpy(pakiet->data, data, datalen);
    pakiet->fin = (this_tcphdr->th_flags & TH_FIN);
    /* Some Cisco - at least - hardware accept to close a TCP connection
     * even though packets were lost before the first TCP FIN packet and
     * never retransmitted; this violates RFC 793, but since it really
     * happens, it has to be dealt with... The idea is to introduce a 10s
     * timeout after TCP FIN packets were sent by both sides so that
     * corresponding libnids resources can be released instead of waiting
     * for retransmissions which will never happen. -- Sebastien Raveau
     */
    if (pakiet->fin) {
      snd->state = TCP_CLOSING;
      if (rcv->state == FIN_SENT || rcv->state == FIN_CONFIRMED)
	add_tcp_closing_timeout(a_tcp);
    }
    pakiet->seq = this_seq;
    pakiet->urg = (this_tcphdr->th_flags & TH_URG);
    pakiet->urg_ptr = ntohs(this_tcphdr->th_urp);
    /* Walk back from the tail to the last node whose seq is not after this_seq. */
    for (;;) {
      if (!p || !after(p->seq, this_seq))
	break;
      p = p->prev;
    }
    if (!p) {
      /* No such node: the new segment becomes the head of the queue. */
      pakiet->prev = 0;
      pakiet->next = rcv->list;
      if (rcv->list)
	rcv->list->prev = pakiet;
      rcv->list = pakiet;
      if (!rcv->listtail)
	rcv->listtail = pakiet;
    }
    else {
      /* Insert directly after p. */
      pakiet->next = p->next;
      p->next = pakiet;
      pakiet->prev = p;
      if (pakiet->next)
	pakiet->next->prev = pakiet;
      else
	rcv->listtail = pakiet;
    }
  }
}