在此之前,如果不懂IP分片技术的话,请参照这里。IP分片技术比较简单暴力,没有TCP那样复杂的窗口协议。基本上只是暴力地拆分和重组,代码基本都在ip_fragment.c中。
先从总体上说说。首先,每个IP(主机)都会有IP分片包(注意是IP,不是IP对)。所以,每个IP都有一个如下的结构体来维护其上的所有IP分片:
/*
 * Per-host reassembly state.  There is one entry per IP address (a single
 * host, not an address pair); it owns every incomplete datagram queued for
 * that host.
 */
struct hostfrags {
    struct ipq *ipqueue;      /* queue of IP fragment queues for this host */
    int ip_frag_mem;          /* compared against IPFRAG_HIGH_THRESH in ip_defrag() */
    u_int ip;                 /* IP address of the host this entry describes */

    /* The three members below make this an element of a hash table. */
    int hash_index;
    struct hostfrags *prev;
    struct hostfrags *next;
};

/* The hash table that holds the entries of all hosts. */
static struct hostfrags **fragtable;
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
    unsigned char *mac;         /* pointer to MAC header */
    struct ip *iph;             /* pointer to IP header */
    int len;                    /* total length of original datagram */
    short ihlen;                /* length of the IP header */
    short maclen;               /* length of the MAC header */
    struct timer_list timer;    /* when will this queue expire? */
    struct ipfrag *fragments;   /* linked list of received fragments */
    struct hostfrags *hf;       /* host entry this queue belongs to */
    struct ipq *next;           /* linked list pointers */
    struct ipq *prev;
    /*
     * Left commented out in this copy of the kernel structure:
     *   struct device *dev;       -- Device - for icmp replies
     */
};
/*
 * One received IP fragment, kept as a node of the doubly linked list that
 * hangs off its reassembly queue.
 */
struct ipfrag {
    int offset;                 /* where this fragment starts in the datagram */
    int end;                    /* where this fragment's data ends in the datagram */
    int len;                    /* number of data bytes in this fragment */
    struct sk_buff *skb;        /* the complete fragment as it was received */
    unsigned char *ptr;         /* points into the real fragment data */
    struct ipfrag *next, *prev; /* list linkage */
};
由于libnids中的分片重组代码是从内核中拿出来修改的,所以保留了内核的注释。这里就不多做解释了。
好了,步入处理逻辑。照例,先看初始化:
void ip_frag_init(int n) { struct timeval tv; gettimeofday(&tv, 0); time0 = tv.tv_sec; fragtable = (struct hostfrags **) calloc(n, sizeof(struct hostfrags *)); if (!fragtable) nids_params.no_mem("ip_frag_init"); hash_size = n; }简单到不能再简单——分片了一个主机的哈希表。分完手工。好吧,看重组逻辑:
//先是判断是否为分片的函数 int ip_defrag_stub(struct ip *iph, struct ip **defrag) { int offset, flags, tot_len; struct sk_buff *skb; numpack++; //先处理超时事件 timenow = 0;//刷新时间 while (timer_head && timer_head->expires < jiffies()) { this_host = ((struct ipq *) (timer_head->data))->hf; timer_head->function(timer_head->data); } //然后计算分片的偏移 offset = ntohs(iph->ip_off); flags = offset & ~IP_OFFSET; offset &= IP_OFFSET; //此包不是分片 if (((flags & IP_MF) == 0) && (offset == 0)) { ip_defrag(iph, 0); return IPF_NOTF; } //此包是分片,先申请一个sk_buff把分片的数据保存起来,然后交给defrag函数 tot_len = ntohs(iph->ip_len); skb = (struct sk_buff *) malloc(tot_len + sizeof(struct sk_buff)); if (!skb) nids_params.no_mem("ip_defrag_stub"); skb->data = (char *) (skb + 1); memcpy(skb->data, iph, tot_len); skb->truesize = tot_len + 16 + nids_params.dev_addon; skb->truesize = (skb->truesize + 15) & ~15; skb->truesize += nids_params.sk_buff_size; //如果集齐了一个ip包的所有分片ip_defrag将返回合并后的ip包,此时返回IPF_NEW,进行下一步的ip包处理 //否则,返回IPF_ISF,跳过ip包处理 if ((*defrag = (struct ip *)ip_defrag((struct ip *) (skb->data), skb))) return IPF_NEW; return IPF_ISF; } /* Process an incoming IP datagram fragment. */ //这里就是分片重组的主要逻辑了 static char *ip_defrag(struct ip *iph, struct sk_buff *skb) { struct ipfrag *prev, *next, *tmp; struct ipfrag *tfp; struct ipq *qp; char *skb2; unsigned char *ptr; int flags, offset; int i, ihl, end; //如果是分片,而且host哈希表里还没有对应的host项的话,果断新建一个 //此处还负责将this_host变量设为当前ip对应的host if (!hostfrag_find(iph) && skb) hostfrag_create(iph); /* Start by cleaning up the memory. */ //内存用太多了,panic之,然后释放当前host分片所用的内存 if (this_host) if (this_host->ip_frag_mem > IPFRAG_HIGH_THRESH) ip_evictor(); /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */ //这里,找到这个ip包对应的ip分片链表 if (this_host) qp = ip_find(iph); else qp = 0; /* Is this a non-fragmented datagram? 
*/ offset = ntohs(iph->ip_off); flags = offset & ~IP_OFFSET; offset &= IP_OFFSET; if (((flags & IP_MF) == 0) && (offset == 0)) { if (qp != NULL) ip_free(qp); /* Fragmented frame replaced by full unfragmented copy */ return 0; } /* ip_evictor() could have removed all queues for the current host */ if (!this_host) hostfrag_create(iph); offset <<= 3; /* offset is in 8-byte chunks */ ihl = iph->ip_hl * 4; /* If the queue already existed, keep restarting its timer as long as we still are receiving fragments. Otherwise, create a fresh queue entry. */ //如果当前host下来过此包的碎片 if (qp != NULL) { /* ANK. If the first fragment is received, we should remember the correct IP header (with options) */ if (offset == 0) { qp->ihlen = ihl; memcpy(qp->iph, iph, ihl + 8); } del_timer(&qp->timer); qp->timer.expires = jiffies() + IP_FRAG_TIME; /* about 30 seconds */ qp->timer.data = (unsigned long) qp; /* pointer to queue */ qp->timer.function = ip_expire; /* expire function */ add_timer(&qp->timer); } //否则新建一个碎片队列 else { /* If we failed to create it, then discard the frame. */ if ((qp = ip_create(iph)) == NULL) { kfree_skb(skb, FREE_READ); return NULL; } } /* Attempt to construct an oversize packet. */ //再大的ip包也不能大过65535啊,一经发现,直接放弃 if (ntohs(iph->ip_len) + (int) offset > 65535) { // NETDEBUG(printk("Oversized packet received from %s\n", int_ntoa(iph->ip_src.s_addr))); nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERSIZED, iph, 0); kfree_skb(skb, FREE_READ); return NULL; } //下面就开始在碎片队列里面找位置了,同时处理好重叠 //如果有重叠,把重叠的旧的部分去掉 /* Determine the position of this fragment. */ end = offset + ntohs(iph->ip_len) - ihl; /* Point into the IP datagram 'data' part. */ ptr = (unsigned char *)(skb->data + ihl); /* Is this the final fragment? */ if ((flags & IP_MF) == 0) qp->len = end; /* Find out which fragments are in front and at the back of us in the chain of fragments so far. We must know where to put this fragment, right? 
*/ prev = NULL; for (next = qp->fragments; next != NULL; next = next->next) { if (next->offset >= offset) break; /* bingo! */ prev = next; } /* We found where to put this one. Check for overlap with preceding fragment, and, if needed, align things so that any overlaps are eliminated. */ if (prev != NULL && offset < prev->end) { nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERLAP, iph, 0); i = prev->end - offset; offset += i; /* ptr into datagram */ ptr += i; /* ptr into fragment data */ } /* Look for overlap with succeeding segments. If we can merge fragments, do it. */ for (tmp = next; tmp != NULL; tmp = tfp) { tfp = tmp->next; if (tmp->offset >= end) break; /* no overlaps at all */ nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERLAP, iph, 0); i = end - next->offset; /* overlap is 'i' bytes */ tmp->len -= i; /* so reduce size of */ tmp->offset += i; /* next fragment */ tmp->ptr += i; /* If we get a frag size of <= 0, remove it and the packet that it goes with. We never throw the new frag away, so the frag being dumped has always been charged for. */ if (tmp->len <= 0) { if (tmp->prev != NULL) tmp->prev->next = tmp->next; else qp->fragments = tmp->next; if (tmp->next != NULL) tmp->next->prev = tmp->prev; next = tfp; /* We have killed the original next frame */ frag_kfree_skb(tmp->skb, FREE_READ); frag_kfree_s(tmp, sizeof(struct ipfrag)); } } //下面往队列中插入当前碎片 /* Insert this fragment in the chain of fragments. */ tfp = NULL; tfp = ip_frag_create(offset, end, skb, ptr); /* No memory to save the fragment - so throw the lot. If we failed the frag_create we haven't charged the queue. */ if (!tfp) { nids_params.no_mem("ip_defrag"); kfree_skb(skb, FREE_READ); return NULL; } /* From now on our buffer is charged to the queues. */ tfp->prev = prev; tfp->next = next; if (prev != NULL) prev->next = tfp; else qp->fragments = tfp; if (next != NULL) next->prev = tfp; /* OK, so we inserted this new fragment into the chain. 
Check if we now have a full IP datagram which we can bump up to the IP layer... */ //查看是不是碎片都搜集齐了,如果齐了,组合成一个大ip包返回 if (ip_done(qp)) { skb2 = ip_glue(qp); /* glue together the fragments */ return (skb2); } return (NULL); }