libnids中TCP/IP栈实现细节分析(下)——IP分片重组

好了,有时间了,来看看libnids中IP分片重组的方法吧。

在此之前,如果不懂IP分片技术的话,请参照这里。IP分片技术比较简单暴力,没有TCP那样复杂的窗口协议。基本上只是暴力的拆分和重组,代码基本在ip_defragment.c中。

先从总体上说说。首先,每个IP(主机)都会有IP分片包(注意是IP,不是IP对)。所以,每个IP都有一个如下的结构体来维护其上的所有IP分片:

1
2
3
4
5
6
7
8
9
10
11
/* Per-host fragment bookkeeping; one element of the fragtable hash table. */
struct hostfrags {
   struct ipq *ipqueue; /* queue of IP fragment queues kept for this host */
   int ip_frag_mem; /* fragment memory counter; ip_defrag compares it with IPFRAG_HIGH_THRESH (presumably bytes - confirm) */
   u_int ip; /* IP address this host entry corresponds to */
   /* The next three fields make this an element of a hash table */
   int hash_index;
   struct hostfrags *prev;
   struct hostfrags *next;
};
/* Hash table that keeps the hostfrags entries of all IPs; allocated in ip_frag_init(). */
static struct hostfrags **fragtable;

每个IP下面又有很多的被分片的IP包——IP碎片队列,IP碎片队列的定义在这:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/* Describe an entry in the "incomplete datagrams" queue: one datagram that
   is being reassembled, together with the fragments received so far. */
struct ipq {
   unsigned char *mac;        /* pointer to MAC header                */
   struct ip *iph;        /* pointer to IP header                 */
   int len;            /* total length of original datagram    */
   short ihlen;            /* length of the IP header              */
   short maclen;            /* length of the MAC header             */
   struct timer_list timer;    /* when will this queue expire?         */
   struct ipfrag *fragments;    /* linked list of received fragments    */
   struct hostfrags *hf;        /* host entry this queue belongs to; read back through the timer data in ip_defrag_stub */
   struct ipq *next;        /* linked list pointers                 */
   struct ipq *prev;
   // struct device *dev;    /* Device - for icmp replies */
};

最终的IP碎片的定义在这:

1
2
3
4
5
6
7
8
9
10
/* Describe an IP fragment: one node of the doubly linked, offset-ordered
   fragment list hanging off struct ipq. */
struct ipfrag {
   int offset;            /* offset of fragment in IP datagram    */
   int end;            /* last byte of data in datagram        */
                       /* NOTE(review): ip_defrag computes end as offset + payload
                          length, so this looks like an exclusive end offset - confirm */
   int len;            /* length of this fragment              */
   struct sk_buff *skb;        /* complete received fragment           */
   unsigned char *ptr;        /* pointer into real fragment data      */
   struct ipfrag *next;        /* linked list pointers                 */
   struct ipfrag *prev;
};

由于libnids中的分片重组代码是从内核中拿出来修改的,所以保留了内核的注释。这里就不多做解释了。

好了步入处理逻辑,照例,先看初始化:

1
2
3
4
5
6
7
8
9
10
11
12
/*
 * Initialise the IP defragmentation module.
 *
 * Records the current wall-clock second in time0, allocates a zero-filled
 * host hash table with n buckets and stores n in hash_size.  On allocation
 * failure the nids_params.no_mem callback is invoked with "ip_frag_init".
 */
void
ip_frag_init(int n)
{
  struct timeval now;

  /* Remember the second at which the module was started. */
  gettimeofday(&now, NULL);
  time0 = now.tv_sec;

  /* One bucket pointer per hash slot, all initially NULL. */
  fragtable = calloc(n, sizeof(*fragtable));
  if (fragtable == NULL) {
    nids_params.no_mem("ip_frag_init");
  }

  hash_size = n;
}

简单到不能再简单——分配了一个主机的哈希表。分完收工。好吧,看重组逻辑:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
//先是判断是否为分片的函数
int
ip_defrag_stub( struct ip *iph, struct ip **defrag)
{
   int offset, flags, tot_len;
   struct sk_buff *skb;
 
   numpack++;
   //先处理超时事件
   timenow = 0; //刷新时间
   while (timer_head && timer_head->expires < jiffies()) {
     this_host = (( struct ipq *) (timer_head->data))->hf;
     timer_head->function(timer_head->data);
   }
 
   //然后计算分片的偏移
   offset = ntohs(iph->ip_off);
   flags = offset & ~IP_OFFSET;
   offset &= IP_OFFSET;
 
   //此包不是分片
   if (((flags & IP_MF) == 0) && (offset == 0)) {
     ip_defrag(iph, 0);
     return IPF_NOTF;
   }
 
   //此包是分片,先申请一个sk_buff把分片的数据保存起来,然后交给defrag函数
   tot_len = ntohs(iph->ip_len);
   skb = ( struct sk_buff *) malloc (tot_len + sizeof ( struct sk_buff));
   if (!skb)
       nids_params.no_mem( "ip_defrag_stub" );
   skb->data = ( char *) (skb + 1);
   memcpy (skb->data, iph, tot_len);
   skb->truesize = tot_len + 16 + nids_params.dev_addon;
   skb->truesize = (skb->truesize + 15) & ~15;
   skb->truesize += nids_params.sk_buff_size;
 
   //如果集齐了一个ip包的所有分片ip_defrag将返回合并后的ip包,此时返回IPF_NEW,进行下一步的ip包处理
   //否则,返回IPF_ISF,跳过ip包处理
   if ((*defrag = ( struct ip *)ip_defrag(( struct ip *) (skb->data), skb)))
     return IPF_NEW;
 
   return IPF_ISF;
}
 
/*
 * Process an incoming IP datagram fragment.
 * This is the main logic of fragment reassembly.
 *
 * iph: IP header of the incoming packet.
 * skb: buffer holding the fragment.  ip_defrag_stub passes 0 here for
 *      unfragmented packets; for those the function returns before skb
 *      is dereferenced.
 *
 * Returns the result of ip_glue() once ip_done() reports the queue as
 * complete; returns NULL/0 otherwise (unfragmented packet, discarded
 * fragment, or datagram still incomplete).
 */
static char *
ip_defrag( struct ip *iph, struct sk_buff *skb)
{
   struct ipfrag *prev, *next, *tmp;
   struct ipfrag *tfp;
   struct ipq *qp;
   char *skb2;
   unsigned char *ptr;
   int flags, offset;
   int i, ihl, end;
 
   /* If this is a fragment (skb != 0) and the host hash table has no entry
      for this host yet, create one.  Per the original author's note, this
      step is also what points this_host at the host entry for the current
      IP (hostfrag_find/hostfrag_create are not visible here). */
   if (!hostfrag_find(iph) && skb)
     hostfrag_create(iph);
 
   /* Start by cleaning up the memory. */
   /* Too much fragment memory in use for this host: call ip_evictor()
      (presumably frees fragment memory of the current host - confirm). */
   if (this_host)
     if (this_host->ip_frag_mem > IPFRAG_HIGH_THRESH)
       ip_evictor();
   
   /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
   /* i.e. look up the fragment list that belongs to this IP packet */
   if (this_host)
     qp = ip_find(iph);
   else
     qp = 0;
 
   /* Is this a non-fragmented datagram? */
   offset = ntohs(iph->ip_off);
   flags = offset & ~IP_OFFSET;
   offset &= IP_OFFSET;
   if (((flags & IP_MF) == 0) && (offset == 0)) {
     if (qp != NULL)
       ip_free(qp);      /* Fragmented frame replaced by full
                    unfragmented copy */
     return 0;
   }
 
   /* ip_evictor() could have removed all queues for the current host */
   if (!this_host)
     hostfrag_create(iph);
 
   offset <<= 3;           /* offset is in 8-byte chunks */
   ihl = iph->ip_hl * 4;
 
   /*
     If the queue already existed, keep restarting its timer as long as
     we still are receiving fragments.  Otherwise, create a fresh queue
     entry.
   */
   /* Fragments of this packet have been seen for the current host before */
   if (qp != NULL) {
     /* ANK. If the first fragment is received, we should remember the correct
        IP header (with options) */
     if (offset == 0) {
       qp->ihlen = ihl;
       memcpy (qp->iph, iph, ihl + 8);
     }
     del_timer(&qp->timer);
     qp->timer.expires = jiffies() + IP_FRAG_TIME;    /* about 30 seconds */
     qp->timer.data = (unsigned long ) qp; /* pointer to queue */
     qp->timer.function = ip_expire;  /* expire function */
     add_timer(&qp->timer);
   }
   /* Otherwise create a new fragment queue */
   else {
     /* If we failed to create it, then discard the frame. */
     if ((qp = ip_create(iph)) == NULL) {
       kfree_skb(skb, FREE_READ);
       return NULL;
     }
   }
   /* Attempt to construct an oversize packet. */
   /* No IP packet may exceed 65535 bytes; drop the fragment as soon as
      this is detected. */
   if (ntohs(iph->ip_len) + ( int ) offset > 65535) {
     // NETDEBUG(printk("Oversized packet received from %s\n", int_ntoa(iph->ip_src.s_addr)));
     nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERSIZED, iph, 0);
     kfree_skb(skb, FREE_READ);
     return NULL;
   }
 
   /* From here on: find the position of this fragment in the fragment
      list and resolve overlaps with fragments that are already queued. */
   /* Determine the position of this fragment. */
   end = offset + ntohs(iph->ip_len) - ihl;
 
   /* Point into the IP datagram 'data' part. */
   ptr = (unsigned char *)(skb->data + ihl);
 
   /* Is this the final fragment? */
   if ((flags & IP_MF) == 0)
     qp->len = end;
 
   /*
     Find out which fragments are in front and at the back of us in the
     chain of fragments so far.  We must know where to put this
     fragment, right?
   */
   prev = NULL;
   for (next = qp->fragments; next != NULL; next = next->next) {
     if (next->offset >= offset)
       break ;            /* bingo! */
     prev = next;
   }
   /*
     We found where to put this one.  Check for overlap with preceding
     fragment, and, if needed, align things so that any overlaps are
     eliminated.
   */
   if (prev != NULL && offset < prev->end) {
     nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERLAP, iph, 0);
     i = prev->end - offset;
     offset += i;        /* ptr into datagram */
     ptr += i;           /* ptr into fragment data */
   }
   /*
     Look for overlap with succeeding segments.
     If we can merge fragments, do it.
   */
   for (tmp = next; tmp != NULL; tmp = tfp) {
     tfp = tmp->next;
     if (tmp->offset >= end)
       break ;            /* no overlaps at all */
     nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERLAP, iph, 0);
     
     /* NOTE(review): the overlap is computed from next->offset rather than
        tmp->offset; next is re-pointed to tfp below whenever tmp is
        removed - confirm that the two always coincide at this point. */
     i = end - next->offset;  /* overlap is 'i' bytes */
     tmp->len -= i;       /* so reduce size of    */
     tmp->offset += i;        /* next fragment        */
     tmp->ptr += i;
     /*
       If we get a frag size of <= 0, remove it and the packet that it
       goes with. We never throw the new frag away, so the frag being
       dumped has always been charged for.
     */
     if (tmp->len <= 0) {
       if (tmp->prev != NULL)
     tmp->prev->next = tmp->next;
       else
     qp->fragments = tmp->next;
       
       if (tmp->next != NULL)
     tmp->next->prev = tmp->prev;
       
       next = tfp;       /* We have killed the original next frame */
 
       frag_kfree_skb(tmp->skb, FREE_READ);
       frag_kfree_s(tmp, sizeof ( struct ipfrag));
     }
   }
   /* Now insert the current fragment into the queue */
   /* Insert this fragment in the chain of fragments. */
   tfp = NULL;
   tfp = ip_frag_create(offset, end, skb, ptr);
   
   /*
     No memory to save the fragment - so throw the lot. If we failed
     the frag_create we haven't charged the queue.
   */
   if (!tfp) {
     nids_params.no_mem( "ip_defrag" );
     kfree_skb(skb, FREE_READ);
     return NULL;
   }
   /* From now on our buffer is charged to the queues. */
   tfp->prev = prev;
   tfp->next = next;
   if (prev != NULL)
     prev->next = tfp;
   else
     qp->fragments = tfp;
 
   if (next != NULL)
     next->prev = tfp;
 
   /*
     OK, so we inserted this new fragment into the chain.  Check if we
     now have a full IP datagram which we can bump up to the IP
     layer...
   */
   /* If all fragments have been collected, glue them into one big IP
      packet and return it. */
   if (ip_done(qp)) {
     skb2 = ip_glue(qp);     /* glue together the fragments */
     return (skb2);
   }
   return (NULL);
}

好了,打完收工!


你可能感兴趣的:(timer,list,header,null,insert,Pointers)