好了,有时间了,来看看libnids中IP分片重组的方法吧。
在此之前,如果不懂IP分片技术的话,请参照这里。IP分片技术比较简单暴力,没有TCP那样复杂的窗口协议。基本上只是暴力的拆分和重组,代码基本在ip_fragment.c中。
先从总体上说说。首先,每个IP(主机)都会有IP分片包(注意是IP,不是IP对)。所以,每个IP都有一个如下的结构体来维护上面的所有IP分片:
1
2
3
4
5
6
7
8
9
10
11
|
/*
 * Per-host fragment state.  One entry exists per IP address (a single
 * host, not an address pair) and owns every incomplete datagram queued
 * for that host.
 */
struct hostfrags {
  struct ipq *ipqueue;      /* queue of incomplete datagrams for this host */
  int ip_frag_mem;          /* checked against IPFRAG_HIGH_THRESH in
                               ip_defrag(); presumably the fragment memory
                               charged to this host -- confirm */
  u_int ip;                 /* IP address of the host */

  /* The three members below make this an element of the host hash table. */
  int hash_index;
  struct hostfrags *prev;
  struct hostfrags *next;
};

/* Hash table that tracks every host entry. */
static struct hostfrags **fragtable;
|
每个IP下面又有很多的被分片的IP包——IP碎片队列,IP碎片队列的定义在这:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
/*
 * One entry in the "incomplete datagrams" queue: a datagram whose
 * fragments are still being collected.
 */
struct ipq {
  unsigned char *mac;         /* pointer to MAC header */
  struct ip *iph;             /* pointer to IP header */
  int len;                    /* total length of original datagram */
  short ihlen;                /* length of the IP header */
  short maclen;               /* length of the MAC header */
  struct timer_list timer;    /* when will this queue expire? */
  struct ipfrag *fragments;   /* linked list of received fragments */
  struct hostfrags *hf;       /* host entry for this queue; copied into
                                 this_host when the queue's timer fires */
  struct ipq *next;           /* linked list pointers */
  struct ipq *prev;
  // struct device *dev;      /* Device - for icmp replies */
};
|
最终的IP碎片的定义在这:
1
2
3
4
5
6
7
8
9
10
|
/* A single received IP fragment, kept on its datagram's fragment list. */
struct ipfrag {
  int offset;             /* offset of fragment in IP datagram */
  int end;                /* end offset of this fragment's data in the
                             datagram (ip_defrag computes it as offset
                             plus payload length) */
  int len;                /* length of this fragment */
  struct sk_buff *skb;    /* complete received fragment */
  unsigned char *ptr;     /* pointer into real fragment data */
  struct ipfrag *next;    /* linked list pointers */
  struct ipfrag *prev;
};
|
由于libnids中的分片重组代码是从内核中拿出来修改的,所以保留了内核的注释。这里就不多做解释了。
好了步入处理逻辑,照例,先看初始化:
1
2
3
4
5
6
7
8
9
10
11
12
|
/*
 * Initialise the fragment-reassembly state.
 *
 * Records the current wall-clock second in time0, allocates a zero-filled
 * host hash table with n bucket pointers, and stores n in hash_size.
 * nids_params.no_mem("ip_frag_init") is called if the table cannot be
 * allocated.
 */
void
ip_frag_init(int n)
{
  struct timeval now;

  gettimeofday(&now, 0);
  time0 = now.tv_sec;

  /* calloc leaves every bucket as a null pointer, i.e. an empty table */
  fragtable = calloc(n, sizeof *fragtable);
  if (!fragtable)
    nids_params.no_mem("ip_frag_init");

  hash_size = n;
}
|
简单到不能再简单——分配了一个主机的哈希表。分完收工。好吧,看重组逻辑:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
|
/*
 * Front end of the defragmenter: decides whether a packet is a fragment
 * and hands it to ip_defrag().
 *
 * iph     IP header of the incoming packet, followed by its payload;
 *         ntohs(iph->ip_len) bytes are copied from it, so the caller must
 *         supply at least that many.
 * defrag  receives the reassembled datagram when IPF_NEW is returned.
 *
 * Returns
 *   IPF_NOTF  the packet is not a fragment; *defrag is left untouched.
 *   IPF_NEW   this fragment completed a datagram; *defrag points to it.
 *   IPF_ISF   the packet is a fragment and the datagram is still
 *             incomplete (or the fragment was dropped); the caller should
 *             skip further IP processing for it.
 */
int
ip_defrag_stub(struct ip *iph, struct ip **defrag)
{
  int offset, flags, tot_len;
  struct sk_buff *skb;

  numpack++;

  /* Handle expired timers first. */
  timenow = 0;                  /* refresh the time */
  while (timer_head && timer_head->expires < jiffies()) {
    /* the handler works on this_host, so point it at the queue's host */
    this_host = ((struct ipq *) (timer_head->data))->hf;
    timer_head->function(timer_head->data);
  }

  /* Split ip_off into the flag bits and the fragment offset. */
  offset = ntohs(iph->ip_off);
  flags = offset & ~IP_OFFSET;
  offset &= IP_OFFSET;

  /* MF clear and offset zero: this packet is not a fragment. */
  if (((flags & IP_MF) == 0) && (offset == 0)) {
    ip_defrag(iph, 0);
    return IPF_NOTF;
  }

  /*
   * The packet is a fragment: copy it into a private sk_buff (header
   * struct immediately followed by the packet bytes) and pass that on.
   */
  tot_len = ntohs(iph->ip_len);
  skb = malloc(tot_len + sizeof(struct sk_buff));
  if (!skb) {
    nids_params.no_mem("ip_defrag_stub");
    /*
     * The no_mem callback may return; without a buffer the fragment can
     * only be dropped, so report "fragment, nothing to process" rather
     * than dereferencing a null pointer below.
     */
    return IPF_ISF;
  }
  skb->data = (char *) (skb + 1);
  memcpy(skb->data, iph, tot_len);

  /* Accounting size: payload plus overhead, rounded up to 16 bytes. */
  skb->truesize = tot_len + 16 + nids_params.dev_addon;
  skb->truesize = (skb->truesize + 15) & ~15;
  skb->truesize += nids_params.sk_buff_size;

  /*
   * ip_defrag() returns the merged datagram once every fragment has been
   * collected; otherwise it returns NULL and the caller must wait.
   */
  if ((*defrag = (struct ip *) ip_defrag((struct ip *) (skb->data), skb)))
    return IPF_NEW;

  return IPF_ISF;
}
/*
 * Process an incoming IP datagram fragment -- the main reassembly logic.
 *
 * iph  IP header of the packet being processed.
 * skb  buffer holding the fragment, or NULL when the caller already knows
 *      the packet is not a fragment (skb is only dereferenced after the
 *      non-fragment early return).
 *
 * Returns the glued datagram from ip_glue() once ip_done() reports the
 * queue complete, otherwise NULL.  On the discard paths below the skb is
 * released with kfree_skb().
 */
static char *
ip_defrag(struct ip *iph, struct sk_buff *skb)
{
  struct ipfrag *prev, *next, *cur;
  struct ipfrag *follow;
  struct ipfrag *fresh;
  struct ipq *qp;
  char *whole;
  unsigned char *data;
  int flags, offset;
  int overlap, hdr_len, end;

  /*
   * For a real fragment whose host has no entry in the host hash table
   * yet, create one.  Per the original note, this lookup is also what
   * sets this_host to the host matching the packet.
   */
  if (!hostfrag_find(iph) && skb)
    hostfrag_create(iph);

  /* Start by cleaning up the memory: evict when this host is over the
     high threshold. */
  if (this_host && this_host->ip_frag_mem > IPFRAG_HIGH_THRESH)
    ip_evictor();

  /* Find the entry of this IP datagram in the "incomplete datagrams"
     queue. */
  if (this_host)
    qp = ip_find(iph);
  else
    qp = 0;

  /* Is this a non-fragmented datagram? */
  offset = ntohs(iph->ip_off);
  flags = offset & ~IP_OFFSET;
  offset &= IP_OFFSET;
  if (((flags & IP_MF) == 0) && (offset == 0)) {
    /* Fragmented frame replaced by full unfragmented copy */
    if (qp != NULL)
      ip_free(qp);
    return 0;
  }

  /* ip_evictor() could have removed all queues for the current host */
  if (!this_host)
    hostfrag_create(iph);

  offset <<= 3;                 /* offset is in 8-byte chunks */
  hdr_len = iph->ip_hl * 4;

  /*
   * If the queue already existed, keep restarting its timer as long as
   * we still are receiving fragments.  Otherwise, create a fresh queue
   * entry.
   */
  if (qp == NULL) {
    /* No queue for this datagram yet.  If we failed to create it, then
       discard the frame. */
    qp = ip_create(iph);
    if (qp == NULL) {
      kfree_skb(skb, FREE_READ);
      return NULL;
    }
  } else {
    /* ANK. If the first fragment is received, we should remember the
       correct IP header (with options) */
    if (offset == 0) {
      qp->ihlen = hdr_len;
      memcpy(qp->iph, iph, hdr_len + 8);
    }
    del_timer(&qp->timer);
    qp->timer.expires = jiffies() + IP_FRAG_TIME;   /* about 30 seconds */
    qp->timer.data = (unsigned long) qp;            /* pointer to queue */
    qp->timer.function = ip_expire;                 /* expire function */
    add_timer(&qp->timer);
  }

  /* Attempt to construct an oversize packet: no IP datagram may exceed
     65535 bytes, so give up on this fragment as soon as one is seen. */
  if (ntohs(iph->ip_len) + (int) offset > 65535) {
    // NETDEBUG(printk("Oversized packet received from %s\n", int_ntoa(iph->ip_src.s_addr)));
    nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERSIZED, iph, 0);
    kfree_skb(skb, FREE_READ);
    return NULL;
  }

  /*
   * From here on: locate the fragment's slot in the queue and resolve
   * overlaps, trimming the overlapped part of older data where needed.
   */

  /* Determine the position of this fragment. */
  end = offset + ntohs(iph->ip_len) - hdr_len;

  /* Point into the IP datagram 'data' part. */
  data = (unsigned char *) (skb->data + hdr_len);

  /* Is this the final fragment? */
  if ((flags & IP_MF) == 0)
    qp->len = end;

  /*
   * Find out which fragments are in front and at the back of us in the
   * chain of fragments so far.  We must know where to put this
   * fragment, right?
   */
  prev = NULL;
  next = qp->fragments;
  while (next != NULL && next->offset < offset) {
    prev = next;
    next = next->next;
  }

  /*
   * We found where to put this one.  Check for overlap with preceding
   * fragment, and, if needed, align things so that any overlaps are
   * eliminated.
   */
  if (prev != NULL && offset < prev->end) {
    nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERLAP, iph, 0);
    overlap = prev->end - offset;
    offset += overlap;          /* ptr into datagram */
    data += overlap;            /* ptr into fragment data */
  }

  /*
   * Look for overlap with succeeding segments.
   * If we can merge fragments, do it.
   */
  for (cur = next; cur != NULL; cur = follow) {
    follow = cur->next;         /* saved now: cur may be freed below */
    if (cur->offset >= end)
      break;                    /* no overlaps at all */
    nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERLAP, iph, 0);

    /* overlap is measured from 'next', which is advanced whenever the
       fragment it pointed at gets removed */
    overlap = end - next->offset;
    cur->len -= overlap;        /* so reduce size of */
    cur->offset += overlap;     /* next fragment */
    cur->ptr += overlap;

    /*
     * If we get a frag size of <= 0, remove it and the packet that it
     * goes with.  We never throw the new frag away, so the frag being
     * dumped has always been charged for.
     */
    if (cur->len <= 0) {
      if (cur->prev != NULL)
        cur->prev->next = cur->next;
      else
        qp->fragments = cur->next;

      if (cur->next != NULL)
        cur->next->prev = cur->prev;

      next = follow;            /* We have killed the original next frame */

      frag_kfree_skb(cur->skb, FREE_READ);
      frag_kfree_s(cur, sizeof(struct ipfrag));
    }
  }

  /* Insert this fragment in the chain of fragments. */
  fresh = ip_frag_create(offset, end, skb, data);

  /*
   * No memory to save the fragment - so throw the lot.  If we failed
   * the frag_create we haven't charged the queue.
   */
  if (!fresh) {
    nids_params.no_mem("ip_defrag");
    kfree_skb(skb, FREE_READ);
    return NULL;
  }

  /* From now on our buffer is charged to the queues. */
  fresh->prev = prev;
  fresh->next = next;
  if (prev != NULL)
    prev->next = fresh;
  else
    qp->fragments = fresh;

  if (next != NULL)
    next->prev = fresh;

  /*
   * OK, so we inserted this new fragment into the chain.  Check if we
   * now have a full IP datagram which we can bump up to the IP
   * layer...
   */
  if (ip_done(qp)) {
    whole = ip_glue(qp);        /* glue together the fragments */
    return (whole);
  }
  return (NULL);
}
|
好了,打完收工!