libnids中TCP/IP栈实现细节分析(下)——IP分片重组

在此之前,如果不懂IP分片技术的话,请参照这里。IP分片技术比较简单暴力,没有TCP那样复杂的窗口协议。基本上只是暴力的拆分和重组,代码基本在ip_fragment.c中。

先从总体上说说。首先,每个IP(主机)都会有IP分片包(注意是IP,不是IP对)。所以,每个IP都有一个如下的结构体来维护上面的所有IP分片:

/*
 * Per-host fragment bookkeeping. There is one entry per IP address (a
 * single host, not an address pair); entries live in the fragtable hash
 * table.
 */
struct hostfrags {
  struct ipq *ipqueue;  /* the IP fragment queues kept for this host */
  int ip_frag_mem;  /* compared with IPFRAG_HIGH_THRESH in ip_defrag(); presumably the fragment memory charged to this host -- TODO confirm */
  u_int ip;  /* IP address of this host */
  /* The three fields below make this an element of a hash table. */
  int hash_index;
  struct hostfrags *prev;
  struct hostfrags *next;
};
/* Hash table holding the hostfrags entries of all hosts; allocated in ip_frag_init(). */
static struct hostfrags **fragtable;

每个IP下面又有很多的被分片的IP包——IP碎片队列,IP碎片队列的定义在这:

/*
 * Describe an entry in the "incomplete datagrams" queue: one datagram that
 * is still being reassembled, together with the fragments received so far.
 */
struct ipq {
  unsigned char *mac;        /* pointer to MAC header                */
  struct ip *iph;        /* pointer to IP header                 */
  int len;            /* total length of original datagram    */
  short ihlen;            /* length of the IP header              */
  short maclen;            /* length of the MAC header             */
  struct timer_list timer;    /* when will this queue expire? ip_defrag() re-arms it on every new fragment */
  struct ipfrag *fragments;    /* linked list of received fragments    */
  struct hostfrags *hf;        /* host entry this queue belongs to; ip_defrag_stub() loads this_host from it before running an expired timer */
  struct ipq *next;        /* linked list pointers                 */
  struct ipq *prev;
  // struct device *dev;    /* Device - for icmp replies */
};

最终的IP碎片的定义在这:

/*
 * Describe an IP fragment.
 *
 * Nodes are doubly linked and ip_defrag() inserts each new node in front of
 * the first node whose offset is not smaller than its own. When a later
 * fragment overlaps an existing node, ip_defrag() advances that node's
 * offset and ptr and reduces its len by the overlap.
 */
struct ipfrag 
{
  int offset;            /* offset of fragment in IP datagram    */
  int end;            /* last byte of data in datagram        */
  int len;            /* length of this fragment              */
  struct sk_buff *skb;        /* complete received fragment           */
  unsigned char *ptr;        /* pointer into real fragment data      */
  struct ipfrag *next;        /* linked list pointers                 */
  struct ipfrag *prev;
};

由于libnids中的分片重组代码是从内核中拿出来修改的,所以保留了内核的注释。这里就不多做解释了。

好了步入处理逻辑,照例,先看初始化:

/*
 * Initialise the fragment-reassembly state.
 *
 * Stores the current wall-clock second in time0, allocates a zero-filled
 * host hash table with n bucket pointers, and records n in hash_size.
 * nids_params.no_mem() is called when the table cannot be allocated.
 */
void ip_frag_init(int n)
{
  struct timeval now;

  gettimeofday(&now, NULL);
  time0 = now.tv_sec;

  /* One pointer-sized, zeroed bucket per hash slot. */
  fragtable = calloc(n, sizeof *fragtable);
  if (fragtable == NULL)
    nids_params.no_mem("ip_frag_init");

  hash_size = n;
}

简单到不能再简单——分配了一个主机的哈希表。分完收工。好吧,看重组逻辑:

/*
 * Entry point for fragment handling: decide whether a packet is a fragment
 * and, if it is, hand a private copy of it to ip_defrag().
 *
 * iph    - IP header of the received packet; ip_len bytes are copied from it.
 * defrag - receives ip_defrag()'s result whenever the packet is a fragment.
 *
 * Returns IPF_NOTF for an unfragmented packet, IPF_NEW when ip_defrag()
 * returned a non-NULL datagram (stored in *defrag), and IPF_ISF otherwise.
 */
int ip_defrag_stub(struct ip *iph, struct ip **defrag)
{
    struct sk_buff *copy;
    int frag_field, frag_flags, frag_off, pkt_len;

    numpack++;

    /* Handle timeouts first. timenow is reset to refresh the time. */
    timenow = 0;
    while (timer_head && timer_head->expires < jiffies()) {
        /* Select the host that owns the expiring queue, then fire its timer. */
        this_host = ((struct ipq *) (timer_head->data))->hf;
        timer_head->function(timer_head->data);
    }

    /* Split ip_off into its flag bits and the fragment offset. */
    frag_field = ntohs(iph->ip_off);
    frag_flags = frag_field & ~IP_OFFSET;
    frag_off = frag_field & IP_OFFSET;

    /* No "more fragments" flag and a zero offset: not a fragment. */
    if (!(frag_flags & IP_MF) && !frag_off) {
        ip_defrag(iph, 0);
        return IPF_NOTF;
    }

    /*
     * This is a fragment: allocate an sk_buff with the packet bytes stored
     * directly behind it, copy the packet in, and pass it to ip_defrag().
     */
    pkt_len = ntohs(iph->ip_len);
    copy = malloc(pkt_len + sizeof *copy);
    if (copy == NULL)
        nids_params.no_mem("ip_defrag_stub");
    copy->data = (char *) (copy + 1);
    memcpy(copy->data, iph, pkt_len);

    /* Accounting size: length + 16 + dev_addon, rounded up to a multiple
       of 16, plus sk_buff_size. */
    copy->truesize = pkt_len + 16 + nids_params.dev_addon;
    copy->truesize = (copy->truesize + 15) & ~15;
    copy->truesize += nids_params.sk_buff_size;

    /*
     * Once every fragment of a datagram has been collected, ip_defrag()
     * returns the merged packet and the caller continues processing it
     * (IPF_NEW); otherwise processing of this packet is skipped (IPF_ISF).
     */
    *defrag = (struct ip *) ip_defrag((struct ip *) copy->data, copy);
    if (*defrag != NULL)
        return IPF_NEW;

    return IPF_ISF;
}


/*
 * Process an incoming IP datagram fragment. This is the main reassembly
 * logic.
 *
 * iph - IP header of the packet being examined.
 * skb - buffer holding the copied fragment; ip_defrag_stub() passes 0 for
 *       packets that are not fragments.
 *
 * Returns the result of ip_glue() once ip_done() reports the queue as
 * complete. Returns NULL (0) in every other case: unfragmented packet,
 * failure to create a queue, oversized datagram, failure to create the
 * fragment node, or a datagram that is still incomplete.
 */
static char *ip_defrag(struct ip *iph, struct sk_buff *skb)
{
    struct ipfrag *prev, *next, *tmp;
    struct ipfrag *tfp;
    struct ipq *qp;
    char *skb2;
    unsigned char *ptr;
    int flags, offset;
    int i, ihl, end;

    /*
     * If this is a fragment (skb is non-zero) and the host hash table has no
     * entry for this host yet, create one. Per the original author's note,
     * this step is also what points this_host at the entry for the current
     * IP (hostfrag_find/hostfrag_create are not shown here).
     */
    if (!hostfrag_find(iph) && skb)
        hostfrag_create(iph);

    /* Start by cleaning up the memory. */
    /* When this host's fragment memory exceeds IPFRAG_HIGH_THRESH, call
       ip_evictor(); presumably it frees fragment memory held for the
       current host -- confirm against ip_evictor(). */
    if (this_host)
        if (this_host->ip_frag_mem > IPFRAG_HIGH_THRESH)
            ip_evictor();

    /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
    /* That is, locate the fragment list belonging to this IP packet. */
    if (this_host)
        qp = ip_find(iph);
    else
        qp = 0;

    /* Is this a non-fragmented datagram? */
    offset = ntohs(iph->ip_off);
    flags = offset & ~IP_OFFSET;
    offset &= IP_OFFSET;
    if (((flags & IP_MF) == 0) && (offset == 0))
    {
        if (qp != NULL)
            ip_free(qp);      /* Fragmented frame replaced by full
                   unfragmented copy */
        return 0;
    }

    /* ip_evictor() could have removed all queues for the current host */
    if (!this_host)
        hostfrag_create(iph);

    offset <<= 3;           /* offset is in 8-byte chunks */
    ihl = iph->ip_hl * 4;

    /*
      If the queue already existed, keep restarting its timer as long as
      we still are receiving fragments.  Otherwise, create a fresh queue
      entry.
    */
    /* Fragments of this packet have already been seen for the current host. */
    if (qp != NULL)
    {
        /* ANK. If the first fragment is received, we should remember the correct
           IP header (with options) */
        if (offset == 0)
        {
            qp->ihlen = ihl;
            memcpy(qp->iph, iph, ihl + 8);
        }
        del_timer(&qp->timer);
        qp->timer.expires = jiffies() + IP_FRAG_TIME;    /* about 30 seconds */
        qp->timer.data = (unsigned long) qp; /* pointer to queue */
        qp->timer.function = ip_expire;  /* expire function */
        add_timer(&qp->timer);
    }
    /* Otherwise create a new fragment queue. */
    else
    {
        /* If we failed to create it, then discard the frame. */
        if ((qp = ip_create(iph)) == NULL)
        {
            kfree_skb(skb, FREE_READ);
            return NULL;
        }
    }
    /* Attempt to construct an oversize packet. */
    /* An IP packet can never be larger than 65535 bytes; drop the fragment
       as soon as this is detected. */
    if (ntohs(iph->ip_len) + (int) offset > 65535)
    {
        // NETDEBUG(printk("Oversized packet received from %s\n", int_ntoa(iph->ip_src.s_addr)));
        nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERSIZED, iph, 0);
        kfree_skb(skb, FREE_READ);
        return NULL;
    }

    /*
     * From here on: find the position of this fragment in the queue and
     * resolve overlaps. Overlap with the preceding fragment trims the front
     * of the new fragment; overlap with following fragments trims (or
     * removes) those older fragments.
     */
    /* Determine the position of this fragment. */
    end = offset + ntohs(iph->ip_len) - ihl;

    /* Point into the IP datagram 'data' part. */
    ptr = (unsigned char *)(skb->data + ihl);

    /* Is this the final fragment? */
    if ((flags & IP_MF) == 0)
        qp->len = end;

    /*
      Find out which fragments are in front and at the back of us in the
      chain of fragments so far.  We must know where to put this
      fragment, right?
    */
    prev = NULL;
    for (next = qp->fragments; next != NULL; next = next->next)
    {
        if (next->offset >= offset)
            break;            /* bingo! */
        prev = next;
    }
    /*
      We found where to put this one.  Check for overlap with preceding
      fragment, and, if needed, align things so that any overlaps are
      eliminated.
    */
    /* NOTE(review): if the new fragment lies entirely inside prev, offset
       becomes prev->end and can exceed end; nothing in this function checks
       for that, so confirm that ip_frag_create()/ip_glue() cope with
       end - offset being negative. */
    if (prev != NULL && offset < prev->end)
    {
        nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERLAP, iph, 0);
        i = prev->end - offset;
        offset += i;        /* ptr into datagram */
        ptr += i;           /* ptr into fragment data */
    }
    /*
      Look for overlap with succeeding segments.
      If we can merge fragments, do it.
    */
    /* tfp caches tmp->next because tmp may be freed inside the loop body. */
    for (tmp = next; tmp != NULL; tmp = tfp)
    {
        tfp = tmp->next;
        if (tmp->offset >= end)
            break;            /* no overlaps at all */
        nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERLAP, iph, 0);

        /* NOTE(review): the overlap is computed from next->offset, not
           tmp->offset; this looks like it relies on next == tmp whenever a
           trim is actually needed -- confirm. */
        i = end - next->offset;  /* overlap is 'i' bytes */
        tmp->len -= i;       /* so reduce size of    */
        tmp->offset += i;        /* next fragment        */
        tmp->ptr += i;
        /*
          If we get a frag size of <= 0, remove it and the packet that it
          goes with. We never throw the new frag away, so the frag being
          dumped has always been charged for.
        */
        if (tmp->len <= 0)
        {
            if (tmp->prev != NULL)
                tmp->prev->next = tmp->next;
            else
                qp->fragments = tmp->next;

            if (tmp->next != NULL)
                tmp->next->prev = tmp->prev;

            next = tfp;       /* We have killed the original next frame */

            frag_kfree_skb(tmp->skb, FREE_READ);
            frag_kfree_s(tmp, sizeof(struct ipfrag));
        }
    }
    /* Now insert the current fragment into the queue. */
    /* Insert this fragment in the chain of fragments. */
    tfp = NULL;
    tfp = ip_frag_create(offset, end, skb, ptr);

    /*
      No memory to save the fragment - so throw the lot. If we failed
      the frag_create we haven't charged the queue.
    */
    if (!tfp)
    {
        nids_params.no_mem("ip_defrag");
        kfree_skb(skb, FREE_READ);
        return NULL;
    }
    /* From now on our buffer is charged to the queues. */
    tfp->prev = prev;
    tfp->next = next;
    if (prev != NULL)
        prev->next = tfp;
    else
        qp->fragments = tfp;

    if (next != NULL)
        next->prev = tfp;

    /*
      OK, so we inserted this new fragment into the chain.  Check if we
      now have a full IP datagram which we can bump up to the IP
      layer...
    */
    /* If every fragment has been collected, glue them into one large IP
       packet and return it. */
    if (ip_done(qp))
    {
        skb2 = ip_glue(qp);     /* glue together the fragments */
        return (skb2);
    }
    return (NULL);
}


你可能感兴趣的:(libnids中TCP/IP栈实现细节分析(下)——IP分片重组)