linux内核 路由fib表之创建

2.2.2 路由创建

当通过netlink,操作类型为RTM_NEWROUTE时,调用inet_rtm_newroute函数添加路由。

功能:

         a)、将用户空间配置内容传过来

         b)、路由表的创建

         c)、路由表项的添加

流程:

linux内核 路由fib表之创建_第1张图片

代码:

/*
 * Netlink handler for RTM_NEWROUTE: add an IPv4 route.
 *
 * @skb: socket buffer that carried the request
 * @nlh: netlink message holding the route parameters (destination, prefix
 *       length, table id, gateway, ...), decoded by rtm_to_fib_config()
 * @arg: unused here
 *
 * Returns the result of the table's tb_insert callback, the negative error
 * from rtm_to_fib_config(), or -ENOBUFS when no table could be obtained.
 */
static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
    struct net *net = sock_net(skb->sk);
    struct fib_config cfg;
    struct fib_table *table;
    int rc;

    /* Translate the netlink request into a fib_config. */
    rc = rtm_to_fib_config(net, skb, nlh, &cfg);
    if (rc < 0)
        return rc;

    /* Look up the table with the requested id, creating it if needed. */
    table = fib_new_table(net, cfg.fc_table);
    if (!table)
        return -ENOBUFS;

    /* Let the table build the route entry and link it in. */
    return table->tb_insert(table, &cfg);
}

2.2.2.1 接收用户空间消息

rtm_to_fib_config(net, skb, nlh, &cfg)用于将nlh内容,传递到cfg中。
/*
 * Decode an rtnetlink route request into a struct fib_config.
 *
 * @net: network namespace recorded in cfg->fc_nlinfo
 * @skb: socket buffer of the request; supplies the sender pid
 * @nlh: netlink message to decode
 * @cfg: output; zeroed first, then filled from the rtmsg header and from
 *       the route attributes that follow it
 *
 * Returns 0 on success, the negative nlmsg_validate() error, or -EINVAL
 * when the route type exceeds RTN_MAX. On the -EINVAL path cfg has already
 * been filled from the header.
 */
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
                struct nlmsghdr *nlh, struct fib_config *cfg)
{
    struct nlattr *nla;
    struct rtmsg *hdr;
    int rc, rem;

    rc = nlmsg_validate(nlh, sizeof(*hdr), RTA_MAX, rtm_ipv4_policy);
    if (rc < 0)
        return rc;

    memset(cfg, 0, sizeof(*cfg));

    /* The message payload starts with the fixed rtmsg header. */
    hdr = nlmsg_data(nlh);

    /*
     * Copy the fixed header fields. The concrete values quoted below are the
     * article author's observations and are not verified here.
     */
    cfg->fc_dst_len = hdr->rtm_dst_len;     /* prefix (mask) length */
    cfg->fc_tos = hdr->rtm_tos;             /* author: appears to default to 0 */
    /*
     * Table id; fib_new_table() maps 0 to RT_TABLE_MAIN.
     * Author: 0 seen for connected routes, 255 for kernel routes.
     */
    cfg->fc_table = hdr->rtm_table;
    cfg->fc_protocol = hdr->rtm_protocol;   /* author: 11 for both connected and kernel routes */
    cfg->fc_scope = hdr->rtm_scope;         /* author: 253 connected, 254 kernel route */
    cfg->fc_type = hdr->rtm_type;           /* author: 1 connected, 2 kernel route */
    cfg->fc_flags = hdr->rtm_flags;         /* author: 1024 for both */

    /* Netlink bookkeeping carried along with the request. */
    cfg->fc_nlflags = nlh->nlmsg_flags;
    cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
    cfg->fc_nlinfo.nlh = nlh;
    cfg->fc_nlinfo.nl_net = net;

    if (cfg->fc_type > RTN_MAX)
        return -EINVAL;

    /* Walk the attributes after the rtmsg header; unknown types are ignored. */
    nlmsg_for_each_attr(nla, nlh, sizeof(struct rtmsg), rem) {
        switch (nla_type(nla)) {
        case RTA_DST:
            cfg->fc_dst = nla_get_be32(nla);
            break;
        case RTA_OIF:
            cfg->fc_oif = nla_get_u32(nla);
            break;
        case RTA_GATEWAY:
            cfg->fc_gw = nla_get_be32(nla);
            break;
        case RTA_PRIORITY:
            cfg->fc_priority = nla_get_u32(nla);
            break;
        case RTA_PREFSRC:
            cfg->fc_prefsrc = nla_get_be32(nla);
            break;
        case RTA_METRICS:
            /* Kept as a pointer into the message, not copied. */
            cfg->fc_mx = nla_data(nla);
            cfg->fc_mx_len = nla_len(nla);
            break;
        case RTA_MULTIPATH:
            /* Kept as a pointer into the message, not copied. */
            cfg->fc_mp = nla_data(nla);
            cfg->fc_mp_len = nla_len(nla);
            break;
        case RTA_FLOW:
            cfg->fc_flow = nla_get_u32(nla);
            break;
        case RTA_TABLE:
            /* Overrides the table id taken from the rtmsg header. */
            cfg->fc_table = nla_get_u32(nla);
            break;
        }
    }

    return 0;
}

2.2.2.2 路由表的创建(先查找,若不存在,则创建)

fib_new_table用于获取指定的路由表,下面只考虑支持策略路由的情况。

功能:

    a)、路由表的查找,若存在,则返回该路由表;否则,继续下面

         b)、路由表的创建

         c)、将新建的路由表添加到fib_table_hash散列表

流程:

linux内核 路由fib表之创建_第2张图片

代码:

/*
 * Return the FIB table with the given id, creating and registering it if
 * it does not exist yet.
 *
 * @net: network namespace owning the fib_table_hash[] buckets
 * @id:  table id; 0 is treated as RT_TABLE_MAIN
 *
 * Returns the existing or newly created table, or NULL if creating a new
 * table failed.
 */
struct fib_table *fib_new_table(struct net *net, u32 id)
{
    struct fib_table *table;
    unsigned int bucket;

    if (!id)
        id = RT_TABLE_MAIN;

    /* Fast path: the table is already in net->ipv4.fib_table_hash[]. */
    table = fib_get_table(net, id);
    if (!table) {
        /* Not found: build a fresh table and publish it in its bucket. */
        table = fib_hash_table(id);
        if (table) {
            bucket = id & (FIB_TABLE_HASHSZ - 1);
            hlist_add_head_rcu(&table->tb_hlist, &net->ipv4.fib_table_hash[bucket]);
        }
    }

    return table;
}

(1.1)路由表的查找

函数struct fib_table *fib_get_table(struct net *net, u32 id)根据指定的id(该id由用户态通过netlink传入,指明要操作哪个路由表),在散列桶net->ipv4.fib_table_hash[h]中查找对应的路由表。

/*
 * Look up a FIB table by id in net->ipv4.fib_table_hash[].
 *
 * @net: network namespace to search
 * @id:  table id; 0 is treated as RT_TABLE_MAIN
 *
 * The bucket is walked under rcu_read_lock(). Returns the matching table,
 * or NULL if no table in the bucket has that id.
 */
struct fib_table *fib_get_table(struct net *net, u32 id)
{
    struct fib_table *found = NULL;
    struct fib_table *cur;
    struct hlist_node *pos;
    struct hlist_head *bucket;
    unsigned int idx;

    if (!id)
        id = RT_TABLE_MAIN;
    idx = id & (FIB_TABLE_HASHSZ - 1);
    bucket = &net->ipv4.fib_table_hash[idx];

    rcu_read_lock();
    hlist_for_each_entry_rcu(cur, pos, bucket, tb_hlist) {
        if (cur->tb_id == id) {
            found = cur;
            break;
        }
    }
    rcu_read_unlock();

    return found;
}

(1.2)路由表的创建

         若根据id查不到指定路由表,则进行路由表的创建fib_hash_table(id)

/*
 * Allocate and initialise a hash-based FIB table.
 *
 * @id: table id stored in the new table
 *
 * The allocation covers struct fib_table plus a struct fn_hash, which is
 * reached through tb->tb_data and zeroed here. The table is not linked
 * into any list by this function.
 *
 * Returns the new table, or NULL if the allocation failed.
 */
struct fib_table *fib_hash_table(u32 id)
{
    struct fib_table *table;

    table = kmalloc(sizeof(*table) + sizeof(struct fn_hash), GFP_KERNEL);
    if (!table)
        return NULL;

    /* Start with an empty fn_hash: no zones yet. */
    memset(table->tb_data, 0, sizeof(struct fn_hash));

    table->tb_id = id;
    table->tb_default = -1;

    /* Operations implemented by the fn_hash backend. */
    table->tb_lookup = fn_hash_lookup;                 /* find a route entry */
    table->tb_insert = fn_hash_insert;                 /* add a route entry */
    table->tb_delete = fn_hash_delete;                 /* remove a route entry */
    table->tb_flush = fn_hash_flush;                   /* flush route entries */
    table->tb_select_default = fn_hash_select_default; /* choose a default route entry */
    table->tb_dump = fn_hash_dump;                     /* dump the table (author unsure of exact purpose) */

    return table;
}

(1.3)路由表添加到散列表ipv4.fib_table_hash[h]

h = id &(FIB_TABLE_HASHSZ -1);
   hlist_add_head_rcu(&tb->tb_hlist,&net->ipv4.fib_table_hash[h]);

2.2.2.3 路由表项的添加

         接下来会调用tb->tb_insert(tb, &cfg)引用的函数fn_hash_insert,进行路由表项的插入。

功能:

    1)查找fn_zone,若不存在,创建

    2)得到要插入路由表项的key值

    3)根据netlink传入数据,创建fib_info

    4)利用key值,在fn_zone中查找对应的fib_node节点

        4.1)若fib_node存在,根据tos、priority在fib_node中查找fib_alias;

            4.1.1)若fib_alias与要插入的路由表项全匹配,则返回

        4.2)若fib_node不存在,则创建新fib_node

    5)创建新的fib_alias,并赋值

    6)将fib_node插入fn_zone,将fib_alias插入fib_node中alias链表

    7)刷新路由缓存,通知用户空间

思维导图:

linux内核 路由fib表之创建_第3张图片

流程

linux内核 路由fib表之创建_第4张图片

代码:

static int fn_hash_insert(struct fib_table*tb,struct fib_config *cfg)
{
    struct fn_hash *table= (struct fn_hash*) tb->tb_data;
    struct fib_node *new_f= NULL;
    struct fib_node *f;
    struct fib_alias *fa,*new_fa;
    struct fn_zone *fz;
    struct fib_info *fi;
    u8tos = cfg->fc_tos;
   __be32 key;
    int err;
 
    if (cfg->fc_dst_len> 32)
       return -EINVAL;
    //根据目的IP的掩码长度,找到相对应的fn_zone变量
    fz = table->fn_zones[cfg->fc_dst_len];
    //若找不到,则调用fn_new_zone创建;若找到,继续
    if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
       return -ENOBUFS;
 
    key= 0;
    if (cfg->fc_dst){
       if (cfg->fc_dst & ~FZ_MASK(fz))
           return -EINVAL;
       //根据目的IP和掩码长度进行与操作,得到搜索关键字;
       //该搜索关键字,用来在fn_zone[i]中查找fib_node节点
       key = fz_key(cfg->fc_dst, fz);
    }
    //打印路由表id;此fn_zone的掩码长度;目的ip/掩码长度;key应该是网段
   printk("%s-->table_id:%u fz_order:%d dest:%pI4/%d key:%pI4\n",__FUNCTION__,\
                 tb->tb_id,fz->fz_order,&cfg->fc_dst,cfg->fc_dst_len,&key);
        
    //根据netlink传递的内容cfg,构建下一跳信息fib_info结构
    fi = fib_create_info(cfg);
    if (IS_ERR(fi))
    {
           DEBUG_V4Route("%s-->err:%ld\n",__FUNCTION__,PTR_ERR(fi));
           return PTR_ERR(fi);
    }
    //打印当前标志;协议类型;优先级
   printk("%s-->create new fib_info--fib_flags:0x%x fib_protocol:%d  fib_priority:%u\n",\
                     __FUNCTION__,fi->fib_flags,fi->fib_protocol,fi->fib_priority);
       
    //若当前fn_zone[i]中的fib_node节点数大于当前容量,则扩充
    if (fz->fz_nent > (fz->fz_divisor<<1)&&
       fz->fz_divisor< FZ_MAX_DIVISOR &&
       (cfg->fc_dst_len== 32||
        (1<< cfg->fc_dst_len)> fz->fz_divisor))
       fn_rehash_zone(fz);
    //(1)用key在fn_zone[i]中查找对应的fib_node节点
    f = fib_find_node(fz, key);
 
    //(1.1)若fib_node f节点为空,则让fib_alias fa为空
    //(1.2)若fib_node f节点不为空,则通过tos、priority查找对应的fib_alias节点
    if (!f)
       fa = NULL;
    else{
       fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority);
    }
       
 
    /* Now fa, if non-NULL, points to the first fib alias
     *with the same keys [prefix,tos,priority], if such key already
     *exists or to the node before which we will insert new one.
     *
     *If fa is NULL, we will need to allocate a new one and
     *insert to the head of f.
     *
     *If f is NULL, no fib node matched the destination key
     *and we need to allocate a new one of those as well.
     */
    
    //(1.2)若fib_alias fa存在,且fib_alias的tos与要添加的路由tos相等,
    //且fib_alias的fib_info的priority与要添加的路由优先级也相等
   
    if (fa && fa->fa_tos== tos &&
       fa->fa_info->fib_priority== fi->fib_priority){
           ios_debug_out("fa->fa_tos == tos   %s-->fa_tos:%d fib_priority:%u\n",__FUNCTION__,tos,fi->fib_priority);
       struct fib_alias *fa_first,*fa_match;
 
       err = -EEXIST;
       //(1.2.1)若应用层添加路由的操作位为NLM_F_EXCL,则程序返回(路由表项已存在)
       if (cfg->fc_nlflags& NLM_F_EXCL)
           goto out;
 
        /* We have 2 goals:
        * 1. Find exact match for type, scope, fib_info to avoid
        * duplicate routes
        * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
        */
        //(1.2.3)若fib_alias fa的类型、作用范围与要添加的路由相等
        //且该fib_alias关联的fib_info与我们通过要添加的路由构造的fib_info fi相等
        //若找到一个全匹配的fib_alias fa_match,则程序返回(路由表项已存在)
        //若未找到,则进行添加路由表项操作
       fa_match = NULL;
       fa_first = fa;
       fa = list_entry(fa->fa_list.prev,struct fib_alias, fa_list);
        list_for_each_entry_continue(fa,&f->fn_alias, fa_list){
           if (fa->fa_tos != tos)
                break;
           if (fa->fa_info->fib_priority!= fi->fib_priority)
                break;
           if (fa->fa_type == cfg->fc_type&&
                fa->fa_scope== cfg->fc_scope&&
                fa->fa_info== fi){
                fa_match = fa;
                break;
           }
       }
       //(1.2.2)若应用层添加路由的操作位为NLM_F_REPLACE,
       //则替换已存在的tos与priority相等的fib_alias、fib_info
       //刷新缓存区、
       //通知用户空间,并返回程序
       if (cfg->fc_nlflags& NLM_F_REPLACE){
           struct fib_info *fi_drop;
           u8 state;
 
           fa = fa_first;
           if (fa_match) {
                if (fa == fa_match)
                    err =0;
                goto out;
           }
           write_lock_bh(&fib_hash_lock);
           fi_drop = fa->fa_info;
           fa->fa_info= fi;
           fa->fa_type= cfg->fc_type;
           fa->fa_scope= cfg->fc_scope;
           state = fa->fa_state;
           fa->fa_state&= ~FA_S_ACCESSED;
           fib_hash_genid++;
           write_unlock_bh(&fib_hash_lock);
 
           fib_release_info(fi_drop);
           if (state & FA_S_ACCESSED)
                rt_cache_flush(cfg->fc_nlinfo.nl_net,-1);
           rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id,
                  &cfg->fc_nlinfo, NLM_F_REPLACE);
           return0;
        }
 
       /* Error if we find a perfect match which
        * uses the same scope, type, and nexthop
        * information.
        */
       if (fa_match)
           goto out;
       //成立,则在表头添加新的fib_alias
       if (!(cfg->fc_nlflags& NLM_F_APPEND))
           fa = fa_first;
    }
   
    err= -ENOENT;
    //(2)若应用层未设置标志位为NLM_F_CREATE,则不进行添加操作,程序返回
    if (!(cfg->fc_nlflags& NLM_F_CREATE))
       goto out;
 
    err= -ENOBUFS;
 
       printk("%s-->create new route\n",__FUNCTION__);
    //(3)添加路由表项操作
   
    //(1.1)若fib_node节点不存在,则创建fib_node
    //并对fn_hash、fn_alias初始化,设置key值
    if (!f) {
                printk("%s-->createnew fib_node\n",__FUNCTION__);
       new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL);
       if (new_f == NULL)
           goto out;
 
       INIT_HLIST_NODE(&new_f->fn_hash);
       INIT_LIST_HEAD(&new_f->fn_alias);
       new_f->fn_key= key;
       f = new_f;
    }
    //(3.1)创建新的fib_alias,并赋值
   new_fa = &f->fn_embedded_alias;
    if (new_fa->fa_info!= NULL){
                printk("%s-->createnew fib_alias\n",__FUNCTION__);
       new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
       if (new_fa == NULL)
           goto out;
    }
   new_fa->fa_info= fi;
   new_fa->fa_tos= tos;
   new_fa->fa_type= cfg->fc_type;
   new_fa->fa_scope= cfg->fc_scope;
   new_fa->fa_state= 0;
 
       printk("new_fa set value %s-->fa_tos:%d fa_type:%d fa_scope:%d\n",__FUNCTION__,\
                                   tos,cfg->fc_type,cfg->fc_scope);
 
    /*
     *Insert new entry to the list.
     */
    //(3.2)若新建了fib_node,则将fib_node插入fib_zone,将创建的fib_alias new_fa添加到fib_alias链表中
    //并让fz_nent计数+1
    //刷新路由缓存
    //通知用户空间
   write_lock_bh(&fib_hash_lock);
    if (new_f)
       fib_insert_node(fz, new_f);
   list_add_tail(&new_fa->fa_list,
        (fa ? &fa->fa_list: &f->fn_alias));
   fib_hash_genid++;
   write_unlock_bh(&fib_hash_lock);
 
    if (new_f)
       fz->fz_nent++;
   rt_cache_flush(cfg->fc_nlinfo.nl_net,-1);
 
   rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
         &cfg->fc_nlinfo,0);
    return 0;
 
out:
    if (new_f)
       kmem_cache_free(fn_hash_kmem, new_f);
   fib_release_info(fi);
    return err;
}

         接下来分析一下其中关键的函数

(1)创建fn_zone结构fn_new_zone

/*
 * Allocate a zone for prefix length z and register it in the table.
 *
 * @table: fn_hash that will own the zone
 * @z:     prefix (mask) length the zone serves
 *
 * The zone gets 16 hash buckets, or a single bucket when z is 0. It is
 * stored in table->fn_zones[z] and linked into the zone list directly
 * after the first existing zone with a longer prefix, or at the head of
 * table->fn_zone_list when no such zone exists.
 *
 * Returns the new zone, or NULL if an allocation failed.
 */
static struct fn_zone *fn_new_zone(struct fn_hash *table, int z)
{
    int i;
    struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL);
    if (!fz)
        return NULL;

    /* Non-zero prefix length: 16 buckets; prefix length 0: one bucket. */
    if (z) {
        fz->fz_divisor = 16;
    } else {
        fz->fz_divisor = 1;
    }
    fz->fz_hashmask = (fz->fz_divisor - 1);
    fz->fz_hash = fz_hash_alloc(fz->fz_divisor);
    if (!fz->fz_hash) {
        kfree(fz);
        return NULL;
    }

    /* Record the prefix length and the matching network mask. */
    fz->fz_order = z;
    fz->fz_mask = inet_make_mask(z);

    /* Find the first not empty zone with more specific mask */
    for (i = z + 1; i <= 32; i++)
        if (table->fn_zones[i])
            break;

    write_lock_bh(&fib_hash_lock);
    if (i > 32) {
        /* No more specific masks, we are the first. */
        fz->fz_next = table->fn_zone_list;
        table->fn_zone_list = fz;
    } else {
        /* Link in right after the more specific zone found above. */
        fz->fz_next = table->fn_zones[i]->fz_next;
        table->fn_zones[i]->fz_next = fz;
    }
    table->fn_zones[z] = fz;
    fib_hash_genid++;
    write_unlock_bh(&fib_hash_lock);
    return fz;
}

(2)获得key值,fz_key

/*
 * Compute the key used to look up a fib_node inside a zone.
 *
 * @dst: destination IPv4 address
 * @fz:  zone whose mask (FZ_MASK) is applied
 *
 * Returns dst with only the bits covered by the zone mask kept.
 */
static inline __be32 fz_key(__be32 dst, struct fn_zone *fz)
{
    return FZ_MASK(fz) & dst;
}
(3)创建fib_info

         struct fib_info *fib_create_info(struct fib_config *cfg)

         //待补充

(4)fn_zone扩容

         void fn_rehash_zone(struct fn_zone *fz)

         先将fn_zone[i]散列表容量fz_divisor扩大2倍,最大1024;

         接着根据新的fz_divisor,创建扩容后新的fn_zone[i]

         //待补充

(5)查找fib_node,在fn_zone中

         static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)

         //待补充

(6)查找fib_alias,在fib_node中

         struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)

         //待补充

(7)插入fib_node,在fn_zone中

         static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)

         //待补充

(8)释放fib_info

         void fib_release_info(struct fib_info *fi)

         //待补充

(9)刷新路由缓存

         void rt_cache_flush(struct net *net, int delay)

//待补充

(10)通知用户空间

         void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, struct nl_info *info, unsigned int nlm_flags)

         //待补充

你可能感兴趣的:(linux,网络协议)