2.2.2 路由创建
当用户空间通过netlink下发操作类型为RTM_NEWROUTE的消息时,内核调用inet_rtm_newroute函数添加路由。
功能:
a)、接收用户空间通过netlink传入的路由配置,并将其转换为fib_config
b)、路由表的创建
c)、路由表项的添加
流程:
代码:
static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
//其中nlh为配置路由的参数,有目的地址、掩码长度、路由表table_id、网关地址等。
{
struct net *net = sock_net(skb->sk);
struct fib_config cfg;
struct fib_table *tb;
int err;
err = rtm_to_fib_config(net, skb, nlh, &cfg); //将netlink传递的消息nlh赋值给fib_config cfg
if (err < 0)
goto errout;
tb = fib_new_table(net, cfg.fc_table); //根据给定路由表ID,获取路由表
if (tb == NULL) {
err = -ENOBUFS;
goto errout;
}
err = tb->tb_insert(tb, &cfg); //获取路由表后,通过insert创建路由表项并添到该路由表
errout:
return err;
}
2.2.2.1 接收用户空间消息
rtm_to_fib_config(net, skb, nlh, &cfg)用于将nlh内容,传递到cfg中。
/*
 * rtm_to_fib_config - copy a netlink route request into a struct fib_config.
 * @net: network namespace recorded in cfg->fc_nlinfo
 * @skb: socket buffer of the request; supplies the sender pid
 * @nlh: netlink message header of the request
 * @cfg: output; zeroed and then filled from the message
 *
 * The fixed struct rtmsg part provides prefix length, tos, table, protocol,
 * scope, type and flags; the trailing attributes provide destination,
 * output interface, gateway, priority, preferred source, metrics,
 * multipath data, flow and (overriding rtm_table) the table id.
 *
 * Returns 0 on success, the negative result of nlmsg_validate() if
 * validation fails, or -EINVAL if the route type exceeds RTN_MAX.
 */
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
			     struct nlmsghdr *nlh, struct fib_config *cfg)
{
	struct rtmsg *hdr;
	struct nlattr *nla;
	int rem;
	int rc;

	rc = nlmsg_validate(nlh, sizeof(struct rtmsg), RTA_MAX, rtm_ipv4_policy);
	if (rc < 0)
		return rc;

	memset(cfg, 0, sizeof(*cfg));

	/* The rtmsg payload follows the netlink message header. */
	hdr = nlmsg_data(nlh);

	/*
	 * Fixed-header fields. The sample values below are the original
	 * author's observations and cannot be derived from this function:
	 *   table:    0 for connected routes, 255 for kernel routes
	 *             (fib_new_table() replaces 0 with RT_TABLE_MAIN)
	 *   protocol: 11 for both
	 *   scope:    253 for connected, 254 for kernel routes
	 *   type:     1 for connected, 2 for kernel routes
	 *   flags:    1024 for both
	 */
	cfg->fc_dst_len  = hdr->rtm_dst_len;	/* prefix (mask) length */
	cfg->fc_tos      = hdr->rtm_tos;	/* author's note: seems to default to 0 */
	cfg->fc_table    = hdr->rtm_table;
	cfg->fc_protocol = hdr->rtm_protocol;
	cfg->fc_scope    = hdr->rtm_scope;
	cfg->fc_type     = hdr->rtm_type;
	cfg->fc_flags    = hdr->rtm_flags;
	cfg->fc_nlflags  = nlh->nlmsg_flags;

	/* Remember who asked, so later notifications can refer to the request. */
	cfg->fc_nlinfo.pid    = NETLINK_CB(skb).pid;
	cfg->fc_nlinfo.nlh    = nlh;
	cfg->fc_nlinfo.nl_net = net;

	if (cfg->fc_type > RTN_MAX)
		return -EINVAL;

	/* Walk the attributes that follow the rtmsg header. */
	nlmsg_for_each_attr(nla, nlh, sizeof(struct rtmsg), rem) {
		switch (nla_type(nla)) {
		case RTA_DST:
			cfg->fc_dst = nla_get_be32(nla);
			break;
		case RTA_OIF:
			cfg->fc_oif = nla_get_u32(nla);
			break;
		case RTA_GATEWAY:
			cfg->fc_gw = nla_get_be32(nla);
			break;
		case RTA_PRIORITY:
			cfg->fc_priority = nla_get_u32(nla);
			break;
		case RTA_PREFSRC:
			cfg->fc_prefsrc = nla_get_be32(nla);
			break;
		case RTA_METRICS:
			/* Keep a pointer into the message; the data is not copied. */
			cfg->fc_mx = nla_data(nla);
			cfg->fc_mx_len = nla_len(nla);
			break;
		case RTA_MULTIPATH:
			/* Keep a pointer into the message; the data is not copied. */
			cfg->fc_mp = nla_data(nla);
			cfg->fc_mp_len = nla_len(nla);
			break;
		case RTA_FLOW:
			cfg->fc_flow = nla_get_u32(nla);
			break;
		case RTA_TABLE:
			/* Overrides the table id taken from rtm_table above. */
			cfg->fc_table = nla_get_u32(nla);
			break;
		default:
			/* Other attribute types are ignored here. */
			break;
		}
	}

	return 0;
}
2.2.2.2 路由表的创建(先查找,若不存在,则创建)
fib_new_table用于获取指定的路由表,下面只考虑支持策略路由的情况。
功能:
a)、路由表的查找,若存在,则返回该路由表;否则,继续下面
b)、路由表的创建
c)、将新建的路由表添加到fib_table_hash散列表
流程:
代码:
/*
 * fib_new_table - return the routing table with the given id, creating it
 * when it does not exist yet.
 * @net: network namespace whose fib_table_hash[] is searched and extended
 * @id:  routing table id; 0 is treated as RT_TABLE_MAIN
 *       (254 according to the original notes)
 *
 * Returns the existing or newly created table, or NULL if a new table
 * could not be allocated.
 */
struct fib_table *fib_new_table(struct net *net, u32 id)
{
	struct fib_table *table;
	unsigned int bucket;

	if (!id)
		id = RT_TABLE_MAIN;

	/* Look for an existing table in net->ipv4.fib_table_hash[] first. */
	table = fib_get_table(net, id);
	if (!table) {
		/* Not found: allocate a new table ... */
		table = fib_hash_table(id);
		if (table) {
			/* ... and link it at the head of its hash bucket. */
			bucket = id & (FIB_TABLE_HASHSZ - 1);
			hlist_add_head_rcu(&table->tb_hlist,
					   &net->ipv4.fib_table_hash[bucket]);
		}
	}

	return table;
}
(1.1)路由表的查找
函数struct fib_table *fib_get_table(struct net *net, u32 id)根据指定的id(该id由用户态通过netlink传入,指明要操作哪一张路由表),在net->ipv4.fib_table_hash[h]中查找对应的路由表。
/*
 * fib_get_table - find the routing table with the given id.
 * @net: network namespace whose fib_table_hash[] is searched
 * @id:  routing table id; 0 is treated as RT_TABLE_MAIN
 *
 * Walks the hash bucket selected by id & (FIB_TABLE_HASHSZ - 1) inside an
 * RCU read-side critical section and compares tb_id of each entry.
 *
 * Returns the matching table, or NULL if none is registered.
 */
struct fib_table *fib_get_table(struct net *net, u32 id)
{
	struct fib_table *found = NULL;
	struct fib_table *cur;
	struct hlist_node *pos;
	struct hlist_head *bucket;
	unsigned int idx;

	if (!id)
		id = RT_TABLE_MAIN;
	idx = id & (FIB_TABLE_HASHSZ - 1);

	rcu_read_lock();
	bucket = &net->ipv4.fib_table_hash[idx];
	hlist_for_each_entry_rcu(cur, pos, bucket, tb_hlist) {
		if (cur->tb_id == id) {
			found = cur;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}
(1.2)路由表的创建
若根据id查不到指定路由表,则进行路由表的创建fib_hash_table(id)
/*
 * fib_hash_table - allocate and initialise a hash-based routing table.
 * @id: routing table id stored in the new table
 *
 * The allocation covers struct fib_table plus a struct fn_hash, which is
 * reached through tb_data and zeroed here. The table is NOT linked into
 * any hash list by this function.
 *
 * Returns the new table, or NULL if kmalloc() fails.
 */
struct fib_table *fib_hash_table(u32 id)
{
	struct fib_table *table;
	size_t bytes = sizeof(struct fib_table) + sizeof(struct fn_hash);

	table = kmalloc(bytes, GFP_KERNEL);
	if (!table)
		return NULL;

	table->tb_id      = id;
	table->tb_default = -1;

	/* Operations implemented by the fn_hash backend. */
	table->tb_lookup         = fn_hash_lookup;		/* look up a route entry */
	table->tb_insert         = fn_hash_insert;		/* insert a route entry */
	table->tb_delete         = fn_hash_delete;		/* delete a route entry */
	table->tb_flush          = fn_hash_flush;		/* flush route entries */
	table->tb_select_default = fn_hash_select_default;	/* pick the default route */
	table->tb_dump           = fn_hash_dump;		/* dump entries; exact purpose was unclear to the original author - TODO confirm */

	/* Start with an empty fn_hash (no zones). */
	memset(table->tb_data, 0, sizeof(struct fn_hash));

	return table;
}
(1.3)路由表添加到散列表ipv4.fib_table_hash[h]
/* Excerpt from fib_new_table(): mask the table id with FIB_TABLE_HASHSZ - 1 to pick a bucket index. */
h = id &(FIB_TABLE_HASHSZ -1);
/* Link the new table at the head of that bucket's list in net->ipv4.fib_table_hash[]. */
hlist_add_head_rcu(&tb->tb_hlist,&net->ipv4.fib_table_hash[h]);
2.2.2.3 路由表项的添加
接下来会调用tb->tb_insert(tb, &cfg)引用的函数fn_hash_insert,进行路由表项的插入。
功能:
1)查找fn_zone,若不存在,创建
2)得到要插入路由表项的key值
3)根据netlink传入数据,创建fib_info
4)利用key值,在fn_zone中查找对应的fib_node节点
4.1)若fib_node存在,根据tos、priority在fib_node中查找fib_alias;
4.1.1)若fib_alias与要插入的路由表项全匹配,则返回
4.2)若fib_node不存在,则创建新fib_node
5)创建新的fib_alias,并赋值
6)将fib_node插入fn_zone,将fib_alias插入fib_node中alias链表
7)刷新路由缓存,通知用户空间
思维导图:
流程:
代码:
static int fn_hash_insert(struct fib_table*tb,struct fib_config *cfg)
{
struct fn_hash *table= (struct fn_hash*) tb->tb_data;
struct fib_node *new_f= NULL;
struct fib_node *f;
struct fib_alias *fa,*new_fa;
struct fn_zone *fz;
struct fib_info *fi;
u8tos = cfg->fc_tos;
__be32 key;
int err;
if (cfg->fc_dst_len> 32)
return -EINVAL;
//根据目的IP的掩码长度,找到相对应的fn_zone变量
fz = table->fn_zones[cfg->fc_dst_len];
//若找不到,则调用fn_new_zone创建;若找到,继续
if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
return -ENOBUFS;
key= 0;
if (cfg->fc_dst){
if (cfg->fc_dst & ~FZ_MASK(fz))
return -EINVAL;
//根据目的IP和掩码长度进行与操作,得到搜索关键字;
//该搜索关键字,用来在fn_zone[i]中查找fib_node节点
key = fz_key(cfg->fc_dst, fz);
}
//打印路由表id;此fn_zone的掩码长度;目的ip/掩码长度;key应该是网段
printk("%s-->table_id:%u fz_order:%d dest:%pI4/%d key:%pI4\n",__FUNCTION__,\
tb->tb_id,fz->fz_order,&cfg->fc_dst,cfg->fc_dst_len,&key);
//根据netlink传递的内容cfg,构建下一跳信息fib_info结构
fi = fib_create_info(cfg);
if (IS_ERR(fi))
{
DEBUG_V4Route("%s-->err:%ld\n",__FUNCTION__,PTR_ERR(fi));
return PTR_ERR(fi);
}
//打印当前标志;协议类型;优先级
printk("%s-->create new fib_info--fib_flags:0x%x fib_protocol:%d fib_priority:%u\n",\
__FUNCTION__,fi->fib_flags,fi->fib_protocol,fi->fib_priority);
//若当前fn_zone[i]中的fib_node节点数大于当前容量,则扩充
if (fz->fz_nent > (fz->fz_divisor<<1)&&
fz->fz_divisor< FZ_MAX_DIVISOR &&
(cfg->fc_dst_len== 32||
(1<< cfg->fc_dst_len)> fz->fz_divisor))
fn_rehash_zone(fz);
//(1)用key在fn_zone[i]中查找对应的fib_node节点
f = fib_find_node(fz, key);
//(1.1)若fib_node f节点为空,则让fib_alias fa为空
//(1.2)若fib_node f节点不为空,则通过tos、priority查找对应的fib_alias节点
if (!f)
fa = NULL;
else{
fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority);
}
/* Now fa, if non-NULL, points to the first fib alias
*with the same keys [prefix,tos,priority], if such key already
*exists or to the node before which we will insert new one.
*
*If fa is NULL, we will need to allocate a new one and
*insert to the head of f.
*
*If f is NULL, no fib node matched the destination key
*and we need to allocate a new one of those as well.
*/
//(1.2)若fib_alias fa存在,且fib_alias的tos与要添加的路由tos相等,
//且fib_alias的fib_info的priority与要添加的路由优先级也相等
if (fa && fa->fa_tos== tos &&
fa->fa_info->fib_priority== fi->fib_priority){
ios_debug_out("fa->fa_tos == tos %s-->fa_tos:%d fib_priority:%u\n",__FUNCTION__,tos,fi->fib_priority);
struct fib_alias *fa_first,*fa_match;
err = -EEXIST;
//(1.2.1)若应用层添加路由的操作位为NLM_F_EXCL,则程序返回(路由表项已存在)
if (cfg->fc_nlflags& NLM_F_EXCL)
goto out;
/* We have 2 goals:
* 1. Find exact match for type, scope, fib_info to avoid
* duplicate routes
* 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
*/
//(1.2.3)若fib_alias fa的类型、作用范围与要添加的路由相等
//且该fib_alias关联的fib_info与我们通过要添加的路由构造的fib_info fi相等
//若找到一个全匹配的fib_alias fa_match,则程序返回(路由表项已存在)
//若未找到,则进行添加路由表项操作
fa_match = NULL;
fa_first = fa;
fa = list_entry(fa->fa_list.prev,struct fib_alias, fa_list);
list_for_each_entry_continue(fa,&f->fn_alias, fa_list){
if (fa->fa_tos != tos)
break;
if (fa->fa_info->fib_priority!= fi->fib_priority)
break;
if (fa->fa_type == cfg->fc_type&&
fa->fa_scope== cfg->fc_scope&&
fa->fa_info== fi){
fa_match = fa;
break;
}
}
//(1.2.2)若应用层添加路由的操作位为NLM_F_REPLACE,
//则替换已存在的tos与priority相等的fib_alias、fib_info
//刷新缓存区、
//通知用户空间,并返回程序
if (cfg->fc_nlflags& NLM_F_REPLACE){
struct fib_info *fi_drop;
u8 state;
fa = fa_first;
if (fa_match) {
if (fa == fa_match)
err =0;
goto out;
}
write_lock_bh(&fib_hash_lock);
fi_drop = fa->fa_info;
fa->fa_info= fi;
fa->fa_type= cfg->fc_type;
fa->fa_scope= cfg->fc_scope;
state = fa->fa_state;
fa->fa_state&= ~FA_S_ACCESSED;
fib_hash_genid++;
write_unlock_bh(&fib_hash_lock);
fib_release_info(fi_drop);
if (state & FA_S_ACCESSED)
rt_cache_flush(cfg->fc_nlinfo.nl_net,-1);
rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id,
&cfg->fc_nlinfo, NLM_F_REPLACE);
return0;
}
/* Error if we find a perfect match which
* uses the same scope, type, and nexthop
* information.
*/
if (fa_match)
goto out;
//成立,则在表头添加新的fib_alias
if (!(cfg->fc_nlflags& NLM_F_APPEND))
fa = fa_first;
}
err= -ENOENT;
//(2)若应用层未设置标志位为NLM_F_CREATE,则不进行添加操作,程序返回
if (!(cfg->fc_nlflags& NLM_F_CREATE))
goto out;
err= -ENOBUFS;
printk("%s-->create new route\n",__FUNCTION__);
//(3)添加路由表项操作
//(1.1)若fib_node节点不存在,则创建fib_node
//并对fn_hash、fn_alias初始化,设置key值
if (!f) {
printk("%s-->createnew fib_node\n",__FUNCTION__);
new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL);
if (new_f == NULL)
goto out;
INIT_HLIST_NODE(&new_f->fn_hash);
INIT_LIST_HEAD(&new_f->fn_alias);
new_f->fn_key= key;
f = new_f;
}
//(3.1)创建新的fib_alias,并赋值
new_fa = &f->fn_embedded_alias;
if (new_fa->fa_info!= NULL){
printk("%s-->createnew fib_alias\n",__FUNCTION__);
new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
if (new_fa == NULL)
goto out;
}
new_fa->fa_info= fi;
new_fa->fa_tos= tos;
new_fa->fa_type= cfg->fc_type;
new_fa->fa_scope= cfg->fc_scope;
new_fa->fa_state= 0;
printk("new_fa set value %s-->fa_tos:%d fa_type:%d fa_scope:%d\n",__FUNCTION__,\
tos,cfg->fc_type,cfg->fc_scope);
/*
*Insert new entry to the list.
*/
//(3.2)若新建了fib_node,则将fib_node插入fib_zone,将创建的fib_alias new_fa添加到fib_alias链表中
//并让fz_nent计数+1
//刷新路由缓存
//通知用户空间
write_lock_bh(&fib_hash_lock);
if (new_f)
fib_insert_node(fz, new_f);
list_add_tail(&new_fa->fa_list,
(fa ? &fa->fa_list: &f->fn_alias));
fib_hash_genid++;
write_unlock_bh(&fib_hash_lock);
if (new_f)
fz->fz_nent++;
rt_cache_flush(cfg->fc_nlinfo.nl_net,-1);
rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
&cfg->fc_nlinfo,0);
return 0;
out:
if (new_f)
kmem_cache_free(fn_hash_kmem, new_f);
fib_release_info(fi);
return err;
}
接下来分析一下其中关键的函数
(1)创建fn_zone结构fn_new_zone
/*
 * fn_new_zone - allocate the zone for prefix length @z and register it.
 * @table: fn_hash that owns the zones
 * @z:     prefix (mask) length of the new zone
 *
 * Allocates a zeroed fn_zone with a hash table of 16 buckets (1 bucket for
 * prefix length 0), records the prefix length and netmask, and links the
 * zone into table->fn_zone_list so that more specific zones come first.
 * The zone is also stored in table->fn_zones[z].
 *
 * Returns the new zone, or NULL if either allocation fails.
 */
static struct fn_zone *fn_new_zone(struct fn_hash *table, int z)
{
	int i;
	struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL);
	if (!fz)
		return NULL;

	/* A non-zero prefix length gets 16 hash buckets, prefix length 0 gets one. */
	if (z) {
		fz->fz_divisor = 16;
	} else {
		fz->fz_divisor = 1;
	}
	fz->fz_hashmask = (fz->fz_divisor - 1);
	fz->fz_hash = fz_hash_alloc(fz->fz_divisor);
	if (!fz->fz_hash) {
		kfree(fz);
		return NULL;
	}

	/* Record the prefix length and the matching netmask. */
	fz->fz_order = z;
	fz->fz_mask = inet_make_mask(z);

	/*
	 * The zone list is kept ordered from longest to shortest prefix;
	 * what follows is the ordered list insertion.
	 */
	/* Find the first not empty zone with more specific mask */
	for (i = z + 1; i <= 32; i++)
		if (table->fn_zones[i])
			break;
	write_lock_bh(&fib_hash_lock);
	if (i > 32) {
		/* No more specific masks, we are the first. */
		fz->fz_next = table->fn_zone_list;
		table->fn_zone_list = fz;
	} else {
		/* Insert right after the closest more specific zone. */
		fz->fz_next = table->fn_zones[i]->fz_next;
		table->fn_zones[i]->fz_next = fz;
	}
	table->fn_zones[z] = fz;
	fib_hash_genid++;
	write_unlock_bh(&fib_hash_lock);
	return fz;
}
(2)获得key值,fz_key
/*
 * fz_key - compute the lookup key of a destination within a zone.
 * @dst: destination IP address
 * @fz:  zone whose mask (FZ_MASK(fz)) is applied
 *
 * Returns @dst ANDed with the zone mask.
 */
static inline __be32 fz_key(__be32 dst, struct fn_zone *fz)
{
	__be32 zone_mask = FZ_MASK(fz);

	return dst & zone_mask;
}
(3)创建fib_info
struct fib_info *fib_create_info(struct fib_config *cfg)
//待补充
(4)fn_zone扩容
void fn_rehash_zone(struct fn_zone *fz)
先将fn_zone[i]散列表容量fz_divisor扩大2倍,最大1024;
接着根据新的fz_divisor,创建扩容后新的fn_zone[i]
//待补充
(5)查找fib_node,在fn_zone中
static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
//待补充
(6)查找fib_alias,在fib_node中
struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
//待补充
(7)插入fib_node,在fn_zone中
static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
//待补充
(8)释放fib_info
void fib_release_info(struct fib_info *fi)
//待补充
(9)刷新路由缓存
void rt_cache_flush(struct net *net, int delay)
//待补充
(10)通知用户空间
void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, struct nl_info *info, unsigned int nlm_flags)
//待补充