功能:
(1)创建输入路由缓存项
(2)生成hash值,插入缓存链表中
/*
 * ip_mkroute_input - build an input route cache entry and insert it.
 *
 * Steps, in order:
 *   1. With CONFIG_IP_ROUTE_MULTIPATH, let fib_select_multipath() pick a
 *      next hop when the FIB result has more than one and the flow does
 *      not name an output interface.
 *   2. Ask __mkroute_input() to build the cache entry.
 *   3. With conntrack support, call rt_service_set() for packets that are
 *      in the original direction of their connection (policy routing).
 *   4. Optionally dump the route for debugging.
 *   5. Hash the entry and pass it to rt_intern_hash().
 *
 * Returns the non-zero result of __mkroute_input() when the entry could
 * not be built, otherwise the result of rt_intern_hash().
 */
static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    const struct flowi *fl,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos)
{
	struct rtable *entry = NULL;
	unsigned bucket;
	int rc;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) {
		DEBUG_V4Route("%s-->Multipath fib_nhs:%d\n",
			      __FUNCTION__, res->fi->fib_nhs);
		fib_select_multipath(fl, res);
	}
#endif

	/* Build the routing cache entry; bail out untouched on failure. */
	rc = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &entry);
	if (rc)
		return rc;

	/* Build services of the route cache for policy routing. */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	if ((FW_PR_CONFIG) && skb->nfct) {
		DEBUG_V4Route("%s-->CTINFO2DIR(skb->nfctinfo):%d\n",
			      __FUNCTION__, CTINFO2DIR(skb->nfctinfo));
		if (CTINFO2DIR(skb->nfctinfo) == IP_CT_DIR_ORIGINAL)
			rt_service_set(entry, skb);
	}
#endif

	if (IS_IPV4_DEBUG_ROUTE)
		ipv4_route_dump(skb, entry);

	/* Hash on (daddr, saddr, input ifindex, generation id) and insert. */
	bucket = rt_hash(daddr, saddr, fl->iif,
			 rt_genid(dev_net(entry->u.dst.dev)));
	return rt_intern_hash(bucket, entry, NULL, skb);
}
/*
 * __mkroute_input - allocate and initialise an input route cache entry.
 *
 * @skb:     packet being routed; its mark and protocol are read here
 * @res:     FIB lookup result supplying the output device, oif and gateway
 * @in_dev:  in_device the packet arrived on
 * @daddr:   destination address of the packet
 * @saddr:   source address of the packet
 * @tos:     type-of-service value stored in the flow key
 * @result:  on success, receives the newly built entry; left untouched
 *           on every error path
 *
 * Returns 0 on success; -EINVAL when the output device has no in_device,
 * when fib_validate_source() reports an error, or when a non-IP packet
 * would leave through the interface it arrived on; -ENOBUFS when
 * dst_alloc() fails.
 *
 * The working reference taken on the output in_device is released at the
 * cleanup label on every path that obtained it.
 */
static int __mkroute_input(struct sk_buff *skb,
struct fib_result *res,
struct in_device *in_dev,
__be32 daddr, __be32 saddr, u32 tos,
struct rtable **result)
{
struct rtable *rth;
int err;
struct in_device *out_dev;
unsigned flags = 0;
__be32 spec_dst;
u32 itag;
/* get a working reference to the output device */
out_dev = in_dev_get(FIB_RES_DEV(*res));// in_device of the device the packet will leave through
if (out_dev == NULL) {
if (net_ratelimit())
printk(KERN_CRIT "Bug in ip_route_input" \
"_slow(). Please, report\n");
/* no reference was taken, so return directly instead of going to cleanup */
return -EINVAL;
}
// Route validity check: by the time this function is called a route for
// saddr->daddr has already been found; the reverse route daddr->saddr must
// also exist, otherwise the source is treated as invalid.
err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res),
in_dev->dev, &spec_dst, &itag, skb->mark);
if (err < 0) {
ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,saddr);
err = -EINVAL;
goto cleanup;
}
/* a positive result from the source validation marks the route RTCF_DIRECTSRC */
if (err)
flags |= RTCF_DIRECTSRC;
/* same in/out interface plus a positive validation result: flag the route for redirect */
if (out_dev == in_dev && err &&
(IN_DEV_SHARED_MEDIA(out_dev) ||
inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
flags |= RTCF_DOREDIRECT;
if (skb->protocol != htons(ETH_P_IP)) {
/* Not IP (i.e. ARP). Do not create route, if it is
* invalid for proxy arp. DNAT routes are always valid.
*/
if (out_dev == in_dev) {
err = -EINVAL;
goto cleanup;
}
}
rth = dst_alloc(&ipv4_dst_ops);// allocate a new route cache entry
if (!rth) {
err = -ENOBUFS;
goto cleanup;
}
atomic_set(&rth->u.dst.__refcnt, 1);// initialise the route cache entry
rth->u.dst.flags= DST_HOST;
if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
rth->u.dst.flags |= DST_NOPOLICY;
if (IN_DEV_CONF_GET(out_dev, NOXFRM))
rth->u.dst.flags |= DST_NOXFRM;
/* flow key (rth->fl.*) and the matching rt_* fields */
rth->fl.fl4_dst = daddr;
rth->rt_dst = daddr;
rth->fl.fl4_tos = tos;
rth->fl.mark = skb->mark;
rth->fl.fl4_src = saddr;
rth->rt_src = saddr;
/* gateway starts out as the destination; NOTE(review): rt_set_nexthop() below presumably overrides it when the FIB result has a gateway - confirm */
rth->rt_gateway = daddr;
rth->rt_iif =rth->fl.iif= in_dev->dev->ifindex;
rth->u.dst.dev = (out_dev)->dev;
dev_hold(rth->u.dst.dev);
rth->idev = in_dev_get(rth->u.dst.dev);
rth->fl.oif = 0;
rth->rt_spec_dst= spec_dst;
rth->u.dst.input = ip_forward;// set the entry's input and output handlers
rth->u.dst.output = ip_output;
rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
rt_set_nexthop(rth, res, itag);
rth->rt_flags = flags;
#if defined(CONFIG_MV_ETH_NFP_FIB_LEARN)
/* rth->nfp records whether nfp_hook_fib_rule_add() returned 0 for this route */
rth->nfp = false;
if (!(rth->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL | RTCF_REJECT))) {
if (!nfp_hook_fib_rule_add(AF_INET, (u8*)(&rth->rt_src), (u8*)(&rth->rt_dst),
(u8*)(&rth->rt_gateway), rth->rt_iif, rth->u.dst.dev->ifindex))
rth->nfp = true;
}
#endif /* CONFIG_MV_ETH_NFP_FIB_LEARN */
*result = rth;
#ifdef CONFIG_MV_ETH_NFP
/* pass the routing info to fp_routing_info_set() unless disabled or the route is multicast/broadcast/local/reject */
if ( !fp_disable_flag &&
!(rth->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL | RTCF_REJECT))) {
fp_routing_info_set( rth->rt_src, rth->rt_dst,
rth->rt_gateway, rth->rt_iif, rth->u.dst.dev->ifindex);
}
#endif /* CONFIG_MV_ETH_NFP */
err = 0;
cleanup:
/* release the working reference to the output device */
in_dev_put(out_dev);
return err;
}
流程图:
/*
 * rt_intern_hash - insert a route cache entry into its hash chain, or reuse
 * an equivalent entry that is already there.
 *
 * When caching is disabled for rt's network namespace, the entry is not
 * hashed: its neighbour is bound if applicable, rt_free() is called on it
 * and it is handed straight back to the caller.
 *
 * Otherwise the chain for @hash is walked under its bucket lock:
 *   - expired entries are unlinked and freed;
 *   - an entry with matching keys and namespace is moved to the head of the
 *     chain and returned in place of @rt, which is dropped;
 *   - the unreferenced entry with the lowest score is remembered and
 *     evicted if the chain is longer than ip_rt_gc_elasticity;
 *   - with no eviction candidate and a chain longer than
 *     rt_chain_length_max, an emergency hash rebuild is requested.
 * Finally the neighbour is bound (with at most one garbage-collect-and-
 * restart when that fails with -ENOBUFS outside softirq context) and @rt is
 * published at the head of the chain.
 *
 * The resulting entry is stored in *@rp when @rp is non-NULL, otherwise it
 * is attached to @skb with skb_dst_set().
 *
 * Returns 0 on success, the arp_bind_neighbour() error, or -ENOBUFS when
 * the neighbour table stays full; @rt is dropped on every error path.
 */
static int rt_intern_hash(unsigned hash, struct rtable *rt,
struct rtable **rp, struct sk_buff *skb)
// hash: hash value; rt: route cache entry to insert; skb: packet being routed
// rp: receives the entry that ended up in the cache
{
struct rtable *rth, **rthp;
unsigned long now;
struct rtable *cand, **candp;
u32 min_score;
int chain_length;
/* 1 outside softirq context, 0 inside: number of gc-and-restart attempts allowed */
int attempts = !in_softirq();
restart:
chain_length = 0;
min_score = ~(u32)0;
cand = NULL;
candp = NULL;
now = jiffies;
if (!rt_caching(dev_net(rt->u.dst.dev))) {
/*
* If we're not caching, just tell the caller we
* were successful and don't touch the route. The
* caller hold the sole reference to the cache entry, and
* it will be released when the caller is done with it.
* If we drop it here, the callers have no way to resolve routes
* when we're not caching. Instead, just point *rp at rt, so
* the caller gets a single use out of the route
* Note that we do rt_free on this new route entry, so that
* once its refcount hits zero, we are still able to reap it
* (Thanks Alexey)
* Note also the rt_free uses call_rcu. We don't actually
* need rcu protection here, this is just our path to get
* on the route gc list.
*/
if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
int err = arp_bind_neighbour(&rt->u.dst);
if (err) {
if (net_ratelimit())
printk(KERN_WARNING
"Neighbour table failure & not caching routes.\n");
rt_drop(rt);
return err;
}
}
rt_free(rt);
goto skip_hashing;
}
rthp = &rt_hash_table[hash].chain;// locate the chain for this hash value
spin_lock_bh(rt_hash_lock_addr(hash));
while ((rth = *rthp) != NULL) {// walk the hash chain
if (rt_is_expired(rth)) {// has this entry expired?
*rthp = rth->u.dst.rt_next;// expired: unlink it and free it
rt_free(rth);
continue;
}
// not expired, and the keys match the route being inserted
if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
/* Put it first */
*rthp = rth->u.dst.rt_next;// unlink the entry so it can be moved to the head of the chain
/*
* Since lookup is lockfree, the deletion
* must be visible to another weakly ordered CPU before
* the insertion at the start of the hash chain.
*/
rcu_assign_pointer(rth->u.dst.rt_next,
rt_hash_table[hash].chain);
/*
* Since lookup is lockfree, the update writes
* must be ordered for consistency on SMP.
*/
rcu_assign_pointer(rt_hash_table[hash].chain, rth);
dst_use(&rth->u.dst, now);// per the original note: increments the reference count
spin_unlock_bh(rt_hash_lock_addr(hash));
rt_drop(rt);// drop the newly built entry; the existing one is returned instead
if (rp)
*rp = rth;
else
skb_dst_set(skb, &rth->u.dst);
return 0;
}
/* only unreferenced entries are eviction candidates; remember the lowest-scoring one */
if (!atomic_read(&rth->u.dst.__refcnt)) {
u32 score = rt_score(rth);
if (score <= min_score) {
cand = rth;
candp = rthp;
min_score = score;
}
}
chain_length++;
rthp = &rth->u.dst.rt_next;
}
if (cand) {
/* ip_rt_gc_elasticity used to be average length of chain
* length, when exceeded gc becomes really aggressive.
*
* The second limit is less certain. At the moment it allows
* only 2 entries per bucket. We will see.
*/
if (chain_length > ip_rt_gc_elasticity) {
*candp = cand->u.dst.rt_next;
rt_free(cand);
}
} else {
if (chain_length > rt_chain_length_max) {// too many hash collisions; the hash table needs rebuilding
struct net *net = dev_net(rt->u.dst.dev);
int num = ++net->ipv4.current_rt_cache_rebuild_count;
if (!rt_caching(dev_net(rt->u.dst.dev))) {
printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
rt->u.dst.dev->name, num);
}
rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev));
}
}
/* Try to bind route to arp only if it is output
route or unicast forwarding path.
*/
if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {// unicast forwarding, or a locally generated packet
int err = arp_bind_neighbour(&rt->u.dst);// bind the ARP neighbour
if (err) {// bind failed: garbage-collect to free memory, then start over
/* the bucket lock is released before any return, gc run or restart below */
spin_unlock_bh(rt_hash_lock_addr(hash));
if (err != -ENOBUFS) {
rt_drop(rt);
return err;
}
/* Neighbour tables are full and nothing
can be released. Try to shrink route cache,
it is most likely it holds some neighbour records.
*/
if (attempts-- > 0) {
/* temporarily force the most aggressive gc settings, then restore them */
int saved_elasticity = ip_rt_gc_elasticity;
int saved_int = ip_rt_gc_min_interval;
ip_rt_gc_elasticity = 1;
ip_rt_gc_min_interval = 0;
rt_garbage_collect(&ipv4_dst_ops);
ip_rt_gc_min_interval = saved_int;
ip_rt_gc_elasticity = saved_elasticity;
goto restart;
}
if (net_ratelimit())
printk(KERN_WARNING "Neighbour table overflow.\n");
rt_drop(rt);
return -ENOBUFS;
}
}
rt->u.dst.rt_next = rt_hash_table[hash].chain;// link the entry in front of the current chain head
#if RT_CACHE_DEBUG >= 2
if (rt->u.dst.rt_next) {
struct rtable *trt;
printk(KERN_DEBUG "rt_cache @%02x: %pI4",
hash, &rt->rt_dst);
for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
printk(" . %pI4", &trt->rt_dst);
printk("\n");
}
#endif
/*
* Since lookup is lockfree, we must make sure
* previous writes to rt are committed to memory
* before making rt visible to other CPUS.
*/
rcu_assign_pointer(rt_hash_table[hash].chain, rt);
spin_unlock_bh(rt_hash_lock_addr(hash));
skip_hashing:
/* hand the entry back: through *rp when given, otherwise attached to the skb */
if (rp)
*rp = rt;
else
skb_dst_set(skb, &rt->u.dst);
return 0;
}
3.3 创建输出路由缓存
ip_mkroute_outputstatic int ip_mkroute_output(struct rtable **rp,
struct fib_result *res,
const struct flowi *fl,
const struct flowi *oldflp,
struct net_device *dev_out,
unsigned flags)
{
struct rtable *rth = NULL;
int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
unsigned hash;
if (err == 0) {
hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
rt_genid(dev_net(dev_out)));
err = rt_intern_hash(hash, rth, rp, NULL);
}
return err;
}