linux内核 路由fib表之输入查找

2.2.3 路由查找
-->ip_rt_init
    -->ip_fib_init //注册路由的创建、删除、dump函数
    -->rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); //注册RTM_GETROUTE(路由查询)消息的处理函数inet_rtm_getroute,下文围绕它展开
/*
 * inet_rtm_getroute - handler registered for RTM_GETROUTE on PF_INET.
 *
 * Parses the route attributes carried by @nlh, performs either an input
 * route lookup (when RTA_IIF names a device) or an output route lookup,
 * fills the result into a freshly allocated skb via rt_fill_info() and
 * unicasts it back to the requesting netlink pid.
 *
 * @in_skb: the netlink request skb (source of the net and the sender pid)
 * @nlh:    the netlink message header of the request
 * @arg:    unused here
 *
 * Returns the value of rtnl_unicast() when a reply was sent; otherwise the
 * error (or non-positive rt_fill_info() result) that stopped processing.
 */
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
    struct net *net = sock_net(in_skb->sk);
    struct nlattr *attrs[RTA_MAX+1];
    struct rtable *route = NULL;
    struct sk_buff *reply;
    struct rtmsg *req;
    __be32 daddr = 0;
    __be32 saddr = 0;
    u32 in_ifindex = 0;
    int rc;

    /* Validate the request and split its attributes into attrs[]. */
    rc = nlmsg_parse(nlh, sizeof(*req), attrs, RTA_MAX, rtm_ipv4_policy);
    if (rc < 0)
        return rc;

    /* The rtmsg payload sits right behind the netlink header. */
    req = nlmsg_data(nlh);

    /* This skb doubles as the dummy packet for the lookup and as the reply. */
    reply = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
    if (reply == NULL)
        return -ENOBUFS;

    /* Reserve room for dummy headers, this skb can pass
       through good chunk of routing engine.
     */
    skb_reset_mac_header(reply);
    skb_reset_network_header(reply);

    /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
    ip_hdr(reply)->protocol = IPPROTO_ICMP;
    skb_reserve(reply, MAX_HEADER + sizeof(struct iphdr));

    /* Absent attributes leave the corresponding value at 0. */
    if (attrs[RTA_SRC])
        saddr = nla_get_be32(attrs[RTA_SRC]);
    if (attrs[RTA_DST])
        daddr = nla_get_be32(attrs[RTA_DST]);
    if (attrs[RTA_IIF])
        in_ifindex = nla_get_u32(attrs[RTA_IIF]);

    if (in_ifindex) {
        struct net_device *ingress = __dev_get_by_index(net, in_ifindex);

        if (ingress == NULL) {
            rc = -ENODEV;
            goto drop_reply;
        }

        reply->protocol = htons(ETH_P_IP);
        reply->dev      = ingress;

        /* Input route lookup, done with bottom halves disabled. */
        local_bh_disable();
        rc = ip_route_input(reply, daddr, saddr, req->rtm_tos, ingress);
        local_bh_enable();

        /* A successful lookup may still carry an error in the dst entry. */
        route = skb_rtable(reply);
        if (rc == 0 && route->u.dst.error)
            rc = -route->u.dst.error;
    } else {
        struct flowi fl = {
            .nl_u = {
                .ip4_u = {
                    .daddr = daddr,
                    .saddr = saddr,
                    .tos = req->rtm_tos,
                },
            },
            .oif = attrs[RTA_OIF] ? nla_get_u32(attrs[RTA_OIF]) : 0,
        };

        /* Output route lookup. */
        rc = ip_route_output_key(net, &route, &fl);
    }

    if (rc)
        goto drop_reply;

    skb_dst_set(reply, &route->u.dst);
    if (req->rtm_flags & RTM_F_NOTIFY)
        route->rt_flags |= RTCF_NOTIFY;

    rc = rt_fill_info(net, reply, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
               RTM_NEWROUTE, 0, 0);
    if (rc <= 0)
        goto drop_reply;

    /* Hand the reply over to netlink. */
    return rtnl_unicast(reply, net, NETLINK_CB(in_skb).pid);

drop_reply:
    kfree_skb(reply);
    return rc;
}
2.2.3.1 输入路由查找
ip_route_input
功能:
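    1)先在路由缓存(rt_hash_table)中查找匹配项,命中则直接把缓存项挂到skb上并返回
    2)缓存未命中时,若目的地址是组播地址,则走组播输入路由(ip_route_input_mc)
    3)否则进入慢速路径查FIB路由表(ip_route_input_slow)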
程序流程图

linux内核 路由fib表之输入查找_第1张图片

extern unsigned long fastroute_enable;
/*
 * ip_route_input - find the input route for a received packet.
 *
 * Order of attempts:
 *   1) if rt_caching(net) allows it, walk the route cache hash chain and
 *      return 0 on the first acceptable entry (attached to the skb);
 *   2) if the destination is multicast, return ip_route_input_mc() when
 *      the packet is accepted for this device, else -EINVAL;
 *   3) otherwise return ip_route_input_slow().
 */
int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
           u8 tos, struct net_device *dev)
//Parameters: skb   - sk_buff of the incoming packet
//            daddr - destination address of the packet
//            saddr - source address of the packet
//            tos   - IP type of service
//            dev   - network device the packet arrived on
{
    struct rtable * rth;
    unsigned    hash;
    int iif = dev->ifindex;
    struct net *net;
    //Look up the net that the input device belongs to.
    net = dev_net(dev);
    //Route cache lookup.
    //Author's note (rt_caching() is defined elsewhere): when hash collisions become
    //too severe the cache is rebuilt and current_rt_cache_rebuild_count is incremented;
    //rt_caching() simply compares that counter with the maximum to decide whether
    //caching is still in use; the maximum sysctl_rt_cache_rebuild_count is 4.
    if (!rt_caching(net))
        goto skip_cache;

    tos &= IPTOS_RT_MASK;
    //Compute the hash bucket from addresses, input ifindex and the generation id.
    hash = rt_hash(daddr, saddr, iif, rt_genid(net));

    rcu_read_lock();
    for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
            rth = rcu_dereference(rth->u.dst.rt_next)) 
    {
        //Key match: the XOR/OR expression is 0 only when dst, src, iif and tos all
        //match and the entry's oif is 0.
        if (((rth->fl.fl4_dst ^ daddr) |
            (rth->fl.fl4_src ^ saddr) |
            (rth->fl.iif ^ iif) |
            rth->fl.oif |
            (rth->fl.fl4_tos ^ tos)) == 0                      
                &&rth->fl.mark == skb->mark &&
                //the net of the cached entry's device must equal the input device's net
                net_eq(dev_net(rth->u.dst.dev), net) &&
                !rt_is_expired(rth)) //skip entries that rt_is_expired() reports as expired
        {
            //Extra gate on a matching entry (not part of mainline): accept it when
            //fastroute_enable is set, or FW_PR_CONFIG is false, or
            //CTINFO2DIR(skb->nfctinfo) is IP_CT_DIR_REPLY, or it is IP_CT_DIR_ORIGINAL
            //and rt_service_match() accepts. Otherwise keep walking the chain.
            if(fastroute_enable||(!FW_PR_CONFIG || CTINFO2DIR(skb->nfctinfo) == IP_CT_DIR_REPLY 
                || ((CTINFO2DIR(skb->nfctinfo) == IP_CT_DIR_ORIGINAL) && rt_service_match(rth,skb)))) 
            {
                dst_use(&rth->u.dst, jiffies);//cache hit: dst_use() is given jiffies (author's note: refreshes the last-use time)
                RT_CACHE_STAT_INC(in_hit);
                rcu_read_unlock();
                skb_dst_set(skb, &rth->u.dst);//attach the cached dst entry to the skb
                return 0;   
            }
                
        }
        RT_CACHE_STAT_INC(in_hlist_search);
    }
    rcu_read_unlock();
        
skip_cache:
    //Is the destination address multicast?
     if (ipv4_is_multicast(daddr)) 
     {
        struct in_device *in_dev;
        rcu_read_lock();
        if ((in_dev = __in_dev_get_rcu(dev)) != NULL) 
        {
            //Non-zero when ip_check_mc() accepts this group/source/protocol for in_dev.
            int our = ip_check_mc(in_dev, daddr, saddr,ip_hdr(skb)->protocol);
            //With CONFIG_IP_MROUTE, also accept non-local multicast when
            //IN_DEV_MFORWARD(in_dev) is set.
            if (our
#ifdef CONFIG_IP_MROUTE
            || (!ipv4_is_local_multicast(daddr) &&
            IN_DEV_MFORWARD(in_dev))
#endif
            )  {
                    rcu_read_unlock();
                    return ip_route_input_mc(skb, daddr, saddr,tos, dev, our);
                }
        }
        rcu_read_unlock();
        //Multicast packet that is neither ours nor forwardable (or no in_device).
        return -EINVAL;
    }
    //Cache miss on a non-multicast destination: take the slow path.
    return ip_route_input_slow(skb, daddr, saddr, tos, dev);
}

2.2.3.1.1 结构体描述
    (1)路由查找结果结构体 fib_result(由fib_lookup填充,供后续创建路由缓存项使用)

/*
 * Result of a FIB route lookup. ip_route_input_slow() passes one of these
 * to fib_lookup() and then branches on .type to pick the delivery path.
 */
struct fib_result {
    unsigned char   prefixlen;// prefix (netmask) length
    unsigned char   nh_sel;// index of the selected next hop; together with fib_info it identifies the fib_nh to use
    unsigned char   type;// route type, e.g. multicast, unicast, broadcast
    unsigned char   scope;// scope, e.g. RT_SCOPE_UNIVERSE, RT_SCOPE_LINK
    struct fib_info *fi;// the associated fib_info
#ifdef CONFIG_IP_MULTIPLE_TABLES
    struct fib_rule *r;// the associated policy-routing rule (fib_rule)
#endif
    /*
     * NOTE(review): the three fields below are not in mainline; they look like
     * vendor policy-routing state. When prset is non-zero,
     * ip_route_input_slow() writes oldnexthop back into FIB_RES_GW(res) and
     * oldtype back into the next hop's nh_scope, then clears prset -
     * confirm where they are saved.
     */
    int prset;
    u32 oldtype;
    u32 oldnexthop;
};
2.2.3.1.2 ip_route_input_slow(慢速查找)
功能:

    1)对源/目的地址做合法性检查后,调用fib_lookup查FIB路由表(失败或需要时再查策略路由)
    2)根据路由查找结果(本地、广播、转发、不可达),创建相应的路由缓存项并插入路由缓存hash表

程序流程图

linux内核 路由fib表之输入查找_第2张图片

           linux内核 路由fib表之输入查找_第3张图片

/*
 *  NOTE. We drop all the packets that has local source
 *  addresses, because every properly looped back packet
 *  must have correct destination already attached by output routine.
 *
 *  Such approach solves two big problems:
 *  1. Not simplex devices are handled properly.
 *  2. IP spoofing attempts are filtered with 100% of guarantee.
 */
/*
 * ip_route_input_slow - slow-path input route lookup.
 *
 * Called by ip_route_input() when the route cache gave no usable entry.
 * Rejects martian source/destination addresses, looks the flow up with
 * fib_lookup() (and, with CONFIG_FW_POLICY, the policy-route helpers),
 * then creates a route cache entry for the result:
 *   - forwarded unicast: via ip_mkroute_input();
 *   - local, broadcast and unreachable: built at local_input and inserted
 *     with rt_intern_hash().
 *
 * @skb:   the incoming packet
 * @daddr: destination address
 * @saddr: source address
 * @tos:   IP type of service
 * @dev:   device the packet arrived on
 *
 * Returns the result of ip_mkroute_input()/rt_intern_hash() when an entry
 * was built, otherwise a negative errno (-EINVAL, -EHOSTUNREACH, -ENOBUFS,
 * or the error reported by the policy-route helper).
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev)
{
    /*
     * Zero-initialised on purpose: the brd_input, sbc_input and no_route
     * paths reach local_input without a successful fib_lookup(), and the
     * trace there inspects res.fi.
     */
    struct fib_result res = { .fi = NULL };
    struct fib_result pr_res;
    /* Takes a reference on the input device's in_device; dropped at done. */
    struct in_device *in_dev = in_dev_get(dev);
    struct flowi fl=
    { .nl_u=
        { .ip4_u=
            {
                .daddr = daddr,
                .saddr = saddr,
                .tos = tos,
                .scope = RT_SCOPE_UNIVERSE,
            }
        },
        .mark = skb->mark,
        .iif = dev->ifindex
    };
    unsigned    flags = 0;
    u32     itag = 0;
    struct rtable * rth;
    unsigned    hash;
    __be32      spec_dst;
    int     err = -EINVAL;
    int     free_res = 0;   /* set once res must be released with fib_res_put() */
    struct net    * net = dev_net(dev); /* net that the input device belongs to */
    int     proute_err;
    int     ret = 0;        /* policy-route verdict; stays 0 when no policy lookup ran */

    /* IP on this device is disabled. */
    if (!in_dev)
        goto out;

    /* Check for the most weird martians, which can be not detected by fib_lookup.*/
    /* Source must not be multicast, limited broadcast or loopback. */
    if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
        ipv4_is_loopback(saddr))
        goto martian_source;
    /* Destination all-ones, or source and destination both zero: broadcast input. */
    if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0))
        goto brd_input;
    /* Destination accepted by inet_if_subnet_broadcast(): skips the protocol check of brd_input. */
    if (inet_if_subnet_broadcast(daddr))
        goto sbc_input;

    /* Accept zero addresses only to limited broadcast;
    * I even do not know to fix it or not. Waiting for complains :-)
    */
    /* Source in the zero network is a martian from here on. */
    if (ipv4_is_zeronet(saddr))
        goto martian_source;

    /* Destination must not be limited broadcast, zero network or loopback. */
    if (ipv4_is_lbcast(daddr) || ipv4_is_zeronet(daddr) ||
        ipv4_is_loopback(daddr))
        goto martian_destination;

    DEBUG_V4Route("%s-->begin to slow looking\n",__FUNCTION__);

    res.prset = 0;
    /*
    *   Now we are ready to route packet.
    */
    err = fib_lookup(net, &fl, &res);
    if (err != 0) {
        /* FIB lookup failed. Every path in this branch leaves via goto. */
        DEBUG_V4Route("%s-->slow looking error(%d)\n",__FUNCTION__,err);
#ifdef CONFIG_FW_POLICY
        /*
         * Only the lookup call depends on FW_PR_CONFIG; the trace and the
         * verdict checks always run and see ret == 0 when it was skipped.
         */
        if (FW_PR_CONFIG)
            ret = lookup_pr_policy_by_skb(skb, dev, &res, &fl, &proute_err);
        DEBUG_V4Route("%s-->looking policy route(%d)\n",__FUNCTION__,ret);
        if (ret == 1)       /* policy route supplied a result */
            goto fib_ok;
        if (ret == 2) {     /* policy lookup reported an error */
            err = proute_err;
            goto done;
        }
#endif
        if (!IN_DEV_FORWARD(in_dev))    /* forwarding disabled on this device */
            goto e_hostunreach;
        goto no_route;
    }

    /* FIB lookup succeeded. */
    DEBUG_V4Route("%s-->slow looking sucess\n",__FUNCTION__);
    if (res.type == RTN_LOCAL)  /* local delivery */
        goto fib_ok;
    if (FIB_RES_NH(res).nh_scope == RT_SCOPE_HOST)  /* next hop has host scope */
        goto fib_ok;
#ifdef CONFIG_FW_POLICY
    /* As above: only the lookup call itself is guarded by FW_PR_CONFIG. */
    if (FW_PR_CONFIG)
        ret = lookup_pr_policy_by_skb_to_replace(skb, dev, &res, &pr_res, &fl, &proute_err);
    DEBUG_V4Route("%s-->looking policy route(%d)\n",__FUNCTION__,ret);
    if (ret == 2) {
        /*
         * NOTE(review): res came from a successful fib_lookup() but
         * free_res is still 0 here, so done will not fib_res_put() it.
         * Confirm the helper releases res on this path.
         */
        err = proute_err;
        goto done;
    }
    if (ret == 1) {
        /* fix bug MSG00011070, free old fib result */
        fib_res_put(&res);
        memcpy(&res, &pr_res, sizeof(struct fib_result));
    }
#endif

fib_ok:
    free_res = 1;
    RT_CACHE_STAT_INC(in_slow_tot);
    if (res.type == RTN_BROADCAST)
        goto brd_input;
    if (res.type == RTN_LOCAL) {
        int result;
        /* Packet for this host: validate the source address first. */
        result = fib_validate_source(saddr, daddr, tos,
                 net->loopback_dev->ifindex, dev, &spec_dst, &itag, skb->mark);
        if (result < 0)
            goto martian_source;
        if (result)
            flags |= RTCF_DIRECTSRC;
        spec_dst = daddr;
        goto local_input;
    }

    if (!IN_DEV_FORWARD(in_dev))    /* forwarding disabled on this device */
        goto e_hostunreach;
    if (res.type != RTN_UNICAST)    /* broadcast and local were handled above */
        goto martian_destination;

    DEBUG_V4Route("%s-->Find Forward Route dest:%pI4 source:%pI4 outdev:%s\n",__FUNCTION__,\
                                &daddr,&saddr,(FIB_RES_DEV(res)?FIB_RES_DEV(res)->name:"NULL"));

    /* Build the forwarding route cache entry. */
    err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
    /* Undo the temporary next-hop override recorded in res, if any. */
    if(res.prset){
            FIB_RES_GW(res) = res.oldnexthop;
            FIB_RES_NH(res).nh_scope = res.oldtype;
            res.prset = 0;
    }

done:
    in_dev_put(in_dev);
    if (free_res)
        fib_res_put(&res);
out:
    return err;

brd_input:
    /* Broadcast input is only accepted for IP frames. */
    if (skb->protocol != htons(ETH_P_IP))
        goto e_inval;

sbc_input:
    if (ipv4_is_zeronet(saddr))
        spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
    else
    {
        err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag, skb->mark);
        if (err < 0)
            goto martian_source;
        if (err)
            flags |= RTCF_DIRECTSRC;
    }
    flags |= RTCF_BROADCAST;
    res.type = RTN_BROADCAST;
    RT_CACHE_STAT_INC(in_brd);

local_input:
    /*
     * res.fi is only valid after a successful lookup; FIB_RES_DEV() reads
     * through it, so test it before touching the next hop.
     */
    DEBUG_V4Route("%s-->Find Local Route dest:%pI4 source:%pI4 outdev:%s\n",__FUNCTION__,\
                                &daddr,&saddr,((res.fi && FIB_RES_DEV(res))?FIB_RES_DEV(res)->name:"NULL"));
    rth = dst_alloc(&ipv4_dst_ops);
    if (!rth)
        goto e_nobufs;

    /* An input-only entry must never be used for output. */
    rth->u.dst.output= ip_rt_bug;
    rth->rt_genid = rt_genid(net);

    atomic_set(&rth->u.dst.__refcnt, 1);
    rth->u.dst.flags= DST_HOST;
    if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
        rth->u.dst.flags |= DST_NOPOLICY;
    rth->fl.fl4_dst = daddr;
    rth->rt_dst = daddr;
    rth->fl.fl4_tos = tos;
    rth->fl.mark    = skb->mark;
    rth->fl.fl4_src = saddr;
    rth->rt_src = saddr;
#ifdef CONFIG_NET_CLS_ROUTE
    rth->u.dst.tclassid = itag;
#endif
    rth->rt_iif =rth->fl.iif    = dev->ifindex;
    rth->u.dst.dev  = net->loopback_dev;
    dev_hold(rth->u.dst.dev);
    rth->idev   = in_dev_get(rth->u.dst.dev);
    rth->rt_gateway = daddr;
    rth->rt_spec_dst= spec_dst;
    rth->u.dst.input= ip_local_deliver; /* deliver to the upper layers */
    rth->rt_flags   = flags|RTCF_LOCAL;
    if (res.type == RTN_UNREACHABLE) {
            /* Reached via no_route: the entry reports the error through ip_error. */
            rth->u.dst.input= ip_error;
            rth->u.dst.error= -err;
            rth->rt_flags   &= ~RTCF_LOCAL;
    }
    rth->rt_type    = res.type;
    hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
    err = rt_intern_hash(hash, rth, NULL, skb); /* insert into the route cache hash table */
    goto done;

no_route:
    RT_CACHE_STAT_INC(in_no_route);
    spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
    res.type = RTN_UNREACHABLE;
    if (err == -ESRCH)
        err = -ENETUNREACH;
    if(IS_IPV4_DEBUG_ROUTE)
        msg_skb_debug(skb, IPV4_DEBUG_ROUTE, "route error, destination %u.%u.%u.%u unreachable\n",
                                    NIPQUAD(daddr));
    goto local_input;

    /*
    *   Do not cache martian addresses: they should be logged (RFC1812)
    */
martian_destination:
    RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
    if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
        printk(KERN_WARNING "martian destination %pI4 from %pI4, dev %s\n",
                    &daddr, &saddr, dev->name);
#endif

e_hostunreach:
    err = -EHOSTUNREACH;
    goto done;

e_inval:
    err = -EINVAL;
    goto done;

e_nobufs:
    err = -ENOBUFS;
    goto done;

martian_source:
    /* Hand the packet with an illegal source address to the martian handler. */
    ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
    goto e_inval;
}

你可能感兴趣的:(linux,网络协议)