netfilter之链接跟踪做nat

转载自: https://blog.csdn.net/City_of_skey/article/details/84996887

 

上一节我们讲了NAT是基于链接跟踪实现的,当一条链接跟踪建立后,要改变它的tuple的reply方向才能做NAT,链接跟踪的NAT由函数nf_nat_setup_info实现。

1、nf_nat_setup_info

nf_nat_setup_info对链接跟踪做NAT,只会改变链接跟踪reply方向的ip、端口,不会改变数据包的ip、端口。数据包的NAT在上一节已经介绍过,是在PRE_ROUTING、POST_ROUTING链的hook点根据链接跟踪的reply方向对数据包做NAT。

nf_nat_setup_info主要做以下几件事

(1)获取链接跟踪和nat关联的结构体struct nf_conn_nat,如果是空就调用nf_ct_ext_add添加NAT扩展,添加失败才直接返回NF_ACCEPT

(2)调用nf_nat_initialized判断是否已经做了链接跟踪的NAT(已经做过则触发BUG_ON)

(3)nf_ct_invert_tuplepr获取reply方向的tuple然后取反赋值给curr_tuple,也就是orig tuple

(4)get_unique_tuple这个函数是关键,这个就是得到一个新的tuple,new_tuple,这个new_tuple是做了NAT的orig方向。

(5)调用nf_ct_tuple_equal比较curr_tuple和new_tuple是否相等,如果不相等就要做NAT改变链接跟踪reply的tuple

(6)调用nf_ct_invert_tuplepr对new_tuple取反,得到新的reply方向的tuple

(7)nf_conntrack_alter_reply改变链接跟踪tuple的reply方向完成链接跟踪的NAT

(8)做了NAT的链接跟踪如果没有在nat_bysource链表中就要添加进去

(9)设置已经做NAT的标志IPS_DST_NAT_DONE_BIT/IPS_SRC_NAT_DONE_BIT

/*
 * nf_nat_setup_info - set up the NAT binding of a conntrack entry for one
 * manipulation type.
 *
 * @ct:        conntrack entry to bind
 * @range:     NAT range handed on to get_unique_tuple()
 * @maniptype: IP_NAT_MANIP_SRC or IP_NAT_MANIP_DST
 *
 * This function receives no packet; it works on the conntrack entry only.
 * It computes a new tuple for the original direction, and if that differs
 * from the current one it replaces the reply tuple with the inverse of the
 * new tuple and sets IPS_SRC_NAT / IPS_DST_NAT. On the first NAT setup for
 * this conntrack the NAT extension is linked into the per-net nat_bysource
 * hash. Finally the matching *_NAT_DONE bit is set.
 *
 * Calling it twice for the same @maniptype on one conntrack trips BUG_ON().
 *
 * Returns NF_ACCEPT on every path, including when the NAT extension
 * cannot be allocated.
 */
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
          const struct nf_nat_range *range,
          enum nf_nat_manip_type maniptype)
{
    struct net *net = nf_ct_net(ct);
    struct nf_conntrack_tuple curr_tuple, new_tuple;
    struct nf_conn_nat *nat;
    /* Snapshot taken before any status bit is modified below: non-zero
       only when none of the IPS_NAT_DONE_MASK bits is set on entry. */
    int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
 
    /* nat helper or nfctnetlink also setup binding */
    nat = nfct_nat(ct);
    if (!nat) {
        nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
        if (nat == NULL) {
            pr_debug("failed to add NAT extension\n");
            return NF_ACCEPT;
        }
    }
 
    NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC ||
             maniptype == IP_NAT_MANIP_DST);
    BUG_ON(nf_nat_initialized(ct, maniptype));
 
    /* What we've got will look like inverse of reply. Normally
       this is what is in the conntrack, except for prior
       manipulations (future optimization: if num_manips == 0,
       orig_tp =
       conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
       /* Invert the reply-direction tuple and store it in curr_tuple. */
    nf_ct_invert_tuplepr(&curr_tuple,
                 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
    /* Derive a new, unique tuple from the current one. */
    get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
 
    /* If the new original-direction tuple differs from the current one,
       the conntrack itself needs NAT, i.e. its reply tuple must change. */
    if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
        struct nf_conntrack_tuple reply;
 
        /* Alter conntrack table so will recognize replies. */
        /* Invert the new tuple. */
        nf_ct_invert_tuplepr(&reply, &new_tuple);
        /* Install the inverted tuple as the reply direction; this is
           the conntrack-level NAT. */
        nf_conntrack_alter_reply(ct, &reply);
 
        /* Non-atomic: we own this at the moment. */
        if (maniptype == IP_NAT_MANIP_SRC)
            ct->status |= IPS_SRC_NAT;
        else
            ct->status |= IPS_DST_NAT;
    }
 
    /* Place in source hash if this is the first time. */
    if (have_to_hash) {
        unsigned int srchash;
 
        srchash = hash_by_src(net, nf_ct_zone(ct),
                      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        spin_lock_bh(&nf_nat_lock);
        /* nf_conntrack_alter_reply might re-allocate extension area,
           so the extension pointer is fetched again before use. */
        nat = nfct_nat(ct);
        nat->ct = ct;
        hlist_add_head_rcu(&nat->bysource,
                   &net->ipv4.nat_bysource[srchash]);
        spin_unlock_bh(&nf_nat_lock);
    }
 
    /* It's done. */
    /* Record that setup for this manipulation type (SNAT or DNAT) is done. */
    if (maniptype == IP_NAT_MANIP_DST)
        set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
    else
        set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
 
    return NF_ACCEPT;
}
2、nf_ct_invert_tuplepr

调用__nf_ct_l3proto_find获取三层链接跟踪的操作函数结构体struct nf_conntrack_l3proto实例,调用__nf_ct_l4proto_find获取四层链接跟踪的操作函数结构体struct nf_conntrack_l4proto实例,然后调用nf_ct_invert_tuple根据orig方向取反方向的tuple。

/*
 * nf_ct_invert_tuplepr - invert a tuple, looking up the protocol handlers
 * from the tuple itself.
 *
 * @inverse: output tuple
 * @orig:    tuple to invert
 *
 * The L3 handler is looked up by orig->src.l3num and the L4 handler by
 * (orig->src.l3num, orig->dst.protonum); both are then passed to
 * nf_ct_invert_tuple(). The lookups and the call happen inside an RCU
 * read-side section.
 *
 * Returns whatever nf_ct_invert_tuple() returns.
 */
bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
              const struct nf_conntrack_tuple *orig)
{
    bool ret;
 
    rcu_read_lock();
    ret = nf_ct_invert_tuple(inverse, orig,
                 __nf_ct_l3proto_find(orig->src.l3num),
                 __nf_ct_l4proto_find(orig->src.l3num,
                              orig->dst.protonum));
    rcu_read_unlock();
    return ret;
}
nf_ct_invert_tuple调用三层、四层的invert_tuple根据orig的nf_conntrack_tuple获取反方向的nf_conntrack_tuple。

bool
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
           const struct nf_conntrack_tuple *orig,
           const struct nf_conntrack_l3proto *l3proto,
           const struct nf_conntrack_l4proto *l4proto)
{
    memset(inverse, 0, sizeof(*inverse));
 
    inverse->src.l3num = orig->src.l3num;
    /*三层根据orig的nf_conntrack_tuple获取反方向的nf_conntrack_tuple*/
    if (l3proto->invert_tuple(inverse, orig) == 0)
        return false;
 
    inverse->dst.dir = !orig->dst.dir;
 
    inverse->dst.protonum = orig->dst.protonum;
    /*四层根据orig的nf_conntrack_tuple获取反方向的nf_conntrack_tuple*/
    return l4proto->invert_tuple(inverse, orig);
}
3、get_unique_tuple

这个函数主要是获取唯一的做了nat的tuple。

(1)首先如果是SNAT且没有设置IP_NAT_RANGE_PROTO_RANDOM标志,就调用find_appropriate_src在nat_bysource链表中查找已经做了NAT的tuple,如果找到了而且没有被其他链接使用就直接返回

(2)find_best_ips_proto做ip地址的nat

(3)四层协议做NAT,如果是IP_NAT_RANGE_PROTO_RANDOM标志也就是随机的,就调用四层协议的unique_tuple获取唯一没有被使用的端口做NAT,如果是IP_NAT_RANGE_PROTO_SPECIFIED也就是指定端口,就要调用in_range判断此端口是否在合理范围内。

static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
         const struct nf_conntrack_tuple *orig_tuple,
         const struct nf_nat_range *range,
         struct nf_conn *ct,
         enum nf_nat_manip_type maniptype)
{
    struct net *net = nf_ct_net(ct);
    const struct nf_nat_protocol *proto;
    u16 zone = nf_ct_zone(ct);
 
    /* 1) If this srcip/proto/src-proto-part is currently mapped,
       and that same mapping gives a unique tuple within the given
       range, use that.
       This is only required for source (ie. NAT/masq) mappings.
       So far, we don't do local source mappings, so multiple
       manips not an issue.  */
    if (maniptype == IP_NAT_MANIP_SRC &&
        !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
        if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) {
            pr_debug("get_unique_tuple: Found current src map\n");
            /*没有被其他的使用就直接返回*/
            if (!nf_nat_used_tuple(tuple, ct))
                return;
        }
    }
 
    /* 2) Select the least-used IP/proto combination in the given
       range. */
    *tuple = *orig_tuple;
    /*IP地址做NAT*/
    find_best_ips_proto(zone, tuple, range, ct, maniptype);
 
    /* 3) The per-protocol part of the manip is made to map into
       the range to make a unique tuple. */
 
    rcu_read_lock();
    /*查找四层协议nat实例结构体struct nf_nat_protocol*/
    proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
 
    /* Change protocol info to have some randomization */
    /*支持IP_NAT_RANGE_PROTO_RANDOM就调用四层协议函数unique_tuple
    随机获取一个唯一的四层tuple*/
    if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
        proto->unique_tuple(tuple, range, maniptype, ct);
        goto out;
    }
 
    /* Only bother mapping if it's not already in range and unique */
    /*IP_NAT_RANGE_PROTO_SPECIFIED这个是用户指定的四层端口要调用
    ip_range判断是否在合理范围内,而且要判断是否已经被其他使用*/
    if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
         proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
        !nf_nat_used_tuple(tuple, ct))
        goto out;
 
    /* Last change: get protocol to try to obtain unique tuple. */
    /*调用unique_tuple后去选择一个唯一的没有被使用的四层端口完成nat*/
    proto->unique_tuple(tuple, range, maniptype, ct);
out:
    rcu_read_unlock();
}
3.1 find_appropriate_src

对于SNAT会调用这个函数,在已经做了NAT的表nat_bysource中查找源相同(same_src)且zone相同的链接跟踪,如果找到了就对它reply方向的tuple取反,并把dst部分替换为当前tuple的dst,得到目标tuple。然后调用in_range判断目标tuple是否在合理范围内,在范围内就返回1,否则继续查找,都不满足则返回0。

/*
 * find_appropriate_src - look for an existing source mapping to reuse.
 * Only called for SRC manip.
 *
 * @net:    network namespace whose nat_bysource table is searched
 * @zone:   conntrack zone the candidate must belong to
 * @tuple:  tuple whose source is being mapped
 * @result: output; filled from the candidate's inverted reply tuple with
 *          the destination part replaced by @tuple's destination
 * @range:  range the result must fall into
 *
 * Walks one nat_bysource bucket under rcu_read_lock().  Returns 1 when a
 * candidate yields a @result accepted by in_range(), 0 otherwise.  Note
 * that @result may have been written even when 0 is returned.
 */
static int
find_appropriate_src(struct net *net, u16 zone,
             const struct nf_conntrack_tuple *tuple,
             struct nf_conntrack_tuple *result,
             const struct nf_nat_range *range)
{
    unsigned int bucket = hash_by_src(net, zone, tuple);
    const struct nf_conn_nat *entry;
    const struct nf_conn *owner;
    const struct hlist_node *pos;
    int found = 0;

    rcu_read_lock();
    /* Walk the bysource chain for this hash bucket. */
    hlist_for_each_entry_rcu(entry, pos, &net->ipv4.nat_bysource[bucket], bysource) {
        owner = entry->ct;

        /* Skip entries with a different source or a different zone. */
        if (!same_src(owner, tuple) || nf_ct_zone(owner) != zone)
            continue;

        /* Take the source part from the inverted reply tuple, and
           the destination part from the tuple being mapped. */
        nf_ct_invert_tuplepr(result,
                   &owner->tuplehash[IP_CT_DIR_REPLY].tuple);
        result->dst = tuple->dst;

        /* Accept the first candidate that lies inside the range. */
        if (in_range(result, range)) {
            found = 1;
            break;
        }
    }
    rcu_read_unlock();

    return found;
}
3.2 find_best_ips_proto

这个函数是根据range选择一个合理范围内的IP地址做NAT。

/*
 * find_best_ips_proto - choose the NAT IP address for a tuple.
 *
 * @zone:      conntrack zone, mixed into the hash
 * @tuple:     tuple to rewrite in place
 * @range:     NAT range; min_ip/max_ip bound the choice
 * @ct:        conntrack entry (not used in this function)
 * @maniptype: IP_NAT_MANIP_SRC rewrites the source address, anything
 *             else rewrites the destination address
 *
 * Does nothing unless IP_NAT_RANGE_MAP_IPS is set.  With a single-address
 * range that address is used directly.  Otherwise the address is picked
 * by hashing the tuple's source IP together with either 0 (when
 * IP_NAT_RANGE_PERSISTENT is set) or the destination IP xor @zone, and
 * scaling the hash into [min_ip, max_ip].
 */
static void
find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
            const struct nf_nat_range *range,
            const struct nf_conn *ct,
            enum nf_nat_manip_type maniptype)
{
    __be32 *target;
    /* Host byte order. */
    u_int32_t lowest, highest, offset;

    /* Nothing to do when no IP mapping was requested. */
    if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
        return;

    /* Pick the address field this manip type rewrites. */
    target = (maniptype == IP_NAT_MANIP_SRC) ? &tuple->src.u3.ip
                         : &tuple->dst.u3.ip;

    /* Single-address range: no choice to make. */
    if (range->min_ip == range->max_ip) {
        *target = range->min_ip;
        return;
    }

    lowest = ntohl(range->min_ip);
    highest = ntohl(range->max_ip);

    /* The hash uses a fixed initval of 0, so the same inputs always map
     * to the same address.  In persistent mode the second input is 0
     * instead of the destination address, so the result no longer
     * depends on the destination or the zone. */
    if (range->flags & IP_NAT_RANGE_PERSISTENT)
        offset = jhash_2words((__force u32)tuple->src.u3.ip, 0, 0);
    else
        offset = jhash_2words((__force u32)tuple->src.u3.ip,
                      (__force u32)tuple->dst.u3.ip ^ zone, 0);

    /* Scale the 32-bit hash into the number of addresses in the range. */
    offset = ((u64)offset * (highest - lowest + 1)) >> 32;
    *target = htonl(lowest + offset);
}
 
--------------------- 
作者:TCH_world 
原文:https://blog.csdn.net/City_of_skey/article/details/84996887 

你可能感兴趣的:(Linux,netfilter)