Linux下NAT功能的实现

本文档的Copyleft归yfydz所有,使用GPL发布,可以自由拷贝、转载,转载时请保持文档的完整性,严禁用于任何商业用途。
msn: [email protected]
来源: http://yfydz.cublog.cn

1. 前言
 
在2.4/2.6内核的Linux中的防火墙代码netfilter中支持源NAT(SNAT)和目的NAT
(DNAT),基本可以满足各种类型的NAT需求,本文介绍Linux下的NAT的具体实现过程,所引的内核代码版本2.4.26,NAT原理部分不在此介绍,有兴趣者可先看我的另一篇NAT原理介绍的文章。
 
2. NAT hook
 
NAT操作也是以netfilter节点形式挂接在相应的处理点上的,DNAT挂接在NF_IP_PRE_ROUTING点上,优先级高于 FILTER低于MANGLE,表示在mangle表后处理,但在filter表前处理数据包;SNAT挂接在NF_IP_POST_ROUTING点上,优先级低于FILTER,表示在filter表后面处理数据包。
 
在net/ipv4/netfilter/ip_nat_standalone.c中:
目的NAT的hook节点:
/* Before packet filtering, change destination */
/* Hook entry for destination NAT: handler ip_nat_fn, protocol family PF_INET,
   hook point NF_IP_PRE_ROUTING, priority NF_IP_PRI_NAT_DST.  The initializer
   is positional (NOTE(review): field order presumably list, hook, pf,
   hooknum, priority -- confirm against struct nf_hook_ops). */
static struct nf_hook_ops ip_nat_in_ops
= { { NULL, NULL }, ip_nat_fn, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_NAT_DST };
 
源NAT的hook节点:
/* After packet filtering, change source */
/* Hook entry for source NAT: handler ip_nat_out, protocol family PF_INET,
   hook point NF_IP_POST_ROUTING, priority NF_IP_PRI_NAT_SRC.  The initializer
   is positional (NOTE(review): field order presumably list, hook, pf,
   hooknum, priority -- confirm against struct nf_hook_ops). */
static struct nf_hook_ops ip_nat_out_ops
= { { NULL, NULL }, ip_nat_out, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_NAT_SRC};
 
include/linux/netfilter_ipv4.h
/* Priorities of the IPv4 netfilter hooks.  As used in this article, a
   numerically smaller priority is processed earlier at the same hook point
   (mangle before DNAT before filter before SNAT). */
enum nf_ip_hook_priorities {
 NF_IP_PRI_FIRST = INT_MIN,
 NF_IP_PRI_CONNTRACK = -200,  // connection tracking
 NF_IP_PRI_MANGLE = -150,     // mangle table
 NF_IP_PRI_NAT_DST = -100,    // DNAT
 NF_IP_PRI_FILTER = 0,        // filter table
 NF_IP_PRI_NAT_SRC = 100,     // SNAT
 NF_IP_PRI_LAST = INT_MAX,
};
 
ip_nat_fn()是NAT hook的主处理函数,ip_nat_out()函数也是在数据合法性检查后调用ip_nat_fn()函数。
 
3. NAT处理相关结构
 
在连接跟踪结构struct ip_conntrack中包含了关于NAT的相关结构(include/linux/netfilter_ipv4/ip_conntrack.h):
 
// Excerpt of the connection-tracking structure: only the NAT-related member
// is shown; the "......" line stands for the members omitted by the article.
struct ip_conntrack
{
......
#ifdef CONFIG_IP_NF_NAT_NEEDED
 // NAT state embedded in every tracked connection
 struct {
  struct ip_nat_info info; // manipulations recorded for this connection
  union ip_conntrack_nat_help help; // per-protocol NAT data (per the article, empty in 2.4.26)
#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
 defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
  int masq_index; // NOTE(review): presumably used by the MASQUERADE target -- confirm
#endif
 } nat;
#endif /* CONFIG_IP_NF_NAT_NEEDED */
};
 
其中比较重要的是struct ip_nat_info结构,而union ip_conntrack_nat_help是各协议NAT时需要特殊处理的结构描述,不过在2.4.26内核中都没定义,联合为空。
 
// Capacity of the manips[] array in struct ip_nat_info (6 entries).
#define IP_NAT_MAX_MANIPS (2*3)
 
// Describes one modification to apply to packets of a connection
struct ip_nat_info_manip
{
 /* The direction. */
 u_int8_t direction;
 /* Which hook the manipulation happens on. */
 u_int8_t hooknum;
 /* The manipulation type. */
 u_int8_t maniptype; // manipulation type: SNAT / DNAT
// What to write into the packet: the address and the upper-layer protocol
// information
 /* Manipulations to occur at each conntrack in this dirn. */
 struct ip_conntrack_manip manip;
};
 
/* The structure embedded in the conntrack structure. */
struct ip_nat_info
{
 /* Set to zero when conntrack created: bitmask of maniptypes */
 int initialized; // at most two bits are actually used
 unsigned int num_manips; // number of valid entries in manips[]
 /* Manipulations to be done on this conntrack. */
// Each connection can record at most IP_NAT_MAX_MANIPS (6) manipulations
 struct ip_nat_info_manip manips[IP_NAT_MAX_MANIPS];
 struct ip_nat_hash bysource, byipsproto; // hash-table linkage, by source and by IPs/protocol
 /* Helper (NULL if none). */
 struct ip_nat_helper *helper; // NAT helper for multi-connection protocols
 struct ip_nat_seq seq[IP_CT_DIR_MAX]; // sequence-number changes for each of the two directions
};

4. ip_nat_fn()函数
 
ip_nat_fn()是NAT hook的基本处理函数(net/ipv4/netfilter/ip_nat_standalone.c),目的是建立连接的NAT info信息, 并修改数据包中的相应部分。
 
/*
 * Main NAT hook handler.
 *
 * Fetches the conntrack entry attached to the packet.  For packets in the
 * NEW / RELATED states it builds the connection's NAT info if the manip type
 * of this hook has not been set up yet (through the master connection's
 * expect callback, a null binding, or a lookup in the NAT rule table); for
 * established connections the stored NAT info is reused.  The recorded
 * manipulations are then applied with do_bindings().
 *
 * Returns a netfilter verdict:
 *   - NF_ACCEPT for an untracked packet, except NF_DROP for an untracked
 *     ICMP redirect;
 *   - the result of icmp_reply_translation() for related ICMP packets;
 *   - the setup step's verdict if it is not NF_ACCEPT;
 *   - otherwise the result of do_bindings().
 */
static unsigned int
ip_nat_fn(unsigned int hooknum,
   struct sk_buff **pskb,
   const struct net_device *in,
   const struct net_device *out,
   int (*okfn)(struct sk_buff *))
{
 struct ip_conntrack *ct;
 enum ip_conntrack_info ctinfo;
 struct ip_nat_info *info;
 /* maniptype == SRC for postrouting. */
// The kind of NAT is derived from hooknum; which kind of NAT can run at a
// given netfilter hook point is fixed (numeric values as given by the
// original annotation):
// NF_IP_PRE_ROUTING performs DNAT, maniptype=1
// NF_IP_POST_ROUTING performs SNAT, maniptype=0
 enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
 
 /* We never see fragments: conntrack defrags on pre-routing
    and local-out, and ip_nat_out protects post-routing. */
 IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
         & htons(IP_MF|IP_OFFSET)));
 (*pskb)->nfcache |= NFC_UNKNOWN;
 /* If we had a hardware checksum before, it's now invalid */
 if ((*pskb)->ip_summed == CHECKSUM_HW)
  (*pskb)->ip_summed = CHECKSUM_NONE;
 
// Every packet subject to NAT must already have been processed by connection
// tracking; if no connection is found for the packet, no NAT is applied.
// Connection tracking has the highest priority and handles a packet as soon
// as it enters netfilter.
 ct = ip_conntrack_get(*pskb, &ctinfo);
 /* Can't track?  It's not due to stress, or conntrack would
    have dropped it.  Hence it's the user's responsibilty to
    packet filter it out, or implement conntrack/NAT for that
    protocol. 8) --RR */
 if (!ct) {
  /* Exception: ICMP redirect to new connection (not in
                   hash table yet).  We must not let this through, in
                   case we're doing NAT to the same network. */
  struct iphdr *iph = (*pskb)->nh.iph;
  struct icmphdr *hdr = (struct icmphdr *)
   ((u_int32_t *)iph + iph->ihl);
  if (iph->protocol == IPPROTO_ICMP
      && hdr->type == ICMP_REDIRECT)
   return NF_DROP;
  return NF_ACCEPT;
 }
 
 switch (ctinfo) {
// Build the NAT info for packets of related connections, replies on related
// connections, and new connections
 case IP_CT_RELATED:
 case IP_CT_RELATED+IP_CT_IS_REPLY:
  if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
   return icmp_reply_translation(*pskb, ct, hooknum,
            CTINFO2DIR(ctinfo));
  }
  /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
 case IP_CT_NEW:
  info = &ct->nat.info;
  WRITE_LOCK(&ip_nat_lock);
  /* Seen it before?  This can happen for loopback, retrans,
     or local packets.. */
// Check whether the manip type of this hook has already been initialized.
// Both manip types can be initialized for the same connection, i.e. one
// packet may have both its source and its destination rewritten; this is
// needed when the server and the internal hosts are on the same subnet.
// netfilter handles that case automatically without any modification (the
// article author notes having previously believed, wrongly, that only one
// direction could be modified).
  if (!(info->initialized & (1 << maniptype))
#ifndef CONFIG_IP_NF_NAT_LOCAL
      /* If this session has already been confirmed we must not
       * touch it again even if there is no mapping set up.
       * Can only happen on local->local traffic with
       * CONFIG_IP_NF_NAT_LOCAL disabled.
       */
      && !(ct->status & IPS_CONFIRMED)
#endif
      ) {
   unsigned int ret;
   if (ct->master
       && master_ct(ct)->nat.info.helper
       && master_ct(ct)->nat.info.helper->expect) {
// Multi-connection protocol: for a child connection, call the master
// connection's expect function to fill in the NAT info
    ret = call_expect(master_ct(ct), pskb,
        hooknum, ct, info);
   } else {
#ifdef CONFIG_IP_NF_NAT_LOCAL
    /* LOCAL_IN hook doesn't have a chain!  */
    if (hooknum == NF_IP_LOCAL_IN)
     ret = alloc_null_binding(ct, info,
         hooknum);
    else
#endif
// Otherwise look up the NAT rule table and run the matching rule's action
// (SNAT or DNAT) to fill in the NAT info
    ret = ip_nat_rule_find(pskb, hooknum, in, out,
             ct, info);
   }
// Any verdict other than NF_ACCEPT is returned right away (per the original
// annotation, the packet will then be dropped); the lock is released first
   if (ret != NF_ACCEPT) {
    WRITE_UNLOCK(&ip_nat_lock);
    return ret;
   }
  } else
   DEBUGP("Already setup manip %s for ct %p\n",
          maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
          ct);
  WRITE_UNLOCK(&ip_nat_lock);
  break;
 default:
// The connection's NAT info has already been filled in; use it directly
  /* ESTABLISHED */
  IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED
        || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
  info = &ct->nat.info;
 }
 IP_NF_ASSERT(info);
// Modify the relevant parts of the packet according to the NAT info
 return do_bindings(ct, ctinfo, info, hooknum, pskb);
}

5. do_bindings()函数
 
do_bindings()是完成具体的NAT操作部分的函数(net/ipv4/netfilter/ip_nat_core.c),修改地址端口等信息,必要时修改数据内容部分信息(这种情况下可能数据包长度会变,序列号/确认号相应会改变,这些都累计进NAT info参数中),并重新计算各种校验和(TCP/UDP/ICMP校验和,IP头校验和):
 
/* Do packet manipulations according to binding. */
/*
 * Applies to *pskb every manipulation in info->manips[] whose direction and
 * hook match this packet, then, if the connection has a NAT helper, calls
 * the helper for each matching, not-yet-established expectation (or once
 * without an expectation when the helper has IP_NAT_HELPER_F_ALWAYS set)
 * and, for TCP at POST_ROUTING / LOCAL_IN, adjusts the sequence numbers.
 *
 * *pskb may be replaced by a private copy when the skb is cloned and has no
 * owning socket.
 *
 * Returns NF_DROP if that copy cannot be allocated, a helper's verdict if it
 * is not NF_ACCEPT, otherwise NF_ACCEPT.
 */
unsigned int
do_bindings(struct ip_conntrack *ct,
     enum ip_conntrack_info ctinfo,
     struct ip_nat_info *info,
     unsigned int hooknum,
     struct sk_buff **pskb)
{
 unsigned int i;
 struct ip_nat_helper *helper;
// Packet direction: original or reply
 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
// Whether this is TCP; TCP needs its sequence/acknowledgement numbers handled
 int is_tcp = (*pskb)->nh.iph->protocol == IPPROTO_TCP;
 
 /* Need nat lock to protect against modification, but neither
    conntrack (referenced) and helper (deleted with
    synchronize_bh()) can vanish. */
 READ_LOCK(&ip_nat_lock);
 for (i = 0; i < info->num_manips; i++) {
  /* raw socket (tcpdump) may have clone of incoming
                   skb: don't disturb it --RR */
  if (skb_cloned(*pskb) && !(*pskb)->sk) {
   struct sk_buff *nskb = skb_copy(*pskb, GFP_ATOMIC);
   if (!nskb) {
    READ_UNLOCK(&ip_nat_lock);
    return NF_DROP;
   }
   kfree_skb(*pskb);
   *pskb = nskb;
  }
// Check that the packet's direction and the hook number match what this
// manip entry specifies
  if (info->manips[i].direction == dir
      && info->manips[i].hooknum == hooknum) {
   DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n",
          *pskb,
          info->manips[i].maniptype == IP_NAT_MANIP_SRC
          ? "SRC" : "DST",
          NIPQUAD(info->manips[i].manip.ip),
          htons(info->manips[i].manip.u.all));
// Perform the actual NAT operation: rewrite the address in the IP header and
// the TCP/UDP/... ports
   manip_pkt((*pskb)->nh.iph->protocol,
      (*pskb)->nh.iph,
      (*pskb)->len,
      &info->manips[i].manip,
      info->manips[i].maniptype,
      &(*pskb)->nfcache);
  }
 }
 helper = info->helper;
 READ_UNLOCK(&ip_nat_lock);
// Multi-connection protocols
 if (helper) {
  struct ip_conntrack_expect *exp = NULL;
  struct list_head *cur_item;
  int ret = NF_ACCEPT;
  int helper_called = 0;
  DEBUGP("do_bindings: helper existing for (%p)\n", ct);
  /* Always defragged for helpers */
  IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
          & htons(IP_MF|IP_OFFSET)));
  /* Have to grab read lock before sibling_list traversal */
  READ_LOCK(&ip_conntrack_lock);
// The master connection's list of child connections is walked backwards
  list_for_each_prev(cur_item, &ct->sibling_list) {
// Fetch the expected-connection entry
   exp = list_entry(cur_item, struct ip_conntrack_expect,
      expected_list);
     
   /* if this expectation is already established, skip */
// The expected child connection has already arrived; nothing more to do
   if (exp->sibling)
    continue;
// Check whether this packet is one that has to be modified.
// NOTE(review): per the original annotation, exp_for_packet() always returns
// 1 for UDP and ICMP and can only return 0 for TCP -- confirm against its
// definition.
   if (exp_for_packet(exp, pskb)) {
    /* FIXME: May be true multiple times in the
     * case of UDP!! */
    DEBUGP("calling nat helper (exp=%p) for packet\n", exp);
// Call the multi-connection protocol's help function to modify the relevant
// data in the payload
    ret = helper->help(ct, exp, info, ctinfo,
         hooknum, pskb);
    if (ret != NF_ACCEPT) {
     READ_UNLOCK(&ip_conntrack_lock);
     return ret;
    }
    helper_called = 1;
   }
  }
  /* Helper might want to manip the packet even when there is no
   * matching expectation for this packet */
  if (!helper_called && helper->flags & IP_NAT_HELPER_F_ALWAYS) {
   DEBUGP("calling nat helper for packet without expectation\n");
   ret = helper->help(ct, NULL, info, ctinfo,
        hooknum, pskb);
   if (ret != NF_ACCEPT) {
    READ_UNLOCK(&ip_conntrack_lock);
    return ret;
   }
  }
  READ_UNLOCK(&ip_conntrack_lock);
  
  /* Adjust sequence number only once per packet
   * (helper is called at all hooks) */
// Adjust the TCP sequence numbers
  if (is_tcp && (hooknum == NF_IP_POST_ROUTING
          || hooknum == NF_IP_LOCAL_IN)) {
   DEBUGP("ip_nat_core: adjusting sequence number\n");
   /* future: put this in a l4-proto specific function,
    * and call this function here. */
   ip_nat_seq_adjust(*pskb, ct, ctinfo);
  }
  return ret;
 } else
  return NF_ACCEPT;
 /* not reached */
}
 
manip_pkt()函数(net/ipv4/netfilter/ip_nat_core.c)相对就比较简单了,先修改传输层部分的数据参数(如TCP、UDP端口),再修改IP头中的地址:
 
/*
 * Apply a single manipulation to a packet: mark the packet as altered, let
 * the per-protocol handler rewrite the transport-layer part (e.g. TCP/UDP
 * ports), then patch the source or destination address in the IP header and
 * update the IP header checksum accordingly.
 *
 * The result of find_nat_proto() is dereferenced unconditionally, so it must
 * yield an entry with a non-NULL manip_pkt callback (per the article, an
 * "unknown protocol" default is returned for unsupported protocols; the
 * callback may do nothing but must not be NULL).
 */
static void
manip_pkt(u_int16_t proto, struct iphdr *iph, size_t len,
   const struct ip_conntrack_manip *manip,
   enum ip_nat_manip_type maniptype,
   __u32 *nfcache)
{
 __u32 *addr;

 *nfcache |= NFC_ALTERED;
 find_nat_proto(proto)->manip_pkt(iph, len, manip, maniptype);

 /* Select the header address that this manip type rewrites. */
 addr = (maniptype == IP_NAT_MANIP_SRC) ? &iph->saddr : &iph->daddr;

 /* Fix up the checksum from the old address before overwriting it. */
 iph->check = ip_nat_cheat_check(~*addr, manip->ip, iph->check);
 *addr = manip->ip;
#if 0
 if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
  DEBUGP("IP: checksum on packet bad.\n");
 if (proto == IPPROTO_TCP) {
  void *th = (u_int32_t *)iph + iph->ihl;
  if (tcp_v4_check(th, len - 4*iph->ihl, iph->saddr, iph->daddr,
     csum_partial((char *)th, len-4*iph->ihl, 0)))
   DEBUGP("TCP: checksum on packet bad\n");
 }
#endif
}
 
6. SNAT、DNAT目标函数
 
前面在ip_nat_fn()函数中调用的ip_nat_rule_find()用来查找NAT规则,执行规则的动作,规则目标不是SNAT就是DNAT,该目标的具体实现在net/ipv4/netfilter/ip_nat_rule.c中。不论是SNAT还是DNAT规则,其目标函数最终都是调用ip_nat_setup_info()函数来建立连接的NAT info信息。
net/ipv4/netfilter/ip_nat_rule.c:

/* Source NAT */
static unsigned int ipt_snat_target(struct sk_buff **pskb,
        unsigned int hooknum,
        const struct net_device *in,
        const struct net_device *out,
        const void *targinfo,
        void *userinfo)
{
 struct ip_conntrack *ct;
 enum ip_conntrack_info ctinfo;
 IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
 ct = ip_conntrack_get(*pskb, &ctinfo);
 /* Connection must be valid and new. */
 IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 IP_NF_ASSERT(out);
// 只有新连接才进行NAT info的建立
// targinfo实际是struct ip_nat_multi_range结构指针,记录转换后的
// 地址、端口等信息, 一个NAT规则可以转换到可以转换到多个地址端口上
 return ip_nat_setup_info(ct, targinfo, hooknum);
}
 
static unsigned int ipt_dnat_target(struct sk_buff **pskb,
        unsigned int hooknum,
        const struct net_device *in,
        const struct net_device *out,
        const void *targinfo,
        void *userinfo)
{
 struct ip_conntrack *ct;
 enum ip_conntrack_info ctinfo;
#ifdef CONFIG_IP_NF_NAT_LOCAL
 IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
       || hooknum == NF_IP_LOCAL_OUT);
#else
 IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING);
#endif
 ct = ip_conntrack_get(*pskb, &ctinfo);
 /* Connection must be valid and new. */
 IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
// 只有新连接才进行NAT info的建立
// targinfo实际是struct ip_nat_multi_range结构指针,记录转换后的
// 地址、端口等信息, 一个NAT规则可以转换到可以转换到多个地址端口上
 return ip_nat_setup_info(ct, targinfo, hooknum);
}
......
/*
 * Run the packet through the NAT table and make sure the connection ends up
 * with a binding for this hook's manip type.
 *
 * If the table verdict is NF_ACCEPT but no rule initialized the manip type
 * (i.e. no NAT rule applied to the packet), a null binding is allocated,
 * which records that nothing has to be changed.
 *
 * Returns the table verdict, or the result of alloc_null_binding() when a
 * null binding had to be created.
 */
int ip_nat_rule_find(struct sk_buff **pskb,
       unsigned int hooknum,
       const struct net_device *in,
       const struct net_device *out,
       struct ip_conntrack *ct,
       struct ip_nat_info *info)
{
 int verdict = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);

 /* Anything but ACCEPT is passed straight back to the caller. */
 if (verdict != NF_ACCEPT)
  return verdict;

 /* A rule already set up this manip type: nothing more to do. */
 if (info->initialized & (1 << HOOK2MANIP(hooknum)))
  return verdict;

 /* NUL mapping */
 return alloc_null_binding(ct, info, hooknum);
}

7. ip_nat_setup_info()函数
 
ip_nat_setup_info()函数是建立连接的NAT info的基本函数,在net/ipv4/netfilter/ip_nat_core.c中定义:
 
/* Where to manip the reply packets (will be reverse manip). */
/* Indexed by the hook at which the original-direction manipulation is set
   up; yields the hook recorded for the matching reply-direction manipulation
   (PRE_ROUTING <-> POST_ROUTING, and LOCAL_OUT <-> LOCAL_IN when
   CONFIG_IP_NF_NAT_LOCAL is enabled).  Entries not listed are left
   zero-initialized. */
static unsigned int opposite_hook[NF_IP_NUMHOOKS]
= { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING,
    [NF_IP_POST_ROUTING] = NF_IP_PRE_ROUTING,
#ifdef CONFIG_IP_NF_NAT_LOCAL
    [NF_IP_LOCAL_OUT] = NF_IP_LOCAL_IN,
    [NF_IP_LOCAL_IN] = NF_IP_LOCAL_OUT,
#endif
};
 
/*
 * Build the NAT info of `conntrack` for the manip type of hook `hooknum`,
 * using the ranges in `mr`.
 *
 * Steps: derive the current original-direction tuple from the reply tuple,
 * obtain a unique translated tuple, update the conntrack reply tuple
 * (retrying with another tuple if that fails), record the manipulations for
 * both directions in info->manips[], assign a NAT helper for master
 * connections, mark the manip type as initialized and place (or replace) the
 * entry in the NAT hashes.
 *
 * Must be called with ip_nat_lock write-held (see MUST_BE_WRITE_LOCKED).
 *
 * Returns NF_DROP when no unique tuple can be obtained, NF_ACCEPT otherwise.
 */
unsigned int
ip_nat_setup_info(struct ip_conntrack *conntrack,
    const struct ip_nat_multi_range *mr,
    unsigned int hooknum)
{
 struct ip_conntrack_tuple new_tuple, inv_tuple, reply;
 struct ip_conntrack_tuple orig_tp;
 struct ip_nat_info *info = &conntrack->nat.info;
// A non-zero info->initialized means the info was already initialized before
// (for the other manip type), so the entry is already in the hashes
 int in_hashes = info->initialized;
 MUST_BE_WRITE_LOCKED(&ip_nat_lock);
 IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
       || hooknum == NF_IP_POST_ROUTING
       || hooknum == NF_IP_LOCAL_OUT);
 IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
 IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum))));
 /* What we've got will look like inverse of reply. Normally
    this is what is in the conntrack, except for prior
    manipulations (future optimization: if num_manips == 0,
    orig_tp =
    conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
// Invert the connection's reply-direction tuple to obtain the
// original-direction tuple
 invert_tuplepr(&orig_tp,
         &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);
#if 0
 {
 unsigned int i;
 DEBUGP("Hook %u (%s), ", hooknum,
        HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST");
 DUMP_TUPLE(&orig_tp);
 DEBUGP("Range %p: ", mr);
 for (i = 0; i < mr->rangesize; i++) {
  DEBUGP("%u:%s%s%s %u.%u.%u.%u - %u.%u.%u.%u %u - %u\n",
         i,
         (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS)
         ? " MAP_IPS" : "",
         (mr->range[i].flags
   & IP_NAT_RANGE_PROTO_SPECIFIED)
         ? " PROTO_SPECIFIED" : "",
         (mr->range[i].flags & IP_NAT_RANGE_FULL)
         ? " FULL" : "",
         NIPQUAD(mr->range[i].min_ip),
         NIPQUAD(mr->range[i].max_ip),
         mr->range[i].min.all,
         mr->range[i].max.all);
 }
 }
#endif
 
 do {
// Find an unused translated tuple: mr holds the target address/port
// parameters given by the NAT rule, new_tuple receives the translated
// original-direction tuple of the connection
  if (!get_unique_tuple(&new_tuple, &orig_tp, mr, conntrack,
          hooknum)) {
   DEBUGP("ip_nat_setup_info: Can't get unique for %p.\n",
          conntrack);
   return NF_DROP;
  }
#if 0
  DEBUGP("Hook %u (%s) %p\n", hooknum,
         HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST",
         conntrack);
  DEBUGP("Original: ");
  DUMP_TUPLE(&orig_tp);
  DEBUGP("New: ");
  DUMP_TUPLE(&new_tuple);
#endif
  /* We now have two tuples (SRCIP/SRCPT/DSTIP/DSTPT):
     the original (A/B/C/D') and the mangled one (E/F/G/H').
     We're only allowed to work with the SRC per-proto
     part, so we create inverses of both to start, then
     derive the other fields we need.  */
  /* Reply connection: simply invert the new tuple
                   (G/H/E/F') */
// Build the reverse tuple of the translated connection.  This is what lets
// netfilter handle the reply direction automatically: after defining an SNAT
// rule there is no need to define a DNAT rule for the returning packets.
  invert_tuplepr(&reply, &new_tuple);
  /* Alter conntrack table so it recognizes replies.
                   If fail this race (reply tuple now used), repeat. */

// Update the connection so that reply packets are recognized correctly; if
// `reply` already belongs to another connection,
// ip_conntrack_alter_reply() returns 0 and another translated tuple has to
// be chosen
 } while (!ip_conntrack_alter_reply(conntrack, &reply));
 /* FIXME: We can simply used existing conntrack reply tuple
           here --RR */
 /* Create inverse of original: C/D/A/B' */
 invert_tuplepr(&inv_tuple, &orig_tp);
 
 /* Has source changed?. */
// Source NAT
 if (!ip_ct_tuple_src_equal(&new_tuple, &orig_tp)) {
  /* In this direction, a source manip. */
// Original direction of the connection: SNAT
  info->manips[info->num_manips++] =
   ((struct ip_nat_info_manip)
    { IP_CT_DIR_ORIGINAL, hooknum,
      IP_NAT_MANIP_SRC, new_tuple.src });
  IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
  /* In the reverse direction, a destination manip. */
// Reply direction of the connection: DNAT
  info->manips[info->num_manips++] =
   ((struct ip_nat_info_manip)
    { IP_CT_DIR_REPLY, opposite_hook[hooknum],
      IP_NAT_MANIP_DST, orig_tp.src });
  IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);
 }
 
 /* Has destination changed? */
// Destination NAT
 if (!ip_ct_tuple_dst_equal(&new_tuple, &orig_tp)) {
  /* In this direction, a destination manip */
// Original direction of the connection: DNAT
  info->manips[info->num_manips++] =
   ((struct ip_nat_info_manip)
    { IP_CT_DIR_ORIGINAL, hooknum,
      IP_NAT_MANIP_DST, reply.src });
  IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
  /* In the reverse direction, a source manip. */
// Reply direction of the connection: SNAT
  info->manips[info->num_manips++] =
   ((struct ip_nat_info_manip)
    { IP_CT_DIR_REPLY, opposite_hook[hooknum],
      IP_NAT_MANIP_SRC, inv_tuple.src });
  IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);
 }
 
 /* If there's a helper, assign it; based on new tuple. */
// For a master connection, look for an application-layer protocol NAT helper
 if (!conntrack->master)
  info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,
      &reply);
 
 /* It's done. */
// Mark the NAT info as initialized for this hook's manip type
 info->initialized |= (1 << HOOK2MANIP(hooknum));
// Add the NAT info to the hash tables (replace it if it was already there)
 if (in_hashes) {
  IP_NF_ASSERT(info->bysource.conntrack);
  replace_in_hashes(conntrack, info);
 } else {
  place_in_hashes(conntrack, info);
 }
 return NF_ACCEPT;
}
 
8. 结论
 
Linux下的NAT流程可以大致表示如下:
         hook_ops
            |
            V
       ip_nat_fn()
            |
            V          否
       是否是新连接------------
            |                 |
            | 是              |
            |                 |
            V                 |
     ip_nat_rule_find()       |
            |                 |
     ipt_snat_target()        |
     ipt_dnat_target()        |
            |                 |
            V                 |
     ip_nat_setup_info()      |
            |                 |
            | <---------------+
            |
            V
      do_bindings()
            |
            V
         hook返回
 
总体来说,netfilter的NAT整体思路比较清楚,读起来比较方便。在NAT info结构中的manips有6项,也就是说对同一个包应该可以作3次NAT。
有点疑问的是对于不进行NAT操作的包,也进行了null_binding,似乎没什么必要。

你可能感兴趣的:(数据结构,.net,linux,socket,防火墙)