【内核协议栈】Netfilter 之 iptable_nat

1、NAT表的初始化

入口在net/ipv4/netfilter/iptable_nat.c中

1.1、NAT表信息

/*
 * Descriptor of the IPv4 "nat" table.  It is valid on the PRE_ROUTING,
 * LOCAL_IN, LOCAL_OUT and POST_ROUTING hooks and names
 * iptable_nat_table_init() as its table_init callback.
 */
static const struct xt_table nf_nat_ipv4_table = {
	.name		= "nat",
	.af		= NFPROTO_IPV4,
	.me		= THIS_MODULE,
	.table_init	= iptable_nat_table_init,
	.valid_hooks	= (1 << NF_INET_PRE_ROUTING) |
			  (1 << NF_INET_LOCAL_IN) |
			  (1 << NF_INET_LOCAL_OUT) |
			  (1 << NF_INET_POST_ROUTING),
};

1.2、内核加载初始化 iptable_nat_init

/*
 * Module init: register the per-netns operations, then set up the nat
 * table for init_net.  If the table setup fails, the per-netns
 * registration is rolled back and the error is returned.
 */
static int __init iptable_nat_init(void)
{
	int err;

	err = register_pernet_subsys(&iptable_nat_net_ops);
	if (err != 0)
		return err;

	err = iptable_nat_table_init(&init_net);
	if (err == 0)
		return 0;

	/* Table setup failed: undo the pernet registration. */
	unregister_pernet_subsys(&iptable_nat_net_ops);
	return err;
}

1.3、NAT表的初始化 iptable_nat_table_init

/*
 * Create and register the nat table for one network namespace.
 *
 * Returns 0 when the table already exists or was set up successfully,
 * -ENOMEM when the initial table cannot be allocated, or the negative
 * error returned by ipt_register_table() / ipt_nat_register_lookups().
 */
static int __net_init iptable_nat_table_init(struct net *net)
{
	struct ipt_replace *initial;
	int err;

	/* Nothing to do if this namespace already has a nat table. */
	if (net->ipv4.nat_table != NULL)
		return 0;

	/* Build the initial table blob that is handed to registration. */
	initial = ipt_alloc_initial_table(&nf_nat_ipv4_table);
	if (!initial)
		return -ENOMEM;

	err = ipt_register_table(net, &nf_nat_ipv4_table, initial,
				 NULL, &net->ipv4.nat_table);
	if (err < 0)
		goto out_free;

	err = ipt_nat_register_lookups(net);
	if (err < 0) {
		/* Roll back the table registration done just above. */
		ipt_unregister_table(net, net->ipv4.nat_table, NULL);
		net->ipv4.nat_table = NULL;
	}

out_free:
	/* The blob is released on every path once it has been allocated. */
	kfree(initial);
	return err;
}

1.4、iptable_nat_do_chain

这个hook函数的实现直接调用了ipt_do_table这个标准规则匹配函数,会根据当前链上的nat规则逐一匹配和处理。

/*
 * Hook callback for the nat table: hands the packet to ipt_do_table()
 * together with the nat table of the namespace found in @state.
 * @priv is not used here.
 */
static unsigned int iptable_nat_do_chain(void *priv,
					 struct sk_buff *skb,
					 const struct nf_hook_state *state)
{
	struct xt_table *nat_table = state->net->ipv4.nat_table;

	return ipt_do_table(skb, state, nat_table);
}

1.5、ipt_do_table

/*
 * Walk the rules attached to the current hook of @table.  For each rule
 * the standard IP match and then every extension match is evaluated; when
 * all of them match, the rule's target is executed.
 *
 * @skb:   packet being examined
 * @state: hook state (hook number, in/out devices, net namespace)
 * @table: table whose rules are traversed
 *
 * Returns NF_DROP if a match or target set acpar.hotdrop, otherwise the
 * verdict produced by the rule that ended the walk.
 */
unsigned int
ipt_do_table(struct sk_buff *skb,
	     const struct nf_hook_state *state,
	     struct xt_table *table)
{
	unsigned int hook = state->hook;
	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
	const struct iphdr *ip;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	const void *table_base;
	struct ipt_entry *e, **jumpstack;
	unsigned int stackidx, cpu;
	const struct xt_table_info *private;
	struct xt_action_param acpar;
	unsigned int addend;

	/* Initialization */
	stackidx = 0;
	ip = ip_hdr(skb);
	indev = state->in ? state->in->name : nulldevname;
	outdev = state->out ? state->out->name : nulldevname;
	/* We handle fragments by dealing with the first fragment as
	 * if it was a normal packet.  All other fragments are treated
	 * normally, except that they will NEVER match rules that ask
	 * things we don't know, ie. tcp syn flag or ports).  If the
	 * rule is also a fragment-specific rule, non-fragments won't
	 * match it. */
	acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
	acpar.thoff   = ip_hdrlen(skb);
	acpar.hotdrop = false;
	acpar.state   = state;

	WARN_ON(!(table->valid_hooks & (1 << hook)));
	local_bh_disable();
	addend = xt_write_recseq_begin();
	private = READ_ONCE(table->private); /* Address dependency. */
	cpu        = smp_processor_id();
	/* Base address of the rule blob; rule offsets are relative to it. */
	table_base = private->entries;
	jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];

	/* Switch to alternate jumpstack if we're being invoked via TEE.
	 * TEE issues XT_CONTINUE verdict on original skb so we must not
	 * clobber the jumpstack.
	 *
	 * For recursion via REJECT or SYNPROXY the stack will be clobbered
	 * but it is no problem since absolute verdict is issued by these.
	 */
	if (static_key_false(&xt_tee_enabled))
		jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
	/* First rule of the chain attached to this hook. */
	e = get_entry(table_base, private->hook_entry[hook]);

	do {
		const struct xt_entry_target *t;
		const struct xt_entry_match *ematch;
		struct xt_counters *counter;

		WARN_ON(!e);
		/* Standard match on the IP header and in/out device names. */
		if (!ip_packet_match(ip, indev, outdev,
		    &e->ip, acpar.fragoff)) {
 no_match:
			/* Rule did not match: move on to the next rule. */
			e = ipt_next_entry(e);
			continue;
		}
		/* Extension matches. */
		xt_ematch_foreach(ematch, e) {
			acpar.match     = ematch->u.kernel.match;
			acpar.matchinfo = ematch->data;
			/* A single failing extension match fails the whole rule. */
			if (!acpar.match->match(skb, &acpar))
				goto no_match;
		}

		counter = xt_get_this_cpu_counter(&e->counters);
		ADD_COUNTER(*counter, skb->len, 1);
		/* Standard and extension matches all succeeded: fetch the target. */
		t = ipt_get_target_c(e);
		WARN_ON(!t->u.kernel.target);

#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
		/* The packet is traced: log it */
		if (unlikely(skb->nf_trace))
			trace_packet(state->net, skb, hook, state->in,
				     state->out, table->name, private, e);
#endif
		/* Standard target? (it has no ->target callback) */
		if (!t->u.kernel.target->target) {
			int v;

			v = ((struct xt_standard_target *)t)->verdict;
			/* Negative value: not a jump to another rule offset. */
			if (v < 0) {
				/* Pop from stack? */
				/* Anything but XT_RETURN is a final verdict: decode it and stop. */
				if (v != XT_RETURN) {
					verdict = (unsigned int)(-v) - 1;
					break;
				}
				/* XT_RETURN: with an empty jump stack continue at the
				 * hook's underflow entry; otherwise pop the saved rule
				 * and resume with the rule that follows it.
				 */
				if (stackidx == 0) {
					e = get_entry(table_base,
					    private->underflow[hook]);
				} else {
					e = jumpstack[--stackidx];
					e = ipt_next_entry(e);
				}
				continue;
			}
			/* Real jump (target is not simply the next rule and the
			 * rule is not flagged IPT_F_GOTO): remember the current
			 * rule so a later XT_RETURN can resume after it.  A full
			 * jump stack drops the packet.
			 */
			if (table_base + v != ipt_next_entry(e) &&
			    !(e->ip.flags & IPT_F_GOTO)) {
				if (unlikely(stackidx >= private->stacksize)) {
					verdict = NF_DROP;
					break;
				}
				jumpstack[stackidx++] = e;
			}
			/* Continue at the rule located at offset v. */
			e = get_entry(table_base, v);
			continue;
		}
		/* Extension target: invoke its callback. */
		acpar.target   = t->u.kernel.target;
		acpar.targinfo = t->data;

		verdict = t->u.kernel.target->target(skb, &acpar);
		/* XT_CONTINUE: keep matching with the next rule. */
		if (verdict == XT_CONTINUE) {
			/* Target might have changed stuff. */
			ip = ip_hdr(skb);
			e = ipt_next_entry(e);
		/* Any other value is the final verdict: leave the loop. */
		} else {
			/* Verdict */
			break;
		}
	/* Keep going as long as nobody requested a hot drop. */
	} while (!acpar.hotdrop);

	xt_write_recseq_end(addend);
	local_bh_enable();
	/* A hot drop overrides whatever verdict was computed. */
	if (acpar.hotdrop)
		return NF_DROP;
	/* Otherwise hand back the verdict of the walk. */
	else return verdict;
}

2、钩子及钩子函数

入口在net/netfilter/nf_nat_proto.c中(nf_nat_inet_fn、nf_nat_setup_info等核心函数位于net/netfilter/nf_nat_core.c)

2.1、钩子函数数组

/*
 * IPv4 NAT hook registrations: one entry per hook point, each giving the
 * callback, protocol family, hook number and priority.
 *
 *   PRE_ROUTING  -> nf_nat_ipv4_in        (NF_IP_PRI_NAT_DST)
 *   POST_ROUTING -> nf_nat_ipv4_out       (NF_IP_PRI_NAT_SRC)
 *   LOCAL_OUT    -> nf_nat_ipv4_local_fn  (NF_IP_PRI_NAT_DST)
 *   LOCAL_IN     -> nf_nat_ipv4_fn        (NF_IP_PRI_NAT_SRC)
 */
static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
	/* Before packet filtering, change destination */
	{
		.hook		= nf_nat_ipv4_in,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_NAT_DST,
	},
	/* After packet filtering, change source */
	{
		.hook		= nf_nat_ipv4_out,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP_PRI_NAT_SRC,
	},
	/* Before packet filtering, change destination */
	{
		.hook		= nf_nat_ipv4_local_fn,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_NAT_DST,
	},
	/* After packet filtering, change source */
	{
		.hook		= nf_nat_ipv4_fn,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_NAT_SRC,
	},
};

2.2 nf_nat_ipv4_in

函数工作在PRE_ROUTING钩子点,进行DNAT转换

/*
 * PRE_ROUTING hook.  Runs the common NAT path and, when the packet is
 * accepted with a destination address different from the one it arrived
 * with, drops the dst attached to the skb.
 *
 * Returns the verdict produced by nf_nat_ipv4_fn().
 */
static unsigned int
nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
	       const struct nf_hook_state *state)
{
	/* Destination address before any translation. */
	const __be32 orig_daddr = ip_hdr(skb)->daddr;
	unsigned int verdict = nf_nat_ipv4_fn(priv, skb, state);

	if (verdict != NF_ACCEPT)
		return verdict;

	/* Destination was rewritten: the dst held by the skb is stale. */
	if (ip_hdr(skb)->daddr != orig_daddr)
		skb_dst_drop(skb);

	return verdict;
}

2.3 nf_nat_ipv4_fn

函数工作在LOCAL_IN钩子点,进行SNAT转换;

static unsigned int
nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
	       const struct nf_hook_state *state)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return NF_ACCEPT;

	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
							   state->hook))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
	}

	return nf_nat_inet_fn(priv, skb, state);
}

2.4 nf_nat_ipv4_local_fn

函数工作在LOCAL_OUT钩子点,进行DNAT转换

/*
 * LOCAL_OUT hook.  Runs the common NAT path; for an accepted packet that
 * has a conntrack entry it then checks whether the translation changed
 * the destination:
 *  - address changed: re-route the packet with ip_route_me_harder();
 *  - otherwise, with CONFIG_XFRM, when the packet is not already
 *    XFRM-transformed, the protocol is not ICMP and the protocol part of
 *    the destination changed: call nf_xfrm_me_harder().
 * A failure of either call turns the verdict into NF_DROP_ERR(err).
 */
static unsigned int
nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
		     const struct nf_hook_state *state)
{
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	unsigned int verdict;
	int rc;

	verdict = nf_nat_ipv4_fn(priv, skb, state);
	if (verdict != NF_ACCEPT)
		return verdict;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return verdict;

	dir = CTINFO2DIR(ctinfo);

	/* Destination address differs from the reverse tuple's source. */
	if (ct->tuplehash[dir].tuple.dst.u3.ip !=
	    ct->tuplehash[!dir].tuple.src.u3.ip) {
		rc = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
		if (rc < 0)
			verdict = NF_DROP_ERR(rc);
		return verdict;
	}

#ifdef CONFIG_XFRM
	/* Same address, but the protocol part of the destination changed. */
	if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
	    ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
	    ct->tuplehash[dir].tuple.dst.u.all !=
	    ct->tuplehash[!dir].tuple.src.u.all) {
		rc = nf_xfrm_me_harder(state->net, skb, AF_INET);
		if (rc < 0)
			verdict = NF_DROP_ERR(rc);
	}
#endif
	return verdict;
}

2.5 nf_nat_ipv4_out

函数工作在POST_ROUTING钩子点,进行SNAT转换;

/*
 * POST_ROUTING hook.  Runs the common NAT path and returns its verdict.
 *
 * With CONFIG_XFRM, an accepted packet that is not yet XFRM-transformed
 * and has a conntrack entry is additionally passed to nf_xfrm_me_harder()
 * when its source address changed, or when the protocol is not ICMP and
 * the protocol part of the source changed.  A failure there turns the
 * verdict into NF_DROP_ERR(err).
 */
static unsigned int
nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	int rc;
#endif
	unsigned int verdict;

	verdict = nf_nat_ipv4_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	if (verdict != NF_ACCEPT)
		return verdict;

	if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
		return verdict;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return verdict;

	dir = CTINFO2DIR(ctinfo);

	/* Source untouched (address, and protocol part unless ICMP): done. */
	if (ct->tuplehash[dir].tuple.src.u3.ip ==
	    ct->tuplehash[!dir].tuple.dst.u3.ip &&
	    (ct->tuplehash[dir].tuple.dst.protonum == IPPROTO_ICMP ||
	     ct->tuplehash[dir].tuple.src.u.all ==
	     ct->tuplehash[!dir].tuple.dst.u.all))
		return verdict;

	rc = nf_xfrm_me_harder(state->net, skb, AF_INET);
	if (rc < 0)
		verdict = NF_DROP_ERR(rc);
#endif
	return verdict;
}

2.6 nf_nat_ipv4_fn

nf_nat_ipv4_fn是SNAT/DNAT转换流程的公共入口(代码与2.3节相同):它自身直接注册在LOCAL_IN钩子点,其余三个钩子函数nf_nat_ipv4_in、nf_nat_ipv4_local_fn、nf_nat_ipv4_out也都会调用该函数

static unsigned int
nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
	       const struct nf_hook_state *state)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return NF_ACCEPT;

	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
							   state->hook))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
	}

	return nf_nat_inet_fn(priv, skb, state);
}

2.7 nf_nat_inet_fn

/*
 * Family-independent NAT hook body.
 *
 * For a connection whose manip type (SRC or DST, derived from the hook)
 * has not been set up yet, the registered lookup hooks are run; if none
 * of them sets up a mapping, a null binding is installed.  The packet is
 * then rewritten by nf_nat_packet().  If nf_nat_oif_changed() reports a
 * change, the conntrack entry is killed and the packet dropped.
 *
 * @priv:  struct nf_nat_lookup_hook_priv holding the lookup hook entries
 * @skb:   packet being processed
 * @state: hook state
 *
 * Returns NF_ACCEPT for untracked packets, a non-ACCEPT verdict from a
 * lookup hook or the null binding, NF_DROP on an interface change, or
 * the result of nf_nat_packet().
 */
unsigned int
nf_nat_inet_fn(void *priv, struct sk_buff *skb,
	       const struct nf_hook_state *state)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	/* maniptype == SRC for postrouting. */
	/* Decide between DNAT and SNAT from the hook: PRE_ROUTING and
	 * LOCAL_OUT do DNAT, LOCAL_IN and POST_ROUTING do SNAT
	 * (mapping provided by HOOK2MANIP, defined elsewhere).
	 */
	enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
	/* Conntrack entry (nf_conn) attached to the skb. */
	ct = nf_ct_get(skb, &ctinfo);
	/* Can't track?  It's not due to stress, or conntrack would
	 * have dropped it.  Hence it's the user's responsibilty to
	 * packet filter it out, or implement conntrack/NAT for that
	 * protocol. 8) --RR
	 */
	if (!ct)
		return NF_ACCEPT;
	/* NAT extension of the conntrack entry. */
	nat = nfct_nat(ct);
	/* Dispatch on the conntrack state of this packet. */
	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED_REPLY:
		/* Only ICMPs can be IP_CT_IS_REPLY.  Fallthrough */
	case IP_CT_NEW:
		/* Seen it before?  This can happen for loopback, retrans,
		 * or local packets.
		 */
		/* This manip type has not been set up for the connection yet. */
		if (!nf_nat_initialized(ct, maniptype)) {
			struct nf_nat_lookup_hook_priv *lpriv = priv;
			struct nf_hook_entries *e = rcu_dereference(lpriv->entries);
			unsigned int ret;
			int i;

			if (!e)
				goto null_bind;

			for (i = 0; i < e->num_hook_entries; i++) {
				ret = e->hooks[i].hook(e->hooks[i].priv, skb,
						       state);
				if (ret != NF_ACCEPT)
					return ret;
				/* A lookup hook set up the mapping: go rewrite the packet. */
				if (nf_nat_initialized(ct, maniptype))
					goto do_nat;
			}
null_bind:
			/* No lookup hook set up a mapping: install a null binding. */
			ret = nf_nat_alloc_null_binding(ct, state->hook);
			if (ret != NF_ACCEPT)
				return ret;
		} else {
			/* This manip type was already set up earlier. */
			pr_debug("Already setup manip %s for ct %p (status bits 0x%lx)\n",
				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
				 ct, ct->status);
			/* Outgoing interface changed. */
			if (nf_nat_oif_changed(state->hook, ctinfo, nat,
					       state->out))
				goto oif_changed;
		}
		break;
	default:
		/* ESTABLISHED */
		WARN_ON(ctinfo != IP_CT_ESTABLISHED &&
			ctinfo != IP_CT_ESTABLISHED_REPLY);
		if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
			goto oif_changed;
	}
do_nat:
	/* Apply the NAT rewrite to the packet. */
	return nf_nat_packet(ct, ctinfo, state->hook, skb);

oif_changed:
	nf_ct_kill_acct(ct, ctinfo, skb);
	return NF_DROP;
}

2.8 nf_nat_alloc_null_binding

unsigned int
nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
{
	return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum));
}

2.9 __nf_nat_alloc_null_binding

static unsigned int
__nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip)
{
	/* Force range to this IP; let proto decide mapping for
	 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
	 * Use reply in case it's already been mangled (eg local packet).
	 */
	 /* 使用应答方向的ip地址,LOCAL_OUT会先经过mangle,可能改变了 */
	union nf_inet_addr ip =
		(manip == NF_NAT_MANIP_SRC ?
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
	/* 设置range */
	struct nf_nat_range2 range = {
		.flags		= NF_NAT_RANGE_MAP_IPS,
		.min_addr	= ip,
		.max_addr	= ip,
	};
	/* 进行NAT转换 */
	return nf_nat_setup_info(ct, &range, manip);
}

2.10 nf_nat_setup_info

/*
 * Compute and record the NAT mapping of one manip type for @ct.
 *
 * @ct:        conntrack entry; nothing is done if it is already confirmed
 * @range:     address/protocol range the mapping is chosen from
 * @maniptype: NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST
 *
 * Returns NF_ACCEPT on success (and for confirmed entries), NF_DROP when
 * the manip type was already initialized or the seqadj extension cannot
 * be added.
 */
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
		  const struct nf_nat_range2 *range,
		  enum nf_nat_manip_type maniptype)
{
	struct net *net = nf_ct_net(ct);
	struct nf_conntrack_tuple curr_tuple, new_tuple;

	/* Can't setup nat info for confirmed ct. */
	/* Already confirmed: accept without touching anything. */
	if (nf_ct_is_confirmed(ct))
		return NF_ACCEPT;

	WARN_ON(maniptype != NF_NAT_MANIP_SRC &&
		maniptype != NF_NAT_MANIP_DST);

	if (WARN_ON(nf_nat_initialized(ct, maniptype)))
		return NF_DROP;

	/* What we've got will look like inverse of reply. Normally
	 * this is what is in the conntrack, except for prior
	 * manipulations (future optimization: if num_manips == 0,
	 * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
	 */
	/* Derive the current tuple by inverting the reply tuple. */
	nf_ct_invert_tuple(&curr_tuple,
			   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
	/* Pick the translated tuple from the current tuple and the range. */
	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
	/* The tuple actually changes under NAT. */
	if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
		struct nf_conntrack_tuple reply;

		/* Alter conntrack table so will recognize replies. */
		/* Build the reply tuple from the new tuple ... */
		nf_ct_invert_tuple(&reply, &new_tuple);
		/* ... and install it as the entry's reply tuple. */
		nf_conntrack_alter_reply(ct, &reply);

		/* Non-atomic: we own this at the moment. */
		/* Flag the connection as needing this kind of NAT. */
		if (maniptype == NF_NAT_MANIP_SRC)
			ct->status |= IPS_SRC_NAT;
		else
			ct->status |= IPS_DST_NAT;
		/* A helper is attached but no seqadj extension yet: add one. */
		if (nfct_help(ct) && !nfct_seqadj(ct))
			if (!nfct_seqadj_ext_add(ct))
				return NF_DROP;
	}
	/* Source manip: link the entry into the by-source hash under its bucket lock. */
	if (maniptype == NF_NAT_MANIP_SRC) {
		unsigned int srchash;
		spinlock_t *lock;

		srchash = hash_by_src(net,
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
		spin_lock_bh(lock);
		hlist_add_head_rcu(&ct->nat_bysource,
				   &nf_nat_bysource[srchash]);
		spin_unlock_bh(lock);
	}

	/* It's done. */
	/* Record that setup for this manip type is complete. */
	if (maniptype == NF_NAT_MANIP_DST)
		ct->status |= IPS_DST_NAT_DONE;
	else
		ct->status |= IPS_SRC_NAT_DONE;

	return NF_ACCEPT;
}

2.11 get_unique_tuple

/*
 * Choose the translated tuple for a connection.
 *
 * @tuple:      output, the tuple to use after NAT
 * @orig_tuple: tuple before this manipulation
 * @range:      allowed address/protocol range and flags
 * @ct:         conntrack entry the tuple is chosen for
 * @maniptype:  NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST
 *
 * The three steps are described by the numbered comments below; the
 * function returns as soon as one of them yields an acceptable tuple.
 */
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
		 const struct nf_conntrack_tuple *orig_tuple,
		 const struct nf_nat_range2 *range,
		 struct nf_conn *ct,
		 enum nf_nat_manip_type maniptype)
{
	const struct nf_conntrack_zone *zone;
	struct net *net = nf_ct_net(ct);

	zone = nf_ct_zone(ct);

	/* 1) If this srcip/proto/src-proto-part is currently mapped,
	 * and that same mapping gives a unique tuple within the given
	 * range, use that.
	 *
	 * This is only required for source (ie. NAT/masq) mappings.
	 * So far, we don't do local source mappings, so multiple
	 * manips not an issue.
	 */
	/* Source manip and NF_NAT_RANGE_PROTO_RANDOM_ALL not requested. */
	if (maniptype == NF_NAT_MANIP_SRC &&
	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
		/* try the original tuple first */
		/* Does the original tuple already fall inside the range? */
		if (in_range(orig_tuple, range)) {
			/* Not used by another connection: keep it unchanged. */
			if (!nf_nat_used_tuple(orig_tuple, ct)) {
				*tuple = *orig_tuple;
				return;
			}
		/* Original tuple is out of range: look for an existing
		 * source mapping that satisfies the range.
		 */
		} else if (find_appropriate_src(net, zone,
						orig_tuple, tuple, range)) {
			pr_debug("get_unique_tuple: Found current src map\n");
			/* Use it if the resulting tuple is not taken. */
			if (!nf_nat_used_tuple(tuple, ct))
				return;
		}
	}
	/* 2) Select the least-used IP/proto combination in the given range */
	*tuple = *orig_tuple;
	find_best_ips_proto(zone, tuple, range, ct, maniptype);

	/* 3) The per-protocol part of the manip is made to map into
	 * the range to make a unique tuple.
	 */

	/* Only bother mapping if it's not already in range and unique */
	/* NF_NAT_RANGE_PROTO_RANDOM_ALL not requested. */
	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
		/* A protocol range was specified: check the protocol part. */
		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
			/* Done when no PROTO_OFFSET is requested, the protocol
			 * part is already in range, and either the range is a
			 * single value or the tuple is unused.
			 */
			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
			    l4proto_in_range(tuple, maniptype,
			          &range->min_proto,
			          &range->max_proto) &&
			    (range->min_proto.all == range->max_proto.all ||
			     !nf_nat_used_tuple(tuple, ct)))
				return;
		/* No protocol range: keep the protocol part if the tuple is unused. */
		} else if (!nf_nat_used_tuple(tuple, ct)) {
			return;
		}
	}

	/* Last chance: get protocol to try to obtain unique tuple. */
	nf_nat_l4proto_unique_tuple(tuple, range, maniptype, ct);
}

2.12 nf_nat_packet

/*
 * Rewrite @skb according to the NAT state stored in @ct.
 *
 * The manip type comes from @hooknum and the direction from @ctinfo.
 * The status bit to test is IPS_SRC_NAT or IPS_DST_NAT for that manip
 * type, swapped for packets in the reply direction.  When the bit is set
 * in ct->status the packet is handed to nf_nat_manip_pkt().
 *
 * Returns NF_ACCEPT when no rewrite is needed, otherwise the verdict of
 * nf_nat_manip_pkt().
 */
unsigned int nf_nat_packet(struct nf_conn *ct,
			   enum ip_conntrack_info ctinfo,
			   unsigned int hooknum,
			   struct sk_buff *skb)
{
	const enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
	const enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit;

	statusbit = (mtype == NF_NAT_MANIP_SRC) ? IPS_SRC_NAT : IPS_DST_NAT;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	/* Non-atomic: these bits don't change. */
	if (!(ct->status & statusbit))
		return NF_ACCEPT;

	/* Write the translated address/protocol part into the packet. */
	return nf_nat_manip_pkt(skb, ct, mtype, dir);
}

你可能感兴趣的:(网络,前端,linux)