netfilter之链接跟踪做nat

上一节我们讲了NAT是基于链接跟踪实现的:一条链接跟踪建立后,要改变它reply方向的tuple才能做NAT,链接跟踪的NAT由函数nf_nat_setup_info实现。

1、nf_nat_setup_info

nf_nat_setup_info对链接跟踪做NAT,只会改变链接跟踪reply方向tuple中的ip、端口,不会改变数据包的ip、端口。数据包的NAT在上一节已经介绍过,是在PRE_ROUTING、POST_ROUTING链的hook点根据链接跟踪的reply方向对数据包做NAT。

nf_nat_setup_info主要做以下几件事

(1)获取链接跟踪和nat关联的结构体struct nf_conn_nat,如果为空就调用nf_ct_ext_add添加NAT扩展,添加失败则直接返回NF_ACCEPT

(2)调用nf_nat_initialized判断该方向是否已经做了链接跟踪的NAT,如果已经做过则触发BUG_ON

(3)调用nf_ct_invert_tuplepr对reply方向的tuple取反,结果赋值给curr_tuple,也就是当前orig方向的tuple

(4)get_unique_tuple这个函数是关键,这个就是得到一个新的tuple,new_tuple,这个new_tuple是做了NAT的orig方向。

(5)调用nf_ct_tuple_equal比较curr_tuple和new_tuple是否相等,如果不相等就要做NAT,改变链接跟踪reply方向的tuple

(6)调用nf_ct_invert_tuplepr对new_tuple取反,得到新的reply方向的tuple

(7)调用nf_conntrack_alter_reply改变链接跟踪reply方向的tuple,并设置IPS_SRC_NAT/IPS_DST_NAT标志,完成链接跟踪的NAT

(8)做了NAT的链接跟踪如果还没有在nat_bysource链表中就要添加进去

(9)设置已经做NAT的标志IPS_DST_NAT_DONE_BIT/IPS_SRC_NAT_DONE_BIT

/*
 * Set up the NAT binding of conntrack @ct for one manipulation type.
 *
 * Only the conntrack entry is touched here: the reply tuple is rewritten
 * so that replies to the translated connection are recognised. No packet
 * is modified by this function.
 *
 * @ct:        conntrack entry to bind
 * @range:     NAT range passed through to get_unique_tuple()
 * @maniptype: IP_NAT_MANIP_SRC or IP_NAT_MANIP_DST
 *
 * Returns NF_ACCEPT on every path, including when the NAT extension
 * cannot be allocated.
 */
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
		  const struct nf_nat_range *range,
		  enum nf_nat_manip_type maniptype)
{
	struct net *net = nf_ct_net(ct);
	struct nf_conntrack_tuple curr_tuple, new_tuple;
	struct nf_conn_nat *nat;
	/* Sampled on entry: true only if no NAT-done bit is set yet. */
	int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);

	/* nat helper or nfctnetlink also setup binding */
	nat = nfct_nat(ct);
	if (!nat) {
		/* No NAT extension attached yet: add one now. */
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
		if (nat == NULL) {
			pr_debug("failed to add NAT extension\n");
			return NF_ACCEPT;
		}
	}

	NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC ||
		     maniptype == IP_NAT_MANIP_DST);
	/* Setting up the same manip type twice on one conntrack is a bug. */
	BUG_ON(nf_nat_initialized(ct, maniptype));

	/* What we've got will look like inverse of reply. Normally
	   this is what is in the conntrack, except for prior
	   manipulations (future optimization: if num_manips == 0,
	   orig_tp =
	   conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
	   /* Invert the reply-direction tuple and store it in curr_tuple. */
	nf_ct_invert_tuplepr(&curr_tuple,
			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* Derive a new, unique tuple from the current one. */
	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);

	/* If the new tuple differs from the current one, the conntrack
	   needs NAT, i.e. its reply-direction tuple must be changed. */
	if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
		struct nf_conntrack_tuple reply;

		/* Alter conntrack table so will recognize replies. */
		/* Invert the new tuple ... */
		nf_ct_invert_tuplepr(&reply, &new_tuple);
		/* ... and install the result as the reply-direction tuple;
		   this is the NAT of the conntrack entry itself. */
		nf_conntrack_alter_reply(ct, &reply);

		/* Non-atomic: we own this at the moment. */
		if (maniptype == IP_NAT_MANIP_SRC)
			ct->status |= IPS_SRC_NAT;
		else
			ct->status |= IPS_DST_NAT;
	}

	/* Place in source hash if this is the first time. */
	if (have_to_hash) {
		unsigned int srchash;

		srchash = hash_by_src(net, nf_ct_zone(ct),
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		spin_lock_bh(&nf_nat_lock);
		/* nf_conntrack_alter_reply might re-allocate extension area,
		   so the pointer obtained earlier must be fetched again. */
		nat = nfct_nat(ct);
		nat->ct = ct;
		hlist_add_head_rcu(&nat->bysource,
				   &net->ipv4.nat_bysource[srchash]);
		spin_unlock_bh(&nf_nat_lock);
	}

	/* It's done. */
	/* Record that SNAT/DNAT setup has been done for this conntrack. */
	if (maniptype == IP_NAT_MANIP_DST)
		set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
	else
		set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);

	return NF_ACCEPT;
}

2、nf_ct_invert_tuplepr

调用__nf_ct_l3proto_find获取三层链接跟踪的操作函数结构体struct nf_conntrack_l3proto实例,调用__nf_ct_l4proto_find获取四层链接跟踪的操作函数结构体struct nf_conntrack_l4proto实例,然后调用nf_ct_invert_tuple根据orig方向取反方向的tuple。

/*
 * Invert @orig into @inverse, looking up the layer-3 and layer-4
 * conntrack protocol handlers from the tuple itself (l3num / protonum).
 * The lookups and the inversion run inside one RCU read-side section.
 * Returns the result of nf_ct_invert_tuple().
 */
bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
			  const struct nf_conntrack_tuple *orig)
{
	const struct nf_conntrack_l3proto *l3proto;
	const struct nf_conntrack_l4proto *l4proto;
	bool inverted;

	rcu_read_lock();
	l3proto = __nf_ct_l3proto_find(orig->src.l3num);
	l4proto = __nf_ct_l4proto_find(orig->src.l3num, orig->dst.protonum);
	inverted = nf_ct_invert_tuple(inverse, orig, l3proto, l4proto);
	rcu_read_unlock();

	return inverted;
}

nf_ct_invert_tuple调用三层、四层的invert_tuple根据orig的nf_conntrack_tuple获取反方向的nf_conntrack_tuple。

/*
 * Fill @inverse with the tuple for the opposite direction of @orig.
 *
 * @inverse is zeroed first, then the layer-3 handler inverts the address
 * part and the layer-4 handler inverts the protocol part.
 *
 * Returns false if the layer-3 invert_tuple callback reports failure;
 * otherwise returns whatever the layer-4 invert_tuple callback returns.
 */
bool
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
		   const struct nf_conntrack_tuple *orig,
		   const struct nf_conntrack_l3proto *l3proto,
		   const struct nf_conntrack_l4proto *l4proto)
{
	memset(inverse, 0, sizeof(*inverse));
	inverse->src.l3num = orig->src.l3num;

	/* Layer 3: derive the reverse-direction addresses from @orig. */
	if (!l3proto->invert_tuple(inverse, orig))
		return false;

	/* Same protocol, opposite direction. */
	inverse->dst.protonum = orig->dst.protonum;
	inverse->dst.dir = !orig->dst.dir;

	/* Layer 4: derive the reverse-direction protocol part from @orig. */
	return l4proto->invert_tuple(inverse, orig);
}

3、get_unique_tuple

这个函数主要是获取唯一的做了nat的tuple。

(1)首先如果是SNAT且没有设置IP_NAT_RANGE_PROTO_RANDOM,就调用find_appropriate_src在nat_bysource链表中查找已经做了NAT的tuple,如果找到了而且没有被其他链接跟踪使用就直接返回

(2)find_best_ips_proto做ip地址的nat

(3)四层协议做NAT:如果设置了IP_NAT_RANGE_PROTO_RANDOM标志也就是随机选择,就调用四层协议的unique_tuple获取唯一没有被使用的端口做NAT;否则,如果设置了IP_NAT_RANGE_PROTO_SPECIFIED也就是指定了端口范围,就要调用in_range判断当前端口是否在指定范围内。当前tuple满足范围要求且没有被使用时保持不变,否则调用unique_tuple重新选择一个唯一的端口。

/*
 * Compute in @tuple a NATed version of @orig_tuple that lies within
 * @range and is not already in use, for the given @maniptype.
 *
 * Steps:
 *  1) For source NAT without IP_NAT_RANGE_PROTO_RANDOM, try to reuse a
 *     mapping already present in the by-source table; accept it if the
 *     resulting tuple is not in use.
 *  2) Otherwise start from a copy of @orig_tuple and let
 *     find_best_ips_proto() choose the IP address.
 *  3) Let the layer-4 NAT protocol handler adjust the per-protocol part
 *     when randomisation is requested, or when the tuple is outside a
 *     specified range or already in use.
 */
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
		 const struct nf_conntrack_tuple *orig_tuple,
		 const struct nf_nat_range *range,
		 struct nf_conn *ct,
		 enum nf_nat_manip_type maniptype)
{
	const struct nf_nat_protocol *l4nat;
	struct net *netns = nf_ct_net(ct);
	u16 ct_zone = nf_ct_zone(ct);

	/* Step 1: only source (ie. NAT/masq) mappings consult the existing
	   by-source mappings, and only when no randomisation is asked for. */
	if (maniptype == IP_NAT_MANIP_SRC &&
	    !(range->flags & IP_NAT_RANGE_PROTO_RANDOM) &&
	    find_appropriate_src(netns, ct_zone, orig_tuple, tuple, range)) {
		pr_debug("get_unique_tuple: Found current src map\n");
		/* Nobody else uses this tuple: take it as is. */
		if (!nf_nat_used_tuple(tuple, ct))
			return;
	}

	/* Step 2: restart from the original tuple and pick the IP. */
	*tuple = *orig_tuple;
	find_best_ips_proto(ct_zone, tuple, range, ct, maniptype);

	/* Step 3: per-protocol part, handled under RCU because the layer-4
	   NAT protocol handler is looked up here. */
	rcu_read_lock();
	l4nat = __nf_nat_proto_find(orig_tuple->dst.protonum);

	if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
		/* Without a specified proto range any value is acceptable;
		   with one, the current value must already be inside it. */
		int acceptable =
			!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
			l4nat->in_range(tuple, maniptype,
					&range->min, &range->max);

		/* Acceptable and unused: nothing more to map. */
		if (acceptable && !nf_nat_used_tuple(tuple, ct)) {
			rcu_read_unlock();
			return;
		}
	}

	/* Either randomisation was requested or the tuple as it stands is
	   not usable: ask the protocol for a unique tuple. */
	l4nat->unique_tuple(tuple, range, maniptype, ct);
	rcu_read_unlock();
}

3.1 find_appropriate_src

对于SNAT会调用这个函数,在已经做了NAT的表nat_bysource中查找源信息相同且zone相同的链接跟踪。如果找到了,就对它reply方向的tuple取反,并把目的部分替换为当前tuple的目的部分,得到候选tuple,然后调用in_range判断候选tuple是否在range指定的范围内,在范围内则返回1,否则继续查找,全部不匹配时返回0。

/*
 * Only called for SRC manip.
 *
 * Walk the by-source hash bucket of @tuple looking for a conntrack with
 * the same source and the same @zone. For each match, build in @result
 * the inverse of that conntrack's reply tuple, with the destination part
 * taken from @tuple, and accept it if it falls within @range.
 *
 * Returns 1 with @result filled in when an in-range mapping is found,
 * 0 otherwise (@result may have been written by a rejected candidate).
 */
static int
find_appropriate_src(struct net *net, u16 zone,
		     const struct nf_conntrack_tuple *tuple,
		     struct nf_conntrack_tuple *result,
		     const struct nf_nat_range *range)
{
	unsigned int bucket = hash_by_src(net, zone, tuple);
	const struct nf_conn_nat *entry;
	const struct hlist_node *pos;
	int found = 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(entry, pos, &net->ipv4.nat_bysource[bucket], bysource) {
		const struct nf_conn *other = entry->ct;

		/* Skip entries with a different source or zone. */
		if (!same_src(other, tuple) || nf_ct_zone(other) != zone)
			continue;

		/* Copy source part from reply tuple, keep our destination. */
		nf_ct_invert_tuplepr(result,
				     &other->tuplehash[IP_CT_DIR_REPLY].tuple);
		result->dst = tuple->dst;

		/* Usable only if the candidate lies within the range. */
		if (in_range(result, range)) {
			found = 1;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}

3.2 find_best_ips_proto

这个函数根据range在指定的地址范围内选择一个IP地址做NAT。

/*
 * Choose the IP address for the manipulated side of @tuple from @range.
 *
 * Does nothing unless IP_NAT_RANGE_MAP_IPS is set. With a single-address
 * range that address is used directly; otherwise the address is selected
 * by hashing the tuple's source IP together with either 0 (when
 * IP_NAT_RANGE_PERSISTENT is set) or the destination IP xor @zone, and
 * scaling the hash into [min_ip, max_ip].
 */
static void
find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
		    const struct nf_nat_range *range,
		    const struct nf_conn *ct,
		    enum nf_nat_manip_type maniptype)
{
	__be32 *target;
	/* Host order */
	u_int32_t lo, hi, pick;
	u32 second_word;

	/* No IP mapping requested: leave the tuple untouched. */
	if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
		return;

	/* The address to rewrite depends on the manipulation type. */
	target = (maniptype == IP_NAT_MANIP_SRC) ? &tuple->src.u3.ip
						 : &tuple->dst.u3.ip;

	/* Fast path: the range holds exactly one address. */
	if (range->min_ip == range->max_ip) {
		*target = range->min_ip;
		return;
	}

	lo = ntohl(range->min_ip);
	hi = ntohl(range->max_ip);

	/* Hashing source and destination IPs spreads connections fairly
	 * evenly while mapping the same client consistently to the same
	 * address. In persistent mode the destination is left out of the
	 * hash, so the choice depends on the source IP only. */
	if (range->flags & IP_NAT_RANGE_PERSISTENT)
		second_word = 0;
	else
		second_word = (__force u32)tuple->dst.u3.ip ^ zone;

	pick = jhash_2words((__force u32)tuple->src.u3.ip, second_word, 0);
	/* Scale the 32-bit hash into an offset within [0, hi - lo]. */
	pick = ((u64)pick * (hi - lo + 1)) >> 32;
	*target = htonl(lo + pick);
}

 

你可能感兴趣的:(网络,协议栈,个人笔记,IP层,netfilter,连接跟踪,nat)