netfilter连接跟踪初始化

连接跟踪的初始化主要有三个地方

(1)连接跟踪本身初始化。

(2)在对应的Hook上注册连接跟踪的处理函数。

(3)初始化连接跟踪和三层协议、四层协议相关的函数。

1、连接跟踪本身初始化

连接跟踪本身的初始化函数是nf_conntrack_net_init,主要做两件事情:为连接跟踪分配slab缓冲、初始化proc文件系统。

1.1、nf_conntrack_net_init

nf_conntrack_net_init是连接跟踪初始化的入口函数,主要调用nf_conntrack_init函数分配slab缓冲,调用nf_conntrack_standalone_init_proc初始化proc文件系统。

/*
 * Per-namespace conntrack setup entry point.
 *
 * Runs nf_conntrack_init(), then the proc setup, then stores the two sysctl
 * default values and registers the sysctl table.  When a step fails, the
 * steps that already succeeded are undone in reverse order and the failing
 * step's negative return value is passed back.  Returns 0 on success.
 */
static int nf_conntrack_net_init(struct net *net)
{
	int err = nf_conntrack_init(net);

	if (err < 0)
		return err;

	/* proc filesystem entries */
	err = nf_conntrack_standalone_init_proc(net);
	if (err < 0)
		goto undo_core;

	/* default values: sysctl_checksum = 1, sysctl_log_invalid = 0 */
	net->ct.sysctl_checksum = 1;
	net->ct.sysctl_log_invalid = 0;

	err = nf_conntrack_standalone_init_sysctl(net);
	if (err < 0)
		goto undo_proc;

	return 0;

undo_proc:
	nf_conntrack_standalone_fini_proc(net);
undo_core:
	nf_conntrack_cleanup(net);
	return err;
}

1.2、nf_conntrack_init

nf_conntrack_init主要调用两个函数:当net是init_net时先调用nf_conntrack_init_init_net做一次全局初始化,然后对每个网络命名空间调用nf_conntrack_init_net。

int nf_conntrack_init(struct net *net)
{
	int ret;

	if (net_eq(net, &init_net)) {
		ret = nf_conntrack_init_init_net();
		if (ret < 0)
			goto out_init_net;
	}
	ret = nf_conntrack_init_net(net);
	if (ret < 0)
		goto out_net;

	if (net_eq(net, &init_net)) {
		/* For use by REJECT target */
		rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
		rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);

		/* Howto get NAT offsets */
		rcu_assign_pointer(nf_ct_nat_offset, NULL);
	}
	return 0;

out_net:
	if (net_eq(net, &init_net))
		nf_conntrack_cleanup_init_net();
out_init_net:
	return ret;
}

1.3、nf_conntrack_init_init_net

/*
 * One-time global conntrack initialisation (called for init_net only).
 *
 * Picks the hash table size when none was configured, derives
 * nf_conntrack_max from it, logs both, then initialises the protocol and
 * helper subsystems (plus the zone extension when CONFIG_NF_CONNTRACK_ZONES
 * is set) and prepares the "untracked" fake conntrack entry.
 *
 * Returns 0 on success; on failure the already-initialised subsystems are
 * torn down in reverse order and the negative error is returned.
 */
static int nf_conntrack_init_init_net(void)
{
	int factor;
	int err;

	if (nf_conntrack_htable_size) {
		/* A table size was given: keep the old factor of 8 so the
		 * maximum number of entries is not reduced. */
		factor = 8;
	} else {
		/* Idea from tcp.c: use 1/16384 of memory.  On i386 a 32MB
		 * machine gets 512 buckets; machines with more than 1GB get
		 * 16384 buckets. */
		if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
			nf_conntrack_htable_size = 16384;
		else
			nf_conntrack_htable_size
				= (((totalram_pages << PAGE_SHIFT) / 16384)
				   / sizeof(struct hlist_head));

		/* never go below 32 buckets */
		if (nf_conntrack_htable_size < 32)
			nf_conntrack_htable_size = 32;

		/* Default factor of four gives the same maximum as with the
		 * old struct list_heads. */
		factor = 4;
	}
	nf_conntrack_max = factor * nf_conntrack_htable_size;

	printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
	       nf_conntrack_max);

	/* protocol tables (the original note names the layer-3 array
	 * nf_ct_l3protos) */
	err = nf_conntrack_proto_init();
	if (err < 0)
		return err;

	err = nf_conntrack_helper_init();
	if (err < 0)
		goto undo_proto;

#ifdef CONFIG_NF_CONNTRACK_ZONES
	err = nf_ct_extend_register(&nf_ct_zone_extend);
	if (err < 0)
		goto undo_helper;
#endif

	/* Set up fake conntrack: to never be deleted, not in any hashes */
#ifdef CONFIG_NET_NS
	nf_conntrack_untracked.ct_net = &init_net;
#endif
	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
	/* ... and make it look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);

	return 0;

#ifdef CONFIG_NF_CONNTRACK_ZONES
undo_helper:
	nf_conntrack_helper_fini();
#endif
undo_proto:
	nf_conntrack_proto_fini();
	return err;
}

2、注册连接跟踪的hook函数

2.1、ipv4_defrag_ops

在PREROUTING和OUT链上注册ipv4_conntrack_defrag,这个函数主要是对分片的数据包进行重组(defrag)。PREROUTING和OUT链是netfilter框架的两个入口:一个是接收外界数据包的入口,一个是本机产生数据包的入口。

/*
 * Hook table attaching ipv4_conntrack_defrag to the PF_INET PRE_ROUTING and
 * LOCAL_OUT hook points, both at priority NF_IP_PRI_CONNTRACK_DEFRAG.
 */
static struct nf_hook_ops ipv4_defrag_ops[] = {
	{
		/* NOTE(review): the original note said "fragment the data";
		 * going by the handler's name this is fragment reassembly
		 * (defrag) - confirm against the handler. */
		.hook		= ipv4_conntrack_defrag,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
	},
	{
		/* same handler for locally generated packets */
		.hook           = ipv4_conntrack_defrag,
		.owner          = THIS_MODULE,
		.pf             = PF_INET,
		.hooknum        = NF_INET_LOCAL_OUT,
		.priority       = NF_IP_PRI_CONNTRACK_DEFRAG,
	},
};

2.2、ipv4_conntrack_ops

netfilter框架有两个入口(PREROUTING链、OUT链)和两个出口(LOCAL_IN链、POSTROUTING链)。PREROUTING是接收外界数据包时进入的第一个链,OUT链是本机产生的数据包进入的第一个链;LOCAL_IN是本机接收数据包的最后一个链,POSTROUTING是数据包发送出去前的最后一个链。所以在PREROUTING链上注册ipv4_conntrack_in、在OUT链上注册ipv4_conntrack_local来建立连接跟踪,在LOCAL_IN和POSTROUTING链上注册ipv4_confirm来确认一条连接跟踪。

/*
 * IPv4 conntrack hook table: two entry hooks at priority NF_IP_PRI_CONNTRACK
 * and two exit hooks at priority NF_IP_PRI_CONNTRACK_CONFIRM.
 */
static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
	{
		/* Packets entering from outside: create the conntrack entry
		 * at PRE_ROUTING, the first chain they traverse. */
		.hook		= ipv4_conntrack_in,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		/* Locally generated packets: create the conntrack entry at
		 * LOCAL_OUT. */
		.hook		= ipv4_conntrack_local,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		/* Packets leaving the host: confirm the conntrack entry at
		 * POST_ROUTING. */
		.hook		= ipv4_confirm,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
	{
		/* Packets delivered to this host: confirm the conntrack entry
		 * at LOCAL_IN. */
		.hook		= ipv4_confirm,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
};

2.3、注册hook

调用nf_register_hooks注册连接跟踪的hook函数

/* Excerpt of the module init function; "..." marks code omitted by the
 * article.  Shown here: registration of the ipv4_conntrack_ops hook table. */
static int __init nf_conntrack_l3proto_ipv4_init(void)
{

    ...
    /* Register the conntrack hook handlers; on failure log the error and
     * jump to the cleanup_ipv4 label (not shown in this excerpt). */
    ret = nf_register_hooks(ipv4_conntrack_ops,
				ARRAY_SIZE(ipv4_conntrack_ops));
	if (ret < 0) {
		pr_err("nf_conntrack_ipv4: can't register hooks.\n");
		goto cleanup_ipv4;
	}
    ...
}

nf_register_hooks函数

int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
{
	unsigned int i;
	int err = 0;

	for (i = 0; i < n; i++) {
		err = nf_register_hook(®[i]);
		if (err)
			goto err;
	}
	return err;

err:
	if (i > 0)
		nf_unregister_hooks(reg, i);
	return err;
}

nf_register_hook函数

/*
 * Insert one hook descriptor into nf_hooks[reg->pf][reg->hooknum], keeping
 * that list ordered by ascending priority.
 *
 * Returns 0 on success, or the negative value from
 * mutex_lock_interruptible() if taking nf_hook_mutex failed.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
	struct nf_hook_ops *elem;
	int err;

	err = mutex_lock_interruptible(&nf_hook_mutex);
	if (err < 0)
		return err;
	/* nf_hooks is a 2-D array: first index is the protocol family,
	 * second index is the hook point (chain). */
	list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
		/* Stop at the first entry with a strictly greater priority,
		 * so entries of equal priority keep registration order. */
		if (reg->priority < elem->priority)
			break;
	}
	/* Link the new entry right before elem; if the loop ran to the end,
	 * elem's list member is the list head, i.e. this appends at the tail. */
	list_add_rcu(&reg->list, elem->list.prev);
	mutex_unlock(&nf_hook_mutex);
	return 0;
}

nf_hooks是一个二维数组,数组的每个元素是一个链表头(struct list_head):一维下标是协议族,二维下标是hook点(链)。

/* One list head per (protocol family, hook point) pair; nf_register_hook()
 * links hook descriptors into nf_hooks[pf][hooknum]. */
extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];

3、注册三层、四层相关的处理函数

ip、icmp、udp协议的注册方式都一样,这里以tcp协议为例。

调用nf_conntrack_l4proto_register函数注册nf_conntrack_l4proto_tcp4到全局数组nf_ct_protos中

/* Excerpt of the module init function; "..." marks code omitted by the
 * article.  Shown here: registration of the layer-4 (tcp, udp, icmp) and
 * layer-3 (ipv4) protocol descriptors.  Each failure logs a message and
 * jumps to a cleanup label that is not shown in this excerpt. */
static int __init nf_conntrack_l3proto_ipv4_init(void)
{
...

	/* Register the TCP descriptor (original note: into nf_ct_protos). */
	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4);
	if (ret < 0) {
		pr_err("nf_conntrack_ipv4: can't register tcp.\n");
		goto cleanup_sockopt;
	}
	/* Register the UDP descriptor (original note: into nf_ct_protos). */
	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4);
	if (ret < 0) {
		pr_err("nf_conntrack_ipv4: can't register udp.\n");
		goto cleanup_tcp;
	}
	/* Register the ICMP descriptor (original note: into nf_ct_protos). */
	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp);
	if (ret < 0) {
		pr_err("nf_conntrack_ipv4: can't register icmp.\n");
		goto cleanup_udp;
	}
	/* Register the IPv4 layer-3 descriptor.
	 * NOTE(review): the original note names nf_ct_protos here too; a
	 * layer-3 registration presumably targets nf_ct_l3protos - confirm. */
	ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
	if (ret < 0) {
		pr_err("nf_conntrack_ipv4: can't register ipv4\n");
		goto cleanup_icmp;
	}
...

}

nf_conntrack_l4proto_register

/* Excerpt; "..." marks code omitted by the article (including the
 * declaration of ret and the locking of nf_ct_proto_mutex, which are not
 * visible here). */
int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
{
...
	/* Publish the descriptor in the global nf_ct_protos table, indexed
	 * by layer-3 protocol and then layer-4 protocol. */
	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
			   l4proto);

out_unlock:
	mutex_unlock(&nf_ct_proto_mutex);
	return ret;
}

nf_conntrack_l4proto_tcp4

/*
 * Layer-4 protocol descriptor for TCP over IPv4 (l3proto PF_INET, l4proto
 * IPPROTO_TCP).  The netlink callbacks are only present when
 * CONFIG_NF_CT_NETLINK is built in or as a module; the sysctl tables only
 * when CONFIG_SYSCTL is set.
 */
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
{
	.l3proto		= PF_INET,
	.l4proto 		= IPPROTO_TCP,
	.name 			= "tcp",
	/* Fill a tuple's source and destination ports from the TCP header
	 * in the skb. */
	.pkt_to_tuple 		= tcp_pkt_to_tuple,
	/* Build the reply-direction ports: reply source = original
	 * destination, reply destination = original source. */
	.invert_tuple 		= tcp_invert_tuple,
	/* Original note: prints the tuple's source and destination ports. */
	.print_tuple 		= tcp_print_tuple,
	.print_conntrack 	= tcp_print_conntrack,
	.packet 		= tcp_packet,
	.new 			= tcp_new,
	.error			= tcp_error,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
	/* Original note: converts the tuple's source/destination ports to
	 * nfnetlink attributes.  NOTE(review): not verifiable from this
	 * excerpt - confirm what tcp_to_nlattr actually dumps. */
	.to_nlattr		= tcp_to_nlattr,
	.nlattr_size		= tcp_nlattr_size,
	/* Original note: converts nfnetlink-format source/destination ports
	 * back into a tuple.  NOTE(review): confirm, as above. */
	.from_nlattr		= nlattr_to_tcp,
	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
	.nla_policy		= nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
	.ctl_table_users	= &tcp_sysctl_table_users,
	.ctl_table_header	= &tcp_sysctl_header,
	.ctl_table		= tcp_sysctl_table,
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	.ctl_compat_table	= tcp_compat_sysctl_table,
#endif
#endif
};

tcp_pkt_to_tuple从数据包skb中获取源端口、目的端口

static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
			     struct nf_conntrack_tuple *tuple)
{
	const struct tcphdr *hp;
	struct tcphdr _hdr;

	/* Actually only need first 8 bytes. */
	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
	if (hp == NULL)
		return false;
	/*获取源端口*/
	tuple->src.u.tcp.port = hp->source;
	/*获取目的端口*/
	tuple->dst.u.tcp.port = hp->dest;

	return true;
}

tcp_invert_tuple将orig方向的源端口、目的端口交换后赋值给reply方向(reply的源端口取orig的目的端口,reply的目的端口取orig的源端口)

/*
 * Fill the port fields of the reply-direction tuple from the original
 * direction: the source and destination ports are swapped.
 *
 * Always returns true.  The two stores run in this fixed order with no
 * temporary, so if tuple and orig point to the same object both ports end
 * up holding the original destination port.
 */
static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
			     const struct nf_conntrack_tuple *orig)
{
	/* reply source port = original destination port, and vice versa */
	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
	return true;
}

 

你可能感兴趣的:(协议栈,个人笔记,netfilter,连接跟踪)