连接跟踪的初始化主要有三个地方
(1)连接跟踪本身初始化。
(2)在对应的Hook上注册连接跟踪的处理函数。
(3)初始化连接跟踪和三层协议、四层协议相关的函数。
1、连接跟踪本身初始化
连接跟踪本身的初始化函数是nf_conntrack_net_init,主要做两件事情:为连接跟踪分配slab缓冲、初始化proc文件系统。
1.1、nf_conntrack_net_init
nf_conntrack_net_init是连接跟踪初始化的入口函数,主要调用nf_conntrack_init函数分配slab缓冲,调用nf_conntrack_standalone_init_proc初始化proc文件系统,之后还会调用nf_conntrack_standalone_init_sysctl初始化sysctl;任何一步失败都会按相反的顺序回滚已完成的步骤。
/*
 * Per-network-namespace entry point for connection tracking setup.
 *
 * Runs three steps in order: nf_conntrack_init(), then
 * nf_conntrack_standalone_init_proc(), then
 * nf_conntrack_standalone_init_sysctl().  When a step fails, the steps
 * that already succeeded are undone in reverse order.
 *
 * Returns 0 on success, otherwise the negative value reported by the
 * step that failed.
 */
static int nf_conntrack_net_init(struct net *net)
{
	int err;

	/* Core conntrack initialization. */
	err = nf_conntrack_init(net);
	if (err < 0)
		goto fail_core;

	/* proc filesystem entries. */
	err = nf_conntrack_standalone_init_proc(net);
	if (err < 0)
		goto fail_proc;

	/* Defaults for the two tunables, set before the sysctl step runs. */
	net->ct.sysctl_checksum = 1;
	net->ct.sysctl_log_invalid = 0;

	err = nf_conntrack_standalone_init_sysctl(net);
	if (err < 0)
		goto fail_sysctl;

	return 0;

fail_sysctl:
	nf_conntrack_standalone_fini_proc(net);
fail_proc:
	nf_conntrack_cleanup(net);
fail_core:
	return err;
}
1.2、nf_conntrack_init
nf_conntrack_init主要调用nf_conntrack_init_init_net(仅当net是init_net时才调用)和nf_conntrack_init_net。
/*
 * Initialize connection tracking for one network namespace.
 *
 * For the initial namespace (init_net) the global setup in
 * nf_conntrack_init_init_net() runs first and, once the per-namespace
 * setup has succeeded, the global callback pointers are published.
 * Every namespace runs nf_conntrack_init_net().
 *
 * Returns 0 on success or the negative value of the failing step; on a
 * per-namespace failure in init_net the global setup is rolled back.
 */
int nf_conntrack_init(struct net *net)
{
	const int is_init_ns = net_eq(net, &init_net);
	int err;

	if (is_init_ns) {
		err = nf_conntrack_init_init_net();
		if (err < 0)
			goto fail_global;
	}

	err = nf_conntrack_init_net(net);
	if (err < 0)
		goto fail_ns;

	if (is_init_ns) {
		/* For use by REJECT target */
		rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
		rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);

		/* Howto get NAT offsets */
		rcu_assign_pointer(nf_ct_nat_offset, NULL);
	}

	return 0;

fail_ns:
	if (is_init_ns)
		nf_conntrack_cleanup_init_net();
fail_global:
	return err;
}
1.3、nf_conntrack_init_init_net
/*
 * One-time global conntrack setup.
 *
 * Sizes the hash table when no size was supplied, derives
 * nf_conntrack_max from it, runs the protocol and helper init routines
 * (plus the zone extension when CONFIG_NF_CONNTRACK_ZONES is set) and
 * prepares the static "untracked" conntrack entry.
 *
 * Returns 0 on success or the negative value of the step that failed,
 * after undoing the earlier steps.
 */
static int nf_conntrack_init_init_net(void)
{
	int max_factor = 8;
	int err;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
	if (!nf_conntrack_htable_size) {
		/* Take 1/16384 of total memory, expressed in bucket heads. */
		nf_conntrack_htable_size
			= (((totalram_pages << PAGE_SHIFT) / 16384)
			   / sizeof(struct hlist_head));

		/* More than 1GB of memory: use 16384 buckets. */
		if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
			nf_conntrack_htable_size = 16384;

		/* Never go below 32 buckets. */
		if (nf_conntrack_htable_size < 32)
			nf_conntrack_htable_size = 32;

		/* Use a max. factor of four by default to get the same max as
		 * with the old struct list_heads. When a table size is given
		 * we use the old value of 8 to avoid reducing the max.
		 * entries. */
		max_factor = 4;
	}
	nf_conntrack_max = max_factor * nf_conntrack_htable_size;

	printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
	       nf_conntrack_max);

	/* Protocol tracker setup (the original note says this initializes
	 * the layer-3 protocol array nf_ct_l3protos -- not visible here). */
	err = nf_conntrack_proto_init();
	if (err < 0)
		goto undo_none;

	err = nf_conntrack_helper_init();
	if (err < 0)
		goto undo_proto;

#ifdef CONFIG_NF_CONNTRACK_ZONES
	err = nf_ct_extend_register(&nf_ct_zone_extend);
	if (err < 0)
		goto undo_helper;
#endif

	/* Set up fake conntrack: to never be deleted, not in any hashes */
#ifdef CONFIG_NET_NS
	nf_conntrack_untracked.ct_net = &init_net;
#endif
	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
	/*  - and look it like as a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);

	return 0;

#ifdef CONFIG_NF_CONNTRACK_ZONES
undo_helper:
	nf_conntrack_helper_fini();
#endif
undo_proto:
	nf_conntrack_proto_fini();
undo_none:
	return err;
}
2、注册连接跟踪的hook函数
2.1、ipv4_defrag_ops
在PREROUTING和OUT链上注册ipv4_conntrack_defrag,这个函数主要是对已分片的数据包进行重组(defrag),保证连接跟踪看到的是完整的数据包。PREROUTING和OUT链是netfilter框架的两个入口:一个是接收外界数据包的入口,一个是本机产生数据包的入口。
/*
 * Hook registrations for ipv4_conntrack_defrag (IPv4 defragmentation,
 * going by its name).  The same handler is attached at both PRE_ROUTING
 * and LOCAL_OUT, with priority NF_IP_PRI_CONNTRACK_DEFRAG.
 */
static struct nf_hook_ops ipv4_defrag_ops[] = {
	{
		/* Packets arriving from outside. */
		.pf       = PF_INET,
		.hooknum  = NF_INET_PRE_ROUTING,
		.priority = NF_IP_PRI_CONNTRACK_DEFRAG,
		.hook     = ipv4_conntrack_defrag,
		.owner    = THIS_MODULE,
	},
	{
		/* Packets generated by the local host. */
		.pf       = PF_INET,
		.hooknum  = NF_INET_LOCAL_OUT,
		.priority = NF_IP_PRI_CONNTRACK_DEFRAG,
		.hook     = ipv4_conntrack_defrag,
		.owner    = THIS_MODULE,
	},
};
2.2、ipv4_conntrack_ops
netfilter框架有两个入口(PREROUTING链、OUT链)和两个出口(LOCAL_IN链、POSTROUTING链):PREROUTING是外界数据包进入后经过的第一个链,OUT链是本机产生的数据包经过的第一个链;LOCAL_IN是本机接收数据包经过的最后一个链,POSTROUTING是数据包发送出去前经过的最后一个链。所以在PREROUTING链上注册ipv4_conntrack_in、在OUT链上注册ipv4_conntrack_local来建立连接跟踪,在LOCAL_IN和POSTROUTING链上注册ipv4_confirm来确认一条连接跟踪。
/*
 * Conntrack hook registrations for IPv4.  The two entry hooks
 * (PRE_ROUTING, LOCAL_OUT) run at NF_IP_PRI_CONNTRACK; the two exit hooks
 * (POST_ROUTING, LOCAL_IN) run ipv4_confirm at
 * NF_IP_PRI_CONNTRACK_CONFIRM.
 */
static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
	{
		/* Entry for packets from outside: PRE_ROUTING. */
		.pf       = NFPROTO_IPV4,
		.hooknum  = NF_INET_PRE_ROUTING,
		.priority = NF_IP_PRI_CONNTRACK,
		.hook     = ipv4_conntrack_in,
		.owner    = THIS_MODULE,
	},
	{
		/* Entry for locally generated packets: LOCAL_OUT. */
		.pf       = NFPROTO_IPV4,
		.hooknum  = NF_INET_LOCAL_OUT,
		.priority = NF_IP_PRI_CONNTRACK,
		.hook     = ipv4_conntrack_local,
		.owner    = THIS_MODULE,
	},
	{
		/* Exit for packets leaving the host: confirm at POST_ROUTING. */
		.pf       = NFPROTO_IPV4,
		.hooknum  = NF_INET_POST_ROUTING,
		.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
		.hook     = ipv4_confirm,
		.owner    = THIS_MODULE,
	},
	{
		/* Exit for packets delivered locally: confirm at LOCAL_IN. */
		.pf       = NFPROTO_IPV4,
		.hooknum  = NF_INET_LOCAL_IN,
		.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
		.hook     = ipv4_confirm,
		.owner    = THIS_MODULE,
	},
};
2.3、注册hook
调用nf_register_hooks注册连接跟踪的hook函数
/*
 * Excerpt of the IPv4 conntrack module init routine showing only the hook
 * registration step; the "..." lines stand for code elided by the article.
 */
static int __init nf_conntrack_l3proto_ipv4_init(void)
{
...
/* Register every conntrack hook handler listed in ipv4_conntrack_ops. */
ret = nf_register_hooks(ipv4_conntrack_ops,
ARRAY_SIZE(ipv4_conntrack_ops));
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register hooks.\n");
goto cleanup_ipv4;
}
...
}
nf_register_hooks函数
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
{
unsigned int i;
int err = 0;
for (i = 0; i < n; i++) {
err = nf_register_hook(®[i]);
if (err)
goto err;
}
return err;
err:
if (i > 0)
nf_unregister_hooks(reg, i);
return err;
}
nf_register_hook函数
/*
 * Insert one hook descriptor into the list selected by its protocol
 * family and hook number, keeping that list ordered by priority.
 *
 * @reg: descriptor to insert; reg->pf, reg->hooknum and reg->priority
 *       are read here.
 *
 * Returns 0 on success, or the negative value from
 * mutex_lock_interruptible() if taking nf_hook_mutex was interrupted.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
	struct nf_hook_ops *elem;
	int err;

	err = mutex_lock_interruptible(&nf_hook_mutex);
	if (err < 0)
		return err;

	/* nf_hooks is indexed by protocol family, then by hook number. */
	list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
		/* Stop at the first entry with a larger priority value. */
		if (reg->priority < elem->priority)
			break;
	}

	/*
	 * Link reg in right before the entry the scan stopped on.  If the
	 * scan ran to completion the cursor refers to the list head, so
	 * reg is appended at the tail.
	 * Was "®->list": "&reg" had been decoded as an HTML entity.
	 */
	list_add_rcu(&reg->list, elem->list.prev);
	mutex_unlock(&nf_hook_mutex);
	return 0;
}
nf_hooks是一个二维数组,数组的每个元素都是一个链表头(struct list_head):第一维下标是协议族,第二维下标是hook点(链),同一个hook点上注册的处理函数按优先级挂在对应的链表上。
/* Table of hook lists, indexed as nf_hooks[protocol family][hook number]
 * (nf_register_hook indexes it with reg->pf and reg->hooknum). */
extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
3、注册三层、四层相关的处理函数
ip、icmp、udp协议的注册方式都一样,这里只以tcp协议为例进行说明。
调用nf_conntrack_l4proto_register函数注册nf_conntrack_l4proto_tcp4到全局数组nf_ct_protos中
/*
 * Excerpt of the IPv4 conntrack module init routine showing the protocol
 * registration steps; the "..." lines stand for code elided by the
 * article.  Each failure jumps to a cleanup label (defined in the elided
 * part) named after the previously completed step.
 */
static int __init nf_conntrack_l3proto_ipv4_init(void)
{
...
/* Register the TCP layer-4 tracker (nf_conntrack_l4proto_tcp4). */
ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4);
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register tcp.\n");
goto cleanup_sockopt;
}
/* Register the UDP layer-4 tracker (nf_conntrack_l4proto_udp4). */
ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4);
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register udp.\n");
goto cleanup_tcp;
}
/* Register the ICMP layer-4 tracker (nf_conntrack_l4proto_icmp). */
ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp);
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register icmp.\n");
goto cleanup_udp;
}
/* Register the IPv4 layer-3 tracker (nf_conntrack_l3proto_ipv4).
 * NOTE(review): the original note says this also goes into nf_ct_protos;
 * a layer-3 tracker presumably lands in a separate table -- confirm. */
ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register ipv4\n");
goto cleanup_icmp;
}
...
}
nf_conntrack_l4proto_register
/*
 * Excerpt of the layer-4 tracker registration routine; the "..." line
 * stands for code elided by the article (including the declaration of
 * ret and the point where nf_ct_proto_mutex is taken).
 */
int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
{
...
/* Publish l4proto in the global nf_ct_protos table, in the slot selected
 * by its own l3proto and l4proto fields. */
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
l4proto);
out_unlock:
mutex_unlock(&nf_ct_proto_mutex);
return ret;
}
nf_conntrack_l4proto_tcp4
/*
 * Layer-4 tracker descriptor for TCP over IPv4: a table of callbacks
 * keyed by (PF_INET, IPPROTO_TCP).  The netlink callbacks are only
 * present when ctnetlink is configured, the sysctl tables only with
 * CONFIG_SYSCTL.
 */
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
{
	.l3proto		= PF_INET,
	.l4proto		= IPPROTO_TCP,
	.name			= "tcp",
	/* Fill a tuple's source/destination port from a TCP packet. */
	.pkt_to_tuple		= tcp_pkt_to_tuple,
	/* Build the reply-direction tuple by swapping the orig ports. */
	.invert_tuple		= tcp_invert_tuple,
	/* Print a tuple's source and destination port. */
	.print_tuple		= tcp_print_tuple,
	.print_conntrack	= tcp_print_conntrack,
	.packet			= tcp_packet,
	.new			= tcp_new,
	.error			= tcp_error,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
	/* Conversions to and from netlink attributes. */
	.to_nlattr		= tcp_to_nlattr,
	.nlattr_size		= tcp_nlattr_size,
	.from_nlattr		= nlattr_to_tcp,
	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
	.nla_policy		= nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
	.ctl_table_users	= &tcp_sysctl_table_users,
	.ctl_table_header	= &tcp_sysctl_header,
	.ctl_table		= tcp_sysctl_table,
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	.ctl_compat_table	= tcp_compat_sysctl_table,
#endif
#endif
};
tcp_pkt_to_tuple从数据包skb中获取源端口、目的端口
static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
{
const struct tcphdr *hp;
struct tcphdr _hdr;
/* Actually only need first 8 bytes. */
hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
if (hp == NULL)
return false;
/*获取源端口*/
tuple->src.u.tcp.port = hp->source;
/*获取目的端口*/
tuple->dst.u.tcp.port = hp->dest;
return true;
}
tcp_invert_tuple将orig方向的源端口、目的端口互换后赋值给reply方向(orig的目的端口作为reply的源端口,orig的源端口作为reply的目的端口)
/*
 * Fill the TCP ports of the reply-direction tuple from the original one.
 *
 * @tuple: tuple to fill in
 * @orig:  original-direction tuple to read from
 *
 * The ports are swapped: orig's destination port becomes tuple's source
 * port and orig's source port becomes tuple's destination port.
 * Always returns true.
 */
static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
			     const struct nf_conntrack_tuple *orig)
{
	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
	tuple->dst.u.tcp.port = orig->src.u.tcp.port;

	return true;
}