netfilter filter表

iptables是linux下常用的一个防火墙软件,可以实现对网络访问的各种限制。iptables相当于防火墙的客户端,负责与用户进行交互,其后台依赖于内核的netfilter模块。iptables的各种配置,最终都是由netfilter模块来实现的。

iptables分为4个表:filter表,nat表,raw表,mangle表。filter表为默认表。如下面的指令,不指定表名时,默认操作的是filter表。

iptables -I INPUT -s 1.2.3.4 -j ACCEPT

 再如下面的指令,操作的是nat表,需要用-t选项指明操作的表名。

iptables -t nat -A PREROUTING -d 1.2.3.4 -p tcp -m tcp --dport 81 -j DNAT --to-destination 192.168.0.2:8180 

 ipv4 filter的初始化函数是iptable_filter_init,代码在net/ipv4/netfilter/iptable_filter.c文件中。

/*
 * Module init for the IPv4 filter table.
 *
 * Builds the hook-ops array for packet_filter (every entry calls
 * iptable_filter_hook) and registers the per-network-namespace operations.
 * Returns the result of register_pernet_subsys(), or the error encoded in
 * the pointer returned by xt_hook_ops_alloc().
 */
static int __init iptable_filter_init(void)
{
	int err;

	filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
	if (IS_ERR(filter_ops))
		return PTR_ERR(filter_ops);

	err = register_pernet_subsys(&iptable_filter_net_ops);
	if (err >= 0)
		return err;

	/* Registration failed: the hook-ops array is no longer needed. */
	kfree(filter_ops);
	return err;
}

packet_filter的定义如下:

/*
 * Bitmask of the netfilter hook points the IPv4 filter table attaches to:
 * one bit per hook number for LOCAL_IN, FORWARD and LOCAL_OUT.
 *
 * The continuation lines must be contiguous: a blank line after a trailing
 * backslash ends the macro early.
 */
#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
			    (1 << NF_INET_FORWARD) | \
			    (1 << NF_INET_LOCAL_OUT))

/*
 * Descriptor of the IPv4 "filter" table: its name and address family, the
 * hooks it is valid on, the priority used for its hook ops and the function
 * that initialises it.
 */
static const struct xt_table packet_filter = {
	.name		= "filter",
	.af		= NFPROTO_IPV4,
	.me		= THIS_MODULE,
	.valid_hooks	= FILTER_VALID_HOOKS,
	.priority	= NF_IP_PRI_FILTER,
	.table_init	= iptable_filter_table_init,
};

xt_hook_ops_alloc的处理逻辑是根据valid_hooks计算需要添加几个钩子处理函数。由FILTER_VALID_HOOKS的定义可知,其中有3个bit位是1,因此num_hooks为3。调用kcalloc申请3个nf_hook_ops对象,并分别赋值。

xt_hook_ops_alloc代码如下:

/*
 * Allocate an array of nf_hook_ops, one element per bit set in
 * table->valid_hooks, and point every element at @fn.
 *
 * Each element takes its protocol family and priority from @table and its
 * hook number from the position of the corresponding bit in the mask.
 *
 * Returns the kcalloc()ed array, ERR_PTR(-EINVAL) when no hook bit is set,
 * or ERR_PTR(-ENOMEM) when the allocation fails.
 */
struct nf_hook_ops *
xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
{
	unsigned int remaining = table->valid_hooks;
	/* Number of bits set in the mask; 3 for packet_filter. */
	uint8_t num_hooks = hweight32(remaining);
	uint8_t filled = 0;
	uint8_t hooknum = 0;
	struct nf_hook_ops *ops;

	if (num_hooks == 0)
		return ERR_PTR(-EINVAL);

	ops = kcalloc(num_hooks, sizeof(*ops), GFP_KERNEL);
	if (!ops)
		return ERR_PTR(-ENOMEM);

	/* Walk the mask from bit 0 upwards, emitting one slot per set bit. */
	while (filled < num_hooks && remaining != 0) {
		if (remaining & 1) {
			struct nf_hook_ops *slot = &ops[filled++];

			slot->hook     = fn;
			slot->pf       = table->af;
			slot->hooknum  = hooknum;
			slot->priority = table->priority;
		}
		remaining >>= 1;
		++hooknum;
	}

	return ops;
}

iptable_filter_net_ops的初始化方法是iptable_filter_net_init,它最后会调用到iptable_filter_table_init。

/*
 * Create and register the filter table for one network namespace.
 *
 * Does nothing (returns 0) when net->ipv4.iptable_filter is already set.
 * Otherwise builds the initial ruleset for packet_filter, sets the verdict
 * of the FORWARD entry according to the `forward` setting, and registers the
 * table. The temporary ruleset is freed whether or not registration worked.
 *
 * Returns 0 on success, -ENOMEM when the initial ruleset cannot be
 * allocated, or the error returned by ipt_register_table().
 */
static int __net_init iptable_filter_table_init(struct net *net)
{
	struct ipt_replace *initial;
	struct ipt_standard *rules;
	int rc;

	if (net->ipv4.iptable_filter != NULL)
		return 0;

	initial = ipt_alloc_initial_table(&packet_filter);
	if (!initial)
		return -ENOMEM;

	/* Entry 1 is the FORWARD hook. */
	rules = (struct ipt_standard *)initial->entries;
	if (forward)
		rules[1].target.verdict = -NF_ACCEPT - 1;
	else
		rules[1].target.verdict = -NF_DROP - 1;

	rc = ipt_register_table(net, &packet_filter, initial, filter_ops,
				&net->ipv4.iptable_filter);
	kfree(initial);
	return rc;
}

 将ipt_alloc_initial_table展开后,代码如下:

/*
 * Build the initial ruleset for a table: one standard entry with an
 * NF_ACCEPT verdict per valid hook, followed by one terminating error entry.
 *
 * The result is a single zeroed allocation holding an ipt_replace header,
 * the per-hook entries and the terminator. hook_entry[] and underflow[] of
 * each valid hook both hold the byte offset of that hook's entry.
 *
 * Returns the allocation (the caller frees it) or NULL on allocation failure.
 */
void *ipt_alloc_initial_table(const struct xt_table *info)
{
	struct {
		struct ipt_replace repl;
		struct ipt_standard entries[];
	} *tbl;
	struct ipt_error *term;
	unsigned int hook_mask = info->valid_hooks;
	unsigned int nhooks = hweight32(hook_mask); /* 3 for the filter table */
	unsigned int offset = 0, hooknum = 0, slot = 0;
	size_t term_offset;

	/* Place the terminator after the last entry, rounded up to its alignment. */
	term_offset = offsetof(typeof(*tbl), entries[nhooks]);
	term_offset += __alignof__(*term) - 1;
	term_offset &= ~(__alignof__(*term) - 1);

	tbl = kzalloc(term_offset + sizeof(*term), GFP_KERNEL);
	if (!tbl)
		return NULL;

	term = (struct ipt_error *)((char *)tbl + term_offset);
	strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name));
	*term = (struct ipt_error)IPT_ERROR_INIT;

	tbl->repl.valid_hooks = hook_mask;
	tbl->repl.num_entries = nhooks + 1;
	tbl->repl.size = nhooks * sizeof(struct ipt_standard) +
			 sizeof(struct ipt_error);

	/* Walk the mask from bit 0 upwards; each set bit gets the next entry. */
	while (hook_mask != 0) {
		if (hook_mask & 1) {
			tbl->repl.hook_entry[hooknum] = offset;
			tbl->repl.underflow[hooknum]  = offset;
			tbl->entries[slot++] = (struct ipt_standard)
				IPT_STANDARD_INIT(NF_ACCEPT);
			offset += sizeof(struct ipt_standard);
		}
		hook_mask >>= 1;
		++hooknum;
	}

	return tbl;
}

在分析ipt_register_table前,先看下xt_alloc_table_info的代码:

/*
 * Allocate an xt_table_info followed by @size bytes of rule storage.
 *
 * Only the xt_table_info header is zeroed; its size field is set to @size.
 * Returns NULL when the total size overflows, when the request is larger
 * than the amount of RAM, or when the allocation fails.
 */
struct xt_table_info *xt_alloc_table_info(unsigned int size)
{
	struct xt_table_info *info;
	size_t total = sizeof(*info) + size;

	/* The sum wrapped around. */
	if (total < sizeof(*info))
		return NULL;

	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
		return NULL;

	info = kvmalloc(total, GFP_KERNEL);
	if (info) {
		memset(info, 0, sizeof(*info));
		info->size = size;
	}

	return info;
}

申请的实际大小为sizeof(xt_table_info) + size,且xt_table_info类型的info的size设置为入参。

/*
 * Register an IPv4 table in a network namespace.
 *
 * Copies the ruleset described by @repl into a newly allocated
 * xt_table_info, runs translate_table() on it, registers the table with
 * xt_register_table(), stores the resulting table pointer in *@res and
 * finally registers the hook ops @ops.
 *
 * Returns 0 on success or a negative error code. When translate_table() or
 * xt_register_table() fails, the new xt_table_info is freed; when hook
 * registration fails, the table is unregistered again and *@res is reset
 * to NULL.
 */
int ipt_register_table(struct net *net, const struct xt_table *table,
		       const struct ipt_replace *repl,
		       const struct nf_hook_ops *ops, struct xt_table **res)
{
	int ret;
	struct xt_table_info *newinfo;
	struct xt_table_info bootstrap = {0};
	void *loc_cpu_entry;
	struct xt_table *new_table;

	/*
	 * For a ruleset built by ipt_alloc_initial_table(), repl->size is
	 * nhooks * sizeof(struct ipt_standard) + sizeof(struct ipt_error),
	 * with nhooks == 3 for the filter table. xt_alloc_table_info() adds
	 * sizeof(struct xt_table_info) on top, so the allocation holds one
	 * xt_table_info, three ipt_standard and one ipt_error.
	 */
	newinfo = xt_alloc_table_info(repl->size);
	if (!newinfo)
		return -ENOMEM;

	loc_cpu_entry = newinfo->entries;

	/*
	 * Copy the rules themselves; for the initial filter table these are
	 * the IPT_STANDARD_INIT(NF_ACCEPT) entries set up in
	 * ipt_alloc_initial_table().
	 */
	memcpy(loc_cpu_entry, repl->entries, repl->size);

	ret = translate_table(net, newinfo, loc_cpu_entry, repl);
	if (ret != 0)
		goto out_free;

	/*
	 * Per the article's note (xt_register_table() is not shown here): the
	 * input table is duplicated, the copy is linked into
	 * net->xt.tables[table->af], and new_table->private is newinfo.
	 */
	new_table = xt_register_table(net, table, &bootstrap, newinfo);
	if (IS_ERR(new_table)) {
		ret = PTR_ERR(new_table);
		goto out_free;
	}

	/* set res now, will see skbs right after nf_register_net_hooks */
	/* For the filter table, *res is net->ipv4.iptable_filter. */
	WRITE_ONCE(*res, new_table);

	/* Register the hook ops (filter_ops for the filter table). */
	ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
	if (ret != 0) {
		__ipt_unregister_table(net, new_table);
		*res = NULL;
	}

	return ret;

out_free:
	xt_free_table_info(newinfo);
	return ret;
}

注册完filter_ops后,经过NF_INET_LOCAL_IN,NF_INET_FORWARD,NF_INET_LOCAL_OUT链的数据,会调用iptable_filter_hook进行处理。iptable_filter_hook的代码如下:

/*
 * Hook function installed for every valid hook of the filter table.
 *
 * Locally generated packets that are too short to hold an IP header, or
 * whose header length field is smaller than a minimal IP header, are
 * accepted without consulting the table. Every other packet is run through
 * the namespace's filter table with ipt_do_table().
 */
static unsigned int
iptable_filter_hook(void *priv, struct sk_buff *skb,
		    const struct nf_hook_state *state)
{
	if (state->hook == NF_INET_LOCAL_OUT) {
		/* root is playing with raw sockets. */
		if (skb->len < sizeof(struct iphdr) ||
		    ip_hdrlen(skb) < sizeof(struct iphdr))
			return NF_ACCEPT;
	}

	return ipt_do_table(skb, state, state->net->ipv4.iptable_filter);
}
/*
 * Run a packet through the rules of @table for the hook given in @state.
 *
 * NOTE(review): this listing looks like an abridged excerpt rather than a
 * complete function. In the code shown here indev, outdev and acpar are read
 * without ever being assigned, the no_match label has no goto, and nothing
 * is done when a rule matches. Treat it as an outline of how the first rule
 * of a hook is located and how the standard match is applied.
 */
unsigned int
ipt_do_table(struct sk_buff *skb,
	     const struct nf_hook_state *state,
	     struct xt_table *table)
{
	unsigned int hook = state->hook; /* e.g. NF_INET_LOCAL_IN */
	const struct iphdr *ip;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	const void *table_base;
    struct ipt_entry *e;
	unsigned int stackidx, cpu;
	const struct xt_table_info *private;
	struct xt_action_param acpar;

	ip = ip_hdr(skb); /* network-layer header */

	private = table->private;
	/* Start of the rule blob (per the article, the last member of xt_table_info). */
	table_base = private->entries;

	/*
	 * Locate the first rule of this hook. Per the article, get_entry()
	 * amounts to (struct ipt_entry *)(base + offset).
	 */
	e = get_entry(table_base, private->hook_entry[hook]);

	do {
		const struct xt_entry_target *t;
		const struct xt_entry_match *ematch;
		struct xt_counters *counter;

		WARN_ON(!e);

		/*
		 * Apply the rule's standard match; when it fails, move on to
		 * the next rule. Per the article, the standard match covers
		 * source/destination address, in/out interface and protocol.
		 */
		if (!ip_packet_match(ip, indev, outdev,
		    &e->ip, acpar.fragoff)) {
 no_match:
			e = ipt_next_entry(e);
			continue;
		}

	} while (!acpar.hotdrop);

	if (acpar.hotdrop)
		return NF_DROP;
	else return verdict;
}

总结一下:一个xt_table结构,代表netfilter一个表,其中filter表对应的是packet_filter。packet_filter的信息,保存到了net->ipv4.iptable_filter上。通过table->private获取到xt_table_info的指针。通过get_entry获取struct ipt_entry结构的指针。一个struct ipt_entry代表filter链中的一条配置。

/*
 * One rule in an iptables table.
 *
 * The fixed-size part holds the IP match and bookkeeping fields. It is
 * followed in memory by the rule's matches (if any) and then its target,
 * which start at elems. target_offset and next_offset are byte counts
 * measured from the start of this structure.
 */
struct ipt_entry {
    /* IP match part of the rule. */
    struct ipt_ip ip;

    /* Mark with fields that we care about. */
    unsigned int nfcache;

    /* Size of ipt_entry + matches */
    __u16 target_offset;
    /* Size of ipt_entry + matches + target */
    __u16 next_offset; 

    /* Back pointer */
    unsigned int comefrom;

    /* Packet and byte counters. */
    struct xt_counters counters;

    /* The matches (if any), then the target. */
    unsigned char elems[0];
};

说起来有点抽象,让我们进入内核,把这些信息打印出来吧。

首先在filter表的INPUT链添加几条数据,指令如下:

iptables -I INPUT -s 1.2.3.4 -j ACCEPT
iptables -I INPUT -p tcp ! -s 1.2.3.5 -j ACCEPT
iptables -I INPUT -p udp -d 1.2.3.0/24 -j ACCEPT

 配置结果如下:

netfilter filter表_第1张图片

 怎么把内核里的数据,打印出来呢。可以通过编写字符驱动,实现驱动的打开操作,在打开操作的回调函数中,将所需的信息打印出来。

 

void ipt_entry_print(struct ipt_entry* ipt_entry)
{
	if (NULL == ipt_entry)
	{
		printk("ipt_entry is null\n");
		return;
	}

	struct ipt_ip ip = ipt_entry->ip;
	// 打印ip
	printk("---ip---\n");
	printk("src ip: %X, src mask: %X\n", ip.src, ip.smsk);
	printk("dst ip: %X, dst mask: %X\n", ip.dst, ip.dmsk);
	printk("iniface: %s\n", ip.iniface);
	printk("outiface: %s\n", ip.outiface);

	/*
	IPPROTO_IP		0
	IPPROTO_TCP		6
	IPPROTO_UDP		17
	*/
	printk("proto: %d\n", ip.proto); // 6-tcp
	printk("flags: %d\n", ip.flags);

	/*
	取反标志
	#define IPT_INV_VIA_IN		0x01	Invert the sense of IN IFACE. 
	#define IPT_INV_VIA_OUT		0x02	Invert the sense of OUT IFACE 
	#define IPT_INV_TOS			0x04	Invert the sense of TOS. 
	#define IPT_INV_SRCIP		0x08	Invert the sense of SRC IP. 
	#define IPT_INV_DSTIP		0x10	Invert the sense of DST OP. 
	#define IPT_INV_FRAG		0x20	Invert the sense of FRAG. 
	#define IPT_INV_PROTO		XT_INV_PROTO
	#define IPT_INV_MASK		0x7F	All possible flag bits mask. 
	*/
	printk("invflags: %d\n", ip.invflags); // 8-源地址取反

	printk("---ipt_entry---\n");
	printk("nfcache: %d\n", ipt_entry->nfcache);
	printk("target_offset: %d\n", ipt_entry->target_offset);
	printk("next_offset: %d\n", ipt_entry->next_offset);
	printk("comefrom: %d\n", ipt_entry->comefrom);
}

static int hello_open(struct inode* inode, struct file*filep)
{
	printk("hello_open\n");
	struct task_struct *tsk = current;
	struct net *net;
	struct xt_table *xt_filter;
	struct xt_table_info *filter_info;
	const void* table_base;
	int i = 0;
	int local_in_hook_entry;
	struct ipt_entry* ipt_entry;

	struct nsproxy *nsprx = tsk->nsproxy; //命名空间
	if (NULL == nsprx)
	{
		printk("nsprx is null\n");
		return 0;
	}

	printk("hello_open get net\n");
	net = nsprx->net_ns;
	if (NULL == net)
	{
		printk("net is null\n");
		return 0;
	}

	printk("hello_open get xt_table\n");
	xt_filter = net->ipv4.iptable_filter;
	if (NULL == xt_filter)
	{
		printk("xt_filter is null\n");
		return 0;
	}

	// 打印xt_table信息
	printk("xt_table: af - %d\n", xt_filter->af);
	printk("xt_table: name - %s\n", xt_filter->name);
	printk("xt_table: valid_hooks - %d\n", xt_filter->valid_hooks);
	printk("xt_table: priority - %d\n", xt_filter->priority);

	filter_info = xt_filter->private;
	if (NULL == filter_info)
	{
		printk("filter_info is null\n");
		return 0;
	}

	printk("filter_info: size - %d\n", filter_info->size);
	printk("filter_info: number - %d\n", filter_info->number); // 4?
	printk("filter_info: initial_entries - %d\n", filter_info->initial_entries);
	printk("filter_info: stacksize - %d\n", filter_info->stacksize);

	table_base = filter_info->entries;

	local_in_hook_entry = filter_info->hook_entry[NF_INET_LOCAL_IN];
	printk("filter_info: local_in_hook_entry - %d\n", local_in_hook_entry);

	// 其实获取到的是ipt_standard的地址,ipt_entry在ipt_standardh中
	ipt_entry = table_base + local_in_hook_entry;
	if (NULL == ipt_entry)
	{
		printk("ipt_entry is null\n");
		return 0;
	}

	for (; i < 24; ++i)
	{
		printk("***BEGIN***\n");
		ipt_entry_print(ipt_entry);

		ipt_entry = (void *)ipt_entry + ipt_entry->next_offset;
		printk("***END***\n\n");
	}

	//struct ipt_entry* nxt_ipt_entry = (void *)ipt_entry + ipt_entry->next_offset;
	//ipt_entry_print(nxt_ipt_entry);

	printk("hello_open finish\n");
	return 0;
}

驱动编译,安装完成后,再编写客户端程序,打开字符驱动文件,如:

/* Opening the device node makes the driver run hello_open(); real code should also check for fd < 0. */
int fd = open("/dev/test",O_RDWR);

打开/dev/test后,可触发字符驱动执行hello_open。打印的内核日志,可以通过dmesg指令查看。

 

[ 7539.033208] hello_open
[ 7539.033209] hello_open get net
[ 7539.033209] hello_open get xt_table
[ 7539.033210] xt_table: af - 2  // NFPROTO_IPV4
[ 7539.033210] xt_table: name - filter // 名称
[ 7539.033211] xt_table: valid_hooks - 14 // 第1,2,3 bit位为1(从0开始)
[ 7539.033211] xt_table: priority - 0
[ 7539.033212] filter_info: size - 4272
[ 7539.033212] filter_info: number - 26 //共26条配置?
[ 7539.033213] filter_info: initial_entries - 4
[ 7539.033213] filter_info: stacksize - 5
[ 7539.033213] filter_info: local_in_hook_entry - 0
[ 7539.033214] ***BEGIN***
[ 7539.033214] ---ip--- // 对应第1条配置,目的地址为 1.2.3.0/24
[ 7539.033215] src ip: 0, src mask: 0
[ 7539.033215] dst ip: 30201, dst mask: FFFFFF
[ 7539.033216] iniface: 
[ 7539.033216] outiface: 
[ 7539.033216] proto: 17 // UDP协议
[ 7539.033217] flags: 0
[ 7539.033217] invflags: 0
[ 7539.033217] ---ipt_entry---
[ 7539.033218] nfcache: 0
[ 7539.033218] target_offset: 112
[ 7539.033219] next_offset: 152
[ 7539.033219] comefrom: 2
[ 7539.033219] ***END***

[ 7539.033219] ***BEGIN***
[ 7539.033220] ---ip--- // 对应第2条配置,源地址不是1.2.3.5
[ 7539.033220] src ip: 5030201, src mask: FFFFFFFF
[ 7539.033221] dst ip: 0, dst mask: 0
[ 7539.033221] iniface: 
[ 7539.033221] outiface: 
[ 7539.033222] proto: 6 // TCP协议
[ 7539.033222] flags: 0
[ 7539.033223] invflags: 8 // #define IPT_INV_SRCIP      0x08
[ 7539.033223] ---ipt_entry---
[ 7539.033223] nfcache: 0
[ 7539.033224] target_offset: 112
[ 7539.033224] next_offset: 152
[ 7539.033224] comefrom: 2
[ 7539.033225] ***END***

[ 7539.033225] ***BEGIN***
[ 7539.033225] ---ip--- // 对应第3条配置,对源地址1.2.3.4放行
[ 7539.033226] src ip: 4030201, src mask: FFFFFFFF
[ 7539.033226] dst ip: 0, dst mask: 0
[ 7539.033226] iniface: 
[ 7539.033227] outiface: 
[ 7539.033227] proto: 0 // 所有协议
[ 7539.033227] flags: 0
[ 7539.033228] invflags: 0
[ 7539.033228] ---ipt_entry---
[ 7539.033228] nfcache: 0
[ 7539.033229] target_offset: 112
[ 7539.033229] next_offset: 152
[ 7539.033229] comefrom: 2
[ 7539.033230] ***END***

你可能感兴趣的:(netfilter,linux,内核,网络,netfilter)