本文档的Copyleft归yfydz所有,使用GPL发布,可以自由拷贝,转载,转载时请保持文档的完整性,严禁用于任何商业用途。
msn:
[email protected]
来源:http://yfydz.cublog.cn
1. 前言
宏NF_HOOK用于实现netfilter的挂接点。在桥(bridge)代码并入内核后,又增加了两个相关的宏:NF_HOOK_COND和NF_HOOK_THRESH,本文分析这两个宏的用途。
2. 宏定义
2.1 不带桥代码时的宏定义
以下是2.4.26内核中的定义:
/*
 * NF_HOOK as defined in 2.4.26 (no bridge support).
 *
 * With CONFIG_NETFILTER_DEBUG the macro is a plain alias for nf_hook_slow,
 * so the hook list is never short-circuited.
 * Without it, the macro tests whether the list nf_hooks[pf][hook] is empty:
 * if it is, okfn(skb) is called directly; otherwise nf_hook_slow() is called
 * with the same six arguments.
 */
#ifdef CONFIG_NETFILTER_DEBUG
#define NF_HOOK nf_hook_slow
#else
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
(list_empty(&nf_hooks[(pf)][(hook)]) \
? (okfn)(skb) \
: nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn)))
#endif
该宏检查nf_hooks链表数组中对应(pf, hook)的链表是否挂接了hook函数:链表非空时调用nf_hook_slow进行过滤检查,链表为空时直接执行正常处理函数okfn;若定义了CONFIG_NETFILTER_DEBUG,则总是调用nf_hook_slow。
2.2 带桥代码时的宏定义
以下是2.6.17.11内核中的定义:
/*
 * NF_HOOK as defined in 2.6.17.11: forwards all six arguments to
 * NF_HOOK_THRESH with the threshold fixed at INT_MIN.
 */
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN)
可见NF_HOOK就是NF_HOOK_THRESH的一个特例。
NF_HOOK_THRESH定义如下:
/*
 * NF_HOOK_THRESH: run the hooks for (pf, hook) with a caller-supplied
 * priority threshold.
 *
 * Written as a GNU statement expression. It calls nf_hook_thresh() with the
 * address of skb (so skb must be an lvalue) and with cond fixed at 1.
 * Only when that call returns 1 is okfn(skb) invoked, and its return value
 * becomes the value of the expression; any other return value from
 * nf_hook_thresh() is yielded unchanged and okfn is not called.
 */
#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \
({int __ret; \
if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh, 1)) == 1)\
__ret = (okfn)(skb); \
__ret;})
该宏就是调用nf_hook_thresh函数,如果返回为1(即NF_ACCEPT),才执行正常函数okfn(),否则返回。
另一个宏NF_HOOK_COND定义如下:
/*
 * NF_HOOK_COND: run the hooks for (pf, hook) only if a caller-supplied
 * condition holds.
 *
 * Same shape as a GNU statement expression wrapping nf_hook_thresh(), but
 * here the threshold is fixed at INT_MIN and cond is passed through from the
 * caller. When nf_hook_thresh() returns 1, okfn(skb) is invoked and its
 * return value becomes the value of the expression; any other return value
 * is yielded unchanged and okfn is not called.
 */
#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) \
({int __ret; \
if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\
__ret = (okfn)(skb); \
__ret;})
该宏就是调用nf_hook_thresh函数,只是函数中的条件参数cond是通过宏传入的,cond非0时才进行检查,而NF_HOOK_THRESH中cond固定为1。
nf_hook_thresh函数定义如下:
/*
 * nf_hook_thresh - decide whether the hooks for (pf, hook) need to be run,
 * and run them via nf_hook_slow() if so.
 *
 * pf, hook:     index into the nf_hooks list array.
 * pskb:         pointer to the packet pointer, passed on to nf_hook_slow().
 * indev/outdev: devices passed on to nf_hook_slow().
 * okfn:         continuation passed on to nf_hook_slow(); not called here.
 * thresh:       priority threshold passed on to nf_hook_slow().
 * cond:         when zero, the hooks are skipped entirely.
 *
 * Returns 1 when no hook processing took place (cond is zero, or the hook
 * list is empty and CONFIG_NETFILTER_DEBUG is not defined); otherwise
 * returns whatever nf_hook_slow() returns.
 */
static inline int nf_hook_thresh(int pf, unsigned int hook,
struct sk_buff **pskb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sk_buff *), int thresh,
int cond)
{
// Conditional gate: the hooks are consulted only when cond is non-zero
if (!cond)
return 1;
#ifndef CONFIG_NETFILTER_DEBUG
// Fast path (non-debug builds only): nothing registered on this hook list
if (list_empty(&nf_hooks[pf][hook]))
return 1;
#endif
// Hand the packet to nf_hook_slow to run the hook checks
return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh);
}
nf_hook_slow函数定义如下,和2.4的区别是增加了一个参数hook_thresh(hook阈值):
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sk_buff *),
int hook_thresh)
{
struct list_head *elem;
unsigned int verdict;
int ret = 0;
/* We may already have this, but read-locks nest anyway */
// 该锁是用于关闭进程抢占
rcu_read_lock();
// hook点链表
elem = &nf_hooks[pf][hook];
next_hook:
// 进行链表循环检查
verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
outdev, &elem, okfn, hook_thresh);
......
nf_iterate函数定义如下,和2.4的区别是增加了hook_thresh参数:
unsigned int nf_iterate(struct list_head *head,
struct sk_buff **skb,
int hook,
const struct net_device *indev,
const struct net_device *outdev,
struct list_head **i,
int (*okfn)(struct sk_buff *),
int hook_thresh)
{
unsigned int verdict;
/*
* The caller must not block between calls to this
* function because of risk of continuing from deleted element.
*/
list_for_each_continue_rcu(*i, head) {
struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
// 如果hook_thresh大于hook点的权值,忽略该节点
// hook链表是按权值排序的,权值越小,表示优先级越高,在链表中越靠前
// 所以hook_thresh的作用就是只遍历优先级不高于该阈值(权值不小于hook_thresh)的节点,高优先级
// 的节点就不过滤了,这样就可以有选择地减少一些处理操作
// NF_HOOK和NF_HOOK_COND宏中定义的thresh都是INT_MIN,也就是最高优先级,
// 所以就要遍历hook链表中的所有节点。
if (hook_thresh > elem->priority)
continue;
...
3. 具体应用
/* linux-2.6.17.11/net/bridge/br_netfilter.c */
// Hook function used in bridge mode so that netfilter rules still apply.
// The packet has already been through part of the processing by this point,
// so there is no need to repeat it; a hook priority threshold is therefore
// used to select which hooks on the list are run.
//
// Parameters: only *pskb is used by the body; hook, in, out and okfn are
// not referenced.
// Returns NF_ACCEPT when the skb carries no nf_bridge info (or, in debug
// builds, when skb->dst is NULL), NF_DROP when the output device has no
// bridge parent, and NF_STOLEN after the packet has been handed to
// NF_HOOK / NF_HOOK_THRESH (the value of those macros is discarded).
static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct net_device *realindev, *realoutdev;
struct sk_buff *skb = *pskb;
struct nf_bridge_info *nf_bridge;
int pf;
// No bridge info attached to this skb: nothing to do here
if (!skb->nf_bridge)
return NF_ACCEPT;
// Choose the protocol family whose hook list is traversed below
if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
pf = PF_INET;
else
pf = PF_INET6;
#ifdef CONFIG_NETFILTER_DEBUG
/* Sometimes we get packets with NULL ->dst here (for example,
* running a dhcp client daemon triggers this). This should now
* be fixed, but let's keep the check around. */
if (skb->dst == NULL) {
printk(KERN_CRIT "br_netfilter: skb->dst == NULL.");
return NF_ACCEPT;
}
#endif
nf_bridge = skb->nf_bridge;
// Record the current device as the physical output device
nf_bridge->physoutdev = skb->dev;
realindev = nf_bridge->physindev;
/* Bridged, take PF_BRIDGE/FORWARD.
* (see big note in front of br_nf_pre_routing_finish) */
if (nf_bridge->mask & BRNF_BRIDGED_DNAT) {
if (nf_bridge->mask & BRNF_PKT_TYPE) {
skb->pkt_type = PACKET_OTHERHOST;
// The bit is known to be set here, so the XOR clears it
nf_bridge->mask ^= BRNF_PKT_TYPE;
}
if (skb->protocol == htons(ETH_P_8021Q)) {
// Expose the VLAN header again and move nh.raw back by the same amount
skb_push(skb, VLAN_HLEN);
skb->nh.raw -= VLAN_HLEN;
}
// Full traversal of the bridge FORWARD hook list (NF_HOOK uses INT_MIN)
NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev,
skb->dev, br_forward_finish);
goto out;
}
realoutdev = bridge_parent(skb->dev);
if (!realoutdev)
return NF_DROP;
#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
/* iptables should match -o br0.x */
if (nf_bridge->netoutdev)
realoutdev = nf_bridge->netoutdev;
#endif
if (skb->protocol == htons(ETH_P_8021Q)) {
// Skip past the VLAN header and advance nh.raw by the same amount
skb_pull(skb, VLAN_HLEN);
(*pskb)->nh.raw += VLAN_HLEN;
}
/* IP forwarded traffic has a physindev, locally
* generated traffic hasn't. */
if (realindev != NULL) {
if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT)) {
struct net_device *parent = bridge_parent(realindev);
if (parent)
realindev = parent;
}
// Filter with the FORWARD hook list, so rules defined on the FORWARD chain
// take effect here.
// The priority threshold is NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1, i.e. all
// hooks that come after connection tracking are run, including the mangle,
// nat and filter hooks; the packet has already been through connection
// tracking, so it does not need to be tracked again.
NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev,
realoutdev, br_nf_local_out_finish,
NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1);
} else {
// Filter with the OUTPUT hook list, so rules defined on the OUTPUT chain
// take effect here.
// The priority threshold is NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1, i.e. all
// hooks that come after destination NAT are run, including the filter and
// source NAT hooks.
NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev,
realoutdev, br_nf_local_out_finish,
NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1);
}
out:
// The packet was handed to one of the hook macros above; report it as taken
return NF_STOLEN;
}
4. 结论
NF_HOOK_COND和NF_HOOK_THRESH是在网络代码其他位置,如bridge,也要应用netfilter过滤架构时增加的,因为此时不需要遍历全部链表节点,所以增加相关的条件项进行了一些限制处理。