ipv4_confirm函数注册在LOCAL_IN链和POST_ROUTING链上,这两个链是netfilter的两个出口。从函数名字可以看出,它的功能是连接跟踪确认,下面来分析这个函数。
/*
 * IPv4 conntrack hook registrations (excerpt: the "..." below stands for
 * entries elided from this listing). The two entries shown attach
 * ipv4_confirm to the POST_ROUTING and LOCAL_IN hooks, both at priority
 * NF_IP_PRI_CONNTRACK_CONFIRM.
 */
static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
...
{
/* Confirm the tracked connection as the packet finally leaves via POST_ROUTING */
.hook = ipv4_confirm,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
{
/* Confirm the tracked connection for packets delivered to the local host via LOCAL_IN */
.hook = ipv4_confirm,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
};
1、判断链接是否建立
通过nf_ct_get取得nf_conn并判断其是否为NULL,如果为NULL,表示该数据包还没有对应的链接;或者数据包的状态是IP_CT_RELATED + IP_CT_IS_REPLY。这两种情况都会跳过中间的helper处理,直接跳转到out标签,由nf_conntrack_confirm完成后续处理。
/*
 * ipv4_confirm - hook function that ends by confirming the packet's
 * conntrack entry (excerpt: the rest of the parameter list and parts of
 * the body are elided with "..." in this listing).
 *
 * Every visible path returns the result of nf_conntrack_confirm(skb).
 */
static unsigned int ipv4_confirm(unsigned int hooknum,
....)
{
...
/* This is where we call the helper: as the packet goes out. */
/* Jump straight to confirmation when the packet has no conntrack entry or
   its state is IP_CT_RELATED + IP_CT_IS_REPLY */
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
goto out;
...
out:
/* We've seen it coming out the other side: confirm it */
/* Confirm the connection */
return nf_conntrack_confirm(skb);
}
2、nf_conntrack_confirm
先调用nf_ct_is_confirmed判断链接是否已经确认:如果已经确认就不再重复确认;如果尚未确认,就调用__nf_conntrack_confirm确认这条链接。当结果为NF_ACCEPT时,再调用nf_ct_deliver_cached_events投递缓存的事件。
/*
 * nf_conntrack_confirm - confirm the conntrack entry attached to @skb.
 *
 * Returns NF_ACCEPT when there is nothing to confirm (no entry, or the
 * untracked placeholder) or when the entry is already confirmed;
 * otherwise returns whatever __nf_conntrack_confirm() reports. Cached
 * events are delivered only when the verdict is NF_ACCEPT.
 */
static inline int nf_conntrack_confirm(struct sk_buff *skb)
{
	struct nf_conn *conn = (struct nf_conn *)skb->nfct;
	int verdict;

	/* No entry, or the untracked placeholder: nothing to do. */
	if (!conn || conn == &nf_conntrack_untracked)
		return NF_ACCEPT;

	/* Only entries that are still unconfirmed need the slow path. */
	if (nf_ct_is_confirmed(conn))
		verdict = NF_ACCEPT;
	else
		verdict = __nf_conntrack_confirm(skb);

	if (likely(verdict == NF_ACCEPT))
		nf_ct_deliver_cached_events(conn);

	return verdict;
}
3、__nf_conntrack_confirm
__nf_conntrack_confirm主要做以下4件事
(1)__nf_conntrack_confirm只处理orig方向的数据包,如果不是orig方向直接返回
(2)判断orig、reply方向的tuple是否已经存在于哈希表对应的链表中(通过hnnode节点遍历),如果已存在就返回NF_DROP;如果不存在,就把该链接从unconfirmed链表中删除
(3)设置链接的超时时间、设置链接确认标志IPS_CONFIRMED_BIT、将orig方向和reply方向的tuple按各自的哈希值添加到哈希表中,方便快速查找。
(4)判断链接是否是期望链接,如果是就产生一个IPCT_RELATED event状态事件,如果不是就产生IPCT_NEW事件
/*
 * __nf_conntrack_confirm - move an unconfirmed conntrack entry into the
 * conntrack hash table.
 *
 * Returns NF_ACCEPT when the packet is not in the original direction,
 * when the entry is already dying, or when the entry was inserted
 * successfully. Returns NF_DROP (and bumps the insert_failed statistic)
 * when an entry with the same original or reply tuple in the same zone
 * is already present in the hash table.
 */
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
unsigned int hash, repl_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct nf_conn_help *help;
struct hlist_nulls_node *n;
enum ip_conntrack_info ctinfo;
struct net *net;
u16 zone;
ct = nf_ct_get(skb, &ctinfo);
net = nf_ct_net(ct);
/* ipt_REJECT uses nf_conntrack_attach to attach related
ICMP/TCP RST packets in other direction. Actual packet
which created connection will be IP_CT_NEW or for an
expected connection, IP_CT_RELATED. */
/* Only packets in the original direction confirm the entry;
   anything else is accepted unchanged */
if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
return NF_ACCEPT;
zone = nf_ct_zone(ct);
/* Compute the hash bucket for the original and the reply tuple */
hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
/* We're not in hash table, and we refuse to set up related
connections for unconfirmed conns. But packet copies and
REJECT will give spurious warnings here. */
/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
/* No external references means noone else could have
confirmed us. */
NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
pr_debug("Confirming conntrack %p\n", ct);
spin_lock_bh(&nf_conntrack_lock);
/* We have to check the DYING flag inside the lock to prevent
a race against nf_ct_get_next_corpse() possibly called from
user context, else we insert an already 'dead' hash, blocking
further use of that particular connection -JM */
if (unlikely(nf_ct_is_dying(ct))) {
spin_unlock_bh(&nf_conntrack_lock);
return NF_ACCEPT;
}
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost race. */
/* Scan both buckets for an entry with an equal tuple in the same zone;
   a match in either direction means the insert fails (goto out) */
hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
&h->tuple) &&
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out;
hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
&h->tuple) &&
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out;
/* Remove from unconfirmed list */
hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
weird delay cases. */
/* Rebase the stored expiry onto the current jiffies value */
ct->timeout.expires += jiffies;
/* Arm the connection's timeout timer */
add_timer(&ct->timeout);
atomic_inc(&ct->ct_general.use);
/* Mark the connection as confirmed */
set_bit(IPS_CONFIRMED_BIT, &ct->status);
/* Since the lookup is lockless, hash insertion must be done after
 * starting the timer and setting the CONFIRMED bit. The RCU barriers
 * guarantee that no other CPU can find the conntrack before the above
 * stores are visible.
 */
/* Insert the original and reply tuples into their respective hash buckets */
__nf_conntrack_hash_insert(ct, hash, repl_hash);
NF_CT_STAT_INC(net, insert);
spin_unlock_bh(&nf_conntrack_lock);
help = nfct_help(ct);
if (help && help->helper)
nf_conntrack_event_cache(IPCT_HELPER, ct);
/* Cache IPCT_RELATED when master_ct(ct) is non-NULL (presumably an
   expected connection), otherwise cache IPCT_NEW */
nf_conntrack_event_cache(master_ct(ct) ?
IPCT_RELATED : IPCT_NEW, ct);
return NF_ACCEPT;
out:
/* A matching tuple already exists: count the failure and drop the packet */
NF_CT_STAT_INC(net, insert_failed);
spin_unlock_bh(&nf_conntrack_lock);
return NF_DROP;
}