Given all of the above, knocking the problems down one at a time, the solution falls into place.
3. Establish a sensible adaptive cache-replacement principle: an entry that keeps earning its seat holds it, and an entry that stops pulling its weight steps aside on its own.
So my final design looks like this:
The effect: the faster a flow's packets arrive, the more easily it hits the cache, at a very low cost; the slower they arrive, the less likely the flow is to hit the cache, yet it pays no steep penalty for missing either.
My test code for this intermediate step is below:
//Modify net/netfilter/nf_conntrack_core.c
//Email:[email protected]
//1. Definitions
#define A
#ifdef A
/*
 * How MAX_CACHE should be computed dynamically:
 * cache list length = average conflict-chain length / 3, where:
 * average conflict-chain length = net.nf_conntrack_max/net.netfilter.nf_conntrack_buckets
 * 3 = an empirical value
 */
#define MAX_CACHE 4
struct conntrack_cache {
struct nf_conntrack_tuple_hash *caches[MAX_CACHE];
};
DEFINE_PER_CPU(struct conntrack_cache, conntrack_cache);
#endif
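A quick aside on the sizing rule above: the test code pins the cache length at the compile-time MAX_CACHE, but the formula in the comment is easy to make concrete. A minimal sketch, assuming the 2.6.3x globals nf_conntrack_max and nf_conntrack_htable_size; the helper conntrack_cache_len() is hypothetical, and nothing below calls it:

/* Hypothetical: cache length per the sizing formula above; the test
 * code sticks with the compile-time MAX_CACHE and never calls this. */
static inline unsigned int conntrack_cache_len(void)
{
	unsigned int avg_chain = nf_conntrack_max / nf_conntrack_htable_size;
	unsigned int len = avg_chain / 3;	/* 3 = the empirical divisor */

	if (len == 0)
		len = 1;
	return len > MAX_CACHE ? MAX_CACHE : len;
}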
//2. Modify resolve_normal_ct
static inline struct nf_conn *
resolve_normal_ct(struct net *net,
struct sk_buff *skb,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
struct nf_conntrack_l3proto *l3proto,
struct nf_conntrack_l4proto *l4proto,
int *set_reply,
enum ip_conntrack_info *ctinfo)
{
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
#ifdef A
int i;
struct conntrack_cache *cache;
#endif
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, l3num, protonum, &tuple, l3proto,
l4proto)) {
pr_debug("resolve_normal_ct: Can't get tuple\n");
return NULL;
}
#ifdef A
cache = &__get_cpu_var(conntrack_cache);
rcu_read_lock();
h = NULL; /* must start out NULL: the slow path below keys off it */
if (0 /* optimization 3 */) {
goto slowpath;
}
for (i = 0; i < MAX_CACHE; i++) {
struct nf_conntrack_tuple_hash *ch = cache->caches[i];
struct nf_conntrack_tuple_hash *ch0 = cache->caches[0];
if (ch && nf_ct_tuple_equal(&tuple, &ch->tuple)) {
ct = nf_ct_tuplehash_to_ctrack(ch);
if (unlikely(nf_ct_is_dying(ct) ||
!atomic_inc_not_zero(&ct->ct_general.use))) {
h = NULL;
goto slowpath;
}
else {
if (unlikely(!nf_ct_tuple_equal(&tuple, &ch->tuple))) {
nf_ct_put(ct);
h = NULL;
goto slowpath;
}
}
/****************************** Optimization 1 ******************************/
/* Rather than lifting a hit straight to slot 0, promote it by a number of  */
/* steps inversely proportional to the interval between the two most recent */
/* hits; that keeps the cache queue itself from thrashing. Weighting the    */
/* interval with its history would work even better. (A sketch follows      */
/* after this function.)                                                    */
/*****************************************************************************/
/*
 * Raise the hit entry's priority based on temporal locality.
 */
if (i > 0 /* && optimization 1 */) {
cache->caches[0] = ch;
cache->caches[i] = ch0;
}
h = ch;
break; /* stop scanning: the flow can match at most one cache slot */
}
if (h) {
/* Cache hit: ct already holds the reference taken above, so skip */
/* both the hash lookup and the cache-replacement logic below. */
rcu_read_unlock();
goto skip;
}
slowpath:
ct = NULL; /* also clears a stale ct left by a failed fast-path hit */
rcu_read_unlock();
if (!h)
#endif
/* look for tuple match */
h = nf_conntrack_find_get(net, &tuple);
if (!h) {
h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff);
if (!h)
return NULL;
if (IS_ERR(h))
return (void *)h;
}
#ifdef A
else {
int j;
struct nf_conn *ctp;
struct nf_conntrack_tuple_hash *chp;
/************************** Optimization 2 ***************************/
/* Perform a cache replacement only when two consecutive packets of  */
/* the flow arrive less than n apart; this keeps slow flows such as  */
/* ICMP from thrashing the cache. (See the sketch after this         */
/* function.)                                                        */
/**********************************************************************/
if (0 /* optimization 2 */) {
goto skip;
}
/************************** Optimization 3 ***************************/
/* Enable the cache only once the total conntrack count exceeds 4x   */
/* the number of hash buckets. With few conntracks, a single hash    */
/* computation (or a walk of a very short conflict chain) already    */
/* pins down the entry, so the cache would only cost performance.    */
/* (See the sketch after this function.)                             */
/**********************************************************************/
if (0 /* optimization 3 */) {
goto skip;
}
ct = nf_ct_tuplehash_to_ctrack(h);
nf_conntrack_get(&ct->ct_general);
chp = cache->caches[MAX_CACHE-1];
for (j = MAX_CACHE-1; j > 0; j--) {
cache->caches[j] = cache->caches[j-1];
}
cache->caches[0] = h;
if (chp) {
ctp = nf_ct_tuplehash_to_ctrack(chp);
nf_conntrack_put(&ctp->ct_general);
}
}
skip:
if (!ct) {
ct = nf_ct_tuplehash_to_ctrack(h);
}
#else
ct = nf_ct_tuplehash_to_ctrack(h);
#endif
/* It exists; we have (non-exclusive) reference. */
if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
/* Please set reply bit if this packet OK */
*set_reply = 1;
} else {
/* Once we've had two way comms, always ESTABLISHED. */
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
*ctinfo = IP_CT_ESTABLISHED;
} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
pr_debug("nf_conntrack_in: related packet for %p\n",
ct);
*ctinfo = IP_CT_RELATED;
} else {
pr_debug("nf_conntrack_in: new packet for %p\n", ct);
*ctinfo = IP_CT_NEW;
}
*set_reply = 0;
}
skb->nfct = &ct->ct_general;
skb->nfctinfo = *ctinfo;
return ct;
}
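The three if (0 ...) placeholders above are stubbed out on purpose in this intermediate version. Below is a minimal sketch of how they might be filled in; it is an illustration, not the final patch. The last_hit field is a new addition to struct conntrack_cache (it tracks hit times per CPU rather than per flow, a simplification of what optimization 2 really calls for), the millisecond thresholds are placeholders, and cache_worthwhile()/cache_replace_ok()/promote_step() are hypothetical names; nf_conntrack_htable_size and net->ct.count are the names the 2.6.3x conntrack code uses.

/* Hypothetical extension: remember when the cache last hit. */
struct conntrack_cache {
	struct nf_conntrack_tuple_hash *caches[MAX_CACHE];
	unsigned long last_hit;	/* jiffies at the most recent cache hit */
};

/* Optimization 3: the cache only pays off once lookups must walk long
 * conflict chains, i.e. the table holds more than 4x as many
 * conntracks as there are hash buckets. */
static inline int cache_worthwhile(struct net *net)
{
	return atomic_read(&net->ct.count) > 4 * (int)nf_conntrack_htable_size;
}

/* Optimization 2: allow a cache replacement only when packets arrive
 * back-to-back faster than a threshold (10 ms is a placeholder), so
 * slow flows such as ICMP do not thrash the cache. */
static inline int cache_replace_ok(struct conntrack_cache *cache)
{
	unsigned long now = jiffies;
	int fast = time_before(now, cache->last_hit + msecs_to_jiffies(10));

	cache->last_hit = now;
	return fast;
}

/* Optimization 1: promote a hit entry by a step inversely proportional
 * to the time since the previous hit instead of always jumping to
 * slot 0, so the queue order does not oscillate. */
static inline int promote_step(struct conntrack_cache *cache, int i)
{
	unsigned long delta = jiffies - cache->last_hit;

	if (delta < msecs_to_jiffies(1))
		return i;	/* very hot: straight to the head */
	if (delta < msecs_to_jiffies(10))
		return i / 2;	/* warm: move halfway up */
	return 1;		/* lukewarm: one slot at a time */
}

cache_worthwhile() would replace both if (0 /* optimization 3 */) tests, cache_replace_ok() the optimization-2 one, and promote_step() would turn the unconditional swap with slot 0 into a swap with slot i - step.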
//3. Modify nf_conntrack_init
int nf_conntrack_init(struct net *net)
{
int ret;
#ifdef A
int i;
#endif
if (net_eq(net, &init_net)) {
ret = nf_conntrack_init_init_net();
if (ret < 0)
goto out_init_net;
}
ret = nf_conntrack_init_net(net);
if (ret < 0)
goto out_net;
if (net_eq(net, &init_net)) {
/* For use by REJECT target */
rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
/* Howto get NAT offsets */
rcu_assign_pointer(nf_ct_nat_offset, NULL);
}
#ifdef A
/* Initialize the per-CPU conntrack cache queues */
for_each_possible_cpu(i) {
int j;
struct conntrack_cache *cache;
cache = &per_cpu(conntrack_cache, i);
for (j = 0; j < MAX_CACHE; j++) {
cache->caches[j] = NULL;
}
}
#endif
return 0;
out_net:
if (net_eq(net, &init_net))
nf_conntrack_cleanup_init_net();
out_init_net:
return ret;
}
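One loose end in the test code: every occupied cache slot holds a conntrack reference (taken with nf_conntrack_get at replacement time), so a matching release loop belongs in the cleanup path, or cached entries can never be freed once conntrack is torn down. A sketch of that loop; the helper name conntrack_cache_flush() and calling it from nf_conntrack_cleanup() are my assumptions, not something the test code does yet:

#ifdef A
/* Hypothetical cleanup counterpart to the init loop above: drop the
 * references the per-CPU cache slots still hold so the cached
 * conntrack entries can actually be freed. */
static void conntrack_cache_flush(void)
{
	int i;

	for_each_possible_cpu(i) {
		int j;
		struct conntrack_cache *cache = &per_cpu(conntrack_cache, i);

		for (j = 0; j < MAX_CACHE; j++) {
			struct nf_conn *ct;

			if (!cache->caches[j])
				continue;
			ct = nf_ct_tuplehash_to_ctrack(cache->caches[j]);
			nf_conntrack_put(&ct->ct_general);
			cache->caches[j] = NULL;
		}
	}
}
#endif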