2010年03月03日 星期三 10时02分56秒
取得一个阶段性成果,该版本的simpLB可以完全实现“轮询调度” 。
连接管理中加入了超时机制,如果timeout时间内某个连接上没有数据包则删除该连接。
HTTP完美测试!
SSH测试存在一些问题:首先,调度到的目的服务器可能和本地存储的known_hosts冲突,可在Client端删除~/.ssh/known_hosts中的相应条目或删除该文件来解决;
其次,因为LoadBalancer上的连接管理有超时机制,所以如果ssh登录上以后timeout时间内没有任何动作,该连接会被删除,导致“Connection reset”;
最后,不能很好地实现“轮询调度”。据了解,lvs目前只支持FTP,但不知道它对SSH应用的支持如何。
SSH测试的问题目前可以放在一边。
目前要重点解决的是:
完善目的服务器的表述,即构建合适的struct;
完善对目的服务器的管理,构建链表list,以及各种操作函数(增、删、查、改等)。
将LoadInfoCollector采集的数据送入内核层以便下一步动态负载调度之用。
采用/proc文件系统?
在内核中捕获UDP报文的数据,并解析?
1、simpLB.c
/*简单的轮询调度算法,添加了连接跟踪,可测试HTTP*/
/*用于连接跟踪的定时器,能释放超时连接*/
/*将处理回应包的模块从POST_ROUTING移到FORWARD点*/
#include <linux/kernel.h>
#include <linux/tcp.h> /* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h> /* for csum_tcpudp_magic */
#include <net/udp.h>
#include <net/icmp.h> /* for icmp_send */
#include <net/route.h> /* for ip_route_output */
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/timer.h>
#include "sahu_lb_tools.h"
/*
 * TCP ports handed to tcp_dnat_base()/tcp_snat_base() by the hooks:
 * VPORT_TCP is passed as the virtual-side port, DPORT_TCP as the
 * real-server-side port.
 */
#define VPORT_TCP 80
#define DPORT_TCP 80
/* Same pair for UDP, handed to udp_dnat_base()/udp_snat_base(). */
#define VPORT_UDP 4950
#define DPORT_UDP 4950
MODULE_LICENSE("GPL");
/*
 * Addresses used by the NAT hooks, stored as 4 raw bytes in network byte
 * order so they can be compared with iphdr fields via *(__be32 *)ptr.
 *
 * The byte strings must use backslash hex escapes; written with forward
 * slashes they are plain text ("/xc0...") and never match a real address.
 */
static unsigned char *vmLB_ip = (unsigned char *)"\xc0\xa8\x7a\x01"; /* 192.168.122.1: virtual (load balancer) IP */
static unsigned char *vm01_ip = (unsigned char *)"\xc0\xa8\x63\x65"; /* 192.168.99.101: real server 1 */
static unsigned char *vm02_ip = (unsigned char *)"\xc0\xa8\x63\x66"; /* 192.168.99.102: real server 2 */
/* Real-server table, filled in by init_module(). */
static unsigned char *srv_list[2];
/* Round-robin cursor into srv_list. */
static int sahu_id = 0;
/* This is the hook function itself */
unsigned int sahu_lb_pre_routing(unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned char *srv_addr=NULL;
char addr_str[16];
struct sk_buff *sb = skb;
struct iphdr *iph;
struct tcphdr *tcph;
struct sahu_lb_conn *cp;
if(!sb) return NF_ACCEPT;
iph = ip_hdr(sb);
if(!iph) return NF_ACCEPT;
if (iph->daddr == *(__be32 *)vmLB_ip){/*local in*/
srv_addr = srv_list[(sahu_id++)%2];
if(iph->protocol == IPPROTO_TCP){
tcph = (void *)skb_network_header(skb) + ip_hdrlen(skb) ;
cp = sahu_lb_conn_in_get(iph->protocol,iph->saddr,tcph->source,iph->daddr,tcph->dest);
if(!cp){
printk("A new connection/n");
cp = sahu_lb_conn_new(iph->protocol,iph->saddr,tcph->source,*(__be32 *)vmLB_ip,tcph->dest,*(__be32 *)srv_addr,tcph->dest);
}else{
srv_addr = (unsigned char *)&cp->daddr;
inet_i2str(cp->daddr,addr_str);
printk("Alreay has a connection:cp->daddr:%s/n",addr_str);
}
tcp_dnat_base(skb,*(__be32 *)vmLB_ip,*(__be32 *)srv_addr,htons(VPORT_TCP),htons(DPORT_TCP));
}else if(iph->protocol == IPPROTO_UDP){
udp_dnat_base(skb,*(__be32 *)vmLB_ip,*(__be32 *)srv_addr,htons(VPORT_UDP),htons(DPORT_UDP));
}else{
}
iph->daddr= *(unsigned int *)srv_addr;
ip_send_check(iph);
// skb->local_df = 1;
printk("DNat: %d.%d.%d.%d To:%d.%d.%d.%d/n",
*vmLB_ip, *(vmLB_ip + 1), *(vmLB_ip + 2),*(vmLB_ip + 3),
*srv_addr,*(srv_addr + 1), *(srv_addr + 2),*(srv_addr +3));
return NF_ACCEPT;
}else{
inet_i2str(iph->daddr,addr_str);
printk("No DNat for %s/n",addr_str);
return NF_ACCEPT;
}
}
unsigned int sahu_lb_forward(unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned char *srv_addr=NULL;
char addr_str[16];
struct sk_buff *sb = skb;
struct iphdr *iph;
struct tcphdr *tcph;
struct sahu_lb_conn *cp;
if(!sb) return NF_ACCEPT;
iph = ip_hdr(sb);
if(!iph) return NF_ACCEPT;
//srv_addr = srv_list[(sahu_id+1)%2];
//if (iph->saddr == iph->saddr){//*(__be32 *)srv_addr){/*forward*/
if(iph->protocol == IPPROTO_TCP){
tcph = (void *)skb_network_header(skb) + ip_hdrlen(skb) ;
cp = sahu_lb_conn_in_get(iph->protocol,iph->daddr,tcph->dest,*(__be32 *)vmLB_ip,htons(VPORT_TCP));
if(!cp){
printk("Reply not handled!/n");
return NF_ACCEPT;
}else{
srv_addr = (unsigned char *)&cp->daddr;
inet_i2str(cp->daddr,addr_str);
printk("cp->daddr:%s/n",addr_str);
}
tcp_snat_base(skb,*(__be32 *)vmLB_ip,*(__be32 *)srv_addr,htons(VPORT_TCP),htons(DPORT_TCP));
}else if(iph->protocol == IPPROTO_UDP){
udp_snat_base(skb,*(__be32 *)vmLB_ip,*(__be32 *)srv_addr,htons(VPORT_UDP),htons(DPORT_UDP));
}else{
}
iph->saddr= *(unsigned int *)vmLB_ip;
ip_send_check(iph);
// skb->local_df = 1;
printk("SNat: %d.%d.%d.%d To:%d.%d.%d.%d/n",
*srv_addr, *(srv_addr + 1), *(srv_addr + 2),*(srv_addr + 3),
*vmLB_ip, *(vmLB_ip + 1), *(vmLB_ip + 2),*(vmLB_ip + 3));
return NF_ACCEPT;
//}else{
// inet_i2str(iph->saddr,addr_str);
// printk("No SNat for %s/n",addr_str);
// return NF_ACCEPT;
//}
}
/*
 * Netfilter hooks registered by this module (see init_module()):
 * both are IPv4 (PF_INET) hooks with priority 100.
 */
static struct nf_hook_ops sahu_ops[] __read_mostly = {
/* PRE_ROUTING: DNAT of packets addressed to the virtual IP. */
{
.hook = sahu_lb_pre_routing,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_PRE_ROUTING,
.priority = 100,
},
/* FORWARD: SNAT of the real servers' replies. */
{
.hook = sahu_lb_forward,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_FORWARD,
.priority = 100,
}
};
/* Initialisation routine */
int init_module()
{
int ret;
ret = nf_register_hooks(sahu_ops,ARRAY_SIZE(sahu_ops));
if(ret<0){
pr_info("can't install simpLB into kernel!/n");
}else{
pr_info("simpLB install into kernel!/n");
}
srv_list[0] = vm01_ip;
srv_list[1] = vm02_ip;
sahu_lb_conn_init();
return 0;
}
/*
 * cleanup_module - module exit point.
 *
 * Unregisters the hooks first so that no packet can reach the connection
 * table while it is being torn down, then releases the table.
 */
void cleanup_module(void)
{
	nf_unregister_hooks(sahu_ops, ARRAY_SIZE(sahu_ops));
	sahu_lb_conn_cleanup();
	pr_info("simpLB removed from kernel!\n");
}
2、sahu_lb_tools.h
#include <linux/random.h>
#include <linux/jhash.h>
/* Number of list heads in the connection table. */
#define SAHU_LB_CONN_TAB_SIZE 100
/* Interval of the per-connection timer, in jiffies (3 seconds). */
#define SAHU_LB_CONN_TIMEOUT (3*HZ)
/* Forward declaration: used by sahu_lb_conn_expire() before its definition. */
int inet_i2str(unsigned int addr,char *addr_str);
/* Connection table, allocated by sahu_lb_conn_init(). */
static struct list_head *sahu_lb_conn_tab;
/* Random seed mixed into sahu_lb_conn_hashkey(), set by sahu_lb_conn_init(). */
static unsigned int sahu_lb_conn_rnd;
/*
 * struct sahu_lb_conn - one tracked connection.
 *
 * Addresses and ports are stored exactly as taken from the packet headers
 * by the callers of sahu_lb_conn_new() (i.e. in network byte order).
 */
struct sahu_lb_conn{
struct list_head c_list; /* link in the connection table */
__u32 caddr; /* client address */
__u32 vaddr; /* virtual (load balancer) address */
__u32 daddr; /* real-server address chosen for this connection */
__u16 cport; /* client port */
__u16 vport; /* virtual port */
__u16 dport; /* real-server port */
__u16 protocol; /* IP protocol number */
atomic_t refcnt; /* "recently used" flag: set to 1 on lookup, cleared by the timer */
struct timer_list timer; /* expiry timer, handler is sahu_lb_conn_expire() */
volatile unsigned long timeout; /* not referenced by the code in this file */
};
/*function: sahu_lb_conn_init*/
int sahu_lb_conn_init(void){
int idx;
sahu_lb_conn_tab = vmalloc(SAHU_LB_CONN_TAB_SIZE*sizeof(struct list_head *));
if(!sahu_lb_conn_tab){
printk("sahu_lb_conn_init: error/n");
return -1;
}
for(idx=0;idx<SAHU_LB_CONN_TAB_SIZE;idx++){
INIT_LIST_HEAD(&sahu_lb_conn_tab[idx]);
}
get_random_bytes(&sahu_lb_conn_rnd,sizeof(sahu_lb_conn_rnd));
return 0;
}
/*function: sahu_lb_conn_cleanup*/
void sahu_lb_conn_cleanup(void){
struct sahu_lb_conn *cp;
list_for_each_entry(cp,sahu_lb_conn_tab,c_list){
del_timer_sync(&cp->timer);
}
vfree(sahu_lb_conn_tab);
}
/*
 * sahu_lb_conn_hashkey - hash (protocol, address, port) with jhash_3words(),
 * seeded with sahu_lb_conn_rnd.
 *
 * The result is the raw 32-bit hash; it is not reduced to the table size.
 */
static unsigned int sahu_lb_conn_hashkey(unsigned int proto,__u32 addr,__u16 port){
	const u32 addr_word = (__force u32)addr;
	const u32 port_word = (__force u32)port;

	return jhash_3words(addr_word, port_word, proto, sahu_lb_conn_rnd);
}
/*
 * sahu_lb_conn_hash - link a connection into the connection table.
 *
 * The hash of (protocol, client address, client port) is computed but not
 * used yet: every entry is added to the first list head of the table.
 * Always returns 0.
 */
static inline int sahu_lb_conn_hash(struct sahu_lb_conn *cp){
	struct list_head *head = sahu_lb_conn_tab;
	unsigned int bucket;

	bucket = sahu_lb_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
	(void)bucket;	/* per-bucket insertion is not enabled */

	list_add(&cp->c_list, head);
	return 0;
}
/*
 * sahu_lb_conn_unhash - unlink a connection from the connection table.
 *
 * list_del() only needs the entry itself, so no hash is computed here.
 * Always returns 0.
 */
static inline int sahu_lb_conn_unhash(struct sahu_lb_conn *cp){
	list_del(&cp->c_list);
	return 0;
}
/*function: sahu_lb_conn_expire*/
static void sahu_lb_conn_expire(unsigned long data){
int idx=0;
char addr_str[16];
struct sahu_lb_conn *cp = (struct sahu_lb_conn *)data;
if(atomic_read(&cp->refcnt)==0){
del_timer(&cp->timer);
sahu_lb_conn_unhash(cp);
kfree(cp);
return;
}
atomic_set(&cp->refcnt,0);
del_timer(&cp->timer);
cp->timer.expires = jiffies + SAHU_LB_CONN_TIMEOUT;
add_timer(&cp->timer);
inet_i2str(cp->daddr,addr_str);
printk("Time out! dest_addr: %s/t",addr_str);
list_for_each_entry(cp,sahu_lb_conn_tab,c_list){
idx++;
}
printk("ConnTabSize:%d/n",idx);
}
/*
 * sahu_lb_conn_new - create a connection entry and add it to the table.
 *
 * @proto:        IP protocol number
 * @caddr/@cport: client address and port
 * @vaddr/@vport: virtual address and port
 * @daddr/@dport: real-server address and port
 *
 * The entry is allocated with GFP_ATOMIC, starts with refcnt 0 and gets
 * an expiry timer of SAHU_LB_CONN_TIMEOUT.  The timer is armed last, once
 * the entry is fully initialised and linked, so the handler can never see
 * a half-built entry.
 *
 * Returns the new entry, or NULL if no memory is available.  The entry is
 * freed by sahu_lb_conn_expire().
 */
struct sahu_lb_conn *
sahu_lb_conn_new(int proto, __u32 caddr,__u16 cport,__u32 vaddr,__u16 vport,__u32 daddr,__u16 dport){
	struct sahu_lb_conn *cp;

	cp = kzalloc(sizeof(*cp), GFP_ATOMIC);
	if (!cp) {
		printk("sahu_lb_conn_new: no memory avaliable.\n");
		return NULL;
	}

	INIT_LIST_HEAD(&cp->c_list);
	cp->protocol = proto;
	cp->caddr = caddr;
	cp->cport = cport;
	cp->vaddr = vaddr;
	cp->vport = vport;
	cp->daddr = daddr;
	cp->dport = dport;
	atomic_set(&cp->refcnt, 0);

	init_timer(&cp->timer);
	cp->timer.data = (unsigned long)cp;
	cp->timer.function = &sahu_lb_conn_expire;
	cp->timer.expires = jiffies + SAHU_LB_CONN_TIMEOUT;

	sahu_lb_conn_hash(cp);
	add_timer(&cp->timer);
	return cp;
}
/*
 * sahu_lb_conn_in_get - find the connection matching
 * (protocol, caddr, cport, vaddr, vport).
 *
 * The hash is computed but not used yet: the first list head of the table
 * is scanned linearly.  A matching entry is marked as recently used
 * (refcnt set to 1) before it is returned.
 *
 * Returns the entry, or NULL if there is no match.
 */
static inline struct sahu_lb_conn *
sahu_lb_conn_in_get(int protocol,__u32 caddr,__u16 cport,__u32 vaddr,__u16 vport){
	struct sahu_lb_conn *entry;
	unsigned int bucket;

	bucket = sahu_lb_conn_hashkey(protocol, caddr, cport);
	(void)bucket;	/* per-bucket lookup is not enabled */

	list_for_each_entry(entry, sahu_lb_conn_tab, c_list) {
		if (entry->caddr != caddr || entry->cport != cport)
			continue;
		if (entry->vaddr != vaddr || entry->vport != vport)
			continue;
		if (entry->protocol != protocol)
			continue;

		atomic_set(&entry->refcnt, 1);	/* touch this connection */
		return entry;
	}
	return NULL;
}
/*
 * inet_i2str - format an IPv4 address as fixed-width dotted decimal.
 *
 * @addr:     address whose bytes are read in memory order (first byte is
 *            the first octet, i.e. network byte order)
 * @addr_str: output buffer of at least 16 bytes
 *
 * Every octet is written as three zero-padded digits, e.g.
 * "192.168.122.001", followed by a NUL terminator at index 15.
 * (The terminator used to be the multi-character constant '/0', which
 * stored the digit '0' and left the string unterminated.)
 *
 * Always returns 0.
 */
int inet_i2str(unsigned int addr,char *addr_str){
	const unsigned char *octet = (const unsigned char *)&addr;
	int i;

	for (i = 0; i < 4; i++) {
		char *field = addr_str + i * 4;

		field[0] = '0' + octet[i] / 100;
		field[1] = '0' + (octet[i] / 10) % 10;
		field[2] = '0' + octet[i] % 10;
		/* separator after the first three octets, terminator after the last */
		field[3] = (i < 3) ? '.' : '\0';
	}
	return 0;
}
/*
 * sahu_lb_check_diff4 - fold the replacement of a 32-bit value into a
 * checksum accumulator: sums { ~old, new } on top of @oldsum using
 * csum_partial().
 */
static inline __wsum sahu_lb_check_diff4(__be32 old, __be32 new, __wsum oldsum){
	__be32 delta[2];

	delta[0] = ~old;
	delta[1] = new;
	return csum_partial(delta, sizeof(delta), oldsum);
}
/*
 * sahu_lb_check_diff2 - fold the replacement of a 16-bit value into a
 * checksum accumulator: sums { ~old, new } on top of @oldsum using
 * csum_partial().
 */
static inline __wsum sahu_lb_check_diff2(__be16 old, __be16 new, __wsum oldsum){
	__be16 delta[2];

	delta[0] = ~old;
	delta[1] = new;
	return csum_partial(delta, sizeof(delta), oldsum);
}
/*
 * tcp_snat_base - rewrite the TCP source port of a reply and patch the
 * TCP checksum for the source address/port change.
 *
 * @skb:   packet whose network header is an IPv4 header followed by TCP
 * @vaddr: new source address (virtual IP)
 * @daddr: old source address (real server)
 * @vport: new source port
 * @dport: old source port assumed by the checksum update
 *
 * Only the TCP header is modified; the caller rewrites the IP source
 * address.  The checksum is updated incrementally for daddr -> vaddr and
 * dport -> vport.  A CHECKSUM_COMPLETE skb is downgraded to CHECKSUM_NONE
 * because the packet contents have changed.
 *
 * Always returns 0.
 */
static int tcp_snat_base(struct sk_buff *skb,__be32 vaddr,__be32 daddr,__be16 vport,__be16 dport){
	struct tcphdr *tcph = (void *)skb_network_header(skb) + ip_hdrlen(skb);

	tcph->source = vport;
	tcph->check =
		csum_fold(sahu_lb_check_diff4(daddr, vaddr,
			sahu_lb_check_diff2(dport, vport,
				~csum_unfold(tcph->check))));
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->ip_summed = CHECKSUM_NONE;
	return 0;
}
/*
 * tcp_dnat_base - rewrite the TCP destination port of a request and patch
 * the TCP checksum for the destination address/port change.
 *
 * @skb:   packet whose network header is an IPv4 header followed by TCP
 * @vaddr: old destination address (virtual IP)
 * @daddr: new destination address (real server)
 * @vport: old destination port assumed by the checksum update
 * @dport: new destination port
 *
 * Only the TCP header is modified; the caller rewrites the IP destination
 * address.  The checksum is updated incrementally for vaddr -> daddr and
 * vport -> dport.  A CHECKSUM_COMPLETE skb is downgraded to CHECKSUM_NONE
 * because the packet contents have changed.
 *
 * Always returns 0.
 */
static int tcp_dnat_base(struct sk_buff *skb,__be32 vaddr,__be32 daddr,__be16 vport,__be16 dport){
	struct tcphdr *tcph = (void *)skb_network_header(skb) + ip_hdrlen(skb);

	tcph->dest = dport;
	tcph->check =
		csum_fold(sahu_lb_check_diff4(vaddr, daddr,
			sahu_lb_check_diff2(vport, dport,
				~csum_unfold(tcph->check))));
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->ip_summed = CHECKSUM_NONE;
	return 0;
}
/*
 * udp_snat_base - rewrite the UDP source port of a reply and patch the
 * UDP checksum for the source address/port change.
 *
 * @skb:   packet whose network header is an IPv4 header followed by UDP
 * @vaddr: new source address (virtual IP)
 * @daddr: old source address (real server)
 * @vport: new source port
 * @dport: old source port assumed by the checksum update
 *
 * Only the UDP header is modified; the caller rewrites the IP source
 * address.  A checksum field of 0 means "no checksum" for UDP over IPv4
 * and is left as 0; otherwise it is updated incrementally, and a result
 * of 0 is stored as CSUM_MANGLED_0 so it is not mistaken for "none".
 * A CHECKSUM_COMPLETE skb is downgraded to CHECKSUM_NONE.
 *
 * Always returns 0.
 */
static int udp_snat_base(struct sk_buff *skb,__be32 vaddr,__be32 daddr,__be16 vport,__be16 dport){
	struct udphdr *udph = (void *)skb_network_header(skb) + ip_hdrlen(skb);

	udph->source = vport;
	if (udph->check) {
		udph->check =
			csum_fold(sahu_lb_check_diff4(daddr, vaddr,
				sahu_lb_check_diff2(dport, vport,
					~csum_unfold(udph->check))));
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->ip_summed = CHECKSUM_NONE;
	return 0;
}
/*
 * udp_dnat_base - rewrite the UDP destination port of a request and patch
 * the UDP checksum for the destination address/port change.
 *
 * @skb:   packet whose network header is an IPv4 header followed by UDP
 * @vaddr: old destination address (virtual IP)
 * @daddr: new destination address (real server)
 * @vport: old destination port assumed by the checksum update
 * @dport: new destination port
 *
 * Only the UDP header is modified; the caller rewrites the IP destination
 * address.  A checksum field of 0 means "no checksum" for UDP over IPv4
 * and is left as 0; otherwise it is updated incrementally, and a result
 * of 0 is stored as CSUM_MANGLED_0 so it is not mistaken for "none".
 * A CHECKSUM_COMPLETE skb is downgraded to CHECKSUM_NONE.
 *
 * Always returns 0.
 */
static int udp_dnat_base(struct sk_buff *skb,__be32 vaddr,__be32 daddr,__be16 vport,__be16 dport){
	struct udphdr *udph = (void *)skb_network_header(skb) + ip_hdrlen(skb);

	udph->dest = dport;
	if (udph->check) {
		udph->check =
			csum_fold(sahu_lb_check_diff4(vaddr, daddr,
				sahu_lb_check_diff2(vport, dport,
					~csum_unfold(udph->check))));
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->ip_summed = CHECKSUM_NONE;
	return 0;
}
3、Makefile
# Out-of-tree build of the simpLB kernel module against the running kernel.
obj-m += simpLB.o

# Kernel build tree; override on the command line to build for another kernel.
KDIR ?= /lib/modules/$(shell uname -r)/build

.PHONY: all clean install remove

# Recipe lines must start with a TAB character.
all:
	$(MAKE) -C $(KDIR) M=$(CURDIR) modules

clean:
	$(MAKE) -C $(KDIR) M=$(CURDIR) clean

# Load / unload the module (requires root).
install:
	/sbin/insmod simpLB.ko

remove:
	/sbin/rmmod simpLB
4、测试
参见simpLB_3_0