IPVS子系统的调度器统一注册在全局链表ip_vs_schedulers上。
函数register_ip_vs_scheduler负责向IPVS系统内注册调度器。内核中不同的调度器以名称作区分,所有注册的调度器链接在全局链表ip_vs_schedulers上。注册函数首先检查全局链表上是否已有相同名称的调度器,如果有的话,说明该调度器已注册,函数返回错误码-EINVAL;否则,将调度器链接到全局链表ip_vs_schedulers上。
/*
 * Register a scheduler with IPVS by linking it onto the global list
 * ip_vs_schedulers.  Schedulers are identified by name: if an entry with
 * the same name is already on the list, registration is refused with
 * -EINVAL.
 *
 * NOTE: this is an abridged excerpt.  The code that takes
 * ip_vs_sched_mutex and raises the use count, as well as the tail of the
 * function (success return), are not shown here.
 */
int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
{
struct ip_vs_scheduler *sched;
/* Make sure that the scheduler with this name doesn't exist in the scheduler list.
*/
list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
if (strcmp(scheduler->name, sched->name) == 0) {
/* Duplicate name: release the mutex and drop the use count before
 * reporting the error (the matching lock/increment is outside this
 * excerpt).
 */
mutex_unlock(&ip_vs_sched_mutex);
ip_vs_use_count_dec();
pr_err("%s(): [%s] scheduler already existed in the system\n", __func__, scheduler->name);
return -EINVAL;
}
}
/* No name clash was found: add it into the doubly-linked scheduler list
*/
list_add(&scheduler->n_list, &ip_vs_schedulers);
目前内核注册的调度器有:
1)LBLCR - Locality-Based Least-Connection with Replication scheduler
2)FO - Weighted Fail Over module
3)WLC - Weighted Least-Connection Scheduling module
4)SED - Shortest Expected Delay scheduling module
5)RR - Round-Robin Scheduling module
6)WRR - Weighted Round-Robin Scheduling module
7)DH - Destination Hashing scheduling module
8)LBLC - Locality-Based Least-Connection scheduling module
9)NQ - Never Queue scheduling module
10)LC - Least-Connection Scheduling module
11)OVF - Overflow-Connection Scheduling module
12)SH - Source Hashing scheduling module
函数ip_vs_scheduler_get最多两次调用内部的调度器获取函数ip_vs_sched_getbyname:在第一次未找到的情况下,先请求加载相应的内核调度器模块,然后再次调用调度器获取函数。完整的调度器模块名称为字符串ip_vs_加上调度器名称。
/*
 * Look up a registered scheduler by name.
 *
 * The lookup is attempted first against the schedulers that are already
 * registered.  On a miss, the module "ip_vs_<sched_name>" is requested
 * and the lookup is repeated once.
 *
 * Returns whatever ip_vs_sched_getbyname() returns for the final lookup,
 * which is NULL if the scheduler is still not found.
 */
struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
{
	struct ip_vs_scheduler *found = ip_vs_sched_getbyname(sched_name);

	/* Fast path: the scheduler is already registered. */
	if (found != NULL)
		return found;

	/* Not registered yet: ask for the module, then retry the lookup. */
	request_module("ip_vs_%s", sched_name);

	return ip_vs_sched_getbyname(sched_name);
}
函数ip_vs_sched_getbyname遍历全局调度器链表ip_vs_schedulers,通过对比调度器名称参数,获取调度器结构。
/*
 * Walk the global list ip_vs_schedulers and return the entry whose name
 * equals sched_name.
 *
 * NOTE: this is an abridged excerpt.  The acquisition of
 * ip_vs_sched_mutex and the tail of the function (the not-found path)
 * are not shown here.
 */
static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
{
struct ip_vs_scheduler *sched;
list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
/* If the scheduler is backed by a module, try to take a reference on
 * it before looking at the entry; skip the entry if that fails.
 */
if (sched->module && !try_module_get(sched->module)) {
/* This scheduler is just deleted */
continue;
}
/* NOTE(review): no module_put() for non-matching entries is visible in
 * this excerpt - confirm against the full source.
 */
if (strcmp(sched_name, sched->name)==0) {
/* Name matches: release the mutex and return the entry.  The module
 * reference taken above (if any) is still held on this path.
 */
mutex_unlock(&ip_vs_sched_mutex);
return sched; /* HIT */
}
}
例如,使用以下命令配置IPVS虚拟服务,并指定使用RR调度器:
$ ipvsadm -A -t 207.175.44.110:80 -s rr
在以下的添加虚拟服务函数中,如果调度器名称不等于“none”,使用上节的函数ip_vs_scheduler_get获取注册的调度器结构。随后的函数ip_vs_bind_scheduler负责绑定调度器到虚拟服务中。
/*
 * Add a virtual service (abridged excerpt).
 *
 * Only the scheduler-related steps are shown: look up the scheduler named
 * in u->sched_name (unless the name is "none") and bind it to the service.
 * The declaration of ret, the allocation/initialisation of svc, the
 * out_err label and the return statement are not shown here.
 */
static int ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p)
{
struct ip_vs_scheduler *sched = NULL;
struct ip_vs_service *svc = NULL;
/* strcmp() is non-zero when the requested name differs from "none",
 * i.e. a real scheduler was asked for.
 */
if (strcmp(u->sched_name, "none")) {
sched = ip_vs_scheduler_get(u->sched_name);
if (!sched) {
pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
ret = -ENOENT;
goto out_err;
}
}
/* Bind the scheduler */
if (sched) {
ret = ip_vs_bind_scheduler(svc, sched);
if (ret)
goto out_err;
/* Binding succeeded: forget the local pointer.  Presumably this keeps
 * later cleanup code from releasing a scheduler that now belongs to
 * the service - the cleanup path is not shown, TODO confirm.
 */
sched = NULL;
}
}
如下,首先调用调度器自身的服务初始化函数指针init_service,对于RR调度器,其指向函数ip_vs_rr_init_svc。最后,将调度器结构赋予虚拟服务结构的scheduler成员,完成绑定。
/*
 * Bind a scheduler to a virtual service.
 *
 * If the scheduler provides an init_service callback, it is invoked on
 * the service first; a non-zero result is logged and returned to the
 * caller without binding.  Otherwise the scheduler is published in
 * svc->scheduler via rcu_assign_pointer() and 0 is returned.
 */
int ip_vs_bind_scheduler(struct ip_vs_service *svc, struct ip_vs_scheduler *scheduler)
{
	if (scheduler->init_service) {
		int err = scheduler->init_service(svc);

		if (err) {
			pr_err("%s(): init error\n", __func__);
			return err;
		}
	}

	/* Per-service initialisation is done; make the scheduler visible. */
	rcu_assign_pointer(svc->scheduler, scheduler);

	return 0;
}
在IPVS入口函数ip_vs_in中,如果没有找到已有的连接,说明当前报文是一个新发起的连接,使用函数ip_vs_try_to_schedule尝试进行调度。
/*
 * IPVS input entry point (abridged excerpt).
 *
 * Only the connection lookup and the scheduling fallback are shown; the
 * declarations of pd, pp, cp and iph, as well as the rest of the packet
 * handling and the final return, are not shown here.
 */
static unsigned int ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
{
/* Fetch the per-protocol data for this packet's protocol and, from it,
 * the protocol handler.
 */
pd = ip_vs_proto_data_get(ipvs, iph.protocol);
pp = pd->pp;
/* Check if the packet belongs to an existing connection entry
*/
cp = pp->conn_in_get(ipvs, af, skb, &iph);
if (unlikely(!cp)) {
/* No existing connection: try to schedule a new one.  When
 * ip_vs_try_to_schedule() returns 0, the verdict it stored in v is
 * returned to the caller.
 */
int v;
if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph))
return v;
}
}
函数ip_vs_try_to_schedule将调用特定协议的连接调度函数指针conn_schedule,代码如下:
/*
 * Try to schedule a new connection for a packet by delegating to the
 * protocol's conn_schedule callback (abridged excerpt; the remainder of
 * the function is not shown here).
 */
static unsigned int ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph)
{
struct ip_vs_protocol *pp = pd->pp;
/* The protocol scheduler is only invoked when iph->fragoffs is zero. */
if (!iph->fragoffs) {
/* Schedule and create new connection entry into cpp */
/* A zero result from conn_schedule makes this function return 0. */
if (!pp->conn_schedule(ipvs, af, skb, pd, verdict, cpp, iph))
return 0;
}
对于IPVS支持的协议类型UDP、AH/ESP、SCTP和TCP,对应的处理函数分别如下(AH和ESP使用同一个处理函数):
udp_conn_schedule
ah_esp_conn_schedule
sctp_conn_schedule
tcp_conn_schedule
以TCP协议为例,处理函数tcp_conn_schedule如下,其调用函数ip_vs_schedule进行调度处理。
/*
 * TCP implementation of the conn_schedule callback (abridged excerpt).
 *
 * Only the hand-off to ip_vs_schedule() is shown.  The code that looks up
 * svc, the declaration of ignored, and the rest of the function
 * (including its return value) are not shown here.
 */
static int tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph)
{
struct ip_vs_service *svc;
/* NOTE(review): svc is not assigned in this excerpt; the service lookup
 * that sets it has been omitted.
 */
if (svc) {
/* Let the virtual server select a real server for the incoming connection, and create a connection entry.
*/
*cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
}
最终,函数ip_vs_schedule取得虚拟服务中绑定的调度器,并通过其特定的调度函数指针schedule执行调度处理,该调度函数返回选中的目的真实服务器。
/*
 * Select a destination for a new connection using the scheduler bound to
 * the virtual service (abridged excerpt).
 *
 * Only the scheduler invocation is shown; what is done with dest
 * afterwards and the function's return are not shown here.
 */
struct ip_vs_conn *ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_proto_data *pd, int *ignored, struct ip_vs_iphdr *iph)
{
struct ip_vs_scheduler *sched;
struct ip_vs_dest *dest;
/* Read the scheduler pointer stored in svc->scheduler. */
sched = rcu_dereference(svc->scheduler);
if (sched) {
/* read svc->sched_data after svc->scheduler */
smp_rmb();
/* Ask the bound scheduler to pick a destination for this packet. */
dest = sched->schedule(svc, skb, iph);
}
内核版本 4.15