3、 什么时候调用qdisc_create函数
在函数tc_modify_qdisc中,会调用qdisc_create函数,而tc_modify_qdisc这个函数在pktsched_init(void)中,被注册到系统中
CODE
/*
 * pktsched_init - boot-time initialisation of the packet scheduler API.
 *
 * Registers the per-network-namespace operations, the built-in queueing
 * disciplines, and the rtnetlink handlers for the qdisc and traffic-class
 * messages.
 *
 * Returns 0 on success, or the error code returned by
 * register_pernet_subsys() if the per-netns operations cannot be
 * registered.
 */
static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		/* The message must end in a real newline ("\n"), not "/n". */
		printk(KERN_ERR "pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	/*
	 * By default only these qdiscs are registered here:
	 * pfifo, bfifo, pfifo_head_drop and mq.
	 */
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);

	/*
	 * Hook the qdisc / tclass handlers into rtnetlink. RTM_NEWQDISC is
	 * served by tc_modify_qdisc; RTM_DELQDISC and RTM_GETQDISC share
	 * tc_get_qdisc; all three tclass messages share tc_ctl_tclass.
	 */
	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);

	return 0;
}
下面看看rtnl_register函数具体做了哪些事情.
/**
 * rtnl_register - Register a rtnetlink message type, panicking on failure
 * @protocol: Protocol family or PF_UNSPEC
 * @msgtype: rtnetlink message type
 * @doit: Function pointer called for each request message
 * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
 *
 * Thin wrapper that delegates the actual registration to
 * __rtnl_register() and calls panic() if that returns a negative value.
 */
void rtnl_register(int protocol, int msgtype,
		   rtnl_doit_func doit, rtnl_dumpit_func dumpit)
{
	if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0)
		/* The message must end in a real newline ("\n"), not "/n". */
		panic("Unable to register rtnetlink message handler, "
		      "protocol = %d, message type = %d\n",
		      protocol, msgtype);
}

/**
 * __rtnl_register - Register a rtnetlink message type
 * @protocol: Protocol family or PF_UNSPEC
 * @msgtype: rtnetlink message type
 * @doit: Function pointer called for each request message
 * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
 *
 * Registers the specified function pointers (at least one of them has
 * to be non-NULL) to be called whenever a request message for the
 * specified protocol family and message type is received.
 *
 * The special protocol family PF_UNSPEC may be used to define fallback
 * function pointers for the case when no entry for the specific protocol
 * family exists.
 *
 * Returns 0 on success or a negative error code.
 */
int __rtnl_register(int protocol, int msgtype,
		    rtnl_doit_func doit, rtnl_dumpit_func dumpit)
{
	struct rtnl_link *tab;
	int msgindex;

	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
	/* Handlers are stored by message index, not by the raw message type. */
	msgindex = rtm_msgindex(msgtype);

	tab = rtnl_msg_handlers[protocol];
	if (tab == NULL) {
		/* First registration for this family: allocate its zeroed table. */
		tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
		if (tab == NULL)
			return -ENOBUFS;

		rtnl_msg_handlers[protocol] = tab;
	}

	/* A NULL argument leaves any previously registered handler in place. */
	if (doit)
		tab[msgindex].doit = doit;

	if (dumpit)
		tab[msgindex].dumpit = dumpit;

	return 0;
}
EXPORT_SYMBOL_GPL(__rtnl_register);
|
/* Per-protocol-family rtnetlink handler tables, indexed by protocol family (0..RTNL_FAMILY_MAX). A NULL entry means no table has been allocated for that family yet; a non-NULL entry points to an array of struct rtnl_link indexed by message index. */
static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; |
看了这么多的代码,可以知道tc_modify_qdisc会被注册到rtnl_msg_handlers中。
rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
首先通过PF_UNSPEC在rtnl_msg_handlers中找到相应的rtnl_link表,然后把处理函数注册到该表中下标为rtm_msgindex(RTM_NEWQDISC)的表项的doit成员中(注意下标是由消息类型换算得到的msgindex,而不是消息类型本身)。即:
rtnl_msg_handlers[PF_UNSPEC][rtm_msgindex(RTM_NEWQDISC)].doit = tc_modify_qdisc
尽管知道了tc_modify_qdisc会被注册到rtnl_msg_handlers中,但是还不知道它是在何时被调用的。
其实在系统中:
rtnetlink_init会调用函数register_pernet_subsys(&rtnetlink_net_ops),其中rtnetlink_net_ops的定义为:
static struct pernet_operations rtnetlink_net_ops = { .init = rtnetlink_net_init, .exit = rtnetlink_net_exit, };
而函数register_pernet_subsys最终会调用其中的.init回调,即rtnetlink_net_init。
下面看看rtnetlink_net_init函数
/*
 * Per-network-namespace init hook for rtnetlink.
 *
 * Creates the NETLINK_ROUTE kernel socket for @net, with rtnetlink_rcv as
 * its receive callback, and stores the socket in net->rtnl.
 *
 * Returns 0 on success, or -ENOMEM if the socket could not be created.
 */
static int __net_init rtnetlink_net_init(struct net *net)
{
	struct sock *rtnl_sk = netlink_kernel_create(net, NETLINK_ROUTE,
						     RTNLGRP_MAX, rtnetlink_rcv,
						     &rtnl_mutex, THIS_MODULE);

	if (rtnl_sk == NULL)
		return -ENOMEM;

	net->rtnl = rtnl_sk;
	return 0;
}
在rtnetlink_net_init函数中,调用了netlink_kernel_create函数,这个函数会把rtnetlink_rcv函数赋值给所创建的netlink_sock->netlink_rcv。当在netlink上接收到数据时,就会调用rtnetlink_rcv函数来处理。
再看看rtnetlink_rcv函数
/*
 * Receive callback installed on the rtnetlink kernel socket.
 *
 * Passes the incoming skb to netlink_rcv_skb() with rtnetlink_rcv_msg as
 * the per-message handler; the whole call runs between rtnl_lock() and
 * rtnl_unlock().
 */
static void rtnetlink_rcv(struct sk_buff *skb)
{
	rtnl_lock();
	netlink_rcv_skb(skb, rtnetlink_rcv_msg);
	rtnl_unlock();
}
在rtnetlink_rcv函数中,调用netlink_rcv_skb函数,这个函数会回调函数rtnetlink_rcv_msg。rtnetlink_rcv_msg正是我们找寻的函数。
Ok,下面看看rtnetlink_rcv_msg函数是如何找到相应的消息处理函数的。
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) |
rtnetlink_rcv_msg函数的参数有sk_buff和nlmsghdr。sk_buff为要处理的数据,nlmsghdr则包含了这个消息的类型等参数。
- 通过nlmsghdr找到对应的family和type。
- (如果是需要调用doit函数)调用rtnl_get_doit(family, type)来得到相应的doit函数。如果是要调用tc_modify_qdisc函数,那么family=PF_UNSPEC, type=RTM_NEWQDISC
- 最后调用得到的doit函数,来处理数据
return doit(skb, nlh, (void *)&rta_buf[0]) |
CODE:rtnl_get_doit
/*
 * Look up the doit handler for a protocol family and message index.
 *
 * The family's own table is consulted first (only when protocol does not
 * exceed RTNL_FAMILY_MAX). If that table is missing or has no doit handler
 * at msgindex, the lookup falls back to the PF_UNSPEC table.
 *
 * Returns the handler, or NULL if the table finally selected is NULL or
 * its doit entry at msgindex is NULL.
 */
static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
{
	struct rtnl_link *handlers = NULL;

	if (protocol <= RTNL_FAMILY_MAX)
		handlers = rtnl_msg_handlers[protocol];

	/* No family-specific handler: fall back to the PF_UNSPEC table. */
	if (!handlers || !handlers[msgindex].doit)
		handlers = rtnl_msg_handlers[PF_UNSPEC];

	if (!handlers)
		return NULL;

	return handlers[msgindex].doit;
}
使用strace tc qdisc add dev eth0 root handle 1: htb default 12 的结果如下:
(先列在这里吧,我也不知道是怎么回事 :( )
execve("/sbin/tc", ["tc", "qdisc", "add", "dev", "eth0", "root", "handle", "1:", "htb", "default", "12"], [/* 21 vars */]) = 0
……………………………………………………………………
open("/proc/net/psched", O_RDONLY) = 3
fstat64(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7fdb000
read(3, "00000001 00000001 000f4240 00000"..., 4096) = 36
close(3) = 0
munmap(0xb7fdb000, 4096) = 0
socket(PF_NETLINK, SOCK_RAW, 0) = 3
setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0
setsockopt(3, SOL_SOCKET, SO_RCVBUF, [32768], 4) = 0
bind(3, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(3, {sa_family=AF_NETLINK, pid=23391, groups=00000000}, [12]) = 0
time(NULL) = 1306133974
open("/usr/lib/tc/q_htb.so", O_RDONLY) = -1 ENOENT (No such file or directory)
sendto(3, "\24\0\0\0\22\0\1\3\327\5\332M\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20
recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\364\0\0\0\20\0\2\0\327\5\332M_[\0\0\0\0\4\3\1\0\0\0I\0\1\0\0\0\0\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 736
recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0\327\5\332M_[\0\0\0\0\0\0\1\0\0\0I\0\1\0\0\0\0\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 20
sendmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"H\0\0\0$\0\5\6\330\5\332M\0\0\0\0\0\0\0\0\2\0\0\0\0\0\1\0\377\377\377\377"..., 72}], msg_controllen=0, msg_flags=0}, 0) = 72
recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"$\0\0\0\2\0\0\0\330\5\332M_[\0\0\0\0\0\0H\0\0\0$\0\5\6\330\5\332M"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 36
close(3) = 0
exit_group(0)