TC执行过程解析

查看iproute2源码文件中的tc文件夹:

先看makefile

# Makefile for the tc utility (iproute2, tc/ directory).
#
# Objects linked directly into the tc binary: the command front ends
# (tc_qdisc / tc_class / tc_filter), shared helpers and the ematch parser.
TCOBJ= tc.o tc_qdisc.o tc_class.o tc_filter.o tc_util.o \
       m_police.o m_estimator.o m_action.o m_ematch.o \
       emp_ematch.yacc.o emp_ematch.lex.o

include ../Config

# Built-in modules. Judging by the file names: q_* = queueing disciplines,
# f_* = filters, m_* = actions, p_* = pedit protocol helpers, em_* = ematches.
TCMODULES :=
TCMODULES += q_fifo.o
TCMODULES += q_sfq.o
TCMODULES += q_red.o
TCMODULES += q_prio.o
TCMODULES += q_tbf.o
TCMODULES += q_cbq.o
TCMODULES += f_rsvp.o
TCMODULES += f_u32.o
TCMODULES += f_route.o
TCMODULES += f_fw.o
TCMODULES += f_basic.o
TCMODULES += q_dsmark.o
TCMODULES += q_gred.o
TCMODULES += f_tcindex.o
TCMODULES += q_ingress.o
TCMODULES += q_hfsc.o
TCMODULES += q_htb.o
TCMODULES += m_gact.o
TCMODULES += m_mirred.o
TCMODULES += m_ipt.o
TCMODULES += m_pedit.o
TCMODULES += p_ip.o
TCMODULES += p_icmp.o
TCMODULES += p_tcp.o
TCMODULES += p_udp.o
TCMODULES += em_nbyte.o
TCMODULES += em_cmp.o
TCMODULES += em_u32.o
TCMODULES += em_meta.o

TCOBJ += $(TCMODULES)

# Objects archived into libtc.a (linked through -ltc below).
TCLIB := tc_core.o
TCLIB += tc_red.o
TCLIB += tc_cbq.o
TCLIB += tc_estimator.o

CFLAGS += -DCONFIG_GACT -DCONFIG_GACT_PROB

# Modules built as shared objects; q_atm.so only when ../Config enables it.
TCSO :=
TCSO += q_netem.so
ifeq ($(TC_CONFIG_ATM),y)
  TCSO += q_atm.so
endif

LDLIBS += -L. -ltc -lm -ldl

# Export the binary's symbols to the dynamic symbol table.
LDFLAGS += -Wl,-export-dynamic

YACC := bison
LEX := flex

%.so: %.c
	$(CC) $(CFLAGS) -shared -fpic $< -o $@


all: libtc.a tc $(TCSO)

# No explicit recipe: tc is linked by make's implicit rule.
tc: $(TCOBJ) $(LIBNETLINK) $(LIBUTIL) $(TCLIB)

libtc.a: $(TCLIB)
	$(AR) rcs $@ $(TCLIB)

install: all
	mkdir -p $(DESTDIR)/usr/lib/tc
	install -m 0755 -s tc $(DESTDIR)$(SBINDIR)
	for i in $(TCSO); \
	do install -m 755 -s $$i $(DESTDIR)/usr/lib/tc; \
	done

clean:
	rm -f $(TCOBJ) $(TCLIB) libtc.a tc *.so emp_ematch.yacc.h; \
	rm -f emp_ematch.yacc.output

# q_atm.so needs libatm, so it overrides the generic %.so pattern rule.
q_atm.so: q_atm.c
	$(CC) $(CFLAGS) -shared -fpic -o q_atm.so q_atm.c -latm

%.yacc.c: %.y
	$(YACC) $(YACCFLAGS) -o $@ $<

%.lex.c: %.l
	$(LEX) $(LEXFLAGS) -o$@ $<

 

淡蓝色标记的为我们关注.

为了跟踪程序是如何执行的,以执行下面命令为例:

Code

tc qdisc add dev eth0 root handle 1: htb default 12[cmd1]

 

下面看tc.c中的main函数

int main(int argc, char **argv)

{

/**

*cmd1:argc=11(maybe 12)

*argv:{ `tc` ` qdisc` ` add` ` dev ` `eth0` ` root ` `handle`  1:` `htb`` default` *`12`}

*/

       int ret;

       int do_batching = 0;

       char *batchfile = NULL;

 

       while (argc > 1) {

              if (argv[1][0] != '-')

                     break;

              if (matches(argv[1], "-stats") == 0 ||

                      matches(argv[1], "-statistics") == 0) {

                     ++show_stats;

              } else if (matches(argv[1], "-details") == 0) {

                     ++show_details;

              } else if (matches(argv[1], "-raw") == 0) {

                     ++show_raw;

              } else if (matches(argv[1], "-Version") == 0) {

                     printf("tc utility, iproute2-ss%s/n", SNAPSHOT);

                     return 0;

              } else if (matches(argv[1], "-iec") == 0) {

                     ++use_iec;

              } else if (matches(argv[1], "-help") == 0) {

                     usage();

                     return 0;

              } else if (matches(argv[1], "-force") == 0) {

                     ++force;

              } else   if (matches(argv[1], "-batch") == 0) {

                     do_batching = 1;

                     if (argc > 2)

                            batchfile = argv[2];

                     argc--;  argv++;

              } else {

                     fprintf(stderr, "Option /"%s/" is unknown, try /"tc -help/"./n", argv[1]);

                     return -1;

              }

              argc--;  argv++;

       }

 

       if (do_batching)

              return batch(batchfile);

 

       if (argc <= 1) {

              usage();

              return 0;

       }

 

       tc_core_init();//从系统中读出一些设置参数

       if (rtnl_open(&rth, 0) < 0) {

/**

*用户层和系统内核通信采用的是rtnetlink机制,

*所以需要rtnetlink的支持

*/

              fprintf(stderr, "Cannot open rtnetlink/n");

              exit(1);

       }

/**

*tc的命令形式:tc ……

*所以传递给do_cmd的参数中,去掉了tc这个关键字

*这就是为什么argc-1argv+1

*argv:{ ` qdisc` ` add` ` dev ` `eth0` ` root ` `handle`  1:` default` *`12`}

*/

       ret = do_cmd(argc-1, argv+1);

       rtnl_close(&rth);

       return ret;

}

do_cmd会根据不同的类型,调用不同的函数来处理。例如,如果是qdisc,那么函数do_qdisc会被调用;当然,传递给do_qdisc函数的参数中,关键字qdisc也会被去除。那么对于[cmd1],传递给do_qdisc的argv为:

argv:{ `add` `dev` `eth0` `root` `handle` `1:` `htb` `default` `12` }



Code::do_cmd

/**
 * Dispatch on the object word (qdisc / class / filter / actions / help).
 *
 * argv[0] is the object keyword. Each handler is called with argc-1 / argv+1
 * so that it does not see its own keyword again (do_qdisc() never sees
 * "qdisc", and so on).
 *
 * Returns the handler's result, 0 after printing help, or -1 when the object
 * is not recognised.
 */
static int do_cmd(int argc, char **argv)
{
	/* The path followed by the walkthrough command [cmd1]. */
	if (matches(*argv, "qdisc") == 0)
		return do_qdisc(argc-1, argv+1);

	if (matches(*argv, "class") == 0)
		return do_class(argc-1, argv+1);

	if (matches(*argv, "filter") == 0)
		return do_filter(argc-1, argv+1);

	if (matches(*argv, "actions") == 0)
		return do_action(argc-1, argv+1);

	if (matches(*argv, "help") == 0) {
		usage();
		return 0;
	}

	fprintf(stderr, "Object \"%s\" is unknown, try \"tc help\".\n",
		*argv);
	return -1;
}

 

下面看看do_qdisc

int do_qdisc(int argc, char **argv)

{

         if (argc < 1)

                   return tc_qdisc_list(0, NULL);//列出系统默认的qdiscfilter等信息

         if (matches(*argv, "add") == 0)

                   return tc_qdisc_modify(RTM_NEWQDISC, NLM_F_EXCL|NLM_F_CREATE, argc-1, argv+1);

         if (matches(*argv, "change") == 0)

                   return tc_qdisc_modify(RTM_NEWQDISC, 0, argc-1, argv+1);

         if (matches(*argv, "replace") == 0)

                   return tc_qdisc_modify(RTM_NEWQDISC, NLM_F_CREATE|NLM_F_REPLACE, argc-1, argv+1);

         if (matches(*argv, "link") == 0)

                   return tc_qdisc_modify(RTM_NEWQDISC, NLM_F_REPLACE, argc-1, argv+1);

         if (matches(*argv, "delete") == 0)

                   return tc_qdisc_modify(RTM_DELQDISC, 0,  argc-1, argv+1);

#if 0

         if (matches(*argv, "get") == 0)

                   return tc_qdisc_get(RTM_GETQDISC, 0,  argc-1, argv+1);

#endif

         if (matches(*argv, "list") == 0 || matches(*argv, "show") == 0

             || matches(*argv, "lst") == 0)

                   return tc_qdisc_list(argc-1, argv+1);

         if (matches(*argv, "help") == 0)

                   usage();

         fprintf(stderr, "Command /"%s/" is unknown, try /"tc qdisc help/"./n", *argv);

         return -1;

}

 

tc_qdisc_modify函数

 

传递给tc_qdisc_modify函数的argv为:

argv:{ `dev` `eth0` `root` `handle` `1:` `htb` `default` `12` }

tc_qdisc_modify(RTM_NEWQDISC, NLM_F_EXCL|NLM_F_CREATE, argc-1, argv+1);

tc_qdisc_modify函数参数的说明:

1、定义rtnetlink(netlink)消息类型:RTM_NEWQDISC,即新建一个qdisc

2、设置标志flag,flag有如下几种:

•  NLM_F_REPLACE(0x100)覆盖已存在的

•  NLM_F_EXCL(0x200)如果存在就不创建

•  NLM_F_CREATE(0x400)不存在就创建

•  NLM_F_APPEND(0x800)添加到末尾

Code::flag

#define NLM_F_REPLACE       0x100   /* Override existing             */

#define NLM_F_EXCL 0x200  /* Do not touch, if it exists */

#define NLM_F_CREATE  0x400  /* Create, if it does not exist     */

#define NLM_F_APPEND  0x800  /* Add to end of list             */

/*linux系统里面有相同的定义*/

3、设置命令参数的个数和命令参数

Code::tc_qdisc_modify

int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv)

{

/**

* argv:{ ` dev ` `eth0` ` root ` `handle`  1:` `htb`` default` *`12`}

*/

       struct qdisc_util *q = NULL;

       struct tc_estimator est;

       char  d[16];

       char  k[16];

       struct {

/**

* nlmsghdr为和系统内核通信的消息结构的头

*tcmsgtc消息结构

*/

              struct nlmsghdr     n;

              struct tcmsg            t;

              char                        buf[TCA_BUF_MAX];

       } req;

 

       memset(&req, 0, sizeof(req));

       memset(&est, 0, sizeof(est));

       memset(&d, 0, sizeof(d));

       memset(&k, 0, sizeof(k));

 

       req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));

       req.n.nlmsg_flags = NLM_F_REQUEST|flags;

       req.n.nlmsg_type = cmd;//对应[cmd1],type=rtm_newqdisc

       req.t.tcm_family = AF_UNSPEC;//family

 

       while (argc > 0) {

              if (strcmp(*argv, "dev") == 0) {

                     NEXT_ARG();//point to nextà ethi(i is number)

                     if (d[0])

                            duparg("dev", *argv);

                     strncpy(d, *argv, sizeof(d)-1);//eth0

              } else if (strcmp(*argv, "handle") == 0) {

                     __u32 handle;

                     if (req.t.tcm_handle)

                            duparg("handle", *argv);

                     NEXT_ARG();

                     if (get_qdisc_handle(&handle, *argv))//handle=1

                            invarg(*argv, "invalid qdisc ID");

                     req.t.tcm_handle = handle;

              } else if (strcmp(*argv, "root") == 0) {

                     if (req.t.tcm_parent) {

/**

*如果存在parent 那么就不能为root

*/

                            fprintf(stderr, "Error: /"root/" is duplicate parent ID/n");

                            return -1;

                     }

                     req.t.tcm_parent = TC_H_ROOT;

#ifdef TC_H_INGRESS

              } else if (strcmp(*argv, "ingress") == 0) {

                     if (req.t.tcm_parent) {

                            fprintf(stderr, "Error: /"ingress/" is a duplicate parent ID/n");

                            return -1;

                     }

                     req.t.tcm_parent = TC_H_INGRESS;

                     strncpy(k, "ingress", sizeof(k)-1);

                     q = get_qdisc_kind(k);

                     req.t.tcm_handle = 0xffff0000;

 

                     argc--; argv++;

                     break;

#endif

              } else if (strcmp(*argv, "parent") == 0) {

                     __u32 handle;

                     NEXT_ARG();

                     if (req.t.tcm_parent)

                            duparg("parent", *argv);

                     if (get_tc_classid(&handle, *argv))

                            invarg(*argv, "invalid parent ID");

                     req.t.tcm_parent = handle;

              } else if (matches(*argv, "estimator") == 0) {

                     if (parse_estimator(&argc, &argv, &est))

                            return -1;

              } else if (matches(*argv, "help") == 0) {

                     usage();

              } else {

                     strncpy(k, *argv, sizeof(k)-1);//k=`htb`

/**

*get_qdisc_kind

*1、首先查看,与要添加的qdisc相关的qdisc_util是否已经存在了

* 如果存在,那么返回

*2、如果qdisc_util 还存在,那么查找系统的动态链接库,看是否存在

*通过函数dlopen打开动态链接库,然后通过函数q = dlsym(dlh, buf)

*得到相应的qdisc_util

*3、如果系统的动态链接也不存在,[cmd1]就是这种能够情况

*那么,就创建一个新的(q = malloc(sizeof(*q));),如果成功

*然后设置一下参数{

*            q->id = strcpy(malloc(strlen(str)+1), str);

*            q->parse_qopt = parse_noqopt;

*            q->print_qopt = print_noqopt; }

*4、最后将找到的qdisc_util,挂载到qdisc_list的上(链表),并返回q

*5、如果不成功,那么返回null(无能为力了)

*/

                     q = get_qdisc_kind(k);

                     argc--; argv++;

                     break;

              }

              argc--; argv++;

       }

/**

*addattr函数设置nlmsghdr中的一些参数,为和内核通信传递参数做准备

*/

       if (k[0])

              addattr_l(&req.n, sizeof(req), TCA_KIND, k, strlen(k)+1);

       if (est.ewma_log)

              addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est));

 

       if (q) {

              if (!q->parse_qopt) {

                     fprintf(stderr, "qdisc '%s' does not support option parsing/n", k);

                     return -1;

              }

/**

*在这个例子中,parse_qopt=parse_noqopt, do nothing and return 0

*/

              if (q->parse_qopt(q, argc, argv, &req.n))

                     return 1;

       } else {

              if (argc) {

                     if (matches(*argv, "help") == 0)

                            usage();

 

                     fprintf(stderr, "Garbage instead of arguments /"%s .../". Try /"tc qdisc help/"./n", *argv);

                     return -1;

              }

       }

/**

*根据设备名,找到设备,并设置消息,传递设备信息到内核

*/

       if (d[0])  {

              int idx;

 

             ll_init_map(&rth);

 

              if ((idx = ll_name_to_index(d)) == 0) {

                     fprintf(stderr, "Cannot find device /"%s/"/n", d);

                     return 1;

              }

              req.t.tcm_ifindex = idx;

       }

/**

*rtnl_talk函数,实现和内核通信

*rtnl_talk函数中,会调用sendmsg函数,将信息传递到内核,

*在内核中,最终会调用tc_modify_qidsc函数来创建qdisc

*pktsched_init函数中(linux 内核)

*rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);

*而消息设定信息:

*  req.n.nlmsg_type = cmd;//对应[cmd1],type=RTM_NEWQDISC

*     req.t.tcm_family = AF_UNSPEC;

*当然rtnl_talk会监听内核返回的消息,并做相应的处理

*/

      if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)

              return 2;

 

       return 0;

}

 

你可能感兴趣的:(TC执行过程解析)