Linux 以太网驱动

Linux 以太网驱动(RTL8168 driver in Linux 3.10)

1.网络子系统初始化

在以太网驱动加载之前,首先会进行网络子系统初始化
subsys_initcall(net_dev_init)

net_dev_init():

/*
 * net_dev_init - one-time initialisation of the core network device layer.
 *
 * Runs the setup steps below in order and stops at the first step that
 * reports failure.  Steps that already succeeded are not undone here.
 *
 * Returns 0 once every step has succeeded; otherwise -ENOMEM (rc is preset
 * to -ENOMEM and only cleared at the very end).
 */
static int __init net_dev_init(void)
{
    int i, rc = -ENOMEM;

    /* This function must run while dev_boot_phase is still set. */
    BUG_ON(!dev_boot_phase);

    if (dev_proc_init()) 
        goto out;

    if (netdev_kobject_init())
        goto out;

    /* Empty the protocol handler lists before anything can register on them. */
    INIT_LIST_HEAD(&ptype_all);
    for (i = 0; i < PTYPE_HASH_SIZE; i++)
        INIT_LIST_HEAD(&ptype_base[i]);

    INIT_LIST_HEAD(&offload_base);

    if (register_pernet_subsys(&netdev_net_ops))
        goto out;

    /*
     *  Initialise the packet receive queues.
     */

    for_each_possible_cpu(i) {
        struct softnet_data *sd = &per_cpu(softnet_data, i);

        memset(sd, 0, sizeof(*sd));
        skb_queue_head_init(&sd->input_pkt_queue);
        skb_queue_head_init(&sd->process_queue);
        sd->completion_queue = NULL;
        INIT_LIST_HEAD(&sd->poll_list);
        sd->output_queue = NULL;
        /* Empty output queue: the tail pointer refers back to the head. */
        sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
        sd->csd.func = rps_trigger_softirq;
        sd->csd.info = sd;
        sd->csd.flags = 0;
        sd->cpu = i;
#endif

        /* Per-CPU backlog NAPI instance, polled by process_backlog(). */
        sd->backlog.poll = process_backlog;
        sd->backlog.weight = weight_p;
        sd->backlog.gro_list = NULL;
        sd->backlog.gro_count = 0;
    }

    /* register_netdevice() BUG()s while this flag is still set. */
    dev_boot_phase = 0;

    /* The loopback device is special if any other network devices
     * is present in a network namespace the loopback device must
     * be present. Since we now dynamically allocate and free the
     * loopback device ensure this invariant is maintained by
     * keeping the loopback device as the first device on the
     * list of network devices.  Ensuring the loopback devices
     * is the first device that appears and the last network device
     * that disappears.
     */
    if (register_pernet_device(&loopback_net_ops))
        goto out;

    if (register_pernet_device(&default_device_ops))
        goto out;

    /* Install the transmit and receive softirq handlers. */
    open_softirq(NET_TX_SOFTIRQ, net_tx_action);
    open_softirq(NET_RX_SOFTIRQ, net_rx_action);

    hotcpu_notifier(dev_cpu_callback, 0);
    dst_init();
    rc = 0;
out:
    return rc;
}

dev_proc_init():

/*
 * dev_proc_init - register the two per-namespace proc operation sets.
 *
 * Registers dev_proc_ops first; dev_mc_net_ops is only attempted when that
 * succeeded.  Returns the first non-zero result, or the result of the
 * second registration.
 */
int __init dev_proc_init(void)
{
    int err;

    err = register_pernet_subsys(&dev_proc_ops);
    if (err)
        return err;

    return register_pernet_subsys(&dev_mc_net_ops);
}

register_pernet_subsys():

/*
 * register_pernet_subsys - register per-namespace subsystem operations.
 * @ops: operations to register
 *
 * Calls register_pernet_operations() with first_device as the list
 * position while holding net_mutex, and returns that call's result.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
    int rc;

    mutex_lock(&net_mutex);
    rc = register_pernet_operations(first_device, ops);
    mutex_unlock(&net_mutex);

    return rc;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);

register_pernet_operations()在没有定义CONFIG_NET_NS时,只有一个network namespace,即init_net,此时调用的是init_net版本的__register_pernet_operations()函数。

/*
 * Variant shown for the single-namespace case: run the ops' init step for
 * init_net only.  The @list argument is not used by this variant.
 * Returns whatever ops_init() returns.
 */
static int __register_pernet_operations(struct list_head *list,
                    struct pernet_operations *ops)
{
    return ops_init(ops, &init_net);
}
/*
 * The initial network namespace.  Its device list head is initialised
 * statically here, so it is already an empty list at build time.
 */
struct net init_net = {
    .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);

此时该函数所做的只是以struct net init_net为参数调用ops->init()

dev_proc_ops定义:

/* Per-namespace hooks that set up and tear down the device proc entries. */
static struct pernet_operations __net_initdata dev_proc_ops = {
    .init = dev_proc_net_init,
    .exit = dev_proc_net_exit,
};
/*
 * dev_proc_net_init - create the "dev", "softnet_stat" and "ptype" proc
 * entries under net->proc_net, then run wext_proc_init().
 * @net: namespace being initialised
 *
 * If any step fails, the entries created so far are removed in reverse
 * order and -ENOMEM is returned.  Returns 0 on success.
 */
static int __net_init dev_proc_net_init(struct net *net)
{
    if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops))
        goto fail;

    if (!proc_create("softnet_stat", S_IRUGO, net->proc_net,
             &softnet_seq_fops))
        goto undo_dev;

    if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops))
        goto undo_softnet;

    if (wext_proc_init(net))
        goto undo_ptype;

    return 0;

undo_ptype:
    remove_proc_entry("ptype", net->proc_net);
undo_softnet:
    remove_proc_entry("softnet_stat", net->proc_net);
undo_dev:
    remove_proc_entry("dev", net->proc_net);
fail:
    return -ENOMEM;
}

在/proc/net目录下创建dev, softnet_stat, ptype文件。
dev_mc_net_ops定义:

/* Per-namespace hooks that create and remove the "dev_mcast" proc entry. */
static struct pernet_operations __net_initdata dev_mc_net_ops = {
    .init = dev_mc_net_init,
    .exit = dev_mc_net_exit,
};
/*
 * dev_mc_net_init - create the "dev_mcast" proc entry under net->proc_net.
 * @net: namespace being initialised
 *
 * Returns 0 when the entry was created, -ENOMEM otherwise.
 */
static int __net_init dev_mc_net_init(struct net *net)
{
    return proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops)
        ? 0 : -ENOMEM;
}

在/proc/net目录下创建dev_mcast文件。
netdev_kobject_init()定义:

netdev_kobject_init()主要是在/sys/class下创建net类目录,net_class中定义了网络设备都要有的设备属性文件。
/*
 * netdev_kobject_init - register the net namespace type operations and
 * then the net device class.
 *
 * Returns the result of class_register().
 */
int netdev_kobject_init(void)
{
    int rc;

    kobj_ns_type_register(&net_ns_type_operations);
    rc = class_register(&net_class);

    return rc;
}

ptype_all是接收任一(ETH_P_ALL)上层协议的协议管理结构struct packet_type的链表。
ptype_base[PTYPE_HASH_SIZE]是管理特定上层协议管理结构的哈希表。
offload_base是管理用于GRO功能结构的链表。
register_pernet_subsys(&netdev_net_ops)最后调用netdev_net_ops.init().

/* Per-namespace hooks for the core netdev state: netdev_init / netdev_exit. */
static struct pernet_operations __net_initdata netdev_net_ops = {
    .init = netdev_init,
    .exit = netdev_exit,
};

netdev_init(struct net *net)分别创建以设备名和设备序号为key的HASH table。

/*
 * netdev_init - set up a namespace's device list and its two lookup hashes.
 * @net: namespace being initialised
 *
 * init_net's list head is skipped here; every other namespace gets its
 * dev_base_head initialised.  Both hash tables come from
 * netdev_create_hash().
 *
 * Returns 0 on success, or -ENOMEM if either table could not be created
 * (the name table is freed again when the index table fails).
 */
static int __net_init netdev_init(struct net *net)
{
    if (net != &init_net)
        INIT_LIST_HEAD(&net->dev_base_head);

    net->dev_name_head = netdev_create_hash();
    if (!net->dev_name_head)
        return -ENOMEM;

    net->dev_index_head = netdev_create_hash();
    if (!net->dev_index_head) {
        kfree(net->dev_name_head);
        return -ENOMEM;
    }

    return 0;
}

接下来获取每cpu struct softnet_data 结构并初始化。
register_pernet_device(&loopback_net_ops)会调用register_pernet_operations(&pernet_list, ops)
最后依然是调用loopback_net_ops.init()

/*
 * register_pernet_device - register per-namespace device operations.
 * @ops: operations to register
 *
 * Registers @ops on pernet_list under net_mutex.  If that succeeds while
 * first_device still points at pernet_list itself, first_device is moved
 * to this entry.
 *
 * Returns the result of register_pernet_operations().
 */
int register_pernet_device(struct pernet_operations *ops)
{
    int rc;

    mutex_lock(&net_mutex);
    rc = register_pernet_operations(&pernet_list, ops);
    if (rc == 0 && first_device == &pernet_list)
        first_device = &ops->list;
    mutex_unlock(&net_mutex);

    return rc;
}
EXPORT_SYMBOL_GPL(register_pernet_device);

loopback_net_ops.init()注册loopback设备。

register_pernet_device(&default_device_ops):由于default_device_ops的init成员为NULL,这次调用默认不做任何事情。

open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);

最后注册net_tx_action(处理发送的软中断)和net_rx_action(处理接收的软中断).
两个软中断定义:

/*
 * net_tx_action - NET_TX_SOFTIRQ handler.
 * @h: softirq descriptor (not used in this function)
 *
 * Works on the current CPU's softnet_data in two phases:
 *   1. free every skb queued on completion_queue;
 *   2. walk the qdiscs queued on output_queue and run or reschedule each.
 */
static void net_tx_action(struct softirq_action *h)
{
    struct softnet_data *sd = &__get_cpu_var(softnet_data);

    if (sd->completion_queue) {
        struct sk_buff *clist;

        /* Detach the whole list with interrupts off, then free it at leisure. */
        local_irq_disable();
        clist = sd->completion_queue;
        sd->completion_queue = NULL;
        local_irq_enable();

        while (clist) {
            struct sk_buff *skb = clist;
            clist = clist->next;

            /* Warn if the skb still has a non-zero user count. */
            WARN_ON(atomic_read(&skb->users));
            trace_kfree_skb(skb, net_tx_action);
            __kfree_skb(skb);
        }
    }

    if (sd->output_queue) {
        struct Qdisc *head;

        /* Detach the qdisc list and reset the tail pointer to the empty head. */
        local_irq_disable();
        head = sd->output_queue;
        sd->output_queue = NULL;
        sd->output_queue_tailp = &sd->output_queue;
        local_irq_enable();

        while (head) {
            struct Qdisc *q = head;
            spinlock_t *root_lock;

            head = head->next_sched;

            root_lock = qdisc_lock(q);
            if (spin_trylock(root_lock)) {
                /* Lock taken: clear the SCHED bit and run the qdisc. */
                smp_mb__before_clear_bit();
                clear_bit(__QDISC_STATE_SCHED,
                      &q->state);
                qdisc_run(q);
                spin_unlock(root_lock);
            } else {
                /*
                 * Lock not available: reschedule the qdisc, unless it
                 * has been deactivated, in which case only the SCHED
                 * bit is cleared.
                 */
                if (!test_bit(__QDISC_STATE_DEACTIVATED,
                          &q->state)) {
                    __netif_reschedule(q);
                } else {
                    smp_mb__before_clear_bit();
                    clear_bit(__QDISC_STATE_SCHED,
                          &q->state);
                }
            }
        }
    }
}
/*
 * net_rx_action - NET_RX_SOFTIRQ handler.
 * @h: softirq descriptor (not used in this function)
 *
 * Polls the NAPI instances queued on this CPU's poll_list until the list
 * is empty, the packet budget (netdev_budget) is used up, or two jiffies
 * have elapsed.  In the latter two cases time_squeeze is bumped and the
 * softirq is raised again so the remaining work is picked up later.
 */
static void net_rx_action(struct softirq_action *h)
{
    struct softnet_data *sd = &__get_cpu_var(softnet_data);
    unsigned long time_limit = jiffies + 2;
    int budget = netdev_budget;
    void *have;

    local_irq_disable();

    while (!list_empty(&sd->poll_list)) {
        struct napi_struct *n;
        int work, weight;

        /* If softirq window is exhausted then punt.
         * Allow this to run for 2 jiffies since which will allow
         * an average latency of 1.5/HZ.
         */
        if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
            goto softnet_break;

        local_irq_enable();

        /* Even though interrupts have been re-enabled, this
         * access is safe because interrupts can only add new
         * entries to the tail of this list, and only ->poll()
         * calls can remove this head entry from the list.
         */
        n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);

        have = netpoll_poll_lock(n);

        weight = n->weight;

        /* This NAPI_STATE_SCHED test is for avoiding a race
         * with netpoll's poll_napi().  Only the entity which
         * obtains the lock and sees NAPI_STATE_SCHED set will
         * actually make the ->poll() call.  Therefore we avoid
         * accidentally calling ->poll() when NAPI is not scheduled.
         */
        work = 0;
        if (test_bit(NAPI_STATE_SCHED, &n->state)) {
            work = n->poll(n, weight);
            trace_napi_poll(n);
        }

        /* A poll routine must never report more work than its weight. */
        WARN_ON_ONCE(work > weight);

        budget -= work;

        local_irq_disable();

        /* Drivers must not modify the NAPI state if they
         * consume the entire weight.  In such cases this code
         * still "owns" the NAPI instance and therefore can
         * move the instance around on the list at-will.
         */
        if (unlikely(work == weight)) {
            if (unlikely(napi_disable_pending(n))) {
                local_irq_enable();
                napi_complete(n);
                local_irq_disable();
            } else {
                if (n->gro_list) {
                    /* flush too old packets
                     * If HZ < 1000, flush all packets.
                     */
                    local_irq_enable();
                    napi_gro_flush(n, HZ >= 1000);
                    local_irq_disable();
                }
                /* Full weight used: rotate this instance to the tail. */
                list_move_tail(&n->poll_list, &sd->poll_list);
            }
        }

        netpoll_poll_unlock(have);
    }
out:
    /* Reached with interrupts disabled on every path. */
    net_rps_action_and_irq_enable(sd);

#ifdef CONFIG_NET_DMA
    /*
     * There may not be any more sk_buffs coming right now, so push
     * any pending DMA copies to hardware
     */
    dma_issue_pending_all();
#endif

    return;

softnet_break:
    /* Out of budget or time: count the squeeze and re-raise the softirq. */
    sd->time_squeeze++;
    __raise_softirq_irqoff(NET_RX_SOFTIRQ);
    goto out;
}

2.设备驱动初始化

/*
 * Platform-bus driver descriptor for the RTL8168: names the driver and
 * supplies the probe/remove/shutdown callbacks.  The suspend/resume
 * callbacks are only wired up when CONFIG_PM is enabled.
 */
static struct platform_driver rtl8168_platform_driver = {
    .driver     = {
                .name   = MODULENAME,
                .owner  = THIS_MODULE,
    },
    .probe      = rtl8168_init_one,
    .remove     = __devexit_p(rtl8168_remove_one),
    .shutdown   = rtl8168_shutdown,
#ifdef CONFIG_PM
    .suspend    = rtl8168_suspend,
    .resume     = rtl8168_resume,
#endif
};

/*
 * Module load hook: registers rtl8168_platform_driver with the platform
 * bus and returns the registration result.
 */
static int __init
rtl8168_init_module(void)
{
    return platform_driver_register(&rtl8168_platform_driver);
}

/* Module unload hook: unregisters rtl8168_platform_driver. */
static void __exit
rtl8168_cleanup_module(void)
{
    platform_driver_unregister(&rtl8168_platform_driver);
}

/* Declare the two functions above as the module's load/unload entry points. */
module_init(rtl8168_init_module);
module_exit(rtl8168_cleanup_module);

驱动与设备匹配,调用xxx_driver.probe(). 这里是rtl8168_init_one( ).

/*
 * rtl8168_init_one - platform probe callback for an RTL8168 device.
 * @pdev: platform device being bound
 *
 * Obtains the net_device, MMIO mapping and IRQ resource from
 * rtl8168_init_board(), fills in the driver's callbacks and feature flags,
 * runs the hardware/software initialisation helpers, reads the MAC address
 * and finally registers the net_device.
 *
 * Returns 0 on success, the error from rtl8168_init_board() if board setup
 * fails, or the error from register_netdev() (after releasing the board).
 */
static int __devinit
rtl8168_init_one(struct platform_device *pdev)
{
    struct net_device *dev = NULL;
    struct rtl8168_private *tp;
    void __iomem *ioaddr = NULL;
    static int board_idx = -1;  /* counts probed boards across calls */
    struct resource *irq_res = NULL;

    int rc;

    /*
     * The stale "assert(ent != NULL)" was dropped: no "ent" is declared in
     * this function, so the build broke whenever assert() expands to real
     * code.
     */
    assert(pdev != NULL);
    board_idx++;

    if (netif_msg_drv(&debug))
        printk(KERN_INFO "%s Gigabit Ethernet driver %s loaded\n",
               MODULENAME, RTL8168_VERSION);

    rc = rtl8168_init_board(pdev, &dev, &ioaddr, &irq_res);
    if (rc)
        return rc;
    tp = netdev_priv(dev);
    assert(ioaddr != NULL);

    /* Link-management callbacks (xMII variants). */
    tp->mmio_addr = ioaddr;
    tp->set_speed = rtl8168_set_speed_xmii;
    tp->get_settings = rtl8168_gset_xmii;
    tp->phy_reset_enable = rtl8168_xmii_reset_enable;
    tp->phy_reset_pending = rtl8168_xmii_reset_pending;
    tp->link_ok = rtl8168_xmii_link_ok;


    /* Install the net_device_ops table (the interface used via sockets). */
    RTL_NET_DEVICE_OPS(rtl8168_netdev_ops);

    SET_ETHTOOL_OPS(dev, &rtl8168_ethtool_ops);

    dev->watchdog_timeo = RTL8168_TX_TIMEOUT;
    dev->irq = irq_res->start;
    dev->base_addr = (unsigned long) ioaddr;

#ifdef CONFIG_R8168_NAPI
    RTL_NAPI_CONFIG(dev, tp, rtl8168_poll, R8168_NAPI_WEIGHT);
#endif

#ifdef CONFIG_R8168_VLAN
    if (tp->mcfg != CFG_METHOD_DEFAULT)
        dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
#endif

    tp->cp_cmd |= RTL_R16(CPlusCmd);
    /* Offload features are only advertised for recognised chip configs. */
    if (tp->mcfg != CFG_METHOD_DEFAULT) {
        dev->features |= NETIF_F_IP_CSUM;
        dev->features |= NETIF_F_RXCSUM | NETIF_F_SG;
        dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO |
                           NETIF_F_RXCSUM | NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
        dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO |
                             NETIF_F_HIGHDMA;
    }

    tp->platform_dev = pdev;

    spin_lock_init(&tp->lock);

    spin_lock_init(&tp->phy_lock);

    rtl8168_init_software_variable(dev);

#ifdef ENABLE_DASH_SUPPORT
    if(tp->DASH)
        AllocateDashShareMemory(dev);
#endif

    rtl8168_exit_oob(dev);

    rtl8168_hw_init(dev);

    rtl8168_hw_reset(dev);

    /* Get production from EEPROM */
    if ((tp->mcfg == CFG_METHOD_21 ) && (mac_ocp_read(tp, 0xDC00) & BIT_3))
        tp->eeprom_type = EEPROM_TYPE_NONE;
    else
        rtl_eeprom_type(tp);

    if (tp->eeprom_type == EEPROM_TYPE_93C46 || tp->eeprom_type == EEPROM_TYPE_93C56)
        rtl_set_eeprom_sel_low(ioaddr);

    rtl8168_get_mac_address(dev);

    platform_set_drvdata(pdev, dev);

    if (netif_msg_probe(tp)) {
        printk(KERN_INFO "%s: 0x%lx, "
               "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x, "
               "IRQ %d\n",
               dev->name,
               dev->base_addr,
               dev->dev_addr[0], dev->dev_addr[1],
               dev->dev_addr[2], dev->dev_addr[3],
               dev->dev_addr[4], dev->dev_addr[5], dev->irq);
    }

    rtl8168_link_option(board_idx, (u8*)&autoneg, (u16*)&speed, (u8*)&duplex);

    rtl8168_init_led(tp);

    rc = register_netdev(dev);
    if (rc) {
        /* Do not leave drvdata pointing at a device that is being torn down. */
        platform_set_drvdata(pdev, NULL);
        rtl8168_release_board(pdev, dev, ioaddr);
        return rc;
    }

    /* Start with the carrier reported as down. */
    netif_carrier_off(dev);

    return 0;
}
/*
 * rtl8168_init_board - allocate the net_device and claim/map its resources.
 * @pdev:       platform device being probed
 * @dev_out:    receives the newly allocated net_device
 * @ioaddr_out: receives the ioremap()ed register base
 * @irq_out:    receives the platform IRQ resource
 *
 * Allocates an Ethernet net_device with a struct rtl8168_private private
 * area, looks up the MEM and IRQ platform resources, claims and maps the
 * register window, and identifies the chip.
 *
 * Returns 0 on success.  On failure everything acquired so far is released,
 * all three output pointers are set to NULL and a negative errno is
 * returned:
 *   -ENOMEM  net_device allocation failed
 *   -ENODEV  MEM or IRQ resource missing
 *   -EINVAL  MEM resource smaller than R8168_REGS_SIZE
 *   -EBUSY   register window already claimed
 *   -EIO     ioremap() failed
 */
static int __devinit
rtl8168_init_board(struct platform_device *pdev,
                   struct net_device **dev_out,
                   void __iomem **ioaddr_out,
                   struct resource **irq_out)
{
    void __iomem *ioaddr;
    struct net_device *dev;
    struct rtl8168_private *tp;
    int rc = 0, i;
    struct resource *addr_res, *irq_res;

    assert(ioaddr_out != NULL);

    /* dev zeroed in alloc_etherdev */
    dev = alloc_etherdev(sizeof (*tp));
    if (dev == NULL) {
        if (netif_msg_drv(&debug))
            dev_err(&pdev->dev, "unable to alloc new ethernet\n");
        rc = -ENOMEM;
        goto err_out;
    }

    SET_MODULE_OWNER(dev);
    SET_NETDEV_DEV(dev, &pdev->dev);
    tp = netdev_priv(dev);
    tp->dev = dev;
    tp->msg_enable = netif_msg_init(debug.msg_enable, R8168_MSG_DEFAULT);

    addr_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
    irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
    if (addr_res == NULL || irq_res == NULL) {
        dev_err(&pdev->dev,
                "insufficient resources\n");
        rc = -ENODEV;
        goto err_out_free;
    }

    if (resource_size(addr_res) < R8168_REGS_SIZE) {
        dev_err(&pdev->dev,
                "MMIO Resource too small\n");
        rc = -EINVAL;
        goto err_out_free;
    }

    if (!request_mem_region(addr_res->start, resource_size(addr_res), MODULENAME)) {
        dev_err(&pdev->dev,
                "cannot claim address reg area\n");
        rc = -EBUSY;
        goto err_out_free;
    }

    tp->addr_res = addr_res;
    tp->irq_res = irq_res;

    /* ioremap MMIO region */
    ioaddr = ioremap(addr_res->start, R8168_REGS_SIZE);
    if (ioaddr == NULL) {
        if (netif_msg_probe(tp))
            dev_err(&pdev->dev, "cannot remap MMIO, aborting\n");
        rc = -EIO;
        goto err_out_res;
    }

    /* Identify chip attached to board */
    rtl8168_get_mac_version(tp, ioaddr);

    rtl8168_print_mac_version(tp);

    /* Search the chip table from the end; i ends at -1 when nothing matches. */
    for (i = ARRAY_SIZE(rtl_chip_info) - 1; i >= 0; i--) {
        if (tp->mcfg == rtl_chip_info[i].mcfg)
            break;
    }

    if (i < 0) {
        /* Unknown chip: assume array element #0, original RTL-8168 */
        if (netif_msg_probe(tp))
            dev_printk(KERN_DEBUG, &pdev->dev, "unknown chip version, assuming %s\n", rtl_chip_info[0].name);
        i++;
    }

    tp->chipset = i;

    *ioaddr_out = ioaddr;
    *dev_out = dev;
    *irq_out = irq_res;
out:
    return rc;

err_out_res:
    /*
     * Release exactly the range that was requested above.  Passing a
     * different length (previously sizeof(struct rtl8168_private)) does
     * not match the claimed region.
     */
    release_mem_region(addr_res->start, resource_size(addr_res));
err_out_free:
    free_netdev(dev);
err_out:
    *ioaddr_out = NULL;
    *dev_out = NULL;
    *irq_out = NULL;
    goto out;
}

dev = alloc_etherdev(sizeof (*tp));中创建了以太网设备结构。网络设备结构由struct net_device和底部的硬件私有数据结构组成,如下图所示,左边可以看做是以太网设备,右边是网桥设备。
Linux 以太网驱动_第1张图片

/*
 * alloc_etherdev_mqs - allocate an Ethernet net_device.
 * @sizeof_priv: size of the driver-private area
 * @txqs:        number of TX queues
 * @rxqs:        number of RX queues
 *
 * Thin wrapper: forwards to alloc_netdev_mqs() with the name template
 * "eth%d" and ether_setup as the setup callback, and returns its result.
 */
struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
                      unsigned int rxqs)
{
    return alloc_netdev_mqs(sizeof_priv, "eth%d", ether_setup, txqs, rxqs);
}

alloc_etherdev_mqs()只是一层封装,真正分配网络设备结构的通用函数是alloc_netdev_mqs()。

/**
 *  alloc_netdev_mqs - allocate network device
 *  @sizeof_priv:   size of private data to allocate space for
 *  @name:      device name format string
 *  @setup:     callback to initialize device
 *  @txqs:      the number of TX subqueues to allocate
 *  @rxqs:      the number of RX subqueues to allocate
 *
 *  Allocates a struct net_device with private data area for driver use
 *  and performs basic initialization.  Also allocates subqueue structs
 *  for each queue on the device.
 *
 *  Returns the new device, or NULL if a queue count is zero or any
 *  allocation fails.
 */
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        void (*setup)(struct net_device *),
        unsigned int txqs, unsigned int rxqs)
{
    struct net_device *dev;
    size_t alloc_size;
    struct net_device *p;

    /* The name template must fit dev->name; it is strcpy()'d below. */
    BUG_ON(strlen(name) >= sizeof(dev->name));

    if (txqs < 1) {
        pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
        return NULL;
    }

#ifdef CONFIG_RPS
    if (rxqs < 1) {
        pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
        return NULL;
    }
#endif

    alloc_size = sizeof(struct net_device);
    if (sizeof_priv) {
        /* ensure 32-byte alignment of private area */
        alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
        alloc_size += sizeof_priv;
    }
    /* ensure 32-byte alignment of whole construct */
    alloc_size += NETDEV_ALIGN - 1;

    p = kzalloc(alloc_size, GFP_KERNEL);
    if (!p)
        return NULL;

    /* Align within the allocation and record the offset in dev->padded. */
    dev = PTR_ALIGN(p, NETDEV_ALIGN);
    dev->padded = (char *)dev - (char *)p;

    dev->pcpu_refcnt = alloc_percpu(int);
    if (!dev->pcpu_refcnt)
        goto free_p;

    if (dev_addr_init(dev)) /* create a struct netdev_hw_addr and link it into the device's struct netdev_hw_addr_list */
        goto free_pcpu;

    dev_mc_init(dev); /* initialise the multicast address list */
    dev_uc_init(dev); /* initialise the unicast address list */

    dev_net_set(dev, &init_net);

    dev->gso_max_size = GSO_MAX_SIZE;
    dev->gso_max_segs = GSO_MAX_SEGS;

    INIT_LIST_HEAD(&dev->napi_list);
    INIT_LIST_HEAD(&dev->unreg_list);
    INIT_LIST_HEAD(&dev->link_watch_list);
    INIT_LIST_HEAD(&dev->upper_dev_list);
    dev->priv_flags = IFF_XMIT_DST_RELEASE;
    setup(dev); /* invoke the caller-supplied setup callback */

    dev->num_tx_queues = txqs;
    dev->real_num_tx_queues = txqs;
    if (netif_alloc_netdev_queues(dev))
        goto free_all;

#ifdef CONFIG_RPS
    dev->num_rx_queues = rxqs;
    dev->real_num_rx_queues = rxqs;
    if (netif_alloc_rx_queues(dev))
        goto free_all;
#endif

    strcpy(dev->name, name);
    dev->group = INIT_NETDEV_GROUP;
    /* Fall back to the default ethtool ops if setup() installed none. */
    if (!dev->ethtool_ops)
        dev->ethtool_ops = &default_ethtool_ops;
    return dev;

free_all:
    /* The device is set up far enough for free_netdev() to release it. */
    free_netdev(dev);
    return NULL;

free_pcpu:
    free_percpu(dev->pcpu_refcnt);
    kfree(dev->_tx);
#ifdef CONFIG_RPS
    kfree(dev->_rx);
#endif

free_p:
    kfree(p);
    return NULL;
}
EXPORT_SYMBOL(alloc_netdev_mqs);

netif_alloc_netdev_queues(dev)

/*
 * netif_alloc_netdev_queues - allocate and initialise a device's TX queues.
 * @dev: device whose num_tx_queues has already been set
 *
 * Allocates a zeroed array of dev->num_tx_queues netdev_queue structures,
 * stores it in dev->_tx, runs netdev_init_one_queue() on every queue and
 * initialises tx_global_lock.
 *
 * Returns 0 on success or -ENOMEM if the array cannot be allocated.
 */
static int netif_alloc_netdev_queues(struct net_device *dev)
{
    unsigned int nqueues = dev->num_tx_queues;
    struct netdev_queue *queues;

    BUG_ON(nqueues < 1);

    queues = kcalloc(nqueues, sizeof(*queues), GFP_KERNEL);
    if (queues == NULL)
        return -ENOMEM;

    dev->_tx = queues;
    netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
    spin_lock_init(&dev->tx_global_lock);

    return 0;
}

注册socket操作网卡接口:
RTL_NET_DEVICE_OPS(rtl8168_netdev_ops)
dev->netdev_ops=&rtl8168_netdev_ops;

/*
 * net_device_ops table for the RTL8168: maps each generic netdev operation
 * to the driver's implementation.  The netpoll hook is only present when
 * CONFIG_NET_POLL_CONTROLLER is enabled.
 */
static const struct net_device_ops rtl8168_netdev_ops = {
    .ndo_open       = rtl8168_open,
    .ndo_stop       = rtl8168_close,
    .ndo_get_stats      = rtl8168_get_stats,
    .ndo_start_xmit     = rtl8168_start_xmit,
    .ndo_tx_timeout     = rtl8168_tx_timeout,
    .ndo_change_mtu     = rtl8168_change_mtu,
    .ndo_set_mac_address    = rtl8168_set_mac_address,
    .ndo_do_ioctl       = rtl8168_do_ioctl,
    .ndo_set_rx_mode    = rtl8168_set_rx_mode,
    .ndo_fix_features   = rtl8168_fix_features,
    .ndo_set_features   = rtl8168_set_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
    .ndo_poll_controller    = rtl8168_netpoll,
#endif
};

注册ethtool操作网卡的接口:
SET_ETHTOOL_OPS(dev, &rtl8168_ethtool_ops);
(dev)->ethtool_ops = (&rtl8168_ethtool_ops)

/*
 * ethtool_ops table for the RTL8168.  Most entries point at driver
 * functions; .get_link and .get_ts_info use the generic ethtool_op_*
 * helpers.
 */
static const struct ethtool_ops rtl8168_ethtool_ops = {
    .get_drvinfo        = rtl8168_get_drvinfo,
    .get_regs_len       = rtl8168_get_regs_len,
    .get_link           = ethtool_op_get_link,
    .get_settings       = rtl8168_get_settings,
    .set_settings       = rtl8168_set_settings,
    .get_msglevel       = rtl8168_get_msglevel,
    .set_msglevel       = rtl8168_set_msglevel,
    .get_regs           = rtl8168_get_regs,
    .get_wol            = rtl8168_get_wol,
    .set_wol            = rtl8168_set_wol,
    .get_strings        = rtl8168_get_strings,
    .get_sset_count     = rtl8168_get_sset_count,
    .get_ethtool_stats  = rtl8168_get_ethtool_stats,
    .get_ts_info        = ethtool_op_get_ts_info,

};

开启napi:
RTL_NAPI_CONFIG(dev, tp, rtl8168_poll, R8168_NAPI_WEIGHT)
netif_napi_add(dev, &tp->napi, rtl8168_poll, R8168_NAPI_WEIGHT)

/*
 * netif_napi_add - initialise a NAPI context and attach it to a device.
 * @dev:    device the context belongs to
 * @napi:   context to initialise
 * @poll:   poll callback to install
 * @weight: per-poll weight to install
 *
 * Resets the context's list/GRO/skb state, stores @poll, @weight and @dev,
 * and links the context onto dev->napi_list.  A one-time error is logged
 * when @weight is larger than NAPI_POLL_WEIGHT.  The context is left with
 * NAPI_STATE_SCHED set.
 */
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
            int (*poll)(struct napi_struct *, int), int weight)
{
    if (weight > NAPI_POLL_WEIGHT)
        pr_err_once("netif_napi_add() called with weight %d on device %s\n",
                weight, dev->name);

    INIT_LIST_HEAD(&napi->poll_list);
    napi->skb = NULL;
    napi->gro_list = NULL;
    napi->gro_count = 0;

    napi->poll = poll;
    napi->weight = weight;
    napi->dev = dev;
    list_add(&napi->dev_list, &dev->napi_list);

#ifdef CONFIG_NETPOLL
    spin_lock_init(&napi->poll_lock);
    napi->poll_owner = -1;
#endif

    set_bit(NAPI_STATE_SCHED, &napi->state);
}
EXPORT_SYMBOL(netif_napi_add);

最后注册网络设备:
register_netdev(dev)

/**
 *  register_netdev - register a network device
 *  @dev: device to register
 *
 *  Locked wrapper around register_netdevice(): takes the rtnl lock, calls
 *  register_netdevice() and drops the lock again.
 *
 *  Returns whatever register_netdevice() returns: 0 on success or a
 *  negative errno code on failure.
 */
int register_netdev(struct net_device *dev)
{
    int rc;

    rtnl_lock();
    rc = register_netdevice(dev);
    rtnl_unlock();

    return rc;
}
EXPORT_SYMBOL(register_netdev);
/**
 *  register_netdevice  - register a network device
 *  @dev: device to register
 *
 *  Take a completed network device structure and add it to the kernel
 *  interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *  chain. 0 is returned on success. A negative errno code is returned
 *  on a failure to set up the device, or if the name is a duplicate.
 *
 *  Callers must hold the rtnl semaphore. You may want
 *  register_netdev() instead of this.
 *
 *  BUGS:
 *  The locking appears insufficient to guarantee two parallel registers
 *  will not get the same name.
 */

int register_netdevice(struct net_device *dev)
{
    int ret;
    struct net *net = dev_net(dev);

    /* Registration is not allowed before net_dev_init() has finished. */
    BUG_ON(dev_boot_phase);
    ASSERT_RTNL();

    might_sleep();

    /* When net_device's are persistent, this will be fatal. */
    BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
    BUG_ON(!net);

    spin_lock_init(&dev->addr_list_lock);
    netdev_set_addr_lockdep_class(dev);

    dev->iflink = -1;

    ret = dev_get_valid_name(net, dev, dev->name);
    if (ret < 0)
        goto out;

    /* Init, if this function is available */
    if (dev->netdev_ops->ndo_init) {
        ret = dev->netdev_ops->ndo_init(dev);
        if (ret) {
            /* A positive return from ndo_init is reported as -EIO. */
            if (ret > 0)
                ret = -EIO;
            goto out;
        }
    }

    /* VLAN filter offload requires both the add_vid and kill_vid hooks. */
    if (((dev->hw_features | dev->features) &
         NETIF_F_HW_VLAN_CTAG_FILTER) &&
        (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
         !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
        netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
        ret = -EINVAL;
        goto err_uninit;
    }

    /* Allocate an ifindex, or reject a preset one that is already in use. */
    ret = -EBUSY;
    if (!dev->ifindex)
        dev->ifindex = dev_new_index(net);
    else if (__dev_get_by_index(net, dev->ifindex))
        goto err_uninit;

    if (dev->iflink == -1)
        dev->iflink = dev->ifindex;

    /* Transfer changeable features to wanted_features and enable
     * software offloads (GSO and GRO).
     */
    dev->hw_features |= NETIF_F_SOFT_FEATURES;
    dev->features |= NETIF_F_SOFT_FEATURES;
    dev->wanted_features = dev->features & dev->hw_features;

    /* Turn on no cache copy if HW is doing checksum */
    if (!(dev->flags & IFF_LOOPBACK)) {
        dev->hw_features |= NETIF_F_NOCACHE_COPY;
        if (dev->features & NETIF_F_ALL_CSUM) {
            dev->wanted_features |= NETIF_F_NOCACHE_COPY;
            dev->features |= NETIF_F_NOCACHE_COPY;
        }
    }

    /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
     */
    dev->vlan_features |= NETIF_F_HIGHDMA;

    /* Make NETIF_F_SG inheritable to tunnel devices.
     */
    dev->hw_enc_features |= NETIF_F_SG;

    ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); /* run the netdev_chain notifier chain */
    ret = notifier_to_errno(ret);
    if (ret)
        goto err_uninit;

    ret = netdev_register_kobject(dev); /* add dev to the device model */
    if (ret)
        goto err_uninit;
    dev->reg_state = NETREG_REGISTERED;

    __netdev_update_features(dev);

    /*
     *  Default initial state at registry is that the
     *  device is present.
     */

    set_bit(__LINK_STATE_PRESENT, &dev->state);

    linkwatch_init_dev(dev);

    dev_init_scheduler(dev);
    dev_hold(dev);
    list_netdevice(dev); /* link dev into net's dev_base_head list and into the hash tables keyed by device name and by device index */
    add_device_randomness(dev->dev_addr, dev->addr_len);

    /* If the device has permanent device address, driver should
     * set dev_addr and also addr_assign_type should be set to
     * NET_ADDR_PERM (default value).
     */
    if (dev->addr_assign_type == NET_ADDR_PERM)
        memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);

    /* Notify protocols, that a new device appeared. */
    ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
    ret = notifier_to_errno(ret);
    if (ret) {
        /* A notifier vetoed the device: undo the registration. */
        rollback_registered(dev);
        dev->reg_state = NETREG_UNREGISTERED;
    }
    /*
     *  Prevent userspace races by waiting until the network
     *  device is fully setup before sending notifications.
     */
    if (!dev->rtnl_link_ops ||
        dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
        rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);

out:
    return ret;

err_uninit:
    /* Give the driver a chance to undo ndo_init before returning. */
    if (dev->netdev_ops->ndo_uninit)
        dev->netdev_ops->ndo_uninit(dev);
    goto out;
}
EXPORT_SYMBOL(register_netdevice);

dev_init_scheduler(dev)初始化发送队列规则:

/*
 * dev_init_scheduler - give a device its initial queueing discipline.
 * @dev: device being registered
 *
 * Points the device, every TX queue and (if one exists) the ingress queue
 * at noop_qdisc, then sets up the watchdog timer with dev_watchdog() as
 * its handler and @dev as its argument.
 */
void dev_init_scheduler(struct net_device *dev)
{
    dev->qdisc = &noop_qdisc;
    netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
    if (dev_ingress_queue(dev))
        dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);

    setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

/*
 * dev_init_scheduler_queue - per-queue callback used by dev_init_scheduler().
 * @dev:       owning device (not used here)
 * @dev_queue: queue to initialise
 * @_qdisc:    qdisc to install, passed as an opaque pointer
 *
 * Installs @_qdisc as both the queue's qdisc and its qdisc_sleeping.
 */
static void dev_init_scheduler_queue(struct net_device *dev,
                     struct netdev_queue *dev_queue,
                     void *_qdisc)
{
    dev_queue->qdisc = _qdisc;
    dev_queue->qdisc_sleeping = _qdisc;
}

3. 打开网络设备

网络设备初始化完成后,网络设备就出现在系统中,等待被打开。当使用ifconfig eth0 up 时,就会调用打开网络设备函数。

/**
 *  dev_open    - prepare an interface for use.
 *  @dev:   device to open
 *
 *  Brings a device from the down state to the up state by calling
 *  __dev_open().  When that succeeds, an RTM_NEWLINK message is sent
 *  and a %NETDEV_UP event is delivered to the netdev notifier chain.
 *
 *  Opening an interface that is already up does nothing and returns 0.
 *  On failure the negative errno code from __dev_open() is returned.
 */
int dev_open(struct net_device *dev)
{
    int rc;

    /* Already up: nothing to do. */
    if (dev->flags & IFF_UP)
        return 0;

    rc = __dev_open(dev);
    if (rc >= 0) {
        rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
        call_netdevice_notifiers(NETDEV_UP, dev);
    }

    return rc;
}
EXPORT_SYMBOL(dev_open);

/*
 * __dev_open - do the actual work of bringing a device up.
 * @dev: device to open
 *
 * Must be called with the rtnl lock held (ASSERT_RTNL).  Runs the
 * NETDEV_PRE_UP notifiers, validates the address if the driver provides
 * ndo_validate_addr, and calls the driver's ndo_open.  On success the
 * device is flagged IFF_UP and its RX mode and queueing are activated.
 *
 * Returns 0 on success, -ENODEV if the device is not present, or the
 * error from netpoll, a notifier, ndo_validate_addr or ndo_open.
 */
static int __dev_open(struct net_device *dev)
{
    const struct net_device_ops *ops = dev->netdev_ops;
    int ret;

    ASSERT_RTNL();

    if (!netif_device_present(dev))
        return -ENODEV;

    /* Block netpoll from trying to do any rx path servicing.
     * If we don't do this there is a chance ndo_poll_controller
     * or ndo_poll may be running while we open the device
     */
    ret = netpoll_rx_disable(dev);
    if (ret)
        return ret;

    ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
    ret = notifier_to_errno(ret);
    if (ret)
        return ret;

    /* Mark the device as started before calling into the driver. */
    set_bit(__LINK_STATE_START, &dev->state);

    if (ops->ndo_validate_addr)
        ret = ops->ndo_validate_addr(dev);

    if (!ret && ops->ndo_open) /* call the ndo_open hook from the device's netdev_ops */
        ret = ops->ndo_open(dev);

    netpoll_rx_enable(dev);

    if (ret)
        /* Open failed: take the START mark back. */
        clear_bit(__LINK_STATE_START, &dev->state);
    else {
        dev->flags |= IFF_UP;
        net_dmaengine_get();
        dev_set_rx_mode(dev);
        dev_activate(dev);
        add_device_randomness(dev->dev_addr, dev->addr_len);
    }

    return ret;
}

在这里就是调用rtl8168_open()函数

/*
 * rtl8168_open - ndo_open handler for the RTL8168.
 * @dev: network device being opened
 *
 * Allocates the coherent DMA buffers for the TX descriptor ring, the RX
 * descriptor ring and the tally counters, initialises the rings, runs the
 * hardware bring-up helpers in order, applies the configured
 * speed/duplex/autoneg settings, requests the interrupt line and finally
 * calls rtl8168_request_link_timer().
 *
 * tp->in_open_fun is TRUE for the duration of the call.
 *
 * Returns 0 on success, -ENOMEM when a DMA allocation fails, or the error
 * from rtl8168_init_ring() / request_irq().
 *
 * NOTE(review): when request_irq() fails, the unwind path only frees the
 * three coherent buffers.  It does not visibly undo rtl8168_init_ring(),
 * RTL_NAPI_ENABLE() or rtl8168_hw_start() - confirm whether RX buffers,
 * NAPI state or a running chip are left behind on that path.
 */
static int rtl8168_open(struct net_device *dev)
{
    struct rtl8168_private *tp = netdev_priv(dev);
    int retval;

    tp->in_open_fun = TRUE;

    retval = -ENOMEM;

    rtl8168_set_rxbufsize(tp, dev);
    /*
     * Rx and Tx descriptors needs 256 bytes alignment.
     * The original comment says the coherent allocator provides more.
     */
    tp->TxDescArray = dma_alloc_coherent(&tp->platform_dev->dev, R8168_TX_RING_BYTES,
                                           &tp->TxPhyAddr, GFP_KERNEL);
    if (!tp->TxDescArray)
        goto out;

    tp->RxDescArray = dma_alloc_coherent(&tp->platform_dev->dev, R8168_RX_RING_BYTES,
                                           &tp->RxPhyAddr, GFP_KERNEL);
    if (!tp->RxDescArray)
        goto err_free_tx;

    tp->tally_vaddr = dma_alloc_coherent(&tp->platform_dev->dev, sizeof(*tp->tally_vaddr), &tp->tally_paddr, GFP_KERNEL);
    if (!tp->tally_vaddr)
        goto err_free_rx;

    retval = rtl8168_init_ring(dev);
    if (retval < 0)
        goto err_free_counters;

    /* The delayed work item starts without a handler. */
    INIT_DELAYED_WORK(&tp->task, NULL);

#ifdef  CONFIG_R8168_NAPI
    RTL_NAPI_ENABLE(dev, &tp->napi);
#endif

    /* Hardware bring-up sequence; the order is kept exactly as written. */
    rtl8168_exit_oob(dev);

    rtl8168_tally_counter_clear(tp);

    rtl8168_hw_init(dev);

    rtl8168_hw_reset(dev);

    rtl8168_powerup_pll(dev);

    rtl8168_hw_ephy_config(dev);

    rtl8168_hw_phy_config(dev);

    rtl8168_hw_start(dev);

    rtl8168_set_speed(dev, autoneg, speed, duplex);

    /* The IRQ is requested as shared unless the MSI feature flag is set. */
    retval = request_irq(dev->irq, rtl8168_interrupt, (tp->features & RTL_FEATURE_MSI) ? 0 : SA_SHIRQ, dev->name, dev);
    if (retval<0)
        goto err_free_counters;

    rtl8168_request_link_timer(dev);

out:
    tp->in_open_fun = FALSE;

    return retval;

    /* Unwind in reverse allocation order; each label falls through. */
err_free_counters:
    dma_free_coherent(&tp->platform_dev->dev, sizeof(*tp->tally_vaddr), tp->tally_vaddr, tp->tally_paddr);

    tp->tally_vaddr = NULL;
err_free_rx:
    dma_free_coherent(&tp->platform_dev->dev, R8168_RX_RING_BYTES, tp->RxDescArray,
                        tp->RxPhyAddr);
    tp->RxDescArray = NULL;
err_free_tx:
    dma_free_coherent(&tp->platform_dev->dev, R8168_TX_RING_BYTES, tp->TxDescArray,
                        tp->TxPhyAddr);
    tp->TxDescArray = NULL;
    goto out;
}

你可能感兴趣的:(Device,driver)