在以太网驱动加载之前,首先会进行网络子系统初始化
subsys_initcall(net_dev_init)
net_dev_init()
:
/*
 * net_dev_init - boot-time initialisation of the network device layer.
 *
 * Registers the proc and sysfs pieces, initialises the packet-type lists,
 * sets up the per-CPU softnet_data, clears dev_boot_phase, registers the
 * loopback and default per-net device operations and opens the NET_TX /
 * NET_RX softirqs.
 *
 * Returns 0 on success, -ENOMEM if any registration step fails.
 */
static int __init net_dev_init(void)
{
	int slot;
	int cpu;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init() != 0)
		return -ENOMEM;

	if (netdev_kobject_init() != 0)
		return -ENOMEM;

	/* Packet-type handler lists and the offload list start out empty. */
	INIT_LIST_HEAD(&ptype_all);
	for (slot = 0; slot < PTYPE_HASH_SIZE; slot++)
		INIT_LIST_HEAD(&ptype_base[slot]);
	INIT_LIST_HEAD(&offload_base);

	if (register_pernet_subsys(&netdev_net_ops) != 0)
		return -ENOMEM;

	/* Set up the packet receive queues of every possible CPU. */
	for_each_possible_cpu(cpu) {
		struct softnet_data *softnet = &per_cpu(softnet_data, cpu);

		memset(softnet, 0, sizeof(*softnet));
		skb_queue_head_init(&softnet->input_pkt_queue);
		skb_queue_head_init(&softnet->process_queue);
		softnet->completion_queue = NULL;
		INIT_LIST_HEAD(&softnet->poll_list);
		softnet->output_queue = NULL;
		softnet->output_queue_tailp = &softnet->output_queue;
#ifdef CONFIG_RPS
		softnet->csd.func = rps_trigger_softirq;
		softnet->csd.info = softnet;
		softnet->csd.flags = 0;
		softnet->cpu = cpu;
#endif
		/* The backlog pseudo-NAPI instance is polled by process_backlog. */
		softnet->backlog.poll = process_backlog;
		softnet->backlog.weight = weight_p;
		softnet->backlog.gro_list = NULL;
		softnet->backlog.gro_count = 0;
	}

	dev_boot_phase = 0;

	/*
	 * Loopback is registered before any other per-net device operations:
	 * whenever a namespace holds network devices the loopback device must
	 * exist, so it has to be the first device to appear and the last one
	 * to go away.
	 */
	if (register_pernet_device(&loopback_net_ops) != 0)
		return -ENOMEM;

	if (register_pernet_device(&default_device_ops) != 0)
		return -ENOMEM;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();

	return 0;
}
dev_proc_init()
:
/*
 * dev_proc_init - register the per-net operations dev_proc_ops and then
 * dev_mc_net_ops.
 *
 * Returns the first non-zero result of register_pernet_subsys(), or 0.
 */
int __init dev_proc_init(void)
{
	int rc;

	rc = register_pernet_subsys(&dev_proc_ops);
	if (rc != 0)
		return rc;

	return register_pernet_subsys(&dev_mc_net_ops);
}
register_pernet_subsys()
:
/*
 * register_pernet_subsys - register per-net subsystem operations.
 * @ops: operations to register
 *
 * Calls register_pernet_operations() on the first_device list position
 * while holding net_mutex and returns its result.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
	int rc;

	mutex_lock(&net_mutex);
	rc = register_pernet_operations(first_device, ops);
	mutex_unlock(&net_mutex);

	return rc;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
register_pernet_operations()
在没有定义CONFIG_NET_NS时,系统中只有一个network namespace,即init_net,此时使用的是只针对init_net的
__register_pernet_operations()
版本:
/*
 * Variant shown here ignores @list and simply runs ops_init() for @ops on
 * init_net, returning its result.
 */
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	int rc = ops_init(ops, &init_net);

	return rc;
}
/*
 * The initial network namespace. Only dev_base_head is given a static
 * initialiser here (LIST_HEAD_INIT on itself).
 */
struct net init_net = {
.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);
此时该函数所做的只是以struct net init_net
为参数调用ops->init()
。
dev_proc_ops
定义:
/* Per-net operations whose init/exit hooks are dev_proc_net_init/dev_proc_net_exit. */
static struct pernet_operations __net_initdata dev_proc_ops = {
.init = dev_proc_net_init,
.exit = dev_proc_net_exit,
};
/*
 * dev_proc_net_init - create the "dev", "softnet_stat" and "ptype" proc
 * entries under net->proc_net, then call wext_proc_init().
 * @net: network namespace being initialised
 *
 * On any failure the entries created so far are removed again in reverse
 * order. Returns 0 on success, -ENOMEM on failure.
 */
static int __net_init dev_proc_net_init(struct net *net)
{
	if (proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops) == NULL)
		return -ENOMEM;

	if (proc_create("softnet_stat", S_IRUGO, net->proc_net,
			&softnet_seq_fops) == NULL)
		goto undo_dev;

	if (proc_create("ptype", S_IRUGO, net->proc_net,
			&ptype_seq_fops) == NULL)
		goto undo_softnet;

	if (wext_proc_init(net) != 0)
		goto undo_ptype;

	return 0;

undo_ptype:
	remove_proc_entry("ptype", net->proc_net);
undo_softnet:
	remove_proc_entry("softnet_stat", net->proc_net);
undo_dev:
	remove_proc_entry("dev", net->proc_net);
	return -ENOMEM;
}
在/proc/net目录下创建dev、softnet_stat、ptype文件。
dev_mc_net_ops
定义:
/* Per-net operations whose init/exit hooks are dev_mc_net_init/dev_mc_net_exit. */
static struct pernet_operations __net_initdata dev_mc_net_ops = {
.init = dev_mc_net_init,
.exit = dev_mc_net_exit,
};
/*
 * dev_mc_net_init - create the "dev_mcast" proc entry under net->proc_net.
 * @net: network namespace being initialised
 *
 * Returns 0 on success, -ENOMEM if proc_create() fails.
 */
static int __net_init dev_mc_net_init(struct net *net)
{
	return proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops)
		? 0 : -ENOMEM;
}
在/proc/net目录下创建dev_mcast文件。
netdev_kobject_init()
定义:
netdev_kobject_init()主要是在/sys/class下创建net类目录,net_class中定义了网络设备都要有的设备属性文件。
/*
 * netdev_kobject_init - register net_ns_type_operations as a kobject
 * namespace type, then register net_class.
 *
 * Returns the result of class_register().
 */
int netdev_kobject_init(void)
{
	int rc;

	kobj_ns_type_register(&net_ns_type_operations);
	rc = class_register(&net_class);

	return rc;
}
ptype_all
是接收任一(ETH_P_ALL)上层协议的协议管理结构struct packet_type
的链表。
ptype_base[PTYPE_HASH_SIZE]
是管理特定上层协议管理结构的哈希表。
offload_base
是管理用于GRO功能结构的链表。
register_pernet_subsys(&netdev_net_ops)
最后调用netdev_net_ops.init()
.
/* Per-net operations whose init/exit hooks are netdev_init/netdev_exit. */
static struct pernet_operations __net_initdata netdev_net_ops = {
.init = netdev_init,
.exit = netdev_exit,
};
netdev_init(struct net *net)
分别创建以设备名和设备序号为key的HASH table。
/*
 * netdev_init - per-namespace setup of the device lookup tables.
 * @net: network namespace being initialised
 *
 * Initialises dev_base_head for every namespace except init_net and
 * allocates the dev_name_head and dev_index_head hashes.
 *
 * Returns 0 on success, -ENOMEM if either hash cannot be created; the name
 * hash is freed again when the index hash allocation fails.
 */
static int __net_init netdev_init(struct net *net)
{
	if (net != &init_net)
		INIT_LIST_HEAD(&net->dev_base_head);

	net->dev_name_head = netdev_create_hash();
	if (!net->dev_name_head)
		return -ENOMEM;

	net->dev_index_head = netdev_create_hash();
	if (!net->dev_index_head) {
		kfree(net->dev_name_head);
		return -ENOMEM;
	}

	return 0;
}
接下来获取每cpu struct softnet_data 结构并初始化。
register_pernet_device(&loopback_net_ops)
会调用register_pernet_operations(&pernet_list, ops)
,
最后依然是调用loopback_net_ops.init()
。
/*
 * register_pernet_device - register per-net device operations.
 * @ops: operations to register
 *
 * Registers @ops on pernet_list under net_mutex. On success, if
 * first_device still points at pernet_list itself, it is updated to point
 * at @ops' list node.
 *
 * Returns the result of register_pernet_operations().
 */
int register_pernet_device(struct pernet_operations *ops)
{
	int rc;

	mutex_lock(&net_mutex);
	rc = register_pernet_operations(&pernet_list, ops);
	if (rc == 0 && first_device == &pernet_list)
		first_device = &ops->list;
	mutex_unlock(&net_mutex);

	return rc;
}
EXPORT_SYMBOL_GPL(register_pernet_device);
loopback_net_ops.init()
注册loopback设备。
register_pernet_device(&default_device_ops)
由于default_device_ops
init成员为NULL,这里该调用默认没有做任何事情。
open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);
最后注册net_tx_action
(处理发送的软中断)和net_rx_action
(处理接收的软中断).
两个软中断定义:
/*
 * net_tx_action - handler opened for NET_TX_SOFTIRQ.
 * @h: softirq action descriptor (not used in this body)
 *
 * Works on the current CPU's softnet_data in two steps: frees every skb
 * queued on completion_queue, then processes every Qdisc queued on
 * output_queue.
 */
static void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = &__get_cpu_var(softnet_data);
if (sd->completion_queue) {
struct sk_buff *clist;
/* Detach the whole list with local interrupts off, walk it with them on. */
local_irq_disable();
clist = sd->completion_queue;
sd->completion_queue = NULL;
local_irq_enable();
while (clist) {
struct sk_buff *skb = clist;
clist = clist->next;
/* Warn if the skb still has a non-zero users count. */
WARN_ON(atomic_read(&skb->users));
trace_kfree_skb(skb, net_tx_action);
__kfree_skb(skb);
}
}
if (sd->output_queue) {
struct Qdisc *head;
/* Same detach pattern; the tail pointer is reset to the empty head. */
local_irq_disable();
head = sd->output_queue;
sd->output_queue = NULL;
sd->output_queue_tailp = &sd->output_queue;
local_irq_enable();
while (head) {
struct Qdisc *q = head;
spinlock_t *root_lock;
head = head->next_sched;
root_lock = qdisc_lock(q);
/* Lock acquired: clear the SCHED bit and run the qdisc now. */
if (spin_trylock(root_lock)) {
smp_mb__before_clear_bit();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
qdisc_run(q);
spin_unlock(root_lock);
} else {
/*
 * Lock busy: reschedule the qdisc unless it is marked
 * DEACTIVATED, in which case only the SCHED bit is cleared.
 */
if (!test_bit(__QDISC_STATE_DEACTIVATED,
&q->state)) {
__netif_reschedule(q);
} else {
smp_mb__before_clear_bit();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
}
}
}
}
}
/*
 * net_rx_action - handler opened for NET_RX_SOFTIRQ.
 * @h: softirq action descriptor (not used in this body)
 *
 * Polls the NAPI instances on the current CPU's poll_list until the list is
 * empty, the budget (netdev_budget) is used up, or two jiffies have passed.
 * When it stops early it counts a time_squeeze and re-raises the softirq.
 */
static void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = &__get_cpu_var(softnet_data);
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
void *have;
local_irq_disable();
while (!list_empty(&sd->poll_list)) {
struct napi_struct *n;
int work, weight;
/* If softirq window is exhausted then punt.
* Allow this to run for 2 jiffies since which will allow
* an average latency of 1.5/HZ.
*/
if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
goto softnet_break;
local_irq_enable();
/* Even though interrupts have been re-enabled, this
* access is safe because interrupts can only add new
* entries to the tail of this list, and only ->poll()
* calls can remove this head entry from the list.
*/
n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
have = netpoll_poll_lock(n);
weight = n->weight;
/* This NAPI_STATE_SCHED test is for avoiding a race
* with netpoll's poll_napi(). Only the entity which
* obtains the lock and sees NAPI_STATE_SCHED set will
* actually make the ->poll() call. Therefore we avoid
* accidentally calling ->poll() when NAPI is not scheduled.
*/
work = 0;
if (test_bit(NAPI_STATE_SCHED, &n->state)) {
work = n->poll(n, weight);
trace_napi_poll(n);
}
/* A poll routine must not report more work than its weight. */
WARN_ON_ONCE(work > weight);
budget -= work;
local_irq_disable();
/* Drivers must not modify the NAPI state if they
* consume the entire weight. In such cases this code
* still "owns" the NAPI instance and therefore can
* move the instance around on the list at-will.
*/
if (unlikely(work == weight)) {
if (unlikely(napi_disable_pending(n))) {
local_irq_enable();
napi_complete(n);
local_irq_disable();
} else {
if (n->gro_list) {
/* flush too old packets
* If HZ < 1000, flush all packets.
*/
local_irq_enable();
napi_gro_flush(n, HZ >= 1000);
local_irq_disable();
}
/* Full weight consumed: move this instance to the tail of the list. */
list_move_tail(&n->poll_list, &sd->poll_list);
}
}
netpoll_poll_unlock(have);
}
out:
/* Reached with local interrupts disabled on every path. */
net_rps_action_and_irq_enable(sd);
#ifdef CONFIG_NET_DMA
/*
* There may not be any more sk_buffs coming right now, so push
* any pending DMA copies to hardware
*/
dma_issue_pending_all();
#endif
return;
softnet_break:
/* Out of budget or time: count the squeeze and re-raise the softirq. */
sd->time_squeeze++;
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
goto out;
}
/*
 * Platform driver descriptor for the RTL8168: names the driver MODULENAME
 * and wires up the probe/remove/shutdown callbacks, plus suspend/resume
 * when CONFIG_PM is enabled.
 */
static struct platform_driver rtl8168_platform_driver = {
.driver = {
.name = MODULENAME,
.owner = THIS_MODULE,
},
.probe = rtl8168_init_one,
.remove = __devexit_p(rtl8168_remove_one),
.shutdown = rtl8168_shutdown,
#ifdef CONFIG_PM
.suspend = rtl8168_suspend,
.resume = rtl8168_resume,
#endif
};
/* Module entry point: registers rtl8168_platform_driver and returns the result. */
static int __init
rtl8168_init_module(void)
{
	int rc = platform_driver_register(&rtl8168_platform_driver);

	return rc;
}
/* Module exit point: unregisters rtl8168_platform_driver. */
static void __exit
rtl8168_cleanup_module(void)
{
platform_driver_unregister(&rtl8168_platform_driver);
}
/* Hook the init and cleanup functions above into module load/unload. */
module_init(rtl8168_init_module);
module_exit(rtl8168_cleanup_module);
驱动与设备匹配,调用xxx_driver.probe(). 这里是rtl8168_init_one( )
.
/*
 * rtl8168_init_one - probe callback of rtl8168_platform_driver.
 * @pdev: platform device being bound
 *
 * Allocates and maps the board through rtl8168_init_board(), fills in the
 * private data and net_device callbacks, initialises and resets the
 * hardware, reads the MAC address and finally registers the net_device.
 *
 * Returns 0 on success, otherwise the error reported by
 * rtl8168_init_board() or register_netdev().
 */
static int __devinit
rtl8168_init_one(struct platform_device *pdev)
{
	struct net_device *dev = NULL;
	struct rtl8168_private *tp;
	void __iomem *ioaddr = NULL;
	static int board_idx = -1;
	struct resource *irq_res = NULL;
	int rc;

	/*
	 * Only pdev is checked: this platform probe has no "ent" argument, so
	 * an assert on it would reference an undeclared identifier.
	 */
	assert(pdev != NULL);

	board_idx++;

	if (netif_msg_drv(&debug))
		printk(KERN_INFO "%s Gigabit Ethernet driver %s loaded\n",
		       MODULENAME, RTL8168_VERSION);

	rc = rtl8168_init_board(pdev, &dev, &ioaddr, &irq_res);
	if (rc)
		return rc;

	tp = netdev_priv(dev);
	assert(ioaddr != NULL);

	tp->mmio_addr = ioaddr;
	tp->set_speed = rtl8168_set_speed_xmii;
	tp->get_settings = rtl8168_gset_xmii;
	tp->phy_reset_enable = rtl8168_xmii_reset_enable;
	tp->phy_reset_pending = rtl8168_xmii_reset_pending;
	tp->link_ok = rtl8168_xmii_link_ok;

	/* Install the net_device_ops used for socket-level access to the NIC. */
	RTL_NET_DEVICE_OPS(rtl8168_netdev_ops);
	SET_ETHTOOL_OPS(dev, &rtl8168_ethtool_ops);

	dev->watchdog_timeo = RTL8168_TX_TIMEOUT;
	dev->irq = irq_res->start;
	dev->base_addr = (unsigned long) ioaddr;

#ifdef CONFIG_R8168_NAPI
	RTL_NAPI_CONFIG(dev, tp, rtl8168_poll, R8168_NAPI_WEIGHT);
#endif

#ifdef CONFIG_R8168_VLAN
	if (tp->mcfg != CFG_METHOD_DEFAULT)
		dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
#endif

	tp->cp_cmd |= RTL_R16(CPlusCmd);
	if (tp->mcfg != CFG_METHOD_DEFAULT) {
		dev->features |= NETIF_F_IP_CSUM;
		dev->features |= NETIF_F_RXCSUM | NETIF_F_SG;
		dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO |
				   NETIF_F_RXCSUM | NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
		dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO |
				     NETIF_F_HIGHDMA;
	}

	tp->platform_dev = pdev;

	spin_lock_init(&tp->lock);
	spin_lock_init(&tp->phy_lock);

	rtl8168_init_software_variable(dev);

#ifdef ENABLE_DASH_SUPPORT
	if (tp->DASH)
		AllocateDashShareMemory(dev);
#endif

	rtl8168_exit_oob(dev);
	rtl8168_hw_init(dev);
	rtl8168_hw_reset(dev);

	/* Get production from EEPROM */
	if ((tp->mcfg == CFG_METHOD_21) && (mac_ocp_read(tp, 0xDC00) & BIT_3))
		tp->eeprom_type = EEPROM_TYPE_NONE;
	else
		rtl_eeprom_type(tp);

	if (tp->eeprom_type == EEPROM_TYPE_93C46 || tp->eeprom_type == EEPROM_TYPE_93C56)
		rtl_set_eeprom_sel_low(ioaddr);

	rtl8168_get_mac_address(dev);

	platform_set_drvdata(pdev, dev);

	if (netif_msg_probe(tp)) {
		printk(KERN_INFO "%s: 0x%lx, "
		       "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x, "
		       "IRQ %d\n",
		       dev->name,
		       dev->base_addr,
		       dev->dev_addr[0], dev->dev_addr[1],
		       dev->dev_addr[2], dev->dev_addr[3],
		       dev->dev_addr[4], dev->dev_addr[5], dev->irq);
	}

	rtl8168_link_option(board_idx, (u8*)&autoneg, (u16*)&speed, (u8*)&duplex);

	rtl8168_init_led(tp);

	rc = register_netdev(dev);
	if (rc) {
		/* Registration failed: give back everything init_board acquired. */
		rtl8168_release_board(pdev, dev, ioaddr);
		return rc;
	}

	//printk(KERN_INFO "%s: This product is covered by one or more of the following patents: US6,570,884, US6,115,776, and US6,327,625.\n", MODULENAME);

	netif_carrier_off(dev);

	//printk("%s", GPL_CLAIM);

	return 0;
}
/*
 * rtl8168_init_board - allocate the net_device and claim board resources.
 * @pdev:       platform device being probed
 * @dev_out:    receives the allocated net_device (NULL on failure)
 * @ioaddr_out: receives the ioremap()ed register base (NULL on failure)
 * @irq_out:    receives the IRQ resource (NULL on failure)
 *
 * Allocates an Ethernet net_device with rtl8168_private as private area,
 * looks up the MEM and IRQ platform resources, requests and maps the
 * register region and identifies the chip.
 *
 * Returns 0 on success or a negative value (-1 .. -5, one per failing
 * step) on failure; everything acquired so far is released before
 * returning an error.
 */
static int __devinit
rtl8168_init_board(struct platform_device *pdev,
		   struct net_device **dev_out,
		   void __iomem **ioaddr_out,
		   struct resource **irq_out)
{
	void __iomem *ioaddr;
	struct net_device *dev;
	struct rtl8168_private *tp;
	int rc = 0, i;
	struct resource *addr_res, *irq_res;

	assert(ioaddr_out != NULL);

	/* dev zeroed in alloc_etherdev */
	dev = alloc_etherdev(sizeof (*tp));
	if (dev == NULL) {
		if (netif_msg_drv(&debug))
			dev_err(&pdev->dev, "unable to alloc new ethernet\n");
		rc = -1;
		goto err_out;
	}

	SET_MODULE_OWNER(dev);
	SET_NETDEV_DEV(dev, &pdev->dev);
	tp = netdev_priv(dev);
	tp->dev = dev;
	tp->msg_enable = netif_msg_init(debug.msg_enable, R8168_MSG_DEFAULT);

	addr_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
	if (addr_res == NULL || irq_res == NULL) {
		dev_err(&pdev->dev,
			"insufficient resources\n");
		rc = -2;
		goto err_out_free;
	}

	if (resource_size(addr_res) < R8168_REGS_SIZE) {
		dev_err(&pdev->dev,
			"MMIO Resource too small\n");
		rc = -3;
		goto err_out_free;
	}

	if (!request_mem_region(addr_res->start, resource_size(addr_res), MODULENAME)) {
		dev_err(&pdev->dev,
			"cannot claim address reg area\n");
		rc = -4;
		goto err_out_free;
	}

	tp->addr_res = addr_res;
	tp->irq_res = irq_res;

	/* ioremap MMIO region */
	ioaddr = ioremap(addr_res->start, R8168_REGS_SIZE);
	if (ioaddr == NULL) {
		if (netif_msg_probe(tp))
			dev_err(&pdev->dev, "cannot remap MMIO, aborting\n");
		rc = -5;
		goto err_out_res;
	}

	/* Identify chip attached to board */
	rtl8168_get_mac_version(tp, ioaddr);

	rtl8168_print_mac_version(tp);

	for (i = ARRAY_SIZE(rtl_chip_info) - 1; i >= 0; i--) {
		if (tp->mcfg == rtl_chip_info[i].mcfg)
			break;
	}

	if (i < 0) {
		/* Unknown chip: assume array element #0, original RTL-8168 */
		if (netif_msg_probe(tp))
			dev_printk(KERN_DEBUG, &pdev->dev, "unknown chip version, assuming %s\n", rtl_chip_info[0].name);
		i++;
	}

	tp->chipset = i;

	*ioaddr_out = ioaddr;
	*dev_out = dev;
	*irq_out = irq_res;
out:
	return rc;

err_out_res:
	/* Release exactly the span that request_mem_region() claimed above. */
	release_mem_region(addr_res->start, resource_size(addr_res));
err_out_free:
	free_netdev(dev);
err_out:
	*ioaddr_out = NULL;
	*dev_out = NULL;
	*irq_out = NULL;
	goto out;
}
dev = alloc_etherdev(sizeof (*tp));
中创建了以太网设备结构。网络设备结构由struct net_device
和底部的硬件私有数据结构组成,如下图所示,左边可以看做是以太网设备,右边是网桥设备。
/*
 * alloc_etherdev_mqs - allocate a net_device set up by ether_setup().
 * @sizeof_priv: size of the driver-private area
 * @txqs:        number of TX queues
 * @rxqs:        number of RX queues
 *
 * Thin wrapper around alloc_netdev_mqs() using the name template "eth%d".
 * Returns whatever alloc_netdev_mqs() returns.
 */
struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
				      unsigned int rxqs)
{
	struct net_device *dev;

	dev = alloc_netdev_mqs(sizeof_priv, "eth%d", ether_setup, txqs, rxqs);
	return dev;
}
alloc_etherdev_mqs()只是一层封装,它调用的alloc_netdev_mqs()
才是分配网络设备结构的通用函数。
/**
* alloc_netdev_mqs - allocate network device
* @sizeof_priv: size of private data to allocate space for
* @name: device name format string
* @setup: callback to initialize device
* @txqs: the number of TX subqueues to allocate
* @rxqs: the number of RX subqueues to allocate
*
* Allocates a struct net_device with private data area for driver use
* and performs basic initialization. Also allocates subqueue structs
* for each queue on the device.
*
* Returns the new device, or NULL if a queue count is zero or any
* allocation fails.
*/
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
void (*setup)(struct net_device *),
unsigned int txqs, unsigned int rxqs)
{
struct net_device *dev;
size_t alloc_size;
struct net_device *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
if (txqs < 1) {
pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
return NULL;
}
#ifdef CONFIG_RPS
if (rxqs < 1) {
pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
return NULL;
}
#endif
alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
/* ensure 32-byte alignment of private area */
alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
alloc_size += sizeof_priv;
}
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
p = kzalloc(alloc_size, GFP_KERNEL);
if (!p)
return NULL;
/* dev is the aligned pointer inside p; padded records the offset between them. */
dev = PTR_ALIGN(p, NETDEV_ALIGN);
dev->padded = (char *)dev - (char *)p;
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
goto free_p;
if (dev_addr_init(dev)) // create the struct netdev_hw_addr hardware-address entry and link it into the struct netdev_hw_addr_list list
goto free_pcpu;
dev_mc_init(dev);// initialise the multicast address list
dev_uc_init(dev);// initialise the unicast address list
dev_net_set(dev, &init_net);
dev->gso_max_size = GSO_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
INIT_LIST_HEAD(&dev->link_watch_list);
INIT_LIST_HEAD(&dev->upper_dev_list);
dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);// invoke the caller-supplied setup callback
dev->num_tx_queues = txqs;
dev->real_num_tx_queues = txqs;
if (netif_alloc_netdev_queues(dev))
goto free_all;
#ifdef CONFIG_RPS
dev->num_rx_queues = rxqs;
dev->real_num_rx_queues = rxqs;
if (netif_alloc_rx_queues(dev))
goto free_all;
#endif
strcpy(dev->name, name);
dev->group = INIT_NETDEV_GROUP;
/* Fall back to default_ethtool_ops when setup() installed none. */
if (!dev->ethtool_ops)
dev->ethtool_ops = &default_ethtool_ops;
return dev;
free_all:
free_netdev(dev);
return NULL;
free_pcpu:
free_percpu(dev->pcpu_refcnt);
kfree(dev->_tx);
#ifdef CONFIG_RPS
kfree(dev->_rx);
#endif
free_p:
kfree(p);
return NULL;
}
EXPORT_SYMBOL(alloc_netdev_mqs);
netif_alloc_netdev_queues(dev)
/*
 * netif_alloc_netdev_queues - allocate and initialise the TX queue array.
 * @dev: device whose num_tx_queues has already been set
 *
 * Allocates a zeroed array of dev->num_tx_queues netdev_queue structures,
 * stores it in dev->_tx, runs netdev_init_one_queue on each queue and
 * initialises tx_global_lock.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails.
 */
static int netif_alloc_netdev_queues(struct net_device *dev)
{
	unsigned int nqueues = dev->num_tx_queues;
	struct netdev_queue *queues;

	BUG_ON(nqueues < 1);

	queues = kcalloc(nqueues, sizeof(*queues), GFP_KERNEL);
	if (queues == NULL)
		return -ENOMEM;

	dev->_tx = queues;

	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
	spin_lock_init(&dev->tx_global_lock);

	return 0;
}
注册socket操作网卡接口:
RTL_NET_DEVICE_OPS(rtl8168_netdev_ops)
dev->netdev_ops=&rtl8168_netdev_ops;
/*
 * net_device_ops table of the RTL8168 driver: maps each ndo_* hook to the
 * driver's implementation. ndo_poll_controller is only provided when
 * CONFIG_NET_POLL_CONTROLLER is enabled.
 */
static const struct net_device_ops rtl8168_netdev_ops = {
.ndo_open = rtl8168_open,
.ndo_stop = rtl8168_close,
.ndo_get_stats = rtl8168_get_stats,
.ndo_start_xmit = rtl8168_start_xmit,
.ndo_tx_timeout = rtl8168_tx_timeout,
.ndo_change_mtu = rtl8168_change_mtu,
.ndo_set_mac_address = rtl8168_set_mac_address,
.ndo_do_ioctl = rtl8168_do_ioctl,
.ndo_set_rx_mode = rtl8168_set_rx_mode,
.ndo_fix_features = rtl8168_fix_features,
.ndo_set_features = rtl8168_set_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = rtl8168_netpoll,
#endif
};
注册ethtool操作网卡的接口:
SET_ETHTOOL_OPS(dev, &rtl8168_ethtool_ops);
(dev)->ethtool_ops = (&rtl8168_ethtool_ops)
/*
 * ethtool_ops table of the RTL8168 driver. get_link and get_ts_info use
 * the generic ethtool_op_* helpers; every other hook is driver-specific.
 */
static const struct ethtool_ops rtl8168_ethtool_ops = {
.get_drvinfo = rtl8168_get_drvinfo,
.get_regs_len = rtl8168_get_regs_len,
.get_link = ethtool_op_get_link,
.get_settings = rtl8168_get_settings,
.set_settings = rtl8168_set_settings,
.get_msglevel = rtl8168_get_msglevel,
.set_msglevel = rtl8168_set_msglevel,
.get_regs = rtl8168_get_regs,
.get_wol = rtl8168_get_wol,
.set_wol = rtl8168_set_wol,
.get_strings = rtl8168_get_strings,
.get_sset_count = rtl8168_get_sset_count,
.get_ethtool_stats = rtl8168_get_ethtool_stats,
.get_ts_info = ethtool_op_get_ts_info,
};
开启napi:
RTL_NAPI_CONFIG(dev, tp, rtl8168_poll, R8168_NAPI_WEIGHT)
netif_napi_add(dev, &tp->napi, rtl8168_poll, R8168_NAPI_WEIGHT)
/*
 * netif_napi_add - initialise a NAPI context and attach it to a device.
 * @dev:    device the context belongs to
 * @napi:   NAPI context to initialise
 * @poll:   poll callback
 * @weight: poll weight; a one-time error is logged if it exceeds
 *          NAPI_POLL_WEIGHT
 *
 * The context is added to dev->napi_list and left with NAPI_STATE_SCHED
 * set.
 */
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
		    int (*poll)(struct napi_struct *, int), int weight)
{
	if (weight > NAPI_POLL_WEIGHT)
		pr_err_once("netif_napi_add() called with weight %d on device %s\n",
			    weight, dev->name);

	/* Plain field setup. */
	napi->poll = poll;
	napi->weight = weight;
	napi->skb = NULL;
	napi->gro_list = NULL;
	napi->gro_count = 0;
	INIT_LIST_HEAD(&napi->poll_list);

	/* Link the context to its device. */
	list_add(&napi->dev_list, &dev->napi_list);
	napi->dev = dev;

#ifdef CONFIG_NETPOLL
	spin_lock_init(&napi->poll_lock);
	napi->poll_owner = -1;
#endif

	set_bit(NAPI_STATE_SCHED, &napi->state);
}
EXPORT_SYMBOL(netif_napi_add);
最后注册网络设备:
register_netdev(dev)
/**
 *	register_netdev - register a network device
 *	@dev: device to register
 *
 *	Locked convenience wrapper: takes the rtnl lock, hands @dev to
 *	register_netdevice() and drops the lock again.
 *
 *	Returns the result of register_netdevice(): 0 on success, a
 *	negative errno code on failure.
 */
int register_netdev(struct net_device *dev)
{
	int rc;

	rtnl_lock();
	rc = register_netdevice(dev);
	rtnl_unlock();

	return rc;
}
EXPORT_SYMBOL(register_netdev);
/**
* register_netdevice - register a network device
* @dev: device to register
*
* Take a completed network device structure and add it to the kernel
* interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
* chain. 0 is returned on success. A negative errno code is returned
* on a failure to set up the device, or if the name is a duplicate.
*
* Callers must hold the rtnl semaphore. You may want
* register_netdev() instead of this.
*
* BUGS:
* The locking appears insufficient to guarantee two parallel registers
* will not get the same name.
*/
int register_netdevice(struct net_device *dev)
{
int ret;
struct net *net = dev_net(dev);
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
might_sleep();
/* When net_device's are persistent, this will be fatal. */
BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
BUG_ON(!net);
spin_lock_init(&dev->addr_list_lock);
netdev_set_addr_lockdep_class(dev);
dev->iflink = -1;
ret = dev_get_valid_name(net, dev, dev->name);
if (ret < 0)
goto out;
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
if (ret) {
/* A positive return from ndo_init is reported as -EIO. */
if (ret > 0)
ret = -EIO;
goto out;
}
}
/* A device advertising CTAG filtering must provide both VLAN vid callbacks. */
if (((dev->hw_features | dev->features) &
NETIF_F_HW_VLAN_CTAG_FILTER) &&
(!dev->netdev_ops->ndo_vlan_rx_add_vid ||
!dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
ret = -EINVAL;
goto err_uninit;
}
/* Allocate an ifindex if none is set; a preset one must not be in use. */
ret = -EBUSY;
if (!dev->ifindex)
dev->ifindex = dev_new_index(net);
else if (__dev_get_by_index(net, dev->ifindex))
goto err_uninit;
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
/* Transfer changeable features to wanted_features and enable
* software offloads (GSO and GRO).
*/
dev->hw_features |= NETIF_F_SOFT_FEATURES;
dev->features |= NETIF_F_SOFT_FEATURES;
dev->wanted_features = dev->features & dev->hw_features;
/* Turn on no cache copy if HW is doing checksum */
if (!(dev->flags & IFF_LOOPBACK)) {
dev->hw_features |= NETIF_F_NOCACHE_COPY;
if (dev->features & NETIF_F_ALL_CSUM) {
dev->wanted_features |= NETIF_F_NOCACHE_COPY;
dev->features |= NETIF_F_NOCACHE_COPY;
}
}
/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
*/
dev->vlan_features |= NETIF_F_HIGHDMA;
/* Make NETIF_F_SG inheritable to tunnel devices.
*/
dev->hw_enc_features |= NETIF_F_SG;
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);// run the netdev_chain notifier chain
ret = notifier_to_errno(ret);
if (ret)
goto err_uninit;
ret = netdev_register_kobject(dev);// add dev to the device model
if (ret)
goto err_uninit;
dev->reg_state = NETREG_REGISTERED;
__netdev_update_features(dev);
/*
* Default initial state at registry is that the
* device is present.
*/
set_bit(__LINK_STATE_PRESENT, &dev->state);
linkwatch_init_dev(dev);
dev_init_scheduler(dev);
dev_hold(dev);
list_netdevice(dev);// add dev to the dev_base_head list of its struct net and to the hash tables keyed by device name and by device index
add_device_randomness(dev->dev_addr, dev->addr_len);
/* If the device has permanent device address, driver should
* set dev_addr and also addr_assign_type should be set to
* NET_ADDR_PERM (default value).
*/
if (dev->addr_assign_type == NET_ADDR_PERM)
memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
/* Notify protocols, that a new device appeared. */
ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
ret = notifier_to_errno(ret);
if (ret) {
/*
 * NOTE(review): after this rollback, control still falls through to
 * the rtmsg_ifinfo() check below - confirm that is intended.
 */
rollback_registered(dev);
dev->reg_state = NETREG_UNREGISTERED;
}
/*
* Prevent userspace races by waiting until the network
* device is fully setup before sending notifications.
*/
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
out:
return ret;
err_uninit:
/* Undo ndo_init (when the driver provides ndo_uninit) before returning the error. */
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
goto out;
}
EXPORT_SYMBOL(register_netdevice);
dev_init_scheduler(dev)初始化发送队列规则:
void dev_init_scheduler(struct net_device *dev)
{
dev->qdisc = &noop_qdisc;
netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev))
dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}
/*
 * Per-queue callback: sets both the active and the sleeping qdisc of
 * @dev_queue to the qdisc passed in through @_qdisc. @dev is unused.
 */
static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	dev_queue->qdisc = _qdisc;
	dev_queue->qdisc_sleeping = _qdisc;
}
网络设备初始化完成后,网络设备就出现在系统中,等待被打开。当使用ifconfig eth0 up 时,就会调用打开网络设备函数。
/**
 *	dev_open - prepare an interface for use.
 *	@dev: device to open
 *
 *	Brings a device from the down to the up state via __dev_open().
 *	After a successful open an RTM_NEWLINK message is emitted and
 *	%NETDEV_UP is sent to the netdev notifier chain.
 *
 *	Opening an interface that is already up does nothing and returns 0.
 *	On failure the negative errno code from __dev_open() is returned.
 */
int dev_open(struct net_device *dev)
{
	int rc;

	/* Already up: nothing to do. */
	if ((dev->flags & IFF_UP) != 0)
		return 0;

	rc = __dev_open(dev);
	if (rc >= 0) {
		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
		call_netdevice_notifiers(NETDEV_UP, dev);
	}

	return rc;
}
EXPORT_SYMBOL(dev_open);
/*
 * __dev_open - do the actual work of bringing a device up.
 * @dev: device to open
 *
 * Must be called with the rtnl lock held (ASSERT_RTNL). Returns -ENODEV
 * if the device is not present, otherwise the first non-zero result of
 * netpoll_rx_disable(), the NETDEV_PRE_UP notifiers, ndo_validate_addr or
 * ndo_open; 0 on success.
 */
static int __dev_open(struct net_device *dev)
{
const struct net_device_ops *ops = dev->netdev_ops;
int ret;
ASSERT_RTNL();
if (!netif_device_present(dev))
return -ENODEV;
/* Block netpoll from trying to do any rx path servicing.
* If we don't do this there is a chance ndo_poll_controller
* or ndo_poll may be running while we open the device
*/
ret = netpoll_rx_disable(dev);
if (ret)
return ret;
ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
ret = notifier_to_errno(ret);
if (ret)
return ret;
set_bit(__LINK_STATE_START, &dev->state);
if (ops->ndo_validate_addr)
ret = ops->ndo_validate_addr(dev);
if (!ret && ops->ndo_open) // call the ndo_open hook registered in the device's netdev_ops
ret = ops->ndo_open(dev);
netpoll_rx_enable(dev);
/* On failure drop the START bit again; on success mark the device up and activate it. */
if (ret)
clear_bit(__LINK_STATE_START, &dev->state);
else {
dev->flags |= IFF_UP;
net_dmaengine_get();
dev_set_rx_mode(dev);
dev_activate(dev);
add_device_randomness(dev->dev_addr, dev->addr_len);
}
return ret;
}
在这里就是调用rtl8168_open()函数
/*
 * rtl8168_open - ndo_open implementation of the RTL8168 driver.
 * @dev: device being opened
 *
 * Allocates the coherent DMA areas for the TX descriptors, RX descriptors
 * and tally counters, initialises the rings, brings up the hardware and
 * requests the interrupt line. tp->in_open_fun is TRUE for the duration of
 * the call.
 *
 * Returns 0 on success; -ENOMEM if a DMA allocation fails, otherwise the
 * negative value returned by rtl8168_init_ring() or request_irq().
 */
static int rtl8168_open(struct net_device *dev)
{
struct rtl8168_private *tp = netdev_priv(dev);
int retval;
tp->in_open_fun = TRUE;
/* Default error for the three DMA allocations below. */
retval = -ENOMEM;
rtl8168_set_rxbufsize(tp, dev);
/*
* Rx and Tx descriptors needs 256 bytes alignment.
* pci_alloc_consistent provides more.
*/
tp->TxDescArray = dma_alloc_coherent(&tp->platform_dev->dev, R8168_TX_RING_BYTES,
&tp->TxPhyAddr, GFP_KERNEL);
if (!tp->TxDescArray)
goto out;
tp->RxDescArray = dma_alloc_coherent(&tp->platform_dev->dev, R8168_RX_RING_BYTES,
&tp->RxPhyAddr, GFP_KERNEL);
if (!tp->RxDescArray)
goto err_free_tx;
tp->tally_vaddr = dma_alloc_coherent(&tp->platform_dev->dev, sizeof(*tp->tally_vaddr), &tp->tally_paddr, GFP_KERNEL);
if (!tp->tally_vaddr)
goto err_free_rx;
retval = rtl8168_init_ring(dev);
if (retval < 0)
goto err_free_counters;
INIT_DELAYED_WORK(&tp->task, NULL);
#ifdef CONFIG_R8168_NAPI
RTL_NAPI_ENABLE(dev, &tp->napi);
#endif
/* Hardware bring-up sequence; the calls are made in this fixed order. */
rtl8168_exit_oob(dev);
rtl8168_tally_counter_clear(tp);
rtl8168_hw_init(dev);
rtl8168_hw_reset(dev);
rtl8168_powerup_pll(dev);
rtl8168_hw_ephy_config(dev);
rtl8168_hw_phy_config(dev);
rtl8168_hw_start(dev);
rtl8168_set_speed(dev, autoneg, speed, duplex);
/*
 * NOTE(review): a request_irq() failure jumps to err_free_counters, which
 * only frees the three DMA areas. Whatever rtl8168_init_ring() set up, the
 * NAPI enable and the hardware start above are not undone on that path -
 * confirm whether extra cleanup is needed.
 */
retval = request_irq(dev->irq, rtl8168_interrupt, (tp->features & RTL_FEATURE_MSI) ? 0 : SA_SHIRQ, dev->name, dev);
if (retval<0)
goto err_free_counters;
rtl8168_request_link_timer(dev);
out:
tp->in_open_fun = FALSE;
return retval;
/* Error unwinding: free the DMA areas in reverse order of allocation. */
err_free_counters:
dma_free_coherent(&tp->platform_dev->dev, sizeof(*tp->tally_vaddr), tp->tally_vaddr, tp->tally_paddr);
tp->tally_vaddr = NULL;
err_free_rx:
dma_free_coherent(&tp->platform_dev->dev, R8168_RX_RING_BYTES, tp->RxDescArray,
tp->RxPhyAddr);
tp->RxDescArray = NULL;
err_free_tx:
dma_free_coherent(&tp->platform_dev->dev, R8168_TX_RING_BYTES, tp->TxDescArray,
tp->TxPhyAddr);
tp->TxDescArray = NULL;
goto out;
}