网卡驱动架构分析:
1. Linux网络子系统
2. 重要数据结构
总结一下三个重要的数据结构:
2.1. net_device
2.2. net_device_ops
2.3. sk_buff
3. 网卡驱动架构分析
CS8900.c //早期2410使用的网卡芯片
3.1. 网卡初始化
首先找到驱动程序的入口:
早期的驱动入口并不是module_init()函数,而是init_module,所以找到这个函数
int __init init_module(void) { struct net_device *dev = alloc_etherdev(sizeof(struct net_local)); struct net_local *lp; int ret = 0; #if DEBUGGING net_debug = debug; #else debug = 0; #endif if (!dev) return -ENOMEM; dev->irq = irq; dev->base_addr = io; lp = netdev_priv(dev); #if ALLOW_DMA if (use_dma) { lp->use_dma = use_dma; lp->dma = dma; lp->dmasize = dmasize; } #endif spin_lock_init(&lp->lock); /* boy, they'd better get these right */ if (!strcmp(media, "rj45")) lp->adapter_cnf = A_CNF_MEDIA_10B_T | A_CNF_10B_T; else if (!strcmp(media, "aui")) lp->adapter_cnf = A_CNF_MEDIA_AUI | A_CNF_AUI; else if (!strcmp(media, "bnc")) lp->adapter_cnf = A_CNF_MEDIA_10B_2 | A_CNF_10B_2; else lp->adapter_cnf = A_CNF_MEDIA_10B_T | A_CNF_10B_T; if (duplex==-1) lp->auto_neg_cnf = AUTO_NEG_ENABLE; if (io == 0) { printk(KERN_ERR "cs89x0.c: Module autoprobing not allowed.\n"); printk(KERN_ERR "cs89x0.c: Append io=0xNNN\n"); ret = -EPERM; goto out; } else if (io <= 0x1ff) { ret = -ENXIO; goto out; }
第一步:分配net_device结构,
第二步:初始化net_device结构,
dev->irq = irq; // 分配中断号
dev->base_addr = io; // 设备基地址
lp = netdev_priv(dev);
第三步:
ret = cs89x0_probe1(dev, io, 1); // 这一步其实也是初始化硬件的!另外还有一部分是对net_device结构进行一些初始化。这个函数比较长,就不贴代码了,其中一行比较重要:
dev->netdev_ops = &net_ops; // 这个是对netdev_ops成员进行初始化
最后一步注册网卡驱动!上图中第二个红色箭头所指向的地方!
总结一下上图:
3.2. 网卡数据的发送
这个结合前面的经验,找到网卡的函数操作集结构:
可以看到这个成员函数的名字叫做:net_send_packet
/*
 * Transmit handler of the cs89x0 driver: uploads one sk_buff to the chip.
 *
 * Returns NETDEV_TX_BUSY (without freeing skb) when the chip reports that it
 * is not ready for the frame, otherwise NETDEV_TX_OK after the frame has been
 * written out and the skb released.
 */
static netdev_tx_t net_send_packet(struct sk_buff *skb,struct net_device *dev)
{
	struct net_local *lp = netdev_priv(dev);
	unsigned long flags;

	if (net_debug > 3) {
		/* Bytes 12..13 of the frame (after the two MAC addresses) hold the type. */
		printk("%s: sent %d byte packet of type %x\n",
			dev->name, skb->len,
			(skb->data[ETH_ALEN+ETH_ALEN] << 8) | skb->data[ETH_ALEN+ETH_ALEN+1]);
	}

	/* keep the upload from being interrupted, since we
	   ask the chip to start transmitting before the
	   whole packet has been completely uploaded. */

	spin_lock_irqsave(&lp->lock, flags);
	netif_stop_queue(dev); /* 1. Stop the TX queue (author's note: stop accepting further packets from the upper layer while this one is being sent). */

	/* initiate a transmit sequence */
	writeword(dev->base_addr, TX_CMD_PORT, lp->send_cmd); /* 2. Start the transfer: write the send command, then the frame length. */
	writeword(dev->base_addr, TX_LEN_PORT, skb->len);

	/* Test to see if the chip has allocated memory for the packet */
	if ((readreg(dev, PP_BusST) & READY_FOR_TX_NOW) == 0) {
		/*
		 * Gasp!  It hasn't.  But that shouldn't happen since
		 * we're waiting for TxOk, so return 1 and requeue this packet.
		 */

		spin_unlock_irqrestore(&lp->lock, flags);
		if (net_debug) printk("cs89x0: Tx buffer not free!\n");
		return NETDEV_TX_BUSY;
	}
	/* Write the contents of the packet */
	/* (skb->len+1) >>1 rounds the byte count up to whole 16-bit words. */
	writewords(dev->base_addr, TX_FRAME_PORT,skb->data,(skb->len+1) >>1);
	spin_unlock_irqrestore(&lp->lock, flags);
	dev->stats.tx_bytes += skb->len;
	dev_kfree_skb (skb); /* 3. Release the skb. */

	/* Author's note: once the data has been sent, the NIC raises an interrupt. */
	return NETDEV_TX_OK;
}
产生一个中断这个可以查查request_irq函数,在这个函数被调用的地方可以看到这样的一行代码:
ret = request_irq(dev->irq, net_interrupt, 0, dev->name, dev);
可以看到这里把net_interrupt函数注册为中断处理函数,网卡的发送中断和接收中断都由它来处理!
一个是发送中断,一个是接收中断,
netif_wake_queue(dev);/* Inform upper layers. */ // 这行代码表示在发送中断处理过程中,通知上层协议,可以再次向网卡传输数据。
3.3. 网卡数据的接收
网卡数据的接收入口是在中断处理中完成的,这一点可以从中断函数net_interrupt中看到
接收中断处理函数net_rx(dev)
/* We have a good packet(s), get it/them out of the buffers. */
/*
 * Receive path: reads one frame from the chip into a freshly allocated
 * sk_buff and passes it to netif_rx(). On a bad status the error is counted
 * via count_rx_errors(); on allocation failure rx_dropped is incremented.
 */
static void
net_rx(struct net_device *dev)
{
	struct sk_buff *skb;
	int status, length;

	int ioaddr = dev->base_addr;
	/* Two consecutive reads of the same port: first the status, then the length. */
	status = readword(ioaddr, RX_FRAME_PORT); /* receive status */
	length = readword(ioaddr, RX_FRAME_PORT); /* received length in bytes */

	if ((status & RX_OK) == 0) {
		count_rx_errors(status, dev);
		return;
	}

	/* Malloc up new buffer. */
	skb = dev_alloc_skb(length + 2); /* allocate the skb; the extra 2 bytes are consumed by skb_reserve() below */
	if (skb == NULL) {
#if 0		/* Again, this seems a cruel thing to do */
		printk(KERN_WARNING "%s: Memory squeeze, dropping packet.\n", dev->name);
#endif
		dev->stats.rx_dropped++;
		return;
	}
	skb_reserve(skb, 2);	/* longword align L3 header */

	/* Copy the frame into the skb, length >> 1 16-bit words at a time. */
	readwords(ioaddr, RX_FRAME_PORT, skb_put(skb, length), length >> 1);
	/* Odd length: one more word read supplies the final byte. */
	if (length & 1)
		skb->data[length-1] = readword(ioaddr, RX_FRAME_PORT);

	if (net_debug > 3) {
		printk(	"%s: received %d byte packet of type %x\n",
			dev->name, length,
			(skb->data[ETH_ALEN+ETH_ALEN] << 8) | skb->data[ETH_ALEN+ETH_ALEN+1]);
	}

	skb->protocol=eth_type_trans(skb,dev);
	netif_rx(skb); /* hand the skb to the protocol stack */
	dev->stats.rx_packets++;
	dev->stats.rx_bytes += length;
}
回环网卡驱动设计:
使用ifconfig,可以看到除了eth0还有一个lo。eth0代表的是一个物理网卡,lo代表的就是回环网卡,从上面的打印信息可以看到lo的IP地址是127.0.0.1,可以看到当ping 127.0.0.x的时候能ping通,其实lo就是网卡的tx和rx在软件层的短接!所以才叫做回环网卡!
其实内核代码中也可以找到回环网卡的驱动:loopback.c
这个文件中的代码其实不是一个内核模块,而是由内核其它部分调用的!
删掉内核代码中原有的loopback.c,结合上面的分析和对原有源码的分析,重新编写loopback.c:
#include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/if_ether.h> /* For the statistics structure. */ unsigned long bytes = 0; unsigned long packets = 0;//skb包 static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)//数据发送 { skb->protocol = eth_type_trans(skb,dev);//表明skb包的协议 以太网协议 bytes += skb->len;//发送的数据量 packets++; //发送的数据包也要加一 netif_rx(skb);//将skb向回送, 回环网卡驱动就是这实现的,这是很关键的一步 return 0; } static struct net_device_stats *loopback_get_stats(struct net_device *dev)//获取网卡状态 { struct net_device_stats *stats = &dev->stats;//首先把state这个成员取出来 stats->rx_packets = packets;//表示网卡收到了多少个包 stats->tx_packets = packets;//表示网卡发送了多少个包 stats->rx_bytes = bytes;//表示网卡接收到了多少个字节 stats->tx_bytes = bytes; return stats;//返回状态 } static const struct net_device_ops loopback_ops = {//定义一个net_device_ops 结构 .ndo_start_xmit= loopback_xmit,//发送指针 .ndo_get_stats = loopback_get_stats,//获取网卡状态的函数 }; static void loopback_setup(struct net_device *dev)//初始化设置操作 { dev->mtu = (16 * 1024) + 20 + 20 + 12;//网卡最大接收包的尺寸:16K + TCP头 + IP头 + 以太网头 dev->flags = IFF_LOOPBACK;//回环网卡专有标志 这是一个宏内核代码可查 dev->header_ops = ð_header_ops;//这个是网络包的函数操作集,内核可以看这个成员的数据结构 dev->netdev_ops = &loopback_ops;//网卡所支持操作的集合 } static __net_init int loopback_net_init(struct net *net) { struct net_device *dev; int err; err = -ENOMEM; dev = alloc_netdev(0, "lo", loopback_setup);//分配一个net_device结构,loopback为一个初始化函数 if (!dev) goto out; err = register_netdev(dev);//注册网卡驱动程序 if (err) goto out_free_netdev; net->loopback_dev = dev; return 0; out_free_netdev: free_netdev(dev); out: if (net == &init_net) panic("loopback: Failed to register netdevice: %d\n", err); return err; } static __net_exit void loopback_net_exit(struct net *net) { struct net_device *dev = net->loopback_dev; unregister_netdev(dev);//注销网卡驱动程序 } /* Registered in net/core/dev.c */ struct 
pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, .exit = loopback_net_exit, }; </span>
上面的回环网卡驱动有点问题,ping不通!
这是能ping通的内核自带的源码:
#include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/module.h> #include <linux/interrupt.h> #include <linux/fs.h> #include <linux/types.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/errno.h> #include <linux/fcntl.h> #include <linux/in.h> #include <linux/init.h> #include <asm/system.h> #include <asm/uaccess.h> #include <asm/io.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/ethtool.h> #include <net/sock.h> #include <net/checksum.h> #include <linux/if_ether.h> /* For the statistics structure. */ #include <linux/if_arp.h> /* For ARPHRD_ETHER */ #include <linux/ip.h> #include <linux/tcp.h> #include <linux/percpu.h> #include <net/net_namespace.h> #include <linux/u64_stats_sync.h> struct pcpu_lstats { u64 packets; u64 bytes; struct u64_stats_sync syncp; }; /* * The higher levels take care of making this non-reentrant (it's * called with bh's disabled). */ static netdev_tx_t loopback_xmit(struct sk_buff *skb, struct net_device *dev) { struct pcpu_lstats *lb_stats; int len; skb_orphan(skb); skb->protocol = eth_type_trans(skb, dev); /* it's OK to use per_cpu_ptr() because BHs are off */ lb_stats = this_cpu_ptr(dev->lstats); len = skb->len; if (likely(netif_rx(skb) == NET_RX_SUCCESS)) { u64_stats_update_begin(&lb_stats->syncp); lb_stats->bytes += len; lb_stats->packets++; u64_stats_update_end(&lb_stats->syncp); } return NETDEV_TX_OK; } static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { u64 bytes = 0; u64 packets = 0; int i; for_each_possible_cpu(i) { const struct pcpu_lstats *lb_stats; u64 tbytes, tpackets; unsigned int start; lb_stats = per_cpu_ptr(dev->lstats, i); do { start = u64_stats_fetch_begin(&lb_stats->syncp); tbytes = lb_stats->bytes; tpackets = lb_stats->packets; } while (u64_stats_fetch_retry(&lb_stats->syncp, start)); bytes += tbytes; packets += tpackets; } stats->rx_packets = 
packets; stats->tx_packets = packets; stats->rx_bytes = bytes; stats->tx_bytes = bytes; return stats; } static u32 always_on(struct net_device *dev) { return 1; } static const struct ethtool_ops loopback_ethtool_ops = { .get_link = always_on, }; static int loopback_dev_init(struct net_device *dev) { dev->lstats = alloc_percpu(struct pcpu_lstats); if (!dev->lstats) return -ENOMEM; return 0; } static void loopback_dev_free(struct net_device *dev) { free_percpu(dev->lstats); free_netdev(dev); } static const struct net_device_ops loopback_ops = { .ndo_init = loopback_dev_init, .ndo_start_xmit= loopback_xmit, .ndo_get_stats64 = loopback_get_stats64, }; /* * The loopback device is special. There is only one instance * per network namespace. */ static void loopback_setup(struct net_device *dev) { dev->mtu = (16 * 1024) + 20 + 20 + 12; dev->hard_header_len = ETH_HLEN; /* 14 */ dev->addr_len = ETH_ALEN; /* 6 */ dev->tx_queue_len = 0; dev->type = ARPHRD_LOOPBACK; /* 0x0001*/ dev->flags = IFF_LOOPBACK; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->hw_features = NETIF_F_ALL_TSO | NETIF_F_UFO; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | NETIF_F_UFO | NETIF_F_NO_CSUM | NETIF_F_RXCSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | NETIF_F_VLAN_CHALLENGED | NETIF_F_LOOPBACK; dev->ethtool_ops = &loopback_ethtool_ops; dev->header_ops = ð_header_ops; dev->netdev_ops = &loopback_ops; dev->destructor = loopback_dev_free; } /* Setup and register the loopback device. 
*/ static __net_init int loopback_net_init(struct net *net) { struct net_device *dev; int err; err = -ENOMEM; dev = alloc_netdev(0, "lo", loopback_setup); if (!dev) goto out; dev_net_set(dev, net); err = register_netdev(dev); if (err) goto out_free_netdev; net->loopback_dev = dev; return 0; out_free_netdev: free_netdev(dev); out: if (net_eq(net, &init_net)) panic("loopback: Failed to register netdevice: %d\n", err); return err; } /* Registered in net/core/dev.c */ struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, };
自己将两份源码对照着看了,暂时还没找出原因,这里先上一张错误的截图以及我认为出错的原因
在使用ifconfig命令的时候,发现RX、TX居然都有packets网络包数据传输,当ping 127.0.0.x的时候会一直阻塞在那里,说明问题应该在初始化参数设置的部分!这里有数据包发送但是没有接收到数据包!说明数据接收部分,也就是回环发送部分有问题!这里暂时先搁一下,后面再杀个回马枪来深入研究一下!