网络源码分析

ixgbe网卡驱动

/*
 * ixgbe_init_module - module load entry point for the ixgbe driver.
 *
 * Prints the driver banner and copyright, creates the single-threaded
 * workqueue stored in the global ixgbe_wq, initialises the driver's debug
 * support via ixgbe_dbg_init(), and registers ixgbe_driver with the PCI
 * core. When CONFIG_IXGBE_DCA is set it also registers dca_notifier.
 *
 * Returns 0 on success, -ENOMEM if the workqueue cannot be created, or the
 * error returned by pci_register_driver().
 */
static int __init ixgbe_init_module(void)
{
    int err;

    pr_info("%s\n", ixgbe_driver_string);
    pr_info("%s\n", ixgbe_copyright);

    ixgbe_wq = create_singlethread_workqueue(ixgbe_driver_name);
    if (!ixgbe_wq) {
        pr_err("%s: Failed to create workqueue\n", ixgbe_driver_name);
        return -ENOMEM;
    }

    ixgbe_dbg_init();

    err = pci_register_driver(&ixgbe_driver);
    if (err)
        goto err_unwind;

#ifdef CONFIG_IXGBE_DCA
    dca_register_notify(&dca_notifier);
#endif

    return 0;

err_unwind:
    /* Registration failed: drop the workqueue first, then the debug state. */
    destroy_workqueue(ixgbe_wq);
    ixgbe_dbg_exit();
    return err;
}

ixgbe_wq 全局变量 workqueue
ixgbe_dbg_init 创建debugfs目录
pci_register_driver 注册驱动到pci总线
dca_register_notify Direct Cache Access注册 支持网卡DMA直接写进cpu cache,实现cpu处理之前的预读

/*
 * ixgbe_probe - PCI probe callback that brings up one ixgbe adapter.
 * @pdev: PCI device being probed
 * @ent:  matching PCI id table entry; ent->driver_data indexes
 *        ixgbe_info_tbl to select the per-MAC description (ii)
 *
 * In order, the function: rejects virtual functions, enables the PCI
 * device and picks a DMA mask, claims the memory regions, allocates a
 * multiqueue net_device whose private area is the struct ixgbe_adapter,
 * maps PCI resource 0, installs the MAC/EEPROM/PHY operation tables from
 * ii, resets the hardware, fills in the netdev feature flags, validates
 * the EEPROM checksum and MAC address, sets up the service timer/task and
 * the interrupt scheme, and finally registers the net_device.
 *
 * Returns 0 on success. On failure it unwinds through the err_* labels at
 * the bottom and returns the error code of the failing step (for example
 * -EINVAL, -ENOMEM or -EIO).
 */
static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
    struct net_device *netdev;
    struct ixgbe_adapter *adapter = NULL;
    struct ixgbe_hw *hw;
    const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data];
    int i, err, pci_using_dac, expected_gts;
    unsigned int indices = MAX_TX_QUEUES;
    u8 part_str[IXGBE_PBANUM_LENGTH];
    bool disable_dev = false;
#ifdef IXGBE_FCOE
    u16 device_caps;
#endif
    u32 eec;

    /* Catch broken hardware that put the wrong VF device ID in
     * the PCIe SR-IOV capability.
     */
    if (pdev->is_virtfn) {
        WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
             pci_name(pdev), pdev->vendor, pdev->device);
        return -EINVAL;
    }

    err = pci_enable_device_mem(pdev);
    if (err)
        return err;

    /* Try a 64-bit DMA mask first and fall back to 32-bit. pci_using_dac
     * records which one was accepted; it decides below whether
     * NETIF_F_HIGHDMA is advertised.
     */
    if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
        pci_using_dac = 1;
    } else {
        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
        if (err) {
            dev_err(&pdev->dev,
                "No usable DMA configuration, aborting\n");
            goto err_dma;
        }
        pci_using_dac = 0;
    }

    err = pci_request_mem_regions(pdev, ixgbe_driver_name);
    if (err) {
        dev_err(&pdev->dev,
            "pci_request_selected_regions failed 0x%x\n", err);
        goto err_pci_reg;
    }

    pci_enable_pcie_error_reporting(pdev);

    pci_set_master(pdev);
    pci_save_state(pdev);

    /* The 82598EB overrides the default queue count (MAX_TX_QUEUES). */
    if (ii->mac == ixgbe_mac_82598EB) {
#ifdef CONFIG_IXGBE_DCB
        /* 8 TC w/ 4 queues per TC */
        indices = 4 * MAX_TRAFFIC_CLASS;
#else
        indices = IXGBE_MAX_RSS_INDICES;
#endif
    }

    /* The adapter structure lives in the net_device's private area. */
    netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices);
    if (!netdev) {
        err = -ENOMEM;
        goto err_alloc_etherdev;
    }

    SET_NETDEV_DEV(netdev, &pdev->dev);

    adapter = netdev_priv(netdev);

    adapter->netdev = netdev;
    adapter->pdev = pdev;
    hw = &adapter->hw;
    hw->back = adapter;
    adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);

    /* Map PCI resource 0. io_addr keeps a second copy of the mapping;
     * it is the pointer passed to iounmap() in the error path.
     */
    hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
                  pci_resource_len(pdev, 0));
    adapter->io_addr = hw->hw_addr;
    if (!hw->hw_addr) {
        err = -EIO;
        goto err_ioremap;
    }

    netdev->netdev_ops = &ixgbe_netdev_ops;
    ixgbe_set_ethtool_ops(netdev);
    netdev->watchdog_timeo = 5 * HZ;
    /* Temporary name (the PCI name); overwritten with "eth%d" just
     * before register_netdev() below.
     */
    strlcpy(netdev->name, pci_name(pdev), sizeof(netdev->name));

    /* Setup hw api */
    hw->mac.ops   = *ii->mac_ops;
    hw->mac.type  = ii->mac;
    hw->mvals     = ii->mvals;
    if (ii->link_ops)
        hw->link.ops  = *ii->link_ops;

    /* EEPROM */
    hw->eeprom.ops = *ii->eeprom_ops;
    eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
    /* First register read: bail out if the device is reported removed. */
    if (ixgbe_removed(hw->hw_addr)) {
        err = -EIO;
        goto err_ioremap;
    }
    /* If EEPROM is valid (bit 8 = 1), use default otherwise use bit bang */
    if (!(eec & BIT(8)))
        hw->eeprom.ops.read = &ixgbe_read_eeprom_bit_bang_generic;

    /* PHY */
    hw->phy.ops = *ii->phy_ops;
    hw->phy.sfp_type = ixgbe_sfp_type_unknown;
    /* ixgbe_identify_phy_generic will set prtad and mmds properly */
    hw->phy.mdio.prtad = MDIO_PRTAD_NONE;
    hw->phy.mdio.mmds = 0;
    hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22;
    hw->phy.mdio.dev = netdev;
    hw->phy.mdio.mdio_read = ixgbe_mdio_read;
    hw->phy.mdio.mdio_write = ixgbe_mdio_write;

    /* setup the private structure */
    err = ixgbe_sw_init(adapter, ii);
    if (err)
        goto err_sw_init;

    /* Only the X550 family gets a UDP tunnel offload description. */
    switch (adapter->hw.mac.type) {
    case ixgbe_mac_X550:
    case ixgbe_mac_X550EM_x:
        netdev->udp_tunnel_nic_info = &ixgbe_udp_tunnels_x550;
        break;
    case ixgbe_mac_x550em_a:
        netdev->udp_tunnel_nic_info = &ixgbe_udp_tunnels_x550em_a;
        break;
    default:
        break;
    }

    /* Make sure the SWFW semaphore is in a valid state */
    if (hw->mac.ops.init_swfw_sync)
        hw->mac.ops.init_swfw_sync(hw);

    /* Make it possible the adapter to be woken up via WOL */
    switch (adapter->hw.mac.type) {
    case ixgbe_mac_82599EB:
    case ixgbe_mac_X540:
    case ixgbe_mac_X550:
    case ixgbe_mac_X550EM_x:
    case ixgbe_mac_x550em_a:
        IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);
        break;
    default:
        break;
    }

    /*
     * If there is a fan on this device and it has failed log the
     * failure.
     */
    if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) {
        u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
        if (esdp & IXGBE_ESDP_SDP1)
            e_crit(probe, "Fan has stopped, replace the adapter\n");
    }

    if (allow_unsupported_sfp)
        hw->allow_unsupported_sfp = allow_unsupported_sfp;

    /* reset_hw fills in the perm_addr as well */
    hw->phy.reset_if_overtemp = true;
    err = hw->mac.ops.reset_hw(hw);
    hw->phy.reset_if_overtemp = false;
    ixgbe_set_eee_capable(adapter);
    /* A missing SFP module is tolerated; an unsupported module or any
     * other reset error aborts the probe.
     */
    if (err == IXGBE_ERR_SFP_NOT_PRESENT) {
        err = 0;
    } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
        e_dev_err("failed to load because an unsupported SFP+ or QSFP module type was detected.\n");
        e_dev_err("Reload the driver after installing a supported module.\n");
        goto err_sw_init;
    } else if (err) {
        e_dev_err("HW Init failed: %d\n", err);
        goto err_sw_init;
    }

#ifdef CONFIG_PCI_IOV
    /* SR-IOV not supported on the 82598 */
    if (adapter->hw.mac.type == ixgbe_mac_82598EB)
        goto skip_sriov;
    /* Mailbox */
    ixgbe_init_mbx_params_pf(hw);
    hw->mbx.ops = ii->mbx_ops;
    pci_sriov_set_totalvfs(pdev, IXGBE_MAX_VFS_DRV_LIMIT);
    ixgbe_enable_sriov(adapter, max_vfs);
skip_sriov:

#endif
    /* Baseline offloads enabled by default on every MAC type. */
    netdev->features = NETIF_F_SG |
               NETIF_F_TSO |
               NETIF_F_TSO6 |
               NETIF_F_RXHASH |
               NETIF_F_RXCSUM |
               NETIF_F_HW_CSUM;

#define IXGBE_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
                    NETIF_F_GSO_GRE_CSUM | \
                    NETIF_F_GSO_IPXIP4 | \
                    NETIF_F_GSO_IPXIP6 | \
                    NETIF_F_GSO_UDP_TUNNEL | \
                    NETIF_F_GSO_UDP_TUNNEL_CSUM)

    netdev->gso_partial_features = IXGBE_GSO_PARTIAL_FEATURES;
    netdev->features |= NETIF_F_GSO_PARTIAL |
                IXGBE_GSO_PARTIAL_FEATURES;

    if (hw->mac.type >= ixgbe_mac_82599EB)
        netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_GSO_UDP_L4;

#ifdef CONFIG_IXGBE_IPSEC
#define IXGBE_ESP_FEATURES  (NETIF_F_HW_ESP | \
                 NETIF_F_HW_ESP_TX_CSUM | \
                 NETIF_F_GSO_ESP)

    if (adapter->ipsec)
        netdev->features |= IXGBE_ESP_FEATURES;
#endif
    /* copy netdev features into list of user selectable features */
    netdev->hw_features |= netdev->features |
                   NETIF_F_HW_VLAN_CTAG_FILTER |
                   NETIF_F_HW_VLAN_CTAG_RX |
                   NETIF_F_HW_VLAN_CTAG_TX |
                   NETIF_F_RXALL |
                   NETIF_F_HW_L2FW_DOFFLOAD;

    if (hw->mac.type >= ixgbe_mac_82599EB)
        netdev->hw_features |= NETIF_F_NTUPLE |
                       NETIF_F_HW_TC;

    /* Advertise high-memory DMA only when the 64-bit mask was accepted. */
    if (pci_using_dac)
        netdev->features |= NETIF_F_HIGHDMA;

    netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
    netdev->hw_enc_features |= netdev->vlan_features;
    netdev->mpls_features |= NETIF_F_SG |
                 NETIF_F_TSO |
                 NETIF_F_TSO6 |
                 NETIF_F_HW_CSUM;
    netdev->mpls_features |= IXGBE_GSO_PARTIAL_FEATURES;

    /* set this bit last since it cannot be part of vlan_features */
    netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
                NETIF_F_HW_VLAN_CTAG_RX |
                NETIF_F_HW_VLAN_CTAG_TX;

    netdev->priv_flags |= IFF_UNICAST_FLT;
    netdev->priv_flags |= IFF_SUPP_NOFCS;

    /* MTU range: 68 - 9710 */
    netdev->min_mtu = ETH_MIN_MTU;
    netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);

#ifdef CONFIG_IXGBE_DCB
    if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE)
        netdev->dcbnl_ops = &ixgbe_dcbnl_ops;
#endif

#ifdef IXGBE_FCOE
    if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) {
        unsigned int fcoe_l;

        /* Clears the FCoE-capable flag when the device caps carry
         * IXGBE_DEVICE_CAPS_FCOE_OFFLOADS.
         * NOTE(review): the bit name suggests "offloads present", yet the
         * flag is cleared when it is set - confirm the bit's polarity.
         */
        if (hw->mac.ops.get_device_caps) {
            hw->mac.ops.get_device_caps(hw, &device_caps);
            if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS)
                adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
        }


        /* FCoE ring limit: the smaller of IXGBE_FCRETA_SIZE and the
         * number of online CPUs.
         */
        fcoe_l = min_t(int, IXGBE_FCRETA_SIZE, num_online_cpus());
        adapter->ring_feature[RING_F_FCOE].limit = fcoe_l;

        netdev->features |= NETIF_F_FSO |
                    NETIF_F_FCOE_CRC;

        netdev->vlan_features |= NETIF_F_FSO |
                     NETIF_F_FCOE_CRC |
                     NETIF_F_FCOE_MTU;
    }
#endif /* IXGBE_FCOE */
    /* LRO is user-selectable when RSC is possible, and on by default
     * only when RSC is already enabled.
     */
    if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)
        netdev->hw_features |= NETIF_F_LRO;
    if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
        netdev->features |= NETIF_F_LRO;

    if (ixgbe_check_fw_error(adapter)) {
        err = -EIO;
        goto err_sw_init;
    }

    /* make sure the EEPROM is good */
    if (hw->eeprom.ops.validate_checksum(hw, NULL) < 0) {
        e_dev_err("The EEPROM Checksum Is Not Valid\n");
        err = -EIO;
        goto err_sw_init;
    }

    /* Return value deliberately ignored here: perm_addr was already
     * filled in by reset_hw (see above), so this call is best-effort.
     */
    eth_platform_get_mac_address(&adapter->pdev->dev,
                     adapter->hw.mac.perm_addr);

    memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len);

    if (!is_valid_ether_addr(netdev->dev_addr)) {
        e_dev_err("invalid MAC address\n");
        err = -EIO;
        goto err_sw_init;
    }

    /* Set hw->mac.addr to permanent MAC address */
    ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
    ixgbe_mac_set_default_filter(adapter);

    timer_setup(&adapter->service_timer, ixgbe_service_timer, 0);

    if (ixgbe_removed(hw->hw_addr)) {
        err = -EIO;
        goto err_sw_init;
    }
    /* Prepare the service task and mark it initialised but not scheduled. */
    INIT_WORK(&adapter->service_task, ixgbe_service_task);
    set_bit(__IXGBE_SERVICE_INITED, &adapter->state);
    clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state);

    err = ixgbe_init_interrupt_scheme(adapter);
    if (err)
        goto err_sw_init;

    /* Initialise the per-ring statistics sync state for RX, TX and XDP. */
    for (i = 0; i < adapter->num_rx_queues; i++)
        u64_stats_init(&adapter->rx_ring[i]->syncp);
    for (i = 0; i < adapter->num_tx_queues; i++)
        u64_stats_init(&adapter->tx_ring[i]->syncp);
    for (i = 0; i < adapter->num_xdp_queues; i++)
        u64_stats_init(&adapter->xdp_ring[i]->syncp);

    /* WOL not supported for all devices */
    adapter->wol = 0;
    hw->eeprom.ops.read(hw, 0x2c, &adapter->eeprom_cap);
    hw->wol_enabled = ixgbe_wol_supported(adapter, pdev->device,
                        pdev->subsystem_device);
    if (hw->wol_enabled)
        adapter->wol = IXGBE_WUFC_MAG;

    device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);

    /* save off EEPROM version number */
    ixgbe_set_fw_version(adapter);

    /* pick up the PCI bus settings for reporting later */
    if (ixgbe_pcie_from_parent(hw))
        ixgbe_get_parent_bus_info(adapter);
    else
         hw->mac.ops.get_bus_info(hw);

    /* calculate the expected PCIe bandwidth required for optimal
     * performance. Note that some older parts will never have enough
     * bandwidth due to being older generation PCIe parts. We clamp these
     * parts to ensure no warning is displayed if it can't be fixed.
     */
    switch (hw->mac.type) {
    case ixgbe_mac_82598EB:
        expected_gts = min(ixgbe_enumerate_functions(adapter) * 10, 16);
        break;
    default:
        expected_gts = ixgbe_enumerate_functions(adapter) * 10;
        break;
    }

    /* don't check link if we failed to enumerate functions */
    if (expected_gts > 0)
        ixgbe_check_minimum_link(adapter, expected_gts);

    /* The PBA string is informational only: a read failure is replaced
     * by "Unknown", and err is overwritten again by start_hw below.
     */
    err = ixgbe_read_pba_string_generic(hw, part_str, sizeof(part_str));
    if (err)
        strlcpy(part_str, "Unknown", sizeof(part_str));
    if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
        e_dev_info("MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n",
               hw->mac.type, hw->phy.type, hw->phy.sfp_type,
               part_str);
    else
        e_dev_info("MAC: %d, PHY: %d, PBA No: %s\n",
               hw->mac.type, hw->phy.type, part_str);

    e_dev_info("%pM\n", netdev->dev_addr);

    /* reset the hardware with the new settings */
    /* Only IXGBE_ERR_EEPROM_VERSION is acted on (with a warning); any
     * other start_hw result is overwritten by register_netdev() below.
     */
    err = hw->mac.ops.start_hw(hw);
    if (err == IXGBE_ERR_EEPROM_VERSION) {
        /* We are running on a pre-production device, log a warning */
        e_dev_warn("This device is a pre-production adapter/LOM. "
               "Please be aware there may be issues associated "
               "with your hardware.  If you are experiencing "
               "problems please contact your Intel or hardware "
               "representative who provided you with this "
               "hardware.\n");
    }
    /* Replace the temporary PCI-name with the "eth%d" name template. */
    strcpy(netdev->name, "eth%d");
    pci_set_drvdata(pdev, adapter);
    err = register_netdev(netdev);
    if (err)
        goto err_register;


    /* power down the optics for 82599 SFP+ fiber */
    if (hw->mac.ops.disable_tx_laser)
        hw->mac.ops.disable_tx_laser(hw);

    /* carrier off reporting is important to ethtool even BEFORE open */
    netif_carrier_off(netdev);

#ifdef CONFIG_IXGBE_DCA
    if (dca_add_requester(&pdev->dev) == 0) {
        adapter->flags |= IXGBE_FLAG_DCA_ENABLED;
        ixgbe_setup_dca(adapter);
    }
#endif
    if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
        e_info(probe, "IOV is enabled with %d VFs\n", adapter->num_vfs);
        for (i = 0; i < adapter->num_vfs; i++)
            ixgbe_vf_configuration(pdev, (i | 0x10000000));
    }

    /* firmware requires driver version to be 0xFFFFFFFF
     * since os does not support feature
     */
    if (hw->mac.ops.set_fw_drv_ver)
        hw->mac.ops.set_fw_drv_ver(hw, 0xFF, 0xFF, 0xFF, 0xFF,
                       sizeof(UTS_RELEASE) - 1,
                       UTS_RELEASE);

    /* add san mac addr to netdev */
    ixgbe_add_sanmac_netdev(netdev);

    e_dev_info("%s\n", ixgbe_default_device_descr);

#ifdef CONFIG_IXGBE_HWMON
    if (ixgbe_sysfs_init(adapter))
        e_err(probe, "failed to allocate sysfs resources\n");
#endif /* CONFIG_IXGBE_HWMON */

    ixgbe_dbg_adapter_init(adapter);

    /* setup link for SFP devices with MNG FW, else wait for IXGBE_UP */
    if (ixgbe_mng_enabled(hw) && ixgbe_is_sfp(hw) && hw->mac.ops.setup_link)
        hw->mac.ops.setup_link(hw,
            IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL,
            true);

    err = ixgbe_mii_bus_init(hw);
    if (err)
        goto err_netdev;

    return 0;

    /* Error unwinding: each label undoes the steps completed before the
     * jump and then falls through to the labels below it.
     */
err_netdev:
    unregister_netdev(netdev);
err_register:
    ixgbe_release_hw_control(adapter);
    ixgbe_clear_interrupt_scheme(adapter);
err_sw_init:
    ixgbe_disable_sriov(adapter);
    adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP;
    iounmap(adapter->io_addr);
    kfree(adapter->jump_tables[0]);
    kfree(adapter->mac_table);
    kfree(adapter->rss_key);
    bitmap_free(adapter->af_xdp_zc_qps);
err_ioremap:
    /* disable_dev is true only if this call is the one that set
     * __IXGBE_DISABLED. The state must be read before free_netdev(),
     * because adapter lives inside netdev's private area.
     */
    disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state);
    free_netdev(netdev);
err_alloc_etherdev:
    pci_release_mem_regions(pdev);
err_pci_reg:
err_dma:
    /* adapter is still NULL when the failure happened before netdev_priv()
     * was reached; in that case the PCI device is always disabled.
     */
    if (!adapter || disable_dev)
        pci_disable_device(pdev);
    return err;
}

ixgbe_probe->ixgbe_init_interrupt_scheme->ixgbe_alloc_q_vectors->netif_napi_add(ixgbe_poll) 注册poll函数到q_vector->napi
ixgbe_open->ixgbe_request_irq->ixgbe_request_msix_irqs->request_irq(ixgbe_intr) 注册硬中断处理函数 取得q_vector
ixgbe_intr->napi_schedule_irqoff->list_add_tail(&napi->poll_list, &sd->poll_list); 将q_vector->napi挂到sd->poll_list 触发软中断
open_softirq(NET_RX_SOFTIRQ, net_rx_action); 系统初始化时注册软中断
net_rx_action->napi_poll 遍历sd->poll_list 调用poll函数 也就是ixgbe_poll

ixgbe_poll->ixgbe_clean_rx_irq->ixgbe_process_skb_fields 填写包2层头

ixgbe_rx_skb->napi_gro_receive->dev_gro_receive->napi_skb_finish->gro_normal_one->gro_normal_list gro处理

netif_receive_skb_list_internal->__netif_receive_skb_list->__netif_receive_skb_list_core->__netif_receive_skb_core 协议栈处理

/*
 * __netif_receive_skb_core - abridged excerpt of the core receive path.
 *
 * NOTE: this is a shortened illustration, not the complete function: the
 * local declarations (skb, ptype, pt_prev, orig_dev, rx_handler, type,
 * deliver_exact, ret) and the return statement are omitted. It shows the
 * order in which a received packet is offered to handlers:
 *   1. every entry on the global ptype_all list,
 *   2. every entry on the receiving device's ptype_all list,
 *   3. the device's rx_handler (only loaded here; its call is omitted),
 *   4. the ptype_base hash bucket selected by the packet's protocol,
 *   5. the ptype_specific lists of orig_dev and, if different, skb->dev.
 */
static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
                    struct packet_type **ppt_prev)
{

    /* Delivery lags one entry behind: each iteration hands the skb to the
     * previously seen handler (pt_prev) and remembers the current one.
     */
    list_for_each_entry_rcu(ptype, &ptype_all, list) {                          // tap callbacks (per the author's note: where tcpdump sees the packet)
        if (pt_prev)
            ret = deliver_skb(skb, pt_prev, orig_dev);
        pt_prev = ptype;
    }

    list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
        if (pt_prev)
            ret = deliver_skb(skb, pt_prev, orig_dev);
        pt_prev = ptype;
    }

    rx_handler = rcu_dereference(skb->dev->rx_handler);                        // rx_handler hook (per the author's note: where OVS attaches)


    type = skb->protocol;

    /* deliver only exact match when indicated */
    if (likely(!deliver_exact)) {
        deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,                 // layer-3 protocol handlers, bucket chosen by protocol type
                       &ptype_base[ntohs(type) &
                           PTYPE_HASH_MASK]);
    }

    deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
                   &orig_dev->ptype_specific);

    /* skb->dev may differ from orig_dev by this point; if so, also offer
     * the packet to the handlers bound to the current device.
     */
    if (unlikely(skb->dev != orig_dev)) {
        deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
                       &skb->dev->ptype_specific);
    }
}

你可能感兴趣的:(网络源码分析)