一般为 MAC-PHY 模式:
-----------
| CPU | RGMII/
| ------| MII ---------
| | MAC |---------| PHY |
| | |---------| |
| |-----| SMI |_______|
----------
MAC -PHY 之间使用 RGMII /MII 接口连接,用于传输数据。 SMI 接口,通常为 MDIO 接口或 I2C接口,用于读写PHY上面的寄存器。
1.2 phy device 的数据结构
/*
 * Per-PHY device state as quoted in this article. It embeds an mdio_device
 * and records the PHY identification, the negotiated/forced link parameters,
 * the interrupt/polling work items and the attached net_device.
 */
struct phy_device {
struct mdio_device mdio; // MDIO device; provides the MDIO read/write interface
/* Information about the PHY type */
/* And management functions */
struct phy_driver *drv; // assigned once a matching phy driver has been found
u32 phy_id; // phy_id, used when matching a driver
struct phy_c45_device_ids c45_ids;
bool is_c45;
bool is_internal;
bool is_pseudo_fixed_link;
bool has_fixups;
bool suspended;
bool sysfs_links;
bool loopback_enabled;
enum phy_state state;
u32 dev_flags;
phy_interface_t interface;
/*
* forced speed & duplex (no autoneg)
* partner speed & duplex & pause (autoneg)
*/
int speed;
int duplex;
int pause;
int asym_pause;
/* The most recently read link state */
int link;
/* Enabled Interrupts */
u32 interrupts;
/* Union of PHY and Attached devices' supported modes */
/* See mii.h for more info */
u32 supported;
u32 advertising;
u32 lp_advertising;
/* Energy efficient ethernet modes which should be prohibited */
u32 eee_broken_modes;
int autoneg;
int link_timeout;
#ifdef CONFIG_LED_TRIGGER_PHY
struct phy_led_trigger *phy_led_triggers;
unsigned int phy_num_led_triggers;
struct phy_led_trigger *last_triggered;
#endif
/*
* Interrupt number for this PHY
* -1 means no interrupt
*/
int irq; // how the PHY status is queried: greater than 0 means interrupt mode, -1 means polling (POLL) mode
/* private data pointer */
/* For use by PHYs to maintain extra state */
void *priv;
/* Interrupt and Polling infrastructure */
struct work_struct phy_queue;
struct delayed_work state_queue;
atomic_t irq_disable;
struct mutex lock;
struct phylink *phylink;
struct net_device *attached_dev;
u8 mdix;
u8 mdix_ctrl;
void (*phy_link_change)(struct phy_device *, bool up, bool do_carrier);
void (*adjust_link)(struct net_device *dev);
}
1.3 phy device 的注册函数
/**
* phy_device_register - Register the phy device on the MDIO bus
* @phydev: phy_device structure to be added to the MDIO bus
*
* Returns 0 on success. On failure the error code of the failing step is
* returned; if the failure happens after the MDIO registration, that
* registration is undone before returning.
*/
int phy_device_register(struct phy_device *phydev)
{
int err;
err = mdiobus_register_device(&phydev->mdio); // in essence this registers an mdio device on the mdiobus
if (err) // from an object-oriented point of view, a phy device inherits from an mdio device
return err;
/* Run all of the fixups for this PHY */
err = phy_scan_fixups(phydev);
if (err) {
pr_err("PHY %d failed to initialize\n", phydev->mdio.addr);
goto out;
}
phydev->mdio.dev.groups = phy_dev_groups;
err = device_add(&phydev->mdio.dev);
if (err) {
pr_err("PHY %d failed to add\n", phydev->mdio.addr);
goto out;
}
return 0;
out:
/* Error path: undo the mdiobus_register_device() done at the top */
mdiobus_unregister_device(&phydev->mdio);
return err;
}
1.4 phy device 在哪注册的?
以 orangepi-one 为例,先看它的dts
/*arch/arm/boot/dts/sunxi-h3-h5.dtsi */
emac: ethernet@1c30000 {
compatible = "allwinner,sun8i-h3-emac";
syscon = <&syscon>;
reg = <0x01c30000 0x10000>;
interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "macirq";
resets = <&ccu RST_BUS_EMAC>;
reset-names = "stmmaceth";
clocks = <&ccu CLK_BUS_EMAC>;
clock-names = "stmmaceth";
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
mdio: mdio {
#address-cells = <1>;
#size-cells = <0>;
compatible = "snps,dwmac-mdio";
};
mdio-mux {
compatible = "allwinner,sun8i-h3-mdio-mux";
#address-cells = <1>;
#size-cells = <0>;
mdio-parent-bus = <&mdio>;
/* Only one MDIO is usable at the time */
internal_mdio: mdio@1 {
compatible = "allwinner,sun8i-h3-mdio-internal";
reg = <1>;
#address-cells = <1>;
#size-cells = <0>;
int_mii_phy: ethernet-phy@1 {
compatible = "ethernet-phy-ieee802.3-c22";
reg = <1>;
clocks = <&ccu CLK_BUS_EPHY>;
resets = <&ccu RST_BUS_EPHY>;
};
};
external_mdio: mdio@2 {
reg = <2>;
#address-cells = <1>;
#size-cells = <0>;
};
};
};
/*arch/arm/boot/dts/sun8i-h3-orangepi-one.dts */
&emac {
phy-handle = <&int_mii_phy>;
phy-mode = "mii";
allwinner,use-internal-phy;
status = "okay";
};
compatible = "allwinner,sun8i-h3-emac" ,对应驱动代码为 drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
这是一个mac 驱动,在probe 过程中会注册phy device。
sun8i_dwmac_probe
-> stmmac_dvr_probe
-> stmmac_mdio_register // 这里注册的是mdio bus,提供读写接口
-> sun8i_dwmac_register_mdio_mux // 为mdio-mux子节点创建设备
-> mdio_mux_init
-> of_mdiobus_register
-> of_mdiobus_register_phy
-> phy_device_register
这个过程中会根据 dts 内容决定这个是不是 phy
if (of_mdiobus_child_is_phy(child))
rc = of_mdiobus_register_phy(mdio, child, addr);
elsec
rc = of_mdiobus_register_device(mdio, child, addr);
mdiobus_child 中符合以下条件之一的就认为是phy ,否则只是个 mdio device
/*
* Return true if the child node is for a phy. It must either:
* o Compatible string of "ethernet-phy-idX.X"
* o Compatible string of "ethernet-phy-ieee802.3-c45"
* o Compatible string of "ethernet-phy-ieee802.3-c22"
* o In the white list above (and issue a warning)
* o No compatibility string
*
* A device which is not a phy is expected to have a compatible string
* indicating what sort of device it is.
*/
static bool of_mdiobus_child_is_phy(struct device_node *child)
{
u32 phy_id;
/* Any result other than -EINVAL from of_get_phy_id() counts as a PHY */
if (of_get_phy_id(child, &phy_id) != -EINVAL)
return true;
if (of_device_is_compatible(child, "ethernet-phy-ieee802.3-c45"))
return true;
if (of_device_is_compatible(child, "ethernet-phy-ieee802.3-c22"))
return true;
/* A whitelist match is still accepted as a PHY, but a firmware warning is printed */
if (of_match_node(whitelist_phys, child)) {
pr_warn(FW_WARN
"%pOF: Whitelisted compatible string. Please remove\n",
child);
return true;
}
/* A node without any "compatible" property is also treated as a PHY */
if (!of_find_property(child, "compatible", NULL))
return true;
return false;
}
phy_id 的初始化,在of_mdiobus_register_phy 中
/* Choose how the PHY ID is obtained before the phy_device is created */
if (!is_c45 && !of_get_phy_id(child, &phy_id))
phy = phy_device_create(mdio, addr, phy_id, 0, NULL); // ID specified in the dts via "ethernet-phy-idX.X"
else
phy = get_phy_device(mdio, addr, is_c45); // ID obtained by reading MII_PHYSID1 and MII_PHYSID2
1.5 phy_device_create
/*
 * phy_device_create - allocate and pre-initialise a phy_device.
 * @bus: MDIO bus the PHY sits on
 * @addr: address of the PHY on that bus
 * @phy_id: PHY identifier stored in the new device
 * @is_c45: stored in the device's is_c45 flag
 * @c45_ids: optional ID set; copied into the device when non-NULL
 *
 * Returns the new device, or ERR_PTR(-ENOMEM) if the allocation fails.
 * The device is initialised with device_initialize() but is not added
 * to the bus here.
 */
struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
bool is_c45,
struct phy_c45_device_ids *c45_ids)
{
struct phy_device *dev;
struct mdio_device *mdiodev;
/* We allocate the device, and initialize the default values */
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return ERR_PTR(-ENOMEM);
/* Fill in the embedded mdio_device: parent, bus type, callbacks and address */
mdiodev = &dev->mdio;
mdiodev->dev.release = phy_device_release;
mdiodev->dev.parent = &bus->dev;
mdiodev->dev.bus = &mdio_bus_type;
mdiodev->bus = bus;
mdiodev->pm_ops = MDIO_BUS_PHY_PM_OPS;
mdiodev->bus_match = phy_bus_match; // phy_bus_match uses phy_id to match a driver
mdiodev->addr = addr;
mdiodev->flags = MDIO_DEVICE_FLAG_PHY;
mdiodev->device_free = phy_mdio_device_free;
mdiodev->device_remove = phy_mdio_device_remove;
/* Default link parameters of a freshly created PHY */
dev->speed = 0;
dev->duplex = -1;
dev->pause = 0;
dev->asym_pause = 0;
dev->link = 1;
dev->interface = PHY_INTERFACE_MODE_GMII;
dev->autoneg = AUTONEG_ENABLE;
dev->is_c45 = is_c45;
dev->phy_id = phy_id;
if (c45_ids)
dev->c45_ids = *c45_ids;
/* The irq setting is taken from the bus' per-address irq table */
dev->irq = bus->irq[addr];
dev_set_name(&mdiodev->dev, PHY_ID_FMT, bus->id, addr);
dev->state = PHY_DOWN;
mutex_init(&dev->lock);
INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine); // set up the PHY state machine work item
INIT_WORK(&dev->phy_queue, phy_change_work);
/* Request the appropriate module unconditionally; don't
* bother trying to do so only if it isn't already loaded,
* because that gets complicated. A hotplug event would have
* done an unconditional modprobe anyway.
* We don't do normal hotplug because it won't work for MDIO
* -- because it relies on the device staying around for long
* enough for the driver to get loaded. With MDIO, the NIC
* driver will get bored and give up as soon as it finds that
* there's no driver _already_ loaded.
*/
request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT, MDIO_ID_ARGS(phy_id));
device_initialize(&mdiodev->dev);
return dev;
}
2.1 phy driver 的数据结构
struct phy_driver {
struct mdio_driver_common mdiodrv; // 注册的时候,填充
u32 phy_id; // phy id 用于匹配
char *name;
unsigned int phy_id_mask;
u32 features;
u32 flags;
const void *driver_data;
/*
* Called to issue a PHY software reset
*/
int (*soft_reset)(struct phy_device *phydev);
/*
* Called to initialize the PHY,
* including after a reset
*/
int (*config_init)(struct phy_device *phydev);
...其它方法...
}
struct phy_driver 结构相对简单,大部分是接口函数,而这些接口函数大部分都不需要实现。
2.2 phy driver 的注册函数
/*
 * phy_driver_register - register a PHY driver on the MDIO bus type.
 * @new_driver: driver to register; its embedded mdiodrv is filled in here
 * @owner: module recorded as the owner of the driver
 *
 * Returns 0 on success, or the error returned by driver_register().
 */
int phy_driver_register(struct phy_driver *new_driver, struct module *owner)
{
int retval;
/* Flag the driver as a PHY driver and wire up the generic driver fields */
new_driver->mdiodrv.flags |= MDIO_DEVICE_IS_PHY;
new_driver->mdiodrv.driver.name = new_driver->name;
new_driver->mdiodrv.driver.bus = &mdio_bus_type;
new_driver->mdiodrv.driver.probe = phy_probe;
new_driver->mdiodrv.driver.remove = phy_remove;
new_driver->mdiodrv.driver.owner = owner;
retval = driver_register(&new_driver->mdiodrv.driver);
if (retval) {
pr_err("%s: Error %d in registering driver\n",
new_driver->name, retval);
return retval;
}
pr_debug("%s: Registered new driver\n", new_driver->name);
return 0;
}
2.3 phy device 与 phy driver 的匹配
首先 通过 phy_bus_match 函数,使用phy_id 进行匹配。
/*
 * Bus match callback: reports a match when the driver's phy_id and the
 * device's phy_id agree on every bit selected by the driver's phy_id_mask.
 *
 * NOTE(review): phydev and phydrv are used but not declared in this
 * excerpt; presumably they are derived from dev and drv - confirm against
 * the full kernel source.
 */
static int phy_bus_match(struct device *dev, struct device_driver *drv)
{
return (phydrv->phy_id & phydrv->phy_id_mask) ==
(phydev->phy_id & phydrv->phy_id_mask);
}
当一个phy device 没有匹配到 driver 时,默认使用 genphy driver 。
内核默认会添加两种 genphy driver ,分别是 genphy_10g_driver ,genphy_driver。
/*
 * phy_init - register the two generic PHY drivers.
 *
 * Returns 0 on success, or the error code of the registration that failed.
 * If the second registration fails, the first driver is unregistered again
 * so that a failed init does not leave a driver behind.
 */
static int __init phy_init(void)
{
	int rc;

	rc = phy_driver_register(&genphy_10g_driver, THIS_MODULE);
	if (rc)
		return rc;

	rc = phy_driver_register(&genphy_driver, THIS_MODULE);
	if (rc)
		phy_driver_unregister(&genphy_10g_driver);

	return rc;
}
在 phy_attach_direct 函数中,当一个phy device 未匹配到 phy driver 时,默认使用 genphy driver。
int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
u32 flags, phy_interface_t interface)
{
//...
/* Assume that if there is no driver, that it doesn't
* exist, and we should use the genphy driver.
*/
if (!d->driver) {
if (phydev->is_c45)
d->driver = &genphy_10g_driver.mdiodrv.driver;
else
d->driver = &genphy_driver.mdiodrv.driver;
using_genphy = true;
}
//...
}
调用链如下:
.ndo_open->stmmac_open->stmmac_init_phy -> of_phy_connect ->phy_connect_direct->phy_attach_direct
/*
 * Generic PHY driver description ("Generic PHY"). Every callback points at
 * a genphy_* helper; both phy_id and phy_id_mask are all-ones.
 */
static struct phy_driver genphy_driver = {
.phy_id = 0xffffffff,
.phy_id_mask = 0xffffffff,
.name = "Generic PHY",
.soft_reset = genphy_no_soft_reset,
.config_init = genphy_config_init,
.features = PHY_GBIT_FEATURES | SUPPORTED_MII |
SUPPORTED_AUI | SUPPORTED_FIBRE |
SUPPORTED_BNC,
.config_aneg = genphy_config_aneg,
.aneg_done = genphy_aneg_done,
.read_status = genphy_read_status,
.suspend = genphy_suspend,
.resume = genphy_resume,
.set_loopback = genphy_loopback,
};
其中比较关键的一点是 .read_status = genphy_read_status ,用于获取phy 的连接状态,speed ,全双工状态等。
read_status 在 phy_state_machine 中被调用,phy_state_machine 是个状态机,每间隔1s 执行一次。当phy 的状态发生改变时,如协商速率从100M 变为了 1000M ,这时会调用 phy_link_change ->phydev->adjust_link 来调整 MAC 端的速率
标准PHY 与非标准PHY 的区别在于寄存器定义:IEEE802.3 定义了地址为0-15这16个寄存器的功能,而有的厂家制造的PHY 寄存器定义不一样,所以就有了标准PHY 与非标准PHY 。
寄存器定义符合IEEE802.3 标准的,可以直接使用 genphy_driver ,而非标准的需要单独实现。
------------------
| CPU |
| ----------- |
| | MAC/ephy |
|___|__||_____|__|
||
RGMII/ || RXP/TXP
MII ||
||
------------------------------------------------||----------
| Switch || |
| || |
| || |
| |-----| |-----| |-----| |-----| |-----| |
____|_____|___|_____|___|_____|___|_____|____|_____|______|
PHY1 PHY2 PHY3 PHY4 PHY/MAC
switch 一般包含多个PHY,同样会有提供MDIO ,I2C 接口给CPU 进行寄存器读写。其寄存器会比单纯的PHY寄存器复杂。所以switch 驱动一般不注册为phy驱动,而是注册为平台驱动。
当cpu 与switch 使用MAC-MAC 方式连接时,内核仍会运行phy 状态机去获取phy 的连接状态连接速率等,这时就要告诉内核我是固定连接的,内核给了个虚拟MDIO接口位于drivers\net\phy\fixed_phy.c ,主要提供假的 mdio_read 函数,返回固定信息给内核。
int swphy_read_reg(int reg, const struct fixed_phy_status *state)
{
...
bmcr |= speed[speed_index].bmcr & duplex[duplex_index].bmcr;
lpa |= speed[speed_index].lpa & duplex[duplex_index].lpa;
lpagb |= speed[speed_index].lpagb & duplex[duplex_index].lpagb;
...
switch (reg) {
case MII_BMCR:
return bmcr;
case MII_BMSR:
return bmsr;
case MII_LPA:
return lpa;
case MII_STAT1000:
return lpagb;
default:
return 0xffff;
}
}
需要在 dts 中添加如下内容:
Examples:
ethernet@0 {
...
fixed-link {
speed = <1000>;
full-duplex;
};
...
};
代码中 of_phy_is_fixed_link 当节点中包含fixed-link 返回true.
if (!plat->phy_node && of_phy_is_fixed_link(np)) {
if ((of_phy_register_fixed_link(np) < 0)) // 这里注册一个使用虚拟mdio接口的phy device
return -ENODEV;
dev_dbg(dev, "Found fixed-link subnode\n");
plat->phy_node = of_node_get(np);
mdio = false;
}
MAC-MAC 方式下,phy driver 使用 genphy_driver ,phy device 的 mdio 是假的mdio 。
OpenWrt 中实现了 swconfig 工具用于配置 switch ,swconfig 主要分为两部分
应用层实现
内核层实现
swconfig:
------------------------------------
应用层
| | (Generic Netlink)
-----------|--|---------------------
内核层
swconfig core
---------------------------
驱动层 switch_dev
------------------------------------
swconfig 源代码可以在 build_dir/target_xxx/swconfig 下找到。主要是使用 genetlink 接口与内核进行通信。
使用方法参考:https://openwrt.org/docs/techref/swconfig
swconfig list
swconfig dev switch0 show
Show current configuration
swconfig dev rtl8366rb show
and you will obtain:
VLAN 1:
info: VLAN 1: Ports: '12345t', members=003e, untag=001e, fid=0
fid: 0
ports: 1 2 3 4 5t
VLAN 2:
info: VLAN 2: Ports: '05t', members=0021, untag=0001, fid=0
fid: 0
ports: 0 5t
代码位于linux-3.18.21/drivers/net/phy/swconfig.c ,这是OpenWrt 打补丁后生成的代码。
static struct genl_ops swconfig_ops[] = {
{
.cmd = SWITCH_CMD_LIST_GLOBAL,
.doit = swconfig_list_attrs,
.policy = switch_policy,
},
...
};
/*
 * Generic Netlink family named "switch". The explicit .id assignment is
 * only compiled for kernels older than 4.10; the command table is
 * swconfig_ops.
 */
static struct genl_family switch_fam = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0)
.id = GENL_ID_GENERATE,
#endif
.name = "switch",
.hdrsize = 0,
.version = 1,
.maxattr = SWITCH_ATTR_MAX,
.module = THIS_MODULE,
.ops = swconfig_ops,
.n_ops = ARRAY_SIZE(swconfig_ops),
};
/*
 * Module init: initialises the swdevs list head and registers the
 * switch_fam Generic Netlink family. Kernels older than 4.10 register
 * through genl_register_family_with_ops(); newer ones through
 * genl_register_family(). The registration result is returned as-is.
 */
static int __init swconfig_init(void)
{
INIT_LIST_HEAD(&swdevs);
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0)
return genl_register_family_with_ops(&switch_fam, swconfig_ops);
#else
return genl_register_family(&switch_fam);
#endif
}
核心代码是使用 genl_register_family 注册了一个 struct genl_family switch_fam,switch_fam.ops = swconfig_ops, swconfig_ops 中注册了相应命令集合及其处理函数。
此外对外导出register_switch ,unregister_switch 接口,用于switch 驱动实现自己的接口函数。
int register_switch(struct switch_dev *dev, struct net_device *netdev)
{
...
}
EXPORT_SYMBOL_GPL(register_switch);
EXPORT_SYMBOL_GPL(unregister_switch);
2)switch_dev 部分
以 drivers\net\phy\rtl8367b.c 为例:
/*
 * Platform driver description for the RTL8367B switch. The OF match table
 * is only set when CONFIG_OF is enabled; probe/remove/shutdown point at the
 * rtl8367b_* handlers.
 */
static struct platform_driver rtl8367b_driver = {
.driver = {
.name = RTL8367B_DRIVER_NAME,
.owner = THIS_MODULE,
#ifdef CONFIG_OF
.of_match_table = of_match_ptr(rtl8367b_match),
#endif
},
.probe = rtl8367b_probe,
.remove = rtl8367b_remove,
.shutdown = rtl8367b_shutdown,
};
它将switch 驱动实现为 platform_driver , 在 probe 函数中,最终会调用 register_switch
/*
 * swconfig operations table for the RTL8367B: three attribute tables
 * (global, per-port, per-VLAN) followed by the VLAN, PVID, reset, link and
 * statistics callbacks.
 */
static const struct switch_dev_ops rtl8367b_sw_ops = {
/* Attribute tables, each with its element count */
.attr_global = {
.attr = rtl8367b_globals,
.n_attr = ARRAY_SIZE(rtl8367b_globals),
},
.attr_port = {
.attr = rtl8367b_port,
.n_attr = ARRAY_SIZE(rtl8367b_port),
},
.attr_vlan = {
.attr = rtl8367b_vlan,
.n_attr = ARRAY_SIZE(rtl8367b_vlan),
},
/* Callbacks; several are the shared rtl8366_* implementations */
.get_vlan_ports = rtl8366_sw_get_vlan_ports,
.set_vlan_ports = rtl8366_sw_set_vlan_ports,
.get_port_pvid = rtl8366_sw_get_port_pvid,
.set_port_pvid = rtl8366_sw_set_port_pvid,
.reset_switch = rtl8366_sw_reset_switch,
.get_port_link = rtl8367b_sw_get_port_link,
.get_port_stats = rtl8367b_sw_get_port_stats,
};
{
struct switch_dev *dev = &smi->sw_dev;
int err;
dev->name = "RTL8367B";
dev->cpu_port = RTL8367B_CPU_PORT_NUM;
dev->ports = RTL8367B_NUM_PORTS;
dev->vlans = RTL8367B_NUM_VIDS;
dev->ops = &rtl8367b_sw_ops;
dev->alias = dev_name(smi->parent);
err = register_switch(dev, NULL);
if (err)
dev_err(smi->parent, "switch registration failed\n");
return err;
}
可以看出为了实现swconfig 功能,switch 驱动要做的事情就是实现 struct switch_dev_ops 结构体,即实现上述接口。
dsa 全称是 Distributed Switch Architecture ,即分布式交换机架构。其介绍可见:Documentation\networking\dsa\dsa.txt
是linux 内核自带的交换机子系统。设计目的是使交换机可以通过工具bridge, iproute2, ifconfig 直接进行配置/查询。
具体实现,有待研究。
Introduction
This document describes the Distributed Switch Architecture (DSA) subsystem
design principles, limitations, interactions with other subsystems, and how to
develop drivers for this subsystem as well as a TODO for developers interested
in joining the effort.
Design principles
The Distributed Switch Architecture is a subsystem which was primarily designed
to support Marvell Ethernet switches (MV88E6xxx, a.k.a Linkstreet product line)
using Linux, but has since evolved to support other vendors as well.