首先我想引用一下官方的源代码中的文档,对设备驱动的工作做一个概括性的叙述:
1. 首先是调用 pci_register_driver() 函数,对驱动进行注册。
2.Once the driver knows about a PCI device and takes ownership, the
driver generally needs to perform the following initialization:
Enable the device
Request MMIO/IOP resources
Set the DMA mask size (for both coherent and streaming DMA)
Allocate and initialize shared control data (pci_allocate_coherent())
Access device configuration space (if needed)
Register IRQ handler (request_irq())
Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip)
Enable DMA/processing engines
When done using the device, and perhaps the module needs to be unloaded,
the driver needs to take the follow steps:
Disable the device from generating IRQs
Release the IRQ (free_irq())
Stop all DMA activity
Release DMA buffers (both streaming and coherent)
Unregister from other subsystems (e.g. scsi or netdev)
Release MMIO/IOP resources
Disable the device
前面讲了很多关于PCI的架构之类的基础知识,现在我们结合代码看一下怎么实现一个PCI设备驱动。软件代码依然是NXP开源的linux代码。我们以bt878的驱动代码为例。
Software: linux version 4.14.98
首先我们需要注册PCI设备驱动,废话少说,直接看代码
/*******************************/
/* Module management functions */
/*******************************/
/*
 * Module entry point.
 *
 * Resets the count of probed cards, logs the driver version and registers
 * bt878_pci_driver with the PCI core.  Returns the result of
 * pci_register_driver().
 */
static int __init bt878_init_module(void)
{
	/* the three version components live in successive bytes of the code */
	const int major = (BT878_VERSION_CODE >> 16) & 0xff;
	const int minor = (BT878_VERSION_CODE >> 8) & 0xff;
	const int patch = BT878_VERSION_CODE & 0xff;

	bt878_num = 0;

	printk(KERN_INFO "bt878: AUDIO driver version %d.%d.%d loaded\n",
	       major, minor, patch);

	return pci_register_driver(&bt878_pci_driver);
}
/*
 * Module exit point: unregisters bt878_pci_driver, the descriptor that
 * bt878_init_module() registered.
 */
static void __exit bt878_cleanup_module(void)
{
pci_unregister_driver(&bt878_pci_driver);
}
/* Declare the functions above as this module's init and exit handlers. */
module_init(bt878_init_module);
module_exit(bt878_cleanup_module);
下面我们看pci_register_driver(&bt878_pci_driver);
/*
 * pci_register_driver must be a macro so that KBUILD_MODNAME can be expanded
 * at the call site: it forwards @driver to __pci_register_driver() together
 * with the caller's THIS_MODULE and KBUILD_MODNAME.
 */
#define pci_register_driver(driver) \
__pci_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
__pci_register_driver()函数如下
/**
* __pci_register_driver - register a new pci driver
* @drv: the driver structure to register
* @owner: owner module of drv
* @mod_name: module name string
*
* Adds the driver structure to the list of registered drivers.
* Returns a negative value on error, otherwise 0.
* If no error occurred, the driver remains registered even if
* no device was claimed during registration.
*/
int __pci_register_driver(struct pci_driver *drv, struct module *owner,
const char *mod_name)
{
/* initialize common driver fields */
drv->driver.name = drv->name;
drv->driver.bus = &pci_bus_type;
drv->driver.owner = owner;
drv->driver.mod_name = mod_name;
drv->driver.groups = drv->groups;
spin_lock_init(&drv->dynids.lock);
INIT_LIST_HEAD(&drv->dynids.list);
/* register with core */
return driver_register(&drv->driver);
}
EXPORT_SYMBOL(__pci_register_driver);
driver_register()函数如下
/**
 * driver_register - register driver with bus
 * @drv: driver to register
 *
 * We pass off most of the work to the bus_add_driver() call,
 * since most of the things we have to do deal with the bus
 * structures.
 *
 * Returns 0 on success, -EBUSY if a driver of the same name is already
 * registered on the bus, or the error from bus_add_driver() /
 * driver_add_groups().
 */
int driver_register(struct device_driver *drv)
{
	int err;

	BUG_ON(!drv->bus->p);

	/* warn when both the bus and the driver supply the same callback */
	if ((drv->bus->probe && drv->probe) ||
	    (drv->bus->remove && drv->remove) ||
	    (drv->bus->shutdown && drv->shutdown)) {
		printk(KERN_WARNING "Driver '%s' needs updating - please use "
		       "bus_type methods\n", drv->name);
	}

	/* driver names must be unique per bus */
	if (driver_find(drv->name, drv->bus)) {
		printk(KERN_ERR "Error: Driver '%s' is already registered, "
		       "aborting...\n", drv->name);
		return -EBUSY;
	}

	err = bus_add_driver(drv);
	if (err)
		return err;

	err = driver_add_groups(drv, drv->groups);
	if (!err) {
		kobject_uevent(&drv->p->kobj, KOBJ_ADD);
		return 0;
	}

	/* attribute groups failed: undo the bus registration */
	bus_remove_driver(drv);
	return err;
}
EXPORT_SYMBOL_GPL(driver_register);
我们对driver进行register之后,我们接下来看probe函数。
首先看bt878_pci_driver的定义:
/*
 * PCI driver descriptor for the bt878 driver: names the driver and wires
 * up its device ID table together with the probe and remove callbacks.
 */
static struct pci_driver bt878_pci_driver = {
	.probe    = bt878_probe,
	.remove   = bt878_remove,
	.id_table = bt878_pci_tbl,
	.name     = "bt878",
};
再看bt878_probe函数的定义:
/***********************/
/* PCI device handling */
/***********************/

/*
 * bt878_probe - bind the driver to one Bt878 audio function
 * @dev:    PCI device being probed
 * @pci_id: ID table entry that matched @dev
 *
 * Takes the next free slot of bt878[], enables the device, reserves and
 * maps BAR 0, installs the shared interrupt handler, enables bus mastering
 * and allocates the DMA buffers.  A failing step unwinds everything that
 * was set up before it, in reverse order.  bt878_num is only advanced once
 * the whole sequence has succeeded.
 *
 * Returns 0 on success, otherwise a negative errno:
 *   -ENOMEM  all BT878_MAX slots are in use, or BAR 0 could not be mapped
 *   -EIO     pci_enable_device() failed
 *   -EBUSY   BAR 0 is already claimed
 *   other    error from request_irq() or bt878_mem_alloc()
 */
static int bt878_probe(struct pci_dev *dev, const struct pci_device_id *pci_id)
{
	int result = 0;
	unsigned char lat;
	struct bt878 *bt;
	unsigned int cardid;

	printk(KERN_INFO "bt878: Bt878 AUDIO function found (%d).\n",
	       bt878_num);
	if (bt878_num >= BT878_MAX) {
		printk(KERN_ERR "bt878: Too many devices inserted\n");
		return -ENOMEM;
	}
	if (pci_enable_device(dev))
		return -EIO;

	/* widen before shifting so the value never lands in a sign bit */
	cardid = (unsigned int) dev->subsystem_device << 16;
	cardid |= dev->subsystem_vendor;

	printk(KERN_INFO "%s: card id=[0x%x],[ %s ] has DVB functions.\n",
	       __func__, cardid, card_name(pci_id));

	/* NOTE: the register macros (btwrite etc.) rely on this local "bt" */
	bt = &bt878[bt878_num];
	bt->dev = dev;
	bt->nr = bt878_num;
	bt->shutdown = 0;

	bt->id = dev->device;
	bt->irq = dev->irq;
	bt->bt878_adr = pci_resource_start(dev, 0);
	if (!request_mem_region(pci_resource_start(dev, 0),
				pci_resource_len(dev, 0), "bt878")) {
		result = -EBUSY;
		goto fail0;
	}

	bt->revision = dev->revision;
	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);

	/*
	 * Emitted as one printk: a follow-up printk without KERN_CONT is
	 * logged as a separate record instead of continuing this line.
	 */
	printk(KERN_INFO "bt878(%d): Bt%x (rev %d) at %02x:%02x.%x, "
	       "irq: %d, latency: %d, memory: 0x%lx\n",
	       bt878_num, bt->id, bt->revision, dev->bus->number,
	       PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
	       bt->irq, lat, bt->bt878_adr);

#ifdef __sparc__
	bt->bt878_mem = (unsigned char *) bt->bt878_adr;
#else
	bt->bt878_mem = ioremap(bt->bt878_adr, 0x1000);
	if (!bt->bt878_mem) {
		/* the register writes below would dereference a NULL mapping */
		printk(KERN_ERR "bt878(%d): ioremap failed\n", bt878_num);
		result = -ENOMEM;
		goto fail1;
	}
#endif

	/* clear interrupt mask */
	btwrite(0, BT848_INT_MASK);

	result = request_irq(bt->irq, bt878_irq,
			     IRQF_SHARED, "bt878", (void *) bt);
	if (result == -EINVAL) {
		printk(KERN_ERR "bt878(%d): Bad irq number or handler\n",
		       bt878_num);
		goto fail2;
	}
	if (result == -EBUSY) {
		printk(KERN_ERR
		       "bt878(%d): IRQ %d busy, change your PnP config in BIOS\n",
		       bt878_num, bt->irq);
		goto fail2;
	}
	if (result < 0)
		goto fail2;

	pci_set_master(dev);
	pci_set_drvdata(dev, bt);

	if ((result = bt878_mem_alloc(bt))) {
		printk(KERN_ERR "bt878: failed to allocate memory!\n");
		goto fail3;
	}

	bt878_make_risc(bt);
	btwrite(0, BT878_AINT_MASK);
	bt878_num++;

	return 0;

fail3:
	free_irq(bt->irq, bt);
fail2:
#ifndef __sparc__
	/* drop the BAR 0 mapping created above so it is not leaked */
	iounmap(bt->bt878_mem);
#endif
fail1:
	release_mem_region(pci_resource_start(bt->dev, 0),
			   pci_resource_len(bt->dev, 0));
fail0:
	pci_disable_device(dev);
	return result;
}
在这里我们重点讲解probe()中的几个重要函数。
我们重点查看函数pci_enable_device()
看probe中的代码片段:
if (pci_enable_device(dev))
return -EIO;
那么这个函数主要是做什么呢,在这里摘录一段源代码中自带的文档中的说明:
Before touching any device registers, the driver needs to enable
the PCI device by calling pci_enable_device(). This will:
o wake up the device if it was in suspended state,
o allocate I/O and memory regions of the device (if BIOS did not),
o allocate an IRQ (if BIOS did not).
翻译过来其实就是,唤醒设备,分配I/O和memory region,分配IRQ,好了我们接下来继续跟踪这个函数
/**
 * pci_enable_device - Initialize device before it's used by a driver.
 * @dev: PCI device to be initialized
 *
 * Initialize device before it's used by a driver. Ask low-level code
 * to enable I/O and memory. Wake up the device if it was suspended.
 * Beware, this function can fail.
 *
 * Note we don't actually enable the device many times if we call
 * this function repeatedly (we just increment the count).
 *
 * Returns the result of pci_enable_device_flags().
 */
int pci_enable_device(struct pci_dev *dev)
{
	/* ask for both memory and I/O port resources */
	const unsigned long res_types = IORESOURCE_MEM | IORESOURCE_IO;

	return pci_enable_device_flags(dev, res_types);
}
EXPORT_SYMBOL(pci_enable_device);
继续看pci_enable_device_flags()函数
/*
 * pci_enable_device_flags - enable @dev for the resource types in @flags
 *
 * Refreshes the cached power state, takes an enable reference and, for the
 * first reference only, enables the upstream bridge and the device itself.
 * The set of BARs passed down is every standard/ROM resource and every
 * bridge resource whose flags intersect @flags.
 *
 * Returns 0 if the device was already enabled, otherwise the result of
 * do_pci_enable_device(); on a negative result the reference is dropped.
 */
static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
{
	struct pci_dev *upstream;
	int bar_mask = 0;
	int idx;
	int rc;

	/*
	 * Power state could be unknown at this point, either due to a fresh
	 * boot or a device removal call. So get the current power state
	 * so that things like MSI message writing will behave as expected
	 * (e.g. if the device really is in D0 at enable time).
	 */
	if (dev->pm_cap) {
		u16 pmcsr;

		pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
		dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
	}

	/* only the first enabler does the real work */
	if (atomic_inc_return(&dev->enable_cnt) > 1)
		return 0;		/* already enabled */

	upstream = pci_upstream_bridge(dev);
	if (upstream)
		pci_enable_bridge(upstream);

	/* only skip sriov related */
	for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
		if (dev->resource[idx].flags & flags)
			bar_mask |= (1 << idx);
	}
	for (idx = PCI_BRIDGE_RESOURCES; idx < DEVICE_COUNT_RESOURCE; idx++) {
		if (dev->resource[idx].flags & flags)
			bar_mask |= (1 << idx);
	}

	rc = do_pci_enable_device(dev, bar_mask);
	if (rc < 0)
		atomic_dec(&dev->enable_cnt);	/* give the reference back */

	return rc;
}
重点看函数do_pci_enable_device()
/*
 * do_pci_enable_device - perform the actual enable of @dev
 * @dev:  device to enable
 * @bars: bitmask of resources to enable, passed to pcibios_enable_device()
 *
 * Moves the device to D0, configures ASPM on the upstream bridge if there
 * is one, enables the requested resources and runs the enable fixups.
 * Unless MSI or MSI-X is already enabled, a device that reports an
 * interrupt pin gets INTx re-enabled if it was disabled in PCI_COMMAND.
 *
 * Returns 0 on success or a negative error.  An -EIO result from
 * pci_set_power_state() is not treated as fatal.
 */
static int do_pci_enable_device(struct pci_dev *dev, int bars)
{
	struct pci_dev *upstream;
	u16 command;
	u8 intx_pin;
	int rc;

	rc = pci_set_power_state(dev, PCI_D0);
	if (rc < 0 && rc != -EIO)
		return rc;

	upstream = pci_upstream_bridge(dev);
	if (upstream)
		pcie_aspm_powersave_config_link(upstream);

	rc = pcibios_enable_device(dev, bars);
	if (rc < 0)
		return rc;
	pci_fixup_device(pci_fixup_enable, dev);

	/* message-signalled interrupts in use: leave INTx alone */
	if (dev->msi_enabled || dev->msix_enabled)
		return 0;

	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &intx_pin);
	if (!intx_pin)
		return 0;

	pci_read_config_word(dev, PCI_COMMAND, &command);
	if (command & PCI_COMMAND_INTX_DISABLE) {
		command &= ~PCI_COMMAND_INTX_DISABLE;
		pci_write_config_word(dev, PCI_COMMAND, command);
	}

	return 0;
}
看函数pci_set_power_state(dev, PCI_D0);
/**
 * pci_set_power_state - move a PCI device to the requested power state
 * @dev: PCI device that we're dealing with
 * @state: requested PCI power state
 *
 * @state is first clamped to the range PCI_D0..PCI_D3cold.  A request
 * above D3hot is carried out by programming D3hot through
 * pci_raw_set_power_state() and then calling
 * __pci_complete_power_transition() with the requested state.
 *
 * Return value:
 * 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
 * 0 if device already is in the requested state.
 * 0 if the transition is to D3 but D3 is not supported.
 * 0 if device's power state has been successfully changed.
 * Otherwise the result of pci_raw_set_power_state(), which is replaced
 * by 0 when __pci_complete_power_transition() returns 0.
 */
int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
{
	pci_power_t native_state;
	int rc;

	/* bound the state we're entering */
	if (state > PCI_D3cold) {
		state = PCI_D3cold;
	} else if (state < PCI_D0) {
		state = PCI_D0;
	} else if ((state == PCI_D1 || state == PCI_D2) && pci_no_d1d2(dev)) {
		/*
		 * If the device or the parent bridge do not support PCI PM,
		 * ignore the request if we're doing anything other than putting
		 * it into D0 (which would only happen on boot).
		 */
		return 0;
	}

	/* Check if we're already there */
	if (dev->current_state == state)
		return 0;

	__pci_start_power_transition(dev, state);

	/* This device is quirked not to be put into D3, so don't put it in D3 */
	if (state >= PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3))
		return 0;

	/*
	 * To put device in D3cold, we put device into D3hot in native
	 * way, then put device into D3cold with platform ops
	 */
	native_state = state;
	if (native_state > PCI_D3hot)
		native_state = PCI_D3hot;
	rc = pci_raw_set_power_state(dev, native_state);

	if (__pci_complete_power_transition(dev, state) == 0)
		rc = 0;

	return rc;
}
EXPORT_SYMBOL(pci_set_power_state);
在这里就是设置PCI 设备的power 状态。
再回到do_pci_enable_device()函数
err = pcibios_enable_device(dev, bars);
if (err < 0)
return err;
在pcibios_enable_device(dev, bars)函数中,设置I/O和memory region
接下来设置中断:
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
if (pin) {
pci_read_config_word(dev, PCI_COMMAND, &cmd);
if (cmd & PCI_COMMAND_INTX_DISABLE)
pci_write_config_word(dev, PCI_COMMAND,
cmd & ~PCI_COMMAND_INTX_DISABLE);
}
接下来我们讲另外一个函数
我们看request_mem_region(pci_resource_start(dev, 0), pci_resource_len(dev, 0), "bt878")函数
/*
 * These helpers provide future and backwards compatibility
 * for accessing popular PCI BAR info.
 *
 * pci_resource_start/end/flags read the matching field of
 * (dev)->resource[(bar)].
 *
 * pci_resource_len evaluates to 0 when both start and end of the BAR are 0,
 * otherwise to end - start + 1 (the end address is inclusive).  It expands
 * its arguments several times, so do not pass expressions with side effects.
 */
#define pci_resource_start(dev, bar) ((dev)->resource[(bar)].start)
#define pci_resource_end(dev, bar) ((dev)->resource[(bar)].end)
#define pci_resource_flags(dev, bar) ((dev)->resource[(bar)].flags)
#define pci_resource_len(dev,bar) \
((pci_resource_start((dev), (bar)) == 0 && \
pci_resource_end((dev), (bar)) == \
pci_resource_start((dev), (bar))) ? 0 : \
\
(pci_resource_end((dev), (bar)) - \
pci_resource_start((dev), (bar)) + 1))
其中 request_mem_region()定义如下
/*
 * Reserve @n bytes starting at @start, labelled @name: calls
 * __request_region() with iomem_resource as the parent and no extra flags,
 * and evaluates to that call's result.
 */
#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0)
接下来我们接着追函数
/**
* __request_region - create a new busy resource region
* @parent: parent resource descriptor
* @start: resource start address
* @n: resource region size
* @name: name stored in the new resource
* @flags: IO resource flags
*
* Allocates a resource covering [@start, @start + @n - 1], marks it
* IORESOURCE_BUSY and tries to insert it below @parent.
*
* Returns the new resource on success, or NULL if the allocation fails or
* the range conflicts with a resource it cannot be placed inside.
*/
struct resource * __request_region(struct resource *parent,
resource_size_t start, resource_size_t n,
const char *name, int flags)
{
DECLARE_WAITQUEUE(wait, current);
struct resource *res = alloc_resource(GFP_KERNEL);
if (!res)
return NULL;
res->name = name;
res->start = start;
/* inclusive end address of the n-byte range */
res->end = start + n - 1;
write_lock(&resource_lock);
for (;;) {
struct resource *conflict;
/* take the type bits from the current parent and mark the region busy */
res->flags = resource_type(parent) | resource_ext_type(parent);
res->flags |= IORESOURCE_BUSY | flags;
res->desc = parent->desc;
conflict = __request_resource(parent, res);
if (!conflict)
break;
/* a conflicting resource that is not busy becomes the new parent; retry inside it */
if (conflict != parent) {
if (!(conflict->flags & IORESOURCE_BUSY)) {
parent = conflict;
continue;
}
}
/*
* Both the conflicting resource and this request carry IORESOURCE_MUXED:
* drop the lock, sleep on muxed_resource_wait, then retake the lock and
* try again.
*/
if (conflict->flags & flags & IORESOURCE_MUXED) {
add_wait_queue(&muxed_resource_wait, &wait);
write_unlock(&resource_lock);
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
remove_wait_queue(&muxed_resource_wait, &wait);
write_lock(&resource_lock);
continue;
}
/* Uhhuh, that didn't work out.. */
free_resource(res);
res = NULL;
break;
}
write_unlock(&resource_lock);
return res;
}
EXPORT_SYMBOL(__request_region);
看函数
result = request_irq(bt->irq, bt878_irq,
IRQF_SHARED, "bt878", (void *) bt);
bt878_irq函数定义如下:
/*****************************/
/* Interrupt service routine */
/*****************************/

/*
 * bt878_irq - interrupt handler installed by request_irq() for one card
 * @irq:    interrupt number (unused)
 * @dev_id: the struct bt878 passed to request_irq()
 *
 * Repeatedly reads the interrupt status masked by the interrupt mask,
 * acknowledges the pending bits and, when bt878_verbose is set, logs the
 * error conditions.  A RISCI interrupt records the finished block and
 * schedules the tasklet.  After more than 20 passes without reaching a
 * quiet state all interrupts are masked.
 *
 * Returns IRQ_NONE only if no enabled status bit was ever pending during
 * this invocation; IRQ_HANDLED otherwise.
 */
static irqreturn_t bt878_irq(int irq, void *dev_id)
{
	u32 stat, astat, mask;
	int count;
	struct bt878 *bt;	/* name is relied on by btread()/btwrite() */

	bt = (struct bt878 *) dev_id;

	count = 0;
	while (1) {
		stat = btread(BT878_AINT_STAT);
		mask = btread(BT878_AINT_MASK);
		astat = stat & mask;
		if (!astat) {
			/*
			 * Nothing (left) pending.  count is non-zero only when
			 * an earlier pass already acknowledged status bits, and
			 * in that case the interrupt was ours: reporting
			 * IRQ_NONE would count it as spurious.
			 */
			return count ? IRQ_HANDLED : IRQ_NONE;
		}
		/* dprintk("bt878(%d) debug: irq count %d, stat 0x%8.8x, mask 0x%8.8x\n",bt->nr,count,stat,mask); */
		btwrite(astat, BT878_AINT_STAT);	/* try to clear interrupt condition */

		if (astat & (BT878_ASCERR | BT878_AOCERR)) {
			if (bt878_verbose) {
				printk(KERN_INFO
				       "bt878(%d): irq%s%s risc_pc=%08x\n",
				       bt->nr,
				       (astat & BT878_ASCERR) ? " SCERR" :
				       "",
				       (astat & BT878_AOCERR) ? " OCERR" :
				       "", btread(BT878_ARISC_PC));
			}
		}
		if (astat & (BT878_APABORT | BT878_ARIPERR | BT878_APPERR)) {
			if (bt878_verbose) {
				printk(KERN_INFO
				       "bt878(%d): irq%s%s%s risc_pc=%08x\n",
				       bt->nr,
				       (astat & BT878_APABORT) ? " PABORT" :
				       "",
				       (astat & BT878_ARIPERR) ? " RIPERR" :
				       "",
				       (astat & BT878_APPERR) ? " PPERR" :
				       "", btread(BT878_ARISC_PC));
			}
		}
		if (astat & (BT878_AFDSR | BT878_AFTRGT | BT878_AFBUS)) {
			if (bt878_verbose) {
				printk(KERN_INFO
				       "bt878(%d): irq%s%s%s risc_pc=%08x\n",
				       bt->nr,
				       (astat & BT878_AFDSR) ? " FDSR" : "",
				       (astat & BT878_AFTRGT) ? " FTRGT" :
				       "",
				       (astat & BT878_AFBUS) ? " FBUS" : "",
				       btread(BT878_ARISC_PC));
			}
		}
		if (astat & BT878_ARISCI) {
			/* hand the completed block over to the tasklet */
			bt->finished_block = (stat & BT878_ARISCS) >> 28;
			tasklet_schedule(&bt->tasklet);
			break;
		}
		count++;
		if (count > 20) {
			/* status keeps coming back: mask everything */
			btwrite(0, BT878_AINT_MASK);
			printk(KERN_ERR
			       "bt878(%d): IRQ lockup, cleared int mask\n",
			       bt->nr);
			break;
		}
	}
	return IRQ_HANDLED;
}
看函数pci_set_master(dev),官方说明如下:
pci_set_master() will enable DMA by setting the bus master bit
in the PCI_COMMAND register. It also fixes the latency timer value if
it's set to something bogus by the BIOS. pci_clear_master() will
disable DMA by clearing the bus master bit.
跟踪代码如下:
/**
* pci_set_master - enables bus-mastering for device dev
* @dev: the PCI device to enable
*
* Enables bus-mastering on the device and calls pcibios_set_master()
* to do the needed arch specific settings.
*/
void pci_set_master(struct pci_dev *dev)
{
/* first turn bus mastering on for the device ... */
__pci_set_master(dev, true);
/* ... then let the (overridable) arch hook apply its settings */
pcibios_set_master(dev);
}
EXPORT_SYMBOL(pci_set_master);
我们看函数__pci_set_master(dev, true);
/*
 * __pci_set_master - set or clear the bus-master bit of @dev
 * @dev:    device to change
 * @enable: true to set PCI_COMMAND_MASTER, false to clear it
 *
 * Writes PCI_COMMAND only when the bit actually changes, and records the
 * requested state in dev->is_busmaster either way.
 */
static void __pci_set_master(struct pci_dev *dev, bool enable)
{
	u16 current_cmd, wanted_cmd;

	pci_read_config_word(dev, PCI_COMMAND, &current_cmd);

	wanted_cmd = enable ? (current_cmd | PCI_COMMAND_MASTER)
			    : (current_cmd & ~PCI_COMMAND_MASTER);

	/* skip the config write when the bit already has the wanted value */
	if (wanted_cmd != current_cmd) {
		dev_dbg(&dev->dev, "%s bus mastering\n",
			enable ? "enabling" : "disabling");
		pci_write_config_word(dev, PCI_COMMAND, wanted_cmd);
	}

	dev->is_busmaster = enable;
}
这样就能使能master了。
再回到之前的pci_set_master()函数,查看函数 pcibios_set_master(dev);
/**
 * pcibios_set_master - enable PCI bus-mastering for device dev
 * @dev: the PCI device to enable
 *
 * Enables PCI bus-mastering for the device. This is the default
 * implementation. Architecture specific implementations can override
 * this if necessary.
 *
 * This default only sanitizes the latency timer: a value below 16 is
 * raised to 64 (or to pcibios_max_latency if that is smaller), a value
 * above pcibios_max_latency is lowered to it, and anything in between is
 * left untouched.
 */
void __weak pcibios_set_master(struct pci_dev *dev)
{
	u8 timer;

	/* The latency timer doesn't apply to PCIe (either Type 0 or Type 1) */
	if (pci_is_pcie(dev))
		return;

	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &timer);

	/* already within [16, pcibios_max_latency]: nothing to fix */
	if (timer >= 16 && timer <= pcibios_max_latency)
		return;

	if (timer < 16)
		timer = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
	else
		timer = pcibios_max_latency;

	pci_write_config_byte(dev, PCI_LATENCY_TIMER, timer);
}
这里有对latency timer value的处理。
看函数
if ((result = bt878_mem_alloc(bt))) {
printk(KERN_ERR "bt878: failed to allocate memory!\n");
goto fail2;
}
我们看一下bt878_mem_alloc(struct bt878 *bt)函数
/*
 * bt878_mem_alloc - allocate the two DMA areas of a card
 * @bt: card whose buf_* and risc_* fields are filled in
 *
 * Each area is allocated only if its CPU pointer is not set yet: a
 * 128 KiB area described by buf_cpu/buf_dma/buf_size and a PAGE_SIZE area
 * described by risc_cpu/risc_dma/risc_size.  If the second allocation
 * fails, bt878_mem_free() is called before returning.
 *
 * Returns 0 on success or -ENOMEM.
 */
static int bt878_mem_alloc(struct bt878 *bt)
{
	if (!bt->buf_cpu) {
		bt->buf_size = 128 * 1024;
		bt->buf_cpu = pci_zalloc_consistent(bt->dev, bt->buf_size,
						    &bt->buf_dma);
		if (!bt->buf_cpu)
			return -ENOMEM;
	}

	/* second area already present: done */
	if (bt->risc_cpu)
		return 0;

	bt->risc_size = PAGE_SIZE;
	bt->risc_cpu = pci_zalloc_consistent(bt->dev, bt->risc_size,
					     &bt->risc_dma);
	if (bt->risc_cpu)
		return 0;

	/* second allocation failed: release what is held */
	bt878_mem_free(bt);
	return -ENOMEM;
}
继续看函数pci_zalloc_consistent()
/*
 * pci_zalloc_consistent - PCI wrapper around dma_zalloc_coherent()
 * @hwdev:      PCI device, may be NULL
 * @size:       number of bytes to allocate
 * @dma_handle: receives the DMA address of the allocation
 *
 * Passes the struct device embedded in @hwdev (or NULL when @hwdev is
 * NULL) and always allocates with GFP_ATOMIC.
 */
static inline void *
pci_zalloc_consistent(struct pci_dev *hwdev, size_t size,
		      dma_addr_t *dma_handle)
{
	struct device *generic = NULL;

	if (hwdev != NULL)
		generic = &hwdev->dev;

	return dma_zalloc_coherent(generic, size, dma_handle, GFP_ATOMIC);
}
我们继续看dma_zalloc_coherent()函数
/*
 * dma_zalloc_coherent - dma_alloc_coherent() with __GFP_ZERO added
 * @dev:        device the memory is allocated for
 * @size:       number of bytes to allocate
 * @dma_handle: receives the DMA address of the allocation
 * @flag:       allocation flags; __GFP_ZERO is OR-ed in
 *
 * Returns whatever dma_alloc_coherent() returns.
 */
static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
					dma_addr_t *dma_handle, gfp_t flag)
{
	return dma_alloc_coherent(dev, size, dma_handle, flag | __GFP_ZERO);
}
这样就分配好了
我们看bt878_remove函数
/*
 * bt878_remove - release everything bt878_probe() set up for one card
 * @pci_dev: PCI device being unbound
 *
 * Order matters: the chip is quiesced and its interrupts masked and
 * acknowledged before bus mastering is switched off, the IRQ is freed
 * before the register mapping goes away, and the DMA memory is released
 * before the device is finally disabled.
 */
static void bt878_remove(struct pci_dev *pci_dev)
{
	/* the register macros below rely on this local being called "bt" */
	struct bt878 *bt = pci_get_drvdata(pci_dev);
	u8 cmd_lo;

	if (bt878_verbose)
		printk(KERN_INFO "bt878(%d): unloading\n", bt->nr);

	/* turn off all capturing, DMA and IRQs */
	btand(~0x13, BT878_AGPIO_DMA_CTL);

	/* first disable interrupts before unmapping the memory! */
	btwrite(0, BT878_AINT_MASK);
	btwrite(~0U, BT878_AINT_STAT);

	/* disable PCI bus-mastering (read-modify-write of the low byte) */
	pci_read_config_byte(bt->dev, PCI_COMMAND, &cmd_lo);
	cmd_lo = cmd_lo & ~PCI_COMMAND_MASTER;
	pci_write_config_byte(bt->dev, PCI_COMMAND, cmd_lo);

	free_irq(bt->irq, bt);

	printk(KERN_DEBUG "bt878_mem: 0x%p.\n", bt->bt878_mem);
	if (bt->bt878_mem)
		iounmap(bt->bt878_mem);

	release_mem_region(pci_resource_start(bt->dev, 0),
			   pci_resource_len(bt->dev, 0));

	/*
	 * wake up any waiting processes
	 * because shutdown flag is set, no new processes (in this queue)
	 * are expected
	 */
	bt->shutdown = 1;
	bt878_mem_free(bt);

	pci_disable_device(pci_dev);
}
在这里对资源进行释放。
当模块卸载的时候会调用函数pci_unregister_driver(&bt878_pci_driver),注销这个driver。