实现一个PCIe设备驱动时,需要分配几个128M的物理连续内存作为DMA缓冲区。由于伙伴系统单次最多只能分配4M的连续内存,所以需要使用CMA机制来分配大块的物理连续内存,以便通过DMA搬运数据:
硬件平台:x86_64
软件平台:CentOS 7.4,内核 linux-3.10.0
根据网上的资料,CMA目前应该只支持x86和ARM平台。在x86下,CMA与swiotlb是互斥的:Kconfig中只有在没有使能SWIOTLB时才会选中HAVE_DMA_CONTIGUOUS(select HAVE_DMA_CONTIGUOUS if !SWIOTLB),而x86_64下swiotlb总是使能的(enable),所以CMA在x86_64下默认是不使能的(disable)。为了同时使能swiotlb和CMA,需要为内核打如下补丁:
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0952ecd..1b6275d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -40,7 +40,7 @@ config X86
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_FRAME_POINTERS
select HAVE_DMA_ATTRS
- select HAVE_DMA_CONTIGUOUS if !SWIOTLB
+ select HAVE_DMA_CONTIGUOUS
select HAVE_KRETPROBES
select HAVE_OPTPROBES
select HAVE_KPROBES_ON_FTRACE
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 977f176..ab05d73 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -29,4 +29,11 @@ static inline void pci_swiotlb_late_init(void)
static inline void dma_mark_clean(void *addr, size_t size) {}
+extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags,
+ struct dma_attrs *attrs);
+extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_addr,
+ struct dma_attrs *attrs);
+
#endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b574b29..8e3842f 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -512,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, struct dma_attrs *attrs)
{
gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
- free_pages((unsigned long)vaddr, get_order(size));
+ dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
}
static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 6c483ba..77dd0ad 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -14,7 +14,7 @@
#include <asm/iommu_table.h>
int swiotlb __read_mostly;
-static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
struct dma_attrs *attrs)
{
@@ -28,11 +28,14 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
}
-static void x86_swiotlb_free_coherent(struct device *dev, size_t size,
+void x86_swiotlb_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_addr,
struct dma_attrs *attrs)
{
- swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+ if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
+ swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+ else
+ dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
}
static struct dma_map_ops swiotlb_dma_ops = {
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 9d8a509..5ceda85 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -173,9 +173,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
{
void *vaddr;
- vaddr = dma_generic_alloc_coherent(dev, size, dma_handle, flags, attrs);
- if (!vaddr)
- vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, flags);
+ vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs);
*dma_handle = p2a(*dma_handle, to_pci_dev(dev));
return vaddr;
}
@@ -183,7 +181,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
/* We have our own dma_ops: the same as swiotlb but from alloc (above) */
static struct dma_map_ops sta2x11_dma_ops = {
.alloc = sta2x11_swiotlb_alloc_coherent,
- .free = swiotlb_free_coherent,
+ .free = x86_swiotlb_free_coherent,
.map_page = swiotlb_map_page,
.unmap_page = swiotlb_unmap_page,
.map_sg = swiotlb_map_sg_attrs,
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index a5ffd32..e7a018e 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -116,4 +116,6 @@ static inline void swiotlb_free(void) { }
#endif
extern void swiotlb_print_info(void);
+extern int is_swiotlb_buffer(phys_addr_t paddr);
+
#endif /* __LINUX_SWIOTLB_H */
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index fe978e0..6e4a798 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -369,7 +369,7 @@ void __init swiotlb_free(void)
io_tlb_nslabs = 0;
}
-static int is_swiotlb_buffer(phys_addr_t paddr)
+int is_swiotlb_buffer(phys_addr_t paddr)
{
return paddr >= io_tlb_start && paddr < io_tlb_end;
}
打完补丁后配置内核(例如执行make menuconfig),使能下面的CMA选项:
-> Device Drivers
-> Generic Driver Options
DMA Contiguous Memory Allocator
这里同时也可以配置预留内存的大小。比较方便的是在内核启动参数里设置预留大小。
编译、安装内核:
make
make modules_install
make install
编辑/etc/default/grub文件
在GRUB_CMDLINE_LINUX这一行加入cma=1024M(参数值不要加引号),表示预留1024M内存等待分配。
执行下面命令,生成grub2的配置文件
sudo grub2-mkconfig -o /boot/grub2/grub.cfg
重启系统,启动加载界面选择新的内核启动,系统启动完了,可以查看cma空间是否预留成功:
[root@localhost linux-3.10.0-693.21.1.el7.x86_64]# dmesg | grep cma
[ 0.000000] Command line: BOOT_IMAGE=/vmlinuz-3.10.0 root=/dev/mapper/cl-root ro cma=1024M crashkernel=auto rd.lvm.lv=cl/root rd.lvm.lv=cl/swap rhgb quiet
[ 0.000000] cma: CMA: reserved 1024 MiB at 103fc00000
[ 0.000000] Kernel command line: BOOT_IMAGE=/vmlinuz-3.10.0 root=/dev/mapper/cl-root ro cma=1024M crashkernel=auto rd.lvm.lv=cl/root rd.lvm.lv=cl/swap rhgb quiet
分配内存不需要直接调用CMA的接口,使用DMA原有的接口分配即可:
dma_alloc_coherent()
这个函数需要传入一个设备的参数,最好在分配前为设备设置mask值:
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (!err) {
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
if (err) {
ERROR("pci_set_consistent_dma_mask failed\n");
}
}
else {
ERROR("pci_set_dma_mask failed");
}
内存可以正确分配:
[ 139.185437] DEBUG: [fpga_driver_probe 132]dma_alloc_coherent [0] ok!! phys=0000001040b00000
[ 139.187532] DEBUG: [fpga_driver_probe 132]dma_alloc_coherent [1] ok!! phys=0000001048b00000
[ 139.189638] DEBUG: [fpga_driver_probe 132]dma_alloc_coherent [2] ok!! phys=0000001050b00000
[ 139.191745] DEBUG: [fpga_driver_probe 132]dma_alloc_coherent [3] ok!! phys=0000001058b00000
[ 139.193710] DEBUG: [fpga_driver_probe 132]dma_alloc_coherent [4] ok!! phys=0000001060b00000