Linux Kernel 内存管理之CMA

Linux kernel中的CMA(Contiguous Memory Allocator),即连续内存分配器,对应的配置项为CONFIG_CMA和CONFIG_CMA_DEBUG。
顾名思义,其管理的是一块块连续内存块,这些内存块在物理地址上是连续的。这点跟我们平时使用的伙伴算法
以及只保证虚拟地址连续的分配方式有点不一样。尽管通过伙伴算法(例如使用kmalloc)也可以申请连续物理内存,
但是系统长时间运行后内存会碎片化,较大的连续物理内存可能申请不到。因此,内核设计者设计了CMA,即连续物理内存管理。
其预留了一块连续物理内存,专门用于需要连续物理内存的场景,比如DMA。
对于这一块连续物理内存来说,因为物理内存有限,并且使用对象也有限,所以需要非常严格的
限制:整个CMA区的大小、base地址以及对齐方式都有限制。
函数cma_declare_contiguous()用于声明(预留)这些CMA区,可以指定base、size、limit等参数。
函数cma_init_reserved_mem()用于把一块已经保留(reserved)的内存登记为一个CMA区。需要注意,
CMA区的个数上限为MAX_CMA_AREAS,也就是说,用户最多只能创建MAX_CMA_AREAS个CMA区,
CMA就是对这最多MAX_CMA_AREAS个区进行管理。
之后内核调用函数cma_init_reserved_areas()把这些CMA区激活。
 
当然,我们正常使用时,可以调用函数cma_alloc()分配CMA内存,并调用cma_release()释放已申请的CMA内存。
 
我们先看内核对CMA内存的一个全局约束,即函数cma_declare_contiguous()实现:
/**
 * cma_declare_contiguous() - reserve custom contiguous area
 * @base: Base address of the reserved area (optional, use 0 for any).
 * @size: Size of the reserved area (in bytes).
 * @limit: End address of the reserved memory (optional, 0 for any).
 * @alignment: Alignment for the CMA area, should be power of 2 or zero
 * @order_per_bit: Order of pages represented by one bit on bitmap.
 * @fixed: hint about where to place the reserved area
 * @res_cma: Pointer to store the created cma region.
 *
 * This function reserves memory from early allocator. It should be
 * called by arch specific code once the early allocator (memblock or bootmem)
 * has been activated and all other subsystems have already allocated/reserved
 * memory. This function allows to create custom reserved areas.
 *
 * If @fixed is true, reserve contiguous area at exactly @base.  If false,
 * reserve in range from @base to @limit.
 *
 * If the memory has been reserved but cma_init_reserved_mem() then rejects
 * it, the reservation is handed back with memblock_free() so the range is
 * not left reserved without a CMA area describing it.
 *
 * Return: 0 on success; -ENOSPC when cma_areas[] has no free slot; -EINVAL
 * for a zero @size, a non-power-of-2 @alignment, a size that does not match
 * @order_per_bit, or a fixed region crossing the low/high memory boundary;
 * -EBUSY when a fixed region is already reserved or cannot be reserved;
 * -ENOMEM when no suitable range can be allocated; otherwise the error
 * returned by cma_init_reserved_mem().
 */
int __init cma_declare_contiguous(phys_addr_t base,
   phys_addr_t size, phys_addr_t limit,
   phys_addr_t alignment, unsigned int order_per_bit,
   bool fixed, struct cma **res_cma)
{
 phys_addr_t memblock_end = memblock_end_of_DRAM();
 phys_addr_t highmem_start;
 int ret = 0;
#ifdef CONFIG_X86
 /*
  * high_memory isn't direct mapped memory so retrieving its physical
  * address isn't appropriate.  But it would be useful to check the
  * physical address of the highmem boundary so it's justifiable to get
  * the physical address from it.  On x86 there is a validation check for
  * this case, so the following workaround is needed to avoid it.
  */
 highmem_start = __pa_nodebug(high_memory);
#else
 highmem_start = __pa(high_memory);
#endif
 pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n",
  __func__, &size, &base, &limit, &alignment);

 if (cma_area_count == ARRAY_SIZE(cma_areas)) {
  pr_err("Not enough slots for CMA reserved regions!\n");
  return -ENOSPC;
 }
 if (!size)
  return -EINVAL;
 if (alignment && !is_power_of_2(alignment))
  return -EINVAL;
 /*
  * Sanitise input arguments.
  * Pages both ends in CMA area could be merged into adjacent unmovable
  * migratetype page by page allocator's buddy algorithm. In the case,
  * you couldn't get a contiguous memory, which is not what we want.
  */
 alignment = max(alignment,  (phys_addr_t)PAGE_SIZE <<
     max_t(unsigned long, MAX_ORDER - 1, pageblock_order));
 base = ALIGN(base, alignment);
 size = ALIGN(size, alignment);
 limit &= ~(alignment - 1);
 /* A zero base means "anywhere", so it cannot be a fixed placement. */
 if (!base)
  fixed = false;
 /* size should be aligned with order_per_bit */
 if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
  return -EINVAL;
 /*
  * If allocating at a fixed base the request region must not cross the
  * low/high memory boundary.
  */
 if (fixed && base < highmem_start && base + size > highmem_start) {
  ret = -EINVAL;
  pr_err("Region at %pa defined on low/high memory boundary (%pa)\n",
   &base, &highmem_start);
  goto err;
 }
 /*
  * If the limit is unspecified or above the memblock end, its effective
  * value will be the memblock end. Set it explicitly to simplify further
  * checks.
  */
 if (limit == 0 || limit > memblock_end)
  limit = memblock_end;
 /* Reserve memory */
 if (fixed) {
  if (memblock_is_region_reserved(base, size) ||
      memblock_reserve(base, size) < 0) {
   ret = -EBUSY;
   goto err;
  }
 } else {
  phys_addr_t addr = 0;
  /*
   * All pages in the reserved area must come from the same zone.
   * If the requested region crosses the low/high memory boundary,
   * try allocating from high memory first and fall back to low
   * memory in case of failure.
   */
  if (base < highmem_start && limit > highmem_start) {
   addr = memblock_alloc_range(size, alignment,
          highmem_start, limit,
          MEMBLOCK_NONE);
   limit = highmem_start;
  }
  if (!addr) {
   addr = memblock_alloc_range(size, alignment, base,
          limit,
          MEMBLOCK_NONE);
   if (!addr) {
    ret = -ENOMEM;
    goto err;
   }
  }
  /*
   * kmemleak scans/reads tracked objects for pointers to other
   * objects but this address isn't mapped and accessible
   */
  kmemleak_ignore_phys(addr);
  base = addr;
 }

 ret = cma_init_reserved_mem(base, size, order_per_bit, res_cma);
 if (ret)
  goto free_mem;
 pr_info("Reserved %ld MiB at %pa\n", (unsigned long)size / SZ_1M,
  &base);
 return 0;

free_mem:
 /*
  * The range was reserved above (by memblock_reserve() or
  * memblock_alloc_range()) but no CMA area was created for it; give it
  * back instead of leaving it reserved.
  */
 memblock_free(base, size);
err:
 pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
 return ret;
}
 
/**
 * cma_init_reserved_mem() - create custom contiguous area from reserved memory
 * @base: Base address of the reserved area
 * @size: Size of the reserved area (in bytes).
 * @order_per_bit: Order of pages represented by one bit on bitmap.
 * @res_cma: Pointer to store the created cma region.
 *
 * Records an already reserved memory range in the next free slot of
 * cma_areas[] and adds its page count to totalcma_pages. Only the
 * descriptor (base_pfn, count, order_per_bit) is filled in here.
 *
 * Return: 0 on success, -ENOSPC when cma_areas[] is full, -EINVAL when
 * @size is zero, the range is not reserved in memblock, or @base/@size
 * (or the minimal alignment versus @order_per_bit) are not suitably aligned.
 */
int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
     unsigned int order_per_bit,
     struct cma **res_cma)
{
 struct cma *area;
 phys_addr_t min_align;

 /* No free descriptor left in the static table. */
 if (cma_area_count == ARRAY_SIZE(cma_areas)) {
  pr_err("Not enough slots for CMA reserved regions!\n");
  return -ENOSPC;
 }

 /* The caller must hand over a non-empty, already reserved range. */
 if (!size)
  return -EINVAL;
 if (!memblock_is_region_reserved(base, size))
  return -EINVAL;

 /* Minimal alignment required by the mm core. */
 min_align = PAGE_SIZE <<
   max_t(unsigned long, MAX_ORDER - 1, pageblock_order);

 /* That alignment, in pages, has to be a multiple of one bitmap bit. */
 if (!IS_ALIGNED(min_align >> PAGE_SHIFT, 1 << order_per_bit))
  return -EINVAL;

 /* Both ends of the range must sit on the minimal alignment. */
 if (ALIGN(base, min_align) != base)
  return -EINVAL;
 if (ALIGN(size, min_align) != size)
  return -EINVAL;

 /*
  * Only fill in the descriptor now; each reserved area must be
  * initialised later, when more kernel subsystems (like slab allocator)
  * are available.
  */
 area = &cma_areas[cma_area_count];
 area->base_pfn = PFN_DOWN(base);
 area->count = size >> PAGE_SHIFT;
 area->order_per_bit = order_per_bit;
 *res_cma = area;

 cma_area_count++;
 totalcma_pages += (size / PAGE_SIZE);
 return 0;
}
这些reserve的内存区域记录在cma_areas[]数组中。需要注意,这些reserve的内存页数会被计入totalcma_pages中。
由于所有reserve的内存都是以cma_areas[]数组的形式管理,而数组大小固定,所以,其能管理的CMA区数量非常有限。
函数cma_init_reserved_areas()作为core_initcall被调用,它会把早期reserve的内存以pageblock为单位释放给伙伴系统,放入所属zone的MIGRATE_CMA链表中。
/*
 * cma_init_reserved_areas() - activate every area recorded in cma_areas[]
 *
 * Registered as a core_initcall. Calls cma_activate_area() on each of the
 * cma_area_count entries.
 *
 * Activation does not stop at the first failure: an area that is never
 * passed to cma_activate_area() would keep the count set by
 * cma_init_reserved_mem() although its bitmap and lock were never set up.
 * All areas are therefore attempted and the first error, if any, is
 * reported to the initcall machinery.
 *
 * Return: 0 if every area was activated, otherwise the error code of the
 * first area that failed.
 */
static int __init cma_init_reserved_areas(void)
{
 int first_err = 0;
 int i;

 for (i = 0; i < cma_area_count; i++) {
  int ret = cma_activate_area(&cma_areas[i]);

  /* Remember the first failure but keep activating the rest. */
  if (ret && !first_err)
   first_err = ret;
 }
 return first_err;
}
core_initcall(cma_init_reserved_areas);
/*
 * cma_activate_area() - make one reserved CMA area usable
 * @cma: area descriptor whose base_pfn and count have already been set
 *
 * Allocates the zeroed allocation bitmap, verifies that every pfn of the
 * area lies in the same zone as the first one, hands each pageblock to
 * init_cma_reserved_pageblock() and finally initialises the area's lock
 * (plus the debugfs list and its lock under CONFIG_CMA_DEBUGFS).
 *
 * On any failure cma->count is set to 0 so the area is marked as unusable;
 * this now also covers the bitmap allocation failure, which previously left
 * a non-zero count together with a NULL bitmap.
 *
 * Return: 0 on success, -ENOMEM if the bitmap cannot be allocated, -EINVAL
 * if the area spans more than one zone.
 */
static int __init cma_activate_area(struct cma *cma)
{
 int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long);
 unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
 /* Number of pageblocks in the area; the do/while below expects >= 1. */
 unsigned i = cma->count >> pageblock_order;
 struct zone *zone;

 cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
 if (!cma->bitmap) {
  /* Mark the area unusable, exactly like the err path does. */
  cma->count = 0;
  return -ENOMEM;
 }
 WARN_ON_ONCE(!pfn_valid(pfn));
 zone = page_zone(pfn_to_page(pfn));
 do {
  unsigned j;
  /* Remember where this pageblock starts before scanning it. */
  base_pfn = pfn;
  for (j = pageblock_nr_pages; j; --j, pfn++) {
   WARN_ON_ONCE(!pfn_valid(pfn));
   /*
    * alloc_contig_range requires the pfn range
    * specified to be in the same zone. Make this
    * simple by forcing the entire CMA resv range
    * to be in the same zone.
    */
   if (page_zone(pfn_to_page(pfn)) != zone)
    goto err;
  }
  init_cma_reserved_pageblock(pfn_to_page(base_pfn));
 } while (--i);
 mutex_init(&cma->lock);
#ifdef CONFIG_CMA_DEBUGFS
 INIT_HLIST_HEAD(&cma->mem_head);
 spin_lock_init(&cma->mem_head_lock);
#endif
 return 0;
err:
 /*
  * Pageblocks handed to init_cma_reserved_pageblock() in earlier
  * iterations are not taken back here; only the bitmap is released and
  * the area is marked unusable.
  */
 kfree(cma->bitmap);
 cma->bitmap = NULL;
 cma->count = 0;
 return -EINVAL;
}
 
#ifdef CONFIG_CMA
/*
 * init_cma_reserved_pageblock() - release one pageblock as MIGRATE_CMA
 * @page: first page of the pageblock
 *
 * Clears the reserved flag and zeroes the reference count of every page in
 * the pageblock, tags the pageblock as MIGRATE_CMA, frees it through
 * __free_pages() and finally adds pageblock_nr_pages to the managed page
 * accounting via adjust_managed_page_count().
 */
void __init init_cma_reserved_pageblock(struct page *page)
{
 unsigned idx;

 /* Prepare every page of the block before anything is freed. */
 for (idx = 0; idx < pageblock_nr_pages; idx++) {
  struct page *cur = page + idx;

  __ClearPageReserved(cur);
  set_page_count(cur, 0);
 }

 set_pageblock_migratetype(page, MIGRATE_CMA);

 if (pageblock_order < MAX_ORDER) {
  /* The whole pageblock is freed with a single call. */
  set_page_refcounted(page);
  __free_pages(page, pageblock_order);
 } else {
  /* Otherwise release it in chunks of order MAX_ORDER - 1. */
  for (idx = 0; idx < pageblock_nr_pages;
       idx += MAX_ORDER_NR_PAGES) {
   struct page *chunk = page + idx;

   set_page_refcounted(chunk);
   __free_pages(chunk, MAX_ORDER - 1);
  }
 }

 adjust_managed_page_count(page, pageblock_nr_pages);
}
#endif
 
void adjust_managed_page_count(struct page *page, long count)
{
 spin_lock(&managed_page_count_lock);
 page_zone(page)->managed_pages += count;
 totalram_pages += count;
#ifdef CONFIG_HIGHMEM
 if (PageHighMem(page))
  totalhigh_pages += count;
#endif
 spin_unlock(&managed_page_count_lock);
}
EXPORT_SYMBOL(adjust_managed_page_count);
 

你可能感兴趣的:(Linux,Kernel,内存管理)