4、_objc_rootAllocWithZone方法进一步分析
对_objc_rootAllocWithZone
方法进行调试,单步跟踪:
// Root allocation entry point: forwards to _class_createInstanceFromZone
// with no extra bytes and no zone, asking for the bad-alloc handler to be
// invoked if the allocation fails.
NEVER_INLINE
id
_objc_rootAllocWithZone(Class cls, malloc_zone_t *zone __unused)
{
    // Under __OBJC2__ the zone argument is ignored entirely; nil is passed on.
    id instance = _class_createInstanceFromZone(cls, 0, nil,
                                                OBJECT_CONSTRUCT_CALL_BADALLOC);
    return instance;
}
因此进入到_class_createInstanceFromZone
方法:
// Creates one instance of `cls`.
//
// Steps, in order:
//   1. compute the allocation size with cls->instanceSize(extraBytes);
//   2. obtain memory with malloc_zone_calloc() when `zone` is non-null,
//      otherwise with calloc();
//   3. initialize the object's isa;
//   4. run C++ constructors when the class has them and `cxxConstruct` is set.
//
// Parameters:
//   cls              - class to instantiate; must already be realized.
//   extraBytes       - additional bytes passed through to instanceSize().
//   zone             - optional malloc zone; a non-null zone also forces the
//                      raw-pointer isa path below.
//   construct_flags  - OBJECT_CONSTRUCT_* flags; OBJECT_CONSTRUCT_CALL_BADALLOC
//                      selects the bad-alloc handler on allocation failure.
//   cxxConstruct     - when false, C++ constructors are skipped.
//   outAllocatedSize - if non-null, receives the computed size.
//
// Returns the new object, the result of _objc_callBadAllocHandler(cls) or nil
// when allocation fails, or the result of object_cxxConstructFromClass().
static ALWAYS_INLINE id
_class_createInstanceFromZone(Class cls, size_t extraBytes, void *zone,
int construct_flags = OBJECT_CONSTRUCT_NONE,
bool cxxConstruct = true,
size_t *outAllocatedSize = nil)
{
ASSERT(cls->isRealized());
// Read class's info bits all at once for performance
bool hasCxxCtor = cxxConstruct && cls->hasCxxCtor();
bool hasCxxDtor = cls->hasCxxDtor();
bool fast = cls->canAllocNonpointer();
size_t size;
// Number of bytes to request for this instance.
size = cls->instanceSize(extraBytes);
if (outAllocatedSize) *outAllocatedSize = size;
id obj;
// Both branches use a calloc-style call for a single element of `size` bytes.
if (zone) {
obj = (id)malloc_zone_calloc((malloc_zone_t *)zone, 1, size);
} else {
obj = (id)calloc(1, size);
}
// Allocation failure: defer to the bad-alloc handler only if the caller
// asked for it; otherwise report failure with nil.
if (slowpath(!obj)) {
if (construct_flags & OBJECT_CONSTRUCT_CALL_BADALLOC) {
return _objc_callBadAllocHandler(cls);
}
return nil;
}
// Bind the freshly allocated memory to its class through isa.
if (!zone && fast) {
obj->initInstanceIsa(cls, hasCxxDtor);
} else {
// Use raw pointer isa on the assumption that they might be
// doing something weird with the zone or RR.
obj->initIsa(cls);
}
// Expected case: no C++ constructor to run, so the object is ready.
if (fastpath(!hasCxxCtor)) {
return obj;
}
// NOTE(review): the flag name suggests the object is freed if construction
// fails; confirm in object_cxxConstructFromClass.
construct_flags |= OBJECT_CONSTRUCT_FREE_ONFAILURE;
return object_cxxConstructFromClass(obj, cls, construct_flags);
}
有三个主要方法instanceSize
、malloc_zone_calloc
或者calloc
、initInstanceIsa
或者initIsa
。
最后一组与isa相关的方法(initInstanceIsa或者initIsa)从名字上无法直接看出作用,其实这是一个将申请到的内存地址与类通过isa绑定的过程,即isa存储的是类的一些信息。
那么看了这里的源码又出现了两个问题:
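问题一、fastpath和slowpath是什么?之前介绍过汇编开关,可以在断点调试时显示汇编层面的方法调用过程。这里再介绍Build Settings中的Optimization Level选项,它决定编译器以什么优化级别生成底层代码,Debug下默认选择是None,也可以选择Fastest、Smallest。fastpath(x)和slowpath(x)是对__builtin_expect的封装,分别告诉编译器条件x大概率为真、大概率为假;它们不会改变条件判断的结果,只是给编译器的分支预测提示。选择Fastest、Smallest时,编译器会根据这些提示把大概率执行的分支优化得更紧凑,汇编指令会少很多,节省时间;以None的选择打开汇编开关时,编译器不做这类优化,流程会长一些,多了很多汇编指令。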
问题二、这个涉及到类和类的结构了,后续会分析。
在此,既然提到instanceSize,最后补充一下对内存对齐的探究:
读代码就可以知道,instanceSize
是计算需要申请的内存大小:
// Returns the number of bytes to allocate for one instance plus
// `extraBytes`. Uses the size cached in `cache` when available; otherwise
// falls back to the word-aligned instance size.
size_t instanceSize(size_t extraBytes) const {
    if (fastpath(cache.hasFastInstanceSize(extraBytes))) {
        return cache.fastInstanceSize(extraBytes);
    }

    const size_t total = alignedInstanceSize() + extraBytes;
    // CF requires all objects be at least 16 bytes.
    return (total < 16) ? size_t(16) : total;
}
cache.fastInstanceSize
方法是在fastpath下计算所需申请的内存大小:
// Returns the instance size derived from the bits stored in `_flags`.
// Callers must first check hasFastInstanceSize(extra).
size_t fastInstanceSize(size_t extra) const
{
    ASSERT(hasFastInstanceSize(extra));

    const bool knownZeroExtra = __builtin_constant_p(extra) && extra == 0;
    if (!knownZeroExtra) {
        size_t cached = _flags & FAST_CACHE_ALLOC_MASK;
        // remove the FAST_CACHE_ALLOC_DELTA16 that was added
        // by setFastInstanceSize
        return align16(cached + extra - FAST_CACHE_ALLOC_DELTA16);
    }
    // `extra` is a compile-time zero: the masked flag bits are returned as-is.
    return _flags & FAST_CACHE_ALLOC_MASK16;
}
// Rounds x up to the nearest multiple of 16 (values already on a 16-byte
// boundary are returned unchanged).
static inline size_t align16(size_t x) {
    const size_t low_bits = 15;
    return (x + low_bits) & ~low_bits;
}
最后一个字节对齐方法alignedInstanceSize
:
// Word-size constants, chosen by whether the target is LP64.
// WORD_MASK is what word_align() uses to round a size up to a word boundary:
// 7 gives 8-byte rounding on LP64, 3 gives 4-byte rounding otherwise.
#ifdef __LP64__
# define WORD_SHIFT 3UL
# define WORD_MASK 7UL
# define WORD_BITS 64
#else
# define WORD_SHIFT 2UL
# define WORD_MASK 3UL
# define WORD_BITS 32
#endif
// Returns unalignedInstanceSize() rounded up to a word boundary by word_align().
uint32_t alignedInstanceSize() const {
return word_align(unalignedInstanceSize());
}
// Byte alignment: rounds x up to the next multiple of (WORD_MASK + 1) by
// adding the mask and then clearing the low bits.
static inline size_t word_align(size_t x) {
    const size_t bumped = x + WORD_MASK;
    return bumped & ~WORD_MASK;
}
WORD_MASK
在arm64架构下是无符号长整型的7,即二进制下的0000 0111(这里只写出低8位),这里先尝试两个x值:
| x | x+WORD_MASK | ~WORD_MASK | 结果 |
|---|---|---|---|
| 12 | 0001 0011(19) | 1111 1000(-8) | 0001 0000(16) |
| 18 | 0001 1001(25) | 1111 1000(-8) | 0001 1000(24) |
如果x=12,x+WORD_MASK = 19 即0001 0011,~WORD_MASK = 1111 1000,0001 0011 & 1111 1000 = 0001 0000 即为16。
如果x=18,x+WORD_MASK = 25 即0001 1001,~WORD_MASK = 1111 1000,0001 1001 & 1111 1000 = 0001 1000即24。可见word_align
是一个8字节对齐算法,所以alignedInstanceSize返回的大小总是向上取整到8的整数倍。
malloc_zone_calloc或者calloc是为对象申请内存,调试源码走的是calloc的分支(因为传入的zone为nil)。在《alloc分析、源码调试(1)》中通过符号断点调试已经知道calloc属于libsystem_malloc.dylib库,因此可以对这个库的源码探究一下:和objc库的配置类似,新建一个Command Line Tool的target,在main方法添加代码:
// Request a single 24-byte element; this call is the entry point for stepping into libmalloc.
void *p = calloc(1, 24);
进行断点调试,会跟进到:
// Allocates memory for `num_items` elements of `size` bytes each from `zone`
// by calling the zone's own calloc callback.
//
// Around that call it emits start/end trace events, optionally runs
// internal_check() once malloc_check_counter reaches malloc_check_start, and
// notifies malloc_logger when one is installed.
//
// Returns whatever pointer the zone's calloc callback returns.
void *
malloc_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size)
{
MALLOC_TRACE(TRACE_calloc | DBG_FUNC_START, (uintptr_t)zone, num_items, size, 0);
void *ptr;
if (malloc_check_start && (malloc_check_counter++ >= malloc_check_start)) {
internal_check();
}
// Stepping in from calloc() lands on this line; the real work is done by
// the zone's calloc function pointer.
ptr = zone->calloc(zone, num_items, size);
if (malloc_logger) {
malloc_logger(MALLOC_LOG_TYPE_ALLOCATE | MALLOC_LOG_TYPE_HAS_ZONE | MALLOC_LOG_TYPE_CLEARED, (uintptr_t)zone,
(uintptr_t)(num_items * size), 0, (uintptr_t)ptr, 0);
}
MALLOC_TRACE(TRACE_calloc | DBG_FUNC_END, (uintptr_t)zone, num_items, size, (uintptr_t)ptr);
return ptr;
}
ptr = zone->calloc(zone, num_items, size);
这一行已经无法直接点击进去,这里可以通过符号断点继续跟进去;还有另外一个方法,就是按住control键点击step into进入calloc方法,需要点击多次,然后就进入到:
// calloc callback of the default zone: ignores the zone it was handed and
// redirects the request to the zone returned by runtime_default_zone().
static void *
default_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size)
{
    zone = runtime_default_zone();

    void *result = zone->calloc(zone, num_items, size);
    return result;
}
然后跟踪进入zone->calloc(zone, num_items, size);方法,再次调用nano_create_zone方法,然后在nano_create_zone中调用nano_malloc方法:
// Allocates `size` bytes. Requests no larger than NANO_MAX_SIZE are first
// tried through _nano_malloc_check_clear(); anything larger, or any request
// that path could not satisfy, is handed to the nanozone's helper zone.
static void *
nano_malloc(nanozone_t *nanozone, size_t size)
{
    if (size <= NANO_MAX_SIZE) {
        void *block = _nano_malloc_check_clear(nanozone, size, 0);
        if (block) {
            return block;
        }
        /* FALLTHROUGH to helper zone */
    }

    malloc_zone_t *helper = (malloc_zone_t *)(nanozone->helper_zone);
    return helper->malloc(helper, size);
}
这里主要是_nano_malloc_check_clear
方法来申请内存,跟踪进入:
static void *
_nano_malloc_check_clear(nanozone_t *nanozone, size_t size, boolean_t cleared_requested)
{
MALLOC_TRACE(TRACE_nano_malloc, (uintptr_t)nanozone, size, cleared_requested, 0);
void *ptr;
size_t slot_key;
size_t slot_bytes = segregated_size_to_fit(nanozone, size, &slot_key); // Note slot_key is set here
mag_index_t mag_index = nano_mag_index(nanozone);
//下面省略
这里通过segregated_size_to_fit方法来计算实际需要开辟的内存大小:
#define SHIFT_NANO_QUANTUM 4
#define NANO_REGIME_QUANTA_SIZE (1 << SHIFT_NANO_QUANTUM) // 16

// Rounds a requested size up to a whole number of 16-byte quanta.
//
// Returns the rounded size in bytes and stores the zero-based quantum count
// (number of quanta minus one) in *pKey. A request of 0 bytes is treated as
// one full quantum.
static MALLOC_INLINE size_t
segregated_size_to_fit(nanozone_t *nanozone, size_t size, size_t *pKey)
{
    // Historical behavior: a zero-byte request still occupies one quantum.
    size_t request = (0 == size) ? NANO_REGIME_QUANTA_SIZE : size;

    // Round up, then shift down to get the number of quanta needed.
    size_t quanta = (request + NANO_REGIME_QUANTA_SIZE - 1) >> SHIFT_NANO_QUANTUM;

    *pKey = quanta - 1; // Zero-based!

    // Shift back up: quanta count times the quantum size.
    return quanta << SHIFT_NANO_QUANTUM;
}
和之前的8字节对齐的算法思路一样(这里是先加15再右移4位、左移4位,相当于清掉低4位),这个是16字节对齐,所以这里实际开辟的内存大小总是16的整数倍。
至此alloc源码分析完结~