3、alloc分析、源码调试（3）

4、_objc_rootAllocWithZone方法进一步分析()

对_objc_rootAllocWithZone方法进行调试，单步跟踪：

// Creates an instance of cls by delegating to _class_createInstanceFromZone.
// allocWithZone under __OBJC2__ ignores the zone parameter, so nil is passed
// as the zone and no extra bytes are requested.
// OBJECT_CONSTRUCT_CALL_BADALLOC is forwarded as the construct flags.
NEVER_INLINE
id
_objc_rootAllocWithZone(Class cls, malloc_zone_t *zone __unused)
{
    id instance = _class_createInstanceFromZone(cls, 0, nil,
                                                OBJECT_CONSTRUCT_CALL_BADALLOC);
    return instance;
}

因此进入到_class_createInstanceFromZone方法：

// Allocates zero-filled memory for one instance of cls and initializes its isa.
//   cls              - class to instantiate; asserted to be realized.
//   extraBytes       - passed to cls->instanceSize() when computing the size.
//   zone             - if non-null, memory comes from malloc_zone_calloc on
//                      this zone; otherwise from calloc.
//   construct_flags  - OBJECT_CONSTRUCT_* bits; OBJECT_CONSTRUCT_CALL_BADALLOC
//                      selects the bad-alloc handler on allocation failure.
//   cxxConstruct     - when false, the C++ construction step is skipped even
//                      if the class reports a C++ constructor.
//   outAllocatedSize - if non-null, receives the computed allocation size.
// Returns the new object; on allocation failure returns either the result of
// _objc_callBadAllocHandler(cls) or nil; when C++ construction is needed,
// returns the result of object_cxxConstructFromClass.
static ALWAYS_INLINE id
_class_createInstanceFromZone(Class cls, size_t extraBytes, void *zone,
                              int construct_flags = OBJECT_CONSTRUCT_NONE,
                              bool cxxConstruct = true,
                              size_t *outAllocatedSize = nil)
{
    ASSERT(cls->isRealized());

    // Read class's info bits all at once for performance
    bool hasCxxCtor = cxxConstruct && cls->hasCxxCtor();
    bool hasCxxDtor = cls->hasCxxDtor();
    bool fast = cls->canAllocNonpointer();
    size_t size;

    // Step 1: compute how many bytes to request.
    size = cls->instanceSize(extraBytes);
    if (outAllocatedSize) *outAllocatedSize = size;

    // Step 2: obtain zeroed memory, from the given zone if there is one.
    id obj;
    if (zone) {
        obj = (id)malloc_zone_calloc((malloc_zone_t *)zone, 1, size);
    } else {
        obj = (id)calloc(1, size);
    }
    if (slowpath(!obj)) {
        // Allocation failed: defer to the bad-alloc handler only if requested.
        if (construct_flags & OBJECT_CONSTRUCT_CALL_BADALLOC) {
            return _objc_callBadAllocHandler(cls);
        }
        return nil;
    }

    // Step 3: bind the new memory to its class by initializing isa.
    if (!zone && fast) {
        obj->initInstanceIsa(cls, hasCxxDtor);
    } else {
        // Use raw pointer isa on the assumption that they might be
        // doing something weird with the zone or RR.
        obj->initIsa(cls);
    }

    // No C++ constructor to run: the object is ready.
    if (fastpath(!hasCxxCtor)) {
        return obj;
    }

    // FREE_ONFAILURE is added before handing the object to C++ construction.
    construct_flags |= OBJECT_CONSTRUCT_FREE_ONFAILURE;
    return object_cxxConstructFromClass(obj, cls, construct_flags);
}

有三个主要方法instanceSize、malloc_zone_calloc或者calloc、initInstanceIsa或者initIsa。
最后一个与isa相关的方法无法直接从名字读出含义，其实这是一个将申请到的内存地址与类绑定的过程，即对象的isa存储的是类的一些信息。
那么看了这里的源码又出现了两个问题：

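问题一、fastpath和slowpath是什么？之前介绍过汇编开关，可以在断点调试时显示汇编层面的方法调用过程。这里再介绍Build Settings中的Optimization Level选项，它决定编译器把源码转化为底层代码时采用的优化级别，默认选择是None。fastpath(x)和slowpath(x)是基于__builtin_expect的宏，用来提示编译器条件x大概率为真（fastpath）或大概率为假（slowpath）；它们不会改变条件判断的结果，只影响编译器对分支的优化。选择Fastest、Smallest时，编译器会按照这些提示进行优化，生成的汇编命令会少很多，节省时间；以None的选择打开汇编开关，则不做优化，流程会长一些，多了很多汇编命令。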

问题二、initInstanceIsa和initIsa具体做了什么？这个涉及到类和类的结构了，后续会分析。

在此，既然提到instanceSize，最后补充一下对内存对齐的探究：
读代码就可以知道，instanceSize是计算需要申请的内存大小：

size_t instanceSize(size_t extraBytes) const {
        if (fastpath(cache.hasFastInstanceSize(extraBytes))) {
            return cache.fastInstanceSize(extraBytes);
        }

        size_t size = alignedInstanceSize() + extraBytes;
        // CF requires all objects be at least 16 bytes.
        if (size < 16) size = 16;
        return size;
}

cache.fastInstanceSize方法是在fastpath下计算所需申请的内存大小：

// Computes the allocation size from the bits stored in _flags.
// Only valid when hasFastInstanceSize(extra) holds (asserted).
size_t fastInstanceSize(size_t extra) const
{
    ASSERT(hasFastInstanceSize(extra));

    // When extra is a compile-time constant equal to 0, the
    // FAST_CACHE_ALLOC_MASK16 bits of _flags are returned directly.
    if (__builtin_constant_p(extra) && extra == 0) {
        return _flags & FAST_CACHE_ALLOC_MASK16;
    }

    // General case: take the stored size, add the extra bytes, remove the
    // FAST_CACHE_ALLOC_DELTA16 that was added by setFastInstanceSize, and
    // round the result up with align16.
    const size_t storedSize = _flags & FAST_CACHE_ALLOC_MASK;
    return align16(storedSize + extra - FAST_CACHE_ALLOC_DELTA16);
}
// Rounds x up to the nearest multiple of 16: add 15, then clear the low
// four bits.
static inline size_t align16(size_t x) {
    const size_t lowBits = 15;
    const size_t bumped = x + lowBits;
    return bumped & ~lowBits;
}

最后一个字节对齐方法alignedInstanceSize：

// Word-size constants, selected by whether __LP64__ is defined.
//   With __LP64__:    WORD_SHIFT 3, WORD_MASK 7, WORD_BITS 64.
//   Without __LP64__: WORD_SHIFT 2, WORD_MASK 3, WORD_BITS 32.
#ifdef __LP64__
#   define WORD_SHIFT 3UL
#   define WORD_MASK 7UL
#   define WORD_BITS 64
#else
#   define WORD_SHIFT 2UL
#   define WORD_MASK 3UL
#   define WORD_BITS 32
#endif

// Returns unalignedInstanceSize() rounded up by word_align.
uint32_t alignedInstanceSize() const {
    const size_t rawSize = unalignedInstanceSize();
    return word_align(rawSize);
}
// Byte alignment: rounds x up to the next multiple of (WORD_MASK + 1) by
// adding WORD_MASK and then clearing the bits covered by WORD_MASK.
static inline size_t word_align(size_t x) {
    const size_t padded = x + WORD_MASK;
    return padded & ~WORD_MASK;
}

WORD_MASK在arm64架构下是无符号长整型的7，即二进制下的0000 0111，这里先尝试两个x值：

x	x+WORD_MASK	～WORD_MASK	结果
12	0001 0011（19 ）	1111 1000（-8）	0001 0000（16）
18	0001 1001（25）	1111 1000（-8）	0001 1000（24）

如果x=12，x+WORD_MASK = 19 即0001 0011,～WORD_MASK = 1111 1000，0001 0011 & 1111 1000 = 0001 0000 即为16。
如果x=18，x+WORD_MASK = 25 即0001 1001，～WORD_MASK = 1111 1000，0001 1001 & 1111 1000 = 0001 1000即24。可见word_align是一个8字节对齐算法，所以alignedInstanceSize得到的大小总是8的倍数。

malloc_zone_calloc或者calloc是申请类的内存，调试源码走的是calloc的分支，由于alloc分析、源码调试（1）中符号断点调试知道是属于libsystem_malloc.dylib库，因此对这个库的源码可以探究一下，和objc库的配置类似，新建一个commandLine的target，在main方法添加代码：

// Request one zero-initialized element of 24 bytes so the calloc path can be stepped through.
void *p = calloc(1, 24);

进行断点调试，会跟进到：

// Forwards a calloc request for num_items elements of size bytes to the
// zone's calloc callback. Emits trace events before and after the call,
// runs internal_check() once the check counter has reached
// malloc_check_start, and reports the allocation to malloc_logger when one
// is installed. Returns whatever the zone's calloc callback returned.
void *
malloc_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size)
{
    MALLOC_TRACE(TRACE_calloc | DBG_FUNC_START, (uintptr_t)zone, num_items, size, 0);

    // The counter is only advanced while malloc_check_start is non-zero.
    if (malloc_check_start) {
        if (malloc_check_counter++ >= malloc_check_start) {
            internal_check();
        }
    }

    // Position reached when stepping in with the debugger.
    void *result = zone->calloc(zone, num_items, size);

    if (malloc_logger) {
        const uintptr_t total_bytes = (uintptr_t)(num_items * size);
        malloc_logger(MALLOC_LOG_TYPE_ALLOCATE | MALLOC_LOG_TYPE_HAS_ZONE | MALLOC_LOG_TYPE_CLEARED,
                (uintptr_t)zone, total_bytes, 0, (uintptr_t)result, 0);
    }

    MALLOC_TRACE(TRACE_calloc | DBG_FUNC_END, (uintptr_t)zone, num_items, size, (uintptr_t)result);
    return result;
}

ptr = zone->calloc(zone, num_items, size);已经无法直接点击跳转进去，这里可以通过符号断点继续跟进；另外一个方法是按住control点击step into进入calloc方法（需要点击多次），然后就进入到：

// calloc callback that disregards the zone it is handed: it looks up
// runtime_default_zone() and forwards the request to that zone's calloc.
static void *
default_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size)
{
    (void)zone; // the incoming zone is not consulted

    malloc_zone_t *target = runtime_default_zone();
    return target->calloc(target, num_items, size);
}

然后跟踪进入zone->calloc(zone, num_items, size);方法，再次调用nano_create_zone方法，然后在nano_create_zone中调用nano_malloc方法：

// Allocates size bytes. Requests no larger than NANO_MAX_SIZE are first
// attempted via _nano_malloc_check_clear; larger requests, or a failed
// attempt, are served by the nanozone's helper zone.
static void *
nano_malloc(nanozone_t *nanozone, size_t size)
{
    if (size <= NANO_MAX_SIZE) {
        void *block = _nano_malloc_check_clear(nanozone, size, 0);
        if (block) {
            return block;
        }
        /* Nothing obtained here: FALLTHROUGH to helper zone */
    }

    malloc_zone_t *helper = (malloc_zone_t *)(nanozone->helper_zone);
    return helper->malloc(helper, size);
}

这里主要是_nano_malloc_check_clear方法来申请内存，跟踪进入：

// Opening part of the nano allocation routine called by nano_malloc.
// Only the set-up is shown: it emits a trace event, converts the requested
// size into a slot size and slot key, and looks up the magazine index.
static void *
_nano_malloc_check_clear(nanozone_t *nanozone, size_t size, boolean_t cleared_requested)
{
    MALLOC_TRACE(TRACE_nano_malloc, (uintptr_t)nanozone, size, cleared_requested, 0);

    void *ptr;
    size_t slot_key;
    // slot_bytes is the size returned by segregated_size_to_fit for this request.
    size_t slot_bytes = segregated_size_to_fit(nanozone, size, &slot_key); // Note slot_key is set here
    mag_index_t mag_index = nano_mag_index(nanozone);
// remainder of the function omitted

这里通过segregated_size_to_fit方法来计算实际要开辟的内存大小：

#define SHIFT_NANO_QUANTUM      4
#define NANO_REGIME_QUANTA_SIZE (1 << SHIFT_NANO_QUANTUM)   // 16
// Rounds size up to a whole number of NANO_REGIME_QUANTA_SIZE-byte quanta.
// Stores the zero-based quantum index (number of quanta minus one) in *pKey
// and returns the rounded byte count. A size of 0 is treated as one quantum.
static MALLOC_INLINE size_t
segregated_size_to_fit(nanozone_t *nanozone, size_t size, size_t *pKey)
{
    // Historical behavior: a zero-byte request occupies one full quantum.
    const size_t wanted = (0 == size) ? NANO_REGIME_QUANTA_SIZE : size;

    // Round up and shift to get the number of quanta.
    const size_t quanta = (wanted + NANO_REGIME_QUANTA_SIZE - 1) >> SHIFT_NANO_QUANTUM;

    *pKey = quanta - 1; // Zero-based!

    // Multiply by the power-of-two quantum size to get bytes.
    return quanta << SHIFT_NANO_QUANTUM;
}

和之前的8字节对齐的算法类似，这个是16字节对齐，所以前面的calloc(1, 24)实际开辟的内存大小是32字节。

至此alloc源码分析完结～

3、alloc分析、源码调试（3）

4、_objc_rootAllocWithZone方法进一步分析()

你可能感兴趣的:(3、alloc分析、源码调试（3）)