在一个存取粒度为 4 字节的内存中,先从地址 0 读取 4 个字节到寄存器,然后从地址 1 读取 4 个字节到寄存器:
当从地址 0 开始读取数据时,是读取对齐地址的数据,直接通过一次读取就能完成;当从地址 1 读取数据时读取的是非对齐地址的数据,需要读取两次数据才能完成。
在读取完两次数据后,还要将 0-3 的数据向上偏移 1 字节,将 4-7 的数据向下偏移 3 字节,最后再将两块数据合并放入寄存器。
对一个内存未对齐的数据进行了这么多额外的操作,这对 CPU 的开销很大,大大降低了CPU性能。
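在 iOS 中,对象的属性需要进行内存对齐,而对象本身也需要进行内存对齐。内存对齐有三原则:
1. 数据成员对齐:结构体的第一个成员放在 offset 为 0 的位置,之后每个成员的起始位置必须是该成员自身大小的整数倍;
2. 结构体作为成员:如果结构体内嵌了另一个结构体,则内嵌结构体要从其内部最大成员大小的整数倍地址开始存储;
3. 结构体总大小:结构体的总大小(sizeof 的结果)必须是其内部最大成员大小的整数倍,不足的要补齐。
简而言之:成员按自身大小对齐,整体按最大成员大小对齐。各基本数据类型在 32 位与 64 位下所占字节数如下: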
C | OC | 32位 | 64位 |
---|---|---|---|
bool | BOOL(64位) | 1 | 1 |
signed char | (_signed char)int8_t、BOOL(32位) | 1 | 1 |
unsigned char | Boolean | 1 | 1 |
short | int16_t | 2 | 2 |
unsigned short | unichar | 2 | 2 |
int、int32_t | NSInteger(32位)、boolean_t(32位) | 4 | 4 |
unsigned int | NSUInteger(32位)、boolean_t(64位) | 4 | 4 |
long | NSInteger(64位) | 4 | 8 |
unsigned long | NSUInteger(64位) | 4 | 8 |
long long | int64_t | 8 | 8 |
float | CGFloat(32位) | 4 | 4 |
double | CGFloat(64位) | 8 | 8 |
// Example struct used to demonstrate how member order and alignment
// determine the value reported by sizeof.
typedef struct YDWTeacher {
char name;
bool sex;
int age;
float height;
double level;
}teacher;
// Same example struct, annotated with the size of each member.
typedef struct YDWTeacher {
char name; // 1 byte
bool sex; // 1 byte
int age; // 4 bytes
float height; // 4 bytes
double level; // 8 bytes
}teacher;
iOS之内存对齐[25483:1817236] 24
char name => [0], offset = 1;
bool sex 长度为 1 个字节,offset = 1 是 1 的倍数,无需填充,因此 bool sex => [1], offset = 2;
int age 长度为 4 个字节,offset = 2 不是 4 的倍数,需要 +2 补齐到 4,因此 int age => [4, 7], offset = 8;
float height 长度为 4 个字节,offset = 8 满足 4 的倍数,因此 float height => [8, 11], offset = 12;
double level 长度为 8 个字节,offset = 12 不是 8 的倍数,需要 +4 补齐到 16,因此 double level => [16, 23], offset = 24;
此时共占用字节 24,由于当前结构体中,最长数据类型 sizeof(double) = 8, 24 是 8 的倍数,因此对齐后取值为 24
// Second example struct: small members are interleaved with larger ones,
// so padding is needed before the int and before the double.
typedef struct YDWStudent {
char name; // 1 byte
int age; // 4 bytes
bool sex; // 1 byte
double score; // 8 bytes
}student;
iOS之内存对齐[25751:1843819] 24
char name => [0], offset = 1
int age 长度为 4 个字节,此时 offset 需要 +3,才是 4 的倍数,offset = 4, age => [4, 7], offset = 8
bool sex 长度为 1 个字节, offset = 8 满足,因此 sex => [8], offset = 9
double score 长度为 8 个字节,offset + 7 = 16 才是 8 的倍数,因此 score => [16, 23], offset = 24
此时共占用字节 24,由于当前结构体中,最长数据类型 sizeof(double) = 8, 24 是 8 的倍数,因此对齐后取值为 24
// Struct type that the following student struct refers to through a pointer.
typedef struct YDWTeacher {
char name; // 1 byte
bool sex; // 1 byte
int age; // 4 bytes
float height; // 4 bytes
double level; // 8 bytes
}teacher;
// Student struct extended with a pointer member referring to a teacher.
typedef struct YDWStudent {
char name; // 1 byte
int age; // 4 bytes
bool sex; // 1 byte
double score; // 8 bytes
teacher *teacher; // pointer to a teacher (pointer-sized; the struct itself is not embedded)
}student;
iOS之内存对齐[25862:1856001] 32
char name => [0], offset = 1
int age 长度为 4 个字节,此时 offset 需要 +3,才是 4 的倍数,offset = 4, age => [4, 7], offset = 8
bool sex 长度为 1 个字节, offset = 8 满足,因此 sex => [8], offset = 9
double score 长度为 8 个字节,offset + 7 = 16 才是 8 的倍数,因此 score => [16, 23], offset = 24
teacher *teacher 是一个指针,在 64 位下长度为 8 个字节(与它所指向的 teacher 结构体的大小 24 无关),此时 offset = 24 正好是 8 的倍数,因此 teacher => [24, 31], offset = 32。只有当成员是结构体本身(teacher teacher)时,才需要按其内部最长数据类型 sizeof(double) = 8 对齐并占用 24 个字节
此时共占用字节 32,由于当前结构体中,最长数据类型 sizeof(double) = 8, 32 是 8 的倍数,因此对齐后取值为 32
// Allocate an instance and populate its properties before measuring it.
YDWBoy *boy = [YDWBoy alloc];
boy.name = @"YDW";
// NOTE(review): this is a plain C string literal; if nickName is declared as
// NSString * it should be @"handsome" — confirm against the YDWBoy declaration.
boy.nickName = "handsome";
boy.age = 18;
boy.height = 175.0;
// Logs three different sizes:
//   sizeof(boy)            - size of the pointer variable itself,
//   class_getInstanceSize  - aligned instance size reported by the runtime for the class,
//   malloc_size            - size malloc reports for the heap block backing the instance.
NSLog(@"%lu - %lu - %lu", sizeof(boy), class_getInstanceSize([YDWBoy class]), malloc_size((__bridge const void *)(boy)));
2020-09-08 00:00:58.621860+0800 iOS之内存对齐[26116:1877306] 8 - 40 - 48
// Returns the number of bytes to allocate for an instance: the aligned
// instance size plus any caller-requested extra bytes, but never fewer
// than 16 bytes.
size_t instanceSize(size_t extraBytes) {
    // CF requires all objects be at least 16 bytes.
    const size_t minimumObjectSize = 16;
    size_t requested = alignedInstanceSize() + extraBytes;
    return (requested < minimumObjectSize) ? minimumObjectSize : requested;
}
// Allocate one zero-filled block of `size` bytes and use it as the object pointer.
obj = (id)calloc(1, size);
// Returns the aligned instance size of `cls`, or 0 when `cls` is nil.
size_t class_getInstanceSize(Class cls) {
    return cls ? cls->alignedInstanceSize() : 0;
}
// Request a single zero-filled 40-byte block, then print the size that
// malloc_size reports for the block that was actually handed back.
void *p = calloc(1, 40);
NSLog(@"%lu",malloc_size(p));
// Dispatch through the calloc function pointer stored in the zone, passing the
// zone itself along with the item count and per-item size.
ptr = zone->calloc(zone, num_items, size);
p zone->calloc
输出: (void *(*)(_malloc_zone_t *, size_t, size_t)) $1 = 0x00000001003839c7 (.dylib`default_zone_calloc at malloc.c:249)
/*
 * calloc entry point of the default zone: ignores the zone it was called on,
 * looks up the runtime default zone and forwards the request to that zone's
 * own calloc implementation.
 */
static void *
default_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size)
{
    malloc_zone_t *runtime_zone = runtime_default_zone();
    return runtime_zone->calloc(runtime_zone, num_items, size);
}
p zone->calloc
输出: (void *(*)(_malloc_zone_t *, size_t, size_t)) $0 = 0x0000000100384faa (.dylib`nano_calloc at nano_malloc.c:884)
/*
 * calloc implementation of the nano zone.
 *
 * Computes the total request size via calloc_get_size (a nonzero result is
 * treated as failure and yields NULL). Requests no larger than NANO_MAX_SIZE
 * are first attempted with _nano_malloc_check_clear; if that returns NULL, or
 * the request is larger, the call is forwarded to the helper zone's calloc.
 */
static void *
nano_calloc(nanozone_t *nanozone, size_t num_items, size_t size)
{
    size_t total_bytes;

    if (calloc_get_size(num_items, size, 0, &total_bytes) != 0) {
        return NULL;
    }

    if (total_bytes <= NANO_MAX_SIZE) {
        void *block = _nano_malloc_check_clear(nanozone, total_bytes, 1);
        if (block != NULL) {
            return block;
        }
        /* Nano allocation failed: fall through to the helper zone. */
    }

    malloc_zone_t *helper = (malloc_zone_t *)(nanozone->helper_zone);
    return helper->calloc(helper, 1, total_bytes);
}
/*
 * Rounds `size` up to a whole number of nano quanta.
 *
 * Returns the rounded size in bytes and stores the zero-based quantum index
 * (number of quanta minus one) in *pKey.
 *
 * Worked example, assuming a 16-byte quantum (shift of 4) — confirm against
 * the NANO_REGIME_QUANTA_SIZE / SHIFT_NANO_QUANTUM definitions:
 *   size = 40  ->  (40 + 15) >> 4 = 3 quanta  ->  3 << 4 = 48 bytes.
 */
static MALLOC_INLINE size_t
segregated_size_to_fit(nanozone_t *nanozone, size_t size, size_t *pKey)
{
    // A zero-byte request is served as one full quantum (historical behavior).
    size_t wanted = (size == 0) ? NANO_REGIME_QUANTA_SIZE : size;

    // Round up, then shift down to get the number of quanta needed.
    size_t quanta = (wanted + NANO_REGIME_QUANTA_SIZE - 1) >> SHIFT_NANO_QUANTUM;

    *pKey = quanta - 1; // Zero-based!

    // Multiply back by the power-of-two quantum size to get the slot size in bytes.
    return quanta << SHIFT_NANO_QUANTUM;
}