0x00 枚举的两种形式

0x00 原始值

简单的代码如下:

    // 原始值
    /// The four seasons, declared with a `String` raw type.
    ///
    /// `rawValue` is written by hand here instead of relying on the
    /// compiler-provided one, so only `.spring` maps to its own string.
    enum Season: String {
        case spring, summer, autumn, winter

        /// Returns `"Spring"` for `.spring` and `"Not Spring"` for every other case.
        var rawValue: String {
            switch self {
            case .spring:
                return "Spring"
            // Listed explicitly instead of `default:` so that adding a new case
            // later is a compile error until it is handled here.
            case .summer, .autumn, .winter:
                return "Not Spring"
            }
        }
    }

    let season: Season = .autumn

    // season.rawValue  // "Not Spring", because `rawValue` is overridden in `Season`

    // Collect the three layout figures for the value first, then print them
    // in the same order: size, alignment, stride.
    let seasonLayout = (
        size: MemoryLayout.size(ofValue: season),
        alignment: MemoryLayout.alignment(ofValue: season),
        stride: MemoryLayout.stride(ofValue: season)
    )
    print(seasonLayout.size)       // 1
    print(seasonLayout.alignment)  // 1
    print(seasonLayout.stride)     // 1

不过多介绍, 原始值就是有 rawValue 的一种枚举形式, 可以看到 print 结果都是 1, 这段枚举实际占用内存大小为 1 个字节, 对齐方式 1 个字节, 占用空间 1 个字节, 说明在内存中只需要 1 个字节空间就好, 由于重写了 rawValue 这个计算属性, 导致 season.rawValue 的结果为 Not Spring

0x01 关联值

简单的代码如下:

    // 关联值
    /// A score that is either a `Float` number of points or a letter grade.
    enum Score {
        /// Carries a `Float` payload.
        case points(Float)
        /// Carries a `Grade` payload.
        case grade(Grade)

        /// Letter grades with a `String` raw type.
        enum Grade: String {
            case A
            case B
            case C
            case D
        }
    }

    var score = Score.grade(.A)
    score = Score.points(12)

    // Collect the three layout figures for the value first, then print them
    // in the same order: size, alignment, stride.
    let scoreLayout = (
        size: MemoryLayout.size(ofValue: score),
        alignment: MemoryLayout.alignment(ofValue: score),
        stride: MemoryLayout.stride(ofValue: score)
    )
    print(scoreLayout.size)       // 5
    print(scoreLayout.alignment)  // 4
    print(scoreLayout.stride)     // 8

同样不过多介绍, 关联值就是在成员值(用来分辨当前是哪个枚举成员的标志)之外, 还可以存储一些系统允许的类型的值. 此时打印结果为实际占用内存 5 个字节, 对齐方式 4 个字节, 占用空间 8 个字节. Float 占用 4 个字节, 但是这里实际占用内存超过了 4 个字节, 所以按 4 个字节对齐之后占用空间就是 8 个字节. Grade 是一个原始值枚举, 所以是 1 个字节, 难道这 5 个字节是这两个加起来的?

前面结论已经说了, 多出来的 1 个字节其实是成员值; 枚举的关联值部分实际只按所需空间最大的那个成员来占用空间(这里是 Float 的 4 个字节), 所以是 4 + 1 = 5 个字节, 而不是 Float 与 Grade 相加

0x01 探索原始值内存分布

代码如下:

/// Assigns each case of a payload-free enum to one local variable in turn.
///
/// The statement order is deliberate: the disassembly listing that follows this
/// snippet shows one single-byte store per assignment.
func testEnum2() {
    // Local enum with three cases and no raw or associated values.
    enum TestEnum {
        case test1, test2, test3
    }
    
    var t: TestEnum = .test1
    t = .test2
    t = .test3
    
    // Reads `t` once; presumably this avoids a "never read" warning — confirm.
    let _ = t
}

// Run the experiment.
testEnum2()

汇编如下:

SwiftTest`testEnum2():
    0x100003b70 <+0>:  pushq  %rbp
    0x100003b71 <+1>:  movq   %rsp, %rbp
->  0x100003b74 <+4>:  movb   $0x0, -0x8(%rbp)
    0x100003b78 <+8>:  movb   $0x1, -0x8(%rbp)
    0x100003b7c <+12>: movb   $0x2, -0x8(%rbp)
    0x100003b80 <+16>: popq   %rbp
    0x100003b81 <+17>: retq

看汇编可以知道, 0x100003b74 这段代码就是将 .test1 赋值给 变量 t 的操作, -0x8(%rbp) 这个栈空间地址就是 变量 t 在 testEnum2 函数中的内存地址

由此可见, .test2 在内存中体现的值就为 0x1, .test3 在内存中体现的值就为 0x2, 所以原始值的形式下, 系统是通过将枚举放到内存中用类似 0, 1, 2, 3... 的方式存储, 以便将来用于 switch 或者 if 判断

0x01 探索关联值内存分布

代码如下:

/// Assigns each case of an enum with associated values to one local variable in turn.
///
/// The statement order is deliberate: the disassembly listing that follows this
/// snippet shows three 8-byte stores plus one single-byte store per assignment.
func testEnum3() {
    // Local enum whose cases carry three, two, one `Int`, one `Bool`, or nothing.
    enum TestEnum {
        case test1(Int, Int, Int)
        case test2(Int, Int)
        case test3(Int)
        case test4(Bool)
        case test5
    }
    
    var t: TestEnum = .test1(9, 10, 11)
    t = .test2(4, 5)
    t = .test3(15)
    t = .test4(true)
    t = .test5
    
    // Reads `t` once; presumably this avoids a "never read" warning — confirm.
    let _ = t
}

// Run the experiment.
testEnum3()

汇编如下:

SwiftTest`testEnum3():
    0x100003b90 <+0>:   pushq  %rbp
    0x100003b91 <+1>:   movq   %rsp, %rbp
->  0x100003b94 <+4>:   movq   $0x9, -0x20(%rbp)
    0x100003b9c <+12>:  movq   $0xa, -0x18(%rbp)
    0x100003ba4 <+20>:  movq   $0xb, -0x10(%rbp)
    0x100003bac <+28>:  movb   $0x0, -0x8(%rbp)
    0x100003bb0 <+32>:  movq   $0x4, -0x20(%rbp)
    0x100003bb8 <+40>:  movq   $0x5, -0x18(%rbp)
    0x100003bc0 <+48>:  movq   $0x0, -0x10(%rbp)
    0x100003bc8 <+56>:  movb   $0x1, -0x8(%rbp)
    0x100003bcc <+60>:  movq   $0xf, -0x20(%rbp)
    0x100003bd4 <+68>:  movq   $0x0, -0x18(%rbp)
    0x100003bdc <+76>:  movq   $0x0, -0x10(%rbp)
    0x100003be4 <+84>:  movb   $0x2, -0x8(%rbp)
    0x100003be8 <+88>:  movq   $0x1, -0x20(%rbp)
    0x100003bf0 <+96>:  movq   $0x0, -0x18(%rbp)
    0x100003bf8 <+104>: movq   $0x0, -0x10(%rbp)
    0x100003c00 <+112>: movb   $0x3, -0x8(%rbp)
    0x100003c04 <+116>: movq   $0x0, -0x20(%rbp)
    0x100003c0c <+124>: movq   $0x0, -0x18(%rbp)
    0x100003c14 <+132>: movq   $0x0, -0x10(%rbp)
    0x100003c1c <+140>: movb   $0x4, -0x8(%rbp)
    0x100003c20 <+144>: popq   %rbp
    0x100003c21 <+145>: retq

同样地, 0x100003b94 这段指令就是将 .test1 赋值给 变量 t 的操作, -0x20(%rbp) 这个栈空间地址就是 变量 t 在 testEnum3 函数中的内存地址

由于关联值会选择最大需求容量的那一个枚举值来计算占用内存大小, 所以理应按 .test1 来计算: Int 在 64 位上占用 8 个字节, .test1 有 3 个 Int, 故关联值部分占用 8 * 3 = 24 个字节, 然后还有 1 个字节用来存储成员值

看汇编可以知道, var t: TestEnum = .test1(9, 10, 11) 这句代码在汇编中体现为, 将 0x9, 0xa, 0xb, 0x0 放入连续的栈空间地址中, 其中前三个为关联值, 第四个为成员值用于标志

随后 t = .test2(4, 5) 这句代码在汇编中体现为(从 0x100003bb0 这行指令开始), 将 0x4, 0x5, 0x0, 0x1 放入刚才那段连续的栈空间地址中, 其中前两个为关联值, 第三个是 .test2 用不到的空间(被清零), 第四个为成员值用于标志

以此类推, 汇编上看得很清楚, 关联值形式的枚举确实是使用最大需求容量的那一个枚举值来计算占用内存大小, 最后一个字节为成员值

0x02 特殊情况

0x00 最大需求容量为三个关联值

// 1. Bool 值在末尾
  /// Variant whose largest case, `test1`, ends with a `Bool` after two `Int`s.
  enum TestEnum {
      case test1(Int, Int, Bool)
      case test2(Int, Int)
      case test3(Int)
      // Payload-free cases, kept in their original declaration order.
      case test4, test5, test6, test7
  }

// 2. Bool 值在中间
  /// Variant whose largest case, `test1`, has a `Bool` between two `Int`s.
  enum TestEnum {
      case test1(Int, Bool, Int)
      case test2(Int, Int)
      case test3(Int)
      // Payload-free cases, kept in their original declaration order.
      case test4, test5, test6, test7
  }

都以 test7 以及 test3 为例的汇编如下:

// 1. Bool 值在末尾
// .test7
    0x1000028d4 <+4>:  movq   $0x3, -0x18(%rbp)
    0x1000028dc <+12>: movq   $0x0, -0x10(%rbp)
    0x1000028e4 <+20>: movb   $-0x40, -0x8(%rbp)
// .test3(10)
    0x1000028d4 <+4>:  movq   $0xa, -0x18(%rbp)
    0x1000028dc <+12>: movq   $0x0, -0x10(%rbp)
    0x1000028e4 <+20>: movb   $-0x80, -0x8(%rbp)

// 2. Bool 值在中间
// .test7
    0x1000028a4 <+4>:  movq   $0x3, -0x20(%rbp)
    0x1000028ac <+12>: movq   $0x0, -0x18(%rbp)
    0x1000028b4 <+20>: movq   $0x0, -0x10(%rbp)
    0x1000028bc <+28>: movb   $0x3, -0x8(%rbp)
// .test3(10)
    0x1000028a4 <+4>:  movq   $0xa, -0x20(%rbp)
    0x1000028ac <+12>: movq   $0x0, -0x18(%rbp)
    0x1000028b4 <+20>: movq   $0x0, -0x10(%rbp)
    0x1000028bc <+28>: movb   $0x2, -0x8(%rbp)

我这里只举例两个 case , 其他的可以自测, 结论是当最大需求容量为三个关联值:

当 Bool 类型出现在末尾并且该 case 为内存空间基准项(即所需空间最大的那个成员)的时候, 底层依旧将该位置作为单字节处理, 只是把标志功能(存储成员值)也放进了这个 Bool 所在的单字节中. 此时标志不再用 0, 1, 2, 3 等数字表示, 而是放在该字节的高位上, 例如上面汇编中 .test3 的标志为 -0x80(即 0x80), 无关联值的 .test7 的标志为 -0x40(即 0xC0), 并且 .test7 还用前 8 个字节(0x3)来和其他无关联值的成员做区分
当不足 8 字节的类型(如 Bool)出现在中间并且该 case 为内存空间基准项的时候, 底层将该位置按 8 个字节来处理, 标志功能还是单独使用一个字节, 并用 0, 1, 2, 3... 来标记; 如果有多个无关联值的成员, 则它们最后一个字节的标志相同, 再用前 8 个字节来做区分(例如上面 .test7 的标志为 0x3, 前 8 个字节也为 0x3)

0x01 最大需求容量为四个关联值以上时

// 1. Bool 值在末尾
  /// Variant whose largest case, `test1`, ends with a `Bool` after four `Int`s.
  enum TestEnum {
      case test1(Int, Int, Int, Int, Bool)
      case test2(Int, Int)
      case test3(Int)
      // Payload-free cases, kept in their original declaration order.
      case test4, test5, test6, test7
  }

// 2. Bool 值在中间
  /// Variant whose largest case, `test1`, has a `Bool` as the third of five fields.
  enum TestEnum {
      case test1(Int, Int, Bool, Int, Int)
      case test2(Int, Int)
      case test3(Int)
      // Payload-free cases, kept in their original declaration order.
      case test4, test5, test6, test7
  }

// 3. Bool 值在前两个位置
  /// Variant whose largest case, `test1`, has a `Bool` as the second of five fields.
  enum TestEnum {
      case test1(Int, Bool, Int, Int, Int)
      case test2(Int, Int)
      case test3(Int)
      // Payload-free cases, kept in their original declaration order.
      case test4, test5, test6, test7
  }