AES代码实现-Golang源码

引流 : AES 原理 / DES 原理和Golang实现 / 个人主页

参考 : 标准AES-FIPS197 /《深入浅出密码学》

crypto/aes 包

Golang中aes和des采用了相同的设计模式，都实现了cipher.block接口，整体的代码设计思路是类似的。

主要涉及下面三个文件

-aes
 -cipher.go //提供构造方法，暴露加解密方法
 -block.go // Golang加解密的具体实现
 -const.go // 定义算法所需要的常量和测试数据

对于包中的其他文件*asm.go , *_gcm.go 是直接操作汇编码进行加解密，前提是操作系统支持。

对于模块中的方法：*Go()表示用纯Go实现的；*Asm()和*Gcm()都是工具人，下面是汇编。

为什么AES整三套实现呢？原因一些操作系统是实现了AES，汇编调用总要比Go快呀，白嫖操作系统现成的肯定更香呀。

那DES为啥只有Go实现呢？当然是操作系统基本没有实现这个呀，早在1999年NIST就说了除了历史遗留系统都不会再支持DES了。还是因为缺点明显，没有牌面。

-- 沃兹基.斯沃德

const.go

在const.go文件中定义的常量包含：

有限域上的不可约多项式（irreducible polynomials）:
```
const poly = 1<<8 | 1<<4 | 1<<3 | 1<<1 | 1<<0 // x⁸ + x⁴ + x³ + x + 1
```
关于有限域以及不可约多项式可以查看相关的定义和性质。需要说明的是：有限域上的加法和减法对应二进制的异或运算，对于异或运算得到的结果长度不会超过m；有限域上的乘法运算，在二进制上式通过分配率然后进行移位运算，如:

由此可知在xor操作之后得到的结果长度可能会大于，所以需要对不可约多项式进行mod运算。

数组powx存储密钥编排时的每一轮所需要的系数，用于g函数，相当于。

// Powers of x mod poly in GF(2).
var powx = [16]byte{
  0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f,
}

AES和DES不同的是S-Box在加密过程中只有一个，S-Box置换实际上是对1byte的数据（8位，刚好对应有限域）进行乘法逆元，得到的逆元再进行一次仿射。对于S-Box的置换过程在AES原理中有解释。Golang中为加密定义了一个S-Box，为解密过程定义了一个S-Box的逆。S-Box的数据直接使用了AES标准上的数据。

// FIPS-197 Figure 7. S-box substitution values in hexadecimal format.
var sbox0 = [256]byte{
  0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
  0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
  0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
  0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
  0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
  0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
  0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
  0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
  0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
  0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
  0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
  0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
  0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
  0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
  0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
  0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
}

// FIPS-197 Figure 14.  Inverse S-box substitution values in hexadecimal format.
var sbox1 = [256]byte{
  0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
  0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
  0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
  0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
  0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
  0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
  0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
  0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
  0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
  0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
  0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
  0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
  0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
  0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
  0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
  0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
}

加密和解密的中间表te* [256]uint32和 td* [256]uint32将加密的字节代换层、ShiftRows、MixColumn合并为一个查表操作，极大地提升的效率。中间表示如产生的：链接。OZR

cipher.go

实例构造

cipher.go负责暴露必要的共有方法。首先是构造方法NewCipher, 输入为密钥，从密钥长度校检可以看到，该构造器可以构造支持AES-128, AES-192, 或者AES-256的实例。具体的构造方法是调用cipher_asm.go文件中的newCipher方法。

// NewCipher creates and returns a new cipher.Block.
// The key argument should be the AES key,
// either 16, 24, or 32 bytes to select
// AES-128, AES-192, or AES-256.
func NewCipher(key []byte) (cipher.Block, error) {
    k := len(key)
    switch k {
    default:
        return nil, KeySizeError(k)
    case 16, 24, 32:
        break
    }
    return newCipher(key)
}

cipher_asm.go文件是直接调用系统底层的AES实现，直接操作的汇编码。newCipher中可以看到如果操作系统上没有实现AES算法，则会到cipher.go中的newCipherGeneric创建一个由Golang实现加密算法的cipher实例。由此可以看出操作系统上实现的AES算法实际效率会比Golang实现的高。

var supportsAES = cpu.X86.HasAES || cpu.ARM64.HasAES
var supportsGFMUL = cpu.X86.HasPCLMULQDQ || cpu.ARM64.HasPMULL

func newCipher(key []byte) (cipher.Block, error) {
    if !supportsAES {
        return newCipherGeneric(key)
    }
    n := len(key) + 28
    c := aesCipherAsm{aesCipher{make([]uint32, n), make([]uint32, n)}}
    var rounds int
    switch len(key) {
    case 128 / 8:
        rounds = 10
    case 192 / 8:
        rounds = 12
    case 256 / 8:
        rounds = 14
    }

    expandKeyAsm(rounds, &key[0], &c.enc[0], &c.dec[0])
    if supportsAES && supportsGFMUL {
        // 千层饼的n-1层
        return &aesCipherGCM{c}, nil
    }
    return &c, nil
}

以cipher_asm.go中的加密方法Encrypt为例，该方法实际掉用的是asm_（cpu）.s中定义的encryptBlockAsm方法。而asm_*.s文件是由汇编码编写。

// defined in asm_*.s

//go:noescape
func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)

//go:noescape
func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)

//go:noescape
func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)

type aesCipherAsm struct {
    aesCipher
}

func (c *aesCipherAsm) Encrypt(dst, src []byte) {
    if len(src) < BlockSize {
        panic("crypto/aes: input not full block")
    }
    if len(dst) < BlockSize {
        panic("crypto/aes: output not full block")
    }
    if subtle.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
        panic("crypto/aes: invalid buffer overlap")
    }
    encryptBlockAsm(len(c.enc)/4-1, &c.enc[0], &dst[0], &src[0])
}

好了，汇编就不去头铁了。如果操作系统层面上不支持AES，就回到cipher.go的newCipherGeneric吧。

implemented in pure Go,嗯，真香。newCipherGeneric中为enc和dec分配了len(key) + 28大小的空间，具体为什么要+28呢？以AES-128为例：

AES 128 需要10轮，每轮4个分组，每个分组4 * 8 bits = 32 bits。所以每个分组需要占用4个uint32的大小，存储所有的密钥需要（10+1）* 4= 44个uint32大小的空间。
而 len(enc) = len(key) + 28 = 16 + 28 = 44，刚好。
同理根据密钥编排的标准，可以推出AES 192 和AES-256的enc大小，正好是len(key) + 28

// A cipher is an instance of AES encryption using a particular key.
type aesCipher struct {
    // 加密密钥
    enc []uint32
    // 解密密钥
    dec []uint32
}
// newCipherGeneric creates and returns a new cipher.Block
// implemented in pure Go.
func newCipherGeneric(key []byte) (cipher.Block, error) {
    n := len(key) + 28
    c := aesCipher{make([]uint32, n), make([]uint32, n)}
    expandKeyGo(key, c.enc, c.dec)
    return &c, nil
}

密钥编排

newCipherGeneric毫无疑问肯定要进行密钥编排expandKeyGo吧，一边加密边一边编排运算多慢呀，先算好，放到内存妥妥的。expandKeyGo在block.go中定义的，都干了啥呢？enc是一个unit32的切片，密钥按每4 byte一个分组刚好是enc中一个元素的大小。加密密钥编排的流程为：

将16 byte的初始密钥存入enc[0:len(key) / 4]中。
如果i%nk == 0说明是子密钥的第一个分组，需要先经过g函数subw(rotw(t)) ^ (uint32(powx[i/nk-1]) << 24)。首先进行rotw()将32bits的元素按byte翻转一下，subw会按byte求逆。powx[i/nk-1]为每一轮编排的系数，系数的长度为8bit，左移动24位和subw(rotw(t))的结果异或，和编排框架图中一致。
nk > 6 && i%nk == 4说明是AES-192第8轮编排的第一个分组或者AES-256第7轮编排的第一分组。对于这个分组只需要对t := enc[i-1]进行按byte求逆。
递推公式:enc[i] = enc[i-nk] ^ t

对于解密密钥的生成，Golang没有简单的按编排的轮数倒序过来，而是进行一次MixColumn转换。

// Key expansion algorithm. See FIPS-197, Figure 11.
// Their rcon[i] is our powx[i-1] << 24.
func expandKeyGo(key []byte, enc, dec []uint32) {
    // Encryption key setup.
    var i int
    nk := len(key) / 4
    // 1. 初始密钥分组
    for i = 0; i < nk; i++ {
        enc[i] = binary.BigEndian.Uint32(key[4*i:])
    }
    // 2. 生成加密密钥
    for ; i < len(enc); i++ {
        t := enc[i-1]
        if i%nk == 0 {
            t = subw(rotw(t)) ^ (uint32(powx[i/nk-1]) << 24)
        } else if nk > 6 && i%nk == 4 {
            t = subw(t)
        }
        // 递归公式
        enc[i] = enc[i-nk] ^ t
    }

    // Derive decryption key from encryption key.
    // Reverse the 4-word round key sets from enc to produce dec.
    // All sets but the first and last get the MixColumn transform applied.
    // 解密密钥生成，除了第一个和最后一个集合都进行了MixColumn转换。
    if dec == nil {
        return
    }
    n := len(enc)
    for i := 0; i < n; i += 4 {
        ei := n - i - 4
        for j := 0; j < 4; j++ {
            x := enc[ei+j]
            if i > 0 && i+4 < n {
                x = td0[sbox0[x>>24]] ^ td1[sbox0[x>>16&0xff]] ^ td2[sbox0[x>>8&0xff]] ^ td3[sbox0[x&0xff]]
            }
            dec[i+j] = x
        }
    }
}

128密钥编排过程

block.go

完成密钥编排了之后，就可以拿到实例cipherobj调用Encrypt方法，实际会进入block.go:encryptBlockGo函数。

func (c *aesCipher) Encrypt(dst, src []byte) {
    if len(src) < BlockSize {
        panic("crypto/aes: input not full block")
    }
    if len(dst) < BlockSize {
        panic("crypto/aes: output not full block")
    }
    if subtle.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
        panic("crypto/aes: invalid buffer overlap")
    }
    // 加密肯定需要密钥c.enc，存放结果dst，明文src
    encryptBlockGo(c.enc, dst, src)
}

这是时候又要贴出每轮加密的框架图了，先把流程放这儿，接着看encryptBlockGo函数。

AES加密轮框架

先说明一下AES分组是固定的16 bytes，和密钥长度无关。

// The AES block size in bytes.
const BlockSize = 16
func (c *aesCipher) BlockSize() int { return BlockSize }

加密过程:

按字节分成四个组，每组刚好（4 byte = 32 bits）是一个uint32的大小。
进行首轮密钥加法。并且最后一轮加密也是单独提出来的所以，需要在加密轮数的基础上减一次。如AES-128为10-1=9轮
计算加密的轮数：AES-128中len(xk) / 4-2=11-2=9；AES-192中len(xk) / 4-2=13-2=11;AES-256中len(xk) / 4-2=15-2=13
加密轮n-1次，使用中间表T-table:t0 = xk[k+0] ^ te0[uint8(s0>>24)] ^ te1[uint8(s1>>16)] ^ te2[uint8(s2>>8)] ^ te3[uint8(s3)],
最后一轮加密, 因为最后一轮不需要MixColumn操作。

// Encrypt one block from src into dst, using the expanded key xk.
// ...Go结尾的，说明是 implemented in pure Go！
func encryptBlockGo(xk []uint32, dst, src []byte) {
    _ = src[15] // early bounds check
   //  1. 明文分组
    s0 := binary.BigEndian.Uint32(src[0:4])
    s1 := binary.BigEndian.Uint32(src[4:8])
    s2 := binary.BigEndian.Uint32(src[8:12])
    s3 := binary.BigEndian.Uint32(src[12:16])

    // First round just XORs input with key.
    // 进行首轮密钥加法
    s0 ^= xk[0]
    s1 ^= xk[1]
    s2 ^= xk[2]
    s3 ^= xk[3]

    // Middle rounds shuffle using tables.
    // Number of rounds is set by length of expanded key.
    // 计算加密的轮数
    nr := len(xk)/4 - 2 // - 2: one above, one more below
    k := 4
    var t0, t1, t2, t3 uint32
    for r := 0; r < nr; r++ {
        t0 = xk[k+0] ^ te0[uint8(s0>>24)] ^ te1[uint8(s1>>16)] ^ te2[uint8(s2>>8)] ^ te3[uint8(s3)]
        t1 = xk[k+1] ^ te0[uint8(s1>>24)] ^ te1[uint8(s2>>16)] ^ te2[uint8(s3>>8)] ^ te3[uint8(s0)]
        t2 = xk[k+2] ^ te0[uint8(s2>>24)] ^ te1[uint8(s3>>16)] ^ te2[uint8(s0>>8)] ^ te3[uint8(s1)]
        t3 = xk[k+3] ^ te0[uint8(s3>>24)] ^ te1[uint8(s0>>16)] ^ te2[uint8(s1>>8)] ^ te3[uint8(s2)]
        k += 4
        s0, s1, s2, s3 = t0, t1, t2, t3
    }

    // Last round uses s-box directly and XORs to produce output.
    s0 = uint32(sbox0[t0>>24])<<24 | uint32(sbox0[t1>>16&0xff])<<16 | uint32(sbox0[t2>>8&0xff])<<8 | uint32(sbox0[t3&0xff])
    s1 = uint32(sbox0[t1>>24])<<24 | uint32(sbox0[t2>>16&0xff])<<16 | uint32(sbox0[t3>>8&0xff])<<8 | uint32(sbox0[t0&0xff])
    s2 = uint32(sbox0[t2>>24])<<24 | uint32(sbox0[t3>>16&0xff])<<16 | uint32(sbox0[t0>>8&0xff])<<8 | uint32(sbox0[t1&0xff])
    s3 = uint32(sbox0[t3>>24])<<24 | uint32(sbox0[t0>>16&0xff])<<16 | uint32(sbox0[t1>>8&0xff])<<8 | uint32(sbox0[t2&0xff])

    s0 ^= xk[k+0]
    s1 ^= xk[k+1]
    s2 ^= xk[k+2]
    s3 ^= xk[k+3]

    _ = dst[15] // early bounds check
    binary.BigEndian.PutUint32(dst[0:4], s0)
    binary.BigEndian.PutUint32(dst[4:8], s1)
    binary.BigEndian.PutUint32(dst[8:12], s2)
    binary.BigEndian.PutUint32(dst[12:16], s3)
}

总的来说理解了AES的过程，源码实现并不难。目前看源码后，还没有明确的问题是：

解密秘钥的编排时候，为什么做了Mixcolumn操作?
T-table是如何构造的？从加密过程的最后一轮源码来看，应该是字节代换和Mixcolumn合在一块了。具体怎么算出来的，还需要再看一下文献Rijndael NIST proposal, section 5.2.1。