遇到一段汇编代码,一开始没认出来是查表法实现的AES。这里完全按字节处理来实现AES加密计算,以加深对查表法AES的印象。
由于采用对字节的操作,会造成很多无谓的内存读写,运算速度肯定没有直接操作uint(4字节)来得快。本来反汇编出来的代码就是按uint操作的,这里只是想学习具体细节,才转换成按字节操作。后续有空了,再把整理后的uint操作的代码贴出来吧。
主要参考了:
查表法的理论计算过程
部分代码参考
部分代码参考2
/// <summary>
/// Reads four consecutive bytes from <paramref name="buf"/> and assembles them
/// into a uint in big-endian order.
/// </summary>
/// <param name="buf">Source byte array.</param>
/// <param name="index">Offset of the first byte; it becomes the most significant byte of the result.</param>
/// <returns>buf[index] in bits 31..24, down to buf[index + 3] in bits 7..0.</returns>
public static uint ReadUint_BE(byte[] buf, int index)
{
    // Accumulate from the most significant byte downwards
    uint value = buf[index];
    value = (value << 8) | buf[index + 1];
    value = (value << 8) | buf[index + 2];
    value = (value << 8) | buf[index + 3];
    return value;
}
/// <summary>
/// Stores <paramref name="data"/> into four consecutive bytes of
/// <paramref name="buf"/> in big-endian order.
/// </summary>
/// <param name="buf">Destination byte array.</param>
/// <param name="index">Offset that receives the most significant byte.</param>
/// <param name="data">Value to store.</param>
public static void WriteUint_BE(byte[] buf, int index, uint data)
{
    // Walk from the last byte back to the first, peeling off the low 8 bits each step
    for (int offset = 3; offset >= 0; offset--)
    {
        buf[index + offset] = (byte)data;
        data >>= 8;
    }
}
/// <summary>
/// Reads four consecutive bytes from <paramref name="buf"/> and assembles them
/// into a uint in little-endian order.
/// </summary>
/// <param name="buf">Source byte array.</param>
/// <param name="index">Offset of the first byte; it becomes the least significant byte of the result.</param>
/// <returns>buf[index] in bits 7..0, up to buf[index + 3] in bits 31..24.</returns>
public static uint ReadUint_LE(byte[] buf, int index)
{
    uint value = 0;
    for (int offset = 0; offset < 4; offset++)
    {
        // Byte number 'offset' lands 8 * offset bits up
        value |= (uint)buf[index + offset] << (8 * offset);
    }
    return value;
}
/// <summary>
/// Stores <paramref name="data"/> into four consecutive bytes of
/// <paramref name="buf"/> in little-endian order.
/// </summary>
/// <param name="buf">Destination byte array.</param>
/// <param name="index">Offset that receives the least significant byte.</param>
/// <param name="data">Value to store.</param>
public static void WriteUint_LE(byte[] buf, int index, uint data)
{
    for (int offset = 0; offset < 4; offset++)
    {
        // Low byte first: byte number 'offset' is taken from 8 * offset bits up
        buf[index + offset] = (byte)(data >> (8 * offset));
    }
}
/// <summary>Returns bits 31..24 of <paramref name="data"/>.</summary>
public static byte HIBYTE(uint data)
{
    return unchecked((byte)(data >> 24));
}

/// <summary>Returns bits 23..16 of <paramref name="data"/>.</summary>
public static byte BYTE2(uint data)
{
    return unchecked((byte)(data >> 16));
}

/// <summary>Returns bits 15..8 of <paramref name="data"/>.</summary>
public static byte BYTE1(uint data)
{
    return unchecked((byte)(data >> 8));
}

/// <summary>Returns bits 7..0 of <paramref name="data"/>.</summary>
public static byte LOWBYTE(uint data)
{
    return unchecked((byte)data);
}
为了避免加密时的移位运算,这里预先把sbox的每个值分别左移0、8、16、24位,得到4个uint[]的sbox。
/// <summary>
/// Computes the 256-entry byte substitution table: each input value is replaced
/// by its inverse (via calc_inverse), passed through an 8x8 bit-matrix
/// transform, and XORed with the constant 0x63.
/// </summary>
/// <returns>A new 256-byte table indexed by the input byte.</returns>
public static byte[] create_sbox()
{
    // Row r of the bit matrix, expressed as a mask over the input bits
    byte[] matrixRows = { 0xf1, 0xe3, 0xc7, 0x8f, 0x1f, 0x3e, 0x7c, 0xf8 };
    const int affineConstant = 0x63;

    byte[] table = new byte[0x100];
    for (int value = 0; value < table.Length; value++)
    {
        byte inverse = calc_inverse((byte)value);
        int mixed = 0;
        for (int row = 0; row < matrixRows.Length; row++)
        {
            // Output bit 'row' is the parity of the input bits selected by this row's mask
            int parity = matrixRows[row] & inverse;
            parity ^= parity >> 4;
            parity ^= parity >> 2;
            parity ^= parity >> 1;
            mixed |= (parity & 1) << row;
        }
        table[value] = (byte)(mixed ^ affineConstant);
    }
    return table;
}
/// <summary>
/// Expands a byte S-box into four uint tables holding each entry pre-shifted
/// left by 0, 8, 16 and 24 bits, so lookups need no shift of their own.
/// </summary>
/// <param name="oriSbox">Source table; its first 256 entries are read.</param>
/// <param name="sbox0">Receives the entries unshifted.</param>
/// <param name="sbox1">Receives the entries shifted left by 8 bits.</param>
/// <param name="sbox2">Receives the entries shifted left by 16 bits.</param>
/// <param name="sbox3">Receives the entries shifted left by 24 bits.</param>
public static void create_shift_sbox(byte[] oriSbox, out uint[] sbox0, out uint[] sbox1, out uint[] sbox2, out uint[] sbox3)
{
    sbox0 = new uint[256];
    sbox1 = new uint[256];
    sbox2 = new uint[256];
    sbox3 = new uint[256];

    for (int entry = 0; entry < 256; entry++)
    {
        uint value = oriSbox[entry];
        sbox0[entry] = value;
        sbox1[entry] = value << 8;
        sbox2[entry] = value << 16;
        sbox3[entry] = value << 24;
    }
}
/// <summary>
/// Finds the multiplicative inverse of <paramref name="tar"/> under gf_mul by
/// trying every non-zero candidate.
/// </summary>
/// <param name="tar">Value to invert.</param>
/// <returns>The value i with gf_mul(tar, i) == 1, or 0 when <paramref name="tar"/> is 0.</returns>
/// <exception cref="Exception">No candidate produced 1.</exception>
private static byte calc_inverse(byte tar)
{
    if (tar != 0x00)
    {
        for (int candidate = 0x01; candidate <= 0xff; candidate++)
        {
            if (gf_mul(tar, (byte)candidate) == 0x01)
            {
                return (byte)candidate;
            }
        }
        throw new Exception("不对劲,没有正常return");
    }

    // Zero is mapped to zero without searching
    return 0x00;
}
/// <summary>
/// Multiplies two bytes as polynomials over GF(2), reducing with 0x1b whenever
/// a doubling step overflows bit 7.
/// </summary>
/// <param name="a">First factor; it is doubled repeatedly.</param>
/// <param name="b">Second factor; its bits select which doublings are summed.</param>
/// <returns>The reduced 8-bit product.</returns>
private static byte gf_mul(byte a, byte b)
{
    int product = 0;
    int term = a; // a multiplied by 2^bit, already reduced

    for (int bit = 0; bit < 8; bit++)
    {
        if (((b >> bit) & 0x01) != 0)
        {
            product ^= term;
        }

        // Double the term; fold the bit shifted out of the byte back in with 0x1b
        bool overflow = (term & 0x80) != 0;
        term = (term << 1) & 0xff;
        if (overflow)
        {
            term ^= 0x1b;
        }
    }

    return (byte)product;
}
/// <summary>
/// Builds four 256-entry uint tables from an S-box. Each entry packs the S-box
/// value multiplied (via gf_mul) by 1, 2 or 3 into the four bytes of a word.
/// </summary>
/// <remarks>
/// Multipliers per table, listed from the most significant byte to the least:
///   tbox0: 2 1 1 3
///   tbox1: 3 2 1 1
///   tbox2: 1 3 2 1
///   tbox3: 1 1 3 2
/// </remarks>
/// <param name="sbox">Source S-box; its first 256 entries are read.</param>
/// <param name="tbox0">Receives the table with multipliers 2 1 1 3.</param>
/// <param name="tbox1">Receives the table with multipliers 3 2 1 1.</param>
/// <param name="tbox2">Receives the table with multipliers 1 3 2 1.</param>
/// <param name="tbox3">Receives the table with multipliers 1 1 3 2.</param>
public static void create_tbox(byte[] sbox, out uint[] tbox0, out uint[] tbox1, out uint[] tbox2, out uint[] tbox3)
{
    tbox0 = new uint[256];
    tbox1 = new uint[256];
    tbox2 = new uint[256];
    tbox3 = new uint[256];

    for (int entry = 0; entry < 256; entry++)
    {
        // Multiplying by 1 is the S-box value itself, so no gf_mul call is needed for it
        uint times1 = sbox[entry];
        uint times2 = gf_mul(sbox[entry], 2);
        uint times3 = gf_mul(sbox[entry], 3);

        tbox0[entry] = (times2 << 24) | (times1 << 16) | (times1 << 8) | times3;
        tbox1[entry] = (times3 << 24) | (times2 << 16) | (times1 << 8) | times1;
        tbox2[entry] = (times1 << 24) | (times3 << 16) | (times2 << 8) | times1;
        tbox3[entry] = (times1 << 24) | (times1 << 16) | (times3 << 8) | times2;
    }
}
/// <summary>
/// Rcon: the ten per-round constants consumed by create_round_keys_bytes,
/// one for each derived round key.
/// </summary>
public static uint[] ROUND_PARAMS_B =
{
    0x01, 0x02, 0x04, 0x08, 0x10,
    0x20, 0x40, 0x80, 0x1B, 0x36,
};
/// <summary>
/// Expands a 16-byte key into the 0xb0-byte schedule (the initial key plus ten
/// derived round keys). Every 4-byte word is stored with its bytes in reverse
/// order relative to the input key.
/// </summary>
/// <param name="keys">Key material; the first 16 bytes are read.</param>
/// <returns>A new 0xb0-byte array holding the expanded key.</returns>
public static byte[] create_round_keys_bytes(byte[] keys)
{
    const int wordSize = 4;
    const int blockSize = 0x10;
    const int roundCount = 10;

    byte[] schedule = new byte[blockSize * (roundCount + 1)];

    // Round key 0 is the key itself, byte-swapped inside each 4-byte word
    for (int word = 0; word < blockSize; word += wordSize)
    {
        for (int offset = 0; offset < wordSize; offset++)
        {
            schedule[word + offset] = keys[word + wordSize - 1 - offset];
        }
    }

    for (int round = 0; round < roundCount; round++)
    {
        int cur = (round + 1) * blockSize;
        int prev = cur - blockSize;

        // The last word of the previous round key
        byte last0 = schedule[cur - 4];
        byte last1 = schedule[cur - 3];
        byte last2 = schedule[cur - 2];
        byte last3 = schedule[cur - 1];

        // First word: previous first word XOR the rotated, substituted last word;
        // the round constant goes into the byte at offset 3
        schedule[cur] = (byte)(schedule[prev] ^ SBOX0[last3]);
        schedule[cur + 1] = (byte)(schedule[prev + 1] ^ SBOX0[last0]);
        schedule[cur + 2] = (byte)(schedule[prev + 2] ^ SBOX0[last1]);
        schedule[cur + 3] = (byte)(schedule[prev + 3] ^ SBOX0[last2] ^ ROUND_PARAMS_B[round]);

        // Remaining 12 bytes: byte 4 positions back XOR byte 16 positions back
        for (int pos = cur + wordSize; pos < cur + blockSize; pos++)
        {
            schedule[pos] = (byte)(schedule[pos - wordSize] ^ schedule[pos - blockSize]);
        }
    }

    return schedule;
}
/// <summary>
/// Four S-box lookup tables, populated by init_box through create_shift_sbox:
/// SBOX0[i]: SBOX_ori[i]
/// SBOX1[i]: SBOX_ori[i] shifted left by 8 bits
/// SBOX2[i]: SBOX_ori[i] shifted left by 0x10 bits
/// SBOX3[i]: SBOX_ori[i] shifted left by 0x18 bits
/// They have no initializer, so they stay null until init_box has run.
/// </summary>
public static uint[] SBOX0;
public static uint[] SBOX1;
public static uint[] SBOX2;
public static uint[] SBOX3;
/// <summary>
/// Four T-box lookup tables derived from the matrix multiplication, populated
/// by init_box through create_tbox. They stay null until init_box has run.
/// </summary>
public static uint[] TBOX0;
public static uint[] TBOX1;
public static uint[] TBOX2;
public static uint[] TBOX3;
/// <summary>
/// Generates the S-box once and fills the static SBOX0..SBOX3 and
/// TBOX0..TBOX3 tables from it. The key-expansion and block-encryption
/// routines read these tables, so call this first.
/// </summary>
public static void init_box()
{
    byte[] baseSbox = create_sbox();

    uint[] shifted0, shifted1, shifted2, shifted3;
    create_shift_sbox(baseSbox, out shifted0, out shifted1, out shifted2, out shifted3);
    SBOX0 = shifted0;
    SBOX1 = shifted1;
    SBOX2 = shifted2;
    SBOX3 = shifted3;

    uint[] mixed0, mixed1, mixed2, mixed3;
    create_tbox(baseSbox, out mixed0, out mixed1, out mixed2, out mixed3);
    TBOX0 = mixed0;
    TBOX1 = mixed1;
    TBOX2 = mixed2;
    TBOX3 = mixed3;
}
/// <summary>
/// Encrypts one 16-byte block in place (AES-128, ten rounds per block) using
/// the TBOX0..TBOX3 and SBOX0..SBOX3 lookup tables.
/// </summary>
/// <param name="data">Block to encrypt; bytes 0..15 are read and then overwritten with the result.</param>
/// <param name="expKey">Expanded key; read as little-endian words at offsets 0 through 0xac.</param>
public static void round_encrypt(byte[] data, byte[] expKey)
{
byte A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15;
byte D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15;
// Initial key addition: the data words are read big-endian, the key words little-endian
uint u0 = ReadUint_LE(expKey, 0) ^ ReadUint_BE(data, 0);
uint u1 = ReadUint_LE(expKey, 4) ^ ReadUint_BE(data, 4);
uint u2 = ReadUint_LE(expKey, 8) ^ ReadUint_BE(data, 8);
uint u3 = ReadUint_LE(expKey, 0xc) ^ ReadUint_BE(data, 0xc);
uint wa0, wa1, wa2, wa3;
int halfRound = (0xa >> 1) - 1; // each pass through the loop performs two rounds
for (int i = 0x10; ; i += 0x20)
{
// Because the data words were read big-endian, the high byte of each word is the lowest-indexed byte of that word in the array.
A0 = HIBYTE(u0);
A1 = BYTE2(u0);
A2 = BYTE1(u0);
A3 = LOWBYTE(u0);
A4 = HIBYTE(u1);
A5 = BYTE2(u1);
A6 = BYTE1(u1);
A7 = LOWBYTE(u1);
A8 = HIBYTE(u2);
A9 = BYTE2(u2);
A10 = BYTE1(u2);
A11 = LOWBYTE(u2);
A12 = HIBYTE(u3);
A13 = BYTE2(u3);
A14 = BYTE1(u3);
A15 = LOWBYTE(u3);
// Odd round: four table lookups per output word, XORed with the round key at offset i
wa0 = TBOX0[A0] ^ TBOX1[A5] ^ TBOX2[A10] ^ TBOX3[A15] ^ ReadUint_LE(expKey, i);
wa1 = TBOX0[A4] ^ TBOX1[A9] ^ TBOX2[A14] ^ TBOX3[A3] ^ ReadUint_LE(expKey, i + 4);
wa2 = TBOX0[A8] ^ TBOX1[A13] ^ TBOX2[A2] ^ TBOX3[A7] ^ ReadUint_LE(expKey, i + 8);
wa3 = TBOX0[A12] ^ TBOX1[A1] ^ TBOX2[A6] ^ TBOX3[A11] ^ ReadUint_LE(expKey, i + 0xc);
// Split the round output into bytes, most significant byte first, mirroring A0..A15 above
D0 = HIBYTE(wa0);
D1 = BYTE2(wa0);
D2 = BYTE1(wa0);
D3 = LOWBYTE(wa0);
D4 = HIBYTE(wa1);
D5 = BYTE2(wa1);
D6 = BYTE1(wa1);
D7 = LOWBYTE(wa1);
D8 = HIBYTE(wa2);
D9 = BYTE2(wa2);
D10 = BYTE1(wa2);
D11 = LOWBYTE(wa2);
D12 = HIBYTE(wa3);
D13 = BYTE2(wa3);
D14 = BYTE1(wa3);
D15 = LOWBYTE(wa3);
if (halfRound == 0) break; // The nine table-driven rounds run as 2*4 + 1: four double-round passes, then this final single round. This shape most likely comes from the optimized assembly this was transcribed from rather than from a deliberate design.
// Even round: same lookups on D0..D15, with the round key at offset i + 0x10
u0 = TBOX0[D0] ^ TBOX1[D5] ^ TBOX2[D10] ^ TBOX3[D15] ^ ReadUint_LE(expKey, i + 0x10);
u1 = TBOX0[D4] ^ TBOX1[D9] ^ TBOX2[D14] ^ TBOX3[D3] ^ ReadUint_LE(expKey, i + 0x14);
u2 = TBOX0[D8] ^ TBOX1[D13] ^ TBOX2[D2] ^ TBOX3[D7] ^ ReadUint_LE(expKey, i + 0x18);
u3 = TBOX0[D12] ^ TBOX1[D1] ^ TBOX2[D6] ^ TBOX3[D11] ^ ReadUint_LE(expKey, i + 0x1c);
halfRound = halfRound - 1;
}
// Final round: uses the pre-shifted S-box tables instead of the T-boxes, with the last round key at offset 0xa0
uint v39 = SBOX0[D15] ^ SBOX1[D10] ^ SBOX2[D5] ^ SBOX3[D0] ^ ReadUint_LE(expKey, 0xa0);
uint v40 = SBOX0[D3] ^ SBOX1[D14] ^ SBOX2[D9] ^ SBOX3[D4] ^ ReadUint_LE(expKey, 0xa4);
uint v42 = SBOX0[D7] ^ SBOX1[D2] ^ SBOX2[D13] ^ SBOX3[D8] ^ ReadUint_LE(expKey, 0xa8);
uint v35 = SBOX0[D11] ^ SBOX1[D6] ^ SBOX2[D1] ^ SBOX3[D12] ^ ReadUint_LE(expKey, 0xac);
// Write the four result words back over the input, most significant byte first
data[0] = HIBYTE(v39);
data[1] = BYTE2(v39);
data[2] = BYTE1(v39);
data[3] = LOWBYTE(v39);
data[4] = HIBYTE(v40);
data[5] = BYTE2(v40);
data[6] = BYTE1(v40);
data[7] = LOWBYTE(v40);
data[8] = HIBYTE(v42);
data[9] = BYTE2(v42);
data[10] = BYTE1(v42);
data[11] = LOWBYTE(v42);
data[12] = HIBYTE(v35);
data[13] = BYTE2(v35);
data[14] = BYTE1(v35);
data[15] = LOWBYTE(v35);
}
这里采用in-place计算,加密结果直接写回到data。
/// <summary>
/// Encrypts <paramref name="data"/> in place with AES-128 in ECB mode: every
/// 16-byte block is encrypted independently. No padding is applied.
/// </summary>
/// <param name="data">Plaintext whose length is a multiple of 16 bytes; overwritten with the ciphertext.</param>
/// <param name="key">Key; the first 16 bytes are used.</param>
/// <exception cref="ArgumentNullException"><paramref name="data"/> or <paramref name="key"/> is null.</exception>
/// <exception cref="ArgumentException">The length of <paramref name="data"/> is not a multiple of 16.</exception>
public static void aes_128_ecb_encrypt(byte[] data, byte[] key)
{
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }
    if (key == null)
    {
        throw new ArgumentNullException(nameof(key));
    }
    // Reject unaligned input before touching anything: otherwise the leading
    // blocks would already be encrypted in place when the trailing partial
    // block makes Array.Copy throw.
    if (data.Length % 0x10 != 0)
    {
        throw new ArgumentException("Data length must be a multiple of 16 bytes.", nameof(data));
    }

    byte[] expKey = create_round_keys_bytes(key);
    byte[] buf = new byte[0x10];
    for (int i = 0; i < data.Length; i += 0x10)
    {
        // round_encrypt works on bytes 0..15 of its buffer, so stage each block through buf
        Array.Copy(data, i, buf, 0, 0x10);
        round_encrypt(buf, expKey);
        Array.Copy(buf, 0, data, i, 0x10);
    }
}
/// <summary>
/// Encrypts <paramref name="data"/> in place with AES-128 in CBC mode: each
/// plaintext block is XORed with the previous ciphertext block (the IV for the
/// first block) before being encrypted. No padding is applied.
/// </summary>
/// <param name="data">Plaintext whose length is a multiple of 16 bytes; overwritten with the ciphertext.</param>
/// <param name="key">Key; the first 16 bytes are used.</param>
/// <param name="iv">
/// Initialization vector of at least 16 bytes. Its first 16 bytes are
/// overwritten with the most recent ciphertext block, so on return it holds
/// the chaining value for a follow-up call.
/// </param>
/// <exception cref="ArgumentNullException">An argument is null.</exception>
/// <exception cref="ArgumentException">
/// The length of <paramref name="data"/> is not a multiple of 16, or
/// <paramref name="iv"/> is shorter than 16 bytes.
/// </exception>
public static void aes_128_cbc_encrypt(byte[] data, byte[] key, byte[] iv)
{
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }
    if (key == null)
    {
        throw new ArgumentNullException(nameof(key));
    }
    if (iv == null)
    {
        throw new ArgumentNullException(nameof(iv));
    }
    // Validate before mutating anything so a bad call cannot leave data half-encrypted
    if (data.Length % 0x10 != 0)
    {
        throw new ArgumentException("Data length must be a multiple of 16 bytes.", nameof(data));
    }
    if (iv.Length < 0x10)
    {
        throw new ArgumentException("IV must be at least 16 bytes long.", nameof(iv));
    }

    byte[] expKey = create_round_keys_bytes(key);
    byte[] buf = new byte[0x10];
    for (int i = 0; i < data.Length; i += 0x10)
    {
        // XOR the plaintext block with the current chaining value
        for (int j = 0; j < 0x10; j++)
        {
            buf[j] = (byte)(data[i + j] ^ iv[j]);
        }

        round_encrypt(buf, expKey);
        Array.Copy(buf, 0, data, i, 0x10);

        // The fresh ciphertext block becomes the chaining value for the next
        // block. It always goes to offset 0 of iv: writing at offset i (as
        // before) overran the 16-byte IV from the second block on.
        Array.Copy(buf, 0, iv, 0, 0x10);
    }
}
/// <summary>
/// Console demo: encrypts one sample block in ECB mode and one in CBC mode and
/// prints the inputs and results as hex strings.
/// </summary>
class Program
{
    // Builds the 16-byte sample block 00 01 02 ... 0F used by both demos
    static byte[] NewSampleBlock()
    {
        byte[] block = new byte[0x10];
        for (int n = 0; n < block.Length; n++)
        {
            block[n] = (byte)n;
        }
        return block;
    }

    static void Main(string[] args)
    {
        // The lookup tables must exist before any key expansion or encryption
        MyAes.init_box();

        byte[] keys = ByteUtils.FromHexString("2E82BFAFF4E0C26D1D032A8F90803EAE");
        byte[] iv = ByteUtils.FromHexString("7315254F862899A133B3A832C2F78E0E");

        // ECB demo: plaintext, key, ciphertext
        byte[] ecbBlock = NewSampleBlock();
        Console.WriteLine(ByteUtils.ToHexString(ecbBlock));
        Console.WriteLine(ByteUtils.ToHexString(keys));
        MyAes.aes_128_ecb_encrypt(ecbBlock, keys);
        Console.WriteLine(ByteUtils.ToHexString(ecbBlock));
        Console.WriteLine();

        // CBC demo: plaintext, key, IV, ciphertext
        byte[] cbcBlock = NewSampleBlock();
        Console.WriteLine(ByteUtils.ToHexString(cbcBlock));
        Console.WriteLine(ByteUtils.ToHexString(keys));
        Console.WriteLine(ByteUtils.ToHexString(iv));
        MyAes.aes_128_cbc_encrypt(cbcBlock, keys, iv);
        Console.WriteLine(ByteUtils.ToHexString(cbcBlock));

        // Keep the console window open until a key is pressed
        Console.ReadKey();
    }
}
实际计算结果,和采用公共加密库计算得到的结果一致。