如何识别加密算法?这个问题很朴素,建议工作日带薪如厕看。
在大约2000年的时候,一个研究人员发现,大部分加密算法,在实现里使用了固定常数或固定表。举几个例子,如下是MD5伪代码
// : All variables are unsigned 32 bit and wrap modulo 2^32 when calculating
var int s[64], K[64]
var int i
// s specifies the per-round shift amounts
s[ 0..15] := { 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22 }
s[16..31] := { 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20 }
s[32..47] := { 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23 }
s[48..63] := { 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21 }
// Use binary integer part of the sines of integers (Radians) as constants:
for i from 0 to 63 do
K[i] := floor(2^32 × abs (sin(i + 1)))
end for
// (Or just use the following precomputed table):
K[ 0.. 3] := { 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee }
K[ 4.. 7] := { 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501 }
K[ 8..11] := { 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be }
K[12..15] := { 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 }
K[16..19] := { 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa }
K[20..23] := { 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8 }
K[24..27] := { 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed }
K[28..31] := { 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a }
K[32..35] := { 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c }
K[36..39] := { 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70 }
K[40..43] := { 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05 }
K[44..47] := { 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 }
K[48..51] := { 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039 }
K[52..55] := { 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1 }
K[56..59] := { 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1 }
K[60..63] := { 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 }
// Initialize variables:
var int a0 := 0x67452301 // A
var int b0 := 0xefcdab89 // B
var int c0 := 0x98badcfe // C
var int d0 := 0x10325476 // D
// Pre-processing: adding a single 1 bit
append "1" bit to message
// Notice: the input bytes are considered as bits strings,
// where the first bit is the most significant bit of the byte.
// Pre-processing: padding with zeros
append "0" bit until message length in bits ≡ 448 (mod 512)
// Notice: the two padding steps above are implemented in a simpler way
// in implementations that only work with complete bytes: append 0x80
// and pad with 0x00 bytes so that the message length in bytes ≡ 56 (mod 64).
append original length in bits mod 2^64 to message
// Process the message in successive 512-bit chunks:
for each 512-bit chunk of padded message do
break chunk into sixteen 32-bit words M[j], 0 ≤ j ≤ 15
// Initialize hash value for this chunk:
var int A := a0
var int B := b0
var int C := c0
var int D := d0
// Main loop:
for i from 0 to 63 do
var int F, g
if 0 ≤ i ≤ 15 then
F := (B and C) or ((not B) and D)
g := i
else if 16 ≤ i ≤ 31 then
F := (D and B) or ((not D) and C)
g := (5×i + 1) mod 16
else if 32 ≤ i ≤ 47 then
F := B xor C xor D
g := (3×i + 5) mod 16
else if 48 ≤ i ≤ 63 then
F := C xor (B or (not D))
g := (7×i) mod 16
// Be wary of the below definitions of a,b,c,d
F := F + A + K[i] + M[g] // M[g] must be a 32-bits block
A := D
D := C
C := B
B := B + leftrotate(F, s[i])
end for
// Add this chunk's hash to result so far:
a0 := a0 + A
b0 := b0 + B
c0 := c0 + C
d0 := d0 + D
end for
var char digest[16] := a0 append b0 append c0 append d0 // (Output is in little-endian)
其中固定常数为0x67452301/0xefcdab89 等四个初始化IV,固定表为64个32位int组成的K表。
再看一下AES,如下为密钥编排部分的代码,其中有固定的S盒和Rcon数组。
#include <iostream>
#include <bitset>
using namespace std;
// An 8-bit value and a 32-bit value, both stored as bitsets.
typedef bitset<8> byte;
typedef bitset<32> word;
const int Nr = 10; // AES-128 uses 10 encryption rounds
const int Nk = 4; // Nk is the number of words in the input key
// Fixed substitution table. SubWord looks entries up as S_Box[row][col],
// where row is the high nibble and col is the low nibble of the input byte.
byte S_Box[16][16] = {
{0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76},
{0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0},
{0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15},
{0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75},
{0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84},
{0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF},
{0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8},
{0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2},
{0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73},
{0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB},
{0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79},
{0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08},
{0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A},
{0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E},
{0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF},
{0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16}
};
// Round constants used by the key expansion (AES-128 needs only 10 rounds).
// KeyExpansion reads them as Rcon[i/Nk-1].
word Rcon[10] = {0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000,
0x20000000, 0x40000000, 0x80000000, 0x1b000000, 0x36000000};
/**
 * Pack four bytes into one 32-bit word.
 *
 * k1 lands in bits 31..24, k2 in bits 23..16, k3 in bits 15..8 and
 * k4 in bits 7..0 of the returned word.
 */
word Word(byte& k1, byte& k2, byte& k3, byte& k4)
{
    word result(0x00000000);
    word temp;

    temp = k1.to_ulong();  // K1 -> most significant byte
    temp <<= 24;
    result |= temp;

    temp = k2.to_ulong();  // K2
    temp <<= 16;
    result |= temp;

    temp = k3.to_ulong();  // K3
    temp <<= 8;
    result |= temp;

    temp = k4.to_ulong();  // K4 -> least significant byte
    result |= temp;

    return result;
}
/**
 * Rotate a word left by one byte:
 * [a0, a1, a2, a3] becomes [a1, a2, a3, a0].
 */
word RotWord(word& rw)
{
    // The top byte falls off the left shift and is brought back in
    // at the bottom by the right shift.
    return (rw << 8) | (rw >> 24);
}
/**
 * Apply the S-box to each of the four bytes of the input word.
 */
word SubWord(word& sw)
{
    word substituted;
    for (int shift = 0; shift < 32; shift += 8)
    {
        // Isolate one byte; its high nibble is the row, its low nibble the column.
        unsigned long octet = (sw >> shift).to_ulong() & 0xFFul;
        byte replaced = S_Box[octet >> 4][octet & 0x0Ful];
        // Put the substituted byte back at the same position.
        substituted |= word(replaced.to_ulong()) << shift;
    }
    return substituted;
}
/**
 * Key expansion: expand the 128-bit key into w[4*(Nr+1)] round-key words.
 *
 * key - the 4*Nk input key bytes
 * w   - output array of 4*(Nr+1) words; the first Nk words are the key itself
 */
void KeyExpansion(byte key[4*Nk], word w[4*(Nr+1)])
{
    // The first Nk words of w[] are the input key.
    for (int i = 0; i < Nk; ++i)
        w[i] = Word(key[4*i], key[4*i+1], key[4*i+2], key[4*i+3]);

    for (int i = Nk; i < 4*(Nr+1); ++i)
    {
        word temp = w[i-1];  // previous word
        if (i % Nk == 0)
        {
            // RotWord returns a temporary, which cannot bind to SubWord's
            // non-const reference parameter in standard C++, so name it first.
            word rotated = RotWord(temp);
            temp = SubWord(rotated) ^ Rcon[i/Nk-1];
        }
        w[i] = w[i-Nk] ^ temp;
    }
}
/**
 * Demo driver: print a fixed 16-byte key, expand it, and print every
 * word of the resulting key schedule in hexadecimal.
 */
int main()
{
    byte key[16] = {0x2b, 0x7e, 0x15, 0x16,
                    0x28, 0xae, 0xd2, 0xa6,
                    0xab, 0xf7, 0x15, 0x88,
                    0x09, 0xcf, 0x4f, 0x3c};
    word w[4*(Nr+1)];

    cout << "KEY IS: ";
    for (int i = 0; i < 16; ++i)
        cout << hex << key[i].to_ulong() << " ";
    cout << endl;

    KeyExpansion(key, w);

    // Dump the expanded key; the index is decimal, the word is hex.
    for (int i = 0; i < 4*(Nr+1); ++i)
        cout << "w[" << dec << i << "] = " << hex << w[i].to_ulong() << endl;
    return 0;
}
研究人员认为,可以收集常见加密算法的常数和表特征,然后在分析二进制文件时,搜索这些常数/表的字节序列。如果匹配成功,就可以判定程序中大概率使用了此种加密算法,这会辅助和引导算法分析的进程。而部分算法,没有固定常数,那就从另外两个方面入手。
一是函数名或其他字符信息,许多时候会直接暴露加密算法。
二是代码结构
比如RC4,不存在固定常量,但代码中存在循环结构,且循环256次。在汇编中可以匹配、检查这种循环的结构特征。除此之外他发现,因为加密算法中会有大量运算,其汇编层面,运算相关指令占比相较于一般程序也会极高。这就是结构特征。
#include <sys/types.h>
#include <crypto/rc4/rc4.h>
/* Exchange the two bytes that a and b point to. */
static __inline void
swap_bytes(u_char *a, u_char *b)
{
    const u_char saved = *b;

    *b = *a;
    *a = saved;
}
/*
* Initialize an RC4 state buffer using the supplied key,
* which can have arbitrary length.
*/
void
rc4_init(struct rc4_state *const state, const u_char *key, int keylen)
{
u_char j;
int i;
/* Initialize state with identity permutation */
for (i = 0; i < 256; i++)
state->perm[i] = (u_char)i;
state->index1 = 0;
state->index2 = 0;
/* Randomize the permutation using key data */
for (j = i = 0; i < 256; i++) {
j += state->perm[i] + key[i % keylen];
swap_bytes(&state->perm[i], &state->perm[j]);
}
}
/*
 * Run buflen bytes from inbuf through the RC4 state and write the
 * result to outbuf. Each output byte is the input byte XORed with a
 * byte drawn from the permutation, so the same call serves for both
 * encryption and decryption. inbuf and outbuf may be the same buffer.
 */
void
rc4_crypt(struct rc4_state *const state,
    const u_char *inbuf, u_char *outbuf, int buflen)
{
    const u_char *src = inbuf;
    u_char *dst = outbuf;
    int remaining = buflen;

    while (remaining-- > 0) {
        u_char pick;

        /* Advance the two state indices */
        state->index1++;
        state->index2 += state->perm[state->index1];

        /* Swap the two permutation entries they select */
        swap_bytes(&state->perm[state->index1],
            &state->perm[state->index2]);

        /* The byte-wide sum of the two entries selects the keystream byte */
        pick = state->perm[state->index1] + state->perm[state->index2];
        *dst++ = *src++ ^ state->perm[pick];
    }
}
于是他写了一篇论文,发表了这些观点,题目大意是《如何在二进制文件中快速识别加密算法》。同行们都很认可,并基于此文进行了代码上的落地实现。IDA上各种版本的Findcrypt,以及Signsrch等,都是其思路的实践。
L4ys/IDASignsrch: IDA_Signsrch in Python
polymorf/findcrypt-yara: IDA pro plugin to find crypto constants
比如看一下Findcrypt中MD5的匹配规则
// Matches when at least five of the ten 4-byte patterns below are present.
rule MD5_Constants {
meta:
author = "phoul (@phoul)"
description = "Look for MD5 constants"
date = "2014-01"
version = "0.2"
strings:
// Init constants: the four initial state words 0x67452301, 0xefcdab89,
// 0x98badcfe and 0x10325476, written most significant byte first
$c0 = { 67452301 }
$c1 = { efcdab89 }
$c2 = { 98badcfe }
$c3 = { 10325476 }
// The same four words with their bytes in reverse order
$c4 = { 01234567 }
$c5 = { 89ABCDEF }
$c6 = { FEDCBA98 }
$c7 = { 76543210 }
// Round 2
// $c8 is the K-table entry 0xf4d50d87, most significant byte first
$c8 = { F4D50d87 }
// $c9 is the K-table entry 0xd76aa478 with its bytes reversed
$c9 = { 78A46AD7 }
condition:
// Any five of $c0..$c9 are enough
5 of them
}
// Matches when one of the two DLL names is present together with all
// three MD5 function names.
rule MD5_API {
meta:
author = "_pusher_"
description = "Looks for MD5 API"
date = "2016-07"
strings:
// DLL names: searched as both wide and ASCII strings, case-insensitively
$advapi32 = "advapi32.dll" wide ascii nocase
$cryptdll = "cryptdll.dll" wide ascii nocase
// Function names: searched as both wide and ASCII strings, case-sensitively
$MD5Init = "MD5Init" wide ascii
$MD5Update = "MD5Update" wide ascii
$MD5Final = "MD5Final" wide ascii
condition:
// Either DLL name, plus all of Init/Update/Final
($advapi32 or $cryptdll) and ($MD5Init and $MD5Update and $MD5Final)
}
yara是一个基于规则的恶意样本分析工具,Findcrypt(findcrypt-yara)采用了yara来简化代码编写工作,yara规则的基本结构如下
rule RuleName
{
meta:
strings:
condition:
}
一般一个yara文件分为上述的三个部分,meta、strings、condition
可以发现,在检测MD5上,由常量和API两个规则组成。常量规则的condition是10选5:十个字节序列中命中任意五个即可。其中c0-c7是四个初始化魔数各自的两种字节序(正序与逆序),c8、c9则取自K表:c8是K[26](0xf4d50d87),c9是K[0](0xd76aa478)的逆序字节。API规则则是字符串匹配。可以发现,总体上就是对《如何在二进制文件中快速识别加密算法》的实现。
在分析SO时,我们会先运行Findcrypt或者Signsrch这类脚本,检测样本中的加解密算法,以期对样本有更深的了解。或者直接顺着这些常量,找其对应程序,做第二部分——分析关键要素。所谓的“顺着这些常量”,静态分析的办法是查看交叉引用,动态分析的办法是在这些数据块下断点。
顺带一提,理论上,第二部分“分析关键要素”,也能做自动化,为什么没看到这类产品呢,有两个原因
简而言之,做这事的性价比不高。而Findcrypt这类工具,成本小,效果也不错,所以才会广为流传。
如果样本使用了Findcrypt所包含的算法,但检测不到,那可能是什么原因呢?这可太多了,排除掉 Findcrypt 匹配规则有误或不全外,简单陈述如下。
一是程序在编译后,常量未必整整齐齐在二进制中能找到
比如上图,0xEFCDAB89是0x67452301+0x88888888计算而来,二进制文件中自然无法搜索0xEFCDAB89这样的四字节。
二是在不魔改算法的情况下,依然有很多办法可以让内存中找不到相关表/常量
常量混淆,val = ax + by + C。比如100 = 10*5 + 7*7 + 1,常数这么替换后,就无法找到了。固定表也即常数数组,同理可以逐个计算和替换。
编码或加密,比如开发中把加密算法里的固定表base64编码,运行前再解码数组所在内存块,这样就可以了,也不一定是base64,简单的异或也挺好。或者简单的逐个字节赋值,也行。
/*
 * Build the 16-byte key "goodl-aes-key123" on the heap, storing it one
 * byte at a time instead of copying it from a string literal.
 *
 * Returns a newly allocated, NUL-terminated 17-byte buffer that the
 * caller must free(), or NULL when the allocation fails.
 */
static const uint8_t *getKey() {
    const int len = 16;
    uint8_t *src = malloc(len + 1);

    /* Do not write through a failed allocation */
    if (src == NULL) {
        return NULL;
    }

    src[0] = 'g';
    src[1] = 'o';
    src[2] = 'o';
    src[3] = 'd';
    src[4] = 'l';
    src[5] = '-';
    src[6] = 'a';
    src[7] = 'e';
    src[8] = 's';
    src[9] = '-';
    src[10] = 'k';
    src[11] = 'e';
    src[12] = 'y';
    src[13] = '1';
    src[14] = '2';
    src[15] = '3';
    src[len] = '\0';
    return src;
}
/*
 * Build the 16-byte IV "goodl-aes-iv1234" on the heap, storing it one
 * byte at a time instead of copying it from a string literal.
 *
 * Returns a newly allocated, NUL-terminated 17-byte buffer that the
 * caller must free(), or NULL when the allocation fails.
 */
static const uint8_t *getIV() {
    const int len = 16;
    uint8_t *src = malloc(len + 1);

    /* Do not write through a failed allocation */
    if (src == NULL) {
        return NULL;
    }

    src[0] = 'g';
    src[1] = 'o';
    src[2] = 'o';
    src[3] = 'd';
    src[4] = 'l';
    src[5] = '-';
    src[6] = 'a';
    src[7] = 'e';
    src[8] = 's';
    src[9] = '-';
    src[10] = 'i';
    src[11] = 'v';
    src[12] = '1';
    src[13] = '2';
    src[14] = '3';
    src[15] = '4';
    src[len] = '\0';
    return src;
}
SO加固,更整体的对SO全局进行保护,但dump+fix后就又可以正常findcrypt。
展开计算,看起来和常量混淆很像,但本质不同。密码学家在设计密码时,常量如何选择?有一些是随便选,另一些是依照数学公式。
仅以MD5为例,MD5的初始常数比较随意:0x67452301/0xEFCDAB89/0x98badcfe/0x10325476,按小端字节序写出来就是 01 23 45 67 89 AB CD EF,接着再倒着来一遍 FE DC BA 98 76 54 32 10。而SHA256则有八个初始常数,取自前8个素数(2、3、5、7、11、13、17、19)的平方根小数部分的前32位二进制,听着就很拗口是吧。再比如MD5的K表,也是正儿八经算出来的。
// Use binary integer part of the sines of integers (Radians) as constants:
// 计算出来
for i from 0 to 63 do
K[i] := floor(2^32 × abs (sin(i + 1)))
end for
// 将计算出来的结果硬编码
// (Or just use the following precomputed table):
K[ 0.. 3] := { 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee }
// ……………………
一般的代码实现中,会直接使用计算出来的结果硬编码,但我们可以按照常数的定义把计算展开,形式上和常量混淆类似,但内涵不同。
识别加密算法是我们密码学讲解中,三部分的第一部分,它的内容量最小,也最简单。