STM32F439芯片,以下用M4称呼。M4的HASH模块,可以计算SHA1、SHA224、SHA256、MD5这些校验值,也可以计算基于它们的HMAC加密校验值,都是硬件计算。在此以SHA1及其HMAC_SHA1为例,讨论其用法。
介绍一下HMAC的概念:
HMAC(message) = Hash[((key | pad) XOR 0x5C) | Hash(((key | pad) XOR 0x36) | message)]
其中,(key | pad)表示在key的后面缀上若干数量的零,使得其总长度为64bytes,即512bits。假如key本身就已经达到512bits,就不必后缀零了。假如key超过了512bits,那么计算这个key的HASH值来代替原本的key。XOR 0x5C表示这64个byte全部要异或0x5C。XOR 0x36同理,剩下的 | 符号只是简单的连接前面的bit串和后面的bit串。
首先要说明的是,这些校验算法,对原始数据的尺寸,都是以bit为单位的。只不过,平时在电脑上常用的CRC32啦、MD5啦、SHA1啦,因为都是用来校验文件的,而文件的尺寸是以byte为单位的,所以常见的HASH函数都是以byte为单位,就连M4的固件库也不例外。固件库里面的函数如下:
ErrorStatus HASH_SHA1(uint8_t *Input, uint32_t Ilen, uint8_t Output[20]) { …… /* Number of valid bits in last word of the Input data */ nbvalidbitsdata = 8 * (Ilen % 4); /* HASH peripheral initialization */ HASH_DeInit(); …… /* Configure the number of valid bits in last word of the data */ HASH_SetLastWordValidBitsNbr(nbvalidbitsdata); /* Write the Input block in the IN FIFO */ for(i=0; i<Ilen; i+=4) { HASH_DataIn(*(uint32_t*)inputaddr); inputaddr+=4; } /* Start the HASH processor */ HASH_StartDigest(); …… if (busystatus != RESET) { status = ERROR; } else { /* Read the message digest */ HASH_GetDigest(&SHA1_MessageDigest); *(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[0]); outputaddr+=4; *(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[1]); …… } return status; }
函数已经包装得很好了,只可惜直接拿来用还是用不了,明明固件库就是为了方便众人使用的,可是这个不能直接用,让人很无语啊……
好吧,其实只要在调用这个函数之前,使能某个总线的时钟即可:
RCC_AHB2PeriphClockCmd (RCC_AHB2Periph_HASH, ENABLE);
算完之后,可以用同一个函数(第二个参数改为DISABLE)把时钟禁用,当然也可以不禁用。
接下来讨论byte和bit的区别。上文也说了,大多数HASH函数都是以byte为单位,固件库函数的Ilen参数就是指字节数,然后是 nbvalidbitsdata = 8 * (Ilen % 4); 这句,计算的是最后一个不足4个字节(即不足32位)的字里有效的位数。明明HASH_STR寄存器的NBLW位域是可以精确到位的,但这里却只精确到字节。
下面改造这个函数,使其可以精确到位。注意,SHA1算法的字节顺序,是高位在前低位在后,也就是说,我们在原始数据的结尾,添上一些位,那么这些位要放置在字节的高位。改造后的函数如下:
ErrorStatus HASH_SHA1_bit(uint8_t *Input, uint32_t bit_Ilen, uint8_t Output[20]) { HASH_InitTypeDef SHA1_HASH_InitStructure; HASH_MsgDigest SHA1_MessageDigest; __IO uint16_t nbvalidbitsdata = 0; uint32_t i = 0; __IO uint32_t counter = 0; uint32_t busystatus = 0; ErrorStatus status = SUCCESS; uint32_t inputaddr = (uint32_t)Input; uint32_t outputaddr = (uint32_t)Output; /* Number of valid bits in last word of the Input data */ nbvalidbitsdata = bit_Ilen % 32; //8 * (Ilen % 4); /* HASH peripheral initialization */ HASH_DeInit(); /* HASH Configuration */ SHA1_HASH_InitStructure.HASH_AlgoSelection = HASH_AlgoSelection_SHA1; SHA1_HASH_InitStructure.HASH_AlgoMode = HASH_AlgoMode_HASH; SHA1_HASH_InitStructure.HASH_DataType = HASH_DataType_8b; HASH_Init(&SHA1_HASH_InitStructure); /* Configure the number of valid bits in last word of the data */ HASH_SetLastWordValidBitsNbr(nbvalidbitsdata); /* Write the Input block in the IN FIFO */ for(i=0; i<bit_Ilen; i+=4 * 8) // for(i=0; i<Ilen; i+=4) { HASH_DataIn(*(uint32_t*)inputaddr); inputaddr+=4; } /* Start the HASH processor */ HASH_StartDigest(); /* wait until the Busy flag is RESET */ do { busystatus = HASH_GetFlagStatus(HASH_FLAG_BUSY); counter++; }while ((counter != SHA1BUSY_TIMEOUT) && (busystatus != RESET)); if (busystatus != RESET) { status = ERROR; } else { /* Read the message digest */ HASH_GetDigest(&SHA1_MessageDigest); *(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[0]); outputaddr+=4; *(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[1]); outputaddr+=4; *(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[2]); outputaddr+=4; *(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[3]); outputaddr+=4; *(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[4]); } return status; } ErrorStatus HMAC_SHA1_bit(uint8_t *Key, uint32_t bit_Keylen, uint8_t *Input, uint32_t bit_Ilen, uint8_t Output[20]) { HASH_InitTypeDef SHA1_HASH_InitStructure; HASH_MsgDigest SHA1_MessageDigest; __IO uint16_t 
nbvalidbitsdata = 0; __IO uint16_t nbvalidbitskey = 0; uint32_t i = 0; __IO uint32_t counter = 0; uint32_t busystatus = 0; ErrorStatus status = SUCCESS; uint32_t keyaddr = (uint32_t)Key; uint32_t inputaddr = (uint32_t)Input; uint32_t outputaddr = (uint32_t)Output; /* Number of valid bits in last word of the Input data */ nbvalidbitsdata = bit_Ilen % 32; // 8 * (Ilen % 4); /* Number of valid bits in last word of the Key */ nbvalidbitskey = bit_Keylen % 32; // 8 * (Keylen % 4); /* HASH peripheral initialization */ HASH_DeInit(); /* HASH Configuration */ SHA1_HASH_InitStructure.HASH_AlgoSelection = HASH_AlgoSelection_SHA1; SHA1_HASH_InitStructure.HASH_AlgoMode = HASH_AlgoMode_HMAC; SHA1_HASH_InitStructure.HASH_DataType = HASH_DataType_8b; if(bit_Keylen > 64 * 8) // if(Keylen > 64) { /* HMAC long Key */ SHA1_HASH_InitStructure.HASH_HMACKeyType = HASH_HMACKeyType_LongKey; } else { /* HMAC short Key */ SHA1_HASH_InitStructure.HASH_HMACKeyType = HASH_HMACKeyType_ShortKey; } HASH_Init(&SHA1_HASH_InitStructure); /* Configure the number of valid bits in last word of the Key */ HASH_SetLastWordValidBitsNbr(nbvalidbitskey); /* Write the Key */ for(i=0; i<bit_Keylen; i+=4 * 8) // for(i=0; i<Keylen; i+=4) { HASH_DataIn(*(uint32_t*)keyaddr); keyaddr+=4; } /* Start the HASH processor */ HASH_StartDigest(); /* wait until the Busy flag is RESET */ do { busystatus = HASH_GetFlagStatus(HASH_FLAG_BUSY); counter++; }while ((counter != SHA1BUSY_TIMEOUT) && (busystatus != RESET)); if (busystatus != RESET) { status = ERROR; } else { /* Configure the number of valid bits in last word of the Input data */ HASH_SetLastWordValidBitsNbr(nbvalidbitsdata); /* Write the Input block in the IN FIFO */ for(i=0; i<bit_Ilen; i+=4 * 8) // for(i=0; i<Ilen; i+=4) { HASH_DataIn(*(uint32_t*)inputaddr); inputaddr+=4; } /* Start the HASH processor */ HASH_StartDigest(); /* wait until the Busy flag is RESET */ counter =0; do { busystatus = HASH_GetFlagStatus(HASH_FLAG_BUSY); counter++; }while ((counter != 
SHA1BUSY_TIMEOUT) && (busystatus != RESET)); if (busystatus != RESET) { status = ERROR; } else { /* Configure the number of valid bits in last word of the Key */ HASH_SetLastWordValidBitsNbr(nbvalidbitskey); /* Write the Key */ keyaddr = (uint32_t)Key; for(i=0; i<bit_Keylen; i+=4 * 8) // for(i=0; i<Keylen; i+=4) { HASH_DataIn(*(uint32_t*)keyaddr); keyaddr+=4; } /* Start the HASH processor */ HASH_StartDigest(); /* wait until the Busy flag is RESET */ counter =0; do { busystatus = HASH_GetFlagStatus(HASH_FLAG_BUSY); counter++; }while ((counter != SHA1BUSY_TIMEOUT) && (busystatus != RESET)); if (busystatus != RESET) { status = ERROR; } else { /* Read the message digest */ HASH_GetDigest(&SHA1_MessageDigest); *(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[0]); outputaddr+=4; *(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[1]); outputaddr+=4; *(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[2]); outputaddr+=4; *(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[3]); outputaddr+=4; *(uint32_t*)(outputaddr) = __REV(SHA1_MessageDigest.Data[4]); } } } return status; }函数还是那么长,这里只说和固件库函数的区别,首先在函数名后面缀上 bit表示这俩函数可以对原始数据精确到bit,同时给参数中的长度, Ilen和KeyLen也缀上bit,表示这俩参数也是以bit为单位。然后编译一下,所有有错误的行,都是要修改的地方,改起来只要注意把针对字节的计算和判断改成针对位的就行了。
接下来生成一组key和一组message,并选取各种不同的长度来进行测试。为了便于在IAR开发环境下查看HASH结果,用如下的函数专门把结果转换成十六进制字符串:
/*
 * Convert a 20-byte digest into a 40-character upper-case hexadecimal
 * string followed by a terminating NUL.
 *
 * ref_hash : destination, at least 41 chars; volatile so the value stays
 *            visible when inspected in the debugger.
 * Output   : the 20 digest bytes to format.
 *
 * A NULL ref_hash is ignored; a NULL Output yields an empty string.
 */
void out_to_hash( volatile char ref_hash[41], uint8_t Output[20] )
{
    static const char hex_digits[] = "0123456789ABCDEF";

    if (ref_hash == NULL) {
        return;
    }
    if (Output == NULL) {
        ref_hash[0] = 0;
        return;
    }

    for (size_t i = 0; i < 20; i++) {
        const unsigned char byte = Output[i];

        ref_hash[i * 2 + 0] = hex_digits[(byte >> 4U) & 0x0F]; /* high nibble */
        ref_hash[i * 2 + 1] = hex_digits[(byte >> 0U) & 0x0F]; /* low nibble */
    }
    ref_hash[40] = 0;
}

/* The test code is as follows: */
/*
 * Exercise HASH_SHA1_bit and HMAC_SHA1_bit with a range of bit lengths and
 * leave each result as a hexadecimal string in a volatile local, so the
 * values can be inspected in the debugger.
 *
 * The status returned by the hash functions is not checked here; if a call
 * fails, the string shown for it is whatever `output` held beforehand.
 */
void hash_test( void )
{
    uint8_t key[256] = {0};
    uint8_t message[256] = {0};

    /* Deterministic test patterns; values wrap modulo 256. */
    for (size_t i = 0; i < 256; i++) {
        key[i] = (uint8_t)(8 + i * 13);
        message[i] = (uint8_t)(3 + i * 5);
    }

    /*
     * Eight zero bits stand in for an empty key (see the note after this
     * function).  A full 4-byte buffer is used so that a word-sized read of
     * the key stays inside the object; the string literal "" used before
     * was only one byte long and of type char *.
     */
    uint8_t zero_key[4] = {0};

    volatile char sha1_1b            [41] = "";
    volatile char sha1_5b            [41] = "";
    volatile char sha1_8b            [41] = "";
    volatile char sha1_13b           [41] = "";
    volatile char sha1_21b           [41] = "";
    volatile char sha1_34b           [41] = "";
    volatile char sha1_377b          [41] = "";
    volatile char sha1_610b          [41] = "";
    volatile char hmac_0b_sha1_0b    [41] = "";
    volatile char hmac_8b_sha1_8b    [41] = "";
    volatile char hmac_8b_sha1_13b   [41] = "";
    volatile char hmac_8b_sha1_610b  [41] = "";
    volatile char hmac_13b_sha1_8b   [41] = "";
    volatile char hmac_13b_sha1_13b  [41] = "";
    volatile char hmac_13b_sha1_610b [41] = "";
    volatile char hmac_512b_sha1_8b  [41] = "";
    volatile char hmac_512b_sha1_13b [41] = "";
    volatile char hmac_512b_sha1_610b[41] = "";
    volatile char hmac_610b_sha1_8b  [41] = "";
    volatile char hmac_610b_sha1_13b [41] = "";
    volatile char hmac_610b_sha1_610b[41] = "";

    /* The HASH peripheral needs its AHB2 clock enabled before use. */
    RCC_AHB2PeriphClockCmd (RCC_AHB2Periph_HASH, ENABLE);

    uint8_t output[20] = {0};

    /* Plain SHA-1 over the first N bits of message */
    HASH_SHA1_bit (message, 1  , output); out_to_hash (sha1_1b  , output);
    HASH_SHA1_bit (message, 5  , output); out_to_hash (sha1_5b  , output);
    HASH_SHA1_bit (message, 8  , output); out_to_hash (sha1_8b  , output);
    HASH_SHA1_bit (message, 13 , output); out_to_hash (sha1_13b , output);
    HASH_SHA1_bit (message, 21 , output); out_to_hash (sha1_21b , output);
    HASH_SHA1_bit (message, 34 , output); out_to_hash (sha1_34b , output);
    HASH_SHA1_bit (message, 377, output); out_to_hash (sha1_377b, output);
    HASH_SHA1_bit (message, 610, output); out_to_hash (sha1_610b, output);

    /* HMAC-SHA1: (key bits, message bits) combinations */
    HMAC_SHA1_bit (zero_key, 8  , message, 0  , output); out_to_hash (hmac_0b_sha1_0b    , output);
    HMAC_SHA1_bit (key     , 8  , message, 8  , output); out_to_hash (hmac_8b_sha1_8b    , output);
    HMAC_SHA1_bit (key     , 8  , message, 13 , output); out_to_hash (hmac_8b_sha1_13b   , output);
    HMAC_SHA1_bit (key     , 8  , message, 610, output); out_to_hash (hmac_8b_sha1_610b  , output);
    HMAC_SHA1_bit (key     , 13 , message, 8  , output); out_to_hash (hmac_13b_sha1_8b   , output);
    HMAC_SHA1_bit (key     , 13 , message, 13 , output); out_to_hash (hmac_13b_sha1_13b  , output);
    HMAC_SHA1_bit (key     , 13 , message, 610, output); out_to_hash (hmac_13b_sha1_610b , output);
    HMAC_SHA1_bit (key     , 512, message, 8  , output); out_to_hash (hmac_512b_sha1_8b  , output);
    HMAC_SHA1_bit (key     , 512, message, 13 , output); out_to_hash (hmac_512b_sha1_13b , output);
    HMAC_SHA1_bit (key     , 512, message, 610, output); out_to_hash (hmac_512b_sha1_610b, output);
    HMAC_SHA1_bit (key     , 610, message, 8  , output); out_to_hash (hmac_610b_sha1_8b  , output);
    HMAC_SHA1_bit (key     , 610, message, 13 , output); out_to_hash (hmac_610b_sha1_13b , output);
    HMAC_SHA1_bit (key     , 610, message, 610, output); out_to_hash (hmac_610b_sha1_610b, output);
}

/*
 * Note from the article on testing: the key length may be zero, but the
 * hardware does not accept that -- the HMAC value it then computes is
 * simply 160 zero bits.  Given how (key | pad) is formed, appending any
 * number of zero bits to the key is equivalent as long as the total stays
 * within 512 bits, so this key is replaced by 8 zero bits.
 * The test results are as follows:
 */
虽然不知道算得对不对,但看起来挺像那么回事的。