SHA256的SIMD优化

一、简介

本文的SHA256 SIMD优化思路,来自我在网上看到的一篇关于SHA-1优化的文章:我把那篇文章的思想移植到了SHA256上。原文已放到网盘里,有需要的同学可以下载:
点我下载文章
提取码:4l7x

SHA256算法本身的原理这里不再介绍(网上可以搜到不少资料),只把实现代码贴在下面,供大家参考。代码使用SSE2指令,一次同时计算4条短消息的SHA256,每条消息只处理一个512位分组:

// NOTE(review): the header names on the original #include lines were lost when
// the article was extracted; this list is reconstructed from the names the code
// below actually uses - confirm against the author's original file.
#include <cstdint>      // std::uint32_t, std::int32_t
#include <cstdio>       // printf, sprintf/snprintf, puts
#include <cstdlib>      // malloc, free
#include <cstring>
#include <iostream>
#include <emmintrin.h>  // SSE2 intrinsics: __m128i, _mm_*
#include <windows.h>    // LARGE_INTEGER, QueryPerformanceCounter/Frequency
#define NUM 2

// MSVC-specific: silence the C4996 "unsafe CRT function" diagnostics (sprintf).
// Preprocessor directives take no trailing ';'.
#pragma warning(disable:4996)
// These must expand to nothing (not to ';'). They only affect headers that are
// included after this point.
#ifndef _CRT_SECURE_NO_DEPRECATE
#define _CRT_SECURE_NO_DEPRECATE
#endif
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS
#endif

// SHA-256 primitives applied to each of the four 32-bit lanes of an __m128i.
// These are macros, so every argument is evaluated more than once: pass plain
// variables, never expressions with side effects.

// Rotate each lane left by b bits (0 < b < 32). _mm_srli_epi32 is a logical
// shift, so no extra masking of the shifted-in bits is required.
#define SHA256_ROTL(a,b) (_mm_or_si128(_mm_srli_epi32((a),32-(b)),_mm_slli_epi32((a),(b))))
// Logical shift right of each lane by b bits.
#define SHA256_SR(a,b) (_mm_srli_epi32((a),(b)))
// Ch(x,y,z) = (x & y) ^ (~x & z)
#define SHA256_Ch(x,y,z) (_mm_xor_si128(_mm_and_si128((x),(y)),_mm_andnot_si128((x),(z))))
// Maj(x,y,z) = (x & y) ^ (x & z) ^ (y & z)
#define SHA256_Maj(x,y,z) (_mm_xor_si128(_mm_xor_si128(_mm_and_si128((x),(y)),_mm_and_si128((x),(z))),_mm_and_si128((y),(z))))
// Big sigma 0: ROTR 2, 13, 22 written as the equivalent left rotations.
#define SHA256_E0(x) (_mm_xor_si128(_mm_xor_si128(SHA256_ROTL((x),30),SHA256_ROTL((x),19)),SHA256_ROTL((x),10)))
// Big sigma 1: ROTR 6, 11, 25.
#define SHA256_E1(x) (_mm_xor_si128(_mm_xor_si128(SHA256_ROTL((x),26),SHA256_ROTL((x),21)),SHA256_ROTL((x),7)))
// Small sigma 0: ROTR 7, ROTR 18, SHR 3.
#define SHA256_O0(x) (_mm_xor_si128(_mm_xor_si128(SHA256_ROTL((x),25),SHA256_ROTL((x),14)),SHA256_SR((x),3)))
// Small sigma 1: ROTR 17, ROTR 19, SHR 10.
#define SHA256_O1(x) (_mm_xor_si128(_mm_xor_si128(SHA256_ROTL((x),15),SHA256_ROTL((x),13)),SHA256_SR((x),10)))

using namespace std;
// Global scratch buffer of NUM + 1 bytes; not referenced by the code in this listing.
char text[NUM + 1];
/*
 * Builds the single padded 512-bit SHA-256 block for each of four short messages.
 *
 * ss      four messages; the first `length` bytes of every row are hashed.
 * length  number of message bytes per row. Must be in [0, 4], because each row
 *         of ss only holds 4 bytes; such a message always pads to one block.
 * M       output: M[k][0..15] receives the sixteen big-endian 32-bit message
 *         words of message k, one word per element. Each element holds the
 *         word as a signed 32-bit value, whatever sizeof(long) is.
 *
 * Returns 0 in every case (success and rejected input alike), matching the
 * original contract. When length is out of range, M is left untouched.
 */
int  init(char ss[4][4], int length,long M[4][16])
{
    if (length < 0 || length > 4) return 0;

    for (int k = 0; k < 4; ++k)
    {
        // One padded block: message bytes, the 0x80 terminator, zero fill, and
        // the 64-bit big-endian bit count in the last eight bytes.
        unsigned char block[64] = { 0 };
        for (int i = 0; i < length; ++i)
        {
            block[i] = static_cast<unsigned char>(ss[k][i]);
        }
        block[length] = 0x80;
        // length <= 4, so the bit count (at most 32) fits in the final byte.
        block[63] = static_cast<unsigned char>(length << 3);

        // Assemble the words byte by byte instead of type-punning through
        // long*: this is independent of host endianness and of sizeof(long).
        for (int w = 0; w < 16; ++w)
        {
            const std::uint32_t word =
                (static_cast<std::uint32_t>(block[4 * w]) << 24) |
                (static_cast<std::uint32_t>(block[4 * w + 1]) << 16) |
                (static_cast<std::uint32_t>(block[4 * w + 2]) << 8) |
                static_cast<std::uint32_t>(block[4 * w + 3]);
            M[k][w] = static_cast<long>(static_cast<std::int32_t>(word));
        }
    }
    return 0;
}
// Rotates every 32-bit lane of v right by N bits (0 < N < 32).
template <int N>
static inline __m128i sha256_rotr(__m128i v)
{
    return _mm_or_si128(_mm_srli_epi32(v, N), _mm_slli_epi32(v, 32 - N));
}

// Big sigma 0: ROTR2 ^ ROTR13 ^ ROTR22, per lane.
static inline __m128i sha256_big_sigma0(__m128i v)
{
    return _mm_xor_si128(_mm_xor_si128(sha256_rotr<2>(v), sha256_rotr<13>(v)), sha256_rotr<22>(v));
}

// Big sigma 1: ROTR6 ^ ROTR11 ^ ROTR25, per lane.
static inline __m128i sha256_big_sigma1(__m128i v)
{
    return _mm_xor_si128(_mm_xor_si128(sha256_rotr<6>(v), sha256_rotr<11>(v)), sha256_rotr<25>(v));
}

// Small sigma 0: ROTR7 ^ ROTR18 ^ SHR3, per lane.
static inline __m128i sha256_small_sigma0(__m128i v)
{
    return _mm_xor_si128(_mm_xor_si128(sha256_rotr<7>(v), sha256_rotr<18>(v)), _mm_srli_epi32(v, 3));
}

// Small sigma 1: ROTR17 ^ ROTR19 ^ SHR10, per lane.
static inline __m128i sha256_small_sigma1(__m128i v)
{
    return _mm_xor_si128(_mm_xor_si128(sha256_rotr<17>(v), sha256_rotr<19>(v)), _mm_srli_epi32(v, 10));
}

// Ch(e,f,g) = (e & f) ^ (~e & g), per lane.
static inline __m128i sha256_choose(__m128i e, __m128i f, __m128i g)
{
    return _mm_xor_si128(_mm_and_si128(e, f), _mm_andnot_si128(e, g));
}

// Maj(a,b,c) = (a & b) ^ (a & c) ^ (b & c), per lane.
static inline __m128i sha256_majority(__m128i a, __m128i b, __m128i c)
{
    return _mm_xor_si128(_mm_xor_si128(_mm_and_si128(a, b), _mm_and_si128(a, c)), _mm_and_si128(b, c));
}

// Writes the digest held in one lane as 64 upper-case hex digits plus '\0'.
// A null destination is skipped.
static void sha256_write_hex(const std::uint32_t digest[8][4], int lane, char* out)
{
    if (out == nullptr) return;
    std::snprintf(out, 65, "%08X%08X%08X%08X%08X%08X%08X%08X",
                  static_cast<unsigned>(digest[0][lane]), static_cast<unsigned>(digest[1][lane]),
                  static_cast<unsigned>(digest[2][lane]), static_cast<unsigned>(digest[3][lane]),
                  static_cast<unsigned>(digest[4][lane]), static_cast<unsigned>(digest[5][lane]),
                  static_cast<unsigned>(digest[6][lane]), static_cast<unsigned>(digest[7][lane]));
}

void StrSHA256(long M[4][16], long long length, char* sha2560,char* sha2561,char* sha2562,char* sha2563){
    /*
    Computes the SHA-256 digests of four single-block messages in parallel,
    one message per 32-bit SSE2 lane.

    Parameters:
    M        M[k][0..15] are the sixteen 32-bit words of the already padded
             512-bit block of message k; only the low 32 bits of each element
             are used.
    length   unused; the padding inside M already encodes the message length.
    sha2560..sha2563
             output buffers of at least 65 bytes each. Each receives 64
             upper-case hex digits and a terminating '\0'. A null pointer is
             skipped.

    Lane mapping: message 0 sits in the highest lane, so the digest of M[0] is
    written to sha2563, M[1] to sha2562, M[2] to sha2561 and M[3] to sha2560.
    */
    (void)length;

    // Fixed-width unsigned tables: the values above 0x7fffffff do not fit a
    // 32-bit long, and the arithmetic is defined modulo 2^32.
    static const std::uint32_t kInitialState[8] = {
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
    };
    static const std::uint32_t kRoundConstants[64] = {
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
        0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
        0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
        0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
        0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
        0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
        0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
        0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
    };

    // Message schedule. _mm_set_epi32 takes its arguments from the highest
    // lane down, so M[0] lands in lane 3 and M[3] in lane 0.
    __m128i W[64];
    for (int t = 0; t < 16; ++t)
    {
        W[t] = _mm_set_epi32(static_cast<int>(static_cast<std::uint32_t>(M[0][t])),
                             static_cast<int>(static_cast<std::uint32_t>(M[1][t])),
                             static_cast<int>(static_cast<std::uint32_t>(M[2][t])),
                             static_cast<int>(static_cast<std::uint32_t>(M[3][t])));
    }
    for (int t = 16; t < 64; ++t)
    {
        const __m128i recent = _mm_add_epi32(sha256_small_sigma1(W[t - 2]), W[t - 7]);
        const __m128i older = _mm_add_epi32(sha256_small_sigma0(W[t - 15]), W[t - 16]);
        W[t] = _mm_add_epi32(recent, older);
    }

    // Every lane starts from the same initial hash value.
    __m128i state[8];
    for (int j = 0; j < 8; ++j)
    {
        state[j] = _mm_set1_epi32(static_cast<int>(kInitialState[j]));
    }

    __m128i A = state[0], B = state[1], C = state[2], D = state[3];
    __m128i E = state[4], F = state[5], G = state[6], H = state[7];
    for (int t = 0; t < 64; ++t)
    {
        // T1 = H + Sigma1(E) + Ch(E,F,G) + K[t] + W[t]
        const __m128i roundKey = _mm_add_epi32(_mm_set1_epi32(static_cast<int>(kRoundConstants[t])), W[t]);
        const __m128i T1 = _mm_add_epi32(
            _mm_add_epi32(H, sha256_big_sigma1(E)),
            _mm_add_epi32(sha256_choose(E, F, G), roundKey));
        // T2 = Sigma0(A) + Maj(A,B,C)
        const __m128i T2 = _mm_add_epi32(sha256_big_sigma0(A), sha256_majority(A, B, C));
        H = G;
        G = F;
        F = E;
        E = _mm_add_epi32(D, T1);
        D = C;
        C = B;
        B = A;
        A = _mm_add_epi32(T1, T2);
    }

    const __m128i working[8] = { A, B, C, D, E, F, G, H };
    // 32-bit elements so that each 16-byte store fills exactly one row.
    std::uint32_t digest[8][4];
    for (int j = 0; j < 8; ++j)
    {
        state[j] = _mm_add_epi32(state[j], working[j]);
        _mm_storeu_si128(reinterpret_cast<__m128i*>(digest[j]), state[j]);
    }

    sha256_write_hex(digest, 0, sha2560);
    sha256_write_hex(digest, 1, sha2561);
    sha256_write_hex(digest, 2, sha2562);
    sha256_write_hex(digest, 3, sha2563);
}

// Demo driver: hashes four 3-byte strings in one SIMD pass, times the call with
// the Windows high-resolution performance counter, and prints the digests.
int main() {
    char ss[4][4] = { "abc","ABC","bcd","efg" };
    // Zero-initialised so the block is well defined even if init() bails out.
    long M[4][16] = {};
    init(ss, 3, M);
    char sha2560[65],sha2561[65],sha2562[65],sha2563[65];

    const int kRuns = 1;        // number of timed measurements
    const int kIterations = 1;  // hash calls per measurement
    for (int run = 0; run < kRuns; run++)
    {
        LARGE_INTEGER  num;
        long long start, end, freq;
        QueryPerformanceFrequency(&num);
        freq = num.QuadPart;
        QueryPerformanceCounter(&num);
        start = num.QuadPart;
        for (int iter = 0; iter < kIterations; iter++)
        {
            StrSHA256(M,3,sha2560,sha2561,sha2562,sha2563);
        }
        QueryPerformanceCounter(&num);
        end = num.QuadPart;
        // The elapsed value is a long long, so it needs %lld rather than %d.
        printf("time=%lld 毫秒\n", (end - start) * 1000 / freq);
        // StrSHA256 places message 0 in the highest lane, i.e. in sha2563, so
        // printing 3..0 lists the digests in the order of ss[0]..ss[3].
        puts(sha2563);
        puts(sha2562);
        puts(sha2561);
        puts(sha2560);
    }
    return 0;
}



你可能感兴趣的:(计算机,网络空间安全)