C++ SIMD 样例

代码如下:

#include <immintrin.h>

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>

using namespace std;

/// Plain (pointer, length) pair describing a run of bytes.
///
/// The struct declares no destructor, so it never releases the memory that
/// `p` points to; whoever allocated that memory remains responsible for it.
struct StringView {
    /// Address of the first byte.
    const char* p;
    /// Number of bytes reachable through `p`; fixed once the object is built.
    const std::size_t len;
};

// Reads the whole file `fileName` into a freshly allocated heap buffer.
//
// Returns a StringView whose `p` is the buffer and whose `len` is the number
// of bytes that were actually read. When the file cannot be opened or its
// size cannot be determined, `len` is 0 and `p` is a valid zero-length
// allocation.
//
// The buffer comes from `new char[]` and is not released here: the caller
// owns it and is expected to `delete[]` it.
StringView FileSize(const char* fileName) {
    // Binary mode keeps stream offsets equal to byte counts on every
    // platform; ios::ate starts at the end so a single tellg() yields the size.
    std::ifstream ifstr(fileName, std::ios::binary | std::ios::ate);

    // tellg() reports -1 when the stream is in a failed state (e.g. the open
    // failed); treat that as an empty file instead of a huge unsigned size.
    const std::streamoff endPos = ifstr.tellg();
    const std::size_t fileSize = endPos > 0 ? static_cast<std::size_t>(endPos) : 0;

    char* p = new char[fileSize];
    std::size_t bytesRead = 0;
    if (fileSize > 0) {
        ifstr.seekg(0, std::ios::beg);
        ifstr.read(p, static_cast<std::streamsize>(fileSize));
        // Report only what read() delivered so that callers never scan
        // uninitialised bytes after a short read.
        bytesRead = static_cast<std::size_t>(ifstr.gcount());
    }
    return {p, bytesRead};
}

// Normal function
// Counts how many of the `str.len` bytes starting at `str.p` equal `c`,
// examining one byte per iteration.
//
// Bytes are compared as unsigned values, so a `c` of 0x80 or above matches
// the corresponding byte even on platforms where plain `char` is signed.
// The index and the counter are size_t so they cover the full range of
// `str.len`.
size_t count_c_normal(const StringView& str, const uint8_t c) {
    size_t num = 0;
    for (size_t i = 0; i < str.len; ++i) {
        if (static_cast<uint8_t>(str.p[i]) == c) {
            ++num;
        }
    }
    return num;
}

// SIMD function
// Counts how many of the `str.len` bytes starting at `str.p` equal `c`,
// comparing 16 bytes at a time with SSE2 and finishing the remainder with a
// scalar loop.
//
// Only complete 16-byte chunks are loaded with the vector instructions, so
// the function never reads beyond `str.p + str.len`.
size_t count_c_simd(const StringView& str, const uint8_t c) {
    // All 16 lanes hold the byte being searched for.
    const __m128i needle = _mm_set1_epi8(static_cast<char>(c));
    size_t cnt = 0;
    size_t i = 0;

    // Vector part: runs while a full 16-byte chunk is still available.
    for (; i + 16 <= str.len; i += 16) {
        // Unaligned load of bytes [i, i + 16).
        const __m128i chunk =
            _mm_loadu_si128(reinterpret_cast<const __m128i*>(str.p + i));
        // Each lane becomes 0xFF where the byte equals `c`, 0x00 otherwise.
        const __m128i eq = _mm_cmpeq_epi8(chunk, needle);
        // Collapse the 16 lane results into the low 16 bits of an integer.
        const unsigned mask = static_cast<unsigned>(_mm_movemask_epi8(eq));
        // Number of set bits == number of matching bytes in this chunk.
        cnt += static_cast<size_t>(__builtin_popcount(mask));
    }

    // Scalar tail: the at most 15 bytes that did not fill a whole chunk.
    for (; i < str.len; ++i) {
        if (static_cast<uint8_t>(str.p[i]) == c) {
            ++cnt;
        }
    }
    return cnt;
}

// Benchmark driver: loads "./test_file", counts the character '1' with the
// scalar and the SIMD routine, then prints both counts and the elapsed time
// of each routine in milliseconds.
int main() {
    const auto ret = FileSize("./test_file");

    const auto t1 = std::chrono::steady_clock::now();
    const size_t cnt1 = count_c_normal(ret, uint8_t('1'));
    const auto t2 = std::chrono::steady_clock::now();
    const size_t cnt2 = count_c_simd(ret, uint8_t('1'));
    const auto t3 = std::chrono::steady_clock::now();

    std::cout << "cnt1:" << cnt1 << ",cnt2:" << cnt2 << '\n';

    const auto d1 = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
    const auto d2 = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2).count();
    std::cout << "time1:" << d1 << ",time2:" << d2 << '\n';

    // FileSize hands back a buffer allocated with new[]; release it here.
    delete[] ret.p;
    return 0;
}

生成随机数代码如下:

#include <cstdint>
#include <fstream>
#include <random>
using namespace std;

// Writes `file_len` pseudo-random values in the range [0, 127] to
// "./test_file", replacing any previous content.
//
// Each value is emitted through operator<< as decimal text with no
// separator (for example 7, 42, 113 become "742113"), so the resulting file
// consists of digit characters and is normally longer than `file_len` bytes.
// The engine is default-constructed, i.e. it is not seeded from any external
// source.
void RandCharFile(const uint32_t file_len) {
    std::default_random_engine engine;
    std::ofstream out("./test_file");
    for (uint32_t written = 0; written != file_len; ++written) {
        out << engine() % 128;
    }
    out.close();
}

// Entry point of the generator: asks RandCharFile for 1024 * 1024 * 1024
// values.
int main() {
    const uint32_t kValueCount = 1024 * 1024 * 1024;
    RandCharFile(kValueCount);
    return 0;
}

编译命令:

g++ -std=c++11 main.cc -o main -O2

结果如下:

cnt1:511699574,cnt2:511699574
time1:1957,time2:900

总结:开启 -O2 优化时,SIMD 版本比普通版本快 2 倍多(900 ms 对 1957 ms);不开启 -O2 优化时,两者耗时相差 6 倍以上。

你可能感兴趣的:(c++,SIMD)