virtual bool create(unsigned int width, unsigned int height)
if (this->close())
m_width = width;
m_height = height;
m_stride = width * BYTES_PER_PIXEL;
#ifdef __USE_SSE__
m_data = (unsigned char *)_mm_malloc(m_height * (m_stride + 16 - (m_stride & 3)), 16);
m_data = (unsigned char *) malloc(m_height * m_stride);
if (m_data != NULL)
return true;
return false;
virtual bool close()
if (m_data != NULL)
#ifdef __USE_SSE__
m_data = NULL;
m_width = m_height = 0;
return true;
class SIMD_Grayscale: public Effect
virtual bool execute(....);
/**
 * Converts two pixels, already widened to 16-bit lanes, to grayscale.
 *
 * Lane layout of `widened` (low to high): [B, G, R, X, B, G, R, X], each lane
 * holding an 8-bit value zero-extended to 16 bits.
 *
 * @param widened        two pixels, one colour component per 16-bit lane
 * @param weights        8.8 fixed-point weights in the same lane layout (X = 0)
 * @param grayLaneMask   0x00FF in lanes 0 and 4, zero elsewhere
 * @param spareLaneMask  0x00FF in lanes 3 and 7, zero elsewhere
 * @return the two pixels with B, G and R replaced by the gray value and the
 *         fourth component left untouched
 */
static inline __m128i grayscalePixelPair(__m128i widened, __m128i weights,
                                         __m128i grayLaneMask, __m128i spareLaneMask)
{
    // Multiply-add of adjacent lanes gives, per pixel, two 32-bit partial sums:
    //   dword 0 = B*wB + G*wG,  dword 1 = R*wR + X*0.
    const __m128i partial = _mm_madd_epi16(widened, weights);

    // Fold dword 1 onto dword 0 so the low dword of each 64-bit half holds the
    // full weighted sum (at most 255 * 251, so nothing overflows).
    const __m128i total = _mm_add_epi32(partial, _mm_srli_epi64(partial, 32));

    // Undo the 8.8 fixed-point scaling and drop the leftover partial sum that
    // still sits in the upper dword of each half.
    __m128i gray = _mm_and_si128(_mm_srli_epi32(total, 8), grayLaneMask);

    // Replicate the gray value from lane 0 into lanes 1 and 2 (B -> G, R).
    gray = _mm_or_si128(gray,
                        _mm_or_si128(_mm_slli_epi64(gray, 16), _mm_slli_epi64(gray, 32)));

    // Carry the fourth (unused/alpha) component through unchanged.
    return _mm_or_si128(gray, _mm_and_si128(widened, spareLaneMask));
}

/**
 * Converts a 32-bit-per-pixel image to grayscale in place with SSE2.
 *
 * gray = 0.21 * R + 0.71 * G + 0.07 * B, evaluated in 8.8 fixed point. The
 * truncated weights add up to 251/256, so pure white maps to 250.
 *
 * NOTE(review): the pixel layout is assumed to be 0x00RRGGBB per 32-bit
 * little-endian word (bytes B, G, R, X in memory), as stated by the comment in
 * the original implementation -- confirm against the image loader.
 *
 * The image is processed as consecutive 16-byte blocks (4 pixels each), with
 * every row occupying a whole number of blocks. The buffer must therefore
 * provide height * ceil(width / 4) * 16 readable and writable bytes. Unaligned
 * loads/stores are used, so the buffer does not have to be 16-byte aligned.
 *
 * @param buffer  pixel data, modified in place
 * @param width   image width in pixels
 * @param height  image height in pixels
 * @return false when buffer is NULL, true otherwise
 */
bool SIMD_Grayscale::execute(unsigned char* buffer, unsigned int width, unsigned int height)
{
    if (buffer == NULL)
        return false;

    // Number of 16-byte blocks per scan line, rounded up (written this way so
    // that width + 3 cannot overflow).
    const unsigned int blocksPerRow = width / 4 + ((width % 4 != 0) ? 1 : 0);

    // 8.8 fixed-point luminance weights.
    const short weightRed   = static_cast<short>(0.21 * 256);
    const short weightGreen = static_cast<short>(0.71 * 256);
    const short weightBlue  = static_cast<short>(0.07 * 256);

    // _mm_set_epi16 takes its arguments from the highest lane down to the
    // lowest, so the argument order is X, R, G, B for each pixel.
    const __m128i weights = _mm_set_epi16(0, weightRed, weightGreen, weightBlue,
                                          0, weightRed, weightGreen, weightBlue);
    const __m128i grayLaneMask  = _mm_set_epi16(0, 0, 0, 0x00FF, 0, 0, 0, 0x00FF);
    const __m128i spareLaneMask = _mm_set_epi16(0x00FF, 0, 0, 0, 0x00FF, 0, 0, 0);
    const __m128i zero = _mm_setzero_si128();

    __m128i* block = reinterpret_cast<__m128i*>(buffer);

    for (unsigned int row = 0; row < height; ++row)
    {
        for (unsigned int i = 0; i < blocksPerRow; ++i, ++block)
        {
            const __m128i pixels = _mm_loadu_si128(block);

            // Widen bytes to 16-bit lanes: pixels 0-1 in one register,
            // pixels 2-3 in the other. Unpacking keeps the byte order.
            const __m128i gray01 = grayscalePixelPair(_mm_unpacklo_epi8(pixels, zero),
                                                      weights, grayLaneMask, spareLaneMask);
            const __m128i gray23 = grayscalePixelPair(_mm_unpackhi_epi8(pixels, zero),
                                                      weights, grayLaneMask, spareLaneMask);

            // Narrow back to bytes. The unsigned-saturating pack is required
            // (the signed variant would clamp every value above 127), and the
            // low pixel pair must be the first operand to keep pixel order.
            _mm_storeu_si128(block, _mm_packus_epi16(gray01, gray23));
        } // for width
    } // for height

    return true;
} // func
11ms, 11ms, 11ms, 11ms, 11ms, 11ms, 11ms, 12ms,
11ms, 12ms, 11ms, 12ms, 11ms, 11ms, 11ms, 12ms,
11ms, 11ms, 11ms, 11ms, 12ms, 11ms, 11ms, 11ms,
12ms, 12ms, 11ms, 11ms, 11ms, 12ms, 12ms, 11ms,
11ms, 12ms, 12ms, 12ms, 12ms, 11ms, 11ms, 12ms,
11ms, 12ms, 11ms, 11ms, 11ms, 12ms, 11ms, 12ms,
11ms, 12ms, 11ms, 11ms, 11ms, 11ms, 11ms, 11ms,
12ms, 11ms, 11ms, 11ms, 11ms, 11ms, 11ms, 12ms,
Max:13.14 Min:10.97