最近要用到Intel Media SDK播放H264视频,发现Intel的SDK解码出来的帧缓冲是NV12像素格式的,需要自己转换成RGB32格式。虽然这种代码网上一找一大堆,但还是自己动动手吧。
google了一下转换公式:
C = Y - 16
D = U - 128
E = V - 128
R = clip(round(1.164383 * C + 1.596027 * E))
G = clip(round(1.164383 * C - (0.391762 * D) - (0.812968 * E)))
B = clip(round(1.164383 * C + 2.017232 * D))
研究了一个小时用SSE2搞定,代码一次调试通过,心情大好,把核心代码贴上来。
__m128i c0
=
_mm_setzero_si128();
__m128i c128
=
_mm_set1_epi16(
128
);
__m128i c128_32
=
_mm_set1_epi32(
128
);
__m128i c16
=
_mm_set1_epi16(
16
);
__m128i c255
=
_mm_set1_epi16(
255
);
__m128i c_1_1596
=
_mm_set1_epi32(
0x199012a
);
__m128i c_1_2017
=
_mm_set1_epi32(
0x204012a
);
__m128i c_0_392
=
_mm_set1_epi32(0xff9c0000)
;
__m128i c_1_813
=
_mm_set1_epi32(0xff30012a)
;
for
(
int
y
=
0
; y
<
src
.Height; y
++
)
{
BYTE
*
dest
=
(BYTE
*
)data.Scan0
+
data.Stride
*
y;
BYTE
*
srcY
=
src.Y
+
src.Pitch
*
y;
BYTE
*
srcUV
=
src.UV
+
src.Pitch
*
(y
/
2
);
for
(
int
x
=
0
; x
<
src
.Width; x
+=
4
)
{
//
Y0Y1Y2Y30000 - 16
__m128i Ymm
=
_mm_sub_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(
*
(
int
*
)(srcY
+
x)), c0), c16);
//
U0V0U2V20000 - 128
__m128i UVmm
=
_mm_sub_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(
*
(
int
*
)(srcUV
+
x)), c0), c128);
//
U0U0U2U20000
__m128i Umm
=
_mm_shufflelo_epi16(UVmm, _MM_SHUFFLE(
2
,
2
,
0
,
0
));
//
V0V0V2V20000
__m128i Vmm
=
_mm_shufflelo_epi16(UVmm, _MM_SHUFFLE(
3
,
3
,
1
,
1
));
//
Y0V0Y1V0Y2V2Y3V2
__m128i YVmm
=
_mm_unpacklo_epi16(Ymm, Vmm);
//
Y0U0Y1U0Y2U2Y3U2
__m128i YUmm
=
_mm_unpacklo_epi16(Ymm, Umm);
__m128i Rmm
=
_mm_srai_epi32(_mm_add_epi32(_mm_madd_epi16(YVmm, c_1_1596), c128_32),
8
);
__m128i Bmm
=
_mm_srai_epi32(_mm_add_epi32(_mm_madd_epi16(YUmm, c_1_2017), c128_32),
8
);
__m128i Gmm
=
_mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(_mm_madd_epi16(YVmm, c_1_813), _mm_madd_epi16(YUmm, c_0_392)), c128_32),
8
);
Rmm
=
_mm_slli_epi32(_mm_and_si128(Rmm, _mm_cmpgt_epi32(Rmm, c0)),
16
);
Bmm
=
_mm_and_si128(Bmm, _mm_cmpgt_epi32(Bmm, c0));
Gmm
=
_mm_slli_epi32(_mm_min_epi16(_mm_and_si128(Gmm, _mm_cmpgt_epi32(Gmm, c0)), c255),
8
);
*
(__m128i
*
)dest
=
_mm_or_si128(_mm_min_epi16(_mm_or_si128(Rmm, Bmm), c255), Gmm);
dest
+=
16
;
}
}