1.文章介绍
这篇文章的与上一篇文章<
2.实现目标
在Android设备上把本地视频或者网络视频解码后重新编码为H264(video/avc)/AAC(audio/mp4a-latm),最后合成可播放的音视频文件。
本篇文章是在完成了对RTSP的支持后才写的,所以会根据实际达成目标(RTSP协议支持,解码重编码后,封装成RTP包转发)来对代码作出说明。
3.技术核心
废话不多说,直接上干货:
/** Parameters for the video encoder. */
const char *OUTPUT_VIDEO_MIME_TYPE = "video/avc"; // H.264 Advanced Video Coding; MediaDefs::MEDIA_MIMETYPE_VIDEO_AVC
// NOTE(review): declared as float but stored with setInt32("bitrate", ...) when the encoder format is built — confirm the intended type.
const float OUTPUT_VIDEO_BIT_RATE = 512 * 1024; // 512 kbps; may be worth tuning
const int32_t OUTPUT_VIDEO_FRAME_RATE = 20; // 20 fps; ideally the same as the source (stored with setFloat("frame-rate", ...))
const int32_t OUTPUT_VIDEO_IFRAME_INTERVAL = 10; // 10 seconds between I-frames
const int32_t OUTPUT_VIDEO_COLOR_FORMAT = OMX_COLOR_FormatYUV420SemiPlanar; // set on both the decoder format and the encoder format
/** Parameters for the audio encoder. */
const char *OUTPUT_AUDIO_MIME_TYPE = "audio/mp4a-latm"; // Advanced Audio Coding; MediaDefs::MEDIA_MIMETYPE_AUDIO_AAC
// NOTE(review): float passed to setInt32("bitrate", ...), same as the video bit rate above.
const float OUTPUT_AUDIO_BIT_RATE = 128 * 1024; // 128 kbps
const int32_t OUTPUT_AUDIO_AAC_PROFILE = OMX_AUDIO_AACObjectLC; // open question from the author: is this better than AACObjectHE?
/**
 * Audio encoder parameters taken from the input stream. The values below are
 * only defaults: the track setup code overwrites them with the "sample-rate"
 * and "channel-count" entries of the selected audio track's format.
 */
static int32_t OUTPUT_AUDIO_CHANNEL_COUNT = 1; // should match the input stream
static int32_t OUTPUT_AUDIO_SAMPLE_RATE_HZ = 48000; // should match the input stream
// Source video dimensions; filled from the "width"/"height" entries of the selected video track's format.
static int32_t gVideoWidth = 0;
static int32_t gVideoHeight = 0;
这是关于编码器的配置参数的声明,代码风格好的程序员应该喜欢这样。
自己扩展了电信IPTV RTSP解复用器:
// Demuxer for the RTSP source. The smart-pointer template argument was lost
// when the article was extracted; a bare `sp` cannot be declared, so it is
// restored here from the type being constructed.
sp<RTSPMediaExtractor> extractor = new RTSPMediaExtractor;
配置解复用器:
if (extractor->setDataSource(path) != OK) {
fprintf(stderr, "unable to instantiate extractor.\n");
extractor = NULL;
return 1;
}
通过解复用器获取音视频的元数据:
// Scan the extractor's tracks and build one decoder/encoder pair for the first
// audio track and the first video track (when useAudio / useVideo request
// them). Every other track is skipped. Each selected track gets a CodecState
// entry in stateByTrack keyed by mTrackIndex; the key is remembered in
// mAudioMapCursor / mVideoMapCursor so later stages can find the pair again.
bool haveAudio = false;
bool haveVideo = false;
for (size_t i = 0; i < extractor->countTracks(); ++i) {
    sp<AMessage> decode_format;
    status_t err = extractor->getTrackFormat(i, &decode_format);
    CHECK_EQ(err, (status_t)OK);

    AString mime;
    CHECK(decode_format->findString("mime", &mime));
    const bool isAudio = !strncasecmp(mime.c_str(), "audio/", 6);
    const bool isVideo = !strncasecmp(mime.c_str(), "video/", 6);

    sp<AMessage> encode_format = new AMessage;
    if (useAudio && !haveAudio && isAudio) {
        haveAudio = true;
        // Take the sample rate and channel count from the source track so
        // the encoder is configured to match the decoded audio.
        CHECK(decode_format->findInt32("sample-rate", &OUTPUT_AUDIO_SAMPLE_RATE_HZ));
        CHECK(decode_format->findInt32("channel-count", &OUTPUT_AUDIO_CHANNEL_COUNT));
        //make encode format
        encode_format->setString("mime", OUTPUT_AUDIO_MIME_TYPE);
        encode_format->setInt32("aac-profile", OUTPUT_AUDIO_AAC_PROFILE);
        encode_format->setInt32("max-input-size", 100 * 1024);
        encode_format->setInt32("sample-rate", OUTPUT_AUDIO_SAMPLE_RATE_HZ);
        encode_format->setInt32("channel-count", OUTPUT_AUDIO_CHANNEL_COUNT);
        // The bit rate constant is declared as float; convert explicitly.
        encode_format->setInt32("bitrate", static_cast<int32_t>(OUTPUT_AUDIO_BIT_RATE));
        ALOGV("selecting audio track %zu", i);
        err = extractor->selectTrack(i);
        CHECK_EQ(err, (status_t)OK);
        audioTrack = i;
        mAudioMapCursor = mTrackIndex;
    } else if (useVideo && !haveVideo && isVideo) {
        haveVideo = true;
        // Ask the decoder for the same color format the encoder is fed with.
        decode_format->setInt32("color-format", OUTPUT_VIDEO_COLOR_FORMAT);
        CHECK(decode_format->findInt32("width", &gVideoWidth));
        CHECK(decode_format->findInt32("height", &gVideoHeight));
        //make encode format
        encode_format->setString("mime", OUTPUT_VIDEO_MIME_TYPE);
        encode_format->setInt32("width", gVideoWidth);
        encode_format->setInt32("height", gVideoHeight);
        encode_format->setInt32("color-format", OUTPUT_VIDEO_COLOR_FORMAT);
        // The bit rate constant is declared as float; convert explicitly.
        encode_format->setInt32("bitrate", static_cast<int32_t>(OUTPUT_VIDEO_BIT_RATE));
        encode_format->setFloat("frame-rate", OUTPUT_VIDEO_FRAME_RATE);
        encode_format->setInt32("i-frame-interval", OUTPUT_VIDEO_IFRAME_INTERVAL);
        // Optional output scaling, only when a target size was supplied.
        if (mVideoWidth > 0) {
            encode_format->setInt32("scale-width", mVideoWidth);
        }
        if (mVideoHeight > 0) {
            encode_format->setInt32("scale-height", mVideoHeight);
        }
        ALOGV("selecting video track %zu", i);
        err = extractor->selectTrack(i);
        CHECK_EQ(err, (status_t)OK);
        videoTrack = i;
        mVideoMapCursor = mTrackIndex;
    } else {
        // Not a requested track, or a second track of an already handled kind.
        continue;
    }

    CodecState *state =
            &stateByTrack.editValueAt(stateByTrack.add(mTrackIndex++, CodecState()));

    //make decodeMediaCodec
    state->mDecodec = MediaCodec::CreateByType(
            looper, mime.c_str(), false /* encoder */);
    CHECK(state->mDecodec != NULL);
    err = state->mDecodec->configure(
            decode_format, NULL /* surface */,
            NULL /* crypto */,
            0 /* flags */);
    CHECK_EQ(err, (status_t)OK);

    //make encodeMediaCodec
    // Only a selected audio or video track reaches this point, and a MIME
    // type cannot start with both prefixes, so "not video" means audio.
    const char *encoderMime = isVideo ? OUTPUT_VIDEO_MIME_TYPE : OUTPUT_AUDIO_MIME_TYPE;
    state->mEncodec = MediaCodec::CreateByType(
            looper, encoderMime, true /* encoder */);
    CHECK(state->mEncodec != NULL);
    ALOGV("%s encode_format: %s",isVideo?"video":"audio", encode_format->debugString().c_str());
    err = state->mEncodec->configure(
            encode_format, NULL, NULL /* crypto */,
            MediaCodec::CONFIGURE_FLAG_ENCODE /* flags */);
    CHECK_EQ(err, (status_t)OK);

    //start decoder
    CHECK_EQ((status_t)OK, state->mDecodec->start());
    CHECK_EQ((status_t)OK, state->mDecodec->getInputBuffers(&state->mDecodecInBuffers));
    CHECK_EQ((status_t)OK, state->mDecodec->getOutputBuffers(&state->mDecodecOutBuffers));

    //start encoder
    CHECK_EQ((status_t)OK, state->mEncodec->start());
    CHECK_EQ((status_t)OK, state->mEncodec->getInputBuffers(&state->mEncodecInBuffers));
    CHECK_EQ((status_t)OK, state->mEncodec->getOutputBuffers(&state->mEncodecOutBuffers));
}
上一篇文章介绍过JAVA的处理,关于解码器/编码器的配置是一致的,只是换了一种编程语言而已,很好理解。
解码器、编码器配置完成后,建议在开始解码前先配置复用器(muxer),避免解复用器被一路流独占。
// Create the TS muxer, then poll the encoders until every active one has
// reported INFO_FORMAT_CHANGED. Each reported output format is registered with
// the muxer as a track; once all required formats are known the muxer is
// started and the loop ends because its condition becomes false.
sp<TSMuxer> muxer = new TSMuxer(NULL,mFunc);
//##################### config the muxer ####################
while ((haveVideo && encoderOutputVideoFormat == NULL) ||
       (haveAudio && encoderOutputAudioFormat == NULL)) {
    // Choose which encoder to poll. When both formats are still missing the
    // audio encoder is polled first; otherwise the loop condition guarantees
    // the video format is the one still pending.
    size_t mMapCursor;
    if (haveAudio && encoderOutputAudioFormat == NULL) {
        mMapCursor = mAudioMapCursor;
    } else {
        mMapCursor = mVideoMapCursor;
    }
    CodecState *state = &stateByTrack.editValueAt(mMapCursor);

    size_t index;
    size_t offset;
    size_t size;
    int64_t presentationTimeUs;
    uint32_t flags;
    status_t err = state->mEncodec->dequeueOutputBuffer(
            &index, &offset, &size, &presentationTimeUs, &flags,kTimeout);
    if (err == OK) {
        // An output buffer arrived before the muxer is running: it is handed
        // back to the encoder without being written anywhere.
        err = state->mEncodec->releaseOutputBuffer(index);
        CHECK_EQ(err, (status_t)OK);
    } else if (err == INFO_FORMAT_CHANGED) {
        // NOTE(review): the track kind is derived by comparing cursor values;
        // this relies on mVideoMapCursor and mAudioMapCursor never being
        // equal, including when only one kind of track exists — confirm.
        if (mMapCursor == mVideoMapCursor) {
            CHECK_EQ((status_t)OK, state->mEncodec->getOutputFormat(&encoderOutputVideoFormat));
            ALOGV("%s encoder INFO_FORMAT_CHANGED: %s", "video", encoderOutputVideoFormat->debugString().c_str());
            if (haveVideo) {
                outputVideoTrack = muxer->addTrack(encoderOutputVideoFormat);
                ALOGV("muxer: adding video track %d",outputVideoTrack);
            }
        } else if (mMapCursor == mAudioMapCursor) {
            CHECK_EQ((status_t)OK, state->mEncodec->getOutputFormat(&encoderOutputAudioFormat));
            ALOGV("%s encoder INFO_FORMAT_CHANGED: %s", "audio", encoderOutputAudioFormat->debugString().c_str());
            if (haveAudio) {
                outputAudioTrack = muxer->addTrack(encoderOutputAudioFormat);
                ALOGV("muxer: adding audio track %d",outputAudioTrack);
            }
        }
        // Start muxing as soon as every track in use has its format.
        if ((!haveVideo || encoderOutputVideoFormat != NULL) &&
            (!haveAudio || encoderOutputAudioFormat != NULL)) {
            ALOGV("muxer: starting video:%s audio:%s",haveVideo?"true":"false",haveAudio?"true":"false");
            muxer->start();
            muxing = true;
        }
    } else {
        // Nothing available within kTimeout; any other error is fatal.
        CHECK_EQ(err, -EAGAIN);
        ALOGV("err muxer config");
    }
}
//##################### config the muxer : end ####################
接下来的流程起初和上一篇文章设计得一模一样,在多次踩坑之后优化成了下面的方案。优化的关键点是:通过解复用器智能切换音视频流:
// Ask the extractor which track its current sample belongs to; the answer is
// written to trackIndex and is compared against videoTrack / audioTrack by
// the stages that follow.
status_t err = extractor->getSampleTrackIndex(&trackIndex);
通过方案上的优化,和之前JAVA版的CPU占用比较:
(JAVA版只完成了编解码过程,C++版从RTSP协议获取媒体流到解码重编码,然后封装成RTP包转发整个过程):
Logcat是我调试代码放开的,不用在意。
还是补一张关闭了调试打印的监测图吧,转发后的媒体流不卡顿可正常播放。
内存占用比较:
可以看到内存一直在往上涨...,过了大概1分钟内存已经占用到快200M了...,但是还没完,我看到内存涨到了接近500M,然后出现了低内存保护机制,把应用给kill掉了。
通过对比,可以看到无论是cpu占用还是内存占用方面,都有了较大提升,当然,还可以优化得更好,需要时间来验证。
1.解码器ready时,通过解复用器获取es的buffer
//#####################step 1 : read SampleData####################
// Step 1: feed the decoder of the current track. While that track still has
// input pending, take a free decoder input buffer, let the extractor fill it
// with the current sample, queue it with the sample's timestamp and sync-frame
// flag, and advance the extractor. The loop stops when no input buffer is
// free (the sample is kept for the next pass) or when end of stream is hit.
while ((trackIndex == videoTrack && (haveVideo && !videoExtractorDone)) ||
       (trackIndex == audioTrack && (haveAudio && !audioExtractorDone))) {
    size_t index;
    status_t err = state->mDecodec->dequeueInputBuffer(&index, kTimeout);
    if (err == OK) {
        const sp<ABuffer> &buffer = state->mDecodecInBuffers.itemAt(index);
        err = extractor->readSampleData(buffer);
        // The author notes that this branch is never reached in practice.
        if (err == ERROR_END_OF_STREAM) {
            ALOGV("%s signalling input EOS ",trackIndex==videoTrack?"video":"audio");
            err = state->mDecodec->queueInputBuffer(
                    index,
                    0 /* offset */,
                    0 /* size */,
                    0ll /* timeUs */,
                    MediaCodec::BUFFER_FLAG_EOS);
            CHECK_EQ(err, (status_t)OK);
            // NOTE(review): `timeUs` is used here before the local
            // declaration further down, so it must refer to a variable from
            // an enclosing scope that is not shown — confirm it exists and
            // that querying the sample time after end of stream can succeed.
            err = extractor->getSampleTime(&timeUs);
            CHECK_EQ(err, (status_t)OK);
            if (trackIndex == videoTrack) {
                videoExtractorDone = true;
            } else if (trackIndex == audioTrack) {
                audioExtractorDone = true;
            }
            break;
        }

        sp<MetaData> meta;
        err = extractor->getSampleMeta(&meta);
        CHECK_EQ(err, (status_t)OK);
        uint32_t bufferFlags = 0;
        int32_t val;
        if (meta->findInt32(kKeyIsSyncFrame, &val) && val != 0) {
            // only support BUFFER_FLAG_SYNCFRAME in the flag for now.
            bufferFlags |= MediaCodec::BUFFER_FLAG_SYNCFRAME;
        }

        int64_t timeUs;
        err = extractor->getSampleTime(&timeUs);
        CHECK_EQ(err, (status_t)OK);
        // %zu for the size_t index; the timestamp is cast so that %lld is
        // correct whatever the underlying type of int64_t is.
        ALOGV("%s decoder filling input buffer index:%zu time:%lld", trackIndex==videoTrack?"video":"audio",index,(long long)timeUs);
        err = state->mDecodec->queueInputBuffer(
                index,
                buffer->offset(),
                buffer->size(),
                timeUs,
                bufferFlags);
        CHECK_EQ(err, (status_t)OK);
    } else {
        CHECK_EQ(err, -EAGAIN);
        ALOGV("no %s decoder input buffer",trackIndex==videoTrack?"video":"audio");
        // Leave without advancing: advancing here would lose the sample
        // that could not be queued.
        break;
    }
    err = extractor->advance();
    CHECK_EQ(err, (status_t)OK);
}
//#####################step 1 : end ####################
这段代码对音视频的es采用相同的处理:当decode的InputBuffers准备好后,从解复用器中获取es并填充到InputBuffers中,经过decode解码后输出到OutputBuffer,下一阶段就可以把yuv、pcm数据转储给encode的InputBuffers:
// Drain one output buffer from the decoder of the current track and record
// its metadata (offset, size, timestamp, flags) so the next stage can feed it
// to the encoder. This excerpt is cut short: the closing of `if (err == OK)`
// and the loop that the `break` statements leave are not shown.
size_t index;
size_t offset;
size_t size;
int64_t presentationTimeUs;
uint32_t flags;
status_t err = state->mDecodec->dequeueOutputBuffer(
&index, &offset, &size, &presentationTimeUs, &flags,
kTimeout);
if (err == OK) {
// NOTE(review): `index` is a size_t but is printed with %d, and the int64_t
// timestamp with %lld — consider %zu and an explicit (long long) cast.
ALOGV("%s decoder draining output buffer %d, time = %lld us",trackIndex==videoTrack?"video":"audio",
index, presentationTimeUs);
// Buffers flagged as codec config are handed straight back to the decoder and
// not forwarded to the encoder.
if (flags & MediaCodec::BUFFER_FLAG_CODECCONFIG) {
ALOGV("reached %s decoder BUFFER_FLAG_CODECCONFIG",trackIndex==videoTrack?"video":"audio");
err = state->mDecodec->releaseOutputBuffer(index);
CHECK_EQ(err, (status_t)OK);
break;
}
// Find (or create) the CodecOutInfo entry for this buffer index. While the
// vector holds fewer entries than the decoder has output buffers, a new entry
// is added under key `index`; afterwards the entry is looked up by position.
// NOTE(review): if the track is neither the video nor the audio track, `info`
// stays uninitialized before the writes below — presumably the caller rules
// that out; confirm.
CodecOutInfo *info;
if(trackIndex == videoTrack){
if(mVideoInfoVector.size() >= state->mDecodecOutBuffers.size()){
info = &mVideoInfoVector.editValueAt(index);
}else{
info = &mVideoInfoVector.editValueAt(mVideoInfoVector.add(index, CodecOutInfo()));
}
// Remember which decoder output buffer is waiting to be encoded.
pendingVideoDecoderOutputBufferIndex = index;
}else if(trackIndex == audioTrack){
if(mAudioInfoVector.size() >= state->mDecodecOutBuffers.size()){
info = &mAudioInfoVector.editValueAt(index);
}else{
info = &mAudioInfoVector.editValueAt(mAudioInfoVector.add(index, CodecOutInfo()));
}
pendingAudioDecoderOutputBufferIndex = index;
}
// Store the buffer's metadata for the encoder stage.
info->offset = offset;
info->size = size;
info->presentationTimeUs = presentationTimeUs;
info->flags = flags;
break;
把准备好的yuv或pcm数据填充到encode:
// Submit encoder input buffer `index` (offset 0, length taken from srcBuffer)
// with the timestamp and flags recorded in `info` for the decoded frame.
// NOTE(review): presumably the decoded data was copied from srcBuffer into the
// encoder input buffer just before this excerpt — not shown here.
err = state->mEncodec->queueInputBuffer(index,
0, srcBuffer->size(), info->presentationTimeUs,
info->flags);
CHECK_EQ(err, (status_t)OK);
// The decoder output buffer `pendingIndex` is returned to the decoder only
// after the encoder has accepted the input.
err = state->mDecodec->releaseOutputBuffer(pendingIndex);
CHECK_EQ(err, (status_t)OK);
最后就是把编码后的es丢给复用器,要么保存文件,要么转发出去(本文是自己封装成了rtp):
// Hand one encoded buffer to the muxer. A sample is written only when its
// timestamp is not earlier than the last timestamp written for the same
// track; otherwise it is dropped and logged. Nothing is written while the
// track has not been added to the muxer (track id -1).
const sp<ABuffer> &buffer = state->mEncodecOutBuffers.itemAt(index);
if (trackIndex == videoTrack) {
    if (presentationTimeUs >= mLastVideoSampleTime) {
        useOriTime = true;
    }
    // The former `size >= 0` guard was dropped: the dequeueOutputBuffer calls
    // in this file fill a size_t, so that comparison was always true.
    if (outputVideoTrack != -1) {
        if (useOriTime) {
            mLastVideoSampleTime = presentationTimeUs;
            err = muxer->writeSampleData(buffer,outputVideoTrack,mLastVideoSampleTime, flags);
            CHECK_EQ(err, (status_t)OK);
        } else {
            ALOGV("%s encoder loss one buffer.",trackIndex==videoTrack?"video":"audio");
        }
    }
} else if (trackIndex == audioTrack) {
    if (presentationTimeUs >= mLastAudioSampleTime) {
        useOriTime = true;
    }
    // Same as for video: only the "track was added" check is meaningful.
    if (outputAudioTrack != -1) {
        if (useOriTime) {
            mLastAudioSampleTime = presentationTimeUs;
            err = muxer->writeSampleData(buffer,outputAudioTrack,mLastAudioSampleTime, flags);
            CHECK_EQ(err, (status_t)OK);
        } else {
            ALOGV("%s encoder loss one buffer.",trackIndex==videoTrack?"video":"audio");
        }
    }
}
核心的解码、编码流程就是这些了,其实除了某些细节上的处理和Java版不一样外,总体思路是一致的。
4.结束语
本篇文章分析还是着重于重编码的流程,和Java的实现方式上有一个很好的对比,RTSP的扩展涉及到电信行业IPTV专业技术,我也不方便开源出来,望谅解。总体上又写了这么多,也达到了预期目标,写下来的东西希望可以帮助到关注该技术的同学吧,感谢关注!