My audio/video study notes have reached Chapter 5. Earlier chapters covered audio and video rendering; this one covers the implementation of a video player. It pulls together a lot of material: Linux thread synchronization, the producer-consumer model, OpenGL FBOs, FFmpeg, audio/video synchronization, and so on. Below are the key points from my read-through of the source code.
First some parameters are initialized: minBufferedDuration is 0.5f and maxBufferedDuration is 1.0f; after initMeta() is called they become 0.5 and 0.8. The maximum allowed audio/video drift is set to 0.05.
bool AVSynchronizer::init(DecoderRequestHeader *requestHeader, JavaVM *g_jvm, jobject obj,float minBufferedDuration, float maxBufferedDuration) {
LOGI("Enter AVSynchronizer::init");
currentAudioFrame = NULL;
currentAudioFramePos = 0;
isCompleted = false;
moviePosition = 0;
buffered = false;
bufferedDuration = 0;
decoder = NULL;
decodeVideoErrorState = 0;
isLoading = false;
isInitializeDecodeThread = false;
this->minBufferedDuration = minBufferedDuration;
this->maxBufferedDuration = maxBufferedDuration;
this->g_jvm = g_jvm;
this->obj = obj;
isOnDecoding = false;
isDestroyed = false;
//1. create the decoder instance
this->createDecoderInstance();
//2. initialize member variables
this->initMeta();
//3. open the stream and parse out the contexts of the audio and video streams
int initCode = decoder->openFile(requestHeader);
if (initCode < 0 || isDestroyed) {
LOGI("VideoDecoder decode file fail...");
closeDecoder();
return false;
}
if (!decoder->isSubscribed() || isDestroyed) {
LOGI("decoder has not Subscribed...");
closeDecoder();
return false;
}
//5. report the video width/height and the duration back to the client
float duration = decoder->getDuration();
int videoWidth = decoder->getVideoFrameWidth();
int videoHeight = decoder->getVideoFrameHeight();
if(videoWidth <= 0 || videoHeight <= 0){
return false;
}
//6. start the decoder's uploader
//this initializes a circular queue of textures
decoder->startUploader(&mUploaderCallback);
this->viewStreamMetaCallback(videoWidth, videoHeight, duration);
//7. increase buffering for audio-only streams
if (!decoder->validVideo()){
this->minBufferedDuration *= 10.0;
}
LOGI("Leave AVSynchronizer::init");
return true;
}
int VideoDecoder::openFile(DecoderRequestHeader *requestHeader) {
isSubscribe = true;
isOpenInputSuccess = false;
position = 0.0f;
seek_req = false;
seek_resp = false;
pFormatCtx = NULL;
subscribeTimeOutTimeMills = SUBSCRIBE_VIDEO_DATA_TIME_OUT;
videoCodecCtx = NULL;
videoFrame = NULL;
videoStreams = NULL;
swrContext = NULL;
swrBuffer = NULL;
audioCodecCtx = NULL;
audioStreams = NULL;
audioFrame = NULL;
textureFrameUploader = NULL;
int initLockCode = pthread_mutex_init(&mLock, NULL);
int initConditionCode = pthread_cond_init(&mCondition, NULL);
subtitleStreams = NULL;
this->requestHeader = requestHeader;
this->initFFMpegContext();
if (isNeedBuriedPoint) {
buriedPointStart = currentTimeMills();
buriedPoint.beginOpen = buriedPointStart;
buriedPoint.duration = 0.0f;
}
long long startTimeMills = currentTimeMills();
int errorCode = openInput();
// LOGI("openInput [%s] waste TimeMills is %d", requestHeader->getURI(), (int )(currentTimeMills() - startTimeMills));
//pFormatCtx->streams now holds all the streams, so we iterate over it to find the video, audio, subtitle streams, etc.:
if (errorCode > 0) {
if (isNeedBuriedPoint) {
long long curTime = currentTimeMills();
buriedPoint.successOpen = (curTime - buriedPointStart) / 1000.0f;
buriedPoint.failOpen = 0.0f;
buriedPoint.failOpenType = 1;
LOGI("successOpen is %f", buriedPoint.successOpen);
}
int videoErr = openVideoStream();
int audioErr = openAudioStream();
if (videoErr < 0 && audioErr < 0) {
errorCode = -1; // both fails
} else {
//parse the subtitle streams
subtitleStreams = collectStreams(AVMEDIA_TYPE_SUBTITLE);
}
} else {
LOGE("open input failed, have to return");
if (isNeedBuriedPoint) {
long long curTime = currentTimeMills();
buriedPoint.failOpen = (curTime - buriedPointStart) / 1000.0f;
buriedPoint.successOpen = 0.0f;
buriedPoint.failOpenType = errorCode;
LOGI("failOpen is %f", buriedPoint.failOpen);
}
return errorCode;
}
isOpenInputSuccess = true;
isVideoOutputEOF = false;
isAudioOutputEOF = false;
return errorCode;
}
int VideoDecoder::openInput() {
// LOGI("VideoDecoder::openInput");
//file path
char *videoSourceURI = requestHeader->getURI();
//maximum analyze durations, here {-1, -1, -1}
int* max_analyze_durations = requestHeader->getMaxAnalyzeDurations();
int analyzeDurationSize = requestHeader->getAnalyzeCnt();
//retry count, initially 1
int tryNum = (connectionRetry <= 0) ? 1 : connectionRetry;
LOGI("tryNum ===== %d", tryNum);
//how much data to read while probing
int probesize = requestHeader->getProbeSize() + (tryNum-1)*20*1024;
//true in this case
bool fpsProbeSizeConfigured = requestHeader->getFPSProbeSizeConfigured();
if (-1 == probesize) {
probesize = DECODE_PROBESIZE_DEFAULT_VALUE;
}
LOGI("probesize ===== %d", probesize);
readLatestFrameTimemills = currentTimeMills();
isTimeout = false;
pFormatCtx = avformat_alloc_context();
//interrupt callback struct: on abort, FFmpeg calls its first member (a function pointer)
//and passes the second member in as the argument
int_cb = {VideoDecoder::interrupt_cb, this};
pFormatCtx->interrupt_callback = int_cb;
//open the file; this only reads the header and does not fill in stream info. Note that pFormatCtx must be NULL here or allocated with avformat_alloc_context
int openInputErrCode = 0;
if ((openInputErrCode = this->openFormatInput(videoSourceURI)) != 0) {
LOGI("Video decoder open input file failed... videoSourceURI is %s openInputErr is %s", videoSourceURI, av_err2str(openInputErrCode));
return -1;
}
//this method doesn't do much
this->initAnalyzeDurationAndProbesize(max_analyze_durations, analyzeDurationSize, probesize, fpsProbeSizeConfigured);
// LOGI("pFormatCtx->max_analyze_duration is %d", pFormatCtx->max_analyze_duration);
// LOGI("pFormatCtx->probesize is %d", pFormatCtx->probesize);
//read the stream info from the file: this function reads packets to determine all of the streams and sets pFormatCtx->streams, without moving the file position; the packets it reads are kept for the decoding that follows
if (avformat_find_stream_info(pFormatCtx, NULL) < 0) {
// avformat_close_input(&pFormatCtx);
LOGI("Video decoder Stream info not found...");
return -1;
}
is_eof = false;
//dump the file info, i.e. the detailed output you normally see from the ffmpeg tool
// av_dump_format(pFormatCtx, -1, videoSourceURI, 0);
//check whether the codec information is usable
if (this->isNeedRetry()) {
if (isNeedBuriedPoint) {
long long curTime = currentTimeMills();
float retryTime = (curTime - buriedPointStart) / 1000.0f;
buriedPoint.retryOpen.push_back(retryTime);
LOGI("retryTime is %f", retryTime);
}
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return openInput();
} else {
LOGI("retry finish");
return (hasAllCodecParameters() ? 1 : -1);
}
return 1;
}
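A small worked example of the retry sizing above: if getProbeSize() returns P, the first attempt probes P bytes; when isNeedRetry() triggers the recursive openInput() call (connectionRetry presumably being incremented elsewhere), attempt n probes P + (n-1)*20*1024 bytes, i.e. an extra 20 KB per retry.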
Opening the video stream:
int VideoDecoder::openVideoStream(int streamIndex) {
LOGI("VideoDecoder::openVideoStream");
//1、get a pointer to the codec context for the video stream
AVStream *videoStream = pFormatCtx->streams[streamIndex];
degress = 0;
AVDictionary *videoStreamMetadata = videoStream->metadata;
AVDictionaryEntry* entry = NULL;
//read some of the video stream's metadata, e.g. the author and such
while ((entry = av_dict_get(videoStreamMetadata, "", entry, AV_DICT_IGNORE_SUFFIX))){
LOGI("entry: key is %s value is %s\n", entry->key, entry->value);
if (0 == strcmp(entry->key, "rotate")) {
//pull video orientation hint
degress = atoi(entry->value);
}
}
int* rotate = (int*)requestHeader->get(DECODER_HEADER_FORCE_ROTATE);
if(NULL != rotate){
degress = (*rotate);
}
LOGI("degress is %d", degress);
videoCodecCtx = videoStream->codec;
//2. look up the decoder matching the codec context's codec_id
videoCodec = avcodec_find_decoder(videoCodecCtx->codec_id);
LOGI("CODEC_ID_H264 is %d videoCodecCtx->codec_id is %d", CODEC_ID_H264, videoCodecCtx->codec_id);
if (videoCodec == NULL) {
LOGI("can not find the videoStream's Codec ...");
return -1;
}
//3. open the decoder we found
if (avcodec_open2(videoCodecCtx, videoCodec, NULL) < 0) {
LOGI("open video codec failed...");
return -1;
}
//4. allocate the image buffer: call avcodec_alloc_frame to allocate videoFrame, which will hold the decoded data
videoFrame = avcodec_alloc_frame();
if (videoFrame == NULL) {
LOGI("alloc video frame failed...");
avcodec_close(videoCodecCtx);
return -1;
}
//5、now: we think we can Correctly identify the video stream
this->videoStreamIndex = streamIndex;
//6. determine fps and videoTimeBase
avStreamFPSTimeBase(videoStream, 0.04, &fps, &videoTimeBase);
float* actualFps = (float*)requestHeader->get(DECODER_HEADER_FORCE_FPS);
if(NULL != actualFps){
fps = (*actualFps);
}
if(fps > 30.0f || fps < 5.0f){
fps = 24.0f;
}
LOGI("video codec size: fps: %.3f tb: %f", fps, videoTimeBase);
// LOGI("video start time %f", videoStream->start_time * videoTimeBase);
// LOGI("video disposition %d", videoStream->disposition);
LOGI("videoCodecCtx->pix_fmt is %d {%d, %d}", videoCodecCtx->pix_fmt, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVJ420P);
//filter by pixel format
if (videoCodecCtx->pix_fmt != AV_PIX_FMT_YUV420P && videoCodecCtx->pix_fmt != AV_PIX_FMT_YUVJ420P) {
LOGI("NOW we only surpport Format is regular YUV we can render it to OpenGL");
LOGI("very sorry for this format we must convert to RGB");
avcodec_close(videoCodecCtx);
return -1;
}
//get video width and height
width = videoCodecCtx->width;
height = videoCodecCtx->height;
LOGI("width is %d height is %d degress is %d", width, height, degress);
return 1;
}
The texture ring queue is initialized mainly in startUploader():
void VideoDecoder::startUploader(UploaderCallback * pUploaderCallback) {
mUploaderCallback = pUploaderCallback;
//for FFmpeg (software) decoding this creates a YUVTextureFrameUploader
textureFrameUploader = createTextureFrameUploader();
textureFrameUploader->registerUpdateTexImageCallback(update_tex_image_callback, signal_decode_thread_callback, this);
textureFrameUploader->setUploaderCallback(pUploaderCallback);
textureFrameUploader->start(width, height, degress);
//wait EGL Context initialize success
int getLockCode = pthread_mutex_lock(&mLock);
pthread_cond_wait(&mCondition, &mLock);
pthread_mutex_unlock(&mLock);
}
After a series of callbacks during initialization, execution eventually reaches:
bool TextureFrameUploader::initialize() {
eglCore = new EGLCore();
LOGI("TextureFrameUploader use sharecontext");
eglCore->initWithSharedContext();
LOGI("after TextureFrameUploader use sharecontext");
copyTexSurface = eglCore->createOffscreenSurface(videoWidth, videoHeight);
eglCore->makeCurrent(copyTexSurface);
glGenFramebuffers(1, &mFBO);
//initialize outputTexId
glGenTextures(1, &outputTexId);
glBindTexture(GL_TEXTURE_2D, outputTexId);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, videoWidth, videoHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0);
glBindTexture(GL_TEXTURE_2D, 0);
//this callback initializes the audio frame queue and the texture ring queue
if (mUploaderCallback){
mUploaderCallback->initFromUploaderGLContext(eglCore);
}
eglCore->makeCurrent(copyTexSurface);
LOGI("leave TextureFrameUploader::initialize");
return true;
}
Initializing the queues:
void AVSynchronizer::initCircleQueue(int videoWidth, int videoHeight) {
// initialize audioQueue and videoQueue
float fps = decoder->getVideoFPS();
// LOGI("decoder->getVideoFPS() is %.3f maxBufferedDuration is %.3f", fps, maxBufferedDuration);
//cap the fps here: on some platforms the reported fps is huge, which would make the computed queue request far too much texture memory, so we clamp it
if (fps > 30.0f) {
fps = 30.0f;
}
int queueSize = (maxBufferedDuration + 1.0) * fps;
circleFrameTextureQueue = new CircleFrameTextureQueue(
"decode frame texture queue");
circleFrameTextureQueue->init(videoWidth, videoHeight, queueSize);
audioFrameQueue = new std::queue<AudioFrame*>();
pthread_mutex_init(&audioFrameQueueMutex, NULL);
}
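As a worked example of the sizing above: with maxBufferedDuration = 0.8 (the value after initMeta()) and fps capped at 30, queueSize = (0.8 + 1.0) * 30 = 54 texture slots.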
void VideoPlayerController::onSurfaceCreated(ANativeWindow* window, int width, int height) {
LOGI("enter VideoPlayerController::onSurfaceCreated...");
if (window != NULL){
this->window = window;
}
if (userCancelled){
return;
}
if (width > 0 && height > 0){
this->screenHeight = height;
this->screenWidth = width;
}
if (!videoOutput) {
initVideoOutput(window);
}else{
videoOutput->onSurfaceCreated(window);
}
LOGI("Leave VideoPlayerController::onSurfaceCreated...");
}
void VideoPlayerController::initVideoOutput(ANativeWindow* window){
LOGI("VideoPlayerController::initVideoOutput beigin width:%d, height:%d", screenWidth, screenHeight);
if (window == NULL || userCancelled){
return;
}
videoOutput = new VideoOutput();
videoOutput->initOutput(window, screenWidth, screenHeight,videoCallbackGetTex, this);
}
This ultimately works much like an Android Handler: a message queue is created and a loop keeps pulling messages off it and doing the drawing. At the very start, before there are any video frames, only the EGL environment is initialized.
int Message::execute(){
if (MESSAGE_QUEUE_LOOP_QUIT_FLAG == what) {
return MESSAGE_QUEUE_LOOP_QUIT_FLAG;
} else if (handler) {
handler->handleMessage(this);
return 1;
}
return 0;
};
void handleMessage(Message* msg) {
int what = msg->getWhat();
ANativeWindow* obj;
switch (what) {
case VIDEO_OUTPUT_MESSAGE_CREATE_EGL_CONTEXT:
if (videoOutput->eglHasDestroyed){
break;
}
obj = (ANativeWindow*) (msg->getObj());
initPlayerResourceFlag = videoOutput->createEGLContext(obj);
break;
case VIDEO_OUTPUT_MESSAGE_RENDER_FRAME:
if (videoOutput->eglHasDestroyed) {
break;
}
if(initPlayerResourceFlag){
videoOutput->renderVideo();
}
break;
case VIDEO_OUTPUT_MESSAGE_CREATE_WINDOW_SURFACE:
if (videoOutput->eglHasDestroyed) {
break;
}
if(initPlayerResourceFlag){
obj = (ANativeWindow*) (msg->getObj());
videoOutput->createWindowSurface(obj);
}
break;
case VIDEO_OUTPUT_MESSAGE_DESTROY_WINDOW_SURFACE:
if(initPlayerResourceFlag){
videoOutput->destroyWindowSurface();
}
break;
case VIDEO_OUTPUT_MESSAGE_DESTROY_EGL_CONTEXT:
videoOutput->destroyEGLContext();
break;
}
}
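The loop that drains this handler's queue isn't shown in the excerpt. As a minimal sketch, a Handler-style render loop that blocks on the queue and dispatches until it sees the quit flag might look like this (the MessageQueue API used here is an assumption, not the project's exact signature):

// Sketch only: block on the queue, dispatch each message, stop on the quit flag.
// The dequeueMessage name/signature is assumed for illustration.
static void* videoOutputMessageLoop(void* ctx) {
    MessageQueue* queue = reinterpret_cast<MessageQueue*>(ctx);
    bool running = true;
    while (running) {
        Message* msg = NULL;
        if (queue->dequeueMessage(&msg, true) > 0 && NULL != msg) {
            // Message::execute() dispatches to handler->handleMessage(msg), as shown above
            if (MESSAGE_QUEUE_LOOP_QUIT_FLAG == msg->execute()) {
                running = false;
            }
            delete msg;
        }
    }
    return NULL;
}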
Let's look at the decode thread first. When decoding with FFmpeg there are a few key objects, and since a pile of callbacks gets registered it is easy to get them confused, so I'll write the wiring out here:
AVSynchronizer.decoder -> FFMPEGVideoDecoder.this
decoder.textureFrameUploader -> YUVTextureFrameUploader.this
textureFrameUploader.updateTexImageCallback -> FFMPEGVideoDecoder::update_tex_image_callback
textureFrameUploader.signalDecodeThreadCallback -> FFMPEGVideoDecoder::signal_decode_thread_callback
textureFrameUploader.updateTexImageContext -> FFMPEGVideoDecoder.this
textureFrameUploader.mUploaderCallback -> AVSynchronizer.mUploaderCallback
Keep these mappings in mind. Once initialization finishes we have three threads: the decode thread, the on-screen video render thread, and the offscreen render thread. The analysis below focuses on the decode thread.
void AVSynchronizer::start() {
isOnDecoding = true;
pauseDecodeThreadFlag = false;
circleFrameTextureQueue->setIsFirstFrame(true);
//start the decode thread
initDecoderThread();
}
Ultimately a thread is started that runs this method.
void AVSynchronizer::processDecodingFrame(bool& good, float duration){
std::list<MovieFrame*>* frames = decoder->decodeFrames(duration,&decodeVideoErrorState);
if (NULL != frames) {
if (!frames->empty()) {
if (decoder->hasSeekReq()) {
if (decoder->hasSeekResp()) {
if (NULL != audioFrameQueue) {
clearAudioFrameQueue();
}
bufferedDuration = 0.0f;
good = addFrames(frames);
int count = audioFrameQueue->size();
if (count > 0) {
AudioFrame *frame = audioFrameQueue->front();
moviePosition = frame->position;
}
buffered = false;
decoder->setSeekReq(false);
} else {
std::list<MovieFrame*>::iterator i;
for (i = frames->begin(); i != frames->end(); ++i) {
MovieFrame* frame = *i;
delete frame;
}
}
} else {
good = addFrames(frames);
}
} else {
LOGI("frames is empty %d", (int )good);
}
delete frames;
} else {
LOGI("why frames is NULL tell me why?");
}
}
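The thread body started by initDecoderThread() isn't shown above. Conceptually it keeps calling processDecodingFrame() while the buffer is below maxBufferedDuration and parks on a condition variable once enough is buffered, until fillAudioData() wakes it again via signalDecodeThread(). A rough sketch under those assumptions (the mutex/condition names and the per-iteration duration are illustrative, not the project's exact members):

// Rough sketch of the decode loop, not the verbatim project code.
void AVSynchronizer::decodeLoopSketch() {
    bool good = true;
    while (isOnDecoding && !isDestroyed) {
        pthread_mutex_lock(&videoDecoderLock);            // assumed mutex/condition pair
        if (bufferedDuration >= maxBufferedDuration) {
            // enough buffered: sleep until the consumer (fillAudioData) signals us
            pthread_cond_wait(&videoDecoderCondition, &videoDecoderLock);
        }
        pthread_mutex_unlock(&videoDecoderLock);
        // ask the decoder for roughly one buffering step worth of frames
        processDecodingFrame(good, minBufferedDuration);
    }
}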
Audio frames are handled quite simply: once decoded they are appended to the list. The main thing to look at is how video frames are decoded and handled.
std::list<MovieFrame*>* VideoDecoder::decodeFrames(float minDuration, int* decodeVideoErrorState) {
if (!isSubscribe || NULL == pFormatCtx) {
return NULL;
}
if (-1 == audioStreamIndex && -1 == videoStreamIndex) {
return NULL;
}
readLatestFrameTimemills = currentTimeMills();
std::list<MovieFrame*> *result = new std::list<MovieFrame*>();
AVPacket packet;
float decodedDuration = 0.0f;
bool finished = false;
if (seek_req) {
//the video queue needs to be cleared first
this->seek_frame();
}
int ret = 0;
char errString[128];
while (!finished) {
ret = av_read_frame(pFormatCtx, &packet);
if (ret < 0) {
LOGE("av_read_frame return an error");
if (ret != AVERROR_EOF) {
av_strerror(ret, errString, 128);
LOGE("av_read_frame return an not AVERROR_EOF error : %s", errString);
} else {
LOGI("input EOF");
is_eof = true;
}
av_free_packet(&packet);
break;
}
if (packet.stream_index == videoStreamIndex) {
this->decodeVideoFrame(packet, decodeVideoErrorState);
} else if (packet.stream_index == audioStreamIndex) {
finished = decodeAudioFrames(&packet, result, decodedDuration, minDuration, decodeVideoErrorState);
}
av_free_packet(&packet);
}
// flush video and audio decoder
// input for decoder end of file
if (is_eof) {
// video
flushVideoFrames(packet, decodeVideoErrorState);
// audio
flushAudioFrames(&packet, result, minDuration, decodeVideoErrorState);
}
return result;
}
Decoding a video frame:
bool FFMPEGVideoDecoder::decodeVideoFrame(AVPacket packet, int* decodeVideoErrorState) {
int pktSize = packet.size;
int gotframe = 0;
while (pktSize > 0) {
int len = avcodec_decode_video2(videoCodecCtx, videoFrame, &gotframe, &packet);
if (len < 0) {
LOGI("decode video error, skip packet");
*decodeVideoErrorState = 1;
break;
}
if (gotframe) {
if (videoFrame->interlaced_frame) {
avpicture_deinterlace((AVPicture*) videoFrame, (AVPicture*) videoFrame, videoCodecCtx->pix_fmt, videoCodecCtx->width, videoCodecCtx->height);
}
this->uploadTexture();
}
if (0 == len) {
break;
}
pktSize -= len;
}
return (bool)gotframe;
}
void VideoDecoder::uploadTexture() {
int getLockCode = pthread_mutex_lock(&mLock);
textureFrameUploader->signalFrameAvailable();
//wait EGL Context copy frame
pthread_cond_wait(&mCondition, &mLock);
pthread_mutex_unlock(&mLock);
}
void TextureFrameUploader::signalFrameAvailable() {
// LOGI("enter TextureFrameUploader::signalFrameAvailable");
while(!isInitial || _msg == MSG_WINDOW_SET || NULL == eglCore){
usleep(100 * 1000);
}
pthread_mutex_lock(&mLock);
pthread_cond_signal(&mCondition);
pthread_mutex_unlock(&mLock);
}
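This pair of methods is the producer-consumer handshake mentioned at the start: the decode thread blocks in uploadTexture() until the uploader thread has copied the frame and signals back. As a standalone illustration of the pattern (a generic sketch with an explicit predicate, not the project's code):

// Generic mutex + condition-variable handshake, for illustration only.
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
bool frameCopied = false;

void decodeSideWait() {                  // what uploadTexture() conceptually does
    pthread_mutex_lock(&lock);
    while (!frameCopied)                 // loop guards against spurious wakeups
        pthread_cond_wait(&cond, &lock);
    frameCopied = false;
    pthread_mutex_unlock(&lock);
}

void uploaderSideSignal() {              // what the uploader does after copying the frame
    pthread_mutex_lock(&lock);
    frameCopied = true;
    pthread_cond_signal(&cond);
    pthread_mutex_unlock(&lock);
}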
At this point the offscreen thread is signaled to render.
void TextureFrameUploader::drawFrame() {
float position = this->updateTexImage();
glBindFramebuffer(GL_FRAMEBUFFER, mFBO);
/** copy the YUV data (software decode) or the samplerExternalOES-format texture id (hardware decode) onto a GL_RGBA texture id **/
textureFrameCopier->renderWithCoords(textureFrame, outputTexId, vertexCoords, textureCoords);
//render to a standalone texture via the FBO, then store that texture id in the ring queue
if (mUploaderCallback)
mUploaderCallback->processVideoFrame(outputTexId, videoWidth, videoHeight, position);
else
LOGE("TextureFrameUploader::mUploaderCallback is NULL");
glBindFramebuffer(GL_FRAMEBUFFER, 0);
}
updateTexImage eventually lands in FFMPEGVideoDecoder::updateTexImage:
float FFMPEGVideoDecoder::updateTexImage(TextureFrame* textureFrame) {
float position = -1;
VideoFrame *yuvFrame = handleVideoFrame();
if (yuvFrame) {
((YUVTextureFrame*) textureFrame)->setVideoFrame(yuvFrame);
textureFrame->updateTexImage();
position = yuvFrame->position;
delete yuvFrame;
}
return position;
}
The following method copies out the actual YUV data of each frame and wraps it, together with the pts and duration, into a VideoFrame.
VideoFrame * VideoDecoder::handleVideoFrame() {
// LOGI("enter VideoDecoder::handleVideoFrame()...");
if (!videoFrame->data[0]) {
LOGI("videoFrame->data[0] is 0... why...");
return NULL;
}
VideoFrame *yuvFrame = new VideoFrame();
int width = MIN(videoFrame->linesize[0], videoCodecCtx->width);
int height = videoCodecCtx->height;
int lumaLength = width * height;
uint8_t * luma = new uint8_t[lumaLength];
copyFrameData(luma, videoFrame->data[0], width, height, videoFrame->linesize[0]);
yuvFrame->luma = luma;
width = MIN(videoFrame->linesize[1], videoCodecCtx->width / 2);
height = videoCodecCtx->height / 2;
int chromaBLength = width * height;
uint8_t * chromaB = new uint8_t[chromaBLength];
copyFrameData(chromaB, videoFrame->data[1], width, height, videoFrame->linesize[1]);
yuvFrame->chromaB = chromaB;
width = MIN(videoFrame->linesize[2], videoCodecCtx->width / 2);
height = videoCodecCtx->height / 2;
int chromaRLength = width * height;
uint8_t * chromaR = new uint8_t[chromaRLength];
copyFrameData(chromaR, videoFrame->data[2], width, height, videoFrame->linesize[2]);
yuvFrame->chromaR = chromaR;
yuvFrame->width = videoCodecCtx->width;
yuvFrame->height = videoCodecCtx->height;
//get the video frame's pts
/** av_frame_get_best_effort_timestamp actually returns the AVFrame's int64_t best_effort_timestamp field **/
yuvFrame->position = av_frame_get_best_effort_timestamp(videoFrame) * videoTimeBase;
const int64_t frameDuration = av_frame_get_pkt_duration(videoFrame);
if (frameDuration) {
yuvFrame->duration = frameDuration * videoTimeBase;
yuvFrame->duration += videoFrame->repeat_pict * videoTimeBase * 0.5;
} else {
yuvFrame->duration = 1.0 / fps;
}
// LOGI("VFD: %.4f %.4f | %lld ", yuvFrame->position, yuvFrame->duration, av_frame_get_pkt_pos(videoFrame));
// LOGI("leave VideoDecoder::handleVideoFrame()...");
return yuvFrame;
}
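copyFrameData itself is not shown in the excerpt; presumably it copies one plane row by row, honoring FFmpeg's linesize padding (linesize can be larger than the visible width). A sketch under that assumption:

#include <cstring>  // memcpy

// Likely shape of copyFrameData: copy `width` bytes per row from a plane
// whose rows are `linesize` bytes apart.
static void copyFrameData(uint8_t* dst, uint8_t* src, int width, int height, int linesize) {
    for (int i = 0; i < height; ++i) {
        memcpy(dst, src, width);
        dst += width;      // destination is tightly packed
        src += linesize;   // source rows include padding
    }
}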
Next the textures are uploaded: the three YUV planes are uploaded into three separate textures.
void YUVTextureFrame::updateTexImage() {
// LOGI("YUVTextureFrame::updateTexImage");
if (frame) {
// LOGI("start upload texture");
int frameWidth = frame->width;
int frameHeight = frame->height;
if(frameWidth % 16 != 0){
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
}
uint8_t *pixels[3] = { frame->luma, frame->chromaB, frame->chromaR };
int widths[3] = { frameWidth, frameWidth >> 1, frameWidth >> 1 };
int heights[3] = { frameHeight, frameHeight >> 1, frameHeight >> 1 };
for (int i = 0; i < 3; ++i) {
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_2D, textures[i]);
if (checkGlError("glBindTexture")) {
return;
}
glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, widths[i], heights[i], 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, pixels[i]);
}
}
}
Once that's done we're back in drawFrame(). Before the final draw into the FBO, let's look at the shader code.
The matrices passed in here are all identity matrices; presumably they exist so that rotation, cropping and the like can be added later, but the current source only ever uses the identity.
static char* NO_FILTER_VERTEX_SHADER =
"attribute vec4 vPosition;\n"
"attribute vec4 vTexCords;\n"
"varying vec2 yuvTexCoords;\n"
"uniform highp mat4 texMatrix;\n"
"uniform highp mat4 trans; \n"
"void main() {\n"
" yuvTexCoords = (texMatrix*vTexCords).xy;\n"
" gl_Position = trans * vPosition;\n"
"}\n";
static char* YUV_FRAME_FRAGMENT_SHADER =
"varying highp vec2 yuvTexCoords;\n"
"uniform sampler2D s_texture_y;\n"
"uniform sampler2D s_texture_u;\n"
"uniform sampler2D s_texture_v;\n"
"void main(void)\n"
"{\n"
"highp float y = texture2D(s_texture_y, yuvTexCoords).r;\n"
"highp float u = texture2D(s_texture_u, yuvTexCoords).r - 0.5;\n"
"highp float v = texture2D(s_texture_v, yuvTexCoords).r - 0.5;\n"
"\n"
"highp float r = y + 1.402 * v;\n"
"highp float g = y - 0.344 * u - 0.714 * v;\n"
"highp float b = y + 1.772 * u;\n"
"gl_FragColor = vec4(r,g,b,1.0);\n"
"}\n";
Then comes the OpenGL draw, which renders into the FBO. Note that a texture rendered through an FBO comes out vertically flipped, so the texture coordinates have to be inverted. After the YUV data has been converted and drawn into a standalone texture via the FBO, that texture is copied once more into a texture id from the ring queue, and from there normal drawing can proceed (a rough sketch of this texture-to-texture copy follows the code below).
void AVSynchronizer::renderToVideoQueue(GLuint inputTexId, int width, int height, float position) {
if (!passThorughRender){
LOGE("renderToVideoQueue::passThorughRender is NULL");
return;
}
if (!circleFrameTextureQueue) {
LOGE("renderToVideoQueue::circleFrameTextureQueue is NULL");
return;
}
//note: the step above is done first because videoEffectProcessor might be slow, and we don't want to hold the circleQueue lock for that long
bool isFirstFrame = circleFrameTextureQueue->getIsFirstFrame();
FrameTexture* frameTexture = circleFrameTextureQueue->lockPushCursorFrameTexture();
if (NULL != frameTexture) {
frameTexture->position = position;
// LOGI("Render To TextureQueue texture Position is %.3f ", position);
//cpy input texId to target texId
passThorughRender->renderToTexture(inputTexId, frameTexture->texId);
circleFrameTextureQueue->unLockPushCursorFrameTexture();
frameAvailable();
// backup the first frame
if (isFirstFrame) {
FrameTexture* firstFrameTexture = circleFrameTextureQueue->getFirstFrameFrameTexture();
if (firstFrameTexture) {
//cpy input texId to target texId
passThorughRender->renderToTexture(inputTexId, firstFrameTexture->texId);
}
}
}
}
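passThorughRender->renderToTexture is not shown in the excerpt. Conceptually it attaches the target texture to an FBO and draws the input texture as a full-screen quad, with the V texture coordinates flipped to undo the FBO's vertical inversion. A rough sketch under those assumptions (the program and its attribute/uniform names are placeholders, not the project's exact pass-through renderer):

// Illustrative texture-to-texture copy via an FBO; names are assumptions.
void renderToTextureSketch(GLuint fbo, GLuint program,
                           GLuint inputTexId, GLuint targetTexId,
                           int width, int height) {
    static const GLfloat vertices[] = { -1.0f, -1.0f,  1.0f, -1.0f,
                                        -1.0f,  1.0f,  1.0f,  1.0f };
    // V coordinates flipped so the copied frame is not upside down
    static const GLfloat flippedTexCoords[] = { 0.0f, 1.0f,  1.0f, 1.0f,
                                                0.0f, 0.0f,  1.0f, 0.0f };
    glBindFramebuffer(GL_FRAMEBUFFER, fbo);
    // draw into the target texture by attaching it as the color buffer
    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, targetTexId, 0);
    glViewport(0, 0, width, height);
    glUseProgram(program);
    GLint posLoc = glGetAttribLocation(program, "vPosition");
    GLint texLoc = glGetAttribLocation(program, "vTexCoord");
    glVertexAttribPointer(posLoc, 2, GL_FLOAT, GL_FALSE, 0, vertices);
    glEnableVertexAttribArray(posLoc);
    glVertexAttribPointer(texLoc, 2, GL_FLOAT, GL_FALSE, 0, flippedTexCoords);
    glEnableVertexAttribArray(texLoc);
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, inputTexId);
    glUniform1i(glGetUniformLocation(program, "texSampler"), 0);
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
    glBindFramebuffer(GL_FRAMEBUFFER, 0);
}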
That wraps up the analysis of how the data gets filled in. I haven't walked through the audio fill path; it is similar to video: after decoding, each frame's timestamp, duration and decoded data are wrapped up and pushed into audioFrameQueue, which is a plain standard-library std::queue.
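The audio enqueue step isn't shown either, but based on the fields fillAudioData() reads below (samples, size, position, duration), the effect is roughly this (a condensed sketch; the decode/resample step is omitted and the function name is made up):

// Sketch only: wrap decoded PCM into an AudioFrame and push it onto the queue.
void enqueueDecodedAudioSketch(byte* pcm, int numBytes, float position, float duration) {
    AudioFrame* frame = new AudioFrame();
    frame->samples = new byte[numBytes];
    memcpy(frame->samples, pcm, numBytes);
    frame->size = numBytes;
    frame->position = position;    // pts in seconds, later used as moviePosition
    frame->duration = duration;
    pthread_mutex_lock(&audioFrameQueueMutex);
    audioFrameQueue->push(frame);
    bufferedDuration += frame->duration;   // tracked so the decode thread knows when to pause
    pthread_mutex_unlock(&audioFrameQueueMutex);
}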
Now we come to the OpenSL ES side. A callback interface is registered for audio rendering; let's see what it does.
void AudioOutput::producePacket() {
//call back into playerController to obtain the buffer
if (playingState == PLAYING_STATE_PLAYING) {
int actualSize = produceDataCallback(buffer, bufferSize, ctx);
if (actualSize > 0 && playingState == PLAYING_STATE_PLAYING) {
//enqueue the provided data into the playback buffer queue
(*audioPlayerBufferQueue)->Enqueue(audioPlayerBufferQueue, buffer, actualSize);
}
}
}
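producePacket is driven by OpenSL ES's buffer-queue callback: each time the player has consumed a buffer, the registered callback fires and we enqueue the next one. A rough sketch of the wiring (object/interface creation omitted; audioPlayerBufferQueue as in the code above, the function names are illustrative):

// Called by OpenSL ES on its own thread whenever a buffer has been consumed.
static void audioPlayerCallback(SLAndroidSimpleBufferQueueItf bq, void* context) {
    AudioOutput* output = reinterpret_cast<AudioOutput*>(context);
    output->producePacket();   // fetch more PCM via produceDataCallback and Enqueue it
}

SLresult AudioOutput::registerPlayerCallbackSketch() {
    // register the callback on the buffer queue interface obtained earlier
    SLresult result = (*audioPlayerBufferQueue)->RegisterCallback(
            audioPlayerBufferQueue, audioPlayerCallback, this);
    // typically followed by enqueueing a first buffer and setting SL_PLAYSTATE_PLAYING
    return result;
}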
A quick glance suggests this just fetches data and writes it into the queue, but it also contains the step that notifies the video side to draw a frame. Fetching the data eventually ends up in this method.
int VideoPlayerController::consumeAudioFrames(byte* outData, size_t bufferSize) {
int ret = bufferSize;
if(this->isPlaying &&
synchronizer && !synchronizer->isDestroyed && !synchronizer->isPlayCompleted()) {
// LOGI("Before synchronizer fillAudioData...");
ret = synchronizer->fillAudioData(outData, bufferSize);
// LOGI("After synchronizer fillAudioData... ");
signalOutputFrameAvailable();
} else {
LOGI("VideoPlayerController::consumeAudioFrames set 0");
memset(outData, 0, bufferSize);
}
return ret;
}
A/V sync is usually done by having the video follow the audio, so the audio side needs no adjustment: it just takes data and plays it. The pts of the current audio frame is taken as the reference position, and the video frames synchronize against it.
int AVSynchronizer::fillAudioData(byte* outData, int bufferSize) {
// LOGI("enter AVSynchronizer::fillAudioData... buffered is %d", buffered);
this->signalDecodeThread();
if(buffered) {
this->checkPlayState();
// LOGI("fillAudioData if(buffered) circleFrameTextureQueue->getValidSize() %d", circleFrameTextureQueue->getValidSize());
memset(outData, 0, bufferSize);
return bufferSize;
}
int needBufferSize = bufferSize;
while (bufferSize > 0) {
if (NULL == currentAudioFrame) {
pthread_mutex_lock(&audioFrameQueueMutex);
int count = audioFrameQueue->size();
// LOGI("audioFrameQueue->size() is %d", count);
if (count > 0) {
AudioFrame *frame = audioFrameQueue->front();
bufferedDuration -= frame->duration;
audioFrameQueue->pop();
if (!decoder->hasSeekReq()) {
//avoid moviePosition jumping around while the seek bar is being dragged
moviePosition = frame->position;
}
currentAudioFrame = new AudioFrame();
currentAudioFramePos = 0;
int frameSize = frame->size;
currentAudioFrame->samples = new byte[frameSize];
memcpy(currentAudioFrame->samples, frame->samples, frameSize);
currentAudioFrame->size = frameSize;
delete frame;
}
pthread_mutex_unlock(&audioFrameQueueMutex);
}
if (NULL != currentAudioFrame) {
//copy data from the frame's samples into the output buffer
byte* bytes = currentAudioFrame->samples + currentAudioFramePos;
int bytesLeft = currentAudioFrame->size - currentAudioFramePos;
int bytesCopy = std::min(bufferSize, bytesLeft);
memcpy(outData, bytes, bytesCopy);
bufferSize -= bytesCopy;
outData += bytesCopy;
if (bytesCopy < bytesLeft)
currentAudioFramePos += bytesCopy;
else {
delete currentAudioFrame;
currentAudioFrame = NULL;
}
} else {
LOGI("fillAudioData NULL == currentAudioFrame");
memset(outData, 0, bufferSize);
bufferSize = 0;
break;
}
}
// LOGI("leave AVSynchronizer::fillAudioData...");
return needBufferSize - bufferSize;
}
Once that is done, the video thread is notified to draw.
/** draw a video frame **/
void VideoOutput::signalFrameAvailable() {
// LOGI("enter VideoOutput::signalFrameAvailable surfaceExists is %d", surfaceExists);
if(surfaceExists){
if (handler)
handler->postMessage(new Message(VIDEO_OUTPUT_MESSAGE_RENDER_FRAME));
}
}
bool VideoOutput::renderVideo() {
FrameTexture* texture = NULL;
produceDataCallback(&texture, ctx, forceGetFrame);
if (NULL != texture && NULL != renderer) {
// LOGI("VideoOutput::renderVideo() ");
eglCore->makeCurrent(renderTexSurface);
renderer->renderToViewWithAutoFill(texture->texId, screenWidth, screenHeight, texture->width, texture->height);
if (!eglCore->swapBuffers(renderTexSurface)) {
LOGE("eglSwapBuffers(renderTexSurface) returned error %d", eglGetError());
}
}
if(forceGetFrame){
forceGetFrame = false;
}
return true;
}
Here is the key spot where video frames are synchronized:
FrameTexture* AVSynchronizer::getCorrectRenderTexture(bool forceGetFrame) {
FrameTexture *texture = NULL;
if (!circleFrameTextureQueue) {
LOGE("getCorrectRenderTexture::circleFrameTextureQueue is NULL");
return texture;
}
int leftVideoFrames = decoder->validVideo() ? circleFrameTextureQueue->getValidSize() : 0;
if (leftVideoFrames == 1) {
return texture;
}
while (true) {
int ret = circleFrameTextureQueue->front(&texture);
if(ret > 0){
if (forceGetFrame) {
return texture;
}
const float delta = (moviePosition - DEFAULT_AUDIO_BUFFER_DURATION_IN_SECS) - texture->position;
if (delta < (0 - syncMaxTimeDiff)) {
//the video is well ahead of the audio, so keep rendering the previous frame
// LOGI("video is well ahead of audio, keep the previous frame: moviePosition is %.4f texture->position is %.4f", moviePosition, texture->position);
texture = NULL;
break;
}
circleFrameTextureQueue->pop();
if (delta > syncMaxTimeDiff) {
//the video is well behind the audio; keep pulling frames from the queue until we find a suitable one
// LOGI("video is well behind audio, keep pulling from the queue: moviePosition is %.4f texture->position is %.4f", moviePosition, texture->position);
continue;
} else {
break;
}
} else{
texture = NULL;
break;
}
}
return texture;
}
In short, frames are taken one by one from the head of the ring queue. If the gap between a frame's timestamp and the audio position is within the configured threshold, that frame is used for drawing; otherwise we either keep polling the queue or break out and wait for the next draw notification, depending on whether the audio is ahead of or behind the video.
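A worked example with syncMaxTimeDiff = 0.05 (the drift limit set during init), ignoring DEFAULT_AUDIO_BUFFER_DURATION_IN_SECS for simplicity: if moviePosition is 10.00 and the head frame's position is 10.20, delta ≈ -0.20 < -0.05, so the video is ahead and the previous frame keeps being shown; if the head frame's position is 9.80, delta ≈ 0.20 > 0.05, so that frame is popped and discarded and the next one is checked; if it is 10.03, |delta| ≤ 0.05 and the frame is popped and rendered.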
Finally, a summary of the flow:
Audio renders at its own natural pace; video synchronizes to the pts of the audio frames, and the video render thread is notified to draw.
Reference: source code address