接上篇博客,更换成AudioFifo音频转码

之前的ffmpeg的博客,大多用audiofifo做音频转码,并没有用avfilter,当然相比较来说,AVFilter更加简单,适合无脑上手。不过其实它底层也是用fifo实现。下面我们自己实现fifo。重点理解音频pts的计算。还有重采样。
这里只上dbt_rtmp.h和dbt_rtmp.cpp的代码,队列部分代码不用变。

//dbt_rtmp.cpp
#include "dbt_rtmp.h" /* NOTE(review): header name lost in web extraction; reconstructed from the "//dbt_rtmp.cpp" marker above */

/* Reset *packet to a clean, payload-free state.
 * The data/size pair is cleared explicitly because av_init_packet()
 * initializes the remaining fields but does not touch data/size. */
void init_packet(AVPacket *packet){
    packet->data = NULL;
    packet->size = 0;
    av_init_packet(packet);
}
/* Rescale pkt's timing fields (pts/dts/duration) from src_tb to dst_tb,
 * leaving unset timestamps (AV_NOPTS_VALUE) alone.
 * NOTE(review): this re-implements libavcodec's av_packet_rescale_ts()
 * (exported since FFmpeg 2.3); on FFmpeg builds that export the symbol
 * this definition collides at link time — confirm the target FFmpeg
 * version or rename this helper. */
void av_packet_rescale_ts(AVPacket *pkt, AVRational src_tb, AVRational dst_tb)
{
    if (pkt->pts != AV_NOPTS_VALUE)
        pkt->pts = av_rescale_q(pkt->pts, src_tb, dst_tb);
    if (pkt->dts != AV_NOPTS_VALUE)
        pkt->dts = av_rescale_q(pkt->dts, src_tb, dst_tb);
    if (pkt->duration > 0)
        pkt->duration = av_rescale_q(pkt->duration, src_tb, dst_tb);
}


/* Create one output stream (audio or video) on out_format_context and
 * pre-configure its (deprecated) AVStream::codec encoder context.
 * Aborts the process if the encoder or stream cannot be created.
 *
 * Fix: the original tested `if (!codec)` — the address of the caller's
 * out-pointer, which is never NULL — so a missing encoder was silently
 * ignored and later dereferenced. It must test `!*codec`, the result of
 * avcodec_find_encoder().
 *
 * NOTE(review): (*codec)->sample_fmts[0] / pix_fmts[0] are dereferenced
 * without a NULL check — fine for the built-in AAC/H.264 encoders, but
 * would crash for encoders that do not advertise format lists. */
void add_stream(AVFormatContext *out_format_context, AVStream** st, AVCodecContext **out_codec_context, AVCodec** codec, AVCodecID codec_id)
{
    *codec = avcodec_find_encoder(codec_id);
    if (!*codec) { /* fixed: was `!codec`, which is always false */
        fprintf(stderr, "Could not find encoder for '%s'\n",
            avcodec_get_name(codec_id));
        getchar();exit(1);
    }

    *st = avformat_new_stream(out_format_context, *codec);

    if (!*st) {
        fprintf(stderr, "Could not alloc stream\n");
        getchar();exit(1);
    }

    *out_codec_context = (*st)->codec;
    (*st)->id = out_format_context->nb_streams - 1;
    (*out_codec_context)->codec_id = codec_id;
    AVRational time_base;
    switch ((*codec)->type) {
    case AVMEDIA_TYPE_AUDIO:

        (*out_codec_context)->codec_type = AVMEDIA_TYPE_AUDIO;

        (*out_codec_context)->channels = dst_channels;
        (*out_codec_context)->channel_layout = av_get_default_channel_layout(dst_channels);

        /* keep the input sample rate; frame_size is 1024 for AAC */
        (*out_codec_context)->sample_rate = ist_a->codec->sample_rate;
        (*out_codec_context)->frame_size = audio_frame_size;
        (*out_codec_context)->sample_fmt = (*codec)->sample_fmts[0];

        /* audio codec time base: 1 / sample_rate */
        time_base = { 1, (*out_codec_context)->sample_rate };
        (*out_codec_context)->time_base = time_base;

        break;
    case AVMEDIA_TYPE_VIDEO:
        (*out_codec_context)->codec_type = AVMEDIA_TYPE_VIDEO;

        /* mirror the input decoder's time base so frame pts rescaling
         * in encode_video_frame stays consistent */
        (*out_codec_context)->time_base = ist_v->codec->time_base;

        (*out_codec_context)->pix_fmt = (*codec)->pix_fmts[0];
        (*out_codec_context)->width = width;
        (*out_codec_context)->height = height;

        /* rate-control knobs copied from the input stream where possible */
        (*out_codec_context)->me_range = 0;
        (*out_codec_context)->max_qdiff = 4;
        (*out_codec_context)->qmin = ist_v->codec->qmin;
        (*out_codec_context)->qmax = ist_v->codec->qmax;
        (*out_codec_context)->qcompress = 0.6;

        break;
    default:
        break;
    }
    (*out_codec_context)->codec_tag = 0;

    // some formats want stream headers to be separate
    if (out_format_context->oformat->flags & AVFMT_GLOBALHEADER)
        (*out_codec_context)->flags |= CODEC_FLAG_GLOBAL_HEADER;


}


/* Open the H.264 video encoder with low-latency x264 settings; aborts
 * the process on failure.
 * Fixes: the original contained the mojibake token "¶m" where
 * "&param" belonged (the code could not compile as published), opened
 * the global video_st->codec instead of the codec_context parameter,
 * and never freed the options dictionary. */
void open_video(AVCodecContext* codec_context, AVCodec* codec){

    AVDictionary *param = NULL;

    /* x264 private options: fast preset + zero-latency tuning for live
     * RTMP streaming */
    av_dict_set(&param, "preset", "veryfast", 0);
    av_dict_set(&param, "tune", "zerolatency", 0);

    if (avcodec_open2(codec_context, codec, &param) < 0) {
        fprintf(stderr, "could not open codec\n");
        getchar(); exit(1);
    }

    /* avcodec_open2() leaves unconsumed entries behind; release them */
    av_dict_free(&param);
}

/* Open the AAC audio encoder; aborts the process on failure. */
void open_audio(AVCodecContext* audio_codec_context, AVCodec * codec){

    int err = avcodec_open2(audio_codec_context, codec, NULL);
    if (err < 0) {
        printf("Could not open audio codec \n");
        getchar();exit(1);
    }
}


/* Encode one scaled video frame and interleave-write it to the muxer.
 * frame == NULL flushes the encoder's delayed frames.
 * Returns 0 when a packet was written, 1 when nothing was produced or
 * the write failed (during flush, 1 means the encoder is drained). */
int encode_video_frame(AVFrame *frame,
    AVFormatContext *out_format_context,
    AVStream *video_st){

    AVCodecContext* out_codec_context = video_st->codec;
    int got_packet;


    if (frame){
        // rescale pts from the decoder's time base to the encoder's time base
        frame->pts = av_rescale_q(frame->pts, ist_v->codec->time_base, video_st->codec->time_base);
        frame->pict_type = AV_PICTURE_TYPE_NONE;
    }

    int ret = avcodec_encode_video2(out_codec_context, &venc_pkt,
        frame, &got_packet);

    if (ret < 0 || !got_packet){ // during flush, failure here means the delayed (buffered) frames are exhausted

        return 1;
    }

    // codec-layer time base -> muxer (stream) time base
    av_packet_rescale_ts(&venc_pkt, video_st->codec->time_base, video_st->time_base);

    venc_pkt.stream_index = video_st->index;

    printf("video info--- enc_pkt:pts:%lld\t dts:%lld\t duration:%d\n", venc_pkt.pts, venc_pkt.dts, venc_pkt.duration);

    last_video_pts = venc_pkt.pts; // remembered for the a/v interleaving decision in encode_thread
    ret = av_interleaved_write_frame(out_format_context, &venc_pkt);


    if (ret < 0){
        printf("write video frame failed!\n");
        return 1;
    }
    else{

        printf("write video frame success\t%d\n", videoframecnt);

        videoframecnt++;
    }

    return 0;
}


/* Encode one resampled audio frame and interleave-write it to the muxer.
 * frame == NULL flushes the encoder's delayed frames.
 * Returns 0 when a packet was written, 1 otherwise.
 * NOTE(review): av_bitstream_filter_filter's return value is ignored
 * and the output buffer it may allocate into aenc_pkt.data is never
 * freed — likely per-packet leak; confirm against the filter contract. */
int encode_audio_frame(AVFrame *frame, int nbsamples,
    AVFormatContext *output_format_context, AVStream* st){
    int got_packet;

    if (frame){
        // rescale pts from the decoder's time base to the encoder's time base
        frame->pts = av_rescale_q(frame->pts, ist_a->codec->time_base, audio_st->codec->time_base);

    }

    int ret = avcodec_encode_audio2(st->codec, &aenc_pkt,
        frame, &got_packet);

    if (ret < 0 || !got_packet){

        return 1;
    }

    // codec-layer time base -> muxer (stream) time base
    av_packet_rescale_ts(&aenc_pkt, audio_st->codec->time_base, audio_st->time_base);

    aenc_pkt.stream_index = audio_st->index;

    last_audio_pts = aenc_pkt.pts; // remembered for the a/v interleaving decision in encode_thread

    //printf("audio info--- enc_pkt:pts:%lld\t dts:%lld\t duration:%d\n", aenc_pkt.pts, aenc_pkt.dts, aenc_pkt.duration);

    // FLV/RTMP wants raw AAC with an AudioSpecificConfig, not ADTS framing
    av_bitstream_filter_filter(m_aac_adtstoasc, audio_st->codec, NULL, &aenc_pkt.data, &aenc_pkt.size, aenc_pkt.data, aenc_pkt.size, 0);


    ret = av_interleaved_write_frame(output_format_context, &aenc_pkt);



    if (ret < 0){
        printf("write audio frame failed!\n");
        return 1;
    }
    else{
        audioframecnt++;
    //  printf("write audio frame success!\t%d\n", audioframecnt);

    }

    return 0;
}



/* Decode one video packet, convert it to the encoder's pixel format via
 * sws_scale, and push the result onto the video queue.
 * NOTE(review): the same global video_frame / video_data_buffer pair is
 * pushed every time — the queue must deep-copy frames, or a later
 * decode will overwrite queued pixel data; verify queue_push's copy
 * semantics. */
void decode_video_frame(AVPacket *pkt){


    // demux (stream) time base -> decoder (codec) time base
    av_packet_rescale_ts(pkt, ist_v->time_base, ist_v->codec->time_base);

    int got_picture;
    avcodec_decode_video2(ist_v->codec, videosrcFrame, &got_picture, pkt);
    printf("解析视频called,got_picture:%d\n", got_picture);
    if (got_picture){

        // best-effort timestamp papers over streams with missing pts values
        videosrcFrame->pts = av_frame_get_best_effort_timestamp(videosrcFrame);

        avpicture_fill((AVPicture *)video_frame, video_data_buffer, video_codec_context->pix_fmt, video_codec_context->width, video_codec_context->height);

        sws_scale(sws_ctx, videosrcFrame->data, videosrcFrame->linesize, 0, height, video_frame->data, video_frame->linesize);
        video_frame->pts = videosrcFrame->pts;
        queue_push(&videoq, video_frame);
        printf("放入了视频\n");
    }

}


/* Decode one audio packet, resample it into the encoder's sample format,
 * and buffer the samples in the AVAudioFifo. Whenever the fifo holds at
 * least one full encoder frame (out_framesize samples), pop a frame,
 * stamp its pts, and push it onto the audio queue.
 *
 * pts scheme: only the FIRST decoded frame's timestamp is taken from
 * the input; each subsequent frame's pts is produced by accumulating
 * iaudio_duration (still in the decoder-layer time base).
 *
 * NOTE(review): audio_data_buffer is reallocated by
 * av_samples_alloc_array_and_samples() on every call and never freed —
 * this leaks once per packet; confirm and free after the fifo write. */
void decode_audio_frame(AVPacket *pkt){


    // demux (stream) time base -> decoder (codec) time base
    av_packet_rescale_ts(pkt, ist_a->time_base, ist_a->codec->time_base);

    int got_frame;


    avcodec_decode_audio4(ist_a->codec, audiosrcFrame, &got_frame, pkt);
    printf("解析音频called,got_frame:%d\n", got_frame);
    if (got_frame){

        av_samples_alloc_array_and_samples(&audio_data_buffer, NULL, audio_codec_context->channels, audiosrcFrame->nb_samples, audio_codec_context->sample_fmt, 1);
        swr_convert(swrCtx, audio_data_buffer, audiosrcFrame->nb_samples,
            (const uint8_t**)audiosrcFrame->data, audiosrcFrame->nb_samples);
        av_audio_fifo_realloc(audiofifo, av_audio_fifo_size(audiofifo) + audiosrcFrame->nb_samples);
        av_audio_fifo_write(audiofifo, (void **)audio_data_buffer, audiosrcFrame->nb_samples);


        if (!is_set_audio_first_pts){
            int64_t opts = av_frame_get_best_effort_timestamp(audiosrcFrame);
            is_set_audio_first_pts = 1;
            // the first frame anchors the running pts counter (decoder time base)
            audio_first_pts = opts;
        }


        while (av_audio_fifo_size(audiofifo) >= out_framesize){

            //int frame_size = FFMIN(av_audio_fifo_size(audiofifo), out_framesize);

            audio_frame->nb_samples = out_framesize;
            audio_frame->channel_layout = av_get_default_channel_layout(audio_codec_context->channels);
            audio_frame->format = audio_codec_context->sample_fmt;
            audio_frame->sample_rate = audio_codec_context->sample_rate;

            av_frame_get_buffer(audio_frame, 0);
            av_audio_fifo_read(audiofifo, (void **)audio_frame->data, out_framesize);

            audio_frame->pts = audio_first_pts; // pts here is still in the decoder-layer time base

            audio_first_pts += iaudio_duration;

            queue_push(&audioq, audio_frame);


        }


    }

}


/* Demux loop (runs on its own thread): read packets from the input,
 * route them to the matching decoder, and signal EOF via `finished`.
 * Fix: the original freed the packet only once, after the loop — every
 * payload allocated by av_read_frame() inside the loop leaked. Each
 * packet is now released per iteration (the decode_* helpers only read
 * from it; they do not take ownership). */
void read_decode_thread(int arg){

    int ret;
    AVPacket pkt;
    init_packet(&pkt);
    for (;;){

        ret = av_read_frame(ifmt_ctx, &pkt);
        if (ret < 0){
            printf("到文件尾了\n");
            finished = 1;
            break;
        }

        if (pkt.stream_index == iv_index){
            printf("开始解析视频\n");
            decode_video_frame(&pkt);

        }
        else if (pkt.stream_index == ia_index){
            printf("开始解析音频\n");
            decode_audio_frame(&pkt);
        }

        /* av_read_frame() allocates a fresh payload each call; release
         * it here so packets from skipped streams don't leak either. */
        av_free_packet(&pkt);
    }
    av_free_packet(&pkt);
}

/* Encode loop (runs on its own thread): pops decoded frames from the
 * audio/video queues, always servicing whichever stream is behind
 * (av_compare_ts on the last written pts of each stream), and sleeps to
 * pace output against the wall clock for a live RTMP push.
 * Exits when a queue pop returns NULL (queues drained after EOF).
 * NOTE(review): `*pframe = *frame` is a shallow AVFrame copy (data
 * buffers are shared), and the audio path encodes `frame` directly
 * while ignoring pframe — confirm the queue's ownership model.
 * NOTE(review): printf uses %x for a pointer and %d for an int64 pts —
 * mismatched format specifiers; cosmetic here but technically UB. */
void encode_thread(int arg){


    AVFrame *pframe = av_frame_alloc();
    AVFrame* frame = NULL;
    for (;;){

        // audio pts lags video (or ties): encode audio next
        if (av_compare_ts(last_audio_pts, audio_st->time_base, last_video_pts, video_st->time_base) <= 0){
            printf("现在去取音频\n");
            frame = queue_pop(&audioq, finished);

            if (frame == NULL){
                printf("audio queue_pop exit\n");
                break;
            }

            printf("audio frame----%x,pts:%d\n", &frame, frame->pts);
            // wall-clock duration of one output audio frame, in seconds
            double delay = (double)out_framesize / audio_st->codec->sample_rate;

            audio_clock += delay; 
            // sleep if we are ahead of real time
            double audiodiff = audio_clock - (double)av_gettime()/1000000;
            if (audiodiff > 0){
                Sleep(audiodiff*1000);
            }
            *pframe = *frame;
            encode_audio_frame(frame, out_framesize, ofmt_ctx, audio_st);

        }
        else{
            printf("现在去取视频\n");
            frame = queue_pop(&videoq, finished);
            if (frame == NULL){
                printf("video queue_pop exit\n");
                break;
            }

            printf("video frame----%x,pts:%d\n", &frame, frame->pts);
            // pace video against the audio clock
            double ptstime = last_video_pts*av_q2d(video_st->time_base);
            double videodiff = start_time+ptstime - audio_clock;
            if (videodiff > 0){
                Sleep(videodiff * 1000);
            }
            *pframe = *frame;
            encode_video_frame(pframe, ofmt_ctx, video_st);

        }

    }
    av_frame_free(&pframe);

}



/* Program entry: open and probe the input, build the output muxer (FLV
 * for rtmp:// URLs), configure the scaler/resampler/audio fifo, run the
 * demux-decode thread and the encode-mux thread to completion, then
 * flush both encoders and write the trailer. */
int main(int argc, char **argv){

    av_register_all();
    avformat_network_init();

    int ret;
    AVDictionary* in_options = NULL;
    //av_dict_set(&in_options, "re", "1", 0);
    if ((ret = avformat_open_input(&ifmt_ctx, INPUTURL, 0, &in_options)) < 0) {
        printf("Could not open input file.");
    }

    avformat_find_stream_info(ifmt_ctx, 0);

    // locate the video/audio streams and open their decoders
    for (int i = 0; i < ifmt_ctx->nb_streams; i++) {

        if (ifmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO){
            iv_index = i;

            ist_v = ifmt_ctx->streams[i];
            width = ist_v->codec->width;
            height = ist_v->codec->height;

            AVCodec *codec = avcodec_find_decoder(ist_v->codec->codec_id);

            /* open the codec */
            if (avcodec_open2(ist_v->codec, codec,NULL) < 0) {
                fprintf(stderr, "could not open input video decoder codec\n");
                getchar();exit(1);
            }

        }
        else if (ifmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO){
            ia_index = i;
            ist_a = ifmt_ctx->streams[i];

            AVCodec *codec = avcodec_find_decoder(ist_a->codec->codec_id);

            /* open the codec */
            if (avcodec_open2(ist_a->codec, codec, NULL) < 0) {
                fprintf(stderr, "could not open input audio decoder codec\n");
                getchar(); exit(1);
            }
        }

    }
    av_dump_format(ifmt_ctx, 0, INPUTURL, 0);


    // NOTE(review): this context is replaced by avformat_alloc_output_context2
    // below, so the avformat_alloc_context() result leaks
    ofmt_ctx = avformat_alloc_context();
    if (strstr(OUTPUTURL, "rtmp")){
        avformat_alloc_output_context2(&ofmt_ctx, NULL, "flv", OUTPUTURL); //RTMP requires the FLV muxer
    }
    else{
        avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, OUTPUTURL);
    }

    AVOutputFormat* fmt = ofmt_ctx->oformat;
    if (!fmt) {
        fprintf(stderr, "Could not find suitable output format");
        getchar();exit(1);
    }


    // create the two output streams and open their encoders
    add_stream(ofmt_ctx, &video_st, &video_codec_context, &video_codec, video_codecID);
    add_stream(ofmt_ctx, &audio_st, &audio_codec_context, &audio_codec, audio_codecID);

    open_video(video_codec_context, video_codec);
    open_audio(audio_codec_context, audio_codec);

    av_dump_format(ofmt_ctx, 0, OUTPUTURL, 1);


    // pixel-format converter (same dimensions, possibly different pix_fmt)
    sws_ctx = sws_getContext(
        width, height, ist_v->codec->pix_fmt,
        width, height, video_codec_context->pix_fmt,
        SWS_BICUBIC, NULL, NULL, NULL);
    video_data_buffer = new uint8_t[avpicture_get_size(video_codec_context->pix_fmt, video_codec_context->width, video_codec_context->height)];

    // audio resampler: input stream layout/format -> encoder layout/format
    swrCtx = swr_alloc_set_opts(NULL, av_get_default_channel_layout(audio_codec_context->channels), audio_codec_context->sample_fmt, audio_codec_context->sample_rate,
        av_get_default_channel_layout(ist_a->codec->channels), ist_a->codec->sample_fmt, ist_a->codec->sample_rate,
        0, NULL);
    swr_init(swrCtx);

    // fifo re-chunks decoded audio into out_framesize-sample encoder frames
    audiofifo = av_audio_fifo_alloc(audio_codec_context->sample_fmt, audio_codec_context->channels, 1);
    out_framesize = audio_codec_context->frame_size;
    double iaudio_duration_s = (double)ist_a->codec->frame_size / ist_a->codec->sample_rate; // duration of one input audio frame, in seconds
    iaudio_duration = iaudio_duration_s / av_q2d(ist_a->codec->time_base);// converted into the input stream's decoder-layer time base
    audio_frame = av_frame_alloc();


    /* open the output file, if needed */
    if (!(fmt->flags & AVFMT_NOFILE)) {
        ret = avio_open(&ofmt_ctx->pb, OUTPUTURL, AVIO_FLAG_WRITE);
        if (ret < 0) {
            fprintf(stderr, "Could not open '%s': %s\n", OUTPUTURL,
                "");
            return 1;
        }
    }
    init_packet(&venc_pkt);

    videosrcFrame = av_frame_alloc();

    init_packet(&aenc_pkt);
    video_frame = av_frame_alloc();
    audiosrcFrame = av_frame_alloc();

    queue_init(&audioq,50,2);
    queue_init(&videoq,50,1);
    m_aac_adtstoasc = av_bitstream_filter_init("aac_adtstoasc");
    /*AVDictionary *out_options = NULL;
    av_dict_set(&out_options, "rtmp_buffer", "1024000", 0);
    av_dict_set(&out_options, "max_delay", "500000", 0);
    av_dict_set(&out_options, "timeout", "6", 0);*/
    ret = avformat_write_header(ofmt_ctx, NULL);
    if (ret < 0) {
        fprintf(stderr, "Error occurred when opening output file: %s\n", "");
        return 1;
    }

    out_framesize = audio_codec_context->frame_size;
    // both clocks start at "now" (seconds); used to pace the live push
    audio_clock = start_time=(double)av_gettime() / 1000000;
    thread t1(read_decode_thread, NULL);
    thread t2(encode_thread, NULL);

    t1.join();
    t2.join();


    // drain encoders that buffer frames internally (B-frames, AAC lookahead)
    if (video_codec_context->codec->capabilities &CODEC_CAP_DELAY){
        while (!encode_video_frame(NULL, ofmt_ctx, video_st)){ 
            printf("encode_video_frame while");
            ;
        }
    }
    if (audio_codec_context->codec->capabilities &CODEC_CAP_DELAY){
        while (!encode_audio_frame(NULL, out_framesize, ofmt_ctx,audio_st)){ 
            printf("encode_audio_frame while");
            ;
        }
    }

    av_write_trailer(ofmt_ctx);

    av_bitstream_filter_close(m_aac_adtstoasc);
    avformat_close_input(&ifmt_ctx);
    av_free_packet(&venc_pkt);

    av_frame_free(&videosrcFrame);
    av_frame_free(&audio_frame);
    sws_freeContext(sws_ctx);
    if (ofmt_ctx) {
        avio_closep(&ofmt_ctx->pb);
        avformat_free_context(ofmt_ctx);
    }

    printf("程序运行end");
    return getchar();
}
//dbt_rtmp.h
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <thread>
#include <windows.h>
using namespace std; // NOTE(review): original include names lost in web extraction; reconstructed from usage (std::thread, Sleep, printf)

extern "C"
{

#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h" /* NOTE(review): name lost in extraction; avcodec_* APIs are used throughout */
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/avutil.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
};



/* ---- global state shared by the demux/decode and encode threads ---- */

#define STREAM_FRAME_RATE  25
#define INPUTURL   "yekong.mp4"
//#define OUTPUTURL  "pingfan_out.mp4"
#define OUTPUTURL    "rtmp://localhost:1935/live/stream"
int width, height;                     // input video dimensions (output uses the same)
AVFormatContext *ifmt_ctx = NULL;      // input (demuxer) context
AVStream* ist_v = NULL, *ist_a = NULL; // input video / audio streams
AVFormatContext* ofmt_ctx = NULL;      // output (muxer) context
AVCodecContext *audio_codec_context = NULL, *video_codec_context = NULL; // output encoder contexts
AVCodec* video_codec = NULL, *audio_codec = NULL;
AVStream *video_st = NULL, *audio_st = NULL; // output streams

int videoframecnt = 1;   // written-video-packet counter (starts at 1 for logging)
int audioframecnt;       // written-audio-packet counter
SwsContext *sws_ctx = NULL;          // pixel-format converter
uint8_t *video_data_buffer = NULL;   // backing store for the scaled video frame
int finished;                        // set to 1 by the demux thread at EOF
double audio_clock,start_time;       // wall-clock pacing state, in seconds
uint8_t ** audio_data_buffer = NULL; // resampled audio sample planes
AVAudioFifo* audiofifo = NULL;       // re-chunks decoded audio into encoder-sized frames
SwrContext *swrCtx = NULL;           // audio resampler
int64_t audio_first_pts;             // running audio pts counter (decoder time base)
int64_t iaudio_duration; // duration of one input audio frame, in the decoder-layer time base
int is_set_audio_first_pts = 0;// whether the first audio frame's pts has been captured; only the
                               // first input frame's pts is used — every later pts is produced by
                               // accumulating iaudio_duration onto it
AVFrame*audio_frame, *video_frame;
//video param
AVCodecID video_codecID = AV_CODEC_ID_H264;

AVPacket venc_pkt;  // reusable video encode packet

AVFrame* videosrcFrame = NULL;  // decoded (pre-scale) video frame


AVPacket aenc_pkt;  // reusable audio encode packet

AVFrame* audiosrcFrame = NULL;  // decoded (pre-resample) audio frame

//audio param
int dst_channels = 2;      // output channel count

AVCodecID audio_codecID = AV_CODEC_ID_AAC;
int audio_frame_size = 1024;

AVBitStreamFilterContext * m_aac_adtstoasc;     // aac: ADTS-to-ASC bitstream filter (required for FLV)
int out_framesize; // samples per output audio frame (1024 for AAC)

// indices of the video/audio streams inside the input file
int iv_index = -1, ia_index = -1;

Queue audioq;
Queue videoq;;

int64_t last_video_pts;  // pts of the last written video packet (muxer time base)
int64_t last_audio_pts;  // pts of the last written audio packet (muxer time base)

void init_packet(AVPacket *packet);
void add_stream(AVFormatContext *out_format_context, AVStream** st, AVCodecContext **out_codec_context, AVCodec** codec, AVCodecID codec_id);
void open_video(AVCodecContext* codec_context, AVCodec* codec);
void open_audio(AVCodecContext* audio_codec_context, AVCodec * codec);
int encode_video_frame(AVFrame *frame,AVFormatContext *out_format_context,AVStream *video_st);
int encode_audio_frame(AVFrame *frame, int nbsamples,AVFormatContext *output_format_context, AVStream* st);
void decode_video_frame(AVPacket *pkt);
void decode_audio_frame(AVPacket *pkt);

你可能感兴趣的:(ffmpeg)