yuv,pcm合成mp4

代码涉及音频编码,视频编码,pts计算,可参考前面的几篇博文,已经模块化
这里pts计算直接使用outputStream的时间基,不存在inputStream转outputStream时间基的问题。因为yuv和pcm都是裸数据,no pts。所以直接写pts值就行。如果是从aac和h264合成mp4那么就需要做输入输出流的时间基转换了。关于时间基的理解和转换,参考上篇深入理解pts,dts,time_base

/**
实现yuv和pcm合成mp4
Yuv是176x144的,pcm是16bit,44100HZ,双声道的
*/
#include <stdio.h>
#include <stdlib.h>

extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswresample/swresample.h"
};
#define STREAM_FRAME_RATE  25                       // output video frame rate (fps)
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P           // raw input / encoder pixel format
const int width = 176, height = 144;                // QCIF frame size of the input yuv
const char* input_pcm_file = "in.pcm";              // raw PCM: s16, 44100 Hz, stereo (see add_stream)
const char* input_yuv_file = "in.yuv";              // raw YUV420P, 176x144
/**
 * Create a new output stream in the muxer and pre-configure its encoder
 * context for audio or video with fixed defaults matching the raw inputs
 * (44100 Hz stereo PCM / 176x144 YUV420P at 25 fps). Exits on failure.
 *
 * out_format_context: output muxer the stream is added to
 * st:                 receives the newly created AVStream
 * out_codec_context:  receives the stream's codec context (st->codec, old API)
 * codec:              receives the encoder found for codec_id
 * codec_id:           encoder to look up (e.g. AV_CODEC_ID_H264 / _AAC)
 */
void add_stream(AVFormatContext *out_format_context, AVStream** st, AVCodecContext **out_codec_context, AVCodec** codec, AVCodecID codec_id)
{
    *codec = avcodec_find_encoder(codec_id);
    if (!*codec) {  // FIX: was `!codec`, which tests the out-parameter (always non-NULL), not the lookup result
        fprintf(stderr, "Could not find encoder for '%s'\n",
            avcodec_get_name(codec_id));
        exit(1);
    }

    *st = avformat_new_stream(out_format_context, *codec);
    if (!*st) {
        fprintf(stderr, "Could not alloc stream\n");  // FIX: missing newline
        exit(1);
    }
    *out_codec_context = (*st)->codec;
    (*out_codec_context)->codec_id = codec_id;
    switch ((*codec)->type) {
    case AVMEDIA_TYPE_AUDIO:
        (*out_codec_context)->codec_type = AVMEDIA_TYPE_AUDIO;
        (*out_codec_context)->channel_layout = AV_CH_LAYOUT_STEREO;
        (*out_codec_context)->channels = av_get_channel_layout_nb_channels((*out_codec_context)->channel_layout);
        // samples per second — must match the raw PCM input
        (*out_codec_context)->sample_rate = 44100;
        // use the encoder's preferred sample format if it advertises one
        (*out_codec_context)->sample_fmt = (*codec)->sample_fmts ?
            (*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
        (*out_codec_context)->bit_rate = 64000;

        /** Allow the use of the experimental AAC encoder */
        (*out_codec_context)->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;

        /** Audio stream time base = 1/sample_rate, so pts counts samples. */
        (*st)->time_base.den = (*out_codec_context)->sample_rate;
        (*st)->time_base.num = 1;
        break;
    case AVMEDIA_TYPE_VIDEO:
        (*out_codec_context)->codec_type = AVMEDIA_TYPE_VIDEO;

        (*out_codec_context)->bit_rate = 400000;

        (*out_codec_context)->width = width;
        (*out_codec_context)->height = height;

        // encoder time base = 1/fps
        (*out_codec_context)->time_base.den = STREAM_FRAME_RATE;
        (*out_codec_context)->time_base.num = 1;
        (*out_codec_context)->gop_size = 12;    // at most one intra frame every 12 frames
        (*out_codec_context)->pix_fmt = STREAM_PIX_FMT;

        // container stream time base: 90 kHz, the conventional mp4 video clock
        (*st)->time_base.den = 90000;
        (*st)->time_base.num = 1;

        // rate-control quantizer bounds
        (*out_codec_context)->qmin = 10;
        (*out_codec_context)->qmax = 51;
        (*out_codec_context)->qcompress = 0.6f;

        // no B-frames: keeps dts == pts so the simple pts synthesis below works
        (*out_codec_context)->max_b_frames = 0;
        break;
    default:
        break;
    }

    // some formats want stream headers to be separate
    if (out_format_context->oformat->flags & AVFMT_GLOBALHEADER)
        (*out_codec_context)->flags |= CODEC_FLAG_GLOBAL_HEADER;
}


/**
 * Open the video encoder. For H.264, pass x264 private options
 * ("preset"/"tune") through an AVDictionary. Exits on failure.
 */
void open_video(AVCodecContext* codec_context, AVCodec* codec){
    AVDictionary *param = NULL;
    // H.264 (x264) specific options; ignored by other encoders
    if (codec_context->codec_id == AV_CODEC_ID_H264) {
        av_dict_set(&param, "preset", "fast", 0);        // FIX: was mis-encoded "¶m"
        av_dict_set(&param, "tune", "zerolatency", 0);   // FIX: was mis-encoded "¶m"
    }
    /* open the codec */
    int ret = avcodec_open2(codec_context, codec, &param);  // FIX: was mis-encoded "¶m"
    av_dict_free(&param);  // FIX: dictionary (and any unconsumed entries) was leaked
    if (ret < 0) {
        fprintf(stderr, "could not open codec\n");
        exit(1);
    }
}

/** Open the audio encoder with default options; exit the process on failure. */
void open_audio(AVCodecContext* audio_codec_context, AVCodec *codec){
    int err = avcodec_open2(audio_codec_context, codec, NULL);
    if (err < 0) {
        printf("Could not open audio codec \n");
        exit(1);
    }
}
/**
 * Reset a packet to a clean state; the encoder allocates data/size itself.
 */
void init_packet(AVPacket *packet){
    av_init_packet(packet);
    packet->size = 0;
    packet->data = NULL;
}
int framecount=0;  // count of video frames successfully written (drives synthesized video pts)
int64_t audio_last_pts, video_last_pts;  // last written pts per stream, in each stream's own time base; zero-initialized (static storage)
/**
 * Encode one video frame (or flush with frame == NULL) and write the
 * resulting packet to the muxer, interleaved.
 *
 * Returns 0 on success; 1 when no packet was produced (flush queue
 * drained / encoder still buffering) or the write failed.
 * Consumes `frame` (frees it) in every path.
 */
int encode_video_frame(AVFrame *frame,
    AVFormatContext *out_format_context,
    AVStream *video_st){
    AVCodecContext* out_codec_context = video_st->codec;
    int got_packet;
    AVPacket enc_pkt;
    init_packet(&enc_pkt);
    int ret = avcodec_encode_video2(out_codec_context, &enc_pkt,
        frame, &got_packet);

    if (ret < 0 || !got_packet){ // during flush this means the encoder's delayed-frame queue is empty
        av_free_packet(&enc_pkt);
        av_frame_free(&frame);
        return 1;
    }

    // Raw YUV carries no timestamps, so when the encoder didn't set one,
    // synthesize pts/dts directly in the output stream's time base:
    // frame index * per-frame duration (1 encoder tick rescaled to stream ticks).
    if (enc_pkt.pts == AV_NOPTS_VALUE){
        enc_pkt.duration = av_rescale_q(1, out_codec_context->time_base, video_st->time_base);
        enc_pkt.pts = framecount*enc_pkt.duration;
        enc_pkt.dts = enc_pkt.pts;
    }
    // FIX: was only updated inside the branch above; if the encoder supplies
    // its own pts the a/v interleaving comparison in main() would never advance.
    video_last_pts = enc_pkt.pts;
    enc_pkt.stream_index = video_st->index;
    ret = av_interleaved_write_frame(out_format_context, &enc_pkt);
    av_free_packet(&enc_pkt);
    av_frame_free(&frame);
    if (ret < 0){
        return 1;
    }
    framecount++;
    return 0;
}

/**
 * Read one raw YUV420P frame from *fp, wrap it in an AVFrame (no copy)
 * and hand it to encode_video_frame.
 * Returns 0 on success, 1 on EOF / short read.
 */
int  write_video_frame(AVFormatContext* out_format_context, AVCodecContext *out_codec_context, AVStream *video_st, FILE** fp){
    uint8_t *buffer = new uint8_t[avpicture_get_size(out_codec_context->pix_fmt, out_codec_context->width, out_codec_context->height)];

    // YUV420P frame = width*height luma + 2 * (width/2 * height/2) chroma
    int ret = fread(buffer, out_codec_context->width * out_codec_context->height * 3 / 2, 1, *fp);
    if (ret == 0){
        delete[] buffer;  // FIX: buffer was leaked on EOF
        return 1;
    }
    AVFrame* yuvFrame = av_frame_alloc();
    // Point the frame's data planes into `buffer` (no copy is made).
    avpicture_fill((AVPicture *)yuvFrame, buffer, out_codec_context->pix_fmt, out_codec_context->width, out_codec_context->height);
    encode_video_frame(yuvFrame, out_format_context, video_st);
    // encode_video_frame has encoded + written the data and freed the frame
    // wrapper; the pixel buffer itself is ours to release.
    delete[] buffer;      // FIX: buffer was leaked on every frame
    return 0;
}

/**
 * Encode one audio frame (or flush with frame == NULL) and write the
 * resulting packet to the muxer, interleaved.
 *
 * nbsamples: samples per frame, used to advance the synthesized pts
 *            (audio stream time base is 1/sample_rate, so pts counts samples).
 * Returns 0 on success; 1 when no packet was produced or the write failed.
 * Consumes `frame` (frees it) in every path.
 */
int encode_audio_frame(AVFrame *frame, int nbsamples,
    AVFormatContext *output_format_context,
    AVCodecContext *output_codec_context, AVStream* st){
    int got_packet;
    AVPacket enc_pkt;
    init_packet(&enc_pkt);

    int ret = avcodec_encode_audio2(output_codec_context, &enc_pkt,
        frame, &got_packet);

    if (ret < 0 || !got_packet){
        av_free_packet(&enc_pkt);
        av_frame_free(&frame);
        return 1;
    }

    /** Set a timestamp based on the sample rate for the container. */
    if (enc_pkt.pts == AV_NOPTS_VALUE){
        audio_last_pts += nbsamples;
        enc_pkt.pts = audio_last_pts;
        enc_pkt.dts = enc_pkt.pts;

        // FIX: `frame_size * (1 / sample_rate)` used integer division, so
        // 1/44100 == 0 and duration was always 0. Compute seconds as a
        // double, then convert to stream time-base ticks.
        double duration_s = (double)output_codec_context->frame_size / output_codec_context->sample_rate;
        enc_pkt.duration = (int)(duration_s / av_q2d(st->time_base));
    }
    enc_pkt.stream_index = st->index;
    ret = av_interleaved_write_frame(output_format_context, &enc_pkt);
    av_free_packet(&enc_pkt);
    av_frame_free(&frame);
    if (ret < 0){
        return 1;
    }

    return 0;
}

/**
 * Read one frame's worth of raw PCM from *fp, wrap it in an AVFrame
 * (no copy) and hand it to encode_audio_frame.
 * Returns 0 on success, 1 on EOF / short read.
 */
int write_audio_frame(AVFormatContext* out_format_context, AVCodecContext *out_codec_context, AVStream* st, FILE** fp){
    AVFrame* pFrame = av_frame_alloc();
    pFrame->nb_samples = out_codec_context->frame_size;
    pFrame->format = out_codec_context->sample_fmt;
    int size = av_samples_get_buffer_size(NULL, out_codec_context->channels, out_codec_context->frame_size, out_codec_context->sample_fmt, 1);
    uint8_t * frame_buf = (uint8_t *)av_malloc(size);
    int ret = fread(frame_buf, 1, size, *fp);
    if (ret == 0){
        av_free(frame_buf);      // FIX: was leaked on EOF
        av_frame_free(&pFrame);  // FIX: was leaked on EOF
        return 1;
    }
    // Point the frame's data at frame_buf (no copy is made).
    avcodec_fill_audio_frame(pFrame, out_codec_context->channels, out_codec_context->sample_fmt, (const uint8_t*)frame_buf, size, 1);

    encode_audio_frame(pFrame, out_codec_context->frame_size, out_format_context, out_codec_context, st);
    // encode_audio_frame freed the frame wrapper; the sample buffer is ours.
    av_free(frame_buf);          // FIX: was leaked on every frame
    return 0;
}


/**
 * Entry point: mux raw YUV (176x144) + raw PCM (s16, 44100 Hz, stereo)
 * into output.mp4, encoding H.264 video and AAC audio on the fly.
 *
 * Interleaving strategy: compare the last written audio and video
 * timestamps (each in its own stream time base via av_compare_ts) and
 * feed whichever stream is behind; stop when either input runs out,
 * then drain both encoders' internal buffers.
 */
int main(int argc, char **argv){

    av_register_all();
    const char *out_file = "output.mp4";
    AVOutputFormat* fmt = av_guess_format(NULL, out_file, NULL);
    if (!fmt) {
        fprintf(stderr, "Could not find suitable output format\n");  // FIX: missing newline
        exit(1);
    }
    AVFormatContext* out_format_context = avformat_alloc_context();
    out_format_context->oformat = fmt;
    AVCodecContext *audio_codec_context = NULL, *video_codec_context = NULL;
    AVCodec* video_codec = NULL, *audio_codec = NULL;
    AVStream *video_st = NULL, *audio_st = NULL;
    int have_video = 0, have_audio = 0;

    // mp4 defaults: video -> H.264, audio -> AAC
    if (fmt->video_codec != AV_CODEC_ID_NONE) {
        add_stream(out_format_context, &video_st, &video_codec_context, &video_codec, fmt->video_codec);
        have_video = 1;
    }
    if (fmt->audio_codec != AV_CODEC_ID_NONE) {
        add_stream(out_format_context, &audio_st, &audio_codec_context, &audio_codec, fmt->audio_codec);
        have_audio = 1;
    }

    if (have_video)
        open_video(video_codec_context, video_codec);

    if (have_audio)
        open_audio(audio_codec_context, audio_codec);

    int ret = 0;
    /* open the output file, if needed */
    if (!(fmt->flags & AVFMT_NOFILE)) {
        ret = avio_open(&out_format_context->pb, out_file, AVIO_FLAG_WRITE);
        if (ret < 0) {
            // FIX: message formatted an empty "" where the error belonged
            fprintf(stderr, "Could not open '%s' (error %d)\n", out_file, ret);
            return 1;
        }
    }

    ret = avformat_write_header(out_format_context, NULL);
    if (ret < 0) {
        // FIX: message formatted an empty "" where the error belonged
        fprintf(stderr, "Error occurred when opening output file (error %d)\n", ret);
        return 1;
    }
    FILE* yuvfp = fopen(input_yuv_file, "rb");
    FILE* pcmfp = fopen(input_pcm_file, "rb");
    if (!yuvfp || !pcmfp) {
        // FIX: fopen results were unchecked -> fread(NULL)/fclose(NULL) UB
        fprintf(stderr, "Could not open input file '%s'\n",
            yuvfp ? input_pcm_file : input_yuv_file);
        return 1;
    }
    int encode_video = 1, encode_audio = 1;
    while (encode_video && encode_audio) {
        // av_compare_ts: < gives -1, > gives 1, == gives 0
        if (av_compare_ts(video_last_pts, video_st->time_base,
            audio_last_pts, audio_st->time_base) <= 0) {
            if (encode_video){
                encode_video = !write_video_frame(out_format_context, video_codec_context, video_st, &yuvfp);
            }
        } else {
            if (encode_audio){
                encode_audio = !write_audio_frame(out_format_context, audio_codec_context, audio_st, &pcmfp);
            }
        }
    }
    // Drain encoders that buffer frames internally (lookahead/delay).
    if (video_codec_context->codec->capabilities & CODEC_CAP_DELAY){
        while (!encode_video_frame(NULL, out_format_context, video_st)){ ; }
    }
    if (audio_codec_context->codec->capabilities & CODEC_CAP_DELAY){
        while (!encode_audio_frame(NULL, audio_codec_context->frame_size, out_format_context, audio_codec_context, audio_st)){ ; }
    }

    av_write_trailer(out_format_context);
    if (video_codec_context)
        avcodec_close(video_codec_context);
    if (audio_codec_context)
        avcodec_close(audio_codec_context);
    if (out_format_context) {
        avio_closep(&out_format_context->pb);
        avformat_free_context(out_format_context);
    }
    fclose(yuvfp);
    fclose(pcmfp);
    system("pause");  // NOTE(review): Windows-only pause; no-op failure elsewhere
    return 0;
}

你可能感兴趣的:(ffmpeg,ffmpeg)