之前的ffmpeg博客大多用audiofifo做音频转码,并没有用avfilter。相比较来说,AVFilter更加简单,适合快速上手,不过它底层其实也是用fifo实现的。下面我们自己实现fifo,重点理解音频pts的计算以及重采样。
这里只上dbt_rtmp.h和dbt_rtmp.cpp的代码,队列部分代码不用变。
//dbt_rtmp.cpp
#include
/**
 * Put a packet into the blank state: default fields via av_init_packet(),
 * no payload pointer and a payload size of zero.
 *
 * @param packet packet to reset; dereferenced unconditionally.
 */
void init_packet(AVPacket *packet)
{
    av_init_packet(packet);
    packet->size = 0;
    packet->data = NULL;
}
/**
 * Convert the timing fields of a packet from one time base to another.
 *
 * pts and dts are converted only when they are set (not AV_NOPTS_VALUE);
 * duration is converted only when it is positive.
 *
 * @param pkt    packet whose pts/dts/duration are rewritten in place
 * @param src_tb time base the fields are currently expressed in
 * @param dst_tb time base the fields are converted to
 */
void av_packet_rescale_ts(AVPacket *pkt, AVRational src_tb, AVRational dst_tb)
{
    if (AV_NOPTS_VALUE != pkt->pts) {
        pkt->pts = av_rescale_q(pkt->pts, src_tb, dst_tb);
    }

    if (AV_NOPTS_VALUE != pkt->dts) {
        pkt->dts = av_rescale_q(pkt->dts, src_tb, dst_tb);
    }

    if (0 < pkt->duration) {
        pkt->duration = av_rescale_q(pkt->duration, src_tb, dst_tb);
    }
}
/**
 * Look up the encoder for codec_id, add a new stream for it to the output
 * context and pre-configure that stream's encoder context.
 *
 * Audio streams take their channel count from dst_channels, their frame size
 * from audio_frame_size and their sample rate from the input audio stream
 * (ist_a). Video streams take their time base and quantizer limits from the
 * input video stream (ist_v) and their size from the globals width/height.
 * The first sample/pixel format advertised by the encoder is used.
 *
 * On any failure a message is printed to stderr, the function waits for a
 * key press and terminates the process with exit(1).
 *
 * @param out_format_context output muxer context that receives the stream
 * @param st                 out: the newly created stream
 * @param out_codec_context  out: the encoder context of the new stream
 * @param codec              out: the encoder found for codec_id
 * @param codec_id           id of the encoder to look up
 */
void add_stream(AVFormatContext *out_format_context, AVStream** st, AVCodecContext **out_codec_context, AVCodec** codec, AVCodecID codec_id)
{
    *codec = avcodec_find_encoder(codec_id);
    /* Test the looked-up encoder, not the (never NULL) out-parameter itself. */
    if (!*codec) {
        fprintf(stderr, "Could not find encoder for '%s'\n",
                avcodec_get_name(codec_id));
        getchar();exit(1);
    }

    *st = avformat_new_stream(out_format_context, *codec);
    if (!*st) {
        fprintf(stderr, "Could not alloc stream");
        getchar();exit(1);
    }

    AVCodecContext *enc = (*st)->codec;
    *out_codec_context = enc;
    (*st)->id = out_format_context->nb_streams - 1;
    enc->codec_id = codec_id;

    switch ((*codec)->type) {
    case AVMEDIA_TYPE_AUDIO: {
        if (!(*codec)->sample_fmts) {
            fprintf(stderr, "Encoder '%s' advertises no sample formats\n",
                    avcodec_get_name(codec_id));
            getchar();exit(1);
        }
        enc->codec_type = AVMEDIA_TYPE_AUDIO;
        enc->channels = dst_channels;
        enc->channel_layout = av_get_default_channel_layout(dst_channels);
        enc->sample_rate = ist_a->codec->sample_rate;
        enc->frame_size = audio_frame_size;
        enc->sample_fmt = (*codec)->sample_fmts[0];
        /* Audio timestamps are counted in samples: time base = 1 / sample_rate. */
        AVRational time_base = { 1, enc->sample_rate };
        enc->time_base = time_base;
        break;
    }
    case AVMEDIA_TYPE_VIDEO:
        if (!(*codec)->pix_fmts) {
            fprintf(stderr, "Encoder '%s' advertises no pixel formats\n",
                    avcodec_get_name(codec_id));
            getchar();exit(1);
        }
        enc->codec_type = AVMEDIA_TYPE_VIDEO;
        enc->time_base = ist_v->codec->time_base;
        enc->pix_fmt = (*codec)->pix_fmts[0];
        enc->width = width;
        enc->height = height;
        enc->me_range = 0;
        enc->max_qdiff = 4;
        enc->qmin = ist_v->codec->qmin;
        enc->qmax = ist_v->codec->qmax;
        enc->qcompress = 0.6;
        break;
    default:
        break;
    }

    enc->codec_tag = 0;
    // some formats want stream headers to be separate
    if (out_format_context->oformat->flags & AVFMT_GLOBALHEADER)
        enc->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
/**
 * Open the video encoder with the options preset=veryfast and
 * tune=zerolatency.
 *
 * On failure a message is printed to stderr, the function waits for a key
 * press and terminates the process with exit(1).
 *
 * @param codec_context encoder context to open
 * @param codec         encoder to open the context with
 */
void open_video(AVCodecContext* codec_context, AVCodec* codec){
    AVDictionary *param = NULL;
    av_dict_set(&param, "preset", "veryfast", 0);
    av_dict_set(&param, "tune", "zerolatency", 0);

    /* Open the context that was passed in rather than reaching for the global stream. */
    int ret = avcodec_open2(codec_context, codec, &param);

    /* Whatever entries are left in the dictionary after the call are still owned by us. */
    av_dict_free(&param);

    if (ret < 0) {
        fprintf(stderr, "could not open codec\n");
        getchar(); exit(1);
    }
}
/**
 * Open the audio encoder without any extra options.
 *
 * If the encoder cannot be opened a message is printed, the function waits
 * for a key press and terminates the process with exit(1).
 *
 * @param audio_codec_context encoder context to open
 * @param codec               encoder to open the context with
 */
void open_audio(AVCodecContext* audio_codec_context, AVCodec * codec)
{
    const int rc = avcodec_open2(audio_codec_context, codec, NULL);
    if (rc >= 0) {
        return;
    }

    printf("Could not open audio codec \n");
    getchar();
    exit(1);
}
/**
 * Encode one video frame into the global packet venc_pkt and write the
 * resulting packet to the output.
 *
 * @param frame              frame to encode, or NULL to drain the encoder.
 *                           Its pts is rescaled in place from the input
 *                           decoder time base to the encoder time base.
 * @param out_format_context output muxer context
 * @param video_st           output video stream (provides the encoder context)
 * @return 0 when a packet was written; 1 when the encoder failed, produced no
 *         packet (e.g. nothing left while flushing) or the write failed.
 */
int encode_video_frame(AVFrame *frame,
                       AVFormatContext *out_format_context,
                       AVStream *video_st){
    AVCodecContext* out_codec_context = video_st->codec;
    int got_packet = 0;

    if (frame){
        // Convert from the decoder-layer time base to the encoder time base.
        frame->pts = av_rescale_q(frame->pts, ist_v->codec->time_base, out_codec_context->time_base);
        // Do not force a picture type on the encoder.
        frame->pict_type = AV_PICTURE_TYPE_NONE;
    }

    int ret = avcodec_encode_video2(out_codec_context, &venc_pkt,
                                    frame, &got_packet);
    // While flushing, a failure / missing packet means the buffered frames are exhausted.
    if (ret < 0 || !got_packet){
        return 1;
    }

    // Convert from the encoder time base to the muxer (stream) time base.
    av_packet_rescale_ts(&venc_pkt, out_codec_context->time_base, video_st->time_base);
    venc_pkt.stream_index = video_st->index;

    // Cast explicitly: the width of int64_t (and of duration) is platform/version dependent.
    printf("video info--- enc_pkt:pts:%lld\t dts:%lld\t duration:%lld\n",
           (long long)venc_pkt.pts, (long long)venc_pkt.dts, (long long)venc_pkt.duration);
    last_video_pts = venc_pkt.pts;

    ret = av_interleaved_write_frame(out_format_context, &venc_pkt);
    if (ret < 0){
        printf("write video frame failed!\n");
        return 1;
    }

    printf("write video frame success\t%d\n", videoframecnt);
    videoframecnt++;
    return 0;
}
/**
 * Encode one audio frame into the global packet aenc_pkt, run it through the
 * aac_adtstoasc bitstream filter and write it to the output.
 *
 * @param frame                 frame to encode, or NULL to drain the encoder.
 *                              Its pts is rescaled in place from the input
 *                              decoder time base to the encoder time base.
 * @param nbsamples             unused by this function
 * @param output_format_context output muxer context
 * @param st                    output audio stream (provides the encoder context)
 * @return 0 when a packet was written; 1 when the encoder failed, produced no
 *         packet or the write failed.
 */
int encode_audio_frame(AVFrame *frame, int nbsamples,
                       AVFormatContext *output_format_context, AVStream* st){
    (void)nbsamples;
    AVCodecContext *enc = st->codec;
    int got_packet = 0;

    if (frame){
        // Convert from the decoder-layer time base to the encoder time base.
        frame->pts = av_rescale_q(frame->pts, ist_a->codec->time_base, enc->time_base);
    }

    int ret = avcodec_encode_audio2(enc, &aenc_pkt,
                                    frame, &got_packet);
    if (ret < 0 || !got_packet){
        return 1;
    }

    // Convert from the encoder time base to the muxer (stream) time base.
    av_packet_rescale_ts(&aenc_pkt, enc->time_base, st->time_base);
    aenc_pkt.stream_index = st->index;
    last_audio_pts = aenc_pkt.pts;
    //printf("audio info--- enc_pkt:pts:%lld\t dts:%lld\t duration:%d\n", aenc_pkt.pts, aenc_pkt.dts, aenc_pkt.duration);

    // The filter context may be NULL if its creation failed; skip filtering then.
    if (m_aac_adtstoasc){
        ret = av_bitstream_filter_filter(m_aac_adtstoasc, enc, NULL,
                                         &aenc_pkt.data, &aenc_pkt.size,
                                         aenc_pkt.data, aenc_pkt.size, 0);
        if (ret < 0){
            // Best effort, as before: report the problem but still try to write the packet.
            fprintf(stderr, "aac_adtstoasc filter failed: %d\n", ret);
        }
        // NOTE(review): a positive return value would mean the filter allocated a new
        // output buffer that the caller has to free; not handled here — confirm whether
        // aac_adtstoasc can do that in the FFmpeg version in use.
    }

    ret = av_interleaved_write_frame(output_format_context, &aenc_pkt);
    if (ret < 0){
        printf("write audio frame failed!\n");
        return 1;
    }

    audioframecnt++;
    // printf("write audio frame success!\t%d\n", audioframecnt);
    return 0;
}
/**
 * Decode one video packet; when a picture is produced, convert it to the
 * encoder's pixel format and push it onto the video queue.
 *
 * The converted picture is written into the global video_frame, backed by
 * video_data_buffer, and carries the best-effort timestamp of the decoded
 * frame (input decoder time base).
 *
 * @param pkt demuxed packet of the input video stream; its timestamps are
 *            rescaled in place from the stream to the decoder time base.
 */
void decode_video_frame(AVPacket *pkt){
    // Convert from the demuxer (stream) time base to the decoder time base.
    av_packet_rescale_ts(pkt, ist_v->time_base, ist_v->codec->time_base);

    // Initialised so it is well defined even if the decoder bails out before setting it.
    int got_picture = 0;
    int ret = avcodec_decode_video2(ist_v->codec, videosrcFrame, &got_picture, pkt);
    printf("解析视频called,got_picture:%d\n", got_picture);
    if (ret < 0){
        fprintf(stderr, "decode video packet failed: %d\n", ret);
        return;
    }
    if (!got_picture){
        return;
    }

    videosrcFrame->pts = av_frame_get_best_effort_timestamp(videosrcFrame);

    // Point video_frame's planes at video_data_buffer using the encoder's picture layout.
    avpicture_fill((AVPicture *)video_frame, video_data_buffer, video_codec_context->pix_fmt, video_codec_context->width, video_codec_context->height);
    sws_scale(sws_ctx, videosrcFrame->data, videosrcFrame->linesize, 0, height, video_frame->data, video_frame->linesize);
    video_frame->pts = videosrcFrame->pts;

    // NOTE(review): the same video_frame/video_data_buffer is reused for every picture;
    // this is only safe if queue_push copies the pixel data — confirm against the queue code.
    queue_push(&videoq, video_frame);
    printf("放入了视频\n");
}
/**
 * Decode one audio packet, resample it to the encoder's sample format and
 * append it to the audio FIFO; then cut the FIFO into frames of exactly
 * out_framesize samples and push those onto the audio queue.
 *
 * Only the timestamp of the first decoded frame is taken from the input;
 * every queued frame afterwards gets the previous pts plus iaudio_duration
 * (input decoder time base).
 *
 * @param pkt demuxed packet of the input audio stream; its timestamps are
 *            rescaled in place from the stream to the decoder time base.
 */
void decode_audio_frame(AVPacket *pkt){
    // Convert from the demuxer (stream) time base to the decoder time base.
    av_packet_rescale_ts(pkt, ist_a->time_base, ist_a->codec->time_base);

    // Initialised so it is well defined even if the decoder bails out before setting it.
    int got_frame = 0;
    int ret = avcodec_decode_audio4(ist_a->codec, audiosrcFrame, &got_frame, pkt);
    printf("解析音频called,got_frame:%d\n", got_frame);
    if (ret < 0){
        fprintf(stderr, "decode audio packet failed: %d\n", ret);
        return;
    }
    if (!got_frame){
        return;
    }

    const int in_samples = audiosrcFrame->nb_samples;

    // Temporary buffer for the resampled data of this one decoded frame.
    ret = av_samples_alloc_array_and_samples(&audio_data_buffer, NULL, audio_codec_context->channels, in_samples, audio_codec_context->sample_fmt, 1);
    if (ret < 0){
        fprintf(stderr, "could not allocate resample buffer: %d\n", ret);
        return;
    }

    // swr_convert reports how many samples per channel it actually produced;
    // only that many are valid in audio_data_buffer.
    const int converted = swr_convert(swrCtx, audio_data_buffer, in_samples,
                                      (const uint8_t**)audiosrcFrame->data, in_samples);
    if (converted > 0){
        av_audio_fifo_realloc(audiofifo, av_audio_fifo_size(audiofifo) + converted);
        av_audio_fifo_write(audiofifo, (void **)audio_data_buffer, converted);
    }

    // The FIFO keeps its own copy, so release the temporary buffer (samples, then pointer array).
    av_freep(&audio_data_buffer[0]);
    av_freep(&audio_data_buffer);

    if (converted < 0){
        fprintf(stderr, "swr_convert failed: %d\n", converted);
        return;
    }

    if (!is_set_audio_first_pts){
        // Remember the pts of the very first decoded frame (decoder time base).
        audio_first_pts = av_frame_get_best_effort_timestamp(audiosrcFrame);
        is_set_audio_first_pts = 1;
    }

    while (av_audio_fifo_size(audiofifo) >= out_framesize){
        //int frame_size = FFMIN(av_audio_fifo_size(audiofifo), out_framesize);
        audio_frame->nb_samples = out_framesize;
        audio_frame->channel_layout = av_get_default_channel_layout(audio_codec_context->channels);
        audio_frame->format = audio_codec_context->sample_fmt;
        audio_frame->sample_rate = audio_codec_context->sample_rate;
        // NOTE(review): audio_frame is reused on every iteration without being unreferenced
        // first; whether the previous buffers leak depends on what queue_push does with the
        // frame — confirm against the queue code.
        av_frame_get_buffer(audio_frame, 0);
        av_audio_fifo_read(audiofifo, (void **)audio_frame->data, out_framesize);
        audio_frame->pts = audio_first_pts; // still in the input decoder time base here
        audio_first_pts += iaudio_duration;
        queue_push(&audioq, audio_frame);
    }
}
/**
 * Reader thread: pull packets from the input until av_read_frame() fails,
 * handing video packets to decode_video_frame() and audio packets to
 * decode_audio_frame(). Packets of other streams are dropped.
 *
 * Sets the global flag `finished` to 1 when reading stops.
 * NOTE(review): `finished` is a plain int shared with the encoder thread.
 *
 * @param arg unused
 */
void read_decode_thread(int arg){
    (void)arg;
    AVPacket pkt;
    init_packet(&pkt);

    for (;;){
        int ret = av_read_frame(ifmt_ctx, &pkt);
        if (ret < 0){
            printf("到文件尾了\n");
            finished = 1;
            break;
        }

        if (pkt.stream_index == iv_index){
            printf("开始解析视频\n");
            decode_video_frame(&pkt);
        }
        else if (pkt.stream_index == ia_index){
            printf("开始解析音频\n");
            decode_audio_frame(&pkt);
        }

        // Every successful av_read_frame() hands us a packet we must release;
        // without this each iteration leaks the packet payload.
        av_free_packet(&pkt);
    }

    av_free_packet(&pkt);
}
/**
 * Encoder thread: repeatedly pick the stream that is behind (audio when the
 * last written audio pts is not later than the last written video pts,
 * otherwise video), pop one frame from its queue, pace it against the wall
 * clock and encode/write it.
 *
 * Pacing: audio_clock is advanced by the duration of one output audio frame
 * and the thread sleeps until the wall clock reaches it; video sleeps while
 * start_time plus the last written video pts is ahead of audio_clock.
 *
 * The loop ends as soon as queue_pop() returns NULL for either queue.
 *
 * @param arg unused
 */
void encode_thread(int arg){
    (void)arg;
    AVFrame* frame = NULL;

    for (;;){
        if (av_compare_ts(last_audio_pts, audio_st->time_base, last_video_pts, video_st->time_base) <= 0){
            printf("现在去取音频\n");
            frame = queue_pop(&audioq, finished);
            if (frame == NULL){
                printf("audio queue_pop exit\n");
                break;
            }
            // %p for the frame pointer and an explicit long long for the 64-bit pts.
            printf("audio frame----%p,pts:%lld\n", (void *)frame, (long long)frame->pts);

            double delay = (double)out_framesize / audio_st->codec->sample_rate;
            audio_clock += delay;
            double audiodiff = audio_clock - (double)av_gettime()/1000000;
            if (audiodiff > 0){
                Sleep((unsigned long)(audiodiff*1000));
            }
            encode_audio_frame(frame, out_framesize, ofmt_ctx, audio_st);
        }
        else{
            printf("现在去取视频\n");
            frame = queue_pop(&videoq, finished);
            if (frame == NULL){
                printf("video queue_pop exit\n");
                break;
            }
            printf("video frame----%p,pts:%lld\n", (void *)frame, (long long)frame->pts);

            double ptstime = last_video_pts*av_q2d(video_st->time_base);
            double videodiff = start_time+ptstime - audio_clock;
            if (videodiff > 0){
                Sleep((unsigned long)(videodiff * 1000));
            }
            // Encode the popped frame directly. A struct copy of an AVFrame keeps
            // extended_data pointing into the source frame, and releasing such a copy
            // with av_frame_free() frees memory the copy does not own.
            encode_video_frame(frame, ofmt_ctx, video_st);
        }
    }
    // NOTE(review): popped frames are not released here; who owns them depends on the
    // queue implementation, which is not part of this file.
}
int main(int argc, char **argv){
av_register_all();
avformat_network_init();
int ret;
AVDictionary* in_options = NULL;
//av_dict_set(&in_options, "re", "1", 0);
if ((ret = avformat_open_input(&ifmt_ctx, INPUTURL, 0, &in_options)) < 0) {
printf("Could not open input file.");
}
avformat_find_stream_info(ifmt_ctx, 0);
for (int i = 0; i < ifmt_ctx->nb_streams; i++) {
if (ifmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO){
iv_index = i;
ist_v = ifmt_ctx->streams[i];
width = ist_v->codec->width;
height = ist_v->codec->height;
AVCodec *codec = avcodec_find_decoder(ist_v->codec->codec_id);
/* open the codec */
if (avcodec_open2(ist_v->codec, codec,NULL) < 0) {
fprintf(stderr, "could not open input video decoder codec\n");
getchar();exit(1);
}
}
else if (ifmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO){
ia_index = i;
ist_a = ifmt_ctx->streams[i];
AVCodec *codec = avcodec_find_decoder(ist_a->codec->codec_id);
/* open the codec */
if (avcodec_open2(ist_a->codec, codec, NULL) < 0) {
fprintf(stderr, "could not open input audio decoder codec\n");
getchar(); exit(1);
}
}
}
av_dump_format(ifmt_ctx, 0, INPUTURL, 0);
ofmt_ctx = avformat_alloc_context();
if (strstr(OUTPUTURL, "rtmp")){
avformat_alloc_output_context2(&ofmt_ctx, NULL, "flv", OUTPUTURL); //RTMP
}
else{
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, OUTPUTURL);
}
AVOutputFormat* fmt = ofmt_ctx->oformat;
if (!fmt) {
fprintf(stderr, "Could not find suitable output format");
getchar();exit(1);
}
add_stream(ofmt_ctx, &video_st, &video_codec_context, &video_codec, video_codecID);
add_stream(ofmt_ctx, &audio_st, &audio_codec_context, &audio_codec, audio_codecID);
open_video(video_codec_context, video_codec);
open_audio(audio_codec_context, audio_codec);
av_dump_format(ofmt_ctx, 0, OUTPUTURL, 1);
sws_ctx = sws_getContext(
width, height, ist_v->codec->pix_fmt,
width, height, video_codec_context->pix_fmt,
SWS_BICUBIC, NULL, NULL, NULL);
video_data_buffer = new uint8_t[avpicture_get_size(video_codec_context->pix_fmt, video_codec_context->width, video_codec_context->height)];
swrCtx = swr_alloc_set_opts(NULL, av_get_default_channel_layout(audio_codec_context->channels), audio_codec_context->sample_fmt, audio_codec_context->sample_rate,
av_get_default_channel_layout(ist_a->codec->channels), ist_a->codec->sample_fmt, ist_a->codec->sample_rate,
0, NULL);
swr_init(swrCtx);
audiofifo = av_audio_fifo_alloc(audio_codec_context->sample_fmt, audio_codec_context->channels, 1);
out_framesize = audio_codec_context->frame_size;
double iaudio_duration_s = (double)ist_a->codec->frame_size / ist_a->codec->sample_rate; //输出流一帧音频帧的持续时间(标准时间)
iaudio_duration = iaudio_duration_s / av_q2d(ist_a->codec->time_base);//转换为输入流decode层时间基
audio_frame = av_frame_alloc();
/* open the output file, if needed */
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&ofmt_ctx->pb, OUTPUTURL, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open '%s': %s\n", OUTPUTURL,
"");
return 1;
}
}
init_packet(&venc_pkt);
videosrcFrame = av_frame_alloc();
init_packet(&aenc_pkt);
video_frame = av_frame_alloc();
audiosrcFrame = av_frame_alloc();
queue_init(&audioq,50,2);
queue_init(&videoq,50,1);
m_aac_adtstoasc = av_bitstream_filter_init("aac_adtstoasc");
/*AVDictionary *out_options = NULL;
av_dict_set(&out_options, "rtmp_buffer", "1024000", 0);
av_dict_set(&out_options, "max_delay", "500000", 0);
av_dict_set(&out_options, "timeout", "6", 0);*/
ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file: %s\n", "");
return 1;
}
out_framesize = audio_codec_context->frame_size;
audio_clock = start_time=(double)av_gettime() / 1000000;
thread t1(read_decode_thread, NULL);
thread t2(encode_thread, NULL);
t1.join();
t2.join();
if (video_codec_context->codec->capabilities &CODEC_CAP_DELAY){
while (!encode_video_frame(NULL, ofmt_ctx, video_st)){
printf("encode_video_frame while");
;
}
}
if (audio_codec_context->codec->capabilities &CODEC_CAP_DELAY){
while (!encode_audio_frame(NULL, out_framesize, ofmt_ctx,audio_st)){
printf("encode_audio_frame while");
;
}
}
av_write_trailer(ofmt_ctx);
av_bitstream_filter_close(m_aac_adtstoasc);
avformat_close_input(&ifmt_ctx);
av_free_packet(&venc_pkt);
av_frame_free(&videosrcFrame);
av_frame_free(&audio_frame);
sws_freeContext(sws_ctx);
if (ofmt_ctx) {
avio_closep(&ofmt_ctx->pb);
avformat_free_context(ofmt_ctx);
}
printf("程序运行end");
return getchar();
}
//dbt_rtmp.h
#include
#include
#include
#include
#include
extern "C"
{
#include "libavformat/avformat.h"
#include
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/avutil.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
};
/* Not referenced by the code visible in this file. */
#define STREAM_FRAME_RATE 25
/* Input opened by main(). */
#define INPUTURL "yekong.mp4"
/* Output target; main() forces the "flv" muxer when the URL contains "rtmp". */
//#define OUTPUTURL "pingfan_out.mp4"
#define OUTPUTURL "rtmp://localhost:1935/live/stream"

/* Picture size, copied from the input video decoder in main(). */
int width;
int height;

/* Input side: demuxer context and the selected video / audio streams. */
AVFormatContext *ifmt_ctx = NULL;
AVStream *ist_v = NULL;
AVStream *ist_a = NULL;

/* Output side: muxer context, encoder contexts, encoders and streams. */
AVFormatContext *ofmt_ctx = NULL;
AVCodecContext *audio_codec_context = NULL;
AVCodecContext *video_codec_context = NULL;
AVCodec *video_codec = NULL;
AVCodec *audio_codec = NULL;
AVStream *video_st = NULL;
AVStream *audio_st = NULL;

/* Counters of written video / audio packets (video starts at 1). */
int videoframecnt = 1;
int audioframecnt;

/* Pixel format converter and the buffer backing video_frame. */
SwsContext *sws_ctx = NULL;
uint8_t *video_data_buffer = NULL;

/* Set to 1 by the reader thread when av_read_frame() fails; passed to queue_pop(). */
int finished;

/* Pacing clocks in seconds, both initialised from av_gettime() in main(). */
double audio_clock;
double start_time;

/* Resampling: temporary output buffer, sample FIFO and resampler context. */
uint8_t **audio_data_buffer = NULL;
AVAudioFifo *audiofifo = NULL;
SwrContext *swrCtx = NULL;

/* pts assigned to the next queued audio frame (input decoder time base). */
int64_t audio_first_pts;
/* Amount added to audio_first_pts after each queued audio frame (input decoder time base). */
int64_t iaudio_duration;
/*
 * Whether audio_first_pts has been seeded. Only the pts of the first decoded
 * audio frame is taken from the input; later values are obtained by adding
 * iaudio_duration.
 */
int is_set_audio_first_pts = 0;

/* Frames handed to the queues by the decode functions. */
AVFrame *audio_frame;
AVFrame *video_frame;

//video param
AVCodecID video_codecID = AV_CODEC_ID_H264;
AVPacket venc_pkt;              /* packet reused for every encoded video frame */
AVFrame *videosrcFrame = NULL;  /* decoder output for video */
AVPacket aenc_pkt;              /* packet reused for every encoded audio frame */
AVFrame *audiosrcFrame = NULL;  /* decoder output for audio */

//audio param
int dst_channels = 2;           /* output channel count */
AVCodecID audio_codecID = AV_CODEC_ID_AAC;
int audio_frame_size = 1024;
AVBitStreamFilterContext *m_aac_adtstoasc; /* "aac_adtstoasc" bitstream filter */
int out_framesize;              /* samples per output audio frame (encoder frame_size) */

/* Indices of the video / audio stream inside the input; -1 until found. */
int iv_index = -1;
int ia_index = -1;

/* Decoded frames waiting to be encoded. */
Queue audioq;
Queue videoq;

/* pts of the most recently written video / audio packet (stream time base). */
int64_t last_video_pts;
int64_t last_audio_pts;
void init_packet(AVPacket *packet);
void add_stream(AVFormatContext *out_format_context, AVStream** st, AVCodecContext **out_codec_context, AVCodec** codec, AVCodecID codec_id);
void open_video(AVCodecContext* codec_context, AVCodec* codec);
void open_audio(AVCodecContext* audio_codec_context, AVCodec * codec);
int encode_video_frame(AVFrame *frame,AVFormatContext *out_format_context,AVStream *video_st);
int encode_audio_frame(AVFrame *frame, int nbsamples,AVFormatContext *output_format_context, AVStream* st);
void decode_video_frame(AVPacket *pkt);
void decode_audio_frame(AVPacket *pkt);