先说下具体大体流程:
1. 打开输入文件(这里把摄像头当作一个输入文件),需要用到的函数如下
1)av_find_input_format() //如果是打开麦克风或者屏幕需要增加
2)avformat_open_input()/avformat_close_input() //打开输入流,并创建输入文件的上下文
3)avformat_find_stream_info() //读取输入的媒体信息
4)av_find_best_stream() //查找对应流的索引号
2.设置解码器(编码是把frame数据转成packet数据,但av_read_frame获取到的是packet数据,所以这里先解码:一方面是为了兼容某些麦克风输出的已编码数据,另一方面是通过解码函数把packet数据转成frame数据,这样才能送入编码器)
1)avcodec_find_decoder_by_name()/avcodec_find_decoder() //查找解码器
2)avcodec_open2() //打开解码器
//这里可以不用avcodec_alloc_context3函数,直接查找解码器,然后打开解码器
3.重采样初始化
//这里重采样使用的是filter的方式
avfilter_get_by_name("abuffer");
avfilter_get_by_name("abuffersink");
avfilter_inout_alloc();
avfilter_inout_alloc();
avfilter_graph_create_filter(&buffersrcCtx, abuffersrc, "in",
args, NULL, filterGraph);
avfilter_graph_create_filter(&buffersinkCtx, abuffersink, "out",
NULL, NULL, filterGraph);
av_opt_set_int_list
4.设置编码器
1)avcodec_find_encoder_by_name()/avcodec_find_encoder() //查找编码器
2)avcodec_alloc_context3()/avcodec_free_context() //创建编码器上下文
3)设置编码器参数
4)avcodec_open2() //打开编码器
5.打开输出文件(这里rtmp链接就相当于是一个输出文件)
1)avformat_alloc_output_context2()/avformat_free_context() //创建/释放输出文件的上下文
2)avformat_new_stream() //向输出的媒体文件添加流
3)avcodec_copy_context() //拷贝流信息给添加的流
4)avio_open() //打开输出文件
5)avformat_write_header() //写数据头文件
具体代码如下:
// Upper bound for a decoded audio frame buffer, in bytes (FFmpeg demo convention).
#define MAX_AUDIO_FRAME_SIZE 192000
// Demuxer context for the capture device (the input "file").
AVFormatContext *infmt_ctx = nullptr;
// Muxer context for the output file / RTMP stream.
// NOTE(review): captureFrame() declares a local of the same name, so this
// global stays NULL and flush_encoder/write_trailer there use the local.
AVFormatContext * outfmt_ctx = NULL;
// Declared but never read or written in this file.
int64_t lastReadPacktTime;
// Filter-graph endpoints used for resampling: source feeds decoded frames in,
// sink yields converted frames out.
AVFilterContext *buffersinkCtx = NULL;
AVFilterContext *buffersrcCtx = NULL;
AVFilterGraph *filterGraph = NULL;
// Declared but unused here; captureFrame() uses its own local codec_ctx.
AVCodecContext* outPutAudioEncContext = NULL;
// Running count of encoded audio frames, used to synthesize PTS values.
int64_t audioCount = 0;
/*
 * Drain any packets buffered inside the encoder of the given stream and mux
 * them into fmt_ctx.  Only meaningful for codecs with AV_CODEC_CAP_DELAY
 * (e.g. AAC); for others there is nothing buffered and we return 0.
 *
 * Returns 0 on success (or nothing to flush), a negative AVERROR otherwise.
 * Uses the deprecated avcodec_encode_audio2() API, consistent with the rest
 * of this file.
 */
int flush_encoder(AVFormatContext *fmt_ctx, unsigned int stream_index) {
    int ret = 0;
    int got_frame;
    AVPacket enc_pkt;
    // 0x0020 was a magic number for this capability flag; use the named constant.
    if (!(fmt_ctx->streams[stream_index]->codec->codec->capabilities &
        AV_CODEC_CAP_DELAY))
        return 0;
    while (1) {
        // Initialize first, then clear data/size (the old order cleared fields
        // that av_init_packet immediately overwrote).
        av_init_packet(&enc_pkt);
        enc_pkt.data = NULL;
        enc_pkt.size = 0;
        // A NULL frame signals end-of-stream: the encoder emits buffered packets.
        ret = avcodec_encode_audio2(fmt_ctx->streams[stream_index]->codec, &enc_pkt,
            NULL, &got_frame);
        if (ret < 0)
            break;
        if (!got_frame) {
            ret = 0;   // fully drained
            break;
        }
        printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n", enc_pkt.size);
        /* mux encoded frame; av_write_frame does NOT take ownership of the
         * packet, so unref it ourselves to avoid leaking every flushed packet */
        ret = av_write_frame(fmt_ctx, &enc_pkt);
        av_packet_unref(&enc_pkt);
        if (ret < 0)
            break;
    }
    return ret;
}
/*
 * Pick the channel layout with the most channels from the encoder's supported
 * list; fall back to stereo when the encoder publishes no list.
 *
 * Returns the layout as a uint64_t bitmask.  The original returned int, which
 * silently truncated the 64-bit AV_CH_LAYOUT_* mask; the function is static
 * with no callers in this file, so widening the return type is safe.
 */
static uint64_t select_channel_layout(const AVCodec *codec)
{
    const uint64_t *p;
    uint64_t best_ch_layout = 0;
    int best_nb_channels = 0;
    if (!codec->channel_layouts)
        return AV_CH_LAYOUT_STEREO;
    // channel_layouts is a 0-terminated array of candidate masks.
    p = codec->channel_layouts;
    while (*p) {
        int nb_channels = av_get_channel_layout_nb_channels(*p);
        if (nb_channels > best_nb_channels) {
            best_ch_layout = *p;
            best_nb_channels = nb_channels;
        }
        p++;
    }
    return best_ch_layout;
}
/*
 * Choose the encoder-supported sample rate closest to 44100 Hz.
 * Falls back to 44100 when the encoder publishes no rate list.
 */
static int select_sample_rate(const AVCodec *codec)
{
    if (!codec->supported_samplerates)
        return 44100;
    int best = 0;
    // supported_samplerates is a 0-terminated array of candidate rates.
    for (const int *rate = codec->supported_samplerates; *rate; ++rate) {
        if (!best || abs(44100 - *rate) < abs(44100 - best))
            best = *rate;
    }
    return best;
}
/*
 * Convert a NUL-terminated wide (UTF-16) string to a freshly allocated UTF-8
 * string.  The caller owns the returned buffer (release with av_free).
 * Returns NULL when allocation fails.
 */
static char *dup_wchar_to_utf8(wchar_t *w)
{
    // First call with no output buffer computes the required byte count,
    // including the terminating NUL (-1 means "source is NUL-terminated").
    int needed = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
    char *utf8 = (char *)av_malloc(needed);
    if (utf8)
        WideCharToMultiByte(CP_UTF8, 0, w, -1, utf8, needed, 0, 0);
    return utf8;
}
int initAudioFilters()
{
char args[512];
int ret;
AVFilter *abuffersrc = avfilter_get_by_name("abuffer");
AVFilter *abuffersink = avfilter_get_by_name("abuffersink");
AVFilterInOut *outputs = avfilter_inout_alloc();
AVFilterInOut *inputs = avfilter_inout_alloc();
auto audioDecoderContext = infmt_ctx->streams[0]->codec;
if (!audioDecoderContext->channel_layout)
audioDecoderContext->channel_layout = av_get_default_channel_layout(audioDecoderContext->channels);
static const enum AVSampleFormat out_sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
static const int64_t out_channel_layouts[] = { audioDecoderContext->channel_layout, -1 };
static const int out_sample_rates[] = { audioDecoderContext->sample_rate , -1 };
AVRational time_base = infmt_ctx->streams[0]->time_base;
filterGraph = avfilter_graph_alloc();
filterGraph->nb_threads = 1;
sprintf_s(args, sizeof(args),
"time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
time_base.num, time_base.den, audioDecoderContext->sample_rate,
av_get_sample_fmt_name(audioDecoderContext->sample_fmt), audioDecoderContext->channel_layout);
ret = avfilter_graph_create_filter(&buffersrcCtx, abuffersrc, "in",
args, NULL, filterGraph);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer source\n");
return ret;
}
/* buffer audio sink: to terminate the filter chain. */
ret = avfilter_graph_create_filter(&buffersinkCtx, abuffersink, "out",
NULL, NULL, filterGraph);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer sink\n");
return ret;
}
ret = av_opt_set_int_list(buffersinkCtx, "sample_fmts", out_sample_fmts, -1,
AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot set output sample format\n");
return ret;
}
ret = av_opt_set_int_list(buffersinkCtx, "channel_layouts", out_channel_layouts, -1,
AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot set output channel layout\n");
return ret;
}
ret = av_opt_set_int_list(buffersinkCtx, "sample_rates", out_sample_rates, -1,
AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot set output sample rate\n");
return ret;
}
/* Endpoints for the filter graph. */
outputs->name = av_strdup("in");
outputs->filter_ctx = buffersrcCtx;;
outputs->pad_idx = 0;
outputs->next = NULL;
inputs->name = av_strdup("out");
inputs->filter_ctx = buffersinkCtx;
inputs->pad_idx = 0;
inputs->next = NULL;
if ((ret = avfilter_graph_parse_ptr(filterGraph, "anull",
&inputs, &outputs, nullptr)) < 0)
return ret;
if ((ret = avfilter_graph_config(filterGraph, NULL)) < 0)
return ret;
av_buffersink_set_frame_size(buffersinkCtx, 1024);
return 0;
}
/*
 * Decode one packet from input stream 0 into pSrcAudioFrame, push it through
 * the resampling filter graph, and return ONE filtered frame.
 *
 * Returns a newly allocated AVFrame the caller must free with av_frame_free,
 * or nullptr when the packet produced no frame (decoder buffering, filter has
 * no complete output yet, or an error occurred).
 *
 * NOTE(review): only a single frame is pulled from the sink per packet; if the
 * graph ever buffers more than one output frame per input, the extras are
 * drained on subsequent calls.  The original's `goto error` into a label that
 * was also the fall-through path has been replaced with plain early returns.
 */
AVFrame* DecodeAudio(AVPacket* packet, AVFrame* pSrcAudioFrame)
{
    AVStream *stream = infmt_ctx->streams[0];
    AVCodecContext *codecContext = stream->codec;
    int gotFrame = 0;
    int length = avcodec_decode_audio4(codecContext, pSrcAudioFrame, &gotFrame, packet);
    if (length < 0 || !gotFrame)
        return nullptr;   // decode error, or decoder needs more input
    // PUSH: run the graph immediately rather than waiting for a request.
    if (av_buffersrc_add_frame_flags(buffersrcCtx, pSrcAudioFrame, AV_BUFFERSRC_FLAG_PUSH) < 0) {
        av_log(NULL, AV_LOG_ERROR, "buffe src add frame error!\n");
        return nullptr;
    }
    AVFrame *filtFrame = av_frame_alloc();
    if (!filtFrame)
        return nullptr;
    // NO_REQUEST: only take a frame if one is already available (1024 samples
    // buffered); EAGAIN here is normal and reported as "no frame yet".
    int ret = av_buffersink_get_frame_flags(buffersinkCtx, filtFrame, AV_BUFFERSINK_FLAG_NO_REQUEST);
    if (ret < 0) {
        av_frame_free(&filtFrame);
        return nullptr;
    }
    return filtFrame;
}
void captureFrame()
{
//输入文件 麦克风
//wchar_t w[200] = L"audio=麦克风 (Realtek(R) Audio)";
wchar_t w[200] = L"audio=virtual-audio-capturer";
string fileAudioInput = dup_wchar_to_utf8(w);
static AVInputFormat* imft = av_find_input_format("dshow");
AVDictionary *format_opts = nullptr;
av_dict_set_int(&format_opts, "audio_buffer_size", 20, 0);
if (0 > avformat_open_input(&infmt_ctx, fileAudioInput.c_str(), imft, &format_opts)) {
printf("failed input file\n");
return;
}
if (0 > avformat_find_stream_info(infmt_ctx, NULL)) {
printf("failed find stream info\n");
avformat_close_input(&infmt_ctx);
return;
}
int audio_index = -1;
audio_index = av_find_best_stream(infmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
if (-1 == audio_index) {
printf("failed find best stream\n");
avformat_close_input(&infmt_ctx);
return;
}
//av_dump_format(infmt_ctx, 0, fileAudioInput.c_str(), 1);
//END输入文件
//打开解码器
static AVCodec* decodec = avcodec_find_decoder(infmt_ctx->streams[0]->codec->codec_id);
if (!decodec) {
printf("failed find decoder\n");
return;
}
if (0 > avcodec_open2(infmt_ctx->streams[0]->codec, decodec, NULL)) {
printf("failed open decoder\n");
return;
}
//END解码器
//重采样初始化
initAudioFilters();
//END重采样初始化
//编码器
static AVCodec* codec = NULL;
//codec = avcodec_find_encoder_by_name("libmp3lame");
codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
static AVCodecContext* codec_ctx = NULL;
codec_ctx = avcodec_alloc_context3(codec);
// codec_ctx->bit_rate = 64000;
// inputContext->streams[0]->codec
codec_ctx->codec = codec;
codec_ctx->sample_rate = 48000;
codec_ctx->channel_layout = 3;
codec_ctx->channels = 2;
//codec_ctx->frame_size = 1024;
codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
codec_ctx->codec_tag = 0;
codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
if (0 > avcodec_open2(codec_ctx, codec, NULL)) {
printf("failed open coder\n");
avformat_close_input(&infmt_ctx);
avcodec_free_context(&codec_ctx);
return;
}
//END编码器
//输出文件
AVFormatContext* outfmt_ctx = NULL;
if (0 > avformat_alloc_output_context2(&outfmt_ctx, NULL, NULL, "aac.aac")) {
printf("failed alloc outputcontext\n");
avformat_close_input(&infmt_ctx);
avcodec_free_context(&codec_ctx);
return;
}
AVStream* out_stream = avformat_new_stream(outfmt_ctx, codec_ctx->codec);
if (!out_stream) {
printf("failed new stream\n");
avformat_close_input(&infmt_ctx);
avcodec_free_context(&codec_ctx);
avformat_close_input(&outfmt_ctx);
return;
}
avcodec_copy_context(out_stream->codec, codec_ctx);
// if (0 > avio_open(&outfmt_ctx->pb, "rtmp://localhost/testlive", AVIO_FLAG_WRITE)) {
if (0 > avio_open(&outfmt_ctx->pb, "aac.aac", AVIO_FLAG_WRITE)) {
printf("failed to open outfile\n");
avformat_close_input(&infmt_ctx);
avcodec_free_context(&codec_ctx);
avformat_close_input(&outfmt_ctx);
return;
}
avformat_write_header(outfmt_ctx, NULL);
//END输出文件
#if 0
AVFrame* Frame = av_frame_alloc();
Frame->nb_samples = codec_ctx->frame_size;
Frame->format = codec_ctx->sample_fmt;
Frame->channel_layout = codec_ctx->channel_layout;
int size = av_samples_get_buffer_size(NULL, codec_ctx->channels, codec_ctx->frame_size,
codec_ctx->sample_fmt, 1);
uint8_t* frame_buf = (uint8_t *)av_malloc(size);
avcodec_fill_audio_frame(Frame, codec_ctx->channels, codec_ctx->sample_fmt, (const uint8_t*)frame_buf, size, 1);
int64_t in_channel_layout = av_get_default_channel_layout(codec_ctx->channels);
AVPacket pkt;
av_new_packet(&pkt, size);
pkt.data = NULL;
int got_frame = -1;
int delayedFrame = 0;
static uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2];
int audioCount = 0;
const uint8_t *indata[AV_NUM_DATA_POINTERS] = { 0 };
AVFrame* Frame1 = av_frame_alloc();
#endif
int loop = 1;
int delayedFrame = 0;
AVPacket packet;
av_init_packet(&packet);
packet.data = NULL;
packet.size = 0;
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
AVFrame* pSrcAudioFrame = av_frame_alloc();
int got_frame = 0;
while (1) {
av_read_frame(infmt_ctx, &packet);
loop++;
if (packet.stream_index == audio_index) {
auto filterFrame = DecodeAudio(&packet, pSrcAudioFrame);
if (filterFrame) {
avcodec_encode_audio2(codec_ctx, &pkt, filterFrame, &got_frame);
if (got_frame) {
#if 1
auto streamTimeBase = outfmt_ctx->streams[pkt.stream_index]->time_base.den;
auto codecTimeBase = outfmt_ctx->streams[pkt.stream_index]->codec->time_base.den;
pkt.pts = pkt.dts = (1024 * streamTimeBase * audioCount) / codecTimeBase;
audioCount++;
auto inputStream = infmt_ctx->streams[pkt.stream_index];
auto outputStream = outfmt_ctx->streams[pkt.stream_index];
av_packet_rescale_ts(&pkt, inputStream->time_base, outputStream->time_base);
#endif
// pkt.stream_index = out_stream->index;
av_interleaved_write_frame(outfmt_ctx, &pkt);
av_packet_unref(&pkt);
printf("output frame %3d\n", loop - delayedFrame);
}
else {
delayedFrame++;
av_packet_unref(&pkt);
printf("no output frame\n");
}
}
}
av_packet_unref(&packet);
}
flush_encoder(outfmt_ctx, 0);
av_write_trailer(outfmt_ctx);
//av_free(Frame);
av_free(pSrcAudioFrame);
avio_close(outfmt_ctx->pb);
avformat_close_input(&infmt_ctx);
//avformat_close_input(&outfmt_ctx);
return;
}
/*
 * Entry point: register every FFmpeg component this pipeline needs, then run
 * the capture/encode loop.  Registration order is not significant and each
 * call is idempotent (all are no-ops / deprecated in FFmpeg >= 4.0).
 */
int main(void) {
    avdevice_register_all();    // dshow capture devices
    avcodec_register_all();     // decoders + AAC encoder
    avfilter_register_all();    // abuffer / abuffersink / anull
    av_register_all();          // muxers and demuxers
    avformat_network_init();    // needed for the RTMP output variant
    captureFrame();
    return 0;
}