FFMPEG音视频同步-音频实时采集编码封装成MP4

FFMPEG-音频实时采集编码封装成MP4

//-------------------------------------------------------------------------------------------------
参考链接1、https://blog.csdn.net/leixiaohua1020/article/details/39702113
参考链接2、https://blog.csdn.net/li_wen01/article/details/67631687

//-------------------------------------------------------------------------------------------------
音视频同步录制相关文章
//-------------------------------------------------------------------------------------------------
1、 ffmpeg-摄像头采集保存
2、 ffmpeg音视频同步-摄像头采集编码封装
3、 ffmpeg-音频正弦产生并编码封装
4、 ffmpeg-音频实时采集保存
5、 ffmpeg音视频同步-音频实时采集编码封装
6、 ffmpeg音视频同步-音视频实时采集编码封装
7、 ffmpeg音视频同步-音视频实时采集编码推流
8、 ffmpeg音视频同步-音视频实时采集编码推流-优化版本
//---------------------------------------------------------------

系统环境:
系统版本:lubuntu 16.04
Ffmpge版本:ffmpeg version N-93527-g1125277
摄像头:1.3M HD WebCan
虚拟机:Oracle VM VirtualBox 5.2.22

指令查看设备 ffmpeg -devices
指令播放实时音频 ffplay -f alsa -showmode 1 -ac 2 -i default -ar 44100
https://blog.csdn.net/Tang_Chuanlin/article/details/86775881

指令录制实时音频ffmpeg -f alsa -ar 44100 -i hw:0,0 audio.wav
ffmpeg -f alsa -ar 44100 -i default ffmpeg_record_audio.wav
https://blog.csdn.net/dingjianfeng2014/article/details/57424473

指令分离音视频:ffmpeg -i test.mp4 -vn -y -acodec copy test.aac
ffmpeg -i test.mp4 -an -y -vcodec copy test.h264

本章文档基于ubuntu alsa格式。实时采集音频一帧数据并将其转化为pcm,编码成AAC,封装成mp4保存下来

1.简介

FFmpeg中有一个和多媒体设备交互的类库:Libavdevice。使用这个库可以读取电脑(或者其他设备上)的多媒体设备的数据,或者输出数据到指定的多媒体设备上。

ffmpeg中的音频相关内容

ffmpeg中AVPacket中可能会含有多个音频帧(AVFrame),一个音频帧含有多个采样,采样率决定了1s有多少个采样(或者说一个音频帧可以播放多久)。对于aac,一帧有1024个采样;mp3一帧则固定为1152个采样。

对于音频,由于有多声道的音频,那么音频解码出来的数据不同声道也储存在不同的指针,如data[0]是左声道,data[1]是右声道,由于各个声道的数据长度是一样的,所以linesize[0]就代表了所有声道数据的长度。

成员extended_data则指向了data,是一个拓展,上面可以看到data 是包含8个指针的数组,也就是说对于音频,最多只支持8个声道

如果是以下格式:

AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
AV_SAMPLE_FMT_S16,         ///< signed 16 bits
AV_SAMPLE_FMT_S32,         ///< signed 32 bits
AV_SAMPLE_FMT_FLT,         ///< float
AV_SAMPLE_FMT_DBL,         ///< double

只能保存在AVFrame的uint8_t *data[0]中;音频保存格式如下:
LRLRLR。。。。

如果是以下格式

AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
AV_SAMPLE_FMT_FLTP,        ///< float, planar //aac只支持此格式
AV_SAMPLE_FMT_DBLP,        ///< double, planar

plane 0: LLLLLLLLLLLLLLLLLLLLLLLLLL…
plane 1: RRRRRRRRRRRRRRRRRRRR…

plane 0对应uint8_t *data[0];
plane 1对应uint8_t *data[1];

2.源码

最简单的基于Libavdevice的音频设备采集:读取一帧帧pcm数据,经过音频重采样转换成目标AAC编码器所需的音频参数,编码封装保存成MP4文件

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <unistd.h>                   /* usleep() used in the capture loop */

#include <libavutil/avassert.h>
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>
#include <libavutil/mathematics.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
#include <libavdevice/avdevice.h>     /* avdevice_register_all() */
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
15.	#define STREAM_DURATION   10.0  
16.	#define STREAM_FRAME_RATE 25 /* 25 images/s */  
17.	#define STREAM_PIX_FMT    AV_PIX_FMT_YUV420P /* default pix_fmt */  
18.	  
19.	#define SCALE_FLAGS SWS_BICUBIC  
20.	 #define OUTPUT_PCM 1  
21.	#define ALSA_GET_ENABLE 1  
22.	 #define MAX_AUDIO_FRAME_SIZE 192000  
23.	  
24.	  
// a wrapper around a single output AVStream
typedef struct OutputStream {
    AVStream *st;                /* muxer stream this encoder feeds */
    AVCodecContext *enc;         /* encoder context for this stream */

    /* pts of the next frame that will be generated */
    int64_t next_pts;
    int samples_count;           /* running total of audio samples sent to the encoder */

    AVFrame *frame;              /* frame in the encoder's native sample format */
    AVFrame *tmp_frame;          /* intermediate S16 frame filled before resampling */

    float t, tincr, tincr2;      /* state of the synthetic sine-wave generator */

    struct SwsContext *sws_ctx;  /* video scaler (unused on the audio-only path) */
    struct SwrContext *swr_ctx;  /* audio resampler: tmp_frame -> frame */
} OutputStream;
42.	  
43.	  
/* Capture-side state for the ALSA input device.
 * NOTE(review): "IntputDev" is a typo for "InputDev"; the name is kept
 * because it is part of the existing interface. */
typedef struct IntputDev {

    AVCodecContext  *pCodecCtx;   /* decoder context for the raw ALSA stream */
    AVCodec         *pCodec;      /* decoder used to unpack captured packets */
    AVFormatContext *a_ifmtCtx;   /* demuxer context opened on the "alsa" device */
    int  audioindex;              /* index of the audio stream inside a_ifmtCtx */
    AVFrame *pAudioFrame;         /* decoded (raw PCM) frame */
    AVPacket *in_packet;          /* packet read from the device */
    struct SwrContext   *audio_convert_ctx;  /* resampler: device format -> S16 interleaved */
    uint8_t *dst_buffer;          /* resampled PCM output buffer */
    int out_buffer_size;          /* byte size of one resampled frame in dst_buffer */
}IntputDev;
56.	  
57.	static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)  
58.	{  
59.	    AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;  
60.	  
61.	    printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",  
62.	           av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),  
63.	           av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),  
64.	           av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),  
65.	           pkt->stream_index);  
66.	}  
67.	  
68.	static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)  
69.	{  
70.	    /* rescale output packet timestamp values from codec to stream timebase */  
71.	    av_packet_rescale_ts(pkt, *time_base, st->time_base);  
72.	    pkt->stream_index = st->index;  
73.	  
74.	    /* Write the compressed frame to the media file. */  
75.	    log_packet(fmt_ctx, pkt);  
76.	    return av_interleaved_write_frame(fmt_ctx, pkt);  
77.	}  
78.	  
79.	/* Add an output stream. */  
80.	static void add_stream(OutputStream *ost, AVFormatContext *oc,  
81.	                       AVCodec **codec,  
82.	                       enum AVCodecID codec_id)  
83.	{  
84.	    AVCodecContext *c;  
85.	    int i;  
86.	  
87.	    /* find the encoder */  
88.	    *codec = avcodec_find_encoder(codec_id);  
89.	    if (!(*codec)) {  
90.	        fprintf(stderr, "Could not find encoder for '%s'\n",  
91.	                avcodec_get_name(codec_id));  
92.	        exit(1);  
93.	    }  
94.	  
95.	    ost->st = avformat_new_stream(oc, NULL);  
96.	    if (!ost->st) {  
97.	        fprintf(stderr, "Could not allocate stream\n");  
98.	        exit(1);  
99.	    }  
100.	    ost->st->id = oc->nb_streams-1;  
101.	    c = avcodec_alloc_context3(*codec);  
102.	    if (!c) {  
103.	        fprintf(stderr, "Could not alloc an encoding context\n");  
104.	        exit(1);  
105.	    }  
106.	    ost->enc = c;  
107.	  
108.	    switch ((*codec)->type) {  
109.	     int default_sample_rate=48000;//44100  
110.	    case AVMEDIA_TYPE_AUDIO:  
111.	        c->sample_fmt  = (*codec)->sample_fmts ?  
112.	            (*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;  
113.	        c->bit_rate    = 64000;  
114.	        c->sample_rate = 48000;  
115.	        if ((*codec)->supported_samplerates) {  
116.	            c->sample_rate = (*codec)->supported_samplerates[0];  
117.	            for (i = 0; (*codec)->supported_samplerates[i]; i++) {  
118.	                if ((*codec)->supported_samplerates[i] == 48000)  
119.	                    c->sample_rate = 48000;  
120.	            }  
121.	        }  
122.	        c->channels        = av_get_channel_layout_nb_channels(c->channel_layout);  
123.	        c->channel_layout = AV_CH_LAYOUT_STEREO;  
124.	        if ((*codec)->channel_layouts) {  
125.	            c->channel_layout = (*codec)->channel_layouts[0];  
126.	            for (i = 0; (*codec)->channel_layouts[i]; i++) {  
127.	                if ((*codec)->channel_layouts[i] == AV_CH_LAYOUT_STEREO)  
128.	                    c->channel_layout = AV_CH_LAYOUT_STEREO;  
129.	            }  
130.	        }  
131.	        c->channels        = av_get_channel_layout_nb_channels(c->channel_layout);  
132.	        ost->st->time_base = (AVRational){ 1, c->sample_rate };  
133.	        break;  
134.	  
135.	    case AVMEDIA_TYPE_VIDEO:  
136.	        c->codec_id = codec_id;  
137.	  
138.	        c->bit_rate = 400000;  
139.	        /* Resolution must be a multiple of two. */  
140.	        c->width    = 352;  
141.	        c->height   = 288;  
142.	        /* timebase: This is the fundamental unit of time (in seconds) in terms 
143.	         * of which frame timestamps are represented. For fixed-fps content, 
144.	         * timebase should be 1/framerate and timestamp increments should be 
145.	         * identical to 1. */  
146.	        ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };  
147.	        c->time_base       = ost->st->time_base;  
148.	  
149.	        c->gop_size      = 12; /* emit one intra frame every twelve frames at most */  
150.	        c->pix_fmt       = STREAM_PIX_FMT;  
151.	        if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {  
152.	            /* just for testing, we also add B-frames */  
153.	            c->max_b_frames = 2;  
154.	        }  
155.	        if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {  
156.	            /* Needed to avoid using macroblocks in which some coeffs overflow. 
157.	             * This does not happen with normal video, it just happens here as 
158.	             * the motion of the chroma plane does not match the luma plane. */  
159.	            c->mb_decision = 2;  
160.	        }  
161.	    break;  
162.	  
163.	    default:  
164.	        break;  
165.	    }  
166.	  
167.	    /* Some formats want stream headers to be separate. */  
168.	    if (oc->oformat->flags & AVFMT_GLOBALHEADER)  
169.	        c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;  
170.	}  
171.	  
172.	/**************************************************************/  
173.	/* audio output */  
174.	  
175.	static AVFrame *alloc_audio_frame(enum AVSampleFormat sample_fmt,  
176.	                                  uint64_t channel_layout,  
177.	                                  int sample_rate, int nb_samples)  
178.	{  
179.	    AVFrame *frame = av_frame_alloc();  
180.	    int ret;  
181.	  
182.	    if (!frame) {  
183.	        fprintf(stderr, "Error allocating an audio frame\n");  
184.	        exit(1);  
185.	    }  
186.	  
187.	    frame->format = sample_fmt;  
188.	    frame->channel_layout = channel_layout;  
189.	    frame->sample_rate = sample_rate;  
190.	    frame->nb_samples = nb_samples;  
191.	  
192.	    if (nb_samples) {  
193.	        ret = av_frame_get_buffer(frame, 0);  
194.	        if (ret < 0) {  
195.	            fprintf(stderr, "Error allocating an audio buffer\n");  
196.	            exit(1);  
197.	        }  
198.	    }  
199.	  
200.	    return frame;  
201.	}  
202.	  
203.	static void open_audio(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg)  
204.	{  
205.	    AVCodecContext *c;  
206.	    int nb_samples;  
207.	    int ret;  
208.	    AVDictionary *opt = NULL;  
209.	  
210.	    c = ost->enc;  
211.	  
212.	    /* open it */  
213.	    av_dict_copy(&opt, opt_arg, 0);  
214.	    ret = avcodec_open2(c, codec, &opt);  
215.	    av_dict_free(&opt);  
216.	    if (ret < 0) {  
217.	        fprintf(stderr, "Could not open audio codec: %s\n", av_err2str(ret));  
218.	        exit(1);  
219.	    }  
220.	  
221.	    /* init signal generator */  
222.	    ost->t     = 0;  
223.	    ost->tincr = 2 * M_PI * 110.0 / c->sample_rate;  
224.	    /* increment frequency by 110 Hz per second */  
225.	    ost->tincr2 = 2 * M_PI * 110.0 / c->sample_rate / c->sample_rate;  
226.	  
227.	    if (c->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)  
228.	        nb_samples = 10000;  
229.	    else  
230.	        nb_samples = c->frame_size;  
231.	  
232.	    ost->frame     = alloc_audio_frame(c->sample_fmt, c->channel_layout,  
233.	                                       c->sample_rate, nb_samples);  
234.	    ost->tmp_frame = alloc_audio_frame(AV_SAMPLE_FMT_S16, c->channel_layout,  
235.	                                       c->sample_rate, nb_samples);  
236.	  
237.	    printf( "c->channel_layout=%s channel=%d c->sample_fmt=%d  c->sample_rate=%d nb_samples=%d\n",  
238.	         av_ts2str(c->channel_layout),c->channels,c->sample_rate,nb_samples,c->sample_fmt);  
239.	  
240.	    /* copy the stream parameters to the muxer */  
241.	    ret = avcodec_parameters_from_context(ost->st->codecpar, c);  
242.	    if (ret < 0) {  
243.	        fprintf(stderr, "Could not copy the stream parameters\n");  
244.	        exit(1);  
245.	    }  
246.	  
247.	    /* create resampler context */  
248.	        ost->swr_ctx = swr_alloc();  
249.	        if (!ost->swr_ctx) {  
250.	            fprintf(stderr, "Could not allocate resampler context\n");  
251.	            exit(1);  
252.	        }  
253.	  
254.	        /* set options */  
255.	        av_opt_set_int       (ost->swr_ctx, "in_channel_count",   c->channels,       0);  
256.	        av_opt_set_int       (ost->swr_ctx, "in_sample_rate",     c->sample_rate,    0);  
257.	        av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt",      AV_SAMPLE_FMT_S16, 0);  
258.	        av_opt_set_int       (ost->swr_ctx, "out_channel_count",  c->channels,       0);  
259.	        av_opt_set_int       (ost->swr_ctx, "out_sample_rate",    c->sample_rate,    0);  
260.	        av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt",     c->sample_fmt,     0);  
261.	  
262.	        /* initialize the resampling context */  
263.	        if ((ret = swr_init(ost->swr_ctx)) < 0) {  
264.	            fprintf(stderr, "Failed to initialize the resampling context\n");  
265.	            exit(1);  
266.	        }  
267.	}  
268.	  
269.	/* Prepare a 16 bit dummy audio frame of 'frame_size' samples and 
270.	 * 'nb_channels' channels. */  
271.	static AVFrame *get_audio_frame(OutputStream *ost)  
272.	{  
273.	    AVFrame *frame = ost->tmp_frame;  
274.	    int j, i, v;  
275.	    int16_t *q = (int16_t*)frame->data[0];  
276.	  
277.	    /* check if we want to generate more frames */  
278.	    if (av_compare_ts(ost->next_pts, ost->enc->time_base,  
279.	                      STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)  
280.	        return NULL;  
281.	  
282.	    for (j = 0; j <frame->nb_samples; j++) {  
283.	        v = (int)(sin(ost->t) * 10000);  
284.	        for (i = 0; i < ost->enc->channels; i++)  
285.	            *q++ = v;  
286.	        ost->t     += ost->tincr;  
287.	        ost->tincr += ost->tincr2;  
288.	    }  
289.	  
290.	    frame->pts = ost->next_pts;  
291.	    ost->next_pts  += frame->nb_samples;  
292.	  
293.	    return frame;  
294.	}  
295.	  
/*
 * encode one audio frame and send it to the muxer
 * return 1 when encoding is finished, 0 otherwise
 */
static int write_audio_frame(AVFormatContext *oc, OutputStream *ost)
{
    AVCodecContext *c;
    AVPacket pkt = { 0 }; // data and size must be 0;
    AVFrame *frame;
    int ret;
    int got_packet;
    int dst_nb_samples;

    av_init_packet(&pkt);
    c = ost->enc;

    /* synthetic sine source; returns NULL once STREAM_DURATION is reached,
     * in which case the NULL frame below flushes the encoder */
    frame = get_audio_frame(ost);

    if (frame) {
        /* convert samples from native format to destination codec format, using the resampler */
            /* compute destination number of samples */
            dst_nb_samples = av_rescale_rnd(swr_get_delay(ost->swr_ctx, c->sample_rate) + frame->nb_samples,
                                            c->sample_rate, c->sample_rate, AV_ROUND_UP);
            /* in/out rates are equal here, so the sample count must not change */
            av_assert0(dst_nb_samples == frame->nb_samples);

        /* when we pass a frame to the encoder, it may keep a reference to it
         * internally;
         * make sure we do not overwrite it here
         */
        ret = av_frame_make_writable(ost->frame);
        if (ret < 0)
            exit(1);

        /* convert to destination format */
        ret = swr_convert(ost->swr_ctx,
                          ost->frame->data, dst_nb_samples,
                          (const uint8_t **)frame->data, frame->nb_samples);
        if (ret < 0) {
            fprintf(stderr, "Error while converting\n");
            exit(1);
        }

        frame = ost->frame;

        /* pts in codec time base, derived from total samples sent so far */
        frame->pts = av_rescale_q(ost->samples_count, (AVRational){1, c->sample_rate}, c->time_base);
        ost->samples_count += dst_nb_samples;
    }

    /* NOTE(review): avcodec_encode_audio2 is deprecated in newer FFmpeg;
     * kept for consistency with the API level used throughout this file */
    ret = avcodec_encode_audio2(c, &pkt, frame, &got_packet);
    if (ret < 0) {
        fprintf(stderr, "Error encoding audio frame: %s\n", av_err2str(ret));
        exit(1);
    }

    if (got_packet) {
        ret = write_frame(oc, &c->time_base, ost->st, &pkt);
        if (ret < 0) {
            fprintf(stderr, "Error while writing audio frame: %s\n",
                    av_err2str(ret));
            exit(1);
        }
    }

    /* 0 while data keeps flowing; 1 once the flush produced nothing */
    return (frame || got_packet) ? 0 : 1;
}
361.	  
362.	/**************************************************************/  
363.	static AVFrame *get_audio_frame1(OutputStream *ost,IntputDev* input,int *got_pcm)  
364.	{  
365.	    int j, i, v,ret,got_picture;  
366.	    AVFrame *ret_frame=NULL;  
367.	  
368.	    AVFrame *frame = ost->tmp_frame;  
369.	  
370.	  
371.	    *got_pcm=1;  
372.	    /* check if we want to generate more frames */  
373.	    if (av_compare_ts(ost->next_pts, ost->enc->time_base,  
374.	                      STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)  
375.	        return NULL;  
376.	  
377.	  if(av_read_frame(input->a_ifmtCtx, input->in_packet)>=0){  
378.	        if(input->in_packet->stream_index==input->audioindex){  
379.	            ret = avcodec_decode_audio4(input->pCodecCtx, input->pAudioFrame , &got_picture, input->in_packet);  
380.	  
381.	            *got_pcm=got_picture;  
382.	  
383.	            if(ret < 0){  
384.	                printf("Decode Error.\n");  
385.	                av_free_packet(input->in_packet);  
386.	                return NULL;  
387.	            }  
388.	            if(got_picture){  
389.	  
390.	                printf("src nb_samples %d dst nb-samples=%d out_buffer_size=%d\n",  
391.	                    input->pAudioFrame->nb_samples,frame->nb_samples,input->out_buffer_size);  
392.	  
393.	                  swr_convert(input->audio_convert_ctx, &input->dst_buffer, MAX_AUDIO_FRAME_SIZE, (const uint8_t **)input->pAudioFrame->data, input->pAudioFrame->nb_samples);    
394.	  
395.	                frame->pts = ost->next_pts;  
396.	                    ost->next_pts  += frame->nb_samples;  
397.	  
398.	                if(frame->nb_samples*4==input->out_buffer_size)//16bit stereo  
399.	                {  
400.	                    //memcpy(frame->data,input->dst_buffer,input->out_buffer_size);  
401.	                    memcpy(frame->data[0],input->dst_buffer,frame->nb_samples*4);  
402.	                    ret_frame= frame;  
403.	                }  
404.	            }  
405.	        }  
406.	        av_free_packet(input->in_packet);  
407.	    }  
408.	    return frame;  
409.	}  
410.	  
/* Encode one captured-audio frame (in_frame holds interleaved S16 samples
 * in ost->tmp_frame) and send it to the muxer.  Passing in_frame == NULL
 * flushes the encoder.  Returns 1 when encoding is finished (flush drained),
 * 0 otherwise. */
static int write_audio_frame1(AVFormatContext *oc, OutputStream *ost,AVFrame *in_frame)
{
    AVCodecContext *c;
    AVPacket pkt = { 0 }; // data and size must be 0;
    int ret;
    int got_packet;
    int dst_nb_samples;

    //if(in_frame==NULL)
    //  return 1;


    av_init_packet(&pkt);

    AVFrame *frame=in_frame;

    c = ost->enc;

    if (frame) {
        /* convert samples from native format to destination codec format, using the resampler */
            /* compute destination number of samples */
            dst_nb_samples = av_rescale_rnd(swr_get_delay(ost->swr_ctx, c->sample_rate) + frame->nb_samples,
                                            c->sample_rate, c->sample_rate, AV_ROUND_UP);
            /* in/out rates are identical here, so the count must not change */
            av_assert0(dst_nb_samples == frame->nb_samples);

        /* when we pass a frame to the encoder, it may keep a reference to it
         * internally;
         * make sure we do not overwrite it here
         */
        ret = av_frame_make_writable(ost->frame);
        if (ret < 0)
            exit(1);

        /* convert to destination format (S16 -> encoder format, e.g. FLTP for AAC) */
        ret = swr_convert(ost->swr_ctx,
                          ost->frame->data, dst_nb_samples,
                          (const uint8_t **)frame->data, frame->nb_samples);
        if (ret < 0) {
            fprintf(stderr, "Error while converting\n");
            exit(1);
        }
        frame = ost->frame;

        /* pts in codec time base, derived from the running sample count */
        frame->pts = av_rescale_q(ost->samples_count, (AVRational){1, c->sample_rate}, c->time_base);
        ost->samples_count += dst_nb_samples;
    }

    /* NOTE(review): avcodec_encode_audio2 is deprecated in newer FFmpeg;
     * kept for consistency with the API level used throughout this file */
    ret = avcodec_encode_audio2(c, &pkt, frame, &got_packet);
    if (ret < 0) {
        fprintf(stderr, "Error encoding audio frame: %s\n", av_err2str(ret));
        exit(1);
    }

    if (got_packet) {
        ret = write_frame(oc, &c->time_base, ost->st, &pkt);
        if (ret < 0) {
            fprintf(stderr, "Error while writing audio frame: %s\n",
                    av_err2str(ret));
            exit(1);
        }
    }

    /* 0 while data keeps flowing; 1 once a flush produced nothing */
    return (frame || got_packet) ? 0 : 1;
}
475.	  
476.	  
477.	  
478.	static void close_stream(AVFormatContext *oc, OutputStream *ost)  
479.	{  
480.	    avcodec_free_context(&ost->enc);  
481.	    av_frame_free(&ost->frame);  
482.	    av_frame_free(&ost->tmp_frame);  
483.	    sws_freeContext(ost->sws_ctx);  
484.	    swr_free(&ost->swr_ctx);  
485.	}  
486.	  
487.	/**************************************************************/  
488.	/* media file output */  
489.	  
490.	static int flush_encoder(AVFormatContext *fmt_ctx,unsigned int stream_index)    
491.	{    
492.	    int ret;    
493.	    int got_frame;    
494.	    AVPacket enc_pkt;    
495.	    if (!(fmt_ctx->streams[stream_index]->codec->codec->capabilities &    
496.	        AV_CODEC_CAP_DELAY))    
497.	        return 0;    
498.	    while (1) {    
499.	        printf("Flushing stream #%u encoder\n", stream_index);    
500.	        //ret = encode_write_frame(NULL, stream_index, &got_frame);    
501.	        enc_pkt.data = NULL;    
502.	        enc_pkt.size = 0;    
503.	        av_init_packet(&enc_pkt);    
504.	        ret = avcodec_encode_audio2 (fmt_ctx->streams[stream_index]->codec, &enc_pkt,    
505.	            NULL, &got_frame);    
506.	        av_frame_free(NULL);    
507.	        if (ret < 0)    
508.	            break;    
509.	        if (!got_frame){    
510.	            ret=0;    
511.	            break;    
512.	        }    
513.	        printf("Succeed to encode 1 frame! 编码成功1帧!\n");    
514.	        /* mux encoded frame */    
515.	        ret = av_interleaved_write_frame(fmt_ctx, &enc_pkt);    
516.	        if (ret < 0)    
517.	            break;    
518.	    }    
519.	    return ret;    
520.	}    
521.	  
522.	  
523.	  
524.	int main(int argc, char **argv)  
525.	{  
526.	    OutputStream video_st = { 0 }, audio_st = { 0 };  
527.	    const char *filename;  
528.	    AVOutputFormat *fmt;  
529.	    AVFormatContext *oc;  
530.	    AVCodec *audio_codec, *video_codec;  
531.	    int ret;  
532.	    int have_video = 0, have_audio = 0;  
533.	    int encode_video = 0, encode_audio = 0;  
534.	    AVDictionary *opt = NULL;  
535.	    int i;  
536.	  
537.	    if (argc < 2) {  
538.	        printf("usage: %s output_file\n"  
539.	               "API example program to output a media file with libavformat.\n"  
540.	               "This program generates a synthetic audio and video stream, encodes and\n"  
541.	               "muxes them into a file named output_file.\n"  
542.	               "The output format is automatically guessed according to the file extension.\n"  
543.	               "Raw images can also be output by using '%%d' in the filename.\n"  
544.	               "\n", argv[0]);  
545.	        return 1;  
546.	    }  
547.	  
548.	    filename = argv[1];  
549.	    for (i = 2; i+1 < argc; i+=2) {  
550.	        if (!strcmp(argv[i], "-flags") || !strcmp(argv[i], "-fflags"))  
551.	            av_dict_set(&opt, argv[i]+1, argv[i+1], 0);  
552.	    }  
553.	  
554.	    /* allocate the output media context */  
555.	    avformat_alloc_output_context2(&oc, NULL, NULL, filename);  
556.	    if (!oc) {  
557.	        printf("Could not deduce output format from file extension: using MPEG.\n");  
558.	        avformat_alloc_output_context2(&oc, NULL, "mpeg", filename);  
559.	    }  
560.	    if (!oc)  
561.	        return 1;  
562.	  
563.	    fmt = oc->oformat;  
564.	  
565.	    /* Add the audio and video streams using the default format codecs 
566.	     * and initialize the codecs. */  
567.	     
568.	    if (fmt->audio_codec != AV_CODEC_ID_NONE) {  
569.	        add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);  
570.	        have_audio = 1;  
571.	        encode_audio = 1;  
572.	    }  
573.	  
574.	    /* Now that all the parameters are set, we can open the audio and 
575.	     * video codecs and allocate the necessary encode buffers. */  
576.	   
577.	    if (have_audio)  
578.	        open_audio(oc, audio_codec, &audio_st, opt);  
579.	  
580.	    av_dump_format(oc, 0, filename, 1);  
581.	  
582.	    /* open the output file, if needed */  
583.	    if (!(fmt->flags & AVFMT_NOFILE)) {  
584.	        ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);  
585.	        if (ret < 0) {  
586.	            fprintf(stderr, "Could not open '%s': %s\n", filename,  
587.	                    av_err2str(ret));  
588.	            return 1;  
589.	        }  
590.	    }  
591.	  
592.	  
593.	//********add alsa read***********//  
594.	#if ALSA_GET_ENABLE  
595.	    IntputDev alsa_input = { 0 };  
596.	    AVCodecContext  *pCodecCtx;  
597.	    AVCodec         *pCodec;  
598.	       AVFormatContext *a_ifmtCtx;  
599.	  
600.	//Register Device  
601.	    avdevice_register_all();  
602.	  
603.	    a_ifmtCtx = avformat_alloc_context();  
604.	  
605.	  
606.	     //Linux  
607.	    AVInputFormat *ifmt=av_find_input_format("alsa");  
608.	    if(avformat_open_input(&a_ifmtCtx,"default",ifmt,NULL)!=0){  
609.	        printf("Couldn't open input stream.default\n");  
610.	        return -1;  
611.	    }  
612.	   
613.	   
614.	    if(avformat_find_stream_info(a_ifmtCtx,NULL)<0)  
615.	    {  
616.	        printf("Couldn't find stream information.\n");  
617.	        return -1;  
618.	    }  
619.	  
620.	    int audioindex=-1;  
621.	    for(i=0; i<a_ifmtCtx->nb_streams; i++)   
622.	    if(a_ifmtCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO)  
623.	    {  
624.	        audioindex=i;  
625.	        break;  
626.	    }  
627.	    if(audioindex==-1)  
628.	    {  
629.	        printf("Couldn't find a video stream.\n");  
630.	        return -1;  
631.	    }  
632.	          
633.	    pCodecCtx=a_ifmtCtx->streams[audioindex]->codec;  
634.	    pCodec=avcodec_find_decoder(pCodecCtx->codec_id);  
635.	    if(pCodec==NULL)  
636.	    {  
637.	        printf("Codec not found.\n");  
638.	        return -1;  
639.	    }  
640.	    if(avcodec_open2(pCodecCtx, pCodec,NULL)<0)  
641.	    {  
642.	        printf("Could not open codec.\n");  
643.	        return -1;  
644.	    }  
645.	  
646.	    AVPacket *in_packet=(AVPacket *)av_malloc(sizeof(AVPacket));  
647.	  
648.	    AVFrame *pAudioFrame=av_frame_alloc();  
649.	    if(NULL==pAudioFrame)  
650.	    {  
651.	        printf("could not alloc pAudioFrame\n");  
652.	        return -1;  
653.	    }  
654.	  
655.	    //audio output paramter //resample   
656.	    uint64_t out_channel_layout = AV_CH_LAYOUT_STEREO;  
657.	    int out_sample_fmt = AV_SAMPLE_FMT_S16;  
658.	    int out_nb_samples =1024; //pCodecCtx->frame_size;  
659.	    int out_sample_rate = 48000;  
660.	    int out_nb_channels = av_get_channel_layout_nb_channels(out_channel_layout);  
661.	    int out_buffer_size = av_samples_get_buffer_size(NULL, out_nb_channels, out_nb_samples, out_sample_fmt, 1);    
662.	    uint8_t *dst_buffer=NULL;    
663.	    dst_buffer = (uint8_t *)av_malloc(MAX_AUDIO_FRAME_SIZE);   
664.	    int64_t in_channel_layout = av_get_default_channel_layout(pCodecCtx->channels);    
665.	  
666.	  
667.	    printf("audio sample_fmt=%d size=%d channel=%d  sample_rate=%d in_channel_layout=%s\n",  
668.	        pCodecCtx->sample_fmt, pCodecCtx->frame_size,  
669.	        pCodecCtx->channels,pCodecCtx->sample_rate,av_ts2str(in_channel_layout));  
670.	  
671.	    struct SwrContext   *audio_convert_ctx = NULL;    
672.	    audio_convert_ctx = swr_alloc();    
673.	    if (audio_convert_ctx == NULL)    
674.	    {    
675.	        printf("Could not allocate SwrContext\n");    
676.	        return -1;    
677.	    }    
678.	  
679.	      /* set options */  
680.	        av_opt_set_int       (audio_convert_ctx, "in_channel_count",   pCodecCtx->channels,       0);  
681.	        av_opt_set_int       (audio_convert_ctx, "in_sample_rate",     pCodecCtx->sample_rate,    0);  
682.	        av_opt_set_sample_fmt(audio_convert_ctx, "in_sample_fmt",      pCodecCtx->sample_fmt, 0);  
683.	        av_opt_set_int       (audio_convert_ctx, "out_channel_count",  out_nb_channels,       0);  
684.	        av_opt_set_int       (audio_convert_ctx, "out_sample_rate",   out_sample_rate,    0);  
685.	        av_opt_set_sample_fmt(audio_convert_ctx, "out_sample_fmt",     out_sample_fmt,     0);  
686.	  
687.	        /* initialize the resampling context */  
688.	        if ((ret = swr_init(audio_convert_ctx)) < 0) {  
689.	            fprintf(stderr, "Failed to initialize the resampling context\n");  
690.	            exit(1);  
691.	        }  
692.	  
693.	  
694.	    alsa_input.in_packet=in_packet;  
695.	    alsa_input.pCodecCtx=pCodecCtx;  
696.	    alsa_input.pCodec=pCodec;  
697.	       alsa_input.a_ifmtCtx=a_ifmtCtx;  
698.	    alsa_input.audioindex=audioindex;  
699.	    alsa_input.pAudioFrame=pAudioFrame;  
700.	    alsa_input.audio_convert_ctx=audio_convert_ctx;  
701.	    alsa_input.dst_buffer=dst_buffer;  
702.	    alsa_input.out_buffer_size=out_buffer_size;  
703.	#if OUTPUT_PCM   
704.	    FILE *fp_pcm=fopen("output.pcm","wb+");    
705.	#endif   
706.	  
707.	#endif   
708.	//******************************//  
709.	  
710.	  
711.	  
712.	    /* Write the stream header, if any. */  
713.	    ret = avformat_write_header(oc, &opt);  
714.	    if (ret < 0) {  
715.	        fprintf(stderr, "Error occurred when opening output file: %s\n",  
716.	                av_err2str(ret));  
717.	        return 1;  
718.	    }  
719.	  
720.	    int got_pcm;  
721.	  
722.	  
723.	    int frameCnt=3;  
724.	    while (encode_audio) {  
725.	        /* select the stream to encode */  
726.	  
727.	#if ALSA_GET_ENABLE  
728.	  
729.	        AVFrame *frame=get_audio_frame1(&audio_st,&alsa_input,&got_pcm);  
730.	        if(!got_pcm)  
731.	        {  
732.	            printf("get_audio_frame1 Error.\n");  
733.	            usleep(10000);  
734.	            continue;  
735.	  
736.	        }  
737.	  
738.	        encode_audio = !write_audio_frame1(oc, &audio_st,frame);  
739.	#if OUTPUT_PCM    
740.	            if(frame!=NULL)  
741.	                fwrite(frame->data[0],1,alsa_input.out_buffer_size,fp_pcm);    //Y     
742.	          
743.	#endif   
744.	  
745.	#else  
746.	  
747.	        encode_audio = !write_audio_frame(oc, &audio_st);  
748.	  
749.	#endif  
750.	    }  
751.	  
752.	    av_write_trailer(oc);  
753.	#if ALSA_GET_ENABLE  
754.	  
755.	 #if OUTPUT_PCM  
756.	    fclose(fp_pcm);  
757.	#endif   
758.	  
759.	    swr_free(&alsa_input.audio_convert_ctx);   
760.	    avcodec_close(alsa_input.pCodecCtx);      
761.	    av_free(alsa_input.pAudioFrame);  
762.	    av_free(alsa_input.dst_buffer);   
763.	    avformat_close_input(&alsa_input.a_ifmtCtx);  
764.	  
765.	#endif  
766.	  
767.	    /* Close each codec. */  
768.	    if (have_audio)  
769.	        close_stream(oc, &audio_st);  
770.	  
771.	    if (!(fmt->flags & AVFMT_NOFILE))  
772.	        /* Close the output file. */  
773.	        avio_closep(&oc->pb);  
774.	  
775.	      
776.	  
777.	    /* free the stream */  
778.	    avformat_free_context(oc);  
779.	  
780.	    return 0;  
781.	}  

3.验证

3.1编译

1.	#!/bin/sh  
2.	export PKG_CONFIG_PATH=/home/quange/ffmpeg_build/lib/pkgconfig/:$PKG_CONFIG_PATH  
3.	gcc ffmpeg_get_pcm_muxing.c -g -o ffmpeg_get_pcm_muxing.out  -lSDLmain -lSDL  `pkg-config "libavcodec" --cflags --libs` `pkg-config "libavformat" --cflags --libs` `pkg-config "libavutil" --cflags --libs` `pkg-config "libswscale" --cflags --libs` `pkg-config "libavdevice" --cflags --libs`

3.2结果

使用软件mp4打开test.mp4,可以听到实时采集的音频
FFMPEG音视频同步-音频实时采集编码封装成MP4_第1张图片

3.3存在的问题

1、本文档源码,采集ALSA获取的源数据是48K,stereo声道,1024采样大小,经转换成pcm的格式,同样是48K,stereo声道1024大小;不同的采样率会出现部分声音数据丢失,具体原因,在 《ffmpeg-音频实时采集保存》具体分析 ,涉及其他分辨率转换,不在本文讨论范围里。

2、由于本例程没有没有对ALSA音频进行重采样,(48000采样率,1024个采样点,双声道,16bit),故若保存成ts,flv等封装格式是,默认采用的音频编码格式分别是mpga,mp3,其采样点为1152。所以运行此程序会有问题,数据无法写入。而且目前暂不探讨音频重采样问题。故想到一种解决方法是,指定AAC音频编码格式(ts,mp4等都支持AAC音频编码,参考《ffmpeg-摄像头采集编码封装》的表格)。

1.	if (fmt->audio_codec != AV_CODEC_ID_NONE) {  
2.	       add_stream(&audio_st, oc, &audio_codec,AV_CODEC_ID_AAC);// fmt->audio_codec);  
3.	       have_audio = 1;  
4.	       encode_audio = 1;  
5.	   }  

4.附件

5.参考链接

[1] ffmpeg之PCM转AAC
https://blog.csdn.net/mengzhengjie/article/details/78919067
[2]官方Encode pcm file to aac
http://ffmpeg.org/pipermail/ffmpeg-user/2014-January/019268.html

[3]PCM编码AAC,参考其普通PCM格式与AAC转格式差异 https://blog.csdn.net/mengzhengjie/article/details/78919067

[4]https://cloud.tencent.com/developer/article/1194003

你可能感兴趣的:(音视频处理,FFMPEG)