测试使用 FFMPEG
从MP4文件中解析出H264流和AAC流,之后按照固定帧率将音视频流推送到RTMP服务器。
新建项目,加入FFMPEG和LibRTMP相关依赖。
编写简单的时间控制类 CTimeStatistics
,这个类主要负责RTMP发送时候的帧控制。
#pragma once
#define WIN32_LEAN_AND_MEAN
#include
typedef long long tick_t;
#define GET_TIME(T,S,F) ((double)((T)-(S))/(double)(F/1000))
class CTimeStatistics
{
public:
CTimeStatistics()
{
_start = 0;
_stop = 0;
}
virtual ~CTimeStatistics() {};
inline void reset()
{
_start = 0;
_stop = 0;
}
inline void start()
{
_start = _get_tick();
}
inline void stop()
{
_stop = _get_tick();
}
inline double get_delta()
{
return GET_TIME(_get_tick(), _start, _get_frequency());
}
inline double get_total()
{
return GET_TIME(_stop, _start, _get_frequency());
}
protected:
tick_t _get_tick()
{
LARGE_INTEGER t1;
QueryPerformanceCounter(&t1);
return t1.QuadPart;
}
tick_t _get_frequency()
{
LARGE_INTEGER t1;
QueryPerformanceFrequency(&t1);
return t1.QuadPart;
}
private:
tick_t _start;
tick_t _stop;
};
包含简单的Start/Stop/Reset和获得时间间隔接口。
这里假定对FFMPEG的使用有些基本了解。
主要结构体定义:
// For ffmpeg demux
// Source container types the demuxer supports: MP4 files (avcC extradata)
// or raw Annex-B H.264 streams (start-code delimited extradata).
typedef enum _stream_type
{
STREAM_FILE_MP4 = 0,
STREAM_H264_RAW
} stream_type_t;
// Bundles the FFmpeg demux context with the streams located by
// _parse_streams.
typedef struct _fmt_manage
{
AVFormatContext *context;	// demux context opened via avformat_open_input
AVStream *vstream;	// video stream assigned by _parse_streams
AVStream *astream;	// audio stream assigned by _parse_streams
} fmt_manage_t;
从文件中解析出流信息:
// Ffmpeg demux, parse streams from file
// NOTE(review): av_register_all() is deprecated (and a no-op) from
// FFmpeg 4.0 on; it is kept here for older FFmpeg builds.
av_register_all();
// Open the container and probe it so stream parameters are populated;
// each failure falls out of the enclosing retry/cleanup loop (not shown).
if (avformat_open_input(&_manage.context, file.c_str(), 0, 0) < 0)
break;
if (avformat_find_stream_info(_manage.context, 0) < 0)
break;
if (!_parse_streams(meta, type))
break;
其中主要函数 _parse_streams:
// Scan the demuxed container for the first video and audio streams and fill
// `meta` with what the RTMP pusher needs: width/height/fps/bitrate plus
// SPS/PPS for H.264, and channels/samplerate plus the AudioSpecificConfig
// (esds) blob for AAC.
// `type` selects the video extradata layout: an avcC box for MP4 input,
// Annex-B start-code data for raw H.264 input.
// Always returns true (matching the original behavior).
bool CTestLibRTMPPusher::_parse_streams(metadata_t &meta, stream_type_t type)
{
	for (unsigned int i = 0; i < _manage.context->nb_streams; i++) {
		AVStream *stream = _manage.context->streams[i];
		AVCodecParameters *par = stream->codecpar;
		// Video stream
		if (AVMEDIA_TYPE_VIDEO == par->codec_type) {
			_manage.vstream = stream;
			// Read everything from codecpar; the original mixed codecpar
			// with the deprecated AVStream::codec context.
			meta.width = par->width;
			meta.height = par->height;
			// Guard the division: avg_frame_rate may be 0/0 for some files.
			if (stream->avg_frame_rate.den != 0)
				meta.fps = stream->avg_frame_rate.num / stream->avg_frame_rate.den;
			meta.bitrate_kpbs = (uint32_t)(par->bit_rate / 1000);
			// Parse sps/pps from extradata.
			// If MP4, extradata stores 'avcC'; raw H264 stores Annex-B sps/pps.
			if (par->extradata_size > 0) {
				uint32_t size = (uint32_t)par->extradata_size;
				uint8_t *ptr = par->extradata;
				switch (type)
				{
				case STREAM_FILE_MP4:
				{
					// avcC layout: 5 header bytes, numOfSPS (low 5 bits),
					// then {len16, payload} per SPS; numOfPPS, then
					// {len16, payload} per PPS.
					uint32_t offset = 5;
					if (size < 7)
						break;	// too short to contain any parameter set
					uint32_t num_sps = ptr[offset++] & 0x1f;
					for (uint32_t j = 0; j < num_sps && offset + 2 <= size; j++) {
						uint32_t len = (ptr[offset] << 8) | ptr[offset + 1];
						offset += 2;
						// Bound-check source AND destination -- the original
						// memcpy could overflow the fixed-size data_sps.
						if (offset + len > size || len > sizeof(meta.vparam.data_sps))
							break;
						meta.vparam.size_sps = len;
						memcpy(meta.vparam.data_sps, ptr + offset, len);
						offset += len;
					}
					if (offset >= size)
						break;
					uint32_t num_pps = ptr[offset++];
					for (uint32_t j = 0; j < num_pps && offset + 2 <= size; j++) {
						uint32_t len = (ptr[offset] << 8) | ptr[offset + 1];
						offset += 2;
						if (offset + len > size || len > sizeof(meta.vparam.data_pps))
							break;
						meta.vparam.size_pps = len;
						memcpy(meta.vparam.data_pps, ptr + offset, len);
						offset += len;
					}
				}
				break;
				case STREAM_H264_RAW:
				{
					// Annex-B: 00 00 00 01 <NAL> 00 00 00 01 <NAL>; the low
					// 5 bits of the first payload byte give the NAL type
					// (7 = SPS, 8 = PPS).
					uint32_t offset = 0;
					if (size < 5 || ptr[0] != 0x00 || ptr[1] != 0x00 || ptr[2] != 0x00 || ptr[3] != 0x01) {
						// No valid data...
					}
					else {
						// Find the second start code; bounds are checked
						// BEFORE dereferencing (the original tested the
						// bytes first).
						offset++;
						while ((offset < size - 3) && (ptr[offset] != 0x00 || ptr[offset + 1] != 0x00 || ptr[offset + 2] != 0x00 || ptr[offset + 3] != 0x01))
							offset++;
						uint32_t first_len = offset - 4;
						// Wraps to a huge value if no second start code was
						// found; the size checks below reject that case.
						uint32_t second_len = size - offset - 4;
						if ((ptr[4] & 0x1f) == 7) { // SPS first
							if (first_len <= sizeof(meta.vparam.data_sps) && second_len <= sizeof(meta.vparam.data_pps)) {
								meta.vparam.size_sps = first_len;
								memcpy(meta.vparam.data_sps, ptr + 4, first_len);
								meta.vparam.size_pps = second_len;
								memcpy(meta.vparam.data_pps, ptr + offset + 4, second_len);
							}
						}
						else if ((ptr[4] & 0x1f) == 8) { // PPS first
							if (first_len <= sizeof(meta.vparam.data_pps) && second_len <= sizeof(meta.vparam.data_sps)) {
								meta.vparam.size_pps = first_len;
								memcpy(meta.vparam.data_pps, ptr + 4, first_len);
								meta.vparam.size_sps = second_len;
								memcpy(meta.vparam.data_sps, ptr + offset + 4, second_len);
							}
						}
					}
				}
				break;
				default:
					break;
				}
			}
		}
		// Audio stream
		else if (AVMEDIA_TYPE_AUDIO == par->codec_type) {
			_manage.astream = stream;
			meta.has_audio = true;
			meta.channels = par->channels;
			meta.samplerate = par->sample_rate;
			meta.samplesperframe = par->frame_size;
			meta.datarate = (uint32_t)par->bit_rate;
			// Copy the esds blob (AudioSpecificConfig), bound-checked
			// against the fixed-size destination buffer.
			if (par->extradata_size > 0 && (size_t)par->extradata_size <= sizeof(meta.aparam.data_esds)) {
				meta.aparam.size_esds = (uint32_t)par->extradata_size;
				memcpy(meta.aparam.data_esds, par->extradata, par->extradata_size);
			}
		}
	}
	return true;
}
_manage.context->streams[i]->codecpar->codec_type
判断流类型;avcCfg
结构体,需要从中解析出SPS和PPS;当为H264裸流文件时,保存的是以 0x00,0x00,0x00,0x01
开头的SPS和PPS,需要从中解析出SPS和PPS;循环解析和发送数据:
// Main push loop: demux one packet per iteration and forward it to the
// RTMP sender; audio frame duration drives the pacing.
while (_running)
{
	// FPS control (disabled: pacing currently comes from the audio Sleep below)
	//uint64_t real_time_ms = statistics.get_delta();
	//uint64_t theory_time_ms = period_ms * video_frame_count;
	//if (theory_time_ms > real_time_ms) {
	//	uint64_t wait_ms = theory_time_ms - real_time_ms;
	//	Sleep(wait_ms);
	//}
	if (!_running)
		break;
	// Read frames from file by ffmpeg
	AVPacket pkt = { 0 };
	if (av_read_frame(_manage.context, &pkt) < 0)
		break;
	// Video frame
	if (pkt.stream_index == _manage.vstream->index) {
		bool keyframe = (pkt.flags & AV_PKT_FLAG_KEY) != 0;
		// MP4 packets are AVCC (length-prefixed): overwrite the 4-byte size
		// with an Annex-B start code 0x00,0x00,0x00,0x01.
		// NOTE(review): only the FIRST NALU of the packet is rewritten --
		// confirm the source file carries one NALU per packet.
		pkt.data[0] = 0x00;
		pkt.data[1] = 0x00;
		pkt.data[2] = 0x00;
		pkt.data[3] = 0x01;
		// Pts: wall clock in ms, rebased so the stream starts at 0
		// (some RTMP servers require timestamps starting from 0).
		//pkt.pts = av_rescale_q(pkt.pts, _manage.vstream->time_base, AVRational{ 1, 1000 });
		pkt.pts = get_time_us() / 1000;
		if (first_video_timstamp == 0) {
			first_video_timstamp = pkt.pts;
		}
		pkt.pts -= first_video_timstamp;
		_send_video(pkt.size, pkt.data, pkt.pts, keyframe);
		video_frame_count++;
		if (video_frame_count % 100 == 0) {
			printf("Send video frames: %d\n", video_frame_count);
		}
	}
	// Audio frame
	else if (pkt.stream_index == _manage.astream->index) {
		// Prepend a 7-byte ADTS header (MPEG-4, AAC-LC, no CRC) to each raw
		// AAC frame; the sample-rate index and channel count come from the
		// AudioSpecificConfig stored in esds.
		uint32_t sample_index = ((_metadata.aparam.data_esds[0] & 0x07) << 1) | (_metadata.aparam.data_esds[1] >> 7);
		uint32_t channels = ((_metadata.aparam.data_esds[1]) & 0x7f) >> 3;
		uint32_t size = pkt.size + 7;	// ADTS frame length includes the header
		_audio_buf_ptr[0] = 0xff;	// syncword high bits
		_audio_buf_ptr[1] = 0xf1;	// syncword low bits, MPEG-4, layer 0, no CRC
		_audio_buf_ptr[2] = 0x40 | (sample_index << 2) | (channels >> 2);	// profile = AAC-LC
		_audio_buf_ptr[3] = ((channels & 0x3) << 6) | (size >> 11);
		_audio_buf_ptr[4] = (size >> 3) & 0xff;
		_audio_buf_ptr[5] = ((size << 5) & 0xff) | 0x1f;	// buffer fullness (all ones)
		_audio_buf_ptr[6] = 0xfc;
		memcpy(_audio_buf_ptr + 7, pkt.data, pkt.size);
		// Pts: same wall-clock rebasing as video
		//pkt.pts = av_rescale_q(pkt.pts, _manage.astream->time_base, AVRational{ 1, 1000 });
		pkt.pts = get_time_us() / 1000;
		if (first_audio_timstamp == 0) {
			first_audio_timstamp = pkt.pts;
		}
		pkt.pts -= first_audio_timstamp;
		_send_audio(size, _audio_buf_ptr, pkt.pts);
		// Crude pacing: sleep for one audio frame's duration.
		float ms = 1000.f * (float)_metadata.samplesperframe / (float)_metadata.samplerate;
		Sleep((DWORD)ms);
		audio_frame_count++;
	}
	// Release the buffer av_read_frame allocated -- the original code
	// leaked every packet.
	av_packet_unref(&pkt);
}
pkt.stream_index
判断;pkt.flags & AV_PKT_FLAG_KEY
判断;0x00,0x00,0x00,0x01
分割,而是以4个字节的size打头,因此需要将4个字节的size替换为 0x00,0x00,0x00,0x01
起始分隔符;ADTS
头,因此需要额外加上;但RTMP并不要求AAC包含ADTS,且RTMP拉流时也不包含ADTS;pts
该怎么打一直是个坑。对于RTMP协议,要保证pts递增,常用的方案是以 ms
为单位进行计算。某些服务器要求RTMP的 pts
从0开始,因此此处也这样做。对于音视频处理中常见的 PTS 时间戳问题,以及音视频同步问题,后续文章会进行单独讨论。
使用的结构体定义:
// For rtmp structure
#define H264_PARAM_LEN 512
// H.264 decoder parameters: raw SPS/PPS payloads (no start codes, no
// length prefixes).
typedef struct _h264_param
{
uint32_t size_sps;	// valid bytes in data_sps
uint8_t data_sps[H264_PARAM_LEN];
uint32_t size_pps;	// valid bytes in data_pps
uint8_t data_pps[H264_PARAM_LEN];
} h264_param_t;
#define AAC_PARAM_LEN 64
// AAC AudioSpecificConfig as extracted from the esds box.
typedef struct _aac_param
{
uint32_t size_esds;	// valid bytes in data_esds
uint8_t data_esds[AAC_PARAM_LEN];
} aac_param_t;
// Stream description filled by _parse_streams and sent to the RTMP
// server in the onMetaData tag.
typedef struct _metadata
{
// Video
uint32_t width;
uint32_t height;
uint32_t fps;
uint32_t bitrate_kpbs;	// NOTE: name misspells "kbps"; value is bit_rate / 1000
h264_param_t vparam;
// Audio
bool has_audio;	// set when an audio stream was found
uint32_t channels;
uint32_t samplerate;
uint32_t samplesperframe;	// AAC frame_size from codec parameters
uint32_t datarate;	// audio bit rate in bits/s
aac_param_t aparam;
} metadata_t;
LibRTMP在Windows下的使用前和使用后要进行Socket的初始化和反初始化:
bool CTestLibRTMPPusher::_init_sockets()
{
WORD version;
WSADATA wsaData;
version = MAKEWORD(2, 2);
return (0 == WSAStartup(version, &wsaData));
}
// Tear down WinSock once LibRTMP is finished with the network;
// pairs with _init_sockets.
void CTestLibRTMPPusher::_cleanup_sockets()
{
WSACleanup();
}
RTMP资源初始化:
...
// Allocate and initialize the librtmp session object.
_rtmp_ptr = RTMP_Alloc();
if (NULL == _rtmp_ptr)
	break;
RTMP_Init(_rtmp_ptr);
...
// Parse rtmp url
_rtmp_ptr->Link.timeout = timeout_secs;
_rtmp_ptr->Link.lFlags |= RTMP_LF_LIVE;
// librtmp APIs return TRUE/FALSE, never negative values, so the
// original "< 0" comparisons could not detect failure.
if (!RTMP_SetupURL(_rtmp_ptr, (char *)url.c_str()))
	break;
// Pusher mode
RTMP_EnableWrite(_rtmp_ptr);
// Socket connection
// Handshakes and connect command
if (!RTMP_Connect(_rtmp_ptr, NULL))
	break;
// Setup stream and stream settings
if (!RTMP_ConnectStream(_rtmp_ptr, 0))
	break;
// Send metadata(video and audio settings)
if (!_send_metadata(_metadata))
	break;
RTMP为Adobe公司开发,其上层传输封装主要基于 FLV 格式,这样可以使用Flash插件直接播放。因此,在发送RTMP的包时必须遵循FLV的封装规范。
FLV由Header和Body组成;Body由多组Tag组成;Tag又由TagHeader和TagData组成。由于RTMP协议的包已经包含了TagHeader的信息,因此,在推流时没必要附加上TagHeader,即实际发送的RTMP packet:
A/V data
|
添加上FLV的TagData头部分数据
|
添加包信息,用于底层分包使用
|
底层拆包发送
onMetaData
为FLV的第一个Tag。在RTMP的网络和流通道建立完毕后,需要上层发送的第一个包就是Metadata包。Metadata包主要是键值对形式,指明音视频的格式和解码信息。详细可参见文末 FLV文件的第一个Tag: onMetaData 。
代码:
// Build and send the onMetaData tag -- the first tag the server expects
// after the stream is created. The body is AMF0 key/value pairs wrapped
// in @setDataFrame.
RTMPPacket packet;
RTMPPacket_Alloc(&packet, RTMP_METADATA_SIZE);
RTMPPacket_Reset(&packet);
packet.m_packetType = RTMP_PACKET_TYPE_INFO;	// script/data message
packet.m_nChannel = 0x04;
packet.m_headerType = RTMP_PACKET_SIZE_LARGE;
packet.m_nTimeStamp = 0;
packet.m_nInfoField2 = _rtmp_ptr->m_stream_id;
/////////////////////////////////////////////
// Send media info
char *ptr = packet.m_body;
ptr = _put_byte(ptr, AMF_STRING);
ptr = _put_amf_string(ptr, "@setDataFrame");
ptr = _put_byte(ptr, AMF_STRING);
ptr = _put_amf_string(ptr, "onMetaData");
ptr = _put_byte(ptr, AMF_OBJECT);
ptr = _put_amf_string(ptr, "copyright");
ptr = _put_byte(ptr, AMF_STRING);
ptr = _put_amf_string(ptr, "firehood");
ptr = _put_amf_string(ptr, "width");
ptr = _put_amf_double(ptr, meta.width);
ptr = _put_amf_string(ptr, "height");
ptr = _put_amf_double(ptr, meta.height);
ptr = _put_amf_string(ptr, "framerate");
ptr = _put_amf_double(ptr, meta.fps);
ptr = _put_amf_string(ptr, "videodatarate");
ptr = _put_amf_double(ptr, meta.bitrate_kpbs);
double vcodec_ID = 7;	// FLV videocodecid 7 = AVC/H.264
ptr = _put_amf_string(ptr, "videocodecid");
ptr = _put_amf_double(ptr, vcodec_ID);
if (meta.has_audio) {
	ptr = _put_amf_string(ptr, "audiodatarate");
	ptr = _put_amf_double(ptr, meta.datarate);
	ptr = _put_amf_string(ptr, "audiosamplerate");
	ptr = _put_amf_double(ptr, meta.samplerate);
	ptr = _put_amf_string(ptr, "audiosamplesize");
	ptr = _put_amf_double(ptr, meta.samplesperframe);
	ptr = _put_amf_string(ptr, "stereo");
	ptr = _put_amf_double(ptr, meta.channels);
	double acodec_ID = 10;	// FLV audiocodecid 10 = AAC
	ptr = _put_amf_string(ptr, "audiocodecid");
	ptr = _put_amf_double(ptr, acodec_ID);
}
// Empty key + AMF_OBJECT_END terminates the AMF object.
ptr = _put_amf_string(ptr, "");
ptr = _put_byte(ptr, AMF_OBJECT_END);
packet.m_nBodySize = ptr - packet.m_body;
// RTMP_SendPacket returns FALSE (0) on failure, never a negative value,
// so the original "< 0" check could not detect errors.
if (!RTMP_SendPacket(_rtmp_ptr, &packet, 0)) {
	RTMPPacket_Free(&packet);
	return false;
}
AMF/AMF3
编码方式;主要以键值对方式;视频帧数据需要打上FLV的TagData头数据。详细可参见文末 librtmp获取视频流和音频流1 。视频包有两种,一种为视频同步数据 AVCDecoderConfigurationRecord
(解码信息包),一种为H264帧数据 One or more NALUs
(内容视频包)。
Video TagData:
Field | Type | Comment |
---|---|---|
Frame Type | UB [4] | Type of video frame. The following values are defined: 1 = key frame (for AVC, a seekable frame) 2 = inter frame (for AVC, a non-seekable frame) 3 = disposable inter frame (H.263 only) 4 = generated key frame (reserved for server use only) 5 = video info/command frame |
CodecID | UB [4] | Codec Identifier. The following values are defined: 2 = Sorenson H.263 3 = Screen video 4 = On2 VP6 5 = On2 VP6 with alpha channel 6 = Screen video version 2 7 = AVC |
AVCPacketType | IF CodecID == 7 UI8 |
The following values are defined: 0 = AVC sequence header 1 = AVC NALU 2 = AVC end of sequence (lower level NALU sequence ender is not required or supported) |
CompositionTime | IF CodecID == 7 SI24 |
IF AVCPacketType == 1 Composition time offset ELSE 0 See ISO 14496-12, 8.15.3 for an explanation of compositiontimes. The offset in an FLV file is always in milliseconds. |
IF AVCPacketType == 0 AVCDecoderConfigurationRecord(AVC sequence header)
IF AVCPacketType == 1 One or more NALUs (Full frames are required)
/////////////////////////////////////////////
// Send decode info (AVC sequence header)
// FLV video tag layout:
// FrameType(4 bits)+CodecID(4 bits), AVCPacketType(1 byte),
// CompositionTime(3 bytes), then AVCDecoderConfigurationRecord
//
uint32_t offset = 0;
packet.m_body[offset++] = 0x17;	// 1 = keyframe, 7 = AVC
packet.m_body[offset++] = 0x00;	// AVCPacketType 0 = sequence header
packet.m_body[offset++] = 0x00;	// CompositionTime = 0
packet.m_body[offset++] = 0x00;
packet.m_body[offset++] = 0x00;
// AVCDecoderConfigurationRecord
packet.m_body[offset++] = 0x01;	// configurationVersion
// metadata_t names the video parameters "vparam"; the original code
// referred to a nonexistent "param" member.
packet.m_body[offset++] = meta.vparam.data_sps[1];	// AVCProfileIndication
packet.m_body[offset++] = meta.vparam.data_sps[2];	// profile_compatibility
packet.m_body[offset++] = meta.vparam.data_sps[3];	// AVCLevelIndication
packet.m_body[offset++] = 0xff;	// lengthSizeMinusOne = 3 (4-byte NALU lengths)
// SPS
packet.m_body[offset++] = 0xE1;	// numOfSequenceParameterSets = 1
packet.m_body[offset++] = meta.vparam.size_sps >> 8;
packet.m_body[offset++] = meta.vparam.size_sps & 0xff;
memcpy(&packet.m_body[offset], meta.vparam.data_sps, meta.vparam.size_sps);
offset += meta.vparam.size_sps;
// PPS
packet.m_body[offset++] = 0x01;	// numOfPictureParameterSets = 1
packet.m_body[offset++] = meta.vparam.size_pps >> 8;
packet.m_body[offset++] = meta.vparam.size_pps & 0xff;
memcpy(&packet.m_body[offset], meta.vparam.data_pps, meta.vparam.size_pps);
offset += meta.vparam.size_pps;
packet.m_packetType = RTMP_PACKET_TYPE_VIDEO;
packet.m_nBodySize = offset;
// RTMP_SendPacket returns FALSE (0) on failure, never a negative value.
if (!RTMP_SendPacket(_rtmp_ptr, &packet, 0)) {
	RTMPPacket_Free(&packet);
	return false;
}
AVCDecoderConfiguration
格式即可。
RTMPPacket packet;
// Send one H.264 frame as an FLV video tag (AVCPacketType = 1, NALU).
RTMPPacket_Alloc(&packet, size + RTMP_RESERVED_HEAD_SIZE * 2);
RTMPPacket_Reset(&packet);
packet.m_packetType = RTMP_PACKET_TYPE_VIDEO;
packet.m_nChannel = 0x04;
packet.m_headerType = RTMP_PACKET_SIZE_LARGE;
packet.m_nTimeStamp = pts;
packet.m_nInfoField2 = _rtmp_ptr->m_stream_id;
uint32_t offset = 0;
// FrameType(4 bits)+CodecID(4 bits): 0x17 = keyframe+AVC, 0x27 = interframe+AVC
if (keyframe)
	packet.m_body[offset++] = 0x17;
else
	packet.m_body[offset++] = 0x27;
packet.m_body[offset++] = 0x01;	// AVCPacketType 1 = NALU
packet.m_body[offset++] = 0x00;	// CompositionTime = 0
packet.m_body[offset++] = 0x00;
packet.m_body[offset++] = 0x00;
// 4-byte big-endian NALU length prefix
packet.m_body[offset++] = size >> 24;
packet.m_body[offset++] = size >> 16;
packet.m_body[offset++] = size >> 8;
packet.m_body[offset++] = size & 0xff;
// NOTE(review): the caller overwrote the first 4 bytes of data_ptr with
// an Annex-B start code, so the "NALU" sent here still begins with
// 00 00 00 01 -- verify the receiving server tolerates this framing.
memcpy(packet.m_body + offset, data_ptr, size);
packet.m_nBodySize = offset + size;
// RTMP_SendPacket returns FALSE (0) on failure, never a negative value.
if (!RTMP_SendPacket(_rtmp_ptr, &packet, 0)) {
	RTMPPacket_Free(&packet);
	return false;
}
RTMPPacket_Free(&packet);
return true;
同样,音频也分为音频信息包和音频数据包。
Audio TagData:
名称 | 二进制值 | 介绍 |
---|---|---|
音频格式 | 4 bits[AAC]1010 | 0 = Linear PCM, platform endian 1 = ADPCM 2 = MP3 3 = Linear PCM, little endian 4 = Nellymoser 16-kHz mono 5 = Nellymoser 8-kHz mono 6 = Nellymoser 7 = G.711 A-law logarithmic PCM 8 = G.711 mu-law logarithmic PCM 9 = reserved 10 = AAC 11 = Speex 14 = MP3 8-Khz 15 = Device-specific sound |
采样率 | 2 bits[44kHZ]11 | 0 = 5.5-kHz 1 = 11-kHz 2 = 22-kHz 3 = 44-kHz 对于AAC总是3 |
采样的长度 | 1 bit | 0 = snd8Bit 1 = snd16Bit 压缩过的音频都是16bit |
音频类型 | 1 bit | 0 = sndMono,单声道 1 = sndStereo,立体声 对于AAC总是1 |
AACPacketType | 8bit 00000000 只有SoundFormat == 10时才有此8bit的字段 | 0x00,表示音频同步包;0x01,表示音频raw数据。 |
AudioObjectType | 5bits [AAC LC]00010 | |
SampleRateIndex | 4bits [44100]0100 | |
ChannelConfig | 4bits [Stereo]0010 | |
FrameLengthFlag | 1bit | |
dependOnCoreCoder | 1bit 0 | |
extensionFlag | 1bit 0 |
音频信息包不额外发送也可以,如需发送需要根据上表生成,一般共4个字节(2bytes AACDecoderSpecificInfo和2bytesAudioSpecificConfig)。
// Send one AAC frame as an FLV audio tag.
RTMPPacket packet;
RTMPPacket_Alloc(&packet, size + RTMP_RESERVED_HEAD_SIZE * 2);
RTMPPacket_Reset(&packet);
packet.m_packetType = RTMP_PACKET_TYPE_AUDIO;
packet.m_nChannel = 0x04;
packet.m_headerType = RTMP_PACKET_SIZE_MEDIUM;
packet.m_nTimeStamp = pts;
packet.m_hasAbsTimestamp = 0;
packet.m_nInfoField2 = _rtmp_ptr->m_stream_id;
// 0xAF = AAC(10), 44 kHz index(3), 16-bit(1), stereo(1);
// 0x01 = AACPacketType raw data (0x00 would be the sequence header)
packet.m_body[0] = 0xAF;
packet.m_body[1] = 0x01;
memcpy(packet.m_body + 2, data_ptr, size);
packet.m_nBodySize = size + 2;
// RTMP_SendPacket returns FALSE (0) on failure, never a negative value.
if (!RTMP_SendPacket(_rtmp_ptr, &packet, FALSE)) {
	RTMPPacket_Free(&packet);
	return false;
}
RTMPPacket_Free(&packet);
return true;
RTMPPacket
内存在分配时预留了 RTMP_RESERVED_HEAD_SIZE
大小;这样,在底层拆包后填充包头时直接在预留内存部分填充就可以了。references:
FLV文件的第一个Tag: onMetaData
librtmp获取视频流和音频流1