最近接触了一个国内优秀的流媒体平台——EasyDarwin。这是国外开源流媒体平台Darwin的一个深度裁剪版本,看了一段时间后就想让它跟自己开发的一个Android设备对接,以了却我多年来想更深入理解多媒体的心愿。(本人曾经自己开发过一个H264的移动电视软解码播放器)
在搭建好服务器后,拿出了全志的T2开发板,开始使用其团队的EasyPusher库进行推送。首先我尝试视频直播的开发,在研究了两天后,发现我这台机器系统中的编解码器已经被封装成库,对外不开放。所以我不得不使用Android系统的MediaRecorder进行编码,而系统已经把编码结果直接打包成了MP4文件,这样我无法直接获得H264数据。于是陷入了困顿之中。在等待方案公司帮助的同时,我寻找了别的方法去获得推送的数据流,最后不得不采用localhost技术,该技术就是将MediaRecorder编码的数据通过类似本地网络管道的方式接收下来。在接收到数据后发现还是无法进行传送,因为接收到的数据没有任何MP4相关的描述信息,只有数据流。一个MP4文件的数据能够播放,是因为MP4文件由很多box构成,其中有两个box尤其重要:moov box和mdat box,前者包含了MP4内宏观的描述信息,后者包含了实际的数据。因为MediaRecorder在处理完数据后才将moov box的数据一次性写进MP4文件,所以我能获得的只有mdat box的数据。没有办法,只好根据输入参数来获得相关信息,并大致估计时间戳。具体做法如下:
第一步:搜索MDAT数据头
第二步:将其中的数据解析成H264的数据包,其中每个数据帧前面的长度字段刚好占四个字节,用H264的起始码(0x00,0x00,0x00,0x01)来代替。
第三步:调用EasyPusher将数据发送到服务器。这里需要注意的是,遇到I帧的时候需要加上SPS和PPS。当然localhost的数据里是不会有这些数据的,只好自己根据分辨率等信息写好,如下:
/*
 * Hand-written parameter-set blobs that the author prepends to key frames
 * when the stream itself carries none. Each table holds two units, each
 * introduced by the 4-byte start code 00 00 00 01: the first unit begins with
 * byte 0x67 and the second with byte 0x68 (presumably the SPS and PPS NAL
 * headers, as the table names suggest).
 * NOTE(review): the first table is named "CIF" but the author's note says
 * 320*240 -- confirm which resolution the bytes actually encode.
 */
const unsigned char SPSPPSCIF[] ={0X00,0X00,0X00,0X01, 0x67,0x42, 0X00, 0X1F, 0XE5, 0X40, 0XA0, 0xfc,0X80,0X00,0X00,0X00,0X01,0X68,0XCE,0x31,0X12};// 320*240 according to the author
const unsigned char SPSPPS576[] ={0X00,0X00,0X00,0X01, 0x67,0x42, 0X00, 0X1F, 0XE5, 0X40, 0X5A, 0x09,0X32,0X00,0X00,0X00,0X01,0X68,0XCE,0x31,0X12}; // 720*576 according to the author
这些数据最好在出现I帧时加上(帧头第5个字节一般是0x67,参照H264数据规范)。我估计这对流媒体传输来说是很重要的。
传输过去后,在客户端可以看到视频,但丢帧非常严重。我估计主要是因为localhost以及编码器效率太低,同时时间戳也是一个估计数。所以造成了丢帧现象。没有什么好的办法了,只有等待硬件编码器的相关资料直接编码进行发送了。
绕过直播,开始搞回放。首先选择一个DEMUX工具,我选择的是MP4V2,主要是因为这个工具Windows和Android NDK版本都有,所以相对来说比较省时间。折腾一段时间后发现数据能传输上去了,但是音频和视频完全不同步。在跟EasyDarwin团队的一个朋友讨论后,我决定开始搞音视频同步,采用的策略是将音视频帧都同步到系统时间。果然效果还是很明显的,除了偶尔会卡住,基本上能流畅播放。我估计接下来需要开始了解播放器端的同步了。相关的代码附录如下:
MP4 DEMUX:获取视频和音频帧:
/*
 * Per-file demux state shared by GetAudioFrame(), GetVideoFrame() and
 * Sync_clock(). Only the fields commented as "used by ..." are referenced by
 * the code visible in this file; the meaning of the remaining fields is
 * inferred from their names only and should be confirmed against the code
 * that fills the structure in.
 */
typedef struct _Media_INFO_
{
/* mp4v2 file handle; passed to every MP4* call in the frame readers. */
MP4FileHandle mp4File;
/* Track ids used by GetVideoFrame() / GetAudioFrame() respectively. */
MP4TrackId video_trId;
MP4TrackId audio_trId;
/* Used by GetVideoFrame() as the capacity reported to MP4ReadSample(). */
u_int32_t video_sample_max_size;
/* Upper bound for valid video sample ids in GetVideoFrame(). */
u_int32_t video_num_samples;
u_int32_t video_width;
u_int32_t video_height;
double video_frame_rate;
/* Divisor applied to video timestamps in Sync_clock(). */
u_int32_t video_timescale;
/* Upper bound for valid audio sample ids in GetAudioFrame(). */
u_int32_t audio_num_samples;
int audio_num_channels;
/* Divisor applied to audio timestamps in Sync_clock(). */
u_int32_t audio_timescale;
MP4Duration audio_duration;
u_int32_t avgBitRate;
u_int8_t AudioType;
/* Presumably the audio decoder configuration blob and its size -- TODO confirm. */
u_int8_t *p_audio_config;
u_int32_t n_audio_config_size;
u_int32_t n_video_config_size;
/* Used by GetAudioFrame() as the capacity reported to MP4ReadSample(). */
u_int32_t audio_sample_max_size;
MP4Duration video_duration;
/* Destination buffer for GetVideoFrame(): optional stream header, then the sample. */
u_int8_t *p_Vediobuffer;
/* Destination buffer for GetAudioFrame(). */
u_int8_t *p_Audio_buffer;
}MEDIA_INFO;
staticint GetVideoStreamHeader(MP4FileHandle mp4File, MP4TrackId video_trId, intvideo_codec,
unsigned char*strm_hdr_buf, int *strm_hdr_leng)
{
int b;
// for MPEG4
unsigned char *p_video_conf;
unsigned int n_video_conf;
// for H.264
unsigned char **pp_sps,**pp_pps;
unsigned int *pn_sps, *pn_pps;
unsigned int n_strm_size;
int i;
switch (video_codec) {
case RAW_STRM_TYPE_M4V: //MPEG4
p_video_conf = NULL;
n_video_conf = 0;
b = MP4GetTrackESConfiguration(mp4File, video_trId,
&p_video_conf, &n_video_conf);
if (!b)
return -1;
memcpy(strm_hdr_buf, p_video_conf, n_video_conf);
free(p_video_conf);
*strm_hdr_leng = n_video_conf;
break;
case RAW_STRM_TYPE_H263: //H.263
*strm_hdr_leng = 0;
break;
case RAW_STRM_TYPE_H264RAW: // H.264
pp_sps = pp_pps = NULL;
pn_sps = pn_pps = NULL;
n_strm_size = 0;
b = MP4GetTrackH264SeqPictHeaders(mp4File, video_trId, &pp_sps,&pn_sps, &pp_pps, &pn_pps);
if (!b)
return -1;
// SPS memcpy
if (pp_sps) {
for (i=0; *(pp_sps + i); i++) {
memcpy(strm_hdr_buf +n_strm_size, h264_delimiter, sizeof(h264_delimiter));
n_strm_size +=sizeof(h264_delimiter);
memcpy(strm_hdr_buf +n_strm_size, *(pp_sps + i), *(pn_sps + i));
/*
if(NAL_UNIT_TYPE_TYPE(strm_hdr_buf[n_strm_size]) == 7) {
strm_hdr_buf[n_strm_size +1] = 66;
}
*/
n_strm_size += *(pn_sps + i);
free(*(pp_sps + i));
}
free(pp_sps);
}
// PPS memcpy
if (pp_pps) {
for (i=0; *(pp_pps + i); i++) {
memcpy(strm_hdr_buf +n_strm_size, h264_delimiter, sizeof(h264_delimiter));
n_strm_size +=sizeof(h264_delimiter);
memcpy(strm_hdr_buf + n_strm_size,*(pp_pps + i), *(pn_pps + i));
n_strm_size += *(pn_pps + i);
free(*(pp_pps + i));
}
free(pp_pps);
}
*strm_hdr_leng = n_strm_size;
break;
default: // Unknown
*strm_hdr_leng = 0;
break;
}
return 0;
}
/*
 * Read one audio sample into pMedia->p_Audio_buffer.
 *
 * pMedia:    demux state; supplies the file handle, audio track id, the
 *            destination buffer and its capacity (audio_sample_max_size).
 * pSize:     receives the number of bytes read, or 0 on failure.
 * sample_id: 1-based sample index; must be in [1, audio_num_samples].
 * timebase:  receives the sample's start timestamp as reported by
 *            MP4ReadSample.
 * dutation:  receives the sample's duration as reported by MP4ReadSample.
 *
 * Returns the number of bytes read, or -1 when sample_id is out of range or
 * MP4ReadSample fails. timebase/dutation are only written on success.
 */
int GetAudioFrame(MEDIA_INFO* pMedia, unsigned int *pSize, int sample_id, unsigned long* timebase, unsigned long* dutation)
{
    u_int8_t *p_audio_sample = (u_int8_t *) pMedia->p_Audio_buffer;
    u_int32_t n_audio_sample = pMedia->audio_sample_max_size;
    MP4Timestamp TimeStamp;
    MP4Duration Audio_duration;

    *pSize = pMedia->audio_sample_max_size;

    /* sample_id is known to be positive before the unsigned comparison. */
    if ((sample_id <= 0) || ((u_int32_t)sample_id > pMedia->audio_num_samples)) {
        *pSize = 0;
        return -1;
    }

    /* A non-NULL buffer pointer is passed, so the sample goes into our buffer;
     * n_audio_sample carries the capacity in and the sample size out. */
    if (!MP4ReadSample(pMedia->mp4File, pMedia->audio_trId, sample_id,
                       &p_audio_sample, &n_audio_sample,
                       &TimeStamp, &Audio_duration, NULL, NULL)) {
        *pSize = 0;
        return -1;
    }

    *pSize = n_audio_sample;
    *timebase = TimeStamp;
    *dutation = Audio_duration;
    return (int)n_audio_sample;
}
/*
 * Read one video sample into pMedia->p_Vediobuffer, ready for streaming.
 *
 * For sync samples the codec configuration header produced by
 * GetVideoStreamHeader() is written first and the sample follows it. For
 * H.264 every 4-byte big-endian NAL length prefix inside the sample is
 * overwritten with h264_delimiter.
 *
 * pMedia:        demux state (file handle, video track id, buffer, limits).
 * m_video_codec: one of the RAW_STRM_TYPE_* values.
 * pSize:         receives header + sample size in bytes, or 0 on failure.
 * isIframe:      receives the result of MP4GetSampleSync for this sample.
 * sample_id:     1-based sample index; must be in [1, video_num_samples].
 * timebase:      receives the sample's start timestamp from MP4ReadSample.
 * duration:      receives the sample's duration from MP4ReadSample.
 *
 * Returns header + sample size in bytes, or -1 when sample_id is out of
 * range, the stream header cannot be built, or MP4ReadSample fails.
 */
int GetVideoFrame(MEDIA_INFO* pMedia, int m_video_codec, unsigned int *pSize, unsigned int *isIframe, int sample_id, unsigned long* timebase, unsigned long* duration)
{
    int n_video_hdr = 0;
    u_int8_t *p_video_sample = (u_int8_t *) pMedia->p_Vediobuffer;
    u_int32_t n_video_sample = pMedia->video_sample_max_size;
    MP4Timestamp TimeStamp;
    MP4Duration video_duration;

    *pSize = pMedia->video_sample_max_size;

    /* sample_id is known to be positive before the unsigned comparison. */
    if ((sample_id <= 0) || ((u_int32_t)sample_id > pMedia->video_num_samples)) {
        *pSize = 0;
        return -1;
    }

    *isIframe = MP4GetSampleSync(pMedia->mp4File, pMedia->video_trId, sample_id);
    if (*isIframe) {
        /* On failure the header length is not written, so it must not be used. */
        if (GetVideoStreamHeader(pMedia->mp4File, pMedia->video_trId, m_video_codec,
                                 p_video_sample, &n_video_hdr) != 0) {
            *pSize = 0;
            return -1;
        }
        p_video_sample += n_video_hdr;
    }

    /* NOTE(review): the capacity reported to MP4ReadSample is still
     * video_sample_max_size even after the write position moved past the
     * header. This is only safe if p_Vediobuffer was allocated with room for
     * the header on top of the largest sample -- confirm at the allocation. */
    if (!MP4ReadSample(pMedia->mp4File, pMedia->video_trId, sample_id,
                       &p_video_sample, &n_video_sample,
                       &TimeStamp, &video_duration, NULL, NULL)) {
        *pSize = 0;
        return -1;
    }

    /* For H.264 each NAL unit is stored as <4-byte length><payload>; replace
     * every length prefix with the delimiter (00 00 00 01). */
    if (m_video_codec == RAW_STRM_TYPE_H264RAW) {
        u_int8_t *p_nal = p_video_sample;
        u_int32_t remaining = n_video_sample;

        while (remaining >= 4) {
            u_int32_t nal_leng = ((u_int32_t)p_nal[0] << 24) |
                                 ((u_int32_t)p_nal[1] << 16) |
                                 ((u_int32_t)p_nal[2] << 8)  |
                                  (u_int32_t)p_nal[3];

            memcpy(p_nal, h264_delimiter, 4);
            remaining -= 4;

            /* A length pointing past the sample means corrupt data: stop
             * instead of walking off the end of the buffer. */
            if (nal_leng > remaining)
                break;

            p_nal += 4 + nal_leng;
            remaining -= nal_leng;
        }
    }

    *timebase = TimeStamp;
    *duration = video_duration;
    *pSize = (unsigned int)n_video_hdr + n_video_sample;
    return (int)*pSize;
}
时钟同步:
// 返回需要延迟的时间(单位:毫秒)
/*
 * State used by Sync_clock() to pace audio and video against the wall clock.
 * A zero-initialised instance means "nothing latched yet".
 */
typedef struct _SYN_CLOCK_CTRL_
{
    unsigned char Vedioflag;    /* set to 1 once VedioBase has been latched */
    unsigned char Audioflag;    /* set to 1 once AudioBase has been latched */
    unsigned long ClockBase;    /* wall-clock reading at the first Sync_clock() call; 0 = not set */
    unsigned long ClockCurr;    /* wall-clock reading at the latest Sync_clock() call */
    unsigned long VedioBase;    /* timestamp of the first video sample seen */
    unsigned long AudioBase;    /* timestamp of the first audio sample seen */
}Sync_clock_Ctl;

/* Single shared pacing state; zero-initialised as a file-scope object. */
Sync_clock_Ctl g_clock;
/*
 * Compute how long the caller should wait before pushing the next sample so
 * that playback follows the wall clock.
 *
 * The first call latches the wall clock as time zero, and the first sample of
 * each stream latches that stream's starting timestamp. Sample times are then
 * converted to milliseconds with the stream's timescale and compared with the
 * milliseconds elapsed since time zero.
 *
 * pMedia:   supplies video_timescale / audio_timescale.
 * timebase: start timestamp of the sample, in timescale units.
 * duration: duration of the sample, in timescale units.
 * type:     VEDIO_PUSH selects the video stream; any other value is audio.
 * OutTime:  receives the sample's start time in milliseconds, relative to the
 *           first sample of the same stream.
 *
 * Returns the delay in milliseconds until the end of this sample, or 0 when
 * the stream is already behind the wall clock. A timescale of 0 yields
 * *OutTime = 0 and a delay of 0.
 */
unsigned long Sync_clock(MEDIA_INFO *pMedia, unsigned long timebase, unsigned long duration, int type, unsigned long *OutTime)
{
    unsigned long DiffClock;
    unsigned long StreamBase;
    unsigned long TimeScale;
    unsigned long CurrentTime;
    unsigned long NextTime;

#ifdef _WIN32
    g_clock.ClockCurr = GetTickCount();
#else
    {
        struct timeval tv;
        gettimeofday(&tv, NULL);
        /* Milliseconds; the sub-millisecond part is deliberately dropped. */
        g_clock.ClockCurr = (unsigned long)((int64_t)tv.tv_sec * 1000 + tv.tv_usec / 1000);
    }
#endif
    if (g_clock.ClockBase == 0) {
        g_clock.ClockBase = g_clock.ClockCurr;
    }

    /* Wall-clock milliseconds elapsed since the first call. */
    DiffClock = g_clock.ClockCurr - g_clock.ClockBase;

    if (type == VEDIO_PUSH) {
        if (g_clock.Vedioflag == 0) {
            g_clock.VedioBase = timebase;
            g_clock.Vedioflag = 1;
        }
        StreamBase = g_clock.VedioBase;
        TimeScale = pMedia->video_timescale;
    } else {
        if (g_clock.Audioflag == 0) {
            g_clock.AudioBase = timebase;
            g_clock.Audioflag = 1;
        }
        /* Audio must be rebased against its own first timestamp, not video's. */
        StreamBase = g_clock.AudioBase;
        TimeScale = pMedia->audio_timescale;
    }

    /* Without a timescale the timestamps cannot be converted to time. */
    if (TimeScale == 0) {
        *OutTime = 0;
        return 0;
    }

    /* Start of this sample and start of the next one, in milliseconds. */
    CurrentTime = (unsigned long)(((double)(timebase - StreamBase)) / TimeScale * 1000);
    NextTime = (unsigned long)(((double)(timebase - StreamBase + duration)) / TimeScale * 1000);
    *OutTime = CurrentTime;

    /* Already late: send immediately. Otherwise wait out the remainder. */
    if (DiffClock > NextTime) {
        return 0;
    }
    return NextTime - DiffClock;
}