从多媒体文件中提取数据,第一步我们就是打开输入文件,获取AVFormatContext.
if((err_code = avformat_open_input(&fmt_ctx, src_filename, NULL, NULL)) < 0){
av_strerror(err_code, errors, 1024);
av_log(NULL, AV_LOG_DEBUG, "Could not open source file: %s, %d(%s)\n",
src_filename,
err_code,
errors);
return -1;
}
我们要想提取流数据,必须先找到流索引
/* Look up the index of the video stream; a negative result means none was found. */
video_stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
if (video_stream_index < 0) {
    const char *media_name = av_get_media_type_string(AVMEDIA_TYPE_VIDEO);

    av_log(NULL, AV_LOG_DEBUG,
           "Could not find %s stream in input file %s\n",
           media_name, src_filename);
    return AVERROR(EINVAL);
}
接下来我们就可以从文件中读取流数据了,读取的数据可能是视频也可能是音频。我们通过AVPacket中的stream_index找到正确的包数据。
我们要明白AVPacket中可能有一帧也可能有多帧,依赖于帧的大小。
读取到帧后,我们要对帧进行处理,在每一帧前面加上StartCode,在I帧前面加上SPS和PPS块。
数据读到了AVPacket中,pkt->data的数据格式分成了很多相同的部分。
每部分的格式如下
byte 4 NAL 单元的长度,大端模式,我们把这个长度记为NAL_SIZE
byte 1 NAL header,后五位定义了当前NAL单元的类型,如果后五位的值等于5,那么当前帧就是I帧(IDR帧)。
接下来的数据 大小是NAL_SIZE - 1.
整个AVPacket的长度是 pkt->size.
我们通过while循环,把每一个NALU 取出来。
/*
 * Walk the packet one NAL unit at a time. Each unit is stored as a 4-byte
 * big-endian length followed by that many payload bytes.
 */
do {
    /* Treat the packet as malformed until this unit has been handled. */
    ret = AVERROR(EINVAL);

    /* A unit needs at least its 4-byte length prefix. */
    if (buf + 4 > buf_end)
        goto fail;

    /* Assemble the big-endian length prefix. */
    for (nal_size = 0, i = 0; i < 4; i++)
        nal_size = (nal_size << 8) | buf[i];
    buf += 4; /* buf now points at the NAL header byte */

    /* Reject lengths that run past the end of the packet. */
    if (nal_size > buf_end - buf || nal_size < 0)
        goto fail;

    /* Read the NAL type only after the length is known to be valid. */
    unit_type = nal_size ? (*buf & 0x1f) : 0;

    if (unit_type == 5) {
        /* IDR slice: emit SPS/PPS (converted from extradata) in front of it. */
        ret = h264_extradata_to_annexb(fmt_ctx->streams[in->stream_index]->codecpar->extradata,
                                       fmt_ctx->streams[in->stream_index]->codecpar->extradata_size,
                                       &spspps_pkt,
                                       AV_INPUT_BUFFER_PADDING_SIZE);
        if (ret < 0)
            goto fail;

        ret = alloc_and_copy(out,
                             spspps_pkt.data, spspps_pkt.size,
                             buf, nal_size);
        /* The SPS/PPS buffer has been copied into out; release it. */
        av_freep(&spspps_pkt.data);
        if (ret < 0)
            goto fail;
    } else {
        if ((ret = alloc_and_copy(out, NULL, 0, buf, nal_size)) < 0)
            goto fail;
    }

    len = fwrite(out->data, 1, out->size, dst_fd);
    if (len != out->size) {
        av_log(NULL, AV_LOG_DEBUG, "warning, length of writed data isn't equal pkt.size(%d, %d)\n",
               len,
               out->size);
    }
    fflush(dst_fd);

    /*
     * Empty the output packet so the next unit starts from offset 0;
     * otherwise every earlier unit would be written to the file again.
     */
    av_packet_unref(out);

    buf += nal_size;
    cumul_size += nal_size + 4; /* payload plus its length prefix */
} while (cumul_size < buf_size);
如果NAL的类型等于5是I帧,我们需要加上SPS 和 PPS.
ffmpeg的sps参数和pps参数在AVStream->codecpar->extradata中
我们接下来对extradata进行分析格式如下,我们要把数据取出来放在SPS AVPacket中
aligned(8) class AVCDecoderConfigurationRecord {
unsigned int(8) configurationVersion = 1;
unsigned int(8) AVCProfileIndication;
unsigned int(8) profile_compatibility;
unsigned int(8) AVCLevelIndication;
bit(6) reserved = ‘111111’b;
unsigned int(2) lengthSizeMinusOne;
bit(3) reserved = ‘111’b;
unsigned int(5) numOfSequenceParameterSets;
for (i=0; i< numOfSequenceParameterSets; i++) {
unsigned int(16) sequenceParameterSetLength ;
bit(8*sequenceParameterSetLength) sequenceParameterSetNALUnit;
}
unsigned int(8) numOfPictureParameterSets;
for (i=0; i< numOfPictureParameterSets; i++) {
unsigned int(16) pictureParameterSetLength;
bit(8*pictureParameterSetLength) pictureParameterSetNALUnit;
}
}
获取pps和sps的数据我们依据上面的格式进行获取。
extradata的前5个字节我们暂时用不到,直接跳过。
第6个字节的后5位标识了sps的个数。接下来就是每个sps,每个sps的前2字节是sps的大小 sps-size,接下来就是 sps-size的sps数据。
sps读完后。
紧挨着的一个字节标识了pps的个数。
每个pps的前两个字节是pps的大小pps-size,接下来是pps-size大小的pps数据。
我们写出sps和pps数据的时候,要在每个sps和pps的头部加入四个字节{0,0,0,1}。
将sps和pps放入一个缓存区中后,要在末尾加上64个字节的数据,数据赋值为0.用于分割。
/**
 * Convert AVCC-style extradata (AVCDecoderConfigurationRecord) into a buffer
 * of Annex B parameter sets: every SPS and PPS is copied out with a
 * 00 00 00 01 start code in front of it.
 *
 * Record layout, in bits:
 *    8  version
 *    8  avc profile
 *    8  avc compatibility
 *    8  avc level
 *    6  reserved
 *    2  NALULengthSizeMinusOne
 *    3  reserved
 *    5  number of SPS NALUs
 *       per SPS: 16-bit size, then the SPS bytes
 *    8  number of PPS NALUs
 *       per PPS: 16-bit size, then the PPS bytes
 *
 * @param codec_extradata      start of the extradata
 * @param codec_extradata_size number of bytes at @p codec_extradata
 * @param out_extradata        receives the result in its data/size fields;
 *                             both are reset to NULL/0 before parsing starts
 * @param padding              number of zeroed bytes appended after the
 *                             converted data (not counted in size)
 * @return 0 on success, a negative AVERROR code on failure
 *
 * The output buffer is allocated with av_reallocp(); on success the caller
 * owns out_extradata->data and releases it with av_free().
 */
int h264_extradata_to_annexb(const uint8_t *codec_extradata, const int codec_extradata_size, AVPacket *out_extradata, int padding)
{
    static const uint8_t nalu_header[4] = { 0, 0, 0, 1 }; /* Annex B start code */
    const uint8_t *extradata;
    const uint8_t *extradata_end;
    uint8_t *out = NULL;
    uint64_t total_size = 0;
    int sps_seen = 0;
    int pps_seen = 0;
    int pass;

    out_extradata->data = NULL;
    out_extradata->size = 0;

    if (padding < 0)
        return AVERROR(EINVAL);

    /* The first 5 bytes are skipped and byte 6 carries the SPS count. */
    if (!codec_extradata || codec_extradata_size < 6) {
        av_log(NULL, AV_LOG_ERROR, "Packet header is not contained in global extradata, "
               "corrupted stream or invalid MP4/AVCC bitstream\n");
        return AVERROR(EINVAL);
    }

    extradata     = codec_extradata + 5;
    extradata_end = codec_extradata + codec_extradata_size;

    /* pass 0 reads the SPS list, pass 1 reads the PPS list */
    for (pass = 0; pass < 2; pass++) {
        unsigned unit_nb;

        /* No count byte left: there is nothing more to read. */
        if (extradata >= extradata_end)
            break;

        unit_nb = *extradata++;
        if (pass == 0) {
            unit_nb &= 0x1f; /* only the low 5 bits hold the SPS count */
            sps_seen = unit_nb != 0;
        } else {
            pps_seen = unit_nb != 0;
        }

        while (unit_nb--) {
            unsigned unit_size;
            int err;

            /* Check that the 16-bit size and the unit itself are inside the buffer. */
            if (extradata_end - extradata < 2 ||
                (size_t)(extradata_end - extradata - 2) < (unit_size = AV_RB16(extradata))) {
                av_log(NULL, AV_LOG_ERROR, "Packet header is not contained in global extradata, "
                       "corrupted stream or invalid MP4/AVCC bitstream\n");
                av_free(out);
                return AVERROR(EINVAL);
            }
            extradata += 2;

            /* 4 extra bytes per unit for the start code */
            if (total_size + unit_size + 4 > (uint64_t)(INT_MAX - padding)) {
                av_log(NULL, AV_LOG_ERROR,
                       "Too big extradata size, corrupted stream or invalid MP4/AVCC bitstream\n");
                av_free(out);
                return AVERROR(EINVAL);
            }

            /* av_reallocp() releases the old buffer itself when it fails. */
            err = av_reallocp(&out, (size_t)(total_size + unit_size + 4 + padding));
            if (err < 0)
                return err;

            memcpy(out + total_size, nalu_header, 4);
            memcpy(out + total_size + 4, extradata, unit_size);
            total_size += unit_size + 4;
            extradata  += unit_size;
        }
    }

    /* Zero the padding that follows the converted data. */
    if (out)
        memset(out + total_size, 0, padding);

    if (!sps_seen)
        av_log(NULL, AV_LOG_WARNING,
               "Warning: SPS NALU missing or invalid. "
               "The resulting stream may not play.\n");
    if (!pps_seen)
        av_log(NULL, AV_LOG_WARNING,
               "Warning: PPS NALU missing or invalid. "
               "The resulting stream may not play.\n");

    out_extradata->data = out;
    out_extradata->size = (int)total_size;
    return 0;
}
我们要在每个NAL单元前面加上 startcode。
/**
 * Append one NAL unit to @p out in Annex B form.
 *
 * Bytes appended after whatever @p out already holds:
 *   [sps_pps bytes, if any] [00 00 00 01] [in bytes]
 *
 * @param out          packet that is grown with av_grow_packet() and written to
 * @param sps_pps      optional bytes placed in front of the start code; may be NULL
 * @param sps_pps_size number of bytes at @p sps_pps (ignored when it is NULL)
 * @param in           NAL unit payload, without its length prefix
 * @param in_size      number of bytes at @p in
 * @return 0 on success, a negative AVERROR code on failure
 */
static int alloc_and_copy(AVPacket *out,
                          const uint8_t *sps_pps, uint32_t sps_pps_size,
                          const uint8_t *in, uint32_t in_size)
{
    const uint32_t offset = out->size;
    const uint32_t start_code_size = 4; /* Annex B start code: 00 00 00 01 */
    uint64_t grow_by;
    uint8_t *dst;
    int err;

    if (!sps_pps)
        sps_pps_size = 0;

    /* Sum in 64 bits so that two large 32-bit sizes cannot wrap around. */
    grow_by = (uint64_t)sps_pps_size + in_size + start_code_size;
    if (grow_by > INT_MAX)
        return AVERROR(EINVAL);

    err = av_grow_packet(out, (int)grow_by);
    if (err < 0)
        return err;

    dst = out->data + offset;
    if (sps_pps_size) {
        memcpy(dst, sps_pps, sps_pps_size);
        dst += sps_pps_size;
    }

    /*
     * Write all four reserved start-code bytes regardless of offset, so no
     * uninitialised byte is left between the start code and the payload.
     */
    dst[0] = 0;
    dst[1] = 0;
    dst[2] = 0;
    dst[3] = 1;
    dst += start_code_size;

    if (in_size)
        memcpy(dst, in, in_size);

    return 0;
}
完整代码如下
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/log.h>
#include <libavutil/mem.h>
#ifndef AV_WB32
/* Fallback: store the 32-bit value val at p, most significant byte first. */
#define AV_WB32(p, val) do {                                  \
        uint32_t d = (val);                                   \
        ((uint8_t *)(p))[0] = (uint8_t)((d >> 24) & 0xff);    \
        ((uint8_t *)(p))[1] = (uint8_t)((d >> 16) & 0xff);    \
        ((uint8_t *)(p))[2] = (uint8_t)((d >>  8) & 0xff);    \
        ((uint8_t *)(p))[3] = (uint8_t)( d        & 0xff);    \
    } while (0)
#endif
#ifndef AV_RB16
/* Fallback: read the two bytes at x as a big-endian 16-bit value. */
#define AV_RB16(x)                              \
    (((const uint8_t *)(x))[1] |                \
     (((const uint8_t *)(x))[0] << 8))
#endif
/**
 * Append one NAL unit to @p out in Annex B form.
 *
 * Bytes appended after whatever @p out already holds:
 *   [sps_pps bytes, if any] [00 00 00 01] [in bytes]
 *
 * @param out          packet that is grown with av_grow_packet() and written to
 * @param sps_pps      optional bytes placed in front of the start code; may be NULL
 * @param sps_pps_size number of bytes at @p sps_pps (ignored when it is NULL)
 * @param in           NAL unit payload, without its length prefix
 * @param in_size      number of bytes at @p in
 * @return 0 on success, a negative AVERROR code on failure
 */
static int alloc_and_copy(AVPacket *out,
                          const uint8_t *sps_pps, uint32_t sps_pps_size,
                          const uint8_t *in, uint32_t in_size)
{
    const uint32_t offset = out->size;
    const uint32_t start_code_size = 4; /* Annex B start code: 00 00 00 01 */
    uint64_t grow_by;
    uint8_t *dst;
    int err;

    if (!sps_pps)
        sps_pps_size = 0;

    /* Sum in 64 bits so that two large 32-bit sizes cannot wrap around. */
    grow_by = (uint64_t)sps_pps_size + in_size + start_code_size;
    if (grow_by > INT_MAX)
        return AVERROR(EINVAL);

    err = av_grow_packet(out, (int)grow_by);
    if (err < 0)
        return err;

    dst = out->data + offset;
    if (sps_pps_size) {
        memcpy(dst, sps_pps, sps_pps_size);
        dst += sps_pps_size;
    }

    /*
     * Write all four reserved start-code bytes regardless of offset, so no
     * uninitialised byte is left between the start code and the payload.
     */
    dst[0] = 0;
    dst[1] = 0;
    dst[2] = 0;
    dst[3] = 1;
    dst += start_code_size;

    if (in_size)
        memcpy(dst, in, in_size);

    return 0;
}
/**
 * Convert AVCC-style extradata (AVCDecoderConfigurationRecord) into a buffer
 * of Annex B parameter sets: every SPS and PPS is copied out with a
 * 00 00 00 01 start code in front of it.
 *
 * Record layout, in bits:
 *    8  version
 *    8  avc profile
 *    8  avc compatibility
 *    8  avc level
 *    6  reserved
 *    2  NALULengthSizeMinusOne
 *    3  reserved
 *    5  number of SPS NALUs
 *       per SPS: 16-bit size, then the SPS bytes
 *    8  number of PPS NALUs
 *       per PPS: 16-bit size, then the PPS bytes
 *
 * @param codec_extradata      start of the extradata
 * @param codec_extradata_size number of bytes at @p codec_extradata
 * @param out_extradata        receives the result in its data/size fields;
 *                             both are reset to NULL/0 before parsing starts
 * @param padding              number of zeroed bytes appended after the
 *                             converted data (not counted in size)
 * @return 0 on success, a negative AVERROR code on failure
 *
 * The output buffer is allocated with av_reallocp(); on success the caller
 * owns out_extradata->data and releases it with av_free().
 */
int h264_extradata_to_annexb(const uint8_t *codec_extradata, const int codec_extradata_size, AVPacket *out_extradata, int padding)
{
    static const uint8_t nalu_header[4] = { 0, 0, 0, 1 }; /* Annex B start code */
    const uint8_t *extradata;
    const uint8_t *extradata_end;
    uint8_t *out = NULL;
    uint64_t total_size = 0;
    int sps_seen = 0;
    int pps_seen = 0;
    int pass;

    out_extradata->data = NULL;
    out_extradata->size = 0;

    if (padding < 0)
        return AVERROR(EINVAL);

    /* The first 5 bytes are skipped and byte 6 carries the SPS count. */
    if (!codec_extradata || codec_extradata_size < 6) {
        av_log(NULL, AV_LOG_ERROR, "Packet header is not contained in global extradata, "
               "corrupted stream or invalid MP4/AVCC bitstream\n");
        return AVERROR(EINVAL);
    }

    extradata     = codec_extradata + 5;
    extradata_end = codec_extradata + codec_extradata_size;

    /* pass 0 reads the SPS list, pass 1 reads the PPS list */
    for (pass = 0; pass < 2; pass++) {
        unsigned unit_nb;

        /* No count byte left: there is nothing more to read. */
        if (extradata >= extradata_end)
            break;

        unit_nb = *extradata++;
        if (pass == 0) {
            unit_nb &= 0x1f; /* only the low 5 bits hold the SPS count */
            sps_seen = unit_nb != 0;
        } else {
            pps_seen = unit_nb != 0;
        }

        while (unit_nb--) {
            unsigned unit_size;
            int err;

            /* Check that the 16-bit size and the unit itself are inside the buffer. */
            if (extradata_end - extradata < 2 ||
                (size_t)(extradata_end - extradata - 2) < (unit_size = AV_RB16(extradata))) {
                av_log(NULL, AV_LOG_ERROR, "Packet header is not contained in global extradata, "
                       "corrupted stream or invalid MP4/AVCC bitstream\n");
                av_free(out);
                return AVERROR(EINVAL);
            }
            extradata += 2;

            /* 4 extra bytes per unit for the start code */
            if (total_size + unit_size + 4 > (uint64_t)(INT_MAX - padding)) {
                av_log(NULL, AV_LOG_ERROR,
                       "Too big extradata size, corrupted stream or invalid MP4/AVCC bitstream\n");
                av_free(out);
                return AVERROR(EINVAL);
            }

            /* av_reallocp() releases the old buffer itself when it fails. */
            err = av_reallocp(&out, (size_t)(total_size + unit_size + 4 + padding));
            if (err < 0)
                return err;

            memcpy(out + total_size, nalu_header, 4);
            memcpy(out + total_size + 4, extradata, unit_size);
            total_size += unit_size + 4;
            extradata  += unit_size;
        }
    }

    /* Zero the padding that follows the converted data. */
    if (out)
        memset(out + total_size, 0, padding);

    if (!sps_seen)
        av_log(NULL, AV_LOG_WARNING,
               "Warning: SPS NALU missing or invalid. "
               "The resulting stream may not play.\n");
    if (!pps_seen)
        av_log(NULL, AV_LOG_WARNING,
               "Warning: PPS NALU missing or invalid. "
               "The resulting stream may not play.\n");

    out_extradata->data = out;
    out_extradata->size = (int)total_size;
    return 0;
}
/**
 * Convert one length-prefixed (AVCC) H.264 packet to Annex B and write it
 * to @p dst_fd.
 *
 * The packet is read as a sequence of NAL units, each stored as a 4-byte
 * big-endian length followed by the payload. Every unit is written with a
 * 00 00 00 01 start code; units of type 5 are additionally preceded by the
 * SPS/PPS taken from the stream's codecpar->extradata.
 *
 * @param fmt_ctx format context that owns the stream @p in belongs to
 * @param in      packet to convert
 * @param dst_fd  output file, opened for binary writing
 * @return 0 on success, a negative AVERROR code on failure; an empty or
 *         truncated packet yields AVERROR(EINVAL)
 */
int h264_mp4toannexb(AVFormatContext *fmt_ctx, AVPacket *in, FILE *dst_fd)
{
    AVPacket *out;
    const uint8_t *buf;
    const uint8_t *buf_end;
    int ret;

    out = av_packet_alloc();
    if (!out)
        return AVERROR(ENOMEM);

    buf     = in->data;
    buf_end = in->data + in->size;

    do {
        uint32_t nal_size = 0;
        uint8_t unit_type;
        size_t written;
        int i;

        /* Treat the packet as malformed until this unit has been handled. */
        ret = AVERROR(EINVAL);

        /* A unit needs at least its 4-byte length prefix. */
        if (buf_end - buf < 4)
            goto fail;

        /* Assemble the big-endian length (unsigned, so the shift cannot overflow). */
        for (i = 0; i < 4; i++)
            nal_size = (nal_size << 8) | buf[i];
        buf += 4;

        /* Reject lengths that run past the end of the packet. */
        if (nal_size > (size_t)(buf_end - buf))
            goto fail;

        /* An empty unit has no header byte to inspect; skip it. */
        if (!nal_size) {
            ret = 0;
            continue;
        }

        unit_type = buf[0] & 0x1f; /* low 5 bits of the NAL header */

        if (unit_type == 5) {
            /* IDR slice: emit SPS/PPS (converted from extradata) in front of it. */
            AVPacket spspps_pkt = { .data = NULL, .size = 0 };
            const AVCodecParameters *par = fmt_ctx->streams[in->stream_index]->codecpar;

            ret = h264_extradata_to_annexb(par->extradata,
                                           par->extradata_size,
                                           &spspps_pkt,
                                           AV_INPUT_BUFFER_PADDING_SIZE);
            if (ret < 0)
                goto fail;

            ret = alloc_and_copy(out,
                                 spspps_pkt.data, spspps_pkt.size,
                                 buf, nal_size);
            /* The SPS/PPS buffer has been copied into out; release it. */
            av_freep(&spspps_pkt.data);
            if (ret < 0)
                goto fail;
        } else {
            ret = alloc_and_copy(out, NULL, 0, buf, nal_size);
            if (ret < 0)
                goto fail;
        }

        /* A short write is only logged; conversion of the packet continues. */
        written = fwrite(out->data, 1, out->size, dst_fd);
        if (written != (size_t)out->size) {
            av_log(NULL, AV_LOG_DEBUG, "warning, length of writed data isn't equal pkt.size(%d, %d)\n",
                   (int)written,
                   out->size);
        }
        fflush(dst_fd);

        /*
         * Empty the output packet so the next unit starts from offset 0;
         * otherwise every earlier unit would be written to the file again.
         */
        av_packet_unref(out);

        buf += nal_size;
    } while (buf < buf_end);

fail:
    av_packet_free(&out);
    return ret;
}
int main(int argc, char *argv[])
{
int err_code;
char errors[1024];
char *src_filename = NULL;
char *dst_filename = NULL;
FILE *dst_fd = NULL;
int video_stream_index = -1;
AVFormatContext *fmt_ctx = NULL;
AVPacket pkt;
av_log_set_level(AV_LOG_DEBUG);
if(argc < 3){
av_log(NULL, AV_LOG_DEBUG, "the count of parameters should be more than three!\n");
return -1;
}
src_filename = argv[1];
dst_filename = argv[2];
if(src_filename == NULL || dst_filename == NULL){
av_log(NULL, AV_LOG_ERROR, "src or dts file is null, plz check them!\n");
return -1;
}
dst_fd = fopen(dst_filename, "wb");
if (!dst_fd) {
av_log(NULL, AV_LOG_DEBUG, "Could not open destination file %s\n", dst_filename);
return -1;
}
/*open input media file, and allocate format context*/
if((err_code = avformat_open_input(&fmt_ctx, src_filename, NULL, NULL)) < 0){
av_strerror(err_code, errors, 1024);
av_log(NULL, AV_LOG_DEBUG, "Could not open source file: %s, %d(%s)\n",
src_filename,
err_code,
errors);
return -1;
}
/*dump input information*/
av_dump_format(fmt_ctx, 0, src_filename, 0);
/*initialize packet*/
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
/*find best video stream*/
video_stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
if(video_stream_index < 0){
av_log(NULL, AV_LOG_DEBUG, "Could not find %s stream in input file %s\n",
av_get_media_type_string(AVMEDIA_TYPE_VIDEO),
src_filename);
return AVERROR(EINVAL);
}
/*read frames from media file*/
while(av_read_frame(fmt_ctx, &pkt) >=0 ){
if(pkt.stream_index == video_stream_index){
h264_mp4toannexb(fmt_ctx, &pkt, dst_fd);
}
//release pkt->data
av_packet_unref(&pkt);
}
/*close input media file*/
avformat_close_input(&fmt_ctx);
if(dst_fd) {
fclose(dst_fd);
}
return 0;
}
好几天没写博客了,总得输出点什么。
对代码的理解一定要结合格式的结构进行理解,否则会把人绕进去的。
这篇文章还得多看看。
接下来要写两篇关于yuv到h264,h264到yuv。
学习一定要慢。
一切都是积累。