音视频入门基础:WAV专题(5)——FFmpeg源码中解码WAV Header的实现

=================================================================

音视频入门基础:WAV专题系列文章:

音视频入门基础:WAV专题(1)——使用FFmpeg命令生成WAV音频文件

音视频入门基础:WAV专题(2)——WAV格式简介

音视频入门基础:WAV专题(3)——FFmpeg源码中,判断某文件是否为WAV音频文件的实现

音视频入门基础:WAV专题(4)——FFmpeg源码中获取WAV文件音频压缩编码格式、采样频率、声道数量、采样位数、码率的实现

音视频入门基础:WAV专题(5)——FFmpeg源码中解码WAV Header的实现

音视频入门基础:WAV专题(6)——通过FFprobe显示WAV音频文件每个数据包的信息

音视频入门基础:WAV专题(7)——FFmpeg源码中计算WAV音频文件每个packet的size值的实现

音视频入门基础:WAV专题(8)——FFmpeg源码中计算WAV音频文件AVStream的time_base的实现

音视频入门基础:WAV专题(9)——FFmpeg源码中计算WAV音频文件每个packet的duration和duration_time的实现

音视频入门基础:WAV专题(10)——FFmpeg源码中计算WAV音频文件每个packet的pts、dts的实现

音视频入门基础:WAV专题(11)——FFmpeg源码中计算WAV音频文件每个packet的pts_time、dts_time的实现

=================================================================

一、引言

执行FFmpeg命令:

./ffmpeg -i XXX.wav

FFmpeg内部会调用wav_probe函数检测该文件是否为WAV格式的音频文件(具体可以参考:《音视频入门基础:WAV专题(3)——FFmpeg源码中,判断某文件是否为WAV音频文件的实现》)。然后如果检测出该文件为WAV格式的音频文件,会调用wav_read_header函数解码WAV Header。

二、wav_read_header函数的定义

wav_read_header函数定义在FFmpeg源码(本文演示用的FFmpeg源码版本为5.0.3)的源文件libavformat/wavdec.c中:

/**
 * Parse the header of a WAV input (RIFF, RIFX, RF64 or BW64).
 *
 * Validates the leading container tag and the 'WAVE' form type, reads the
 * optional 'ds64' chunk for the 64-bit variants, creates the audio stream
 * and then walks the chunks ('fmt ', 'XMA2', 'data', 'fact', 'bext',
 * 'SMV0', 'LIST', 'ID3 ', 'cue '). Afterwards it seeks back to the start of
 * the audio data, sanity-checks the sample count, sets the stream duration
 * and applies a few codec-specific fixups.
 *
 * @param s  format context; s->pb is the input being read and s->priv_data
 *           is the WAVDemuxContext that gets filled in.
 * @return 0 on success, a negative AVERROR code on failure
 *         (AVERROR_INVALIDDATA, AVERROR(ENOMEM), or an error propagated
 *         from one of the chunk parsers).
 */
static int wav_read_header(AVFormatContext *s)
{
    int64_t size, av_uninit(data_size);
    int64_t sample_count = 0;
    int rf64 = 0, bw64 = 0;
    uint32_t tag;
    AVIOContext *pb      = s->pb;
    AVStream *st         = NULL;
    WAVDemuxContext *wav = s->priv_data;
    int ret, got_fmt = 0, got_xma2 = 0;
    /* data_ofs stays -1 until a 'data' chunk has been seen */
    int64_t next_tag_ofs, data_ofs = -1;

    /* remember whether the header starts at an odd byte offset */
    wav->unaligned = avio_tell(s->pb) & 1;

    /* -1 means "no SMV video data found" */
    wav->smv_data_ofs = -1;

    /* read chunk ID */
    /* The four tag bytes are read little-endian and compared against MKTAG
     * values, so the comparison is on the byte sequence as stored in the file. */
    tag = avio_rl32(pb);
    switch (tag) {
    case MKTAG('R', 'I', 'F', 'F'):
        break;
    case MKTAG('R', 'I', 'F', 'X'):
        /* rifx makes the later chunk-size and 'fact' reads big-endian */
        wav->rifx = 1;
        break;
    case MKTAG('R', 'F', '6', '4'):
        rf64 = 1;
        break;
    case MKTAG('B', 'W', '6', '4'):
        bw64 = 1;
        break;
    default:
        av_log(s, AV_LOG_ERROR, "invalid start code %s in RIFF header\n",
               av_fourcc2str(tag));
        return AVERROR_INVALIDDATA;
    }

    /* read chunk size */
    /* the value is read only to advance the stream; it is not stored */
    avio_rl32(pb);

    /* read format */
    if (avio_rl32(pb) != MKTAG('W', 'A', 'V', 'E')) {
        av_log(s, AV_LOG_ERROR, "invalid format in RIFF header\n");
        return AVERROR_INVALIDDATA;
    }

    /* For the 64-bit variants a 'ds64' chunk must follow immediately; it
     * supplies the 64-bit data size and sample count. */
    if (rf64 || bw64) {
        if (avio_rl32(pb) != MKTAG('d', 's', '6', '4'))
            return AVERROR_INVALIDDATA;
        size = avio_rl32(pb);
        /* three 64-bit fields (24 bytes) are read below */
        if (size < 24)
            return AVERROR_INVALIDDATA;
        avio_rl64(pb); /* RIFF size */

        data_size    = avio_rl64(pb);
        sample_count = avio_rl64(pb);

        if (data_size < 0 || sample_count < 0) {
            av_log(s, AV_LOG_ERROR, "negative data_size and/or sample_count in "
                   "ds64: data_size = %"PRId64", sample_count = %"PRId64"\n",
                   data_size, sample_count);
            return AVERROR_INVALIDDATA;
        }
        avio_skip(pb, size - 24); /* skip rest of ds64 chunk */

    }

    /* Create the audio stream now so that its index is always zero */
    st = avformat_new_stream(s, NULL);
    if (!st)
        return AVERROR(ENOMEM);

    /* Walk the chunks until EOF, a failed seek, or an explicit break_loop. */
    for (;;) {
        AVStream *vst;
        /* tag receives the chunk ID, size the chunk's payload length */
        size         = next_tag(pb, &tag, wav->rifx);
        /* offset just past this chunk's payload, i.e. where the next chunk starts */
        next_tag_ofs = avio_tell(pb) + size;

        if (avio_feof(pb))
            break;

        switch (tag) {
        case MKTAG('f', 'm', 't', ' '):
            /* only parse the first 'fmt ' tag found */
            if (!got_xma2 && !got_fmt && (ret = wav_parse_fmt_tag(s, size, st)) < 0) {
                return ret;
            } else if (got_fmt)
                av_log(s, AV_LOG_WARNING, "found more than one 'fmt ' tag\n");

            got_fmt = 1;
            break;
        case MKTAG('X', 'M', 'A', '2'):
            /* only parse the first 'XMA2' tag found */
            if (!got_fmt && !got_xma2 && (ret = wav_parse_xma2_tag(s, size, st)) < 0) {
                return ret;
            } else if (got_xma2)
                av_log(s, AV_LOG_WARNING, "found more than one 'XMA2' tag\n");

            got_xma2 = 1;
            break;
        case MKTAG('d', 'a', 't', 'a'):
            /* on non-seekable input the format chunk must already have been read */
            if (!(pb->seekable & AVIO_SEEKABLE_NORMAL) && !got_fmt && !got_xma2) {
                av_log(s, AV_LOG_ERROR,
                       "found no 'fmt ' tag before the 'data' tag\n");
                return AVERROR_INVALIDDATA;
            }

            if (rf64 || bw64) {
                /* 64-bit variants: use data_size from 'ds64', not this chunk's size field.
                 * NOTE(review): data_size is only checked for being non-negative, so
                 * this signed addition can overflow for values near INT64_MAX -
                 * consider a saturating add; confirm against upstream. */
                next_tag_ofs = wav->data_end = avio_tell(pb) + data_size;
            } else if (size != 0xFFFFFFFF) {
                data_size    = size;
                /* a zero size is treated as "end unknown" (INT64_MAX) */
                next_tag_ofs = wav->data_end = size ? next_tag_ofs : INT64_MAX;
            } else {
                av_log(s, AV_LOG_WARNING, "Ignoring maximum wav data size, "
                       "file may be invalid\n");
                data_size    = 0;
                next_tag_ofs = wav->data_end = INT64_MAX;
            }

            /* start of the audio payload; the function seeks back here at the end */
            data_ofs = avio_tell(pb);

            /* don't look for footer metadata if we can't seek or if we don't
             * know where the data tag ends
             */
            if (!(pb->seekable & AVIO_SEEKABLE_NORMAL) || (!(rf64 && !bw64) && !size))
                goto break_loop;
            break;
        case MKTAG('f', 'a', 'c', 't'):
            /* only used when no sample count has been obtained yet (e.g. from 'ds64') */
            if (!sample_count)
                sample_count = (!wav->rifx ? avio_rl32(pb) : avio_rb32(pb));
            break;
        case MKTAG('b', 'e', 'x', 't'):
            if ((ret = wav_parse_bext_tag(s, size)) < 0)
                return ret;
            break;
        case MKTAG('S','M','V','0'):
            if (!got_fmt) {
                av_log(s, AV_LOG_ERROR, "found no 'fmt ' tag before the 'SMV0' tag\n");
                return AVERROR_INVALIDDATA;
            }
            // SMV file, a wav file with video appended.
            /* the four bytes next_tag() read as the size are compared with the
             * version tag '0200' here */
            if (size != MKTAG('0','2','0','0')) {
                av_log(s, AV_LOG_ERROR, "Unknown SMV version found\n");
                goto break_loop;
            }
            av_log(s, AV_LOG_DEBUG, "Found SMV data\n");
            wav->smv_given_first = 0;
            /* second stream: the SMV video */
            vst = avformat_new_stream(s, NULL);
            if (!vst)
                return AVERROR(ENOMEM);
            wav->vst = vst;
            /* one byte is read and discarded */
            avio_r8(pb);
            vst->id = 1;
            vst->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
            vst->codecpar->codec_id = AV_CODEC_ID_SMVJPEG;
            vst->codecpar->width  = avio_rl24(pb);
            vst->codecpar->height = avio_rl24(pb);
            /* 4 bytes of extradata hold smv_frames_per_jpeg (written below) */
            if ((ret = ff_alloc_extradata(vst->codecpar, 4)) < 0) {
                av_log(s, AV_LOG_ERROR, "Could not allocate extradata.\n");
                return ret;
            }
            /* size is reused for a 24-bit field from which the video data offset is derived */
            size = avio_rl24(pb);
            wav->smv_data_ofs = avio_tell(pb) + (size - 5) * 3;
            avio_rl24(pb);
            wav->smv_block_size = avio_rl24(pb);
            if (!wav->smv_block_size)
                return AVERROR_INVALIDDATA;
            avpriv_set_pts_info(vst, 32, 1, avio_rl24(pb));
            vst->duration = avio_rl24(pb);
            avio_rl24(pb);
            avio_rl24(pb);
            wav->smv_frames_per_jpeg = avio_rl24(pb);
            if (wav->smv_frames_per_jpeg > 65536) {
                av_log(s, AV_LOG_ERROR, "too many frames per jpeg\n");
                return AVERROR_INVALIDDATA;
            }
            AV_WL32(vst->codecpar->extradata, wav->smv_frames_per_jpeg);
            /* chunk parsing stops once the SMV header has been read */
            goto break_loop;
        case MKTAG('L', 'I', 'S', 'T'):
        case MKTAG('l', 'i', 's', 't'):
            /* a LIST chunk needs at least its 4-byte list type */
            if (size < 4) {
                av_log(s, AV_LOG_ERROR, "too short LIST tag\n");
                return AVERROR_INVALIDDATA;
            }
            switch (avio_rl32(pb)) {
            case MKTAG('I', 'N', 'F', 'O'):
                ff_read_riff_info(s, size - 4);
                break;
            case MKTAG('a', 'd', 't', 'l'):
                /* labels are only useful if chapters exist (created by the 'cue ' case) */
                if (s->nb_chapters > 0) {
                    while (avio_tell(pb) < next_tag_ofs &&
                           !avio_feof(pb)) {
                        char cue_label[512];
                        unsigned id, sub_size;

                        /* stop at the first sub-chunk that is not a 'labl' */
                        if (avio_rl32(pb) != MKTAG('l', 'a', 'b', 'l'))
                            break;

                        sub_size = avio_rl32(pb);
                        /* need the 4-byte id plus at least one byte of text */
                        if (sub_size < 5)
                            break;
                        id       = avio_rl32(pb);
                        avio_get_str(pb, sub_size - 4, cue_label, sizeof(cue_label));
                        /* skip one byte if the position is odd */
                        avio_skip(pb, avio_tell(pb) & 1);

                        /* attach the label as "title" to the chapter with the same id */
                        for (int i = 0; i < s->nb_chapters; i++) {
                            if (s->chapters[i]->id == id) {
                                av_dict_set(&s->chapters[i]->metadata, "title", cue_label, 0);
                                break;
                            }
                        }
                    }
                }
                break;
            }
            break;
        case MKTAG('I', 'D', '3', ' '):
        case MKTAG('i', 'd', '3', ' '): {
            ID3v2ExtraMeta *id3v2_extra_meta;
            ff_id3v2_read_dict(pb, &ffformatcontext(s)->id3v2_meta, ID3v2_DEFAULT_MAGIC, &id3v2_extra_meta);
            if (id3v2_extra_meta) {
                ff_id3v2_parse_apic(s, id3v2_extra_meta);
                ff_id3v2_parse_chapters(s, id3v2_extra_meta);
                ff_id3v2_parse_priv(s, id3v2_extra_meta);
            }
            ff_id3v2_free_extra_meta(&id3v2_extra_meta);
            }
            break;
        case MKTAG('c', 'u', 'e', ' '):
            /* cue points are only used once the sample rate is known from 'fmt ' */
            if (size >= 4 && got_fmt && st->codecpar->sample_rate > 0) {
                AVRational tb = {1, st->codecpar->sample_rate};
                unsigned nb_cues = avio_rl32(pb);

                /* each cue entry occupies 24 bytes: id (4) + 16 skipped + offset (4) */
                if (size >= nb_cues * 24LL + 4LL) {
                    for (int i = 0; i < nb_cues; i++) {
                        unsigned offset, id = avio_rl32(pb);

                        if (avio_feof(pb))
                            return AVERROR_INVALIDDATA;

                        avio_skip(pb, 16);
                        offset = avio_rl32(pb);

                        /* one chapter per cue point, passing offset with time base tb */
                        if (!avpriv_new_chapter(s, id, tb, offset, AV_NOPTS_VALUE, NULL))
                            return AVERROR(ENOMEM);
                    }
                }
            }
            break;
        }

        /* seek to next tag unless we know that we'll run into EOF */
        if ((avio_size(pb) > 0 && next_tag_ofs >= avio_size(pb)) ||
            wav_seek_tag(wav, pb, next_tag_ofs, SEEK_SET) < 0) {
            break;
        }
    }

break_loop:
    /* a format description and a data chunk are both mandatory */
    if (!got_fmt && !got_xma2) {
        av_log(s, AV_LOG_ERROR, "no 'fmt ' or 'XMA2' tag found\n");
        return AVERROR_INVALIDDATA;
    }

    if (data_ofs < 0) {
        av_log(s, AV_LOG_ERROR, "no 'data' tag found\n");
        return AVERROR_INVALIDDATA;
    }

    /* reposition at the first byte of audio data */
    avio_seek(pb, data_ofs, SEEK_SET);

    /* data_size is shifted left by 3 (bytes -> bits) below; presumably this
     * cap exists to keep those shifts from overflowing */
    if (data_size > (INT64_MAX>>3)) {
        av_log(s, AV_LOG_WARNING, "Data size %"PRId64" is too large\n", data_size);
        data_size = 0;
    }

    /* If dividing sample_count by the channel count yields a bit rate within
     * 30% of the declared one, do that division. */
    if (   st->codecpar->bit_rate > 0 && data_size > 0
        && st->codecpar->sample_rate > 0
        && sample_count > 0 && st->codecpar->channels > 1
        && sample_count % st->codecpar->channels == 0) {
        if (fabs(8.0 * data_size * st->codecpar->channels * st->codecpar->sample_rate /
            sample_count /st->codecpar->bit_rate - 1.0) < 0.3)
            sample_count /= st->codecpar->channels;
    }

    /* Discard sample_count when it implies more bits per sample per channel
     * than bits_per_coded_sample + 1. */
    if (   data_size > 0 && sample_count && st->codecpar->channels
        && (data_size << 3) / sample_count / st->codecpar->channels > st->codecpar->bits_per_coded_sample  + 1) {
        av_log(s, AV_LOG_WARNING, "ignoring wrong sample_count %"PRId64"\n", sample_count);
        sample_count = 0;
    }

    /* G.729 hack (for Ticket4577)
     * FIXME: Come up with cleaner, more general solution */
    if (st->codecpar->codec_id == AV_CODEC_ID_G729 && sample_count && (data_size << 3) > sample_count) {
        av_log(s, AV_LOG_WARNING, "ignoring wrong sample_count %"PRId64"\n", sample_count);
        sample_count = 0;
    }

    /* Derive sample_count from data_size (total bits / bits per sample frame)
     * when none is available or the codec has an exact bits-per-sample value,
     * provided the data chunk ends within the file. */
    if (!sample_count || av_get_exact_bits_per_sample(st->codecpar->codec_id) > 0)
        if (   st->codecpar->channels
            && data_size
            && av_get_bits_per_sample(st->codecpar->codec_id)
            && wav->data_end <= avio_size(pb))
            sample_count = (data_size << 3)
                                  /
                (st->codecpar->channels * (uint64_t)av_get_bits_per_sample(st->codecpar->codec_id));

    if (sample_count)
        st->duration = sample_count;

    /* Codec-specific fixups based on block_align / bits_per_coded_sample /
     * extradata combinations. */
    if (st->codecpar->codec_id == AV_CODEC_ID_PCM_S32LE &&
        st->codecpar->block_align == st->codecpar->channels * 4 &&
        st->codecpar->bits_per_coded_sample == 32 &&
        st->codecpar->extradata_size == 2 &&
        AV_RL16(st->codecpar->extradata) == 1) {
        st->codecpar->codec_id = AV_CODEC_ID_PCM_F16LE;
        st->codecpar->bits_per_coded_sample = 16;
    } else if (st->codecpar->codec_id == AV_CODEC_ID_PCM_S24LE &&
               st->codecpar->block_align == st->codecpar->channels * 4 &&
               st->codecpar->bits_per_coded_sample == 24) {
        st->codecpar->codec_id = AV_CODEC_ID_PCM_F24LE;
    } else if (st->codecpar->codec_id == AV_CODEC_ID_XMA1 ||
               st->codecpar->codec_id == AV_CODEC_ID_XMA2) {
        st->codecpar->block_align = 2048;
    } else if (st->codecpar->codec_id == AV_CODEC_ID_ADPCM_MS && st->codecpar->channels > 2 &&
               st->codecpar->block_align < INT_MAX / st->codecpar->channels) {
        /* the INT_MAX guard above keeps this multiplication from overflowing */
        st->codecpar->block_align *= st->codecpar->channels;
    }

    ff_metadata_conv_ctx(s, NULL, wav_metadata_conv);
    ff_metadata_conv_ctx(s, NULL, ff_riff_info_conv);

    set_spdif(s, wav);

    return 0;
}

形参s:既是输入型参数也是输出型参数,指向AVFormatContext类型的变量。通过s->pb可以读取到整个WAV Header的二进制数据。执行wav_read_header函数后,音频流(即函数内部通过avformat_new_stream创建的st,也就是s->streams[0])的codecpar中的成员变量会被赋值为WAV Header中Format chunk中的信息(包括音频压缩编码格式、采样频率、声道数量、采样位数、码率)。注意:((WAVDemuxContext *)s->priv_data)->vst指向的是SMV文件中的视频流,而不是音频流。

返回值:返回0表示解码WAV Header成功。返回一个负数表示解码WAV Header失败。

三、wav_read_header函数的内部实现分析

(一)读取WAV Header中的“区块编号”

WAV Header中的第一个区块为“区块编号”,通过下面语句读取“区块编号”的内容(关于avio_rXXX系列函数的用法可以参考:《FFmpeg源码:avio_r8、avio_rl16、avio_rl24、avio_rl32、avio_rl64函数分析》):

    /* read chunk ID */
    tag = avio_rl32(pb);

有的同学看到这里可能会有疑问:根据文章《音视频入门基础:WAV专题(2)——WAV格式简介》的描述,“区块编号”不是大端字节序的吗?而avio_rl32函数是按照小端模式读取四个字节数据。所以为什么是用avio_rl32函数而不是avio_rb32函数读取呢?avio_rb32函数才是按照大端模式读取四个字节数据的吧?

音视频入门基础:WAV专题(5)——FFmpeg源码中解码WAV Header的实现_第1张图片

原因是这样的,在语句tag = avio_rl32(pb)之后,又通过MKTAG宏(关于MKTAG和MKBETAG宏定义的用法可以参考:《FFmpeg源码:MKTAG和MKBETAG宏定义分析》)将字符(比如'R', 'I', 'F', 'F')转换为整型,按小端模式存储,让其跟tag进行比较。所以这就是所谓的“负负得正”,“用avio_rl32函数读取然后跟MKTAG转换出来的整数进行比较”,这个跟“用avio_rb32函数读取然后用MKBETAG转换出来的整数进行比较”,效果是一样的:

    switch (tag) {
    case MKTAG('R', 'I', 'F', 'F'):
        break;
    case MKTAG('R', 'I', 'F', 'X'):
        wav->rifx = 1;
        break;
    case MKTAG('R', 'F', '6', '4'):
        rf64 = 1;
        break;
    case MKTAG('B', 'W', '6', '4'):
        bw64 = 1;
        break;
    default:
        av_log(s, AV_LOG_ERROR, "invalid start code %s in RIFF header\n",
               av_fourcc2str(tag));
        return AVERROR_INVALIDDATA;
    }

所以下面代码块:

    /* read chunk ID */
    tag = avio_rl32(pb);
    switch (tag) {
    case MKTAG('R', 'I', 'F', 'F'):
        break;
    case MKTAG('R', 'I', 'F', 'X'):
        wav->rifx = 1;
        break;
    case MKTAG('R', 'F', '6', '4'):
        rf64 = 1;
        break;
    case MKTAG('B', 'W', '6', '4'):
        bw64 = 1;
        break;
    default:
        av_log(s, AV_LOG_ERROR, "invalid start code %s in RIFF header\n",
               av_fourcc2str(tag));
        return AVERROR_INVALIDDATA;
    }

等价于:

    /* read chunk ID */
    tag = avio_rb32(pb);
    switch (tag) {
    case MKBETAG('R', 'I', 'F', 'F'):
        break;
    case MKBETAG('R', 'I', 'F', 'X'):
        wav->rifx = 1;
        break;
    case MKBETAG('R', 'F', '6', '4'):
        rf64 = 1;
        break;
    case MKBETAG('B', 'W', '6', '4'):
        bw64 = 1;
        break;
    default:
        av_log(s, AV_LOG_ERROR, "invalid start code %s in RIFF header\n",
               av_fourcc2str(tag));
        return AVERROR_INVALIDDATA;
    }

读取到“区块编号”后,如果值等于“RIFF”,表示该文件遵守RIFF格式的规则,按默认处理;如果值等于“RIFX”,让wav->rifx赋值为1,表示该文件遵守RIFX格式的规则;如果值等于“RF64”,让变量rf64赋值为1,表示这是WAVE 64位扩展格式中的一种:WAV RF64;如果值等于“BW64”,让变量bw64赋值为1,表示这是WAVE 64位扩展格式中的一种:WAV BW64;如果值不为上述,打印日志:"invalid start code XXX in RIFF header"并返回AVERROR_INVALIDDATA表示WAV Header中的数据不合法。

(二)读取WAV Header中的“总区块大小”

WAV Header中的第二个区块为“总区块大小”,通过下面语句读取“总区块大小”。由于该值可以由WAV Header中的其它值推导出来,所以对于FFmpeg来讲它没有意义,故FFmpeg没有将其存贮到内部的成员变量中:

    /* read chunk size */
    avio_rl32(pb);

(三)读取WAV Header中的“档案格式”

WAV Header中的第三个区块为“档案格式”。通过下面语句读取该值并进行比较,如果值不为“WAVE”,则打印日志"invalid format in RIFF header"并返回AVERROR_INVALIDDATA,表示WAV Header中的数据不合法:

    /* read format */
    if (avio_rl32(pb) != MKTAG('W', 'A', 'V', 'E')) {
        av_log(s, AV_LOG_ERROR, "invalid format in RIFF header\n");
        return AVERROR_INVALIDDATA;
    }

(四)WAVE 64位扩展格式处理

FFmpeg源码内部将WAV和WAVE 64位扩展格式都放到一起处理,下面代码处理文件格式为WAVE 64位时的情况:


    if (rf64 || bw64) {
        if (avio_rl32(pb) != MKTAG('d', 's', '6', '4'))
            return AVERROR_INVALIDDATA;
        size = avio_rl32(pb);
        if (size < 24)
            return AVERROR_INVALIDDATA;
        avio_rl64(pb); /* RIFF size */

        data_size    = avio_rl64(pb);
        sample_count = avio_rl64(pb);

        if (data_size < 0 || sample_count < 0) {
            av_log(s, AV_LOG_ERROR, "negative data_size and/or sample_count in "
                   "ds64: data_size = %"PRId64", sample_count = %"PRId64"\n",
                   data_size, sample_count);
            return AVERROR_INVALIDDATA;
        }
        avio_skip(pb, size - 24); /* skip rest of ds64 chunk */

    }

(五)读取WAV Header中的子区块

WAV Header中包含“Format chunk”、“Data chunk”这些必须存在的子区块,也包含Fact chunk、Cue points chunk、Playlist chunk、Associated data list chunk等可选区块。

通过for循环不断读取子区块的数据。通过next_tag函数把“子区块标签”存入变量tag中,把“子区块大小”存入变量size中:

for (;;) {
    AVStream *vst;
    size         = next_tag(pb, &tag, wav->rifx);
//...
}

next_tag函数定义在libavformat/wavdec.c中:

/**
 * Read the 8-byte header of the next chunk: a 4-byte tag followed by a
 * 4-byte size.
 *
 * @param pb          input to read from; advanced by 8 bytes
 * @param tag         receives the chunk tag, always read with avio_rl32()
 *                    so it can be compared against MKTAG() values
 * @param big_endian  non-zero to read the size field with avio_rb32()
 *                    (the caller passes wav->rifx), zero for avio_rl32()
 * @return the value of the chunk's 32-bit size field
 */
static int64_t next_tag(AVIOContext *pb, uint32_t *tag, int big_endian)
{
    *tag = avio_rl32(pb);
    /* only the size field depends on the byte-order flag */
    if (!big_endian) {
        return avio_rl32(pb);
    } else {
        return avio_rb32(pb);
    }
}

然后wav_read_header函数中通过avio_tell函数(关于该函数用法可以参考:《FFmpeg源码:avio_tell函数分析》)得到当前读取到WAV Header中的位置,加上变量size“子区块大小”,即可得到下一个子区块在WAV Header中的位置,把该值存入变量next_tag_ofs中:

next_tag_ofs = avio_tell(pb) + size;

通过上述得到的变量tag的值(子区块标签)和switch case语句来判断是哪种子区块,然后循环处理:

 for (;;) {
  //...
        switch (tag) {
        case MKTAG('f', 'm', 't', ' '):
			//...
            break;
        case MKTAG('X', 'M', 'A', '2'):
			//...
            break;
        case MKTAG('d', 'a', 't', 'a'):
			//...
            break;
        case MKTAG('f', 'a', 'c', 't'):
			//...
            break;
        case MKTAG('b', 'e', 'x', 't'):
			//...
            break;
        case MKTAG('S','M','V','0'):
			//...
            goto break_loop;
        case MKTAG('L', 'I', 'S', 'T'):
        case MKTAG('l', 'i', 's', 't'):
			//...
            break;
        case MKTAG('I', 'D', '3', ' '):
        case MKTAG('i', 'd', '3', ' '): {
			//...
            break;
        case MKTAG('c', 'u', 'e', ' '):
			//...
            break;
        }
	}
}

下面以读取“Format chunk”和“Data chunk”子区块中的信息为例子讲解FFmpeg是如何解码WAV Header中的子区块的。

(六)读取WAV Header中“Format chunk”子区块的信息

当“子区块标签”内容为“fmt ”时,表示该子区块是“Format chunk”,调用wav_parse_fmt_tag函数解析“Format chunk”子区块:

​
 for (;;) {
  //...
        switch (tag) {
        case MKTAG('f', 'm', 't', ' '):
        /* only parse the first 'fmt ' tag found */
        if (!got_xma2 && !got_fmt && (ret = wav_parse_fmt_tag(s, size, st)) < 0) {
            return ret;
        } else if (got_fmt)
            av_log(s, AV_LOG_WARNING, "found more than one 'fmt ' tag\n");

        got_fmt = 1;
        break;
        }
	}
}

​

而wav_parse_fmt_tag函数内部又调用了ff_get_wav_header函数来获取音频压缩编码格式、采样频率、声道数量、采样位数、码率信息(具体可以参考:《音视频入门基础:WAV专题(4)——FFmpeg源码中获取WAV文件音频压缩编码格式、采样频率、声道数量、采样位数、码率的实现》):

/**
 * Parse a 'fmt ' chunk and set up the audio stream from it.
 *
 * @param s     format context; s->pb is positioned at the chunk payload
 * @param size  payload size of the 'fmt ' chunk
 * @param st    audio stream whose codecpar is filled in
 * @return 0 on success, or the negative error returned by
 *         ff_get_wav_header()
 */
static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream *st)
{
    AVIOContext *pb = s->pb;
    WAVDemuxContext *wav = s->priv_data;
    int ret;

    /* parse fmt header */
    /* wav->rifx is passed so the helper can honour the file's byte order */
    ret = ff_get_wav_header(s, pb, st->codecpar, size, wav->rifx);
    if (ret < 0)
        return ret;
    handle_stream_probing(st);

    ffstream(st)->need_parsing = AVSTREAM_PARSE_FULL_RAW;

    /* presumably sets a 64-bit pts with time base 1 / sample_rate - confirm
     * against avpriv_set_pts_info() */
    avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);

    return 0;
}

(七)读取WAV Header中“Data chunk”子区块的信息

当“子区块标签”内容为“data”时,表示该子区块是“Data chunk”:

​
 for (;;) {
  //...
        switch (tag) {
        case MKTAG('d', 'a', 't', 'a'):
            if (!(pb->seekable & AVIO_SEEKABLE_NORMAL) && !got_fmt && !got_xma2) {
                av_log(s, AV_LOG_ERROR,
                       "found no 'fmt ' tag before the 'data' tag\n");
                return AVERROR_INVALIDDATA;
            }

            if (rf64 || bw64) {
                next_tag_ofs = wav->data_end = av_sat_add64(avio_tell(pb), data_size);
            } else if (size != 0xFFFFFFFF) {
                data_size    = size;
                next_tag_ofs = wav->data_end = size ? next_tag_ofs : INT64_MAX;
            } else {
                av_log(s, AV_LOG_WARNING, "Ignoring maximum wav data size, "
                       "file may be invalid\n");
                data_size    = 0;
                next_tag_ofs = wav->data_end = INT64_MAX;
            }

            data_ofs = avio_tell(pb);

            /* don't look for footer metadata if we can't seek or if we don't
             * know where the data tag ends
             */
            if (!(pb->seekable & AVIO_SEEKABLE_NORMAL) || (!(rf64 && !bw64) && !size))
                goto break_loop;
            break;
        }
	}
}

​

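当读取到“Data chunk”时,变量size的值为“子区块Data chunk的大小”,即该WAV文件中存放的真正的音频数据的大小(单位为字节)。此时avio_tell(pb)为音频数据的起始位置,也就是音频数据之前所有内容(WAV Header)的大小。所以变量next_tag_ofs为“Data chunk”的结束位置。在最常见的情况下,“Data chunk”是文件中的最后一个子区块,此时next_tag_ofs就等于该WAV文件的总大小;如果“Data chunk”后面还有其它子区块(比如存放元数据的LIST子区块),则next_tag_ofs小于该WAV文件的总大小: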

​
for (;;) {
    AVStream *vst;
    size         = next_tag(pb, &tag, wav->rifx);
    next_tag_ofs = avio_tell(pb) + size;
//...
}

​

通过语句:next_tag_ofs = wav->data_end = size ? next_tag_ofs : INT64_MAX

让wav->data_end得到“Data chunk”的结束位置(单位为字节)。当“Data chunk”是文件中最后一个子区块时,该值即为该WAV文件的总大小;当size为0时,该值被设为INT64_MAX,表示音频数据的长度未知:

​
​
 for (;;) {
  //...
        switch (tag) {
        case MKTAG('d', 'a', 't', 'a'):
//...
                data_size    = size;
                next_tag_ofs = wav->data_end = size ? next_tag_ofs : INT64_MAX;
//...
            break;
        }
	}
}

​

​

而WAVDemuxContext *wav = s->priv_data:

/* wav input */
static int wav_read_header(AVFormatContext *s)
{
//...
    WAVDemuxContext *wav = s->priv_data;
//...
}

所以在执行完wav_read_header函数后,在该函数外部可以通过WAVDemuxContext *wav = s->priv_data得到该WAV文件的信息,比如通过wav->data_end得到音频数据的结束位置(当“Data chunk”是文件中最后一个子区块时,该值即为该WAV文件的总大小),单位为字节。

你可能感兴趣的:(FFmpeg源码分析,音视频技术,音视频,ffmpeg)