libavcodec\avcodec.h
/**
 * Supply a raw video or audio frame to the encoder. Use avcodec_receive_packet()
 * to retrieve buffered output packets.
 *
 * @param avctx     codec context
 * @param[in] frame AVFrame containing the raw audio or video frame to be encoded.
 *                  Ownership of the frame remains with the caller, and the
 *                  encoder will not write to the frame. The encoder may create
 *                  a reference to the frame data (or copy it if the frame is
 *                  not reference-counted).
 *                  It can be NULL, in which case it is considered a flush
 *                  request. This signals the end of the stream. If the encoder
 *                  still has packets buffered, it will return them after this
 *                  call. Once flushing mode has been entered, every further
 *                  call (with a frame or with NULL) returns AVERROR_EOF.
 *
 *                  For audio:
 *                  If AV_CODEC_CAP_VARIABLE_FRAME_SIZE is set, then each frame
 *                  can have any number of samples.
 *                  If it is not set, frame->nb_samples must be equal to
 *                  avctx->frame_size for all frames except the last.
 *                  The final frame may be smaller than avctx->frame_size.
 * @return 0 on success, otherwise negative error code:
 *      AVERROR(EAGAIN):   input is not accepted in the current state - user
 *                         must read output with avcodec_receive_packet() (once
 *                         all output is read, the frame should be resent, and
 *                         the call will not fail with EAGAIN).
 *      AVERROR_EOF:       the encoder has been flushed, and no new frames can
 *                         be sent to it
 *      AVERROR(EINVAL):   codec not opened, or it is not an encoder (a
 *                         codec-specific implementation may report EINVAL for
 *                         further reasons, e.g. when a flush is required)
 *      AVERROR(ENOMEM):   failed to add packet to internal queue, or similar
 *      other errors: legitimate encoding errors
 */
int avcodec_send_frame(AVCodecContext *avctx, const AVFrame *frame);
libavcodec\encode.c
/**
 * Feed one raw frame (or NULL to start draining) to an opened encoder.
 *
 * Encoders that provide their own send_frame callback handle the frame
 * themselves; all others are driven through do_encode(), which stores at most
 * one output packet in avctx->internal->buffer_pkt.
 *
 * @return 0 on success, AVERROR(EINVAL) if the context is not an opened
 *         encoder, AVERROR_EOF once draining has started, AVERROR(EAGAIN)
 *         while a previously produced packet has not been fetched yet, or
 *         whatever the codec callback / do_encode() reports.
 */
int attribute_align_arg avcodec_send_frame(AVCodecContext *avctx, const AVFrame *frame)
{
    int got_packet = 0;

    if (!(avcodec_is_open(avctx) && av_codec_is_encoder(avctx->codec)))
        return AVERROR(EINVAL);

    /* After the first NULL frame nothing more is accepted. */
    if (avctx->internal->draining)
        return AVERROR_EOF;

    if (!frame) {
        avctx->internal->draining = 1;

        /* Encoders without delay have nothing buffered, so there is nothing
         * left to do for a flush request. */
        if ((avctx->codec->capabilities & AV_CODEC_CAP_DELAY) == 0)
            return 0;
    }

    /* Native send/receive implementation provided by the codec. */
    if (avctx->codec->send_frame)
        return avctx->codec->send_frame(avctx, frame);

    /*
     * Emulation on top of the old encode API. It is done on the send side
     * rather than in avcodec_receive_packet() because:
     * 1. if the AVFrame is not refcounted, copying it would be much more
     *    expensive than copying the packet data
     * 2. few users are assumed to use non-refcounted AVPackets, so usually
     *    no copy is needed
     */
    if (avctx->internal->buffer_pkt_valid)
        return AVERROR(EAGAIN);   /* previous packet has not been read yet */

    return do_encode(avctx, frame, &got_packet);
}
从avcodec_send_frame()中可以看到,先判断AVCodec是否存在send_frame函数指针,如果存在就调用该函数;如果不存在,则在内部缓存的buffer_pkt尚未被取走时返回AVERROR(EAGAIN),否则调用do_encode函数。本文以h264编码为例,如下:
/*
 * Codec descriptor for the libx264 wrapper.
 *
 * Only the encode2 callback is provided; neither send_frame nor
 * receive_packet is set, so the generic avcodec_send_frame() /
 * avcodec_receive_packet() code drives this encoder through do_encode().
 */
AVCodec ff_libx264_encoder = {
.name = "libx264",
.long_name = NULL_IF_CONFIG_SMALL("libx264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
.type = AVMEDIA_TYPE_VIDEO,
.id = AV_CODEC_ID_H264,
.priv_data_size = sizeof(X264Context),
.init = X264_init,
/* Per-frame encode entry point, reached via avctx->codec->encode2(). */
.encode2 = X264_frame,
.close = X264_close,
/* AV_CODEC_CAP_DELAY: the generic layer keeps calling the encoder with a
 * NULL frame after end of stream to collect the remaining output. */
.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS |
AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
.priv_class = &x264_class,
.defaults = x264_defaults,
.init_static_data = X264_init_static,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
.wrapper_name = "libx264",
};
从ff_libx264_encoder定义发现,没有定义send_frame函数指针,所以调用do_encode。如下所示:
/**
 * Run one step of the old-style encode API and park the result in
 * avctx->internal->buffer_pkt.
 *
 * @param avctx      codec context
 * @param frame      frame to encode, or NULL when draining
 * @param got_packet set to non-zero when a packet was produced
 * @return 0 when a packet was stored (buffer_pkt_valid is set), otherwise the
 *         return value of the underlying encode call, or AVERROR(EINVAL) for
 *         media types other than video and audio.
 */
static int do_encode(AVCodecContext *avctx, const AVFrame *frame, int *got_packet)
{
    int status;

    /* Drop whatever was left from a previous call. */
    *got_packet = 0;
    av_packet_unref(avctx->internal->buffer_pkt);
    avctx->internal->buffer_pkt_valid = 0;

    switch (avctx->codec_type) {
    case AVMEDIA_TYPE_VIDEO:
        status = avcodec_encode_video2(avctx, avctx->internal->buffer_pkt,
                                       frame, got_packet);
        break;
    case AVMEDIA_TYPE_AUDIO:
        status = avcodec_encode_audio2(avctx, avctx->internal->buffer_pkt,
                                       frame, got_packet);
        break;
    default:
        status = AVERROR(EINVAL);
        break;
    }

    /* Failure, or success without output: leave the buffer empty. */
    if (status < 0 || !*got_packet) {
        av_packet_unref(avctx->internal->buffer_pkt);
        return status;
    }

    // Encoders must always return ref-counted buffers.
    // Side-data only packets have no data and can be not ref-counted.
    av_assert0(!avctx->internal->buffer_pkt->data || avctx->internal->buffer_pkt->buf);
    avctx->internal->buffer_pkt_valid = 1;
    return 0;
}
对于视频,do_encode()函数最终会调用avcodec_encode_video2();对于音频,则会调用avcodec_encode_audio2()。
avcodec_receive_packet()声明在文件libavcodec\avcodec.h中,如下所示。
/**
 * Read encoded data from the encoder.
 *
 * @param avctx codec context
 * @param avpkt This will be set to a reference-counted packet allocated by the
 *              encoder. Note that the function will always call
 *              av_packet_unref(avpkt) before doing anything else.
 * @return 0 on success, otherwise negative error code:
 *      AVERROR(EAGAIN):   output is not available in the current state - user
 *                         must try to send input
 *      AVERROR_EOF:       the encoder has been fully flushed, and there will be
 *                         no more output packets
 *      AVERROR(EINVAL):   codec not opened, or it is not an encoder
 *      other errors: legitimate encoding errors
 */
int avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt);
avcodec_receive_packet()定义在文件libavcodec\encode.c中,如下所示。
/**
 * Fetch the next encoded packet from an opened encoder.
 *
 * avpkt is always unreferenced first. Encoders with a receive_packet
 * callback are queried directly; for all others the packet parked by
 * do_encode() is handed over, and while draining do_encode() is called with
 * a NULL frame to pull out whatever the encoder still holds.
 *
 * @return 0 when avpkt was filled, AVERROR(EINVAL) if the context is not an
 *         opened encoder, AVERROR(EAGAIN) when more input is needed,
 *         AVERROR_EOF when draining has finished, or an encoder error.
 */
int attribute_align_arg avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
{
    av_packet_unref(avpkt);

    if (!(avcodec_is_open(avctx) && av_codec_is_encoder(avctx->codec)))
        return AVERROR(EINVAL);

    if (avctx->codec->receive_packet) {
        /* A draining encoder without delay cannot have anything buffered. */
        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY) && avctx->internal->draining)
            return AVERROR_EOF;
        return avctx->codec->receive_packet(avctx, avpkt);
    }

    /* Emulation via old API. */
    if (!avctx->internal->buffer_pkt_valid) {
        int produced;
        int err;

        /* Nothing parked and not draining: the caller has to send a frame. */
        if (!avctx->internal->draining)
            return AVERROR(EAGAIN);

        err = do_encode(avctx, NULL, &produced);
        if (err < 0)
            return err;
        if (!produced)
            return AVERROR_EOF;
    }

    /* Transfer ownership of the parked packet to the caller. */
    av_packet_move_ref(avpkt, avctx->internal->buffer_pkt);
    avctx->internal->buffer_pkt_valid = 0;
    return 0;
}
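从avcodec_receive_packet()中可以看到,先判断AVCodec是否存在receive_packet函数指针,如果存在就调用该函数;如果不存在,则走旧API的兼容路径:若内部缓存的buffer_pkt有效就直接把它交给调用者,否则只有在已经进入draining(冲刷)状态时才会调用do_encode(avctx, NULL, ...)取出编码器中缓存的数据,未进入draining状态时返回AVERROR(EAGAIN)。libx264没有定义receive_packet,因此h264编码同样走这条路径。从前面分析得知,对于视频,do_encode()函数最终会调用avcodec_encode_video2();对于音频,则会调用avcodec_encode_audio2()。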
/**
 * Encode a frame of video.
 *
 * Takes input raw video data from frame and writes the next output packet, if
 * available, to avpkt. The output packet does not necessarily contain data for
 * the most recent frame, as encoders can delay and reorder input frames
 * internally as needed.
 *
 * @param avctx     codec context
 * @param avpkt     output AVPacket.
 *                  The user can supply an output buffer by setting
 *                  avpkt->data and avpkt->size prior to calling the
 *                  function, but if the size of the user-provided data is not
 *                  large enough, encoding will fail. All other AVPacket fields
 *                  will be reset by the encoder using av_init_packet(). If
 *                  avpkt->data is NULL, the encoder will allocate it.
 *                  The encoder will set avpkt->size to the size of the
 *                  output packet. The returned data (if any) belongs to the
 *                  caller, who is responsible for freeing it.
 *
 *                  If this function fails or produces no output, avpkt will be
 *                  freed using av_packet_unref().
 * @param[in] frame AVFrame containing the raw video data to be encoded.
 *                  May be NULL when flushing an encoder that has the
 *                  AV_CODEC_CAP_DELAY capability set.
 * @param[out] got_packet_ptr This field is set to 1 by libavcodec if the
 *                            output packet is non-empty, and to 0 if it is
 *                            empty. If the function returns an error, the
 *                            packet can be assumed to be invalid, and the
 *                            value of got_packet_ptr is undefined and should
 *                            not be used.
 * @return          0 on success, negative error code on failure
 *
 * @deprecated use avcodec_send_frame()/avcodec_receive_packet() instead
 */
attribute_deprecated
int avcodec_encode_video2(AVCodecContext *avctx, AVPacket *avpkt,
const AVFrame *frame, int *got_packet_ptr);
该函数每个参数的含义在注释里面已经写的很清楚了,在这里用中文简述一下:
@ avctx:编码器的AVCodecContext。
@ avpkt:编码输出的AVPacket。
@ frame:编码输入的AVFrame。
@ got_packet_ptr:成功编码一个AVPacket的时候设置为1。
avcodec_encode_video2()的定义位于libavcodec\utils.c,如下所示。
/*
 * Encode one video frame through the codec's encode2 callback and normalise
 * the resulting packet (buffer ownership, size, timestamps).
 *
 * Returns 0 on success (check *got_packet_ptr for output) or a negative
 * error code. avpkt is unreferenced when an error is reported through the
 * common exit path or when no packet was produced.
 */
int attribute_align_arg avcodec_encode_video2(AVCodecContext *avctx,
AVPacket *avpkt,
const AVFrame *frame,
int *got_packet_ptr)
{
int ret;
/* Snapshot of the caller's packet so a user-supplied buffer can be
 * restored after the encoder has written its output. */
AVPacket user_pkt = *avpkt;
/* Only a buffer allocated on behalf of the caller is resized at the end. */
int needs_realloc = !user_pkt.data;
*got_packet_ptr = 0;
if (!avctx->codec->encode2) {
av_log(avctx, AV_LOG_ERROR, "This encoder requires using the avcodec_send_frame() API.\n");
return AVERROR(ENOSYS);
}
/* Hand the frame to the frame-threaded encoder when it is active. */
if(CONFIG_FRAME_THREAD_ENCODER &&
avctx->internal->frame_thread_encoder && (avctx->active_thread_type&FF_THREAD_FRAME))
return ff_thread_video_encode_frame(avctx, avpkt, frame, got_packet_ptr);
/* First pass: start with an empty statistics string. */
if ((avctx->flags&AV_CODEC_FLAG_PASS1) && avctx->stats_out)
avctx->stats_out[0] = '\0';
/* Flush request for an encoder without delay: report success with no
 * output instead of calling the codec. */
if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY) && !frame) {
av_packet_unref(avpkt);
return 0;
}
if (av_image_check_size2(avctx->width, avctx->height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx))
return AVERROR(EINVAL);
/* Missing frame properties are only warned about, not rejected. */
if (frame && frame->format == AV_PIX_FMT_NONE)
av_log(avctx, AV_LOG_WARNING, "AVFrame.format is not set\n");
if (frame && (frame->width == 0 || frame->height == 0))
av_log(avctx, AV_LOG_WARNING, "AVFrame.width or height is not set\n");
av_assert0(avctx->codec->encode2);
ret = avctx->codec->encode2(avctx, avpkt, frame, got_packet_ptr);
/* encode2 must return 0 or a negative error code. */
av_assert0(ret <= 0);
emms_c();
/* The encoder wrote into the context's internal byte_buffer: move the
 * data into the caller's buffer, or turn the packet into a refcounted one. */
if (avpkt->data && avpkt->data == avctx->internal->byte_buffer) {
needs_realloc = 0;
if (user_pkt.data) {
if (user_pkt.size >= avpkt->size) {
memcpy(user_pkt.data, avpkt->data, avpkt->size);
} else {
av_log(avctx, AV_LOG_ERROR, "Provided packet is too small, needs to be %d\n", avpkt->size);
avpkt->size = user_pkt.size;
ret = -1;
}
/* In both cases the packet points at the caller's buffer again. */
avpkt->buf = user_pkt.buf;
avpkt->data = user_pkt.data;
} else if (!avpkt->buf) {
ret = av_packet_make_refcounted(avpkt);
/* NOTE(review): this early return skips the av_packet_unref() at the
 * end of the function. */
if (ret < 0)
return ret;
}
}
if (!ret) {
if (!*got_packet_ptr)
avpkt->size = 0;
/* Without delay the output belongs to the frame just submitted; frame is
 * non-NULL here because the NULL case returned early above. */
else if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
avpkt->pts = avpkt->dts = frame->pts;
/* Resize a library-allocated buffer to the actual packet size plus padding. */
if (needs_realloc && avpkt->data) {
ret = av_buffer_realloc(&avpkt->buf, avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
if (ret >= 0)
avpkt->data = avpkt->buf->data;
}
if (frame)
avctx->frame_number++;
}
/* No usable output: release whatever the packet holds. */
if (ret < 0 || !*got_packet_ptr)
av_packet_unref(avpkt);
return ret;
}
从函数的定义可以看出,avcodec_encode_video2()首先调用了av_image_check_size2()检查设置的宽高参数是否合理,然后通过AVCodec的encode2()函数指针调用具体的编码器。
/**
 * Check that a picture of w x h pixels is acceptable.
 *
 * The dimensions must be positive when viewed as int, the padded line size
 * and the padded total size must stay below INT_MAX, and, when max_pixels is
 * below INT64_MAX, the pixel count must not exceed max_pixels.
 *
 * @param w          picture width
 * @param h          picture height
 * @param max_pixels upper bound for w*h; INT64_MAX disables this check
 * @param pix_fmt    pixel format used to estimate the line size
 * @param log_offset stored in the logging context used for error messages
 * @param log_ctx    stored in the logging context used for error messages
 * @return 0 if the size is acceptable, AVERROR(EINVAL) otherwise
 */
int av_image_check_size2(unsigned int w, unsigned int h, int64_t max_pixels, enum AVPixelFormat pix_fmt, int log_offset, void *log_ctx)
{
    ImgUtils imgutils = {
        .class      = &imgutils_class,
        .log_offset = log_offset,
        .log_ctx    = log_ctx,
    };
    int64_t row_bytes = av_image_get_linesize(pix_fmt, w, 0);
    int not_positive, too_large;

    /* Fall back to 8 bytes per pixel when the line size is not available. */
    if (row_bytes <= 0)
        row_bytes = 8LL*w;
    row_bytes += 128*8;

    not_positive = (int)w<=0 || (int)h<=0;
    too_large    = row_bytes >= INT_MAX || row_bytes*(uint64_t)(h+128) >= INT_MAX;

    if (not_positive || too_large) {
        av_log(&imgutils, AV_LOG_ERROR, "Picture size %ux%u is invalid\n", w, h);
        return AVERROR(EINVAL);
    }

    if (max_pixels < INT64_MAX && w*(int64_t)h > max_pixels) {
        av_log(&imgutils, AV_LOG_ERROR,
               "Picture size %ux%u exceeds specified max pixel count %"PRId64", see the documentation if you wish to increase it\n",
               w, h, max_pixels);
        return AVERROR(EINVAL);
    }

    return 0;
}
从代码中可以看出,av_image_check_size2()主要是要求图像宽高必须为正数,而且取值不能太大;当max_pixels小于INT64_MAX时,还要求像素总数(宽×高)不超过max_pixels。
从ff_libx264_encoder的定义可以看出,encode2()函数指向的是X264_frame()函数。
static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
int *got_packet)
{
X264Context *x4 = ctx->priv_data;
x264_nal_t *nal;
int nnal, i, ret;
x264_picture_t pic_out = {0};
int pict_type;
int bit_depth;
int64_t *out_opaque;
AVFrameSideData *sd;
x264_picture_init( &x4->pic );
x4->pic.img.i_csp = x4->params.i_csp;
#if X264_BUILD >= 153
bit_depth = x4->params.i_bitdepth;
#else
bit_depth = x264_bit_depth;
#endif
if (bit_depth > 8)
x4->pic.img.i_csp |= X264_CSP_HIGH_DEPTH;
x4->pic.img.i_plane = avfmt2_num_planes(ctx->pix_fmt);
if (frame) {
for (i = 0; i < x4->pic.img.i_plane; i++) {
x4->pic.img.plane[i] = frame->data[i];
x4->pic.img.i_stride[i] = frame->linesize[i];
}
x4->pic.i_pts = frame->pts;
x4->reordered_opaque[x4->next_reordered_opaque] = frame->reordered_opaque;
x4->pic.opaque = &x4->reordered_opaque[x4->next_reordered_opaque];
x4->next_reordered_opaque++;
x4->next_reordered_opaque %= x4->nb_reordered_opaque;
switch (frame->pict_type) {
case AV_PICTURE_TYPE_I:
x4->pic.i_type = x4->forced_idr > 0 ? X264_TYPE_IDR
: X264_TYPE_KEYFRAME;
break;
case AV_PICTURE_TYPE_P:
x4->pic.i_type = X264_TYPE_P;
break;
case AV_PICTURE_TYPE_B:
x4->pic.i_type = X264_TYPE_B;
break;
default:
x4->pic.i_type = X264_TYPE_AUTO;
break;
}
reconfig_encoder(ctx, frame);
if (x4->a53_cc) {
void *sei_data;
size_t sei_size;
ret = ff_alloc_a53_sei(frame, 0, &sei_data, &sei_size);
if (ret < 0) {
av_log(ctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
} else if (sei_data) {
x4->pic.extra_sei.payloads = av_mallocz(sizeof(x4->pic.extra_sei.payloads[0]));
if (x4->pic.extra_sei.payloads == NULL) {
av_log(ctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
av_free(sei_data);
} else {
x4->pic.extra_sei.sei_free = av_free;
x4->pic.extra_sei.payloads[0].payload_size = sei_size;
x4->pic.extra_sei.payloads[0].payload = sei_data;
x4->pic.extra_sei.num_payloads = 1;
x4->pic.extra_sei.payloads[0].payload_type = 4;
}
}
}
sd = av_frame_get_side_data(frame, AV_FRAME_DATA_REGIONS_OF_INTEREST);
if (sd) {
if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
av_log(ctx, AV_LOG_WARNING, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n");
} else {
if (frame->interlaced_frame == 0) {
int mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;
int mby = (frame->height + MB_SIZE - 1) / MB_SIZE;
int qp_range = 51 + 6 * (bit_depth - 8);
int nb_rois;
const AVRegionOfInterest *roi;
uint32_t roi_size;
float *qoffsets;
roi = (const AVRegionOfInterest*)sd->data;
roi_size = roi->self_size;
if (!roi_size || sd->size % roi_size != 0) {
av_log(ctx, AV_LOG_ERROR, "Invalid AVRegionOfInterest.self_size.\n");
return AVERROR(EINVAL);
}
nb_rois = sd->size / roi_size;
qoffsets = av_mallocz_array(mbx * mby, sizeof(*qoffsets));
if (!qoffsets)
return AVERROR(ENOMEM);
// This list must be iterated in reverse because the first
// region in the list applies when regions overlap.
for (int i = nb_rois - 1; i >= 0; i--) {
int startx, endx, starty, endy;
float qoffset;
roi = (const AVRegionOfInterest*)(sd->data + roi_size * i);
starty = FFMIN(mby, roi->top / MB_SIZE);
endy = FFMIN(mby, (roi->bottom + MB_SIZE - 1)/ MB_SIZE);
startx = FFMIN(mbx, roi->left / MB_SIZE);
endx = FFMIN(mbx, (roi->right + MB_SIZE - 1)/ MB_SIZE);
if (roi->qoffset.den == 0) {
av_free(qoffsets);
av_log(ctx, AV_LOG_ERROR, "AVRegionOfInterest.qoffset.den must not be zero.\n");
return AVERROR(EINVAL);
}
qoffset = roi->qoffset.num * 1.0f / roi->qoffset.den;
qoffset = av_clipf(qoffset * qp_range, -qp_range, +qp_range);
for (int y = starty; y < endy; y++) {
for (int x = startx; x < endx; x++) {
qoffsets[x + y*mbx] = qoffset;
}
}
}
x4->pic.prop.quant_offsets = qoffsets;
x4->pic.prop.quant_offsets_free = av_free;
} else {
av_log(ctx, AV_LOG_WARNING, "interlaced_frame not supported for ROI encoding yet, skipping ROI.\n");
}
}
}
}
do {
if (x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL, &pic_out) < 0)
return AVERROR_EXTERNAL;
ret = encode_nals(ctx, pkt, nal, nnal);
if (ret < 0)
return ret;
} while (!ret && !frame && x264_encoder_delayed_frames(x4->enc));
pkt->pts = pic_out.i_pts;
pkt->dts = pic_out.i_dts;
out_opaque = pic_out.opaque;
if (out_opaque >= x4->reordered_opaque &&
out_opaque < &x4->reordered_opaque[x4->nb_reordered_opaque]) {
ctx->reordered_opaque = *out_opaque;
} else {
// Unexpected opaque pointer on picture output
ctx->reordered_opaque = 0;
}
switch (pic_out.i_type) {
case X264_TYPE_IDR:
case X264_TYPE_I:
pict_type = AV_PICTURE_TYPE_I;
break;
case X264_TYPE_P:
pict_type = AV_PICTURE_TYPE_P;
break;
case X264_TYPE_B:
case X264_TYPE_BREF:
pict_type = AV_PICTURE_TYPE_B;
break;
default:
pict_type = AV_PICTURE_TYPE_NONE;
}
#if FF_API_CODED_FRAME
FF_DISABLE_DEPRECATION_WARNINGS
ctx->coded_frame->pict_type = pict_type;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
pkt->flags |= AV_PKT_FLAG_KEY*pic_out.b_keyframe;
if (ret) {
ff_side_data_set_encoder_stats(pkt, (pic_out.i_qpplus1 - 1) * FF_QP2LAMBDA, NULL, 0, pict_type);
#if FF_API_CODED_FRAME
FF_DISABLE_DEPRECATION_WARNINGS
ctx->coded_frame->quality = (pic_out.i_qpplus1 - 1) * FF_QP2LAMBDA;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
}
*got_packet = ret;
return 0;
}
有关X264编码的代码在以后分析X264的时候再进行详细分析。在这里我们可以简单看出该函数中有一个do while循环,其中调用了x264_encoder_encode()完成了编码的工作。