本文将使用ffmpeg提取每帧视频的运动向量MV,并使用opencv将其绘制出来。
了解视频编码的人都对运动向量不陌生,它是在进行帧间预测时标记当前块和参考块位置关系的一个向量。帧间预测包括单向预测(P帧)和双向预测(B帧),单向预测只需要一个MV,双向预测需要两个MV。
MV在ffmpeg中的定义如下:
/*
 * Per-block motion vector record, quoted from FFmpeg. The decoder exports an
 * array of these as frame side data of type AV_FRAME_DATA_MOTION_VECTORS.
 */
typedef struct AVMotionVector {
/**
* Where the current macroblock comes from; negative value when it comes
* from the past, positive value when it comes from the future.
* XXX: set exact relative ref frame reference instead of a +/- 1 "direction".
*/
// Tells whether the reference block lies in an earlier frame (negative) or a later frame (positive).
int32_t source;
/**
* Width and height of the block.
*/
// Width and height of the block this vector belongs to.
uint8_t w, h;
/**
* Absolute source position. Can be outside the frame area.
*/
int16_t src_x, src_y;
/**
* Absolute destination position. Can be outside the frame area.
*/
int16_t dst_x, dst_y;
/**
* Extra flag information.
* Currently unused.
*/
uint64_t flags;
/**
* Motion vector
* src_x = dst_x + motion_x / motion_scale
* src_y = dst_y + motion_y / motion_scale
*/
int32_t motion_x, motion_y;
uint16_t motion_scale;
} AVMotionVector;
ffmpeg的示例代码(doc/examples/extract_mvs.c)中提供了提取mv的示例程序。下面的代码在该示例的基础上,在提取每帧的mv之后使用opencv将其绘制在解码图像上。
extern "C"
{
#include <libavutil/motion_vector.h>
#include <libavformat/avformat.h>
}
#include <cstdio>
#include <cstring>
#include <opencv2/opencv.hpp>
using namespace cv;
/* File-scope state shared by the functions below. */

// Format (demuxer) context; NOTE(review): presumably opened in main() — not visible in this excerpt.
static AVFormatContext *fmt_ctx = NULL;
// Decoder context; assigned by open_codec_context() and used by decode_packet().
static AVCodecContext *video_dec_ctx = NULL;
// The selected video stream; assigned by open_codec_context().
static AVStream *video_stream = NULL;
// Input file name; only read here for the "Could not find ... stream" error message.
static const char *src_filename = NULL;
// Index of the selected stream inside fmt_ctx->streams; -1 until open_codec_context() succeeds.
static int video_stream_idx = -1;
// Reusable frame that avcodec_receive_frame() decodes into.
static AVFrame *frame = NULL;
// Number of frames received from the decoder so far.
static int video_frame_count = 0;
// Raw output file; main() opens it as "out.yuv".
FILE *fout;
// OpenCV video writer that receives every frame after the motion vectors are drawn on it.
VideoWriter out;
static int decode_packet(const AVPacket *pkt)
{
int ret = avcodec_send_packet(video_dec_ctx, pkt);
if (ret < 0) {
printf("Error while sending a packet to the decoder: %s\n");
return ret;
}
while (ret >= 0) {
ret = avcodec_receive_frame(video_dec_ctx, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
}
else if (ret < 0) {
printf("Error while receiving a frame from the decoder: %s\n");
return ret;
}
if (ret >= 0) {
int i;
AVFrameSideData *sd;
video_frame_count++;
sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MOTION_VECTORS);
//获取每帧数据
cv::Mat yuvImg;
yuvImg.create(frame->height * 3 / 2, frame->width, CV_8UC1);
memcpy(yuvImg.data, frame->data[0], frame->linesize[0] * frame->height*sizeof(uint8_t));
memcpy(yuvImg.data + frame->linesize[0] * frame->height*sizeof(uint8_t), frame->data[1], frame->linesize[1] * frame->height/2*sizeof(uint8_t));
memcpy(yuvImg.data + (frame->linesize[0] * frame->height + frame->linesize[1] * frame->height / 2)*sizeof(uint8_t), frame->data[2], frame->linesize[2] * frame->height / 2 * sizeof(uint8_t));
cv::Mat rgbImg;
cv::cvtColor(yuvImg, rgbImg, CV_YUV2BGR_I420);
if (sd) {
const AVMotionVector *mvs = (const AVMotionVector *)sd->data;
for (i = 0; i < sd->size / sizeof(*mvs); i++) {
const AVMotionVector *mv = &mvs[i];
//绘制mv
line(rgbImg, Point(mv->src_x, mv->src_y), Point(mv->dst_x, mv->dst_y), Scalar(0, 0, 255));
}
}
//将带mv的帧写入文件
out << rgbImg;
av_frame_unref(frame);
}
}
return 0;
}
/*
 * Find the best stream of the given media type in fmt_ctx, create a decoder
 * for it with motion-vector export enabled ("flags2" = "+export_mvs"), and
 * publish the result through the globals video_stream_idx, video_stream and
 * video_dec_ctx.
 *
 * fmt_ctx: format context to search for the stream.
 * type:    media type of the stream to open.
 *
 * Returns 0 on success or a negative AVERROR code; on failure the globals are
 * left untouched and everything allocated here is released.
 */
static int open_codec_context(AVFormatContext *fmt_ctx, enum AVMediaType type)
{
    /* NOTE(review): newer FFmpeg releases declare the decoder out-parameter of
     * av_find_best_stream() as const AVCodec ** — confirm against the FFmpeg
     * version this file is built with. */
    AVCodec *dec = NULL;

    int ret = av_find_best_stream(fmt_ctx, type, -1, -1, &dec, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not find %s stream in input file '%s'\n",
                av_get_media_type_string(type), src_filename);
        return ret;
    }

    const int stream_idx = ret;
    AVStream *st = fmt_ctx->streams[stream_idx];

    AVCodecContext *dec_ctx = avcodec_alloc_context3(dec);
    if (!dec_ctx) {
        fprintf(stderr, "Failed to allocate codec\n");
        return AVERROR(EINVAL);
    }

    ret = avcodec_parameters_to_context(dec_ctx, st->codecpar);
    if (ret < 0) {
        fprintf(stderr, "Failed to copy codec parameters to codec context\n");
        avcodec_free_context(&dec_ctx); /* was leaked on this path */
        return ret;
    }

    /* Init the video decoder */
    AVDictionary *opts = NULL;
    av_dict_set(&opts, "flags2", "+export_mvs", 0);
    ret = avcodec_open2(dec_ctx, dec, &opts);
    /* avcodec_open2() leaves unconsumed entries in opts; free them either way. */
    av_dict_free(&opts);
    if (ret < 0) {
        fprintf(stderr, "Failed to open %s codec\n",
                av_get_media_type_string(type));
        avcodec_free_context(&dec_ctx); /* was leaked on this path */
        return ret;
    }

    video_stream_idx = stream_idx;
    video_stream = st;
    video_dec_ctx = dec_ctx;
    return 0;
}
int main(int argc, char **argv)
{
fout = fopen("out.yuv","wb");
//out.open("out.avi", CV_FOURCC('X', 'V', 'I', 'D'),25, Size(640, 272));
out.open("out.mp4", CV_FOURCC('D', 'I', 'V', 'X'), 25, Size(640, 272));
int ret = 0;
AVPacket pkt = { 0 };
if (argc != 2) {
fprintf(stderr, "Usage: %s
ffmpeg中提取的mv有以下几个问题:
1. 没有给出宏块在图像中的位置;
2. 对于双向预测没有特别指出其两个mv;
3. 没有指出其具体的参考图像。
感兴趣的请关注微信公众号Video Coding