OpenCV本身就已经包含很多视频读写的基础能力,但是如果需要更全面的音视频编解码的能力,目前最通用的跨平台方案就是OpenCV+FFMpeg,以下就来介绍如何透过FFMpeg对cv::Mat进行编码,并写入到文件中。
首先,需要引入一些必要头文件,注意FFMpeg的header在C++中引入需要加入extern "C"
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include <opencv2/imgcodecs.hpp>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
}
接著创建一个方法,如下
int writeVideo(const std::string& video_path, std::vector<cv::Mat>& frames, int width, int height, int fps);
首先需要指定编码的格式,这里以MP4为例,可以根据需求修改
const AVCodec* videoCodec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
AVCodecContext* videoCodecContext = avcodec_alloc_context3(videoCodec);
if (!videoCodecContext) {
std::cerr << "Error allocating video codec context" << std::endl;
exit(EXIT_FAILURE);
}
接著需要建构编码视频的Context,详细的参数介绍可以参考这里
videoCodecContext->bit_rate = 200000;
videoCodecContext->width = width;
videoCodecContext->height = height;
//videoCodecContext->time_base = (AVRational){ 1, fps }; //error C4576: a parenthesized type followed by an initializer list is a non-standard explicit type conversion syntax
//videoCodecContext->framerate = (AVRational){ fps, 1 };
videoCodecContext->time_base.num = 1;
videoCodecContext->time_base.den = fps;
videoCodecContext->framerate.num = fps;
videoCodecContext->framerate.den = 1;
videoCodecContext->gop_size = 12;
videoCodecContext->max_b_frames = 0;
videoCodecContext->pix_fmt = AV_PIX_FMT_YUV420P;
if (formatContext->oformat->flags & AVFMT_GLOBALHEADER) {
videoCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
Encoder的参数可以透过这样设置,详细的介绍可以参考这里
// 设置encoder参数
AVDictionary *param = 0;
av_dict_set(&param, "preset", "medium", 0);
av_dict_set(&param, "tune", "zerolatency", 0);
//开启Codec并写入设置
error = avcodec_open2(videoCodecContext, videoCodec, &param);
接著就可以开始循环写入cv::Mat
for (const cv::Mat& frame : frames) {
// convert the cv::Mat to an AVFrame
AVFrame* avFrame = av_frame_alloc();
avFrame->format = videoCodecContext->pix_fmt;
avFrame->width = width;
avFrame->height = height;
error = av_frame_get_buffer(avFrame, 0);
checkError(error, "Error allocating frame buffer");
struct SwsContext* frameConverter = sws_getContext(width, height, AV_PIX_FMT_BGR24, width, height, videoCodecContext->pix_fmt, SWS_BICUBIC, nullptr, nullptr, nullptr);
uint8_t* srcData[AV_NUM_DATA_POINTERS] = { frame.data };
int srcLinesize[AV_NUM_DATA_POINTERS] = { static_cast<int>(frame.step) };
sws_scale(frameConverter, srcData, srcLinesize, 0, height, avFrame->data, avFrame->linesize);
sws_freeContext(frameConverter);
...
}
底下附上完整代码
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include <opencv2/imgcodecs.hpp>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
}
// Helper: abort the process with a message when an FFmpeg call fails.
// FFmpeg APIs report failure with a negative return code; non-negative is success.
inline void checkError(int error, const std::string& message) {
    if (error >= 0) {
        return;  // success — nothing to do
    }
    // av_err2str is unusable under MSVC (error C4576: compound literal is a
    // non-standard explicit type conversion in C++), so print the raw code.
    std::cerr << message << ": " << std::to_string(error) << std::endl;
    exit(EXIT_FAILURE);
}
int writeVideo(const std::string& video_path, std::vector& frames, int width, int height, int fps) {
// initialize FFmpeg
av_log_set_level(AV_LOG_ERROR);
avformat_network_init();
// create the output video context
AVFormatContext* formatContext = nullptr;
int error = avformat_alloc_output_context2(&formatContext, nullptr, nullptr, video_path.c_str());
checkError(error, "Error creating output context");
// create the video stream
AVStream* videoStream = avformat_new_stream(formatContext, nullptr);
if (!videoStream) {
std::cerr << "Error creating video stream" << std::endl;
exit(EXIT_FAILURE);
}
// create the video codec context
const AVCodec* videoCodec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
AVCodecContext* videoCodecContext = avcodec_alloc_context3(videoCodec);
if (!videoCodecContext) {
std::cerr << "Error allocating video codec context" << std::endl;
exit(EXIT_FAILURE);
}
videoCodecContext->bit_rate = 200000;
videoCodecContext->width = width;
videoCodecContext->height = height;
//videoCodecContext->time_base = (AVRational){ 1, fps }; //error C4576: a parenthesized type followed by an initializer list is a non-standard explicit type conversion syntax
//videoCodecContext->framerate = (AVRational){ fps, 1 };
videoCodecContext->time_base.num = 1;
videoCodecContext->time_base.den = fps;
videoCodecContext->framerate.num = fps;
videoCodecContext->framerate.den = 1;
videoCodecContext->gop_size = 12;
videoCodecContext->max_b_frames = 0;
videoCodecContext->pix_fmt = AV_PIX_FMT_YUV420P;
if (formatContext->oformat->flags & AVFMT_GLOBALHEADER) {
videoCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
// 设置encoder参数
AVDictionary *param = 0;
av_dict_set(¶m, "preset", "medium", 0);
av_dict_set(¶m, "tune", "zerolatency", 0);
error = avcodec_open2(videoCodecContext, videoCodec, ¶m);
checkError(error, "Error opening");
error = avcodec_parameters_from_context(videoStream->codecpar, videoCodecContext);
checkError(error, "Error setting video codec parameters");
// open the output file
error = avio_open(&formatContext->pb, video_path.c_str(), AVIO_FLAG_WRITE);
checkError(error, "Error opening output file");
// write the video file header
error = avformat_write_header(formatContext, nullptr);
checkError(error, "Error writing video file header");
AVPacket* packet = av_packet_alloc();
if (!packet) {
std::cerr << "Error allocating packet" << std::endl;
exit(EXIT_FAILURE);
}
for (const cv::Mat& frame : frames) {
// convert the cv::Mat to an AVFrame
AVFrame* avFrame = av_frame_alloc();
avFrame->format = videoCodecContext->pix_fmt;
avFrame->width = width;
avFrame->height = height;
error = av_frame_get_buffer(avFrame, 0);
checkError(error, "Error allocating frame buffer");
struct SwsContext* frameConverter = sws_getContext(width, height, AV_PIX_FMT_BGR24, width, height, videoCodecContext->pix_fmt, SWS_BICUBIC, nullptr, nullptr, nullptr);
uint8_t* srcData[AV_NUM_DATA_POINTERS] = { frame.data };
int srcLinesize[AV_NUM_DATA_POINTERS] = { static_cast(frame.step) };
sws_scale(frameConverter, srcData, srcLinesize, 0, height, avFrame->data, avFrame->linesize);
sws_freeContext(frameConverter);
// encode the AVFrame
// avFrame->pts = packet->pts;
avFrame->pts=frame_count*(videoStream->time_base.den)/((videoStream->time_base.num)*fps);
frame_count += 1;
error = avcodec_send_frame(videoCodecContext, avFrame);
checkError(error, "Error sending frame to video codec");
while (error >= 0) {
error = avcodec_receive_packet(videoCodecContext, packet);
if (error == AVERROR(EAGAIN) || error == AVERROR_EOF) {
break;
}
checkError(error, "Error encoding video frame");
// write the encoded packet to the output file
packet->stream_index = videoStream->index;
error = av_interleaved_write_frame(formatContext, packet);
checkError(error, "Error writing video packet");
av_packet_unref(packet);
}
av_frame_free(&avFrame);
}
// flush the rest of the packets
int ret = 0;
avcodec_send_frame(videoCodecContext, nullptr);
do
{
av_packet_unref(packet);
ret = avcodec_receive_packet(videoCodecContext, packet);
if (!ret)
{
error = av_interleaved_write_frame(formatContext, packet);
checkError(error, "Error writing video packet");
}
} while (!ret);
av_write_trailer(formatContext);
avformat_close_input(&formatContext);
// clean up
av_packet_free(&packet);
avcodec_free_context(&videoCodecContext);
avformat_free_context(formatContext);
avformat_network_deinit();
return EXIT_SUCCESS;
}
std::vector readVideo(const std::string video_path) {
// initialize FFmpeg
av_log_set_level(AV_LOG_ERROR);
avformat_network_init();
AVFormatContext* formatContext = nullptr;
int error = avformat_open_input(&formatContext, video_path.c_str(), nullptr, nullptr);
checkError(error, "Error opening input file");
//Read packets of a media file to get stream information.
error = avformat_find_stream_info(formatContext, nullptr);
checkError(error, "Error avformat find stream info");
// find the video stream
AVStream* videoStream = nullptr;
for (unsigned int i = 0; i < formatContext->nb_streams; i++) {
if (formatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && !videoStream) {
videoStream = formatContext->streams[i];
}
}
if (!videoStream) {
std::cerr << "Error: input file does not contain a video stream" << std::endl;
exit(EXIT_FAILURE);
}
// create the video codec context
const AVCodec* videoCodec = avcodec_find_decoder(videoStream->codecpar->codec_id);
AVCodecContext* videoCodecContext = avcodec_alloc_context3(videoCodec);
if (!videoCodecContext) {
std::cerr << "Error allocating video codec context" << std::endl;
exit(EXIT_FAILURE);
}
std::cout << "::informations::\n";
std::cout << " bit_rate:" << videoCodecContext->bit_rate << "\n";
std::cout << " width:" << videoCodecContext->width << "\n";
std::cout << " height:" << videoCodecContext->height << "\n";
std::cout << " gop_size:" << videoCodecContext->gop_size << "\n";
std::cout << " max_b_frames:" << videoCodecContext->max_b_frames << "\n";
std::cout << " pix_fmt:" << videoCodecContext->pix_fmt << "\n";
error = avcodec_parameters_to_context(videoCodecContext, videoStream->codecpar);
checkError(error, "Error setting video codec context parameters");
error = avcodec_open2(videoCodecContext, videoCodec, nullptr);
checkError(error, "Error opening video codec");
// create the frame scaler
int width = videoCodecContext->width;
int height = videoCodecContext->height;
struct SwsContext* frameScaler = sws_getContext(width, height, videoCodecContext->pix_fmt, width, height, AV_PIX_FMT_BGR24, SWS_BICUBIC, nullptr, nullptr, nullptr);
// read the packets and decode the video frames
std::vector videoFrames;
AVPacket packet;
while (av_read_frame(formatContext, &packet) == 0) {
if (packet.stream_index == videoStream->index) {
// decode the video frame
AVFrame* frame = av_frame_alloc();
int gotFrame = 0;
error = avcodec_send_packet(videoCodecContext, &packet);
checkError(error, "Error sending packet to video codec");
error = avcodec_receive_frame(videoCodecContext, frame);
//There is not enough data for decoding the frame, have to free and get more data
if (error == AVERROR(EAGAIN))
{
av_frame_unref(frame);
av_freep(frame);
continue;
}
if (error == AVERROR_EOF)
{
std::cerr << "AVERROR_EOF" << std::endl;
break;
}
checkError(error, "Error receiving frame from video codec");
if (error == 0) {
gotFrame = 1;
}
if (gotFrame) {
// scale the frame to the desired format
AVFrame* scaledFrame = av_frame_alloc();
av_image_alloc(scaledFrame->data, scaledFrame->linesize, width, height, AV_PIX_FMT_BGR24, 32);
sws_scale(frameScaler, frame->data, frame->linesize, 0, height, scaledFrame->data, scaledFrame->linesize);
// copy the frame data to a cv::Mat object
cv::Mat mat(height, width, CV_8UC3, scaledFrame->data[0], scaledFrame->linesize[0]);
//Show mat image for testing
//cv::imshow("mat", mat);
//cv::waitKey(100); //Wait 100msec (relativly long time - for testing).
videoFrames.push_back(mat.clone());
// clean up
av_freep(&scaledFrame->data[0]);
av_frame_free(&scaledFrame);
}
av_frame_free(&frame);
}
av_packet_unref(&packet);
}
// clean up
sws_freeContext(frameScaler);
avcodec_free_context(&videoCodecContext);
avformat_close_input(&formatContext);
return videoFrames;
}
int main() {
auto videoFrames = readVideo("input.mp4");
cv::imwrite("test.png", videoFrames[10]);
writeVideo("outnow.mp4", videoFrames, videoFrames[0].cols, videoFrames[0].rows, 30);