ffmpeg硬件解码可以使用最新的vulkan来做,基本上来说,不挑操作系统是比较重要的,如果直接使用cuda也是非常好的选择。
AVPixelFormat sourcepf = AV_PIX_FMT_NV12;// AV_PIX_FMT_NV12;// AV_PIX_FMT_YUV420P;
AVPixelFormat destpf = AV_PIX_FMT_YUV420P; //AV_PIX_FMT_BGR24
AVBufferRef* hw_device_ctx = NULL;
下面的class 是我封装了一些功能,只是示例,读者可以自行修改,读的是rtsp 流,后面准备推流到gb28181上。
class c_AVDecoder:public TThreadRunable
{
AVPixelFormat sourcepf = AV_PIX_FMT_NV12;// AV_PIX_FMT_NV12;// AV_PIX_FMT_YUV420P;
AVPixelFormat destpf = AV_PIX_FMT_YUV420P; //AV_PIX_FMT_BGR24
AVBufferRef* hw_device_ctx = NULL;
SDLDraw v_draw;
int hw_decoder_init(AVCodecContext* ctx, const enum AVHWDeviceType type)
{
int err = 0;
if ((err = av_hwdevice_ctx_create(&hw_device_ctx, type,
NULL, NULL, 0)) < 0) {
fprintf(stderr, "Failed to create specified HW device.\n");
return err;
}
ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
return err;
}
//SDLDraw g_draw;
struct SwsContext* img_convert_ctx = NULL;
AVFormatContext* input_ctx = NULL;
int video_stream, ret;
AVStream* video = NULL;
AVCodecContext* decoder_ctx = NULL;
AVCodec* decoder = NULL;
AVPacket packet;
enum AVHWDeviceType type;
//转换成yuv420 或者rgb
int decode_write(AVCodecContext* avctx, AVPacket* packet)
{
AVFrame* frame = NULL, * sw_frame = NULL;
AVFrame* tmp_frame = NULL;
//AVFrame* pFrameDst = NULL;
unsigned char* out_buffer = NULL;
int ret = 0;
ret = avcodec_send_packet(avctx, packet);
if (ret < 0) {
fprintf(stderr, "Error during decoding\n");
return ret;
}
if (img_convert_ctx == NULL)
{
img_convert_ctx = sws_getContext(avctx->width, avctx->height, sourcepf,
WIDTH, HEIGHT, destpf, SWS_FAST_BILINEAR, NULL, NULL, NULL);
}
static int i = 0;
while (1) {
if (!(frame = av_frame_alloc()) || !(sw_frame = av_frame_alloc())) {
fprintf(stderr, "Can not alloc frame\n");
ret = AVERROR(ENOMEM);
goto fail;
}
//avctx->get_buffer2
ret = avcodec_receive_frame(avctx, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
av_frame_free(&frame);
av_frame_free(&sw_frame);
return 0;
}
else if (ret < 0) {
fprintf(stderr, "Error while decoding\n");
goto fail;
}
if (frame->format == hw_pix_fmt) {
/* retrieve data from GPU to CPU */
sw_frame->format = sourcepf; // AV_PIX_FMT_NV12;// AV_PIX_FMT_YUV420P;// AV_PIX_FMT_NV12;// AV_PIX_FMT_BGR24;// AV_PIX_FMT_YUV420P;
if ((ret = av_hwframe_transfer_data(sw_frame, frame, 0)) < 0) {
fprintf(stderr, "Error transferring the data to system memory\n");
goto fail;
}
//av_frame_copy_props(sw_frame, frame);
tmp_frame = sw_frame;
}
else
tmp_frame = frame;
#if 0
if (out_buffer == NULL)
{
out_buffer = (unsigned char*)av_malloc(av_image_get_buffer_size(destpf,
WIDTH,
HEIGHT,
1));
//av_image_alloc()
av_image_fill_arrays(pFrameDst.data, pFrameDst.linesize, out_buffer,
destpf, WIDTH, HEIGHT, 1);
}
#endif
AVFrame* pFrameDst = av_frame_alloc();
av_image_alloc(pFrameDst->data, pFrameDst->linesize, WIDTH, HEIGHT, destpf, 1);
#if 1
sws_scale(img_convert_ctx, tmp_frame->data, tmp_frame->linesize,
0, avctx->height, pFrameDst->data, pFrameDst->linesize);
#endif
//cout << "into " << i++ << endl;
#if 1
if (!v_draw.push(pFrameDst))
{
av_freep(&pFrameDst->data[0]);
av_frame_free(&pFrameDst);
}
#endif
#if 0
size = av_image_get_buffer_size((AVPixelFormat)tmp_frame->format, tmp_frame->width,
tmp_frame->height, 1);
buffer = (uint8_t*)av_malloc(size);
if (!buffer) {
fprintf(stderr, "Can not alloc buffer\n");
ret = AVERROR(ENOMEM);
goto fail;
}
ret = av_image_copy_to_buffer(buffer, size,
(const uint8_t* const*)tmp_frame->data,
(const int*)tmp_frame->linesize, (AVPixelFormat)tmp_frame->format,
tmp_frame->width, tmp_frame->height, 1);
if (ret < 0) {
fprintf(stderr, "Can not copy image to buffer\n");
goto fail;
}
if ((ret = (int)fwrite(buffer, 1, size, output_file)) < 0) {
fprintf(stderr, "Failed to dump raw data.\n");
goto fail;
}
#endif
fail:
av_frame_free(&frame);
av_frame_free(&sw_frame);
//av_freep(&buffer);
if (ret < 0)
return ret;
}
}
int func_init(const char* url)
{
const char* stype = "dxva2";
type = av_hwdevice_find_type_by_name(stype);
if (type == AV_HWDEVICE_TYPE_NONE) {
fprintf(stderr, "Device type %s is not supported.\n", stype);
fprintf(stderr, "Available device types:");
while ((type = av_hwdevice_iterate_types(type)) != AV_HWDEVICE_TYPE_NONE)
fprintf(stderr, " %s", av_hwdevice_get_type_name(type));
fprintf(stderr, "\n");
return -1;
}
/* open the input file */
//const char * filename = "h:/video/a.mp4";
AVDictionary* opts = NULL;
av_dict_set(&opts, "rtsp_transport", "tcp", 0);
av_dict_set(&opts, "buffer_size", "1048576", 0);
av_dict_set(&opts, "fpsprobesize", "5", 0);
av_dict_set(&opts, "analyzeduration", "5000000", 0);
if (avformat_open_input(&input_ctx, url, NULL, &opts) != 0) {
fprintf(stderr, "Cannot open input file '%s'\n", url);
return -1;
}
if (avformat_find_stream_info(input_ctx, NULL) < 0) {
fprintf(stderr, "Cannot find input stream information.\n");
return -1;
}
/* find the video stream information */
ret = av_find_best_stream(input_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &decoder, 0);
if (ret < 0) {
fprintf(stderr, "Cannot find a video stream in the input file\n");
return -1;
}
video_stream = ret;
AVStream* stream = input_ctx->streams[video_stream];
float frame_rate = stream->avg_frame_rate.num / stream->avg_frame_rate.den;//每秒多少帧
std::cout << "frame_rate is:" << frame_rate << std::endl;
//优化直接定死
/*for (int i = 0;; i++) {
const AVCodecHWConfig* config = avcodec_get_hw_config(decoder, i);
if (!config) {
fprintf(stderr, "Decoder %s does not support device type %s.\n",
decoder->name, av_hwdevice_get_type_name(type));
return -1;
}
if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX &&
config->device_type == type) {
hw_pix_fmt = config->pix_fmt;
break;
}
}*/
if (!(decoder_ctx = avcodec_alloc_context3(decoder)))
return AVERROR(ENOMEM);
video = input_ctx->streams[video_stream];
if (avcodec_parameters_to_context(decoder_ctx, video->codecpar) < 0)
return -1;
decoder_ctx->get_format = get_hw_format;
if (hw_decoder_init(decoder_ctx, type) < 0)
return -1;
if ((ret = avcodec_open2(decoder_ctx, decoder, NULL)) < 0) {
fprintf(stderr, "Failed to open codec for stream #%u\n", video_stream);
return -1;
}
/* open the file to dump raw data */
//output_file = fopen(argv[3], "w+");
v_draw.init(1280, 720, (int)frame_rate);
v_draw.Start();
/* actual decoding and dump the raw data */
while (ret >= 0) {
if ((ret = av_read_frame(input_ctx, &packet)) < 0)
break;
if (video_stream == packet.stream_index)
{
//这里需要更加精确的计算
ret = decode_write(decoder_ctx, &packet);
//SDL_Delay(1000 / frame_rate);
}
av_packet_unref(&packet);
if (IsStop())
break;
}
/* flush the decoder */
packet.data = NULL;
packet.size = 0;
ret = decode_write(decoder_ctx, &packet);
av_packet_unref(&packet);
avcodec_free_context(&decoder_ctx);
avformat_close_input(&input_ctx);
av_buffer_unref(&hw_device_ctx);
return 0;
}
public:
void Run()
{
while (1)
{
if(!IsStop())
func_init(rtspurl);
}
}
};
首先声明,使用sdl来渲染画面并不是有多好,如果可以,可以使用新的绘制方式,比如直接使用opengl,直接使用vulkan来绘制是更好的,sdl封装了opengl,d3d等绘制方式,但是也失去了灵活性,当然,你从头到尾读遍了源码,另当别论,如果要使用均值处理函数来做消除百叶窗效果,摩尔纹,还是直接使用opengl 渲染更为简单,使用glsl语言就行了,甚至要使用非线性函数来增亮图形,最好也是直接使用opengl来绘制。
#pragma once
#include
#define __STDC_CONSTANT_MACROS
#define SDL_MAIN_HANDLED
extern "C"
{
#include
#include
#include
#include
#include
#include
#include
#include
#include "SDL2\SDL.h"
};
#include "c_ringbuffer.h"
#include "TThreadRunable.h"
#include "TYUVMerge.h"
#define SFM_REFRESH_EVENT (SDL_USEREVENT + 1)
#define SFM_BREAK_EVENT (SDL_USEREVENT + 2)
#define WIDTH 640
#define HEIGHT 360
typedef struct
{
int g_fps = 40;
int thread_exit = 0;
int thread_pause = 0;
}s_param;
static int sfp_refresh_thread(void *opaque)
{
s_param * param = (s_param*)opaque;
param->thread_exit = 0;
param->thread_pause = 0;
while (!param->thread_exit)
{
if (!param->thread_pause)
{
SDL_Event event;
event.type = SFM_REFRESH_EVENT;
SDL_PushEvent(&event);
}
SDL_Delay(1000 / param->g_fps);
}
param->thread_exit = 0;
param->thread_pause = 0;
//Break
SDL_Event event;
event.type = SFM_BREAK_EVENT;
SDL_PushEvent(&event);
return 0;
}
class TicToc
{
public:
TicToc()
{
tic();
}
void tic()
{
start = std::chrono::system_clock::now();
}
double toc()
{
end = std::chrono::system_clock::now();
std::chrono::duration<double> elapsed_seconds = end - start;
return elapsed_seconds.count() * 1000;
}
private:
std::chrono::time_point<std::chrono::system_clock> start, end;
};
class SDLDraw :public TThreadRunable//实际上这里可以编码发送出去
{
int m_w = 0, m_h = 0;
SDL_Window *screen = NULL;
SDL_Renderer *sdlRenderer = NULL;
SDL_Texture *sdlTexture = NULL;
SDL_Rect sdlRect;
SDL_Thread *video_tid;
SDL_Event event;
//struct SwsContext *img_convert_ctx = NULL;
bool m_window_init = false;
lfringqueue<AVFrame, 20> v_frames;
lfringqueue<AVFrame, 20> v_frames_1;
lfringqueue<AVFrame, 20> v_frames_2;
s_param v_param;
//画布
AVFrame * v_canvas_frame = NULL;
public:
void init(int w, int h,int fps)
{
if (w != m_w || h != m_h)
{
m_w = w;
m_h = h;
if (v_canvas_frame != NULL)
{
func_uninit();
}
}
v_param.g_fps = fps;
//这是背景画布
if (v_canvas_frame == NULL)
{
v_canvas_frame = av_frame_alloc();
av_image_alloc(v_canvas_frame->data, v_canvas_frame->linesize, 1280, 720, AV_PIX_FMT_YUV420P, 1);
}
}
void func_uninit()
{
av_freep(&v_canvas_frame->data[0]);
av_frame_free(&v_canvas_frame);
}
bool push(AVFrame * frame)
{
//尝试三次插入
return v_frames.enqueue(frame,3);
}
protected:
int draw_init(/*HWND hWnd,*/ )
{
//m_w = 1280;
//m_h = 720;
if (m_window_init == false)
{
SDL_Init(SDL_INIT_VIDEO);
SDL_SetHint(SDL_HINT_RENDER_SCALE_QUALITY, "1");
screen = SDL_CreateWindow("FF", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED,
1920, 1000, SDL_WINDOW_SHOWN/* SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE*/);
//screen = SDL_CreateWindowFrom((void *)(hWnd));
for (int i = 0; i < SDL_GetNumRenderDrivers(); ++i)
{
SDL_RendererInfo rendererInfo = {};
SDL_GetRenderDriverInfo(i, &rendererInfo);
cout << i << " " << rendererInfo.name << endl;
//if (rendererInfo.name != std::string("direct3d11"))
//{
// continue;
//}
}
if (screen == NULL)
{
//printf("Window could not be created! SDL_Error: %s\n", SDL_GetError());
return -1;
}
sdlRenderer = SDL_CreateRenderer(screen,0, SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC);
//sdlRenderer = SDL_CreateRenderer(screen, -1, SDL_RENDERER_ACCELERATED);
//SDL_SetHint(SDL_HINT_RENDER_DRIVER, "opengl");
m_window_init = true;
SDL_Thread *video_tid = SDL_CreateThread(sfp_refresh_thread, NULL, &v_param);
}
return 0;
}
void draw(uint8_t *data[], int linesize[])
{
if (sdlTexture != NULL)
{
SDL_DestroyTexture(sdlTexture);
sdlTexture = NULL;
}
//m_w = w;
//m_h = h;
if (sdlTexture == NULL)
{
sdlTexture = SDL_CreateTexture(sdlRenderer, SDL_PIXELFORMAT_IYUV,
SDL_TEXTUREACCESS_STREAMING, m_w, m_h);
sdlRect.x = 0;
sdlRect.y = 0;
sdlRect.w = m_w;
sdlRect.h = m_h;// nh;
}
SDL_UpdateYUVTexture(sdlTexture, &sdlRect,
data[0], linesize[0],
data[1], linesize[1],
data[2], linesize[2]);
//SDL_RenderClear(sdlRenderer);
SDL_RenderCopy(sdlRenderer, sdlTexture, NULL, NULL);
SDL_RenderPresent(sdlRenderer);
//video_tid = SDL_CreateThread(sfp_refresh_thread, NULL, NULL);
}
public:
void Run()
{
draw_init();
AVFrame * frame = NULL;
//while (!IsStop())
//{
// //解码播放 直接播放
// if (v_frames.dequeue(&frame))
// {
// //播放
// draw(frame->data, frame->linesize,WIDTH,HEIGHT);
// av_frame_free(&frame);
// SDL_Delay(10);
// }
// else
// {
// SDL_Delay(10);
// }
//}
#if 1
/*SDL_Thread *video_tid = SDL_CreateThread(sfp_refresh_thread, NULL, &v_param);*/
int tick = 0;
double x = 0.0f;
for (;;)
{
if (IsStop())
{
v_param.thread_exit = 1;
//break;
}
SDL_WaitEvent(&event);
if (event.type == SFM_REFRESH_EVENT)
{
if (v_frames.dequeue(&frame))
{
tick++;
TicToc tt;
//MergeYUV(v_canvas_frame->data[0], 1280, 720,
// frame->data[0], WIDTH, HEIGHT, 1, 10, 10);
MergeYUV_S(v_canvas_frame->data[0], 1280, 720,
frame->data[0], WIDTH, HEIGHT, 10, 10);
x += tt.toc();
//播放
draw(v_canvas_frame->data, v_canvas_frame->linesize);
av_freep(&frame->data[0]);
av_frame_free(&frame);
if (tick == 10)
{
tick = 0;
cout << x << endl;
x = 0.0f;
}
}
}
else if (event.type == SDL_KEYDOWN)
{
if (event.key.keysym.sym == SDLK_SPACE)
v_param.thread_pause = !v_param.thread_pause;
}
else if (event.type == SDL_QUIT)
{
v_param.thread_exit = 1;
}
else if (event.type == SFM_BREAK_EVENT)
{
break;
}
}
#endif
}
SDLDraw()
{
}
~SDLDraw()
{
func_uninit();
}
};
//SDL_FillRect(gScreenSurface, NULL, SDL_MapRGB(gScreenSurface->format, 0xFF, 0x00, 0x00));
dnn_processing从2018年开始就已经是FFmpeg中的一个视频filter,支持所有基于深度学习模型的图像处理算法,即输入和输出都是AVFrame,而处理过程使用的是深度学习模型。为什么要开发这样一个filter,因为作为FFmpeg DNN模块的maintainer,dnn_processing就是一个很好的使用者入手功能,读ffmpeg的代码就可以知道,其实自己写这些功能就行了,至于支持视频分析功能的filter,先不进行编码,主要考虑如何支持异步建立流水线,如何启用batch size,从而最大化的用好系统的并行计算能力,本来ffmpeg硬件解码后,如何直接在gpu中进行swscale,其实是不支持的,这一部分要自己写代码来支持,这是另外一回话,我们先使用dnn_processing模块再说,
这个模块可以完成针对灰度图的sobel算子的调用,其输入输出的格式是grayf32。除了这个dnn_processing还可以完成sr(超分辨率)和derain(去除雨点)filter的功能,下面使用ffmpeg命令演示对yuv和rgb格式的支持
./ffmpeg -i night.jpg -vf scale=iw*2:ih*2,format=yuv420p,dnn_processing=dnn_backend=tensorflow:model=./srcnn.pb:input=x:output=y srcnn.jpg
./ffmpeg -i small.jpg -vf format=yuv420p,scale=iw*2:ih*2,dnn_processing=dnn_backend=native:model=./srcnn.model:input=x:output=y -q:v 2 small.jpgsrcnn.jpg
./ffmpeg -i small.jpg -vf format=yuv420p,dnn_processing=dnn_backend=native:model=./espcn.model:input=x:output=y small_b.jpg
./ffmpeg -i rain.jpg -vf format=rgb24,dnn_processing=dnn_backend=native:model=./can.model:input=x:output=y derain.jpg
下图可以看出放大两倍以后还是可以的,不是很模糊,其实是因为我们的训练模型很浅,还没有好好做训练,即使如此,我仔细查看过线性差值比这个图像要差。
后面第二篇就要进行硬件解码,到提升质量filter,到输出编码了,请等待第二篇