1-FPGA硬件加速-YUV_YCbCr

这是对《基于Matlab与FPGA的图像处理教程》的学习笔记,代码和内容摘取自书中。
心得: 使用FPGA进行硬件加速的重点是消除或者减少浮点数运算,转换为定点运算,然后通过pipeline流水设计转为并行实现加速。

原理和方法

RGB与YUV/YCbCr444之间的原始转换公式来自生物仿真学(Biometric)的实验结果。使用摄像头或者其他传感器进行转换时,应先确认其采用的转换公式(不同标准的参数会有不同,用错参数会导致偏色),下面是常用的计算公式。
1-FPGA硬件加速-YUV_YCbCr_第1张图片
1-FPGA硬件加速-YUV_YCbCr_第2张图片
公式放大256倍(也就是向高位移8位)得到参数
然后忽略小数(fpga中消耗资源较大)进行运算
运算完毕后移位回来
Y  = ( R*76  + G*150 + B*29 ) >> 8
Cb = (-R*43  - G*84  + B*128 + 32768) >> 8
Cr = ( R*128 - G*107 - B*20  + 32768) >> 8

Matlab程序

clc;

% -------------------------------------------------------------------------
% Load the source picture from disk and show it in the first subplot tile
IMG1 = imread('../../0_images/Scart.jpg');    % read the jpg image
h = size(IMG1,1);         % image height (number of rows)
w = size(IMG1,2);         % image width  (number of columns)
subplot(221);imshow(IMG1);title('RGB Image');

% -------------------------------------------------------------------------
% Fixed-point RGB -> YCbCr conversion (coefficients pre-scaled by 256):
%   Y  = ( R*76  + G*150 + B*29 ) >> 8
%   Cb = (-R*43  - G*84  + B*128 + 32768) >> 8
%   Cr = ( R*128 - G*107 - B*20  + 32768) >> 8
% The three formulas are applied to whole colour planes at once; bitshift
% operates element-wise, so every pixel gets the same result as a
% pixel-by-pixel evaluation of the formulas.
IMG1 = double(IMG1);      % leave uint8 so the weighted sums do not saturate
R = IMG1(:,:,1);          % red plane
G = IMG1(:,:,2);          % green plane
B = IMG1(:,:,3);          % blue plane

IMG_YCbCr = zeros(h,w,3);
IMG_YCbCr(:,:,1) = bitshift(( R*76 + G*150 + B*29), -8);
IMG_YCbCr(:,:,2) = bitshift((-R*43 - G*84 + B*128 + 32768), -8);
IMG_YCbCr(:,:,3) = bitshift(( R*128 - G*107 - B*20 + 32768), -8);

% -------------------------------------------------------------------------
% Show each output channel as a grayscale picture in the remaining tiles
IMG_YCbCr = uint8(IMG_YCbCr);
channelTitles = {'Y Channel', 'Cb Channel', 'Cr Channel'};
for k = 1 : 3
    subplot(2, 2, k + 1);
    imshow(IMG_YCbCr(:,:,k));
    title(channelTitles{k});
end

1-FPGA硬件加速-YUV_YCbCr_第3张图片

Verilog程序

这里需要注意的是pipeline,在FPGA中把这套公式拆解成了三个step。
第一步是乘积,会延时1个clk。
第二步是累加,会延时1个clk。
第三步是移位,会延时1个clk。
综上所述,需要使用3个clk的延时来实现pipeline。

`timescale 1ns/1ns
module VIP_RGB888_YCbCr444
(
    //global clock
    input               clk,                //pixel clock; every register in this module is clocked by it
    input               rst_n,              //active-low asynchronous reset (clears the vsync/href delay line)

    //Image data prepared to be processed
    input               per_img_vsync,      //Prepared Image data vsync valid signal
    input               per_img_href,       //Prepared Image data href valid signal
    input       [7:0]   per_img_red,        //Prepared Image red data to be processed
    input       [7:0]   per_img_green,      //Prepared Image green data to be processed
    input       [7:0]   per_img_blue,       //Prepared Image blue data to be processed

    //Image data has been processed
    output              post_img_vsync,     //Processed Image data vsync valid signal
    output              post_img_href,      //Processed Image data href valid signal
    output      [7:0]   post_img_Y,         //Processed Image brightness output
    output      [7:0]   post_img_Cb,        //Processed Image blue shading output
    output      [7:0]   post_img_Cr         //Processed Image red shading output
);

//--------------------------------------------
/*********************************************
RGB888 -> YCbCr444 conversion, three-stage pipeline.

Reference (full/pc range YCbCr format):
    Y   =  R*0.299 + G*0.587 + B*0.114
    Cb  = -R*0.169 - G*0.331 + B*0.5   + 128
    Cr  =  R*0.5   - G*0.419 - B*0.081 + 128

Integer form used here (coefficients scaled by 256):
    Y   = (76 *R + 150*G + 29 *B)>>8
    Cb  = (-43*R - 84 *G + 128*B + 32768)>>8
    Cr  = (128*R - 107*G - 20 *B + 32768)>>8

Stage 1 registers the nine products, stage 2 registers the three
sums, stage 3 registers the upper byte of each sum (the >>8).
vsync/href are delayed by the same three clocks.
**********************************************/

//Coefficient magnitudes (signs are applied in stage 2)
localparam [7:0]  K_Y_R   = 8'd76;
localparam [7:0]  K_Y_G   = 8'd150;
localparam [7:0]  K_Y_B   = 8'd29;
localparam [7:0]  K_CB_R  = 8'd43;
localparam [7:0]  K_CB_G  = 8'd84;
localparam [7:0]  K_CB_B  = 8'd128;
localparam [7:0]  K_CR_R  = 8'd128;
localparam [7:0]  K_CR_G  = 8'd107;
localparam [7:0]  K_CR_B  = 8'd20;
localparam [15:0] CHROMA_OFFSET = 16'd32768;    //128 scaled by 256

//--------------------------------------------------
//Stage 1: 8x8 multiplications, results held in 16-bit registers
reg [15:0]  y_red_prod,  y_green_prod,  y_blue_prod;
reg [15:0]  cb_red_prod, cb_green_prod, cb_blue_prod;
reg [15:0]  cr_red_prod, cr_green_prod, cr_blue_prod;
always@(posedge clk)
begin
    //terms of Y
    y_red_prod    <= per_img_red   * K_Y_R;
    y_green_prod  <= per_img_green * K_Y_G;
    y_blue_prod   <= per_img_blue  * K_Y_B;
    //terms of Cb
    cb_red_prod   <= per_img_red   * K_CB_R;
    cb_green_prod <= per_img_green * K_CB_G;
    cb_blue_prod  <= per_img_blue  * K_CB_B;
    //terms of Cr
    cr_red_prod   <= per_img_red   * K_CR_R;
    cr_green_prod <= per_img_green * K_CR_G;
    cr_blue_prod  <= per_img_blue  * K_CR_B;
end

//--------------------------------------------------
//Stage 2: combine the products; all arithmetic is 16-bit unsigned
reg [15:0]  y_sum;
reg [15:0]  cb_sum;
reg [15:0]  cr_sum;
always@(posedge clk)
begin
    y_sum  <= y_red_prod   + y_green_prod  + y_blue_prod;
    cb_sum <= cb_blue_prod - cb_red_prod   - cb_green_prod + CHROMA_OFFSET;
    cr_sum <= cr_red_prod  - cr_green_prod - cr_blue_prod  + CHROMA_OFFSET;
end

//--------------------------------------------------
//Stage 3: divide by 256 by keeping only the upper byte
reg [7:0]   y_byte;
reg [7:0]   cb_byte;
reg [7:0]   cr_byte;
always@(posedge clk)
begin
    y_byte  <= y_sum [15:8];
    cb_byte <= cb_sum[15:8];
    cr_byte <= cr_sum[15:8];
end

//------------------------------------------
//Delay vsync/href by three clocks so they line up with the data path
reg vsync_d1, vsync_d2, vsync_d3;
reg href_d1,  href_d2,  href_d3;
always@(posedge clk or negedge rst_n)
begin
    if(!rst_n)
        begin
        vsync_d1 <= 1'b0;
        vsync_d2 <= 1'b0;
        vsync_d3 <= 1'b0;
        href_d1  <= 1'b0;
        href_d2  <= 1'b0;
        href_d3  <= 1'b0;
        end
    else
        begin
        vsync_d1 <= per_img_vsync;
        vsync_d2 <= vsync_d1;
        vsync_d3 <= vsync_d2;
        href_d1  <= per_img_href;
        href_d2  <= href_d1;
        href_d3  <= href_d2;
        end
end

//Sync outputs come from the last delay tap
assign  post_img_vsync = vsync_d3;
assign  post_img_href  = href_d3;

//Pixel outputs are forced to zero whenever the delayed href is low
assign  post_img_Y     = href_d3 ? y_byte  : 8'd0;
assign  post_img_Cb    = href_d3 ? cb_byte : 8'd0;
assign  post_img_Cr    = href_d3 ? cr_byte : 8'd0;


endmodule

你可能感兴趣的:(FPGA_Matlab学习记录,fpga开发)