图像处理算法的FPGA实现研究

Table_y0.vhd                     \\ y0=c0×t0+c0×t2+c0×t4+c0×t6

library ieee;

use ieee.std_logic_1164.all;

-- table_y0: 16-entry lookup table for one bit-slice of
--   y0 = c0*t0 + c0*t2 + c0*t4 + c0*t6
-- Inputs : t6,t4,t2,t0 - one bit from each of the four operands.
-- Output : data        - 8-bit word selected by the address t6&t4&t2&t0
--                        (c0 = 22 in this table, one c0 per asserted input).
entity table_y0 is
port(t6,t4,t2,t0:in std_logic;
     data:out std_logic_vector(7 downto 0));
end table_y0;

architecture one of table_y0 is

  -- Table address, MSB = t6, LSB = t0.
  signal sel : std_logic_vector(3 downto 0);

begin

  -- The address is built concurrently and the lookup is driven directly from
  -- it. Building it inside a process and reading it in the same activation
  -- would make the lookup use the address from the previous input change.
  sel <= t6 & t4 & t2 & t0;

  with sel select
    data <= "00000000" when "0000",   --   0
            "00010110" when "0001",   --  22
            "00010110" when "0010",   --  22
            "00101100" when "0011",   --  44
            "00010110" when "0100",   --  22
            "00101100" when "0101",   --  44
            "00101100" when "0110",   --  44
            "01000010" when "0111",   --  66
            "00010110" when "1000",   --  22
            "00101100" when "1001",   --  44
            "00101100" when "1010",   --  44
            "01000010" when "1011",   --  66
            "00101100" when "1100",   --  44
            "01000010" when "1101",   --  66
            "01000010" when "1110",   --  66
            "01011000" when "1111",   --  88
            "00000000" when others;   -- any non-'0'/'1' input bit

end one;

 

Table_y1.vhd                      \\ y1=c1×t1+c3×t3+c5×t5+c7×t7

library ieee;

use ieee.std_logic_1164.all;

-- table_y1: 16-entry lookup table for one bit-slice of
--   y1 = c1*t1 + c3*t3 + c5*t5 + c7*t7
-- Inputs : t7,t5,t3,t1 - one bit from each of the four operands.
-- Output : data        - 8-bit word selected by the address t7&t5&t3&t1
--                        (table weights: t1 -> 31, t3 -> 26, t5 -> 17, t7 -> 6).
entity table_y1 is
port(t7,t5,t3,t1:in std_logic;
     data:out std_logic_vector(7 downto 0));
end table_y1;

architecture one of table_y1 is

  -- Table address, MSB = t7, LSB = t1.
  signal sel : std_logic_vector(3 downto 0);

begin

  -- The address is built concurrently and the lookup is driven directly from
  -- it. Building it inside a process and reading it in the same activation
  -- would make the lookup use the address from the previous input change.
  sel <= t7 & t5 & t3 & t1;

  with sel select
    data <= "00000000" when "0000",   --  0
            "00011111" when "0001",   -- 31
            "00011010" when "0010",   -- 26
            "00111001" when "0011",   -- 57
            "00010001" when "0100",   -- 17
            "00110000" when "0101",   -- 48
            "00101011" when "0110",   -- 43
            "01001010" when "0111",   -- 74
            "00000110" when "1000",   --  6
            "00100101" when "1001",   -- 37
            "00100000" when "1010",   -- 32
            "00111111" when "1011",   -- 63
            "00010111" when "1100",   -- 23
            "00110110" when "1101",   -- 54
            "00110001" when "1110",   -- 49
            "01010000" when "1111",   -- 80
            "00000000" when others;   -- any non-'0'/'1' input bit

end one;

 

Table_y2.vhd                       \\ y2=c2×t0+c6×t2-c6×t4-c2×t6

library ieee;

use ieee.std_logic_1164.all;

-- table_y2: 16-entry lookup table for one bit-slice of
--   y2 = c2*t0 + c6*t2 - c6*t4 - c2*t6
-- Inputs : t6,t4,t2,t0 - one bit from each of the four operands.
-- Output : data        - 8-bit two's-complement word selected by t6&t4&t2&t0
--                        (table weights: t0 -> +29, t2 -> +12, t4 -> -12, t6 -> -29).
entity table_y2 is
port(t6,t4,t2,t0:in std_logic;
     data:out std_logic_vector(7 downto 0));
end table_y2;

architecture one of table_y2 is

  -- Table address, MSB = t6, LSB = t0.
  signal sel : std_logic_vector(3 downto 0);

begin

  -- The address is built concurrently and the lookup is driven directly from
  -- it. Building it inside a process and reading it in the same activation
  -- would make the lookup use the address from the previous input change.
  sel <= t6 & t4 & t2 & t0;

  with sel select
    data <= "00000000" when "0000",   --   0
            "00011101" when "0001",   -- +29
            "00001100" when "0010",   -- +12
            "00101001" when "0011",   -- +41
            "11110100" when "0100",   -- -12
            "00010001" when "0101",   -- +17
            "00000000" when "0110",   --   0
            "00011101" when "0111",   -- +29
            "11100011" when "1000",   -- -29
            "00000000" when "1001",   --   0
            "11101111" when "1010",   -- -17
            "00001100" when "1011",   -- +12
            "11010111" when "1100",   -- -41
            "11110100" when "1101",   -- -12
            "11100011" when "1110",   -- -29
            "00000000" when "1111",   --   0
            "00000000" when others;   -- any non-'0'/'1' input bit

end one;

 

Table_y3.vhd                      \\ y3=c3×t1-c7×t3-c1×t5-c5×t7

library ieee;

use ieee.std_logic_1164.all;

-- table_y3: 16-entry lookup table for one bit-slice of
--   y3 = c3*t1 - c7*t3 - c1*t5 - c5*t7
-- Inputs : t7,t5,t3,t1 - one bit from each of the four operands.
-- Output : data        - 8-bit two's-complement word selected by t7&t5&t3&t1
--                        (table weights: t1 -> +26, t3 -> -6, t5 -> -31, t7 -> -17).
entity table_y3 is
port(t7,t5,t3,t1:in std_logic;
     data:out std_logic_vector(7 downto 0));
end table_y3;

architecture one of table_y3 is

  -- Table address, MSB = t7, LSB = t1.
  signal sel : std_logic_vector(3 downto 0);

begin

  -- The address is built concurrently and the lookup is driven directly from
  -- it. Building it inside a process and reading it in the same activation
  -- would make the lookup use the address from the previous input change.
  sel <= t7 & t5 & t3 & t1;

  with sel select
    data <= "00000000" when "0000",   --   0
            "00011010" when "0001",   -- +26
            "11111010" when "0010",   --  -6
            "00010100" when "0011",   -- +20
            "11100001" when "0100",   -- -31
            "11111011" when "0101",   --  -5
            "11011011" when "0110",   -- -37
            "11110101" when "0111",   -- -11
            "11101111" when "1000",   -- -17
            "00001001" when "1001",   --  +9
            "11101001" when "1010",   -- -23
            "00000011" when "1011",   --  +3
            "11010000" when "1100",   -- -48
            "11101010" when "1101",   -- -22
            "11001010" when "1110",   -- -54
            "11100100" when "1111",   -- -28
            "00000000" when others;   -- any non-'0'/'1' input bit

end one;

 

Table_y4.vhd                       \\ y4=c4×t0-c4×t2-c4×t4+c4×t6

library ieee;

use ieee.std_logic_1164.all;

-- table_y4: 16-entry lookup table for one bit-slice of
--   y4 = c4*t0 - c4*t2 - c4*t4 + c4*t6
-- Inputs : t6,t4,t2,t0 - one bit from each of the four operands.
-- Output : data        - 8-bit two's-complement word selected by t6&t4&t2&t0
--                        (table weights: t0 -> +22, t2 -> -22, t4 -> -22, t6 -> +22).
entity table_y4 is
port(t6,t4,t2,t0:in std_logic;
     data:out std_logic_vector(7 downto 0));
end table_y4;

architecture one of table_y4 is

  -- Table address, MSB = t6, LSB = t0.
  signal sel : std_logic_vector(3 downto 0);

begin

  -- The address is built concurrently and the lookup is driven directly from
  -- it. Building it inside a process and reading it in the same activation
  -- would make the lookup use the address from the previous input change.
  sel <= t6 & t4 & t2 & t0;

  with sel select
    data <= "00000000" when "0000",   --   0
            "00010110" when "0001",   -- +22
            -- t2 alone and t4 alone carry a negative weight; they must be -22
            -- so that they agree with "0011" (= 0) and "0110" (= -44).
            "11101010" when "0010",   -- -22
            "00000000" when "0011",   --   0
            "11101010" when "0100",   -- -22
            "00000000" when "0101",   --   0
            "11010100" when "0110",   -- -44
            "11101010" when "0111",   -- -22
            "00010110" when "1000",   -- +22
            "00101100" when "1001",   -- +44
            "00000000" when "1010",   --   0
            "00010110" when "1011",   -- +22
            "00000000" when "1100",   --   0
            "00010110" when "1101",   -- +22
            "11101010" when "1110",   -- -22
            "00000000" when "1111",   --   0
            "00000000" when others;   -- any non-'0'/'1' input bit

end one;

 

Table_y5.vhd                       \\ y5=c5×t1-c1×t3+c7×t5+c3×t7

library ieee;

use ieee.std_logic_1164.all;

-- table_y5: 16-entry lookup table for one bit-slice of
--   y5 = c5*t1 - c1*t3 + c7*t5 + c3*t7
-- Inputs : t7,t5,t3,t1 - one bit from each of the four operands.
-- Output : data        - 8-bit two's-complement word selected by t7&t5&t3&t1
--                        (table weights: t1 -> +17, t3 -> -31, t5 -> +6, t7 -> +26).
entity table_y5 is
port(t7,t5,t3,t1:in std_logic;
     data:out std_logic_vector(7 downto 0));
end table_y5;

architecture one of table_y5 is

  -- Table address, MSB = t7, LSB = t1.
  signal sel : std_logic_vector(3 downto 0);

begin

  -- The address is built concurrently and the lookup is driven directly from
  -- it. Building it inside a process and reading it in the same activation
  -- would make the lookup use the address from the previous input change.
  sel <= t7 & t5 & t3 & t1;

  with sel select
    data <= "00000000" when "0000",   --   0
            "00010001" when "0001",   -- +17
            "11100001" when "0010",   -- -31
            "11110010" when "0011",   -- -14
            "00000110" when "0100",   --  +6
            "00010111" when "0101",   -- +23
            "11100111" when "0110",   -- -25
            "11111000" when "0111",   --  -8
            "00011010" when "1000",   -- +26
            "00101011" when "1001",   -- +43
            "11111011" when "1010",   --  -5
            "00001100" when "1011",   -- +12
            "00100000" when "1100",   -- +32
            "00110001" when "1101",   -- +49
            "00000001" when "1110",   --  +1
            "00010010" when "1111",   -- +18
            "00000000" when others;   -- any non-'0'/'1' input bit

end one;

 

Table_y6.vhd                    \\ y6=c6×t0-c2×t2+c2×t4-c6×t6

library ieee;

use ieee.std_logic_1164.all;

-- table_y6: 16-entry lookup table for one bit-slice of
--   y6 = c6*t0 - c2*t2 + c2*t4 - c6*t6
-- Inputs : t6,t4,t2,t0 - one bit from each of the four operands.
-- Output : data        - 8-bit two's-complement word selected by t6&t4&t2&t0
--                        (table weights: t0 -> +12, t2 -> -29, t4 -> +29, t6 -> -12).
entity table_y6 is
port(t6,t4,t2,t0:in std_logic;
     data:out std_logic_vector(7 downto 0));
end table_y6;

architecture one of table_y6 is

  -- Table address, MSB = t6, LSB = t0.
  signal sel : std_logic_vector(3 downto 0);

begin

  -- The address is built concurrently and the lookup is driven directly from
  -- it. Building it inside a process and reading it in the same activation
  -- would make the lookup use the address from the previous input change.
  sel <= t6 & t4 & t2 & t0;

  with sel select
    data <= "00000000" when "0000",   --   0
            "00001100" when "0001",   -- +12
            "11100011" when "0010",   -- -29
            "11101111" when "0011",   -- -17
            "00011101" when "0100",   -- +29
            "00101001" when "0101",   -- +41
            "00000000" when "0110",   --   0
            "00001100" when "0111",   -- +12
            "11110100" when "1000",   -- -12
            "00000000" when "1001",   --   0
            "11010111" when "1010",   -- -41
            "11100011" when "1011",   -- -29
            "00010001" when "1100",   -- +17
            "00011101" when "1101",   -- +29
            "11110100" when "1110",   -- -12
            "00000000" when "1111",   --   0
            "00000000" when others;   -- any non-'0'/'1' input bit

end one;

 

Table_y7.vhd                       \\ y7=c7×t1-c5×t3+c3×t5-c1×t7

library ieee;

use ieee.std_logic_1164.all;

-- table_y7: 16-entry lookup table for one bit-slice of
--   y7 = c7*t1 - c5*t3 + c3*t5 - c1*t7
-- Inputs : t7,t5,t3,t1 - one bit from each of the four operands.
-- Output : data        - 8-bit two's-complement word selected by t7&t5&t3&t1
--                        (table weights: t1 -> +6, t3 -> -17, t5 -> +26, t7 -> -31).
entity table_y7 is
port(t7,t5,t3,t1:in std_logic;
     data:out std_logic_vector(7 downto 0));
end table_y7;

architecture one of table_y7 is

  -- Table address, MSB = t7, LSB = t1.
  signal sel : std_logic_vector(3 downto 0);

begin

  -- The address is built concurrently and the lookup is driven directly from
  -- it. Building it inside a process and reading it in the same activation
  -- would make the lookup use the address from the previous input change.
  sel <= t7 & t5 & t3 & t1;

  with sel select
    data <= "00000000" when "0000",   --   0
            "00000110" when "0001",   --  +6
            "11101111" when "0010",   -- -17
            "11110101" when "0011",   -- -11
            "00011010" when "0100",   -- +26
            "00100000" when "0101",   -- +32
            "00001001" when "0110",   --  +9
            "00001111" when "0111",   -- +15
            "11100001" when "1000",   -- -31
            "11100111" when "1001",   -- -25
            "11010000" when "1010",   -- -48
            "11010110" when "1011",   -- -42
            "11111011" when "1100",   --  -5
            "00000001" when "1101",   --  +1
            "11101010" when "1110",   -- -22
            "11110000" when "1111",   -- -16
            "00000000" when others;   -- any non-'0'/'1' input bit

end one;

 

Convert.vhd                      \\ t0=x0+x7,t2=x1+x6,t4=x2+x5,t6=x3+x4,

\\t1=x0-x7,t3=x1-x6,t5=x2-x5,t7=x3-x4

library ieee;

use ieee.std_logic_1164.all;

use ieee.std_logic_unsigned.all;

-- convert: butterfly pre-processing stage.
-- Pairs input k with input 7-k and produces the sum on the even-numbered
-- outputs and the difference on the odd-numbered outputs. Inputs and outputs
-- are all 12 bits wide, so no carry/borrow bit is kept.
entity convert is
port(x7,x6,x5,x4,x3,x2,x1,x0:in std_logic_vector(11 downto 0);
     t7,t6,t5,t4,t3,t2,t1,t0:out std_logic_vector(11 downto 0));
end entity convert;

architecture one of convert is
begin

  -- Sums of mirrored inputs.
  t0 <= x0 + x7;
  t2 <= x1 + x6;
  t4 <= x2 + x5;
  t6 <= x3 + x4;

  -- Differences of mirrored inputs.
  t1 <= x0 - x7;
  t3 <= x1 - x6;
  t5 <= x2 - x5;
  t7 <= x3 - x4;

end architecture one;

 

One_d_dct.vhd         \\实现一维离散余弦变换

library ieee;

use ieee.std_logic_1164.all;

use ieee.std_logic_unsigned.all;

-- one_d_dct: bit-serial 8-point transform built from the table_y0..table_y7
-- lookup tables and a shift-and-add accumulator per output.
--
-- Ports
--   x7..x0 : 12-bit inputs, sampled (through "convert") on the rising clk edge
--            at which start = '1'.
--   y7..y0 : 12-bit outputs, bits 13 downto 2 of the internal 16-bit
--            accumulators.
--   start  : begins a computation (synchronous, active high).
--   rst    : synchronous, active-high reset.
--   clk    : clock, rising edge.
--   done   : high for one clk cycle after the twelfth accumulation step.
entity one_d_dct is
port(x7,x6,x5,x4,x3,x2,x1,x0:in std_logic_vector(11 downto 0);
     y7,y6,y5,y4,y3,y2,y1,y0:out std_logic_vector(11 downto 0);
     start,rst,clk:in std_logic;
     done:buffer std_logic);
end entity one_d_dct;

architecture one of one_d_dct is

-- Lookup tables, one per output coefficient.
component table_y0
port(t6,t4,t2,t0:in std_logic;
     data:out std_logic_vector(7 downto 0));
end component;

component table_y1
port(t7,t5,t3,t1:in std_logic;
     data:out std_logic_vector(7 downto 0));
end component;

component table_y2
port(t6,t4,t2,t0:in std_logic;
     data:out std_logic_vector(7 downto 0));
end component;

component table_y3
port(t7,t5,t3,t1:in std_logic;
     data:out std_logic_vector(7 downto 0));
end component;

component table_y4
port(t6,t4,t2,t0:in std_logic;
     data:out std_logic_vector(7 downto 0));
end component;

component table_y5
port(t7,t5,t3,t1:in std_logic;
     data:out std_logic_vector(7 downto 0));
end component;

component table_y6
port(t6,t4,t2,t0:in std_logic;
     data:out std_logic_vector(7 downto 0));
end component;

component table_y7
port(t7,t5,t3,t1:in std_logic;
     data:out std_logic_vector(7 downto 0));
end component;

-- Butterfly stage producing the sums/differences t0..t7.
component convert
port(x7,x6,x5,x4,x3,x2,x1,x0:in std_logic_vector(11 downto 0);
     t7,t6,t5,t4,t3,t2,t1,t0:out std_logic_vector(11 downto 0));
end component;

-- Places an 8-bit table word in bits 14 downto 7 of a 16-bit word, copies its
-- sign bit into bit 15 and clears bits 6 downto 0.
function sgn_extend (data_8:std_logic_vector(7 downto 0))
 return std_logic_vector is
begin
 return data_8(7)&data_8&"0000000";
end function sgn_extend;

-- Reduces a 16-bit accumulator to the 12-bit output: keeps bits 13 downto 2.
function sgn_cut (data_16:std_logic_vector(15 downto 0))
 return std_logic_vector is
begin
 return data_16(13 downto 2);
end function sgn_cut;

signal count:integer range 0 to 11;       -- index of the operand bit being processed
signal compute:std_logic;                 -- '1' while the twelve steps are running
signal t7,t6,t5,t4,t3,t2,t1,t0:std_logic_vector(11 downto 0);          -- convert outputs
signal d7,d6,d5,d4,d3,d2,d1,d0:std_logic_vector(7 downto 0);           -- table outputs
signal tt7,tt6,tt5,tt4,tt3,tt2,tt1,tt0:std_logic_vector(11 downto 0);  -- operand shift registers
signal dy7,dy6,dy5,dy4,dy3,dy2,dy1,dy0:std_logic_vector(15 downto 0);  -- accumulators
signal outy7,outy6,outy5,outy4,outy3,outy2,outy1,outy0:std_logic_vector(11 downto 0);

begin

y7<=outy7;y6<=outy6;y5<=outy5;y4<=outy4;y3<=outy3;y2<=outy2;y1<=outy1;y0<=outy0;

u:convert port map(x7=>x7,x6=>x6,x5=>x5,x4=>x4,x3=>x3,x2=>x2,x1=>x1,x0=>x0,
                   t7=>t7,t6=>t6,t5=>t5,t4=>t4,t3=>t3,t2=>t2,t1=>t1,t0=>t0);

-- Every table is addressed by the current least-significant bit of the
-- operand shift registers.
u7:table_y7 port map(t7=>tt7(0),t5=>tt5(0),t3=>tt3(0),t1=>tt1(0),data=>d7);
u6:table_y6 port map(t6=>tt6(0),t4=>tt4(0),t2=>tt2(0),t0=>tt0(0),data=>d6);
u5:table_y5 port map(t7=>tt7(0),t5=>tt5(0),t3=>tt3(0),t1=>tt1(0),data=>d5);
u4:table_y4 port map(t6=>tt6(0),t4=>tt4(0),t2=>tt2(0),t0=>tt0(0),data=>d4);
u3:table_y3 port map(t7=>tt7(0),t5=>tt5(0),t3=>tt3(0),t1=>tt1(0),data=>d3);
u2:table_y2 port map(t6=>tt6(0),t4=>tt4(0),t2=>tt2(0),t0=>tt0(0),data=>d2);
u1:table_y1 port map(t7=>tt7(0),t5=>tt5(0),t3=>tt3(0),t1=>tt1(0),data=>d1);
u0:table_y0 port map(t6=>tt6(0),t4=>tt4(0),t2=>tt2(0),t0=>tt0(0),data=>d0);

outy7<=sgn_cut(dy7);outy6<=sgn_cut(dy6);outy5<=sgn_cut(dy5);outy4<=sgn_cut(dy4);
outy3<=sgn_cut(dy3);outy2<=sgn_cut(dy2);outy1<=sgn_cut(dy1);outy0<=sgn_cut(dy0);

process(clk)
begin
 if clk'event and clk='1' then
   if rst='1' then
     -- Synchronous reset: clear control state, accumulators and shift registers.
     count<=0;done<='0';compute<='0';
     dy7<=(others=>'0');dy6<=(others=>'0');dy5<=(others=>'0');dy4<=(others=>'0');
     dy3<=(others=>'0');dy2<=(others=>'0');dy1<=(others=>'0');dy0<=(others=>'0');
     tt7<=(others=>'0');tt6<=(others=>'0');tt5<=(others=>'0');tt4<=(others=>'0');
     tt3<=(others=>'0');tt2<=(others=>'0');tt1<=(others=>'0');tt0<=(others=>'0');
   else
     -- done is a single-cycle pulse.
     if done='1' then done<='0';end if;

     if compute='1' then
       if count=11 then
         -- Last step: bit 11 is the sign bit of the operands, so its table
         -- word is subtracted instead of added.
         dy7<=dy7(15)&dy7(15 downto 1)-sgn_extend(d7);
         dy6<=dy6(15)&dy6(15 downto 1)-sgn_extend(d6);
         dy5<=dy5(15)&dy5(15 downto 1)-sgn_extend(d5);
         dy4<=dy4(15)&dy4(15 downto 1)-sgn_extend(d4);
         dy3<=dy3(15)&dy3(15 downto 1)-sgn_extend(d3);
         dy2<=dy2(15)&dy2(15 downto 1)-sgn_extend(d2);
         dy1<=dy1(15)&dy1(15 downto 1)-sgn_extend(d1);
         dy0<=dy0(15)&dy0(15 downto 1)-sgn_extend(d0);
         done<='1';compute<='0';
         -- Wrap instead of incrementing: count is declared "range 0 to 11",
         -- so count+1 here would be out of range. The value is not used again
         -- until start reloads it with 0.
         count<=0;
       else
         -- Arithmetic shift right of the accumulator, then add the table word.
         dy7<=dy7(15)&dy7(15 downto 1)+sgn_extend(d7);
         dy6<=dy6(15)&dy6(15 downto 1)+sgn_extend(d6);
         dy5<=dy5(15)&dy5(15 downto 1)+sgn_extend(d5);
         dy4<=dy4(15)&dy4(15 downto 1)+sgn_extend(d4);
         dy3<=dy3(15)&dy3(15 downto 1)+sgn_extend(d3);
         dy2<=dy2(15)&dy2(15 downto 1)+sgn_extend(d2);
         dy1<=dy1(15)&dy1(15 downto 1)+sgn_extend(d1);
         dy0<=dy0(15)&dy0(15 downto 1)+sgn_extend(d0);
         count<=count+1;
       end if;
     end if;

     if start='1' and done='0' then compute<='1';end if;

     if start='1' then
       -- Load the operands and clear the accumulators. These assignments come
       -- last in the process, so they override the updates made above when
       -- start is asserted during a computation.
       count<=0;
       tt7<=t7;tt6<=t6;tt5<=t5;tt4<=t4;tt3<=t3;tt2<=t2;tt1<=t1;tt0<=t0;
       dy7<=(others=>'0');dy6<=(others=>'0');dy5<=(others=>'0');dy4<=(others=>'0');
       dy3<=(others=>'0');dy2<=(others=>'0');dy1<=(others=>'0');dy0<=(others=>'0');
     else
       -- Shift the operands right by one so the next bit reaches position 0;
       -- bit 11 keeps its value.
       tt7(10 downto 0)<=tt7(11 downto 1);tt6(10 downto 0)<=tt6(11 downto 1);
       tt5(10 downto 0)<=tt5(11 downto 1);tt4(10 downto 0)<=tt4(11 downto 1);
       tt3(10 downto 0)<=tt3(11 downto 1);tt2(10 downto 0)<=tt2(11 downto 1);
       tt1(10 downto 0)<=tt1(11 downto 1);tt0(10 downto 0)<=tt0(11 downto 1);
     end if;
   end if;
 end if;
end process;

end architecture one;

 

Two_d_dct.vhd                    \\二维DCT算法模块

library ieee;

use ieee.std_logic_1164.all;

use ieee.std_logic_unsigned.all;

-- two_d_dct: two cascaded banks of eight one_d_dct units operating on an
-- 8x8 block of 12-bit words packed into a 768-bit vector.
--
-- Ports
--   din   : 64 x 12-bit input words; word w occupies bits 12*w+11 downto 12*w.
--   dout  : 64 x 12-bit result words, registered when the second bank finishes.
--   clk   : clock, rising edge.
--   start : starts the first bank.
--   rst   : synchronous, active-high reset.
--   done  : registered copy of the done pulse of second-bank unit 0.
entity two_d_dct is
port(din:in std_logic_vector(767 downto 0);
     dout:out std_logic_vector(767 downto 0);
     clk,start,rst:in std_logic;
     done:buffer std_logic);
end entity two_d_dct;

architecture one of two_d_dct is

-- One-dimensional transform unit.
component one_d_dct
port(x7,x6,x5,x4,x3,x2,x1,x0:in std_logic_vector(11 downto 0);
     y7,y6,y5,y4,y3,y2,y1,y0:out std_logic_vector(11 downto 0);
     start,rst,clk:in std_logic;
     done:buffer std_logic);
end component;

signal data1D:std_logic_vector(767 downto 0);   -- outputs of the first bank
signal data2D:std_logic_vector(767 downto 0);   -- outputs of the second bank
signal donecol:std_logic_vector(7 downto 0);    -- done pulses of the first bank
signal donerow:std_logic_vector(7 downto 0);    -- done pulses of the second bank

begin

-- First bank: unit i transforms the eight consecutive words 8*i .. 8*i+7 of
-- din and writes its results to the same word positions of data1D.
-- (Instances are named col_stage(i).col.)
col_stage: for i in 0 to 7 generate
  col:one_d_dct port map(
    x0=>din(96*i+11 downto 96*i),       x1=>din(96*i+23 downto 96*i+12),
    x2=>din(96*i+35 downto 96*i+24),    x3=>din(96*i+47 downto 96*i+36),
    x4=>din(96*i+59 downto 96*i+48),    x5=>din(96*i+71 downto 96*i+60),
    x6=>din(96*i+83 downto 96*i+72),    x7=>din(96*i+95 downto 96*i+84),
    y0=>data1D(96*i+11 downto 96*i),    y1=>data1D(96*i+23 downto 96*i+12),
    y2=>data1D(96*i+35 downto 96*i+24), y3=>data1D(96*i+47 downto 96*i+36),
    y4=>data1D(96*i+59 downto 96*i+48), y5=>data1D(96*i+71 downto 96*i+60),
    y6=>data1D(96*i+83 downto 96*i+72), y7=>data1D(96*i+95 downto 96*i+84),
    start=>start,clk=>clk,rst=>rst,done=>donecol(i));
end generate col_stage;

-- Second bank: unit j gathers word j of every first-bank unit (a stride of
-- eight words through data1D) and writes its results to words 8*j .. 8*j+7 of
-- data2D. All units are started by the done pulse of first-bank unit 0.
-- (Instances are named row_stage(j).row.)
row_stage: for j in 0 to 7 generate
  row:one_d_dct port map(
    x0=>data1D(12*j+11  downto 12*j),      x1=>data1D(12*j+107 downto 12*j+96),
    x2=>data1D(12*j+203 downto 12*j+192),  x3=>data1D(12*j+299 downto 12*j+288),
    x4=>data1D(12*j+395 downto 12*j+384),  x5=>data1D(12*j+491 downto 12*j+480),
    x6=>data1D(12*j+587 downto 12*j+576),  x7=>data1D(12*j+683 downto 12*j+672),
    y0=>data2D(96*j+11 downto 96*j),       y1=>data2D(96*j+23 downto 96*j+12),
    y2=>data2D(96*j+35 downto 96*j+24),    y3=>data2D(96*j+47 downto 96*j+36),
    y4=>data2D(96*j+59 downto 96*j+48),    y5=>data2D(96*j+71 downto 96*j+60),
    y6=>data2D(96*j+83 downto 96*j+72),    y7=>data2D(96*j+95 downto 96*j+84),
    start=>donecol(0),clk=>clk,rst=>rst,done=>donerow(j));
end generate row_stage;

-- Output register: capture data2D in the cycle in which second-bank unit 0
-- reports done, and forward that pulse one cycle later as "done".
process(clk)
begin
  if clk'event and clk='1' then
    if rst='1' then
      done<='0';dout<=(others=>'0');
    else
      done<=donerow(0);
      if donerow(0)='1' then dout<=data2D; end if;
    end if;
  end if;
end process;

end architecture one;

 

Dct.vhd                                  \\顶层模块文件

library ieee;

use ieee.std_logic_1164.all;

use ieee.std_logic_unsigned.all;

-- dct: word-serial wrapper around two_d_dct.
--
-- Ports
--   datain  : 12-bit input word, shifted into the 768-bit input buffer on each
--             rising edge of dinclk while loading is enabled.
--   dataout : 12-bit output word, shifted out of the result buffer on rising
--             edges of doutclk.
--   clk     : clock of the two_d_dct core.
--   start   : starts the core; each rising edge also toggles the load enable.
--   rst     : reset of the core (synchronous, active high inside the core).
--   dinclk  : input word clock.
--   doutclk : output word clock.
--   done    : done pulse of the core; each rising edge toggles output enable.
entity dct is
port(datain:in std_logic_vector(11 downto 0);
     dataout:out std_logic_vector(11 downto 0);
     clk,start,rst,dinclk,doutclk:in std_logic;
     done:buffer std_logic);
end entity dct;

architecture one of dct is

-- Two-dimensional transform core.
component two_d_dct
port(din:in std_logic_vector(767 downto 0);
     dout:out std_logic_vector(767 downto 0);
     clk,start,rst:in std_logic;
     done:buffer std_logic);
end component;

signal inbuff:std_logic_vector(767 downto 0);    -- input shift buffer
signal outbuff:std_logic_vector(767 downto 0);   -- core result
signal doutbuff:std_logic_vector(767 downto 0);  -- output shift buffer

-- Both flags are only ever updated with "not <flag>", so they need a defined
-- starting value; without one they stay 'U' in simulation and neither loading
-- nor unloading is ever enabled.
-- NOTE(review): this relies on the target honouring signal initial values at
-- power-up - confirm for the device/tool in use.
signal donestate,startstate:std_logic:='0';

begin

u:two_d_dct port map(din=>inbuff,dout=>outbuff,clk=>clk,start=>start,rst=>rst,done=>done);

-- Input side: while startstate = '0', each dinclk edge places the new word in
-- the top 12 bits and moves the existing contents down by one word.
process(dinclk)
begin
  if dinclk'event and dinclk='1' then
    if startstate='0' then
      inbuff(767 downto 756)<=datain;
      inbuff(755 downto 0)<=inbuff(767 downto 12);
    end if;
  end if;
end process;

-- Load-enable flag: toggles on every rising edge of start.
process(start)
begin
  if start'event and start='1' then
    startstate<=not startstate;
  end if;
end process;

-- Output-enable flag: toggles on every rising edge of done.
process(done)
begin
  if done'event and done='1' then
    donestate<=not donestate;
  end if;
end process;

-- Output side: while donestate = '1', a doutclk edge that sees done = '1'
-- loads the result buffer; any other doutclk edge emits the lowest word and
-- moves the buffer down by one word.
-- NOTE(review): done is generated in the clk domain and sampled here by
-- doutclk - confirm that the two clocks are related closely enough for the
-- done pulse to be seen.
process(doutclk)
begin
  if doutclk'event and doutclk='1' then
    if donestate='1' then
      if done='1' then
        doutbuff<=outbuff;
      else
        dataout<=doutbuff(11 downto 0);
        doutbuff(755 downto 0)<=doutbuff(767 downto 12);
      end if;
    end if;
  end if;
end process;

end architecture one;

 

Main.vhd                         \\增加串口模块的顶层文件

library ieee;

use ieee.std_logic_1164.all;

use ieee.std_logic_unsigned.all;

-- main: bit-serial front end for the dct block.
--
-- Ports
--   datain  : serial input bit, sampled on rising edges of clk.
--   clk     : clock for the input side and for the dct core.
--   start   : passed through to the dct block.
--   rst     : asynchronous, active-low reset of the two bit counters.
--   doutclk : clock for the serial output side.
--   dataout : serial output bit, least-significant bit of each word first.
--   done    : done pulse of the dct block.
entity main is
port( datain:in std_logic;
     clk,start,rst,doutclk:in std_logic;
     dataout:out std_logic;
     done:buffer std_logic  );
end main;

architecture one of main is

component dct is
port(datain:in std_logic_vector(11 downto 0);
     dataout:out std_logic_vector(11 downto 0);
     clk,start,rst,dinclk,doutclk:in std_logic;
     done:buffer std_logic);
end component;

signal ready,doutclk2:std_logic;                     -- word strobes towards the dct block
signal dct_rst:std_logic;                            -- active-high reset for the dct block
signal data:std_logic_vector(11 downto 0);           -- parallel word into the dct block
signal data_out:std_logic_vector(11 downto 0);       -- parallel word out of the dct block
signal dataoutbuff:std_logic_vector(11 downto 0);    -- output shift register
signal datainbuff:std_logic_vector(7 downto 0);      -- input shift register
signal count:std_logic_vector(2 downto 0);           -- input bit counter (0..7)
signal count2:std_logic_vector(3 downto 0);          -- output bit counter (0..11)

begin

-- This entity treats rst = '0' as reset, while the dct block is handed a reset
-- that is active at '1'; invert so that both are in reset at the same time.
dct_rst<=not rst;

u: dct port map(datain=>data,dataout=>data_out,clk=>clk,start=>start,rst=>dct_rst,
                dinclk=>ready,doutclk=>doutclk2,done=>done);

-- Serial-to-parallel conversion: shift one bit in per clk edge; on every
-- eighth edge present the byte, zero-extended to 12 bits, and raise ready for
-- one clk period so that each word produces a fresh rising edge on dinclk.
-- NOTE(review): the byte presented is the shift register contents before the
-- current bit is shifted in - confirm the intended framing of the bit stream.
process(rst,clk)
begin
  if rst='0' then
    count<=(others=>'0');
    ready<='0';
  elsif clk'event and clk='1' then
    datainbuff(7 downto 1)<=datainbuff(6 downto 0);
    datainbuff(0)<=datain;
    if count="111" then
      data<="0000"&datainbuff; ready<='1'; count<="000";
    else
      count<=count+1; ready<='0';
    end if;
  end if;
end process;

-- Parallel-to-serial conversion: emit one bit per doutclk edge; on every
-- twelfth edge reload the shift register from the dct block and raise
-- doutclk2 for one doutclk period to request the next word.
process(rst,doutclk)
begin
  if rst='0' then
    count2<=(others=>'0');
  elsif doutclk'event and doutclk='1' then
    dataout<=dataoutbuff(0);
    dataoutbuff(10 downto 0)<=dataoutbuff(11 downto 1);
    if count2="1011" then
      -- The reload below overrides the shift assigned just above.
      doutclk2<='1'; dataoutbuff<=data_out; count2<="0000";
    else
      count2<=count2+1; doutclk2<='0';
    end if;
  end if;
end process;

end one;

 

 

 

4.2.1 乘法器快速算法

     由图4-5可知

          y0=c0×t0+c0×t2+c0×t4+c0×t6           式(4-3)

          y2=c2×t0+c6×t2-c6×t4-c2×t6            式(4-4)

          y4=c4×t0-c4×t2-c4×t4+c4×t6            式(4-5)

          y6=c6×t0-c2×t2+c2×t4-c6×t6            式(4-6)

          y1=c1×t1+c3×t3+c5×t5+c7×t7           式(4-7)

          y3=c3×t1-c7×t3-c1×t5-c5×t7            式(4-8)

          y5=c5×t1-c1×t3+c7×t5+c3×t7           式(4-9)

          y7=c7×t1-c5×t3+c3×t5-c1×t7           式(4-10)

其中t0=x0+x7,t2=x1+x6,t4=x2+x5,t6=x3+x4,t1=x0-x7,t3=x1-x6,t5=x2-x5,t7=x3-x4

    由此看出,一维DCT变换的核心算法是乘法器算法。

因乘积项中的DCT系数值是已知的,所以采用查询表和移位相加的方法进行乘法运算,此DA算法可以使硬件结构比较简单,消耗的硬件资源相对比较少,最重要的是运算周期比较短。

其具体的例子如下:

Y=x0×5+x1×3,比如(x0=1,x1=2),它们用二进制表示为x0(0001),x1(0010),5(0101),

3(0011)。首先列出一张5和3相加所有可能结果的表,其内容如下:

“00”:0000  0*3+0*5  “01”:0101  0*3+1*5  “10”:0011  1*3+0*5  “11”:1000 1*3+1*5

然后用x1、x0的第1位组成的两位数(01)查得的数据为0101,第2位(10)查得的数据为0011,第3位(00)查得的数据为0000,第4位(00)查得的数据为0000,最后将各次查得的数据依次左移一位后相加。

      0101

     0011

    0000

   0000

   0001011

其结果(0001011)化为十进制数为11,值和实际结果相符y=1*5+2*3=11.

本文中的乘法算法基本思想如上所述,本文乘法查询表的数据如下:

y0

“0000” :00000000              “0001” :00010110  c0  

“0010” :00010110  c0          “0011” :00101100  c0+c0

“0100” :00010110  c0          “0101” :00101100  c0+c0

“0110” :00101100  c0+c0       “0111” :01000010  c0+c0+c0

“1000” :00010110  c0          “1001” :00101100  c0+c0

“1010” :00101100  c0+c0       “1011” :01000010  c0+c0+c0

“1100” :00101100  c0+c0       “1101” :01000010  c0+c0+c0

“1110” :01000010  c0+c0+c0    “1111” :01011000  c0+c0+c0+c0

y2

“0000” :00000000              “0001” :00011101  c2  

“0010” :00001100  c6          “0011” :00101001  c2+c6

“0100” :11110100  -c6          “0101” :00010001  c2-c6

“0110” :00000000  c6-c6        “0111” :00011101  c2+c6-c6=c2

“1000” :11100011  -c2          “1001” :00000000  c2-c2

“1010” :11101111  c6-c2        “1011” :00001100  c2+c6-c2=c6

“1100” :11010111  -c6-c2       “1101” :11110100  c2-c6-c2=-c6

“1110” :11100011  c6-c6-c2=-c2  “1111” :00000000  c2+c6-c6-c2

y4:同理

y6:同理

y1:同理

y3:同理

y5:同理

y7:同理

4.2.2 乘法查询表的VHDL程序

 

Table_y0.vhd                     \\ y0=c0×t0+c0×t2+c0×t4+c0×t6

见附录

 

Table_y1.vhd                     \\ y1=c1×t1+c3×t3+c5×t5+c7×t7

见附录

 

Table_y2.vhd                     \\ y2=c2×t0+c6×t2-c6×t4-c2×t6

见附录

 

Table_y3.vhd                     \\ y3=c3×t1-c7×t3-c1×t5-c5×t7

见附录

 

Table_y4.vhd                     \\ y4=c4×t0-c4×t2-c4×t4+c4×t6

见附录

 

Table_y5.vhd                     \\ y5=c5×t1-c1×t3+c7×t5+c3×t7

见附录

 

Table_y6.vhd                    \\ y6=c6×t0-c2×t2+c2×t4-c6×t6

见附录

 

Table_y7.vhd                    \\ y7=c7×t1-c5×t3+c3×t5-c1×t7

见附录

 

 

4.2.3 乘法查询表与硬件之间的映射问题

其中某个查询表的顶层文件引脚如图4-6所示.

图像处理算法的FPGA实现研究_第1张图片

 

图4-6 table_y0引脚图

查询表的RTL视图如图4-7所示。

图像处理算法的FPGA实现研究_第2张图片

 

图4-7 查询表的RTL视图

由RTL视图可知,查询表采用的硬件结构为多路选择器MUX。通过对多路选择器MUX的DATA端赋初值,可以快速地实现乘法查询。

 

 

 

一维DCT算法总流程

由4.1.2节可知,输入数据先经过预处理(convert),即(t0=x0+x7,t2=x1+x6,……),然后通过查询表查找数据,再次进行移位相加,如此循环,就可以得到8位×12位的乘法结果。其总的流程如图4-9所示。

 

图像处理算法的FPGA实现研究_第3张图片

一维DCT算法中的循环移位模块、查找乘法表后相加模块分别如图4-11、图4-12所示。

 

图像处理算法的FPGA实现研究_第4张图片

图4-11 循环移位模块

 

图像处理算法的FPGA实现研究_第5张图片

图4-12 查找乘法表后相加模块

由图4-11可以看出,循环移位模块是通过控制线把多路选择器和寄存器相结合的方法来实现循环移位功能。

     因为一维DCT算法模块中的数据是并行的,所以总的输入输出都要进行串行并行转换。

其总流程如图4-14所示。

 

图4-14 二维DCT算法总流程

4.4.2二维DCT算法VHDL程序

Two_d_dct.vhd                    \\二维DCT算法模块

见附录

 

Dct.vhd                           \\顶层模块文件

见附录

 

如果需要串口输入数据,就要增加串并转换模块,那么顶层文件如下:

Main.vhd                         \\增加串口模块的顶层文件

见附录

 

4.4.3二维DCT算法与硬件之间的映射问题

二维DCT算法的顶层文件引脚如图4-15所示.

图像处理算法的FPGA实现研究_第6张图片

 

图 4-15 二维DCT算法的顶层文件引脚图

    引脚的功能分别为clk(时钟信号),rst(复位信号),start(开始信号),datain(串行数据输入),doutclk(数据输出时钟信号),done(计算结束信号),dataout(串行数据输出)。

 

4.4.4二维DCT算法仿真

其仿真波形如图4-16所示。

 图像处理算法的FPGA实现研究_第7张图片

 

 

你可能感兴趣的:(FPGA,板块2:图像-特征提取处理)