分享自己几年前写的H.265解码器代码,全部代码已开源在GitHub上,链接如下:https://github.com/tishi43/h265_decoder
c参考代码在 https://github.com/tishi43/h265_c_reference
同时分享另一个年代比较久远的H.264解码器,大概有10年了,起初是作为我表弟的课题来做的,绝大部分由我的表弟完成。当时还是Altera统治FPGA的时代,在Cyclone III上做的验证。这个版本是比较老的版本,后来又在Xilinx ZYNQ7020上做了验证和功能完善。
https://github.com/tishi43/h264_decoder
以下大致介绍一下h.265解码器,在zynq7035平台上验证。
顶层文件是decode_stream.sv,
接口主要包括:stream接口,用于向解码器喂码流;AXI3接口,用于写出解码后的yuv数据以及读入参考帧。
// Top-level port list of the H.265 decoder (decode_stream.sv).
// Two main interfaces: a byte-wide stream port that feeds the bitstream into
// the decoder, and an AXI master port (64-bit data, 32-bit address) used to
// write decoded YUV data and to read reference frames.
module decode_stream (
//global signals
input wire clk,
input wire rst,
input wire en,
//interface to bitstream memory or fifo
input wire [ 7:0] stream_mem_data_in,
input wire stream_mem_valid,
output wire [31:0] stream_mem_addr_out,
output wire stream_mem_rd, // request stream read by read_nalu
input wire stream_mem_end, // end of stream reached
// picture information / status outputs
output wire [12:0] pic_width_in_luma_samples, //max 4096
output wire [11:0] pic_height_in_luma_samples, //max 2160
output wire [63:0] pic_num,
output wire [ 3:0] cur_pic_dpb_slot,
output wire write_yuv,
input wire ext_mem_init_done,
// NOTE(review): the fd_* inputs look like simulation file descriptors used for
// per-stage debug dumps -- confirm against the module body.
input wire [31:0] fd_log,
input wire [31:0] fd_pred,
input wire [31:0] fd_intra_pred_chroma,
input wire [31:0] fd_tq_luma,
input wire [31:0] fd_tq_cb,
input wire [31:0] fd_tq_cr,
input wire [31:0] fd_filter,
input wire [31:0] fd_deblock,
//axi bus read if
input wire m_axi_arready,
output wire m_axi_arvalid,
output wire [ 3:0] m_axi_arlen,
output wire [31:0] m_axi_araddr,
output wire [ 5:0] m_axi_arid,
output wire [ 2:0] m_axi_arsize,
output wire [ 1:0] m_axi_arburst,
output wire [ 2:0] m_axi_arprot,
output wire [ 3:0] m_axi_arcache,
output wire [ 1:0] m_axi_arlock,
output wire [ 3:0] m_axi_arqos,
output wire m_axi_rready,
input wire [63:0] m_axi_rdata,
input wire m_axi_rvalid,
input wire m_axi_rlast,
//axi bus write if
input wire m_axi_awready, // Write address ready: slave is ready to accept a write address
output wire [ 5:0] m_axi_awid, // Write ID
output wire [31:0] m_axi_awaddr, // Write address
output wire [ 3:0] m_axi_awlen, // Write Burst Length
output wire [ 2:0] m_axi_awsize, // Write Burst size
output wire [ 1:0] m_axi_awburst, // Write Burst type
output wire [ 1:0] m_axi_awlock, // Write lock type
output wire [ 3:0] m_axi_awcache, // Write Cache type
output wire [ 2:0] m_axi_awprot, // Write Protection type
output wire m_axi_awvalid, // Write address valid
input wire m_axi_wready, // Write data ready
output wire [ 5:0] m_axi_wid, // Write ID tag
output wire [63:0] m_axi_wdata, // Write data
output wire [ 7:0] m_axi_wstrb, // Write strobes
output wire m_axi_wlast, // Last write transaction
output wire m_axi_wvalid, // Write valid
input wire [ 5:0] m_axi_bid, // Response ID
input wire [ 1:0] m_axi_bresp, // Write response
input wire m_axi_bvalid, // Write response valid
output wire m_axi_bready, // Response ready
// NOTE(review): m_axi_rrid is declared as an output although AXI read-response
// IDs are normally driven by the slave -- confirm the intended direction.
output wire [ 5:0] m_axi_rrid,
input wire [ 1:0] m_axi_rresp
);
整体结构如下图
大致包含如下部分
read_nalu.v,rbsp_buffer.v,rbsp_buffer_simple.v,
码流进来读nalu,buffer管理。
bitstream_controller.sv,
pps.v,
sps.v,
vps.v,
slice_header.
slice_data.sv,
cu.sv,
tu.sv
这些文件用于码流解析,分别解vps,sps,pps,slice_header,slice_data,cu,tu.
cabac.v,cabac_bypass_decode_bin.v,cabac_decode_bin.v,cabac_terminate_decode_bin.v,
dec_bin_cu.v,dec_bin_gt1_etc.v,dec_bin_sd.v,dec_bin_sig.v,dec_bin_xy_pref.v,
CABAC解码,包括3种bin的解码;为了缩短关键路径,解码逻辑被拆分成5块。
trans_quant_32.sv,trans_quant_16.sv
反量化,IDCT
intra_pred_32.sv, intra_pred_16.sv
luma和chroma的帧内预测,
inter_pred_luma.sv, inter_pred_chroma.sv
luma和chroma的帧间预测,
mv.sv
求mv,参考帧,derive_motion_vector_prediction, derive_temporal_motion_vector,两种mv的计算,这块是h.265相比h.264复杂很多的地方。
filter_64.sv,filter_32.sv,
luma和chroma的滤波,包含去方块滤波和sao滤波。
下面是zynq7035平台上的验证,block_design如下,
添加ZYNQ7000处理器模块,并引出一个AXI HP slave接口和一个AXI GP接口;
再添加一个AXI-Stream FIFO和AXI Interconnect,由AXI Interconnect把32位数据宽度转换为8位,再喂给decode_stream。
AXI HP slave接口用于PL访问PS侧的DDR;
AXI GP接口用于读写AXI-Stream FIFO的寄存器,从而把码流喂向PL端。
ARM端SDK的代码,如下,从sd卡读取码流文件,写AXI-stream fifo寄存器,把码流喂向PL,解码完成保存yuv文件到sd卡。
#include <stdio.h>
#include "platform.h"
#include "xil_printf.h"
#include "xdevcfg.h"
#include "xparameters.h"
#include "ff.h"
/*
 * Prototypes for SD-card helpers whose definitions are not shown here.
 * main() calls SD_Init() before opening any file; the two test routines are
 * not called in the code shown.
 */
int SD_Init(void);
int Sd_Test_Write(void);
int Sd_Test_Read(void);
/*
 * Memory-mapped registers of the AXI-Stream FIFO used to push the bitstream
 * to the PL. Register names appear to follow the Xilinx AXI4-Stream FIFO
 * register map; the base address is specific to this block design
 * (TODO confirm against xparameters.h).
 */
#define BASE_ADDR 0x43c00000
#define ISR  (BASE_ADDR + 0x00) /* written with 0xffffffff before each packet */
#define IER  (BASE_ADDR + 0x04) /* written with 0x0C000000 before each packet */
#define TDFR (BASE_ADDR + 0x08) /* not accessed in the code shown */
#define TDFV (BASE_ADDR + 0x0C) /* polled until it reads at least 400 */
#define TDFD (BASE_ADDR + 0x10) /* data port: eight 32-bit words per packet */
#define TLR  (BASE_ADDR + 0x14) /* written with 0x20 after the eight words */
#define TDR  (BASE_ADDR + 0x2c) /* written with 0x2 before each packet */
/* FatFs filesystem object. Not referenced in the code shown --
 * presumably mounted by SD_Init(); confirm. */
static FATFS fatfs;
/* Frame dimensions in pixels (3840x2160). Not referenced in the code shown --
 * presumably used when saving the decoded YUV frames; confirm. */
#define WIDTH 3840
#define HEIGHT 2160
int main()
{
init_platform();
print("zynq_sd_card_fatfs-test a\r\n");
SD_Init();
xil_printf("PLL status %x\r\n",*(unsigned int *)0xf800010c);
FIL fil;
FRESULT rc;
UINT br;
unsigned int *bit_addr=(unsigned int *)0x1000000;
rc = f_open(&fil,"in.265",FA_READ);
if(rc)
{
xil_printf("ERROR : f_open returned %d\r\n",rc);
return XST_FAILURE;
}
rc = f_lseek(&fil, 0);
rc = f_read(&fil,bit_addr,4096000,&br);
xil_printf("br=%d\r\n",br);
rc = f_close(&fil);
unsigned int i;
unsigned int j,k;
unsigned int counter;
unsigned int tdfv=0;
unsigned int isr;
unsigned int decode_times=0;
i=0;
while(i<((br+3)/4))
{
*(unsigned int *)ISR = 0xffffffff;
*(unsigned int *)IER = 0x0C000000;
*(unsigned int *)TDR = 0x2;
*(unsigned int *)TDFD = bit_addr[i];
*(unsigned int *)TDFD = bit_addr[i+1];
*(unsigned int *)TDFD = bit_addr[i+2];
*(unsigned int *)TDFD = bit_addr[i+3];
*(unsigned int *)TDFD = bit_addr[i+4];
*(unsigned int *)TDFD = bit_addr[i+5];
*(unsigned int *)TDFD = bit_addr[i+6];
*(unsigned int *)TDFD = bit_addr[i+7];
*(unsigned int *)TLR = 0x20;
do{
tdfv=*(volatile unsigned int *)TDFV;
}while(tdfv<400);
i+=8;
}
counter=200*1024*1024; //wait enough time for decode finish
while(counter>0){
counter--;
}
rc = f_open(&fil,"out.yuv",FA_WRITE|FA_CREATE_NEW);
if(rc)
{
xil_printf("ERROR : f_open returned %d\r\n",rc);
return XST_FAILURE;
}
for (j=0;j<5;j++){
for(k=0;k