axi-stream 接口需要 DMA,因此添加 DMA IP,并添加 HP0。
AXI_MM2S 和 AXI_S2MM 都连接到 HP0。
原因:位宽不匹配,DMA 的位宽是 32,但是 pool 接受的是 128。
将位宽更改为 128。
再次验证通过:
from pynq import Overlay
import numpy as np
from pynq import Xlnk
import time
import random
# Test configuration.
# K is the size of the innermost buffer dimension: channels are stored in
# groups of K (8 x int16 = 128 bits, the width the notes say pool accepts).
K = 8
channel = 32
width_in = 28
height_in = 28
# Pooling window: Kx divides the width, Ky divides the height.
Kx = 4
Ky = 4
width_out = width_in // Kx
height_out = height_in // Ky

# Load the bitstream and list the IP cores it exposes.
xlnk = Xlnk()
ol = Overlay("pool_stream.bit")
ol.download()
print(ol.ip_dict.keys())

# Buffers allocated through Xlnk for the DMA, laid out as
# (channel group, row, column, channel within group).
input_buffer = xlnk.cma_array(
    shape=((channel + K - 1) // K, height_in, width_in, K),
    cacheable=0,
    dtype=np.int16,
)
# Hardware result.
output_buffer = xlnk.cma_array(
    shape=((channel + K - 1) // K, height_out, width_out, K),
    cacheable=0,
    dtype=np.int16,
)
# Software reference result, compared against the hardware result later.
output_buffer_soft = xlnk.cma_array(
    shape=((channel + K - 1) // K, height_out, width_out, K),
    cacheable=0,
    dtype=np.int16,
)
print(input_buffer.nbytes)
print(output_buffer.nbytes)
# Fill the input feature map with random values, one element at a time in
# row-major order.
for idx in np.ndindex(*input_buffer.shape):
    input_buffer[idx] = random.randint(-2000, 2000)

# Debug aid: dump element 0 of channel group 0 for every pixel.
# for j in range(input_buffer.shape[1]):
#     for k in range(input_buffer.shape[2]):
#         print(input_buffer[0][j][k][0], end=' ')
#     print(' ')

# Clear both result buffers so stale data cannot mask a mismatch.
output_buffer.fill(0)
output_buffer_soft.fill(0)

# Handles to the DMA engine and the pooling IP inside the overlay.
dma = ol.axi_dma_0
pool = ol.pool_0
def Run_Pool(ch, kx, ky, feature_in, feature_out):
    """Run one pooling pass on the hardware IP and block until it finishes.

    Writes the layer parameters to the pool IP registers, starts the IP,
    streams ``feature_in`` to it through the DMA send channel and collects
    the result into ``feature_out`` through the DMA receive channel.

    Args:
        ch: Number of channels; written to the IP as the number of
            K-channel groups, rounded up.
        kx: Value written to register 0x38.
        ky: Value written to register 0x40.
        feature_in: Input buffer passed to the DMA send channel; its
            ``shape[1]`` and ``shape[2]`` are written to the IP.
        feature_out: Output buffer passed to the DMA receive channel; its
            ``shape[1]`` and ``shape[2]`` are written to the IP.
    """
    # (register offset, value) pairs, written in ascending offset order.
    # NOTE(review): the offsets presumably follow the generated driver's
    # register map for the pool IP -- confirm against the HLS output.
    register_values = (
        (0x10, (ch + K - 1) // K),
        (0x18, feature_in.shape[1]),
        (0x20, feature_in.shape[2]),
        (0x28, feature_out.shape[1]),
        (0x30, feature_out.shape[2]),
        (0x38, kx),
        (0x40, ky),
    )
    for offset, value in register_values:
        pool.write(offset, value)

    print("start")
    # Start the pool IP: keep bit 7 of the control word, set bit 0.
    control = pool.read(0)
    pool.write(0, (control & 0x80) | 0x01)

    # Arm the receive channel before sending anything.
    dma.recvchannel.transfer(feature_out)
    dma.sendchannel.transfer(feature_in)
    dma.sendchannel.wait()
    print("send done")
    dma.recvchannel.wait()
    print("recv done")

    # Poll the control register until bit 1 is set.
    # NOTE(review): bit 1 is presumably the IP's "done" flag -- confirm.
    while ((pool.read(0) >> 1) & 0x1) == 0:
        pass
    print("pool ip done")
def Run_Pool_Soft(ch, kx, ky, feature_in, feature_out):
    """Software reference max-pooling, used to check the hardware result.

    Both buffers are indexed as ``[channel // K][row][column][channel % K]``,
    where ``K`` is the module-level channel-group size.  Each output pixel is
    the maximum over a window of ``ky`` rows by ``kx`` columns, with the
    window stepping by its own size (non-overlapping windows).

    Args:
        ch: Number of channels to process.
        kx: Window width (columns, i.e. along ``shape[2]``).
        ky: Window height (rows, i.e. along ``shape[1]``).
        feature_in: Input feature map.
        feature_out: Output feature map, written in place; its ``shape[1]``
            and ``shape[2]`` give the number of output rows and columns.
    """
    for i in range(ch):
        grp, lane = divmod(i, K)
        for j in range(feature_out.shape[1]):
            for k in range(feature_out.shape[2]):
                # Start from the int16 minimum so any sample replaces it.
                best = -32768
                for ii in range(ky):
                    # Rows advance by the window height.  The original
                    # multiplied by kx here, which is only right if kx == ky.
                    row = j * ky + ii
                    for jj in range(kx):
                        # Columns advance by the window width.
                        col = k * kx + jj
                        dat = feature_in[grp][row][col][lane]
                        if dat > best:
                            best = dat
                feature_out[grp][j][k][lane] = best
# Time the hardware pooling pass.
starttime = time.time()
Run_Pool(channel, Kx, Ky, input_buffer, output_buffer)
endtime = time.time()
print("hardware run time=%s s"%(endtime-starttime))

# Time the software reference on the same input.
starttime = time.time()
Run_Pool_Soft(channel, Kx, Ky, input_buffer, output_buffer_soft)
endtime = time.time()
print("software run time=%s s"%(endtime-starttime))

# Compare the two results element by element and report every mismatch.
flag = 1
for idx in np.ndindex(*output_buffer.shape):
    hw_value = output_buffer[idx]
    sw_value = output_buffer_soft[idx]
    if hw_value != sw_value:
        flag = 0
        print("output_buffer [%d][%d][%d][%d] = %d" % (idx + (hw_value,)))
        print("output_buffer_soft [%d][%d][%d][%d] = %d" % (idx + (sw_value,)))

if flag == 1:
    print("============================\n result_match\n============================\n")
else:
    print("============================\n result_mismatch\n============================\n")
dma控制:
# Excerpt of the DMA control sequence from Run_Pool, annotated.
print("start");
pool.write(0, (pool.read(0)&0x80)|0x01 ) #start pool IP
dma.recvchannel.transfer(feature_out) # recv: reads the data and writes it to DDR; it must receive the last signal, otherwise it hangs
dma.sendchannel.transfer(feature_in) # send: sends the data out to pool
dma.sendchannel.wait(); # the data must finish transferring before pooling completes; this checks when the transfer is done
print("send done")
dma.recvchannel.wait()
print("recv done")
# Poll control register 0 until bit 1 is set.
tp=pool.read(0)
while not((tp>>1)&0x1):
tp=pool.read(0)
print("pool ip done")
添加
捕捉电路运行逻辑: