虽然看完了 UG902 中关于 Vivado HLS 优化的内容,但有点囫囵吞枣,所以想从实例出发,探索如何应用优化指令来达到最大的性能!
#ifndef _XIANGANWO3_H_
#define _XIANGANWO3_H_
#include "hls_video.h"
// maximum image size
#define MAX_WIDTH 1936
#define MAX_HEIGHT 1456
// typedef video library core structures
// NOTE(review): the template arguments of AXI_STREAM, RGB_IMAGE and GRAY_IMAGE
// were missing from this listing (the typedefs did not compile). They are
// restored here following the usual hls_video pattern; the 24-bit AXI data
// width (3 x 8-bit channels) is an assumption -- confirm against the real project.
typedef hls::stream<ap_axiu<24, 1, 1, 1> > AXI_STREAM;
typedef hls::Scalar<3, unsigned char> RGB_PIXEL;
typedef hls::Scalar<1, unsigned char> GRAY_PIXEL;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC3> RGB_IMAGE;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC1> GRAY_IMAGE;
// top level function for HW synthesis
// src_axi / src_axi1 / src_axi2: input video streams; dst_axi: output video stream;
// rows / cols: frame size; model: lookup table with one entry per 24-bit colour.
int hls_XiangAnWO3(AXI_STREAM& src_axi, AXI_STREAM& src_axi1, AXI_STREAM& src_axi2, AXI_STREAM& dst_axi, int rows, int cols, unsigned char model[16777216]);
#endif
#include "XiangAnWO3.h"
/**
 * Produces a binary mask from an RGB frame by looking every pixel up in `model`.
 *
 * Stages, in order: hls::Scale (factor 1.1), per-pixel table lookup,
 * hls::Dilate, hls::Erode.
 *
 * @param srcImage  RGB input frame (a stream: every pixel is consumed once).
 * @param FluoImage Single-channel result after the dilate/erode pass.
 * @param model     Lookup table indexed by R + G*256 + B*65536, where B, G, R
 *                  are channels 0, 1, 2 of the scaled pixel. A non-zero entry
 *                  yields 255 in the mask, a zero entry yields 0.
 */
void FluoDetect(RGB_IMAGE& srcImage, GRAY_IMAGE& FluoImage, unsigned char model[16777216])
{
    // Every intermediate image carries the size of the incoming frame so that
    // each stage produces exactly as many pixels as the next one consumes.
    // Declaring them (and the loop below) as MAX_HEIGHT x MAX_WIDTH leaves the
    // lookup loop waiting on an empty stream whenever the frame is smaller
    // than the maximum, because hls::Scale only forwards the frame's own pixels.
    RGB_IMAGE img(srcImage.rows, srcImage.cols);
    GRAY_IMAGE img1(srcImage.rows, srcImage.cols);
    GRAY_IMAGE img2(srcImage.rows, srcImage.cols);
#pragma HLS dataflow
    hls::Scale(srcImage, img, 1.1);
    loop_height: for (int i = 0; i < srcImage.rows; i++) {
// Trip counts only feed the latency report; the values mirror MAX_HEIGHT / MAX_WIDTH.
#pragma HLS loop_tripcount max=1456
        loop_width: for (int j = 0; j < srcImage.cols; j++) {
#pragma HLS loop_tripcount max=1936
#pragma HLS loop_flatten off
#pragma HLS pipeline II=1
            RGB_PIXEL src_data;
            GRAY_PIXEL dst_data(0);
            img >> src_data;
            const unsigned char B = src_data.val[0];
            const unsigned char G = src_data.val[1];
            const unsigned char R = src_data.val[2];
            // Pack the three channels into one 24-bit table index.
            const int rgbpixels = R + G * 256 + B * 256 * 256;
            const unsigned char rgbelement = model[rgbpixels];
            dst_data.val[0] = (rgbelement > 0) ? 255 : 0;
            img1 << dst_data;
        }
    }
    hls::Dilate(img1, img2);
    hls::Erode(img2, FluoImage);
}
/**
 * Extracts an edge image of the dark regions of an RGB frame.
 *
 * Stages, in order: hls::Scale (factor 1.5), grey conversion, inverse binary
 * threshold (threshold 38, max value 255), three hls::Erode passes, and a
 * 3x3 Sobel filter in the x direction.
 *
 * @param srcImage RGB input frame (a stream: every pixel is consumed once).
 * @param dstImage Single-channel Sobel output.
 */
void FindTarget(RGB_IMAGE& srcImage, GRAY_IMAGE& dstImage)
{
    // Intermediates take the size of the incoming frame. With MAX_HEIGHT x
    // MAX_WIDTH intermediates the later stages expect more pixels than
    // hls::Scale forwards whenever the frame is smaller than the maximum.
    RGB_IMAGE img(srcImage.rows, srcImage.cols);
    GRAY_IMAGE img1(srcImage.rows, srcImage.cols);
    GRAY_IMAGE img2(srcImage.rows, srcImage.cols);
    GRAY_IMAGE img3(srcImage.rows, srcImage.cols);
    GRAY_IMAGE img4(srcImage.rows, srcImage.cols);
    GRAY_IMAGE img5(srcImage.rows, srcImage.cols);
#pragma HLS dataflow
    hls::Scale(srcImage, img, 1.5);
    // NOTE(review): hls::CvtColor needs an explicit conversion code, which was
    // missing from this listing. HLS_BGR2GRAY is assumed because FluoDetect
    // treats channel 0 as blue -- confirm against the real project.
    hls::CvtColor<HLS_BGR2GRAY>(img, img1);
    hls::Threshold(img1, img2, 38, 255, HLS_THRESH_BINARY_INV);
    hls::Erode(img2, img3);
    hls::Erode(img3, img4);
    hls::Erode(img4, img5);
    hls::Sobel<1, 0, 3>(img5, dstImage);
}
/**
 * Overlays two single-channel masks on an RGB frame.
 *
 * For every pixel: if the first mask is 255 the output is (0, 0, 255);
 * otherwise, if the second mask is 255 the output is (255, 255, 255);
 * otherwise the pixel of srcImage3 is passed through unchanged.
 *
 * @param srcImage1 First mask (takes priority).
 * @param srcImage2 Second mask.
 * @param srcImage3 RGB frame that receives the overlay.
 * @param dstImage  Composed RGB output.
 */
void Composition(GRAY_IMAGE& srcImage1, GRAY_IMAGE& srcImage2, RGB_IMAGE& srcImage3, RGB_IMAGE& dstImage)
{
    // Iterate over the actual frame size. Looping over MAX_HEIGHT x MAX_WIDTH
    // reads past the end of the input streams for any smaller frame.
    loop_height: for (int i = 0; i < srcImage3.rows; i++) {
// Trip counts only feed the latency report; the values mirror MAX_HEIGHT / MAX_WIDTH.
#pragma HLS loop_tripcount max=1456
        loop_width: for (int j = 0; j < srcImage3.cols; j++) {
#pragma HLS loop_tripcount max=1936
#pragma HLS loop_flatten off
#pragma HLS pipeline II=1
            GRAY_PIXEL src_data1(0), src_data2(0);
            RGB_PIXEL dst_data(0, 0, 0);
            // One pixel is taken from each input on every iteration.
            srcImage3 >> dst_data;
            srcImage1 >> src_data1;
            srcImage2 >> src_data2;
            const unsigned char data1 = src_data1.val[0];
            const unsigned char data2 = src_data2.val[0];
            if (data1 == 255) {
                dst_data.val[0] = 0;
                dst_data.val[1] = 0;
                dst_data.val[2] = 255;
            } else if (data2 == 255) {
                dst_data.val[0] = 255;
                dst_data.val[1] = 255;
                dst_data.val[2] = 255;
            }
            dstImage << dst_data;
        }
    }
}
/**
 * Top-level function for HW synthesis.
 *
 * Converts the three input AXI video streams to images, runs FluoDetect on
 * the first and FindTarget on the second, overlays both results on the third
 * with Composition, and sends the composed frame out on dst_axi.
 *
 * @param src_axi  Input stream processed by FluoDetect.
 * @param src_axi1 Input stream processed by FindTarget.
 * @param src_axi2 Input stream that receives the overlay.
 * @param dst_axi  Output stream.
 * @param rows     Frame height used for every image created here.
 * @param cols     Frame width used for every image created here.
 * @param model    Lookup table forwarded to FluoDetect.
 * @return Always 0.
 */
int hls_XiangAnWO3(AXI_STREAM& src_axi, AXI_STREAM& src_axi1, AXI_STREAM& src_axi2, AXI_STREAM& dst_axi, int rows, int cols, unsigned char model[16777216])
{
// The four video ports are AXI4-Stream interfaces.
#pragma HLS INTERFACE axis port=src_axi
#pragma HLS INTERFACE axis port=dst_axi
#pragma HLS INTERFACE axis port=src_axi1
#pragma HLS INTERFACE axis port=src_axi2
// rows, cols and the return value share the CONTROL_BUS bundle.
#pragma HLS RESOURCE core=AXI_SLAVE variable=rows metadata="-bus_bundle CONTROL_BUS"
#pragma HLS RESOURCE core=AXI_SLAVE variable=cols metadata="-bus_bundle CONTROL_BUS"
#pragma HLS RESOURCE core=AXI_SLAVE variable=return metadata="-bus_bundle CONTROL_BUS"
#pragma HLS INTERFACE ap_stable port=rows
#pragma HLS INTERFACE ap_stable port=cols
    RGB_IMAGE fluo_input(rows, cols);     // from src_axi
    RGB_IMAGE target_input(rows, cols);   // from src_axi1
    GRAY_IMAGE fluo_mask(rows, cols);     // FluoDetect result
    GRAY_IMAGE target_edges(rows, cols);  // FindTarget result
    RGB_IMAGE overlay_base(rows, cols);   // from src_axi2
    RGB_IMAGE composed(rows, cols);       // Composition result
#pragma HLS dataflow
    hls::AXIvideo2Mat(src_axi, fluo_input);
    hls::AXIvideo2Mat(src_axi1, target_input);
    hls::AXIvideo2Mat(src_axi2, overlay_base);
    FluoDetect(fluo_input, fluo_mask, model);
    FindTarget(target_input, target_edges);
    Composition(fluo_mask, target_edges, overlay_base, composed);
    hls::Mat2AXIvideo(composed, dst_axi);
    return 0;
}
性能report是:
大概要花28.5ms!!!!
1、第一次优化:arbitrary precision C++ types(任意精度数据类型)------reduce resource area
#ifndef _XIANGANWO3_H_
#define _XIANGANWO3_H_
#include "hls_video.h"
// NOTE(review): the two #include targets below were missing from this listing.
// They are restored from what the code uses (ap_uint<> and assert) -- confirm.
#include <ap_int.h>
#include <assert.h>
// Arbitrary-precision types: a 1-bit flag and an 11-bit dimension/counter.
typedef ap_uint<1> uint1;
typedef ap_uint<11> uint11;
// maximum image size
#define MAX_WIDTH 1936
#define MAX_HEIGHT 1456
// typedef video library core structures
// NOTE(review): the template arguments of AXI_STREAM, RGB_IMAGE and GRAY_IMAGE
// were missing from this listing (the typedefs did not compile). They are
// restored here following the usual hls_video pattern; the 24-bit AXI data
// width (3 x 8-bit channels) is an assumption -- confirm against the real project.
typedef hls::stream<ap_axiu<24, 1, 1, 1> > AXI_STREAM;
typedef hls::Scalar<3, unsigned char> RGB_PIXEL;
typedef hls::Scalar<1, unsigned char> GRAY_PIXEL;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC3> RGB_IMAGE;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC1> GRAY_IMAGE;
// top level function for HW synthesis
// src_axi / src_axi1 / src_axi2: input video streams; dst_axi: output video stream;
// rows / cols: frame size; model: 1-bit lookup table with one entry per 24-bit colour.
uint1 hls_XiangAnWO3(AXI_STREAM& src_axi, AXI_STREAM& src_axi1, AXI_STREAM& src_axi2, AXI_STREAM& dst_axi, uint11 rows, uint11 cols, uint1 model[16777216]);
#endif
#include "XiangAnWO3.h"
/**
 * Produces a binary mask from an RGB frame by looking every pixel up in `model`.
 *
 * Stages, in order: hls::Scale (factor 1.1), per-pixel table lookup,
 * hls::Dilate, hls::Erode.
 *
 * @param srcImage  RGB input frame (a stream: every pixel is consumed once).
 * @param FluoImage Single-channel result after the dilate/erode pass.
 * @param rows      Frame height; asserted to be at most MAX_HEIGHT.
 * @param cols      Frame width; asserted to be at most MAX_WIDTH.
 * @param model     1-bit lookup table indexed by R + G*256 + B*65536, where
 *                  B, G, R are channels 0, 1, 2 of the scaled pixel.
 */
void FluoDetect(RGB_IMAGE& srcImage, GRAY_IMAGE& FluoImage, uint11 rows, uint11 cols, uint1 model[16777216])
{
// NOTE(review): UNROLL is normally placed inside a loop body; confirm that it
// has any effect at function scope.
#pragma HLS UNROLL
#pragma HLS ARRAY_RESHAPE variable=model block factor=64
    // Intermediates are sized rows x cols, matching the loop below. With
    // MAX_HEIGHT x MAX_WIDTH intermediates, hls::Dilate / hls::Erode expect
    // more pixels than the rows x cols loop writes whenever the frame is
    // smaller than the maximum.
    RGB_IMAGE img(rows, cols);
    GRAY_IMAGE img1(rows, cols);
    GRAY_IMAGE img2(rows, cols);
#pragma HLS dataflow
    hls::Scale(srcImage, img, 1.1);
    // The asserts bound the loop trip counts.
    assert(rows <= MAX_HEIGHT);
    assert(cols <= MAX_WIDTH);
    loop_height: for (uint11 i = 0; i < rows; i++) {
        loop_width: for (uint11 j = 0; j < cols; j++) {
#pragma HLS pipeline II=1
#pragma HLS DEPENDENCE variable=model inter false
            RGB_PIXEL src_data;
            GRAY_PIXEL dst_data(0);
            img >> src_data;
            const unsigned char B = src_data.val[0];
            const unsigned char G = src_data.val[1];
            const unsigned char R = src_data.val[2];
            // Pack the three channels into one 24-bit table index.
            const int rgbpixels = R + G * 256 + B * 256 * 256;
            const uint1 rgbelement = model[rgbpixels];
            // A 1-bit hit becomes 255, a miss becomes 0.
            dst_data.val[0] = rgbelement * 255;
            img1 << dst_data;
        }
    }
    hls::Dilate(img1, img2);
    hls::Erode(img2, FluoImage);
}
/**
 * Extracts an edge image of the dark regions of an RGB frame.
 *
 * Stages, in order: hls::Scale (factor 1.5), grey conversion, inverse binary
 * threshold (threshold 38, max value 255), three hls::Erode passes, and a
 * 3x3 Sobel filter in the x direction.
 *
 * @param srcImage RGB input frame (a stream: every pixel is consumed once).
 * @param dstImage Single-channel Sobel output.
 */
void FindTarget(RGB_IMAGE& srcImage, GRAY_IMAGE& dstImage)
{
    // Intermediates take the size of the incoming frame. With MAX_HEIGHT x
    // MAX_WIDTH intermediates the later stages expect more pixels than
    // hls::Scale forwards whenever the frame is smaller than the maximum.
    RGB_IMAGE img(srcImage.rows, srcImage.cols);
    GRAY_IMAGE img1(srcImage.rows, srcImage.cols);
    GRAY_IMAGE img2(srcImage.rows, srcImage.cols);
    GRAY_IMAGE img3(srcImage.rows, srcImage.cols);
    GRAY_IMAGE img4(srcImage.rows, srcImage.cols);
    GRAY_IMAGE img5(srcImage.rows, srcImage.cols);
#pragma HLS dataflow
    hls::Scale(srcImage, img, 1.5);
    // NOTE(review): hls::CvtColor needs an explicit conversion code, which was
    // missing from this listing. HLS_BGR2GRAY is assumed because FluoDetect
    // treats channel 0 as blue -- confirm against the real project.
    hls::CvtColor<HLS_BGR2GRAY>(img, img1);
    hls::Threshold(img1, img2, 38, 255, HLS_THRESH_BINARY_INV);
    hls::Erode(img2, img3);
    hls::Erode(img3, img4);
    hls::Erode(img4, img5);
    hls::Sobel<1, 0, 3>(img5, dstImage);
}
/**
 * Overlays two single-channel masks on an RGB frame.
 *
 * Per pixel: a value of 255 in the first mask gives (0, 0, 255); failing
 * that, 255 in the second mask gives (255, 255, 255); otherwise the pixel
 * of srcImage3 goes through untouched.
 *
 * @param srcImage1 First mask (takes priority).
 * @param srcImage2 Second mask.
 * @param srcImage3 RGB frame that receives the overlay.
 * @param dstImage  Composed RGB output.
 * @param rows      Frame height; asserted to be at most MAX_HEIGHT.
 * @param cols      Frame width; asserted to be at most MAX_WIDTH.
 */
void Composition(GRAY_IMAGE& srcImage1, GRAY_IMAGE& srcImage2, RGB_IMAGE& srcImage3, RGB_IMAGE& dstImage, uint11 rows, uint11 cols)
{
#pragma HLS UNROLL
    assert(rows <= MAX_HEIGHT);
    assert(cols <= MAX_WIDTH);
    loop_height: for (uint11 r = 0; r < rows; r++) {
        loop_width: for (uint11 c = 0; c < cols; c++) {
#pragma HLS pipeline II=1
            RGB_PIXEL out_px(0, 0, 0);
            GRAY_PIXEL first_mask(0), second_mask(0);
            // Pull one pixel from each of the three inputs.
            srcImage3 >> out_px;
            srcImage1 >> first_mask;
            srcImage2 >> second_mask;
            const bool first_hit = (first_mask.val[0] == 255);
            const bool second_hit = (second_mask.val[0] == 255);
            if (first_hit || second_hit) {
                // The first mask wins when both are set: channels 0 and 1
                // are cleared for it and saturated for the second mask.
                const unsigned char low_channels = first_hit ? 0 : 255;
                out_px.val[0] = low_channels;
                out_px.val[1] = low_channels;
                out_px.val[2] = 255;
            }
            dstImage << out_px;
        }
    }
}
/**
 * Top-level function for HW synthesis (arbitrary-precision variant).
 *
 * Converts the three input AXI video streams to images, runs FluoDetect on
 * the first and FindTarget on the second, overlays both results on the third
 * with Composition, and sends the composed frame out on dst_axi.
 *
 * @param src_axi  Input stream processed by FluoDetect.
 * @param src_axi1 Input stream processed by FindTarget.
 * @param src_axi2 Input stream that receives the overlay.
 * @param dst_axi  Output stream.
 * @param rows     Frame height used for every image created here.
 * @param cols     Frame width used for every image created here.
 * @param model    1-bit lookup table forwarded to FluoDetect.
 * @return Always 0.
 */
uint1 hls_XiangAnWO3(AXI_STREAM& src_axi, AXI_STREAM& src_axi1, AXI_STREAM& src_axi2, AXI_STREAM& dst_axi, uint11 rows, uint11 cols, uint1 model[16777216])
{
// The four video ports are AXI4-Stream interfaces.
#pragma HLS INTERFACE axis port=src_axi
#pragma HLS INTERFACE axis port=dst_axi
#pragma HLS INTERFACE axis port=src_axi1
#pragma HLS INTERFACE axis port=src_axi2
// rows, cols and the return value share the CONTROL_BUS bundle.
#pragma HLS RESOURCE core=AXI_SLAVE variable=rows metadata="-bus_bundle CONTROL_BUS"
#pragma HLS RESOURCE core=AXI_SLAVE variable=cols metadata="-bus_bundle CONTROL_BUS"
#pragma HLS RESOURCE core=AXI_SLAVE variable=return metadata="-bus_bundle CONTROL_BUS"
#pragma HLS INTERFACE ap_stable port=rows
#pragma HLS INTERFACE ap_stable port=cols
    RGB_IMAGE fluo_input(rows, cols);     // from src_axi
    RGB_IMAGE target_input(rows, cols);   // from src_axi1
    GRAY_IMAGE fluo_mask(rows, cols);     // FluoDetect result
    GRAY_IMAGE target_edges(rows, cols);  // FindTarget result
    RGB_IMAGE overlay_base(rows, cols);   // from src_axi2
    RGB_IMAGE composed(rows, cols);       // Composition result
#pragma HLS dataflow
    hls::AXIvideo2Mat(src_axi, fluo_input);
    hls::AXIvideo2Mat(src_axi1, target_input);
    hls::AXIvideo2Mat(src_axi2, overlay_base);
    FluoDetect(fluo_input, fluo_mask, rows, cols, model);
    FindTarget(target_input, target_edges);
    Composition(fluo_mask, target_edges, overlay_base, composed, rows, cols);
    hls::Mat2AXIvideo(composed, dst_axi);
    return uint1(0);
}
综合报告为:
虽然没能降低时延,但FF和LUT资源占用少了很多!分别少了100!
2、第二次优化:全局变量、loop bound优化------减少时延
/**
 * Produces a binary mask from an RGB frame by looking every pixel up in `model`.
 *
 * Stages, in order: hls::Scale (factor 1.1), per-pixel table lookup,
 * hls::Dilate, hls::Erode.
 *
 * @param srcImage  RGB input frame (a stream: every pixel is consumed once).
 * @param FluoImage Single-channel result after the dilate/erode pass.
 * @param rows      Frame height; asserted to be at most MAX_HEIGHT.
 * @param cols      Frame width; asserted to be at most MAX_WIDTH.
 * @param model     1-bit lookup table indexed by R + G*256 + B*65536, where
 *                  B, G, R are channels 0, 1, 2 of the scaled pixel.
 */
void FluoDetect(RGB_IMAGE& srcImage, GRAY_IMAGE& FluoImage, uint11 rows, uint11 cols, uint1 model[16777216])
{
// NOTE(review): UNROLL is normally placed inside a loop body; confirm that it
// has any effect at function scope.
#pragma HLS UNROLL
#pragma HLS ARRAY_RESHAPE variable=model block factor=64
    // Intermediates are sized rows x cols, matching the loop below. With
    // MAX_HEIGHT x MAX_WIDTH intermediates, hls::Dilate / hls::Erode expect
    // more pixels than the rows x cols loop writes whenever the frame is
    // smaller than the maximum.
    RGB_IMAGE img(rows, cols);
    GRAY_IMAGE img1(rows, cols);
    GRAY_IMAGE img2(rows, cols);
#pragma HLS dataflow
    hls::Scale(srcImage, img, 1.1);
    // The asserts bound the loop trip counts.
    assert(rows <= MAX_HEIGHT);
    assert(cols <= MAX_WIDTH);
    loop_height: for (uint11 i = 0; i < rows; i++) {
        loop_width: for (uint11 j = 0; j < cols; j++) {
#pragma HLS pipeline II=1
#pragma HLS DEPENDENCE variable=model inter false
            RGB_PIXEL src_data;
            GRAY_PIXEL dst_data(0);
            img >> src_data;
            const unsigned char B = src_data.val[0];
            const unsigned char G = src_data.val[1];
            const unsigned char R = src_data.val[2];
            // Pack the three channels into one 24-bit table index.
            const int rgbpixels = R + G * 256 + B * 256 * 256;
            const uint1 rgbelement = model[rgbpixels];
            // A 1-bit hit becomes 255, a miss becomes 0.
            dst_data.val[0] = rgbelement * 255;
            img1 << dst_data;
        }
    }
    hls::Dilate(img1, img2);
    hls::Erode(img2, FluoImage);
}
/**
 * Overlays two single-channel masks on an RGB frame.
 *
 * Per pixel: a value of 255 in the first mask gives (0, 0, 255); failing
 * that, 255 in the second mask gives (255, 255, 255); otherwise the pixel
 * of srcImage3 goes through untouched.
 *
 * @param srcImage1 First mask (takes priority).
 * @param srcImage2 Second mask.
 * @param srcImage3 RGB frame that receives the overlay.
 * @param dstImage  Composed RGB output.
 * @param rows      Frame height; asserted to be at most MAX_HEIGHT.
 * @param cols      Frame width; asserted to be at most MAX_WIDTH.
 */
void Composition(GRAY_IMAGE& srcImage1, GRAY_IMAGE& srcImage2, RGB_IMAGE& srcImage3, RGB_IMAGE& dstImage, uint11 rows, uint11 cols)
{
#pragma HLS UNROLL
    assert(rows <= MAX_HEIGHT);
    assert(cols <= MAX_WIDTH);
    loop_height: for (uint11 r = 0; r < rows; r++) {
        loop_width: for (uint11 c = 0; c < cols; c++) {
#pragma HLS pipeline II=1
            RGB_PIXEL out_px(0, 0, 0);
            GRAY_PIXEL first_mask(0), second_mask(0);
            // Pull one pixel from each of the three inputs.
            srcImage3 >> out_px;
            srcImage1 >> first_mask;
            srcImage2 >> second_mask;
            const bool first_hit = (first_mask.val[0] == 255);
            const bool second_hit = (second_mask.val[0] == 255);
            if (first_hit || second_hit) {
                // The first mask wins when both are set: channels 0 and 1
                // are cleared for it and saturated for the second mask.
                const unsigned char low_channels = first_hit ? 0 : 255;
                out_px.val[0] = low_channels;
                out_px.val[1] = low_channels;
                out_px.val[2] = 255;
            }
            dstImage << out_px;
        }
    }
}
综合后,查看报告,并没有减少时延!!!依旧是28.5ms!!
第3次优化:DATAFLOW-----减少时延
// FluoDetect, DATAFLOW experiment: scales srcImage by 1.1 into img, maps every
// pixel through the model lookup table into img1, then dilates and erodes
// into FluoImage. rows/cols bound the lookup loop; model is indexed by
// R + G*256 + B*65536 (B, G, R = channels 0, 1, 2).
// In this variant the DATAFLOW pragma sits inside the pipelined inner loop
// instead of at function scope.
// NOTE(review): the intermediates are declared MAX_HEIGHT x MAX_WIDTH while
// the loop writes rows x cols pixels -- confirm the library stages cope when
// the frame is smaller than the maximum.
void FluoDetect(RGB_IMAGE& srcImage,GRAY_IMAGE& FluoImage,uint11 rows,uint11 cols,uint1 model[16777216])
{
// NOTE(review): UNROLL is normally placed inside a loop body; confirm its effect here.
#pragma HLS UNROLL
#pragma HLS ARRAY_RESHAPE variable=model block factor=64
RGB_IMAGE img(MAX_HEIGHT, MAX_WIDTH);
GRAY_IMAGE img1(MAX_HEIGHT, MAX_WIDTH);
GRAY_IMAGE img2(MAX_HEIGHT, MAX_WIDTH);
hls::Scale(srcImage,img,1.1);
// The asserts bound the loop trip counts.
assert(rows<=MAX_HEIGHT);
assert(cols<=MAX_WIDTH);
loop_height: for (uint11 i = 0; i < rows; i++) {
loop_width: for (uint11 j = 0; j < cols; j++) {
// PIPELINE and DATAFLOW are both requested on this same inner loop.
#pragma HLS pipeline II=1
#pragma HLS DEPENDENCE variable=model inter false
#pragma HLS dataflow
RGB_PIXEL src_data;
GRAY_PIXEL dst_data(0);
img>>src_data;
unsigned char B = src_data.val[0];
unsigned char G = src_data.val[1];
unsigned char R = src_data.val[2];
// Pack the three channels into one 24-bit table index.
int rgbpixels = R + G * 256 + B * 256 * 256;
uint1 rgbelement = model[rgbpixels];
// A 1-bit hit becomes 255, a miss becomes 0.
dst_data.val[0]= rgbelement*255;
img1 << dst_data;
}
}
hls::Dilate(img1,img2);
hls::Erode(img2,FluoImage);
}
// Composition, DATAFLOW experiment: overlays two masks on srcImage3.
// Per pixel: srcImage1 == 255 gives (0, 0, 255); otherwise srcImage2 == 255
// gives (255, 255, 255); otherwise the srcImage3 pixel passes through.
// rows/cols bound the loop. In this variant the DATAFLOW pragma sits inside
// the pipelined inner loop, next to the if/else below.
void Composition(GRAY_IMAGE& srcImage1,GRAY_IMAGE& srcImage2,RGB_IMAGE& srcImage3,RGB_IMAGE& dstImage,uint11 rows,uint11 cols)
{
// NOTE(review): UNROLL is normally placed inside a loop body; confirm its effect here.
#pragma HLS UNROLL
// The asserts bound the loop trip counts.
assert(rows<=MAX_HEIGHT);
assert(cols<=MAX_WIDTH);
loop_height: for (uint11 i = 0; i < rows; i++) {
loop_width: for (uint11 j = 0; j < cols; j++) {
// PIPELINE and DATAFLOW are both requested on this same inner loop.
#pragma HLS pipeline II=1
#pragma HLS dataflow
GRAY_PIXEL src_data1(0),src_data2(0);
RGB_PIXEL dst_data(0,0,0);
// One pixel is taken from each of the three inputs per iteration.
srcImage3 >>dst_data;
srcImage1>>src_data1;
srcImage2>>src_data2;
unsigned char data1=src_data1.val[0];
unsigned char data2=src_data2.val[0];
// The first mask takes priority over the second.
if(data1==255)
{
dst_data.val[0]=0;
dst_data.val[1]=0;
dst_data.val[2]=255;
}
else if(data2==255)
{
dst_data.val[0]=255;
dst_data.val[1]=255;
dst_data.val[2]=255;
}
dstImage << dst_data;
}
}
}
其实我仿真时将一个地方不小心写错了参数,然后仿真报警告:
'hls::stream.1' is read while empty, which may result in RTL simulation hanging.
这个是因为将hls::stream 变量或hls::Mat变量重复使用了!!众所周知,ug902中写过hls类型的变量只能使用一次作为输入参数!!!再次使用时里面的数据其实已经不在了(已经被读走),所以只能使用一次,否则就会报这个警告!!!
修改后仿真时又报警告:
simulation :warning:Hls::stream 'hls::stream.33' contains leftover data, which may result in RTL simulation hanging.
这个警告是什么原因,我还不知道!(从字面上看,它表示仿真结束时该 stream 中还有没被读走的数据,即写入的数据比读出的多。)
我先没理警告继续综合会报错:
...dataflow...conditional execution on /opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h:648:37 is not supported
这是因为 DATAFLOW 优化区域中不支持条件执行(例如 if() 条件语句)!否则无法综合!!!!
4、第4次优化:DATAFLOW---if branch----multi-access---减少时延
将刚刚不允许条件语句中DATAFLOW优化的部分改成了这样:
// FluoDetect, fourth attempt: scales srcImage by 1.1 into img, maps every
// pixel through the model lookup table into img1, then dilates and erodes
// into FluoImage. rows/cols bound the lookup loop; model is indexed by
// R + G*256 + B*65536 (B, G, R = channels 0, 1, 2).
// The DATAFLOW pragma is still inside the pipelined inner loop.
// NOTE(review): the intermediates are declared MAX_HEIGHT x MAX_WIDTH while
// the loop writes rows x cols pixels -- confirm the library stages cope when
// the frame is smaller than the maximum.
void FluoDetect(RGB_IMAGE& srcImage,GRAY_IMAGE& FluoImage,uint11 rows,uint11 cols,uint1 model[16777216])
{
// NOTE(review): UNROLL is normally placed inside a loop body; confirm its effect here.
#pragma HLS UNROLL
#pragma HLS ARRAY_RESHAPE variable=model block factor=64
RGB_IMAGE img(MAX_HEIGHT, MAX_WIDTH);
GRAY_IMAGE img1(MAX_HEIGHT, MAX_WIDTH);
GRAY_IMAGE img2(MAX_HEIGHT, MAX_WIDTH);
hls::Scale(srcImage,img,1.1);
// The asserts bound the loop trip counts.
assert(rows<=MAX_HEIGHT);
assert(cols<=MAX_WIDTH);
loop_height: for (uint11 i = 0; i < rows; i++) {
loop_width: for (uint11 j = 0; j < cols; j++) {
// PIPELINE and DATAFLOW are both requested on this same inner loop.
#pragma HLS PIPELINE II=1
#pragma HLS DEPENDENCE variable=model inter false
#pragma HLS DATAFLOW
RGB_PIXEL src_data;
GRAY_PIXEL dst_data(0);
img>>src_data;
unsigned char B = src_data.val[0];
unsigned char G = src_data.val[1];
unsigned char R = src_data.val[2];
// Pack the three channels into one 24-bit table index.
int rgbpixels = R + G * 256 + B * 256 * 256;
uint1 rgbelement = model[rgbpixels];
// A 1-bit hit becomes 255, a miss becomes 0.
dst_data.val[0]= rgbelement*255;
img1 << dst_data;
}
}
hls::Dilate(img1,img2);
hls::Erode(img2,FluoImage);
}
// Composition, fourth attempt: the if/else overlay is replaced by arithmetic.
// The two masks are converted to AXI streams (src1, src2) and read back as
// 8-bit data, srcImage3 is duplicated into src3/src4, each output pixel is
// written to src4, and src4 is finally sent out through dstImage (now an
// AXI stream instead of an RGB_IMAGE).
// NOTE(review): AXI_STREAM8 is not declared in the header shown in this file;
// presumably an AXI stream typedef with an 8-bit data field -- confirm.
void Composition(GRAY_IMAGE& srcImage1,GRAY_IMAGE& srcImage2,RGB_IMAGE& srcImage3,AXI_STREAM& dstImage,uint11 rows,uint11 cols)
{
// NOTE(review): UNROLL is normally placed inside a loop body; confirm its effect here.
#pragma HLS UNROLL
AXI_STREAM8 src1;
AXI_STREAM8 src2;
RGB_IMAGE src3,src4;
hls::Mat2AXIvideo(srcImage1, src1);
hls::Mat2AXIvideo(srcImage2, src2);
// NOTE(review): src4 is an output of hls::Duplicate here and is written again
// by the loop below; presumably it then holds more pixels than the final
// Mat2AXIvideo reads -- confirm.
hls::Duplicate(srcImage3, src3,src4);
// The asserts bound the loop trip counts.
assert(rows<=MAX_HEIGHT);
assert(cols<=MAX_WIDTH);
loop_height: for (uint11 i = 0; i < rows; i++) {
loop_width: for (uint11 j = 0; j < cols; j++) {
// DATAFLOW and PIPELINE are both requested on this same inner loop.
#pragma HLS DATAFLOW
#pragma HLS PIPELINE II=1
RGB_PIXEL dst_data(0,0,0);
src3 >>dst_data;
ap_uint<8> data1=src1.read().data;
ap_uint<8> data2=src2.read().data;
// Branch-free form of the commented-out if/else below.
// NOTE(review): the two agree only when data1 and data2 are each 0 or 255.
// For example data1 == 0, data2 == 100 gives val[0] == 100 here, whereas the
// `data2==255` test would leave the pixel unchanged.
dst_data.val[0]=(1-data1/255)*data2+(!data1)*(!data2)*dst_data.val[0];
dst_data.val[1]=(1-data1/255)*data2+(!data1)*(!data2)*dst_data.val[1];
dst_data.val[2]=255*(data1 || data2)+(!data1)*(!data2)*dst_data.val[2];
src4 << dst_data;
// if(data1==255)
// {
// dst_data.val[0]=0;
// dst_data.val[1]=0;
// dst_data.val[2]=255;
// src4 << dst_data;
// }
// else if(data2==255)
// {
// dst_data.val[0]=255;
// dst_data.val[1]=255;
// dst_data.val[2]=255;
// src4 << dst_data;
// }
}
}
hls::Mat2AXIvideo(src4, dstImage);
}
注释掉的就是之前报错的条件语句。DATAFLOW优化对条件语句真是苛刻,只要出现if() ,那么在这个作用域内这个优化就用不了。所以既然想用这个优化,那么就别用条件语句。另一个函数中的if我也改成了不用条件的形式。
同时,还改了对参数的多次访问,使用local cache!!!
综合后,报了新错:看错误信息,好像刚刚那个函数已经成功应用优化了,现在报错的是另一个函数:
INFO: [XFORM 203-721] Extract dataflow region from loop loop_width (Xiangan_wd/src/XiangAnWO3.cpp:82) of function 'Composition'.
INFO: [XFORM 203-721] Extract dataflow region from loop loop_width (Xiangan_wd/src/XiangAnWO3.cpp:19) of function 'FluoDetect'.
WARNING: [XFORM 203-713] Disabling dataflow in loop loop_width (Xiangan_wd/src/XiangAnWO3.cpp:19) of function 'FluoDetect' .
WARNING: [XFORM 203-713] Disabling dataflow in loop loop_width (Xiangan_wd/src/XiangAnWO3.cpp:19) of function 'FluoDetect' .
INFO: [XFORM 203-712] Store statement on variable 'tmp.3516' in a dataflow region ( 'dataflow_in_loop_loop_width' (/opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h:83:37)) is synthesized to a separate process, please move it inside another function for better QoR.
INFO: [XFORM 203-712] Store statement on variable 'tmp.3516' in a dataflow region ( 'dataflow_in_loop_loop_width' (/opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h:83:37)) is synthesized to a separate process, please move it inside another function for better QoR.
INFO: [XFORM 203-712] Store statement on variable 'tmp.3516' in a dataflow region ( 'dataflow_in_loop_loop_width' (/opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h:83:37)) is synthesized to a separate process, please move it inside another function for better QoR.
INFO: [XFORM 203-712] Store statement on variable 'tmp.312' in a dataflow region ( 'dataflow_in_loop_loop_width403' (/opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h:22:37)) is synthesized to a separate process, please move it inside another function for better QoR.
WARNING: [XFORM 203-713] Reading dataflow channel 'model.V' in the middle of dataflow may stall the dataflow pipeline:
WARNING: [XFORM 203-713] Argument 'model.V' has read operations in process function '__/opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h_line648_proc' (/opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h:33:37).
WARNING: [XFORM 203-713] Reading dataflow channel 'model.V' (Xiangan_wd/src/XiangAnWO3.cpp:117) in the middle of dataflow may stall the dataflow pipeline:
WARNING: [XFORM 203-713] Argument 'model.V' has read operations in process function 'FluoDetect' (Xiangan_wd/src/XiangAnWO3.cpp:4).
WARNING: [XFORM 203-713] Reading dataflow channel 'model.V' in the middle of dataflow may stall the dataflow pipeline:
Reading dataflow channel 'model.V' in the middle of dataflow may stall the dataflow pipeline:
INFO: [XFORM 203-712] Store statement on variable 'tmp.3516' in a dataflow region ( 'dataflow_in_loop_loop_width' (/opt/Xilinx/Vivado/2017.4/common/technology/autopilot/hls/hls_video_core.h:83:37)) is synthesized to a separate process, please move it inside another function for better QoR.
ERROR: [XFORM 203-801] Only one data field is allowed in AXI-Stream mode, however there are 3 data fields: srcImage3.data_stream[1].V srcImage3.data_stream[2].V dstImage.V.data.V
可以看到Extract dataflow region from ... of function 'Composition' 时没报错,但Extract dataflow region from...of function 'FluoDetect' 时报了一些错!我看了下那个函数,对参数model的multi-access 这个行为好像不好,因为ug902里说过不要对参数进行multi-access,如果要就用local cache来做!所以不管如何,我先修改这个问题。
5,第5次优化---
先看下 https://forums.xilinx.com/t5/Vivado/%E8%B7%9F-Xilinx-SAE-%E5%AD%A6-HLS-%E6%8C%81%E7%BB%AD%E6%9B%B4%E6%96%B0-%E4%B8%AD%E6%96%87%E8%AE%B2%E8%A7%A3/m-p/708179 这个是HLS 优化视频。
http://www.openhw.org/module/forum/forum.php?mod=viewthread&tid=595792&highlight=HLS%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0
http://www.openhw.org/module/forum/forum.php?mod=viewthread&tid=595819&highlight=HLS%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0
http://www.openhw.org/module/forum/forum.php?mod=viewthread&tid=595929&highlight=HLS%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0
http://www.openhw.org/module/forum/forum.php?mod=viewthread&tid=658891&highlight=Vivado%2BHLS
http://www.openhw.org/module/forum/forum.php?mod=viewthread&tid=658879&highlight=Vivado%2BHLS
http://www.openhw.org/module/forum/forum.php?mod=viewthread&tid=659217&highlight=Vivado%2BHLS
这几个都是讲HLS优化的实例,非常非常有用!!!!!!!而且写得非常非常好!!!!!!!
我看完了,但还没实践,抽时间将这几个网址的实例实践感受下。