比较硬件pragma,pipeline,里面的参数

比较硬件pragma,pipeline,里面的参数_第1张图片

for(int j = 0; j < dim; j++) {
    #pragma HLS PIPELINE
    #pragma HLS LOOP_TRIPCOUNT min=c_size_min max=c_size_max
        out[j] = in1[j] * in2[j];
//dim 40960
使用 `#pragma SDS data zero_copy(in1[0:dim], in2[0:dim], out[0:dim])` 之后的优化结果:

比较硬件pragma,pipeline,里面的参数_第2张图片

在 complexf-1 中:

//loop tripcount constant
    const int c_size = DATA_SIZE;//64

    //Local memory to store input and output matrices
    int local_in1[MAX_SIZE][MAX_SIZE];//64*64
    int local_in2[MAX_SIZE][MAX_SIZE];
    int local_out[MAX_SIZE][MAX_SIZE];

    //Physical implementation of memories have only a limited number of read/write
    //ports, that can be overcome by using the ARRAY_PARTITION pragma
//    #pragma HLS ARRAY_PARTITION variable=local_in1 complete dim=2
//    #pragma HLS ARRAY_PARTITION variable=local_in2 complete dim=1

    //When loop_3 is unrolled automatically, the column "k" in local_in1[i][k] is variable
    //whereas in case of local_in2[k][j] the row "k" is variable. So, for effective pipelined
    //processing, local_in1 has been partitioned in dimension 2 and local_in2 is
    //partitioned in dimension 1.

    //Burst read on input matrices local_in1 and local_in2 from DDR memory.
    read_in: for(int iter = 0, i=0, j=0; iter< dim*dim; iter++,j++){
    #pragma HLS PIPELINE
    #pragma HLS LOOP_TRIPCOUNT min=c_size*c_size max=c_size*c_size
        if( j== dim){ j = 0; i++; }
        local_in1[i][j] = in1[iter].real();
        local_in2[i][j] = in2[iter];
    }

    //Reads the input_data from local memory, performs the
    //computations and writes the data to local memory.
    loop_1: for (int i = 0 ; i < dim ; i++){
    #pragma HLS LOOP_TRIPCOUNT min=c_size max=c_size
        loop_2: for(int j = 0 ; j < dim ; j++){
        #pragma HLS LOOP_TRIPCOUNT min=c_size max=c_size
        //Pipelining a loop results in automatic unrolling of inner loops by the HLS compiler.
        #pragma HLS PIPELINE
            int res = 0;
            loop_3: for(int k = 0; k < c_size; k++){
//#pragma HLS PIPELINE 12/15/3808/2401
		//To enable automatic unrolling of loop, the no. of iterations
		//need to be a compile time constant, so 'c_size' is specified
		//here instead of 'dim', which is not a compile time constant.
                res += local_in1[i][k]*local_in2[k][j];
            }
            local_out[i][j] = res;

当把上面的 64 改成 640 时报错:BRAM 不够(三个 640×640 的 int 本地数组,按每个 int 4 字节计,共约 4.9 MB)。

比较硬件pragma,pipeline,里面的参数_第3张图片

 

你可能感兴趣的:(比较硬件pragma,pipeline,里面的参数)