CSAPP perfab

CSAPP perfab

Part A
rotate实验,把nxn的正方形图片逆时针旋转90度,这是一个内存敏感的程序,优化的主要思路是分块(和cachelab很像)。由于图片的边长都是32的倍数,所以我们选择块大小为4,8,16,32去尝试哪一种性能更好,最终选择为16x16的块

Part B
smooth实验,这是一个计算敏感的程序,优化的主要思路是:
1、去掉不必要的过程调用:可以看到,为了处理边缘点,原程序用了很多min和max判断
在这里插入图片描述
好处就是代码简洁,坏处就是影响性能CPE。我们分别处理4个角点,4条边缘边和中间部分,并且将2x2,2x3,3x2和3x3的求和小循环展开。
2、中间部分,我们按照外层循环列,内层循环行的循环方式,不求3x3块的和,因为这样每种颜色要9次加法,我们可以利用上一次的sum结果,减去一行,再加上一行的计算方法,如图:
CSAPP perfab_第1张图片
当我们计算完(1,1)处的sum时,我们接着计算(2,1)处的sum,此时,蓝色部分是3x3块重合的部分,我们只需要用(1,1)的结果,减去第0行的红色块,再加上第3行的绿色块,就可以得到(2,1)处的sum,这样的话,计算一个3x3的sum就只需要6次加法,比直接加9次快了1/3
然后为啥要内层按行循环呢,因为考虑到空间局部性,内层按行循环时,3次减法和3次加法操作数是空间相邻的,大概率只有2次cache不命中。

下面图片是我的结果,其中,naive的代码是未优化的,current working是我优化过的,可以发现,即使是未优化的,性能也很不错了,我猜测是机器性能好或者硬件优化好?
CSAPP perfab_第2张图片
下面是我的kernel.c代码

/********************************************************
 * Kernels to be optimized for the CS:APP Performance Lab
 ********************************************************/

#include 
#include 
#include "defs.h"

/* 
 * Please fill in the following team struct 
 */
team_t team = {
    "haha",              /* Team name */

    "lym",     /* First member full name */
    "[email protected]",  /* First member email address */

    "",                   /* Second member full name (leave blank if none) */
    ""                    /* Second member email addr (leave blank if none) */
};

/***************
 * ROTATE KERNEL
 ***************/

/******************************************************
 * Your different versions of the rotate kernel go here
 ******************************************************/

/* 
 * naive_rotate - The naive baseline version of rotate 
 */
char naive_rotate_descr[] = "naive_rotate: Naive baseline implementation";
void naive_rotate(int dim, pixel *src, pixel *dst) 
{
    int i, j;

    for (i = 0; i < dim; i++)
	for (j = 0; j < dim; j++)
	    dst[RIDX(dim-1-j, i, dim)] = src[RIDX(i, j, dim)];
}

/* 
 * rotate - Your current working version of rotate
 * IMPORTANT: This is the version you will be graded on
 */
char rotate_descr[] = "rotate: Current working version";
void rotate(int dim, pixel *src, pixel *dst) 
{
    //naive_rotate(dim, src, dst);
    int block, i, j, ii, jj;
    block = 16;
    for (i = 0; i < dim; i += block) {
        for (j = 0; j < dim; j += block) {
            for (ii = i; ii < i+block; ii++) {
                for (jj = j; jj < j+block; jj++) {
                    dst[RIDX(dim-1-jj, ii, dim)] = src[RIDX(ii, jj, dim)];
                }

            }
        }
    }

}

/*********************************************************************
 * register_rotate_functions - Register all of your different versions
 *     of the rotate kernel with the driver by calling the
 *     add_rotate_function() for each test function. When you run the
 *     driver program, it will test and report the performance of each
 *     registered test function.  
 *********************************************************************/

void register_rotate_functions() 
{
    add_rotate_function(&naive_rotate, naive_rotate_descr);
    add_rotate_function(&rotate, rotate_descr);   
    /* ... Register additional test functions here */
}


/***************
 * SMOOTH KERNEL
 **************/

/***************************************************************
 * Various typedefs and helper functions for the smooth function
 * You may modify these any way you like.
 **************************************************************/

/* A struct used to compute averaged pixel value */
typedef struct {
    int red;
    int green;
    int blue;
    int num;
} pixel_sum;

/* Compute min and max of two integers, respectively */
static int min(int a, int b) { return (a < b ? a : b); }
static int max(int a, int b) { return (a > b ? a : b); }

/* 
 * initialize_pixel_sum - Initializes all fields of sum to 0 
 */
static void initialize_pixel_sum(pixel_sum *sum) 
{
    sum->red = sum->green = sum->blue = 0;
    sum->num = 0;
    return;
}

/* 
 * accumulate_sum - Accumulates field values of p in corresponding 
 * fields of sum 
 */
static void accumulate_sum(pixel_sum *sum, pixel p) 
{
    sum->red += (int) p.red;
    sum->green += (int) p.green;
    sum->blue += (int) p.blue;
    sum->num++;
    return;
}

/* 
 * assign_sum_to_pixel - Computes averaged pixel value in current_pixel 
 */
static void assign_sum_to_pixel(pixel *current_pixel, pixel_sum sum) 
{
    current_pixel->red = (unsigned short) (sum.red/sum.num);
    current_pixel->green = (unsigned short) (sum.green/sum.num);
    current_pixel->blue = (unsigned short) (sum.blue/sum.num);
    return;
}

/* 
 * avg - Returns averaged pixel value at (i,j) 
 */
static pixel avg(int dim, int i, int j, pixel *src) 
{
    int ii, jj;
    pixel_sum sum;
    pixel current_pixel;

    initialize_pixel_sum(&sum);
    for(ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++) 
	for(jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++) 
	    accumulate_sum(&sum, src[RIDX(ii, jj, dim)]);

    assign_sum_to_pixel(¤t_pixel, sum);
    return current_pixel;
}

/******************************************************
 * Your different versions of the smooth kernel go here
 ******************************************************/

/*
 * naive_smooth - The naive baseline version of smooth 
 */

char naive_smooth_descr[] = "naive_smooth: Naive baseline implementation";
void naive_smooth(int dim, pixel *src, pixel *dst) 
{
    int i, j;

    for (i = 0; i < dim; i++)
	for (j = 0; j < dim; j++)
	    dst[RIDX(i, j, dim)] = avg(dim, i, j, src);
}

/*
 * smooth - Your current working version of smooth. 
 * IMPORTANT: This is the version you will be graded on
 */

typedef struct {
    int red;
    int green;
    int blue;
} pixelSum;

static void initialize_pixelSum(pixelSum *sum)
{
    sum->red = sum->green = sum->blue = 0;
    return;
}

static void myAvg(int number_of_pixel, pixel *dst, pixelSum *sum)
{
    dst->red = (unsigned short) (sum->red / number_of_pixel);
    dst->green = (unsigned short) (sum->green / number_of_pixel);
    dst->blue = (unsigned short) (sum->blue / number_of_pixel);
    return;
}

char smooth_descr[] = "smooth: Current working version";
void smooth(int dim, pixel *src, pixel *dst) 
{
    pixelSum sum;
    int i, j;

    //corner (0, 0)
    initialize_pixelSum(&sum);
    for (i = 0; i < 2; i++) {
        for (j = 0; j < 2; j++) {
            sum.red += (int) src[RIDX(i, j, dim)].red;
            sum.green += (int) src[RIDX(i, j, dim)].green;
            sum.blue += (int) src[RIDX(i, j, dim)].blue;
        }
    }
    myAvg(4, &dst[RIDX(0, 0, dim)], &sum);

    //corner (0, dim-1)
    initialize_pixelSum(&sum);
    for (i = 0; i < 2; i++) {
        for (j = dim-2; j < dim; j++) {
            sum.red += (int) src[RIDX(i, j, dim)].red;
            sum.green += (int) src[RIDX(i, j, dim)].green;
            sum.blue += (int) src[RIDX(i, j, dim)].blue;
        }
    }
    myAvg(4, &dst[RIDX(0, dim-1, dim)], &sum);

    //corner (dim-1, 0)
    initialize_pixelSum(&sum);
    for (i = dim-2; i < dim; i++) {
        for (j = 0; j < 2; j++) {
            sum.red += (int) src[RIDX(i, j, dim)].red;
            sum.green += (int) src[RIDX(i, j, dim)].green;
            sum.blue += (int) src[RIDX(i, j, dim)].blue;
        }
    }
    myAvg(4, &dst[RIDX(dim-1, 0, dim)], &sum);

    //corner (dim-1, dim-1)
    initialize_pixelSum(&sum);
    for (i = dim-2; i < dim; i++) {
        for (j = dim-2; j < dim; j++) {
            sum.red += (int) src[RIDX(i, j, dim)].red;
            sum.green += (int) src[RIDX(i, j, dim)].green;
            sum.blue += (int) src[RIDX(i, j, dim)].blue;
        }
    }
    myAvg(4, &dst[RIDX(dim-1, dim-1, dim)], &sum);

    //last line
    for (j = 1; j < dim-1; j++) {
        initialize_pixelSum(&sum);
        sum.red += (int) src[RIDX(dim-2, j-1, dim)].red
                 + (int) src[RIDX(dim-2, j, dim)].red
                 + (int) src[RIDX(dim-2, j+1, dim)].red;
        sum.blue += (int) src[RIDX(dim-2, j-1, dim)].blue
                   + (int) src[RIDX(dim-2, j, dim)].blue
                   + (int) src[RIDX(dim-2, j+1, dim)].blue;
        sum.green += (int) src[RIDX(dim-2, j-1, dim)].green
                    + (int) src[RIDX(dim-2, j, dim)].green
                    + (int) src[RIDX(dim-2, j+1, dim)].green;
        sum.red += (int) src[RIDX(dim-1, j-1, dim)].red
                   + (int) src[RIDX(dim-1, j, dim)].red
                   + (int) src[RIDX(dim-1, j+1, dim)].red;
        sum.blue += (int) src[RIDX(dim-1, j-1, dim)].blue
                    + (int) src[RIDX(dim-1, j, dim)].blue
                    + (int) src[RIDX(dim-1, j+1, dim)].blue;
        sum.green += (int) src[RIDX(dim-1, j-1, dim)].green
                     + (int) src[RIDX(dim-1, j, dim)].green
                     + (int) src[RIDX(dim-1, j+1, dim)].green;
        myAvg(6, &dst[RIDX(dim-1, j, dim)], &sum);
    }

    //first row
    for (i = 1; i < dim-1; i++) {
        initialize_pixelSum(&sum);
        sum.red += (int) src[RIDX(i-1, 0, dim)].red
                   + (int) src[RIDX(i, 0, dim)].red
                   + (int) src[RIDX(i+1, 0, dim)].red;
        sum.blue += (int) src[RIDX(i-1, 0, dim)].blue
                    + (int) src[RIDX(i, 0, dim)].blue
                    + (int) src[RIDX(i+1, 0, dim)].blue;
        sum.green += (int) src[RIDX(i-1, 0, dim)].green
                     + (int) src[RIDX(i, 0, dim)].green
                     + (int) src[RIDX(i+1, 0, dim)].green;
        sum.red += (int) src[RIDX(i-1, 1, dim)].red
                   + (int) src[RIDX(i, 1, dim)].red
                   + (int) src[RIDX(i+1, 1, dim)].red;
        sum.blue += (int) src[RIDX(i-1, 1, dim)].blue
                    + (int) src[RIDX(i, 1, dim)].blue
                    + (int) src[RIDX(i+1, 1, dim)].blue;
        sum.green += (int) src[RIDX(i-1, 1, dim)].green
                     + (int) src[RIDX(i, 1, dim)].green
                     + (int) src[RIDX(i+1, 1, dim)].green;
        myAvg(6, &dst[RIDX(i, 0, dim)], &sum);
    }

    //last row
    for (i = 1; i < dim-1; i++) {
        initialize_pixelSum(&sum);
        sum.red += (int) src[RIDX(i-1, dim-2, dim)].red
                   + (int) src[RIDX(i, dim-2, dim)].red
                   + (int) src[RIDX(i+1, dim-2, dim)].red;
        sum.blue += (int) src[RIDX(i-1, dim-2, dim)].blue
                    + (int) src[RIDX(i, dim-2, dim)].blue
                    + (int) src[RIDX(i+1, dim-2, dim)].blue;
        sum.green += (int) src[RIDX(i-1, dim-2, dim)].green
                     + (int) src[RIDX(i, dim-2, dim)].green
                     + (int) src[RIDX(i+1, dim-2, dim)].green;
        sum.red += (int) src[RIDX(i-1, dim-1, dim)].red
                   + (int) src[RIDX(i, dim-1, dim)].red
                   + (int) src[RIDX(i+1, dim-1, dim)].red;
        sum.blue += (int) src[RIDX(i-1, dim-1, dim)].blue
                    + (int) src[RIDX(i, dim-1, dim)].blue
                    + (int) src[RIDX(i+1, dim-1, dim)].blue;
        sum.green += (int) src[RIDX(i-1, dim-1, dim)].green
                     + (int) src[RIDX(i, dim-1, dim)].green
                     + (int) src[RIDX(i+1, dim-1, dim)].green;
        myAvg(6, &dst[RIDX(i, dim-1, dim)], &sum);
    }

    //first line
    for (j = 1; j < dim-1; j++) {
        initialize_pixelSum(&sum);
        sum.red += (int) src[RIDX(0, j-1, dim)].red
                   + (int) src[RIDX(0, j, dim)].red
                   + (int) src[RIDX(0, j+1, dim)].red;
        sum.blue += (int) src[RIDX(0, j-1, dim)].blue
                    + (int) src[RIDX(0, j, dim)].blue
                    + (int) src[RIDX(0, j+1, dim)].blue;
        sum.green += (int) src[RIDX(0, j-1, dim)].green
                     + (int) src[RIDX(0, j, dim)].green
                     + (int) src[RIDX(0, j+1, dim)].green;
        sum.red += (int) src[RIDX(1, j-1, dim)].red
                   + (int) src[RIDX(1, j, dim)].red
                   + (int) src[RIDX(1, j+1, dim)].red;
        sum.blue += (int) src[RIDX(1, j-1, dim)].blue
                    + (int) src[RIDX(1, j, dim)].blue
                    + (int) src[RIDX(1, j+1, dim)].blue;
        sum.green += (int) src[RIDX(1, j-1, dim)].green
                     + (int) src[RIDX(1, j, dim)].green
                     + (int) src[RIDX(1, j+1, dim)].green;
        myAvg(6, &dst[RIDX(0, j, dim)], &sum);
    }

    //middle
    for (j = 1; j < dim-1; j++) {
        initialize_pixelSum(&sum);
        sum.red += (int) src[RIDX(0, j-1, dim)].red
                   + (int) src[RIDX(0, j, dim)].red
                   + (int) src[RIDX(0, j+1, dim)].red;
        sum.blue += (int) src[RIDX(0, j-1, dim)].blue
                    + (int) src[RIDX(0, j, dim)].blue
                    + (int) src[RIDX(0, j+1, dim)].blue;
        sum.green += (int) src[RIDX(0, j-1, dim)].green
                     + (int) src[RIDX(0, j, dim)].green
                     + (int) src[RIDX(0, j+1, dim)].green;
        sum.red += (int) src[RIDX(1, j-1, dim)].red
                   + (int) src[RIDX(1, j, dim)].red
                   + (int) src[RIDX(1, j+1, dim)].red;
        sum.blue += (int) src[RIDX(1, j-1, dim)].blue
                    + (int) src[RIDX(1, j, dim)].blue
                    + (int) src[RIDX(1, j+1, dim)].blue;
        sum.green += (int) src[RIDX(1, j-1, dim)].green
                     + (int) src[RIDX(1, j, dim)].green
                     + (int) src[RIDX(1, j+1, dim)].green;
        sum.red += (int) src[RIDX(2, j-1, dim)].red
                   + (int) src[RIDX(2, j, dim)].red
                   + (int) src[RIDX(2, j+1, dim)].red;
        sum.blue += (int) src[RIDX(2, j-1, dim)].blue
                    + (int) src[RIDX(2, j, dim)].blue
                    + (int) src[RIDX(2, j+1, dim)].blue;
        sum.green += (int) src[RIDX(2, j-1, dim)].green
                     + (int) src[RIDX(2, j, dim)].green
                     + (int) src[RIDX(2, j+1, dim)].green;
        myAvg(9, &dst[RIDX(1, j, dim)], &sum);
        for (i = 2; i < dim-1; i++) {
            sum.red -= (int) src[RIDX(i-2, j-1, dim)].red
                       + (int) src[RIDX(i-2, j, dim)].red
                       + (int) src[RIDX(i-2, j+1, dim)].red;
            sum.green -= (int) src[RIDX(i-2, j-1, dim)].green
                       + (int) src[RIDX(i-2, j, dim)].green
                       + (int) src[RIDX(i-2, j+1, dim)].green;
            sum.blue -= (int) src[RIDX(i-2, j-1, dim)].blue
                       + (int) src[RIDX(i-2, j, dim)].blue
                       + (int) src[RIDX(i-2, j+1, dim)].blue;
            sum.red += (int) src[RIDX(i+1, j-1, dim)].red
                       + (int) src[RIDX(i+1, j, dim)].red
                       + (int) src[RIDX(i+1, j+1, dim)].red;
            sum.green += (int) src[RIDX(i+1, j-1, dim)].green
                         + (int) src[RIDX(i+1, j, dim)].green
                         + (int) src[RIDX(i+1, j+1, dim)].green;
            sum.blue += (int) src[RIDX(i+1, j-1, dim)].blue
                        + (int) src[RIDX(i+1, j, dim)].blue
                        + (int) src[RIDX(i+1, j+1, dim)].blue;
            myAvg(9, &dst[RIDX(i, j, dim)], &sum);
        }
    }
}


/********************************************************************* 
 * register_smooth_functions - Register all of your different versions
 *     of the smooth kernel with the driver by calling the
 *     add_smooth_function() for each test function.  When you run the
 *     driver program, it will test and report the performance of each
 *     registered test function.  
 *********************************************************************/

void register_smooth_functions() {
    add_smooth_function(&naive_smooth, naive_smooth_descr);
    add_smooth_function(&smooth, smooth_descr);
    /* ... Register additional test functions here */
}


你可能感兴趣的:(csapp)