[置顶] 讲述几点对程序进行效率优化的方法


        前几天看了《深入理解计算机系统》中关于程序优化的一章。因为前面两章涉及了太多汇编,而本人不是计算机科班出身,计算机基础薄弱,看那些汇编代码的确很吃力,打算以后再慢慢学习汇编吧,毕竟学习不是一日之功。
   2. 程序示例
/* $begin adt */
/* Create abstract data type for vector */
typedef struct {
    long int len; /* length of the data array */
    data_t *data; /* the array holding the vector elements */
/* $end adt */
    int allocated_len; /* NOTE: we don't use this field in the book */
/* $begin adt */ 
} vec_rec, *vec_ptr;
/* $end adt */

data_t 是这样定义的:typedef int data_t; 之所以单独定义,是因为本书可能会分析到不同的数据类型对程序的影响。另外,可能是考虑到合并时要使用不同的运算,还特别定义了下面两组宏(求和用第一组,求积用第二组):
/*
 * Identity element and combining operator used by the combineN functions.
 * Exactly one pair must be active. The sum pair is the default; define
 * COMBINE_PRODUCT before this point to multiply the elements instead.
 */
#ifndef COMBINE_PRODUCT
/* Sum: 0 is the identity element of addition. */
#define IDENT 0
#define OPER +
#else
/* Product: 1 is the identity element of multiplication. */
#define IDENT 1
#define OPER *
#endif


#include <stdlib.h>
#include "combine.h"

/* $begin vec */
/* Create vector of specified length */
vec_ptr new_vec(int len)
    /* allocate header structure */
    vec_ptr result = (vec_ptr) malloc(sizeof(vec_rec));
    if (!result)
        return NULL; /* Couldn't allocate storage */
    result->len = len;
/* $end vec */
    /* We don't show this in the book */
    result->allocated_len = len;
/* $begin vec */
    /* Allocate array */
    if (len > 0) {
        data_t *data = (data_t *)calloc(len, sizeof(data_t));
    if (!data) {
        free((void *) result);
         return NULL; /* Couldn't allocate storage */
    result->data = data;
    result->data = NULL;
    return result;

/*
 * Retrieve vector element and store at dest.
 * Return 0 (out of bounds) or 1 (successful)
 */
int get_vec_element(vec_ptr v, int index, data_t *dest)
{
    /* Valid indices are 0 .. v->len - 1; on failure *dest is not written */
    if (index < 0 || index >= v->len)
        return 0;
    *dest = v->data[index];
    return 1;
}

/* Return length of vector (the len field of v) */
int vec_length(vec_ptr v)
{
    /* len is stored as long int; the interface returns int */
    return (int) v->len;
}
/* $end vec */

/* $begin get_vec_start */
/*
 * Return the vector's data pointer so callers can index the elements
 * directly. The pointer is whatever v->data currently holds, so it may
 * be NULL.
 */
data_t *get_vec_start(vec_ptr v)
{
    return v->data;
}
/* $end get_vec_start */

/*
 * Set vector element.
 * Return 0 (out of bounds) or 1 (successful)
 */
int set_vec_element(vec_ptr v, int index, data_t val)
{
    /* Valid indices are 0 .. v->len - 1; on failure nothing is stored */
    if (index < 0 || index >= v->len)
        return 0;
    v->data[index] = val;
    return 1;
}

/*
 * Set vector length. If > allocated length, will reallocate.
 *
 * Reallocation uses calloc, so the new array is zero-filled and the
 * previous element values are NOT carried over. The old array is freed.
 * If the new array cannot be allocated, the vector is left unchanged.
 */
void set_vec_length(vec_ptr v, int newlen)
{
    if (newlen > v->allocated_len) {
        /* Allocate first so a failure cannot lose the existing array */
        data_t *fresh = calloc((size_t) newlen, sizeof(data_t));
        if (!fresh)
            return; /* Couldn't allocate storage */
        free(v->data);
        v->data = fresh;
        v->allocated_len = newlen;
    }
    v->len = newlen;
}


在 合并函数1中

#include "combine.h"

/* Combining functions */

/* Short human-readable description string for combine1 */
char combine1_descr[] = "combine1: Maximum use of data abstraction";
/* $begin combine1 */
/*
 * Implementation with maximum use of data abstraction.
 *
 * Combines all elements of v with OPER, starting from IDENT, and stores
 * the result in *dest. This is the unoptimized baseline: it deliberately
 * calls vec_length() on every iteration and fetches each element through
 * get_vec_element().
 */
void combine1(vec_ptr v, data_t *dest)
{
    int i;

    *dest = IDENT;
    for (i = 0; i < vec_length(v); i++) {
        data_t val;
        get_vec_element(v, i, &val);
        /* $begin combineline */
        *dest = *dest OPER val;
        /* $end combineline */
    }
}
/* $end combine1 */

未经优化的程序一般都是效率比较低的程序。在 Unix 下用 gcc 编译时,可以通过 ‘-O1’ 选项进行基本的优化。
    上边的代码中,我们可以发现,在循环 for (i = 0; i < vec_length(v); i++) 中,我们每次循环都会调用 vec_length(v) 方法,其实向量的长度在循环过程中并不会改变,只需要求值一次即可:
/*
 * Same result as combine1, but the call to vec_length() is moved out of
 * the loop: the length is read once and reused as the loop bound.
 * Combines all elements of v with OPER, starting from IDENT, and stores
 * the result in *dest.
 */
void combine2(vec_ptr v, data_t *dest)
{
    int i;
    int length = vec_length(v);

    *dest = IDENT;
    for (i = 0; i < length; i++) {
        data_t val;
        get_vec_element(v, i, &val);
        *dest = *dest OPER val;
    }
}

这个时候,我们将求长度的代码移出了循环,只进行了一次 int length = vec_length(v); 求值。书中的效率对比表明,这一改动使性能得到了很大的提升。
/*
 * Convert string to lower case: slow.
 *
 * Rewrites the ASCII letters 'A'..'Z' in s in place. strlen(s) is
 * intentionally left in the loop condition: it is re-evaluated on every
 * iteration, which is the inefficiency this example exists to show.
 */
void lower1(char *s)
{
    size_t i;

    for (i = 0; i < strlen(s); i++) {
        if (s[i] >= 'A' && s[i] <= 'Z')
            s[i] -= ('A' - 'a'); /* 'A' - 'a' is negative, so this adds */
    }
}

/*
 * Convert string to lower case: faster.
 *
 * Rewrites the ASCII letters 'A'..'Z' in s in place. The string length
 * is computed once before the loop instead of on every iteration.
 */
void lower2(char *s)
{
    size_t i;
    size_t len = strlen(s);

    for (i = 0; i < len; i++) {
        if (s[i] >= 'A' && s[i] <= 'Z')
            s[i] -= ('A' - 'a'); /* 'A' - 'a' is negative, so this adds */
    }
}

/* Implementation of library function strlen */
/*
 * Compute length of string: the number of characters before the
 * terminating '\0'. The whole string is walked, so the cost grows
 * linearly with its length.
 */
size_t strlen(const char *s)
{
    size_t length = 0;
    while (*s != '\0') {
        s++;
        length++;
    }
    return length;
}

在测试中,对于一个长度为 1048576 的字符串来说,lower2 函数只要 1.5 毫秒,比 lower1 快了 500000 多倍。这是一个多么惊人的数字,所以我们要好好分析一下程序。
     data_t*  get_vec_start(vect_pt v)
        return v->data;
/* Short human-readable description string for combine3 */
char combine3_descr[] = "combine3: Array reference to vector data";
/* $begin combine3 */
/*
 * Direct access to vector data.
 *
 * Combines all elements of v with OPER, starting from IDENT, and stores
 * the result in *dest. The length and the data pointer are fetched once,
 * and elements are read by indexing the array instead of calling
 * get_vec_element() for each one. *dest is still updated on every
 * iteration.
 */
void combine3(vec_ptr v, data_t *dest)
{
    int i;
    int length = vec_length(v);
    data_t *data = get_vec_start(v);

    *dest = IDENT;
    for (i = 0; i < length; i++) {
        *dest = *dest OPER data[i];
    }
}
/* $end combine3 */

  /* Short human-readable description string for combine4 */
  char combine4_descr[] = "combine4: Array reference, accumulate in temporary";
/* $begin combine4 */
/*
 * Accumulate result in local variable.
 *
 * Combines all elements of v with OPER, starting from IDENT. The running
 * value is kept in the local x and written to *dest once, after the
 * loop, instead of reading and writing *dest on every iteration.
 */
void combine4(vec_ptr v, data_t *dest)
{
    int i;
    int length = vec_length(v);
    data_t *data = get_vec_start(v);
    data_t x = IDENT;

    for (i = 0; i < length; i++) {
        x = x OPER data[i];
    }
    *dest = x;
}
/* $end combine4 */

