深入理解计算机系统家庭作业第五章

/*

***5.15

*/

A.    画图略

B.   3

C.   1

D.   乘法不在关键路径上,故乘法可以按流水线执行


/*

***5.16

*/

A.   每个元素都要加载两个数据(x[i] 和 y[i]),而处理器只有一个加载单元,每个周期只能发起一次加载,故每个元素至少需要两个周期,CPE 不可能小于 2

B.   循环展开并没有缩短关键路径:对 sum 的浮点加法仍然是顺序依赖的,每个元素仍要在关键路径上执行一次浮点加法


/*

***5.17

*/

A.   加载单元的吞吐量:每个元素需要两次加载,而每个周期只能完成一次加载

B.   IA32没有足够的寄存器来保存临时变量


/*

***5.18

*/

void inner4 (vec_ptr x,vec_ptr y;data_t *dest)
{
	long int i;
	int length = vec_length(x);
	data_t *xdata = get_vec_start(x);
	data_t *ydata = get_vec_start(y);
	data_t sum = (data_t)0;

	for(i=0;i

/*

***5.19

*/

/*
 * Fill the first n bytes of the memory at s with the low-order byte of c,
 * processing the aligned middle of the region one word-sized chunk
 * (sizeof(unsigned long) bytes) at a time.
 *
 * The region is handled in three phases:
 *   1. head: single bytes until the address is a multiple of the word size,
 *   2. body: whole word-sized chunks copied from a pre-built pattern word,
 *   3. tail: the remaining (fewer than one word) bytes.
 * Every phase is bounded by the remaining byte count, so no byte outside
 * [s, s + n) is ever written, including for n == 0 and for n smaller than
 * the distance to the next aligned address.
 *
 * Returns s.
 */
void *word_memeset(void *s, int c, size_t n)
{
	const size_t k = sizeof(unsigned long);
	const unsigned char fill = (unsigned char)c;
	unsigned long l;
	unsigned char *schar = (unsigned char *)s;
	unsigned char *lchar = (unsigned char *)&l;

	/* Build a pattern word whose every byte equals the fill byte. */
	for (size_t i = 0; i < k; i++)
		lchar[i] = fill;

	/* Head: advance to a k-aligned address, but never beyond n bytes. */
	while (n > 0 && (size_t)schar % k != 0)
	{
		*schar++ = fill;
		n--;
	}

	/*
	 * Body: one word-sized chunk per iteration. Testing n >= k (rather
	 * than i < n - k + 1) cannot wrap around in unsigned arithmetic. The
	 * chunk is copied through unsigned char so the store does not depend
	 * on the effective type of the destination object.
	 */
	while (n >= k)
	{
		for (size_t j = 0; j < k; j++)
			schar[j] = lchar[j];
		schar += k;
		n -= k;
	}

	/* Tail: the last n < k bytes. */
	while (n > 0)
	{
		*schar++ = fill;
		n--;
	}

	return s;
}

/*

***5.20

*/

/*
 * Direct-summation polynomial evaluation with five parallel accumulators.
 *
 * Evaluates a[0] + a[1]*x + ... + a[degree]*x^degree. Terms 1..degree are
 * taken five at a time; each of the five lanes keeps its own running sum
 * and its own power of x, which is advanced by x^5 per iteration. Terms
 * left over after the last full group of five are added one by one.
 */
double poly(double a[], double x, int degree)
{
	double tail = a[0];
	double acc1 = 0;
	double acc2 = 0;
	double acc3 = 0;
	double acc4 = 0;
	double acc5 = 0;

	/* Lane powers start at x^1 .. x^5. */
	double pw1 = x;
	double pw2 = x * pw1;
	double pw3 = x * pw2;
	double pw4 = x * pw3;
	double pw5 = x * pw4;
	double stride = pw5;

	long int k = 1;

	/* Full groups of five consecutive coefficients. */
	while (k <= degree - 4)
	{
		acc1 += a[k] * pw1;
		acc2 += a[k+1] * pw2;
		acc3 += a[k+2] * pw3;
		acc4 += a[k+3] * pw4;
		acc5 += a[k+4] * pw5;

		pw1 *= stride;
		pw2 *= stride;
		pw3 *= stride;
		pw4 *= stride;
		pw5 *= stride;

		k += 5;
	}

	/* Leftover terms: pw1 now holds x^k, so continue from there. */
	while (k <= degree)
	{
		tail += a[k] * pw1;
		pw1 *= x;
		k++;
	}

	return tail + acc1 + acc2 + acc3 + acc4 + acc5;
}

/*
 * Horner-style polynomial evaluation with five parallel accumulators.
 *
 * Evaluates a[0] + a[1]*x + ... + a[degree]*x^degree. Starting from the
 * highest coefficient, the coefficients are dealt out to five interleaved
 * chains (stride 5); each chain is an independent Horner recurrence in
 * x^5. After the last full group of five, the chains are merged with the
 * correct relative powers of x, and the remaining low-order coefficients
 * are folded in with ordinary Horner steps.
 *
 * For degree < 5 there are not enough coefficients to seed five chains,
 * so plain Horner is used.
 */
double polyh(double a[], double x, int degree)
{
	long int i;
	double result;

	if (degree < 5)
	{
		result = a[degree];
		for (i = degree - 1; i >= 0; i--)
			result = a[i] + x * result;
		return result;
	}

	/* Seed the five chains with the five highest coefficients. */
	double result1 = a[degree];
	double result2 = a[degree - 1];
	double result3 = a[degree - 2];
	double result4 = a[degree - 3];
	double result5 = a[degree - 4];
	double step = x * x * x * x * x;

	/* Each chain only depends on its own previous value. */
	for (i = degree - 5; i >= 4; i -= 5)
	{
		result1 = a[i] + result1 * step;
		result2 = a[i - 1] + result2 * step;
		result3 = a[i - 2] + result3 * step;
		result4 = a[i - 3] + result4 * step;
		result5 = a[i - 4] + result5 * step;
	}

	/*
	 * Merge: the lowest coefficient in chain 5 has index i + 1 and the one
	 * in chain 1 has index i + 5, so relative to chain 5 the chains carry
	 * factors x^0 .. x^4.
	 */
	result = result5 + x * (result4 + x * (result3 + x * (result2 + x * result1)));

	/* Fold in the remaining coefficients a[i] .. a[0]. */
	for (; i >= 0; i--)
		result = a[i] + x * result;

	return result;
}

/*

***5.21

*/

/*
 * Prefix sum with 3-way loop unrolling: p[i] = a[0] + a[1] + ... + a[i]
 * for 0 <= i < n.
 *
 * Inside the unrolled loop the sums of the new elements are grouped as
 * (a[i] + a[i+1]) + a[i+2], so they do not depend on last_val; only one
 * addition per output sits on the dependency chain through last_val.
 * Because floating-point addition is not associative, this grouping can
 * round differently from a strictly left-to-right sum.
 *
 * The running values are kept as float, matching the element type, so
 * fractional partial sums are not truncated. Nothing is read or written
 * when n <= 0.
 */
void psum1(float a[], float p[], long int n)
{
	long int i;
	float last_val, val1, val2, val3;

	if (n <= 0)
		return;

	p[0] = last_val = a[0];
	for (i = 1; i < n - 2; i += 3)
	{
		/* The three sums below are independent of each other. */
		val1 = last_val + a[i];
		val2 = last_val + (a[i] + a[i + 1]);
		val3 = last_val + ((a[i] + a[i + 1]) + a[i + 2]);
		p[i] = val1;
		p[i + 1] = val2;
		p[i + 2] = val3;
		last_val = val3;
	}

	/* Up to two elements left over after the last full group of three. */
	for (; i < n; i++)
	{
		val1 = last_val + a[i];
		p[i] = val1;
		last_val = val1;
	}
}

/*

***5.22

*/

代入公式  S = 1/((1-α) + α/k)     可得:

方案1加速比为   1.25

方案2加速比为   1.2

故第一种方案比较好



你可能感兴趣的:(深入理解计算机系统家庭作业)