[第四章] 深入理解计算机系统第三版 家庭作业参考答案

代码均经过测试,请放心食用
这一章可能是最难的一章了,虽然难,但是还是挺有趣的
本章所需的网络旁注和模拟器请自行下载 http://csapp.cs.cmu.edu/3e/students.html
下面给出测试简要指南,详情请自行阅读网络旁注和 README 文件

测试简要指南

从 HCL 的实现角度来看,HCL 实际上是用来生成一种非常格式化的 C 代码的语言。HCL 文件中的所有块定义都被 HCL2C 程序转换成C函数。然后这些函数和实现其他模拟器函数的源代码一起编译,来生成一个可执行的模拟程序,比如 ssim 和 psim。
我并没有使用 GUI 模式的模拟器,因为配置太过麻烦,要下载一些东西,况且在终端进行测试就足够了;如果使用终端模式的话,请将 Makefile 文件中关于 GUIMODE, TKLIBS, TKINC 的参数注释掉;
先下载压缩包,然后运行下列指令,如果出错可能是因为一些参数没有注释掉:

USER@NAME:~# tar xf sim.tar
USER@NAME:~# cd sim
USER@NAME:~/sim# make clean
USER@NAME:~/sim# make

可以直接使用 YIS 运行程序,它是指令集模拟器,不模拟任何具体处理器的实现:

USER@NAME:~/sim/y86-code# ../misc/yis asum.yo

ssim 和 psim 分别是 SEQ 和 PIPE 的模拟器;如果要生成模拟器(xxx代表不同版本):

USER@NAME:~/sim/seq# make clean; make ssim VERSION=xxx

也可以使用模拟器来运行某一个程序;可以在 /sim/y86-code 下复制一个 ys 文件比如 asum.ys,重命名为 bubble.ys,将里面的 sum 函数删掉,改成所要测试的函数,然后在 main 里调用;
然后 make bubble.yo 生成一个目标文件 bubble.yo,这个 yo 文件就可以用模拟器运行了:

USER@NAME:~/sim/y86-code# make bubble.yo
USER@NAME:~/sim/y86-code# ../seq/ssim -t < ./bubble.yo

如果需要测试修改后的 hcl 文件,需要先生成模拟器,在上面说过了;然后在 /sim/ptest 中测试(参数 SIM 指示需要测试的是 psim 还是 ssim,参数 TFLAGS=-i 表示测试时包含 iaddq 指令);如果测试失败,会在当前文件夹中留下导致错误的 ys 文件:

USER@NAME:~/sim/ptest# make SIM=../pipe/psim TFLAGS=-i

4.54 题中有完整的生成模拟器并测试的步骤

4.45

A.
错误,它压入了减去 8 的 %rsp 的值

B.

movq REG, -8(%rsp)
subq $8, %rsp

4.46

A.
错误,它将栈指针设置为了正确的值再加上 8(先执行 movq (%rsp), %rsp,随后的 addq $8, %rsp 又把读出的值加了 8)

B.

addq $8, %rsp
movq -8(%rsp), REG

4.47

使用数组索引

/*参考代码*/
/*
 * Bubble sort, array-index version.
 * Sorts data[0..count-1] into ascending order in place.
 * Does nothing when count <= 1.
 */
void bubble_a(long *data, long count) {
	long unsorted_end = count - 1;
	while (unsorted_end > 0) {
		long pos = 0;
		while (pos < unsorted_end) {
			long left = data[pos];
			long right = data[pos + 1];
			if (right < left) {
				/* Neighbours are out of order: exchange them */
				data[pos + 1] = left;
				data[pos] = right;
			}
			pos++;
		}
		unsorted_end--;
	}
}

A.
使用指针引用数组元素

/*
 * Bubble sort, pointer version.
 * Sorts data[0..count-1] into ascending order in place, walking the
 * array with pointers instead of indices. Does nothing when count <= 1.
 */
void bubble_b(long *data, long count) {
	long last;
	for (last = count - 1; last > 0; last--) {
		long *stop = data + last;	/* right-hand element of the final pair */
		long *left;
		for (left = data; left < stop; left++) {
			long *right = left + 1;
			if (*right < *left) {
				/* Neighbours are out of order: exchange them */
				long tmp = *right;
				*right = *left;
				*left = tmp;
			}
		}
	}
}

B.
手写汇编真麻烦我都要吐了怪不得发明高级语言
这里给出完整的 ys 文件,以后的题目只给出函数定义

# Execution begins at address 0
	.pos 0
	irmovq stack, %rsp  	# Set up stack pointer
	call main		        # Execute main program
	halt			        # Terminate program

# Four-element test array (initially in descending order)
	.align 8
array:	
    .quad 0x000000000abc
	.quad 0x0000000000bc
	.quad 0x00000000000c
	.quad 0x000000000001

# Main function: sorts the test array with bubble_b
main:	
    irmovq array,%rdi		# %rdi = address of array
	irmovq $4,%rsi			# %rsi = 4 (element count)
	call bubble_b		    # bubble(array, 4)
	ret

# void bubble_b(long *data, long count)
# Sorts data[0..count-1] into ascending order in place (bubble sort).
# In:      %rdi = data, %rsi = count
# Working: %rax = last, %rcx = i, %rdx = element address,
#          %r8 = 1, %r9 = 8, %r10 = data[i], %r11 = data[i + 1]
# Scratch: %rbx, %r12 (overwritten, not restored)
bubble_b:
	irmovq $1, %r8			# constant 1
	irmovq $8, %r9			# constant 8
	rrmovq %rsi, %rax
	subq %r8, %rax			# last = %rax = count - 1
	jle Done				# last <= 0 (count <= 1) -> nothing to sort
Loop1:
	xorq %rcx, %rcx			# i = %rcx = 0
Loop2:
	rrmovq %rcx, %rdx		# %rdx = i
	addq %rdx, %rdx			# %rdx = 2 * i
	addq %rdx, %rdx			# %rdx = 4 * i
	addq %rdx, %rdx			# %rdx = 8 * i
	addq %rdi, %rdx			# %rdx = &data[i]
	mrmovq (%rdx), %r10		# %r10 = data[i]
	addq %r9, %rdx			# %rdx = &data[i + 1]
	mrmovq (%rdx), %rbx		# %rbx = data[i + 1]
	rrmovq %rbx, %r11		# %r11 = data[i + 1] (copy that survives the subtraction)
	subq %r10, %rbx			# set CC from data[i + 1] - data[i]
	jge Test1				# data[i + 1] >= data[i] -> already ordered, no swap
	rmmovq %r10, (%rdx)		# data[i + 1] = old data[i]
	subq %r9, %rdx			# %rdx = &data[i]
	rmmovq %r11, (%rdx)		# data[i] = old data[i + 1]
Test1:
	addq %r8, %rcx			# i++
	rrmovq %rcx, %r12		# %r12 = i
	subq %rax, %r12			# set CC from i - last
	jl Loop2				# i < last -> next pair
	subq %r8, %rax			# last--
	jg Loop1				# last > 0 -> next pass
Done:
	ret                     # Return

# Stack starts at 0x200 and grows toward lower addresses
	.pos 0x200
stack:

可以看到输出,左边是运行前,右边是运行后:

Changed Memory State:
0x0018:	0x0000000000000abc	0x0000000000000001
0x0020:	0x00000000000000bc	0x000000000000000c
0x0028:	0x000000000000000c	0x00000000000000bc
0x0030:	0x0000000000000001	0x0000000000000abc

4.48

#号包围区域与 4.47 题做对比:

# void bubble_c(long *data, long count)
# Sorts data[0..count-1] into ascending order in place (bubble sort).
# The swap is branch-free: two conditional moves pick the values to store,
# and both elements are written back on every iteration.
# In:      %rdi = data, %rsi = count
# Working: %rax = last, %rcx = i, %rdx = element address,
#          %r8 = 1, %r9 = 8, %r10 = data[i], %r11 = data[i + 1]
# Scratch: %rbx, %r12, %r13 (overwritten, not restored)
bubble_c:
	irmovq $1, %r8			# constant 1
	irmovq $8, %r9			# constant 8
	rrmovq %rsi, %rax
	subq %r8, %rax			# last = %rax = count - 1
	jle Done				# last <= 0 (count <= 1) -> nothing to sort
Loop1:
	xorq %rcx, %rcx			# i = %rcx = 0
Loop2:
	rrmovq %rcx, %rdx		# %rdx = i
	addq %rdx, %rdx			# %rdx = 2 * i
	addq %rdx, %rdx			# %rdx = 4 * i
	addq %rdx, %rdx			# %rdx = 8 * i
	addq %rdi, %rdx			# %rdx = &data[i]
	mrmovq (%rdx), %r10		# %r10 = data[i]
	addq %r9, %rdx			# %rdx = &data[i + 1]
	mrmovq (%rdx), %rbx		# %rbx = data[i + 1]
	rrmovq %rbx, %r11		# %r11 = data[i + 1]
	subq %r10, %rbx			# set CC from data[i + 1] - data[i]
	######################### Removed (4.47 branch-based swap) #########
	##jge Test1				# data[i + 1] >= data[i] -> no swap
	##rmmovq %r10, (%rdx)	# data[i + 1] = data[i]
	##subq %r9, %rdx		# %rdx = %rdx - 8
	##rmmovq %r11, (%rdx)	# data[i] = data[i + 1]
	###################################################################

	######################### Added (conditional-move swap) ###########
	rrmovq %r11, %r12       # %r12 = data[i + 1] (value kept if already ordered)
	rrmovq %r10, %r13       # %r13 = data[i]     (value kept if already ordered)
	cmovl %r10, %r12		# data[i + 1] < data[i] -> %r12 = data[i]
	cmovl %r11, %r13		# data[i + 1] < data[i] -> %r13 = data[i + 1]
	rmmovq %r12, (%rdx)     # data[i + 1] = %r12
	subq %r9, %rdx			# %rdx = &data[i]
	rmmovq %r13, (%rdx)     # data[i] = %r13
	###################################################################
Test1:
	addq %r8, %rcx			# i++
	rrmovq %rcx, %r12		# %r12 = i
	subq %rax, %r12			# set CC from i - last
	jl Loop2				# i < last -> next pair
	subq %r8, %rax			# last--
	jg Loop1				# last > 0 -> next pass
Done:
	ret                     # Return

4.49

一个条件传送,比较巧妙,#号包围区域与 4.47 题做对比:

# void bubble_d(long *data, long count)
# Sorts data[0..count-1] into ascending order in place (bubble sort).
# The swap is branch-free with a single conditional move: the difference
# d = data[i + 1] - data[i] is replaced by 0 when the pair is already ordered,
# then data[i + 1] -= d and data[i] += d.
# In:      %rdi = data, %rsi = count
# Working: %rax = last, %rcx = i, %rdx = element address,
#          %r8 = 1, %r9 = 8, %r12 = 0 (must stay 0 for the whole function),
#          %r10 = data[i], %r11 = data[i + 1], %rbx = d
# Scratch: %r13 (loop test). %rbx, %r12, %r13 are overwritten, not restored.
bubble_d:
	irmovq $1, %r8			# constant 1
	irmovq $8, %r9			# constant 8
	irmovq $0, %r12         # constant 0
	rrmovq %rsi, %rax
	subq %r8, %rax			# last = %rax = count - 1
	jle Done				# last <= 0 (count <= 1) -> nothing to sort
Loop1:
	xorq %rcx, %rcx			# i = %rcx = 0
Loop2:
	rrmovq %rcx, %rdx		# %rdx = i
	addq %rdx, %rdx			# %rdx = 2 * i
	addq %rdx, %rdx			# %rdx = 4 * i
	addq %rdx, %rdx			# %rdx = 8 * i
	addq %rdi, %rdx			# %rdx = &data[i]
	mrmovq (%rdx), %r10		# %r10 = data[i]
	addq %r9, %rdx			# %rdx = &data[i + 1]
	mrmovq (%rdx), %rbx		# %rbx = data[i + 1]
	rrmovq %rbx, %r11		# %r11 = data[i + 1]
	subq %r10, %rbx			# %rbx = d = data[i + 1] - data[i], sets CC
	######################### Removed (4.47 branch-based swap) #########
	##jge Test1				# data[i + 1] >= data[i] -> no swap
	##rmmovq %r10, (%rdx)	# data[i + 1] = data[i]
	##subq %r9, %rdx		# %rdx = %rdx - 8
	##rmmovq %r11, (%rdx)	# data[i] = data[i + 1]
	###################################################################

	######################### Added (single conditional move) #########
	cmovge %r12, %rbx		# data[i + 1] >= data[i] -> d = 0, otherwise d is kept
	subq %rbx, %r11         # %r11 = data[i + 1] - d = larger of the two values
	rmmovq %r11, (%rdx)     # data[i + 1] = %r11
	subq %r9, %rdx			# %rdx = &data[i]
	addq %rbx, %r10         # %r10 = data[i] + d = smaller of the two values
	rmmovq %r10, (%rdx)     # data[i] = %r10
	###################################################################
Test1:
	addq %r8, %rcx			# i++
	rrmovq %rcx, %r13		# %r13 = i (not %r12: that register holds the constant 0)
	subq %rax, %r13			# set CC from i - last
	jl Loop2				# i < last -> next pair
	subq %r8, %rax			# last--
	jg Loop1				# last > 0 -> next pass
Done:
	ret                     # Return

4.50

完整的 ys 文件:

# Execution begins at address 0
	.pos 0
	irmovq stack, %rsp  	# Set up stack pointer
	call main		        # Execute main program
	halt			        # Terminate program

# Jump table for switchv, indexed by idx (0..5).
# Entries are labels, so the assembler fills in the target addresses and the
# table stays correct when the code in switchv changes.
	.align 8
table:
	.quad case_aaa			# idx == 0
	.quad default			# idx == 1
	.quad case_bbb			# idx == 2
	.quad case_ccc			# idx == 3
	.quad default			# idx == 4
	.quad case_bbb			# idx == 5

# Main function
main:
	irmovq $3,%rdi
	call switchv		    # switchv(3)
	ret

# long switchv(long idx)
# In:      %rdi = idx (overwritten)
# Out:     %rax = 0xaaa for idx 0, 0xbbb for idx 2 or 5, 0xccc for idx 3,
#                 0xddd for every other value (including negative idx)
# Scratch: %rdx, %r8-%r13 (%r12 and %r13 are overwritten, not restored)
switchv:
	irmovq 0xaaa, %r8		# %r8 = 0xaaa
	irmovq 0xbbb, %r9 		# %r9 = 0xbbb
	irmovq 0xccc, %r10		# %r10 = 0xccc
	irmovq 0xddd, %r11		# %r11 = 0xddd
	irmovq $5, %r12			# %r12 = 5 (largest table index)
	irmovq table, %r13		# %r13 = table
	andq %rdi, %rdi			# set CC from idx
	jl default				# idx < 0 -> default (would index before the table)
	rrmovq %rdi, %rdx
	subq %r12, %rdx			# set CC from idx - 5
	jg default				# idx > 5 -> default
	addq %rdi, %rdi			# idx = 2 * idx
	addq %rdi, %rdi			# idx = 4 * idx
	addq %rdi, %rdi			# idx = 8 * idx
	addq %rdi, %r13			# %r13 = table + 8 * idx
	mrmovq (%r13), %r13		# %r13 = address of the selected case
	pushq %r13				# indirect jump: push the target ...
	ret						# ... and "return" to it
case_aaa:
	rrmovq %r8, %rax
	jmp Done
case_bbb:
	rrmovq %r9, %rax
	jmp Done
case_ccc:
	rrmovq %r10, %rax
	jmp Done
default:
	rrmovq %r11, %rax
Done:
	ret

# Stack starts at 0x200 and grows toward lower addresses
	.pos 0x200
stack:

输出:

Changed Register State:
%rax:	0x0000000000000000	0x0000000000000ccc

4.51

参照图 4-18:

阶段 iaddq V, rB
取指 icode:ifun <- M1[PC]
rA:rB <- M1[PC+1]
valC <- M8[PC+2]
valP <- PC + 10
译码 valB <- R[rB]
执行 valE <- valB + valC
访存
写回 R[rB] <- valE
更新PC PC <- valP

4.52

根据上题修改 /sim/seq-full.hcl,先复制一个备份文件 seq-full-backup.hcl

USER@NAME:~/sim/seq# diff -u seq-full-backup.hcl seq-full.hcl
--- seq-full-backup.hcl	2014-06-23 22:01:01.000000000 +0800
+++ seq-full.hcl	2018-08-22 23:04:46.906999999 +0800
@@ -106,16 +106,16 @@
 
 bool instr_valid = icode in 
 	{ INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
-	       IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ };
+	       IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ, IIADDQ };
 
 # Does fetched instruction require a regid byte?
 bool need_regids =
 	icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ, 
-		     IIRMOVQ, IRMMOVQ, IMRMOVQ };
+		     IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ };
 
 # Does fetched instruction require a constant word?
 bool need_valC =
-	icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL };
+	icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL, IIADDQ };
 
 ################ Decode Stage    ###################################
 
@@ -128,7 +128,7 @@
 
 ## What register should be used as the B source?
 word srcB = [
-	icode in { IOPQ, IRMMOVQ, IMRMOVQ  } : rB;
+	icode in { IOPQ, IRMMOVQ, IMRMOVQ, IIADDQ } : rB;
 	icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
 	1 : RNONE;  # Don't need register
 ];
@@ -136,7 +136,7 @@
 ## What register should be used as the E destination?
 word dstE = [
 	icode in { IRRMOVQ } && Cnd : rB;
-	icode in { IIRMOVQ, IOPQ} : rB;
+	icode in { IIRMOVQ, IOPQ, IIADDQ} : rB;
 	icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
 	1 : RNONE;  # Don't write any register
 ];
@@ -152,7 +152,7 @@
 ## Select input A to ALU
 word aluA = [
 	icode in { IRRMOVQ, IOPQ } : valA;
-	icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ } : valC;
+	icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : valC;
 	icode in { ICALL, IPUSHQ } : -8;
 	icode in { IRET, IPOPQ } : 8;
 	# Other instructions don't need ALU
@@ -161,7 +161,7 @@
 ## Select input B to ALU
 word aluB = [
 	icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, 
-		      IPUSHQ, IRET, IPOPQ } : valB;
+		      IPUSHQ, IRET, IPOPQ, IIADDQ } : valB;
 	icode in { IRRMOVQ, IIRMOVQ } : 0;
 	# Other instructions don't need ALU
 ];
@@ -173,7 +173,7 @@
 ];
 
 ## Should the condition codes be updated?
-bool set_cc = icode in { IOPQ };
+bool set_cc = icode in { IOPQ, IIADDQ };
 
 ################ Memory Stage    ###################################

4.53

在我下载的 sim 文件中,此题的文件为 pipe-nobypass.hcl,先创建一个备份文件 pipe-nobypass-backup.hcl
1. 数据冒险
d_srcA in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE } || d_srcB in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }发生数据冒险,需要在 E 插入气泡并暂停 F 和 D,这对于加载/使用数据冒险同样适用,由此得到数据冒险的条件:

# Data hazard when nothing is forwarded: the instruction in decode reads a
# register that an instruction in execute, memory, or write-back still has to write.
# The RNONE checks keep an unused source from matching an unused destination.
bool s_data_hazard =
  (
    d_srcA != RNONE &&
    d_srcA in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }
  ) ||
  (
    d_srcB != RNONE &&
    d_srcB in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }
  );

2. 处理ret
ret 时的条件与 pipe-full.hcl 中的一致,也与书上的触发条件一致:

# A ret instruction is in the decode, execute, or memory stage
bool s_ret = IRET in { D_icode, E_icode, M_icode };

3. 预测错误的分支
也与书上的触发条件、 pipe-full.hcl 中的一致,其条件为:

# A jump in the execute stage turns out not to be taken
bool s_mispredicted = (E_icode == IJXX && !e_Cnd);

不考虑组合,得出流水线控制逻辑的动作:

条件 F D E M W
(Ⅰ)数据冒险 暂停 暂停 气泡 正常 正常
(Ⅱ)处理 ret 暂停 气泡 正常 正常 正常
(Ⅲ)预测错误的分支 正常 气泡 气泡 正常 正常

4. 上述条件的组合
让我们参照书上 图 4-67 来做:
数据冒险有三种基本情况(执行/使用、访存/使用、写回/使用),JXX 只有一种,ret 有三种

寄存器 ①执行/使用 ②访存/使用 ③写回/使用 ④JXX ⑤ret ⑥ret ⑦ret
W —— —— 写回 —— —— —— ——
M —— 访存 —— —— —— —— ret
E 执行 —— —— JXX —— ret 气泡
D 使用 使用 使用 —— ret 气泡 气泡

可行的组合:
①② 或 ①③:属于数据冒险基本情况的组合,同(Ⅰ)
①⑤:与书上组合B类似,组合结果也与其相同,即同(Ⅰ)
②③:同(Ⅰ)
②④:因为不选择分支,使用被取消,所以没有数据冒险,因此同(Ⅲ)
②⑤:与书上组合B类似,同(Ⅰ)
③④:同(Ⅲ)
③⑤:与书上组合B类似,同(Ⅰ)
④⑤:同书上组合A
① (或② 或③) ④⑤:与 ④⑤ 类似,同书上组合A

根据上述分析得出:

组合 数据冒险 处理ret 预测错误的分支 F D E M W
1 0 0 0 正常 正常 正常 正常 正常
2 0 0 1 正常 气泡 气泡 正常 正常
3 0 1 0 暂停 气泡 正常 正常 正常
4 1 0 0 暂停 暂停 气泡 正常 正常
5 0 1 1 暂停 气泡 气泡 正常 正常
6 1 0 1 正常 气泡 气泡 正常 正常
7 1 1 0 暂停 暂停 气泡 正常 正常
8 1 1 1 暂停 气泡 气泡 正常 正常

因此:
F:
bubble = 0
stall = (s_data_hazard || s_ret) && (!s_data_hazard || s_ret || !s_mispredicted)
D:
bubble = s_mispredicted || (s_ret && !s_data_hazard)
stall = s_data_hazard && !s_mispredicted
E:
bubble = s_data_hazard || s_mispredicted
stall = 0
M:
不变
W:
不变

修改 pipe-nobypass.hcl:

USER@NAME:~/sim/pipe# diff -u pipe-nobypass-backup.hcl pipe-nobypass.hcl
--- pipe-nobypass-backup.hcl	2014-12-29 23:08:40.000000000 +0800
+++ pipe-nobypass.hcl	2018-08-25 01:57:59.011000000 +0800
@@ -303,39 +303,43 @@
 ];
 
 ################ Pipeline Register Control #########################
 # Should I stall or inject a bubble into Pipeline Register F?
 # At most one of these can be true.
+#bool F_bubble = 0;
+#bool F_stall = (s_data_hazard || s_ret) && (!s_data_hazard || s_ret || !s_mispredicted);
 bool F_bubble = 0;
-bool F_stall =
-	# Modify the following to stall the update of pipeline register F
-	0 ||
-	# Stalling at fetch while ret passes through pipeline
-	IRET in { D_icode, E_icode, M_icode };
+bool F_stall = 
+	(((d_srcA != RNONE && d_srcA in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }) || 
+	(d_srcB != RNONE  &&d_srcB in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE })) || 
+	IRET in { D_icode, E_icode, M_icode }) && 
+	(!((d_srcA != RNONE && d_srcA in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }) || 
+	(d_srcB != RNONE  &&d_srcB in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE })) || 
+	IRET in { D_icode, E_icode, M_icode } ||
+	!(E_icode == IJXX && !e_Cnd));
 
 # Should I stall or inject a bubble into Pipeline Register D?
 # At most one of these can be true.
+#bool D_stall = s_data_hazard && !s_mispredicted;
+#bool D_bubble = s_mispredicted || (s_ret && !s_data_hazard);
+bool D_bubble = 
+	(E_icode == IJXX && !e_Cnd) || 
+	(IRET in { D_icode, E_icode, M_icode } && 
+	!((d_srcA != RNONE && d_srcA in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }) || 
+	(d_srcB != RNONE  &&d_srcB in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE })));
 bool D_stall = 
-	# Modify the following to stall the instruction in decode
-	0;
-
-bool D_bubble =
-	# Mispredicted branch
-	(E_icode == IJXX && !e_Cnd) ||
-	# Stalling at fetch while ret passes through pipeline
-	!(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_srcB }) &&
-	# but not condition for a generate/use hazard
-	!0 &&
-	  IRET in { D_icode, E_icode, M_icode };
+	((d_srcA != RNONE && d_srcA in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }) || 
+	(d_srcB != RNONE  &&d_srcB in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE })) &&
+	!(E_icode == IJXX && !e_Cnd);
 
 # Should I stall or inject a bubble into Pipeline Register E?
 # At most one of these can be true.
+#bool E_stall = 0;
+#bool E_bubble = s_data_hazard || s_mispredicted;
 bool E_stall = 0;
-bool E_bubble =
-	# Mispredicted branch
-	(E_icode == IJXX && !e_Cnd) ||
-	# Modify the following to inject bubble into the execute stage
-	0;
+bool E_bubble = 
+	((d_srcA != RNONE && d_srcA in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }) || 
+	(d_srcB != RNONE  &&d_srcB in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE })) ||
+	(E_icode == IJXX && !e_Cnd);
 
 # Should I stall or inject a bubble into Pipeline Register M?
 # At most one of these can be true.

参照本文开始处的指南,所有测试均通过

4.54

4.51 题的图:

阶段 iaddq V, rB
取指 icode:ifun <- M1[PC]
rA:rB <- M1[PC+1]
valC <- M8[PC+2]
valP <- PC + 10
译码 valB <- R[rB]
执行 valE <- valB + valC
访存
写回 R[rB] <- valE
更新PC PC <- valP

修改 pipe-full.hcl 文件并测试:

USER@NAME:~/sim/pipe# make clean; make psim VERSION=full
rm -f psim pipe-*.c *.o *.exe *~ 
# Building the pipe-full.hcl version of PIPE
../misc/hcl2c -n pipe-full.hcl < pipe-full.hcl > pipe-full.c
gcc -Wall -O2  -I../misc  -o psim psim.c pipe-full.c \
	../misc/isa.c  -lm
USER@NAME:~/sim/pipe# cd ../ptest/
USER@NAME:~/sim/ptest# make SIM=../pipe/psim TFLAGS=-i
./optest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
  All 58 ISA Checks Succeed
./jtest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
  All 96 ISA Checks Succeed
./ctest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
  All 22 ISA Checks Succeed
./htest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
  All 756 ISA Checks Succeed
USER@NAME:~/sim/ptest# cd ../pipe
USER@NAME:~/sim/pipe# diff -u pipe-full-backup.hcl pipe-full.hcl 
--- pipe-full-backup.hcl	2014-12-29 23:08:40.000000000 +0800
+++ pipe-full.hcl	2018-08-26 02:45:01.228184132 +0800
@@ -158,7 +158,7 @@
 # Is instruction valid?
 bool instr_valid = f_icode in 
 	{ INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
-	  IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ };
+	  IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ, IIADDQ };
 
 # Determine status code for fetched instruction
 word f_stat = [
@@ -171,11 +171,11 @@
 # Does fetched instruction require a regid byte?
 bool need_regids =
 	f_icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ, 
-		     IIRMOVQ, IRMMOVQ, IMRMOVQ };
+		     IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ };
 
 # Does fetched instruction require a constant word?
 bool need_valC =
-	f_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL };
+	f_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL, IIADDQ };
 
 # Predict next value of PC
 word f_predPC = [
@@ -195,14 +195,14 @@
 
 ## What register should be used as the B source?
 word d_srcB = [
-	D_icode in { IOPQ, IRMMOVQ, IMRMOVQ  } : D_rB;
+	D_icode in { IOPQ, IRMMOVQ, IMRMOVQ, IIADDQ  } : D_rB;
 	D_icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
 	1 : RNONE;  # Don't need register
 ];
 
 ## What register should be used as the E destination?
 word d_dstE = [
-	D_icode in { IRRMOVQ, IIRMOVQ, IOPQ} : D_rB;
+	D_icode in { IRRMOVQ, IIRMOVQ, IOPQ, IIADDQ} : D_rB;
 	D_icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
 	1 : RNONE;  # Don't write any register
 ];
@@ -239,7 +239,7 @@
 ## Select input A to ALU
 word aluA = [
 	E_icode in { IRRMOVQ, IOPQ } : E_valA;
-	E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ } : E_valC;
+	E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : E_valC;
 	E_icode in { ICALL, IPUSHQ } : -8;
 	E_icode in { IRET, IPOPQ } : 8;
 	# Other instructions don't need ALU
@@ -248,7 +248,7 @@
 ## Select input B to ALU
 word aluB = [
 	E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, 
-		     IPUSHQ, IRET, IPOPQ } : E_valB;
+		     IPUSHQ, IRET, IPOPQ, IIADDQ } : E_valB;
 	E_icode in { IRRMOVQ, IIRMOVQ } : 0;
 	# Other instructions don't need ALU
 ];
@@ -260,7 +260,7 @@
 ];
 
 ## Should the condition codes be updated?
-bool set_cc = E_icode == IOPQ &&
+bool set_cc = E_icode in { IOPQ, IIADDQ }  &&
 	# State changes only during normal operation
 	!m_stat in { SADR, SINS, SHLT } && !W_stat in { SADR, SINS, SHLT };

4.55

做这个题时可以边看 图4-52 流水线化的最终实现
需注意 Cnd 表示的是选择分支(take branch),可以从 hcl 文件中的定义看出:boolsig M_Cnd 'ex_mem_curr->takebranch' # Condition flag
主要的思路就是让 “Sel+Fwd A” 在无条件转移时选择 D_valP,在条件转移时选择 D_valC;这样的话 valC 就会传递到 M_valA,从而使得“Select PC”可以(通过M_valA)接收到 ValC,以便当预测失败时使用 valC 更新 PC;

USER@NAME:~/sim/pipe# diff -u pipe-nt-backup.hcl pipe-nt.hcl
--- pipe-nt-backup.hcl	2014-12-29 23:08:40.000000000 +0800
+++ pipe-nt.hcl	2018-08-26 01:35:40.452673831 +0800
@@ -80,9 +80,11 @@
 
 ##### Pipeline Register D ##########################################
 wordsig D_icode 'if_id_curr->icode'   # Instruction code
+wordsig D_ifun 'if_id_curr->ifun'	# 译码阶段的 ifun
 wordsig D_rA 'if_id_curr->ra'	     # rA field from instruction
 wordsig D_rB 'if_id_curr->rb'	     # rB field from instruction
 wordsig D_valP 'if_id_curr->valp'     # Incremented PC
+wordsig D_valC 'if_id_curr->valc'	# 译码阶段的 valC
 
 ##### Intermediate Values in Decode Stage  #########################
 
@@ -139,7 +141,7 @@
 ## What address should instruction be fetched at
 word f_pc = [
 	# Mispredicted branch.  Fetch at incremented PC
-	M_icode == IJXX && !M_Cnd : M_valA;
+	M_icode == IJXX && M_ifun != UNCOND && M_Cnd : M_valA;
 	# Completion of RET instruction
 	W_icode == IRET : W_valM;
 	# Default: Use predicted value of PC
@@ -183,7 +185,8 @@
 # Predict next value of PC
 word f_predPC = [
 	# BNT: This is where you'll change the branch prediction rule
-	f_icode in { IJXX, ICALL } : f_valC;
+	# 无条件转移
+	f_icode in { IJXX, ICALL } && f_ifun == UNCOND : f_valC;
 	1 : f_valP;
 ];
 
@@ -220,7 +223,11 @@
 ## What should be the A value?
 ## Forward into decode stage for valA
 word d_valA = [
-	D_icode in { ICALL, IJXX } : D_valP; # Use incremented PC
+	# 无条件转移
+	D_icode in { IJXX, ICALL } && D_ifun == UNCOND : D_valP;
+	# 条件转移
+	D_icode == IJXX : D_valC;
+	#D_icode in { ICALL, IJXX } : D_valP; # Use incremented PC
 	d_srcA == e_dstE : e_valE;    # Forward valE from execute
 	d_srcA == M_dstM : m_valM;    # Forward valM from memory
 	d_srcA == M_dstE : M_valE;    # Forward valE from memory
@@ -343,7 +350,7 @@
 
 bool D_bubble =
 	# Mispredicted branch
-	(E_icode == IJXX && !e_Cnd) ||
+	(E_icode == IJXX && E_ifun != UNCOND && e_Cnd) ||
 	# Stalling at fetch while ret passes through pipeline
 	# but not condition for a load/use hazard
 	!(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_srcB }) &&
@@ -354,7 +361,7 @@
 bool E_stall = 0;
 bool E_bubble =
 	# Mispredicted branch
-	(E_icode == IJXX && !e_Cnd) ||
+	(E_icode == IJXX && E_ifun != UNCOND && e_Cnd) ||
 	# Conditions for a load/use hazard
 	E_icode in { IMRMOVQ, IPOPQ } &&
 	 E_dstM in { d_srcA, d_srcB};

4.56

思路和上题有异曲同工之处;valP 是送往“Sel+Fwd A”,之后会到达 M_valA;而 valC 送往 ALUA ,之后会到达 M_valE;只需比较 M_valA 和 M_valE 就知道是前向分支还是后向;然后根据 Cnd 的值修改 pc;实际上是增加了一条转发路径;

USER@NAME:~/sim/pipe# diff -u pipe-btfnt-backup.hcl pipe-btfnt.hcl
--- pipe-btfnt-backup.hcl	2014-12-29 23:08:40.000000000 +0800
+++ pipe-btfnt.hcl	2018-08-26 22:31:56.452823164 +0800
@@ -83,6 +83,8 @@
 wordsig D_rA 'if_id_curr->ra'	     # rA field from instruction
 wordsig D_rB 'if_id_curr->rb'	     # rB field from instruction
 wordsig D_valP 'if_id_curr->valp'     # Incremented PC
+wordsig D_valC 'if_id_curr->valc'	 # 译码阶段的 valC
+wordsig D_ifun 'if_id_curr->ifun' 	# 译码阶段的 ifun
 
 ##### Intermediate Values in Decode Stage  #########################
 
@@ -138,8 +140,11 @@
 
 ## What address should instruction be fetched at
 word f_pc = [
-	# Mispredicted branch.  Fetch at incremented PC
-	M_icode == IJXX && !M_Cnd : M_valA;
+	# 后向分支预测错误
+	M_icode == IJXX && M_ifun != UNCOND && M_valE < M_valA && !M_Cnd : M_valA;
+	# 前向分支预测错误
+	M_icode == IJXX && M_ifun != UNCOND && M_valE > M_valA && M_Cnd : M_valE;
 	# Completion of RET instruction
 	W_icode == IRET : W_valM;
 	# Default: Use predicted value of PC
@@ -183,13 +188,14 @@
 # Predict next value of PC
 word f_predPC = [
 	# BBTFNT: This is where you'll change the branch prediction rule
+	f_icode == IJXX && f_ifun != UNCOND && f_valC < f_valP : f_valC;
+	f_icode == IJXX && f_ifun != UNCOND && f_valC > f_valP : f_valP;
 	f_icode in { IJXX, ICALL } : f_valC;
 	1 : f_valP;
 ];
 
 ################ Decode Stage ######################################
 
 ## What register should be used as the A source?
 word d_srcA = [
 	D_icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ  } : D_rA;
@@ -247,7 +253,8 @@
 ## Select input A to ALU
 word aluA = [
 	E_icode in { IRRMOVQ, IOPQ } : E_valA;
-	E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ } : E_valC;
+	E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ } || 
+		E_icode == IJXX : E_valC;
 	E_icode in { ICALL, IPUSHQ } : -8;
 	E_icode in { IRET, IPOPQ } : 8;
 	# Other instructions don't need ALU
@@ -257,7 +264,8 @@
 word aluB = [
 	E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, 
 		     IPUSHQ, IRET, IPOPQ } : E_valB;
-	E_icode in { IRRMOVQ, IIRMOVQ } : 0;
+	E_icode in { IRRMOVQ, IIRMOVQ } || 
+		E_icode == IJXX  : 0;
 	# Other instructions don't need ALU
 ];
 
@@ -343,7 +351,8 @@
 
 bool D_bubble =
 	# Mispredicted branch
-	(E_icode == IJXX && !e_Cnd) ||
+	((E_icode == IJXX && E_ifun != UNCOND && E_valC < E_valA && !e_Cnd) ||
+	(E_icode == IJXX && E_ifun != UNCOND && E_valC > E_valA && e_Cnd)) ||
 	# BBTFNT: This condition will change
 	# Stalling at fetch while ret passes through pipeline
 	# but not condition for a load/use hazard
@@ -355,7 +364,8 @@
 bool E_stall = 0;
 bool E_bubble =
 	# Mispredicted branch
-	(E_icode == IJXX && !e_Cnd) ||
+	((E_icode == IJXX && E_ifun != UNCOND && E_valC < E_valA && !e_Cnd) ||
+	(E_icode == IJXX && E_ifun != UNCOND && E_valC > E_valA && e_Cnd)) ||
 	# BBTFNT: This condition will change
 	# Conditions for a load/use hazard
 	E_icode in { IMRMOVQ, IPOPQ } &&

4.57

A.
书上加载使用冒险的条件:

E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_srcB };
情况 1 2 3 4
E_dstM == d_srcA 1 1 0 0
E_dstM == d_srcB 1 0 1 0

情况 1 2 3 会发生加载使用冒险,因为 rB 会在执行阶段访问,所以情况 1 3 是不能通过加载转发解决的;
只需考虑情况 2 —— E_dstM == d_srcA,对于所有指令,在访存阶段才使用 valA 的只有 rmmovq 和 pushq,因此得出下列条件:

E_icode in { IMRMOVQ, IPOPQ } &&
(
  E_dstM == d_srcB ||
  (
    E_dstM == d_srcA && !(D_icode in { IRMMOVQ, IPUSHQ })
  )
);

B.
修改一下 e_valA 的值,再修改暂停和气泡的条件就行了:

USER@NAME:~/sim/pipe# diff -u pipe-lf-backup.hcl pipe-lf.hcl
--- pipe-lf-backup.hcl	2014-12-29 23:08:40.000000000 +0800
+++ pipe-lf.hcl	2018-08-30 02:05:59.609000000 +0800
@@ -271,6 +271,7 @@
 ##   from memory stage when appropriate
 ## Here it is set to the default used in the normal pipeline
 word e_valA = [
+	E_icode in { IRMMOVQ, IPUSHQ } && E_srcA == M_dstM : m_valM;
 	1 : E_valA;  # Use valA from stage pipe register
 ];
 
@@ -329,7 +330,13 @@
 bool F_stall =
 	# Conditions for a load/use hazard
 	## Set this to the new load/use condition
-	0 ||
+	E_icode in { IMRMOVQ, IPOPQ } &&
+	(
+		E_dstM == d_srcB ||
+		(
+		E_dstM == d_srcA && !(D_icode in { IRMMOVQ, IPUSHQ })
+		)
+	) ||
 	# Stalling at fetch while ret passes through pipeline
 	IRET in { D_icode, E_icode, M_icode };
 
@@ -338,14 +345,26 @@
 bool D_stall = 
 	# Conditions for a load/use hazard
 	## Set this to the new load/use condition
-	0; 
+	E_icode in { IMRMOVQ, IPOPQ } &&
+	(
+		E_dstM == d_srcB ||
+		(
+		E_dstM == d_srcA && !(D_icode in { IRMMOVQ, IPUSHQ })
+		)
+	); 
 
 bool D_bubble =
 	# Mispredicted branch
 	(E_icode == IJXX && !e_Cnd) ||
 	# Stalling at fetch while ret passes through pipeline
 	# but not condition for a load/use hazard
-	!(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_srcB }) &&
+	!(E_icode in { IMRMOVQ, IPOPQ } &&
+	(
+		E_dstM == d_srcB ||
+		(
+		E_dstM == d_srcA && !(D_icode in { IRMMOVQ, IPUSHQ })
+		)
+	)) &&
 	  IRET in { D_icode, E_icode, M_icode };
 
 # Should I stall or inject a bubble into Pipeline Register E?
@@ -356,7 +375,13 @@
 	(E_icode == IJXX && !e_Cnd) ||
 	# Conditions for a load/use hazard
 	## Set this to the new load/use condition
-	0;
+	E_icode in { IMRMOVQ, IPOPQ } &&
+	(
+		E_dstM == d_srcB ||
+		(
+		E_dstM == d_srcA && !(D_icode in { IRMMOVQ, IPUSHQ })
+		)
+	);
 
 # Should I stall or inject a bubble into Pipeline Register M?
 # At most one of these can be true.

4.58

很简单

USER@NAME:~/sim/pipe# diff -u pipe-1w-backup.hcl pipe-1w.hcl 
--- pipe-1w-backup.hcl	2014-12-29 23:08:40.000000000 +0800
+++ pipe-1w.hcl	2018-08-30 04:02:30.477000000 +0800
@@ -157,6 +157,7 @@
 ## so that it will be IPOP2 when fetched for second time.
 word f_icode = [
 	imem_error : INOP;
+	D_icode == IPOPQ : IPOP2;
 	1: imem_icode;
 ];
 
@@ -169,7 +170,7 @@
 # Is instruction valid?
 bool instr_valid = f_icode in 
 	{ INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
-	  IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ };
+	  IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ, IPOP2 };
 
 # Determine status code for fetched instruction
 word f_stat = [
@@ -182,7 +183,7 @@
 # Does fetched instruction require a regid byte?
 bool need_regids =
 	f_icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ, 
-		     IIRMOVQ, IRMMOVQ, IMRMOVQ };
+		     IIRMOVQ, IRMMOVQ, IMRMOVQ, IPOP2 };
 
 # Does fetched instruction require a constant word?
 bool need_valC =
@@ -192,6 +193,7 @@
 word f_predPC = [
 	f_icode in { IJXX, ICALL } : f_valC;
 	## 1W: Want to refetch popq one time
+	f_icode == IPOPQ : f_pc;
 	1 : f_valP;
 ];
 
@@ -204,14 +206,14 @@
 ## What register should be used as the A source?
 word d_srcA = [
 	D_icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ  } : D_rA;
-	D_icode in { IPOPQ, IRET } : RRSP;
+	D_icode in { IRET } : RRSP;
 	1 : RNONE; # Don't need register
 ];
 
 ## What register should be used as the B source?
 word d_srcB = [
 	D_icode in { IOPQ, IRMMOVQ, IMRMOVQ  } : D_rB;
-	D_icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
+	D_icode in { IPUSHQ, IPOPQ, ICALL, IRET, IPOP2 } : RRSP;
 	1 : RNONE;  # Don't need register
 ];
 
@@ -224,7 +226,7 @@
 
 ## What register should be used as the M destination?
 word d_dstM = [
-	D_icode in { IMRMOVQ, IPOPQ } : D_rA;
+	D_icode in { IMRMOVQ, IPOP2 } : D_rA;
 	1 : RNONE;  # Don't write any register
 ];
 
@@ -255,7 +257,7 @@
 word aluA = [
 	E_icode in { IRRMOVQ, IOPQ } : E_valA;
 	E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ } : E_valC;
-	E_icode in { ICALL, IPUSHQ } : -8;
+	E_icode in { ICALL, IPUSHQ, IPOP2 } : -8;
 	E_icode in { IRET, IPOPQ } : 8;
 	# Other instructions don't need ALU
 ];
@@ -263,7 +265,7 @@
 ## Select input B to ALU
 word aluB = [
 	E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, 
-		     IPUSHQ, IRET, IPOPQ } : E_valB;
+		     IPUSHQ, IRET, IPOPQ, IPOP2 } : E_valB;
 	E_icode in { IRRMOVQ, IIRMOVQ } : 0;
 	# Other instructions don't need ALU
 ];
@@ -292,13 +294,13 @@
 
 ## Select memory address
 word mem_addr = [
-	M_icode in { IRMMOVQ, IPUSHQ, ICALL, IMRMOVQ } : M_valE;
-	M_icode in { IPOPQ, IRET } : M_valA;
+	M_icode in { IRMMOVQ, IPUSHQ, ICALL, IMRMOVQ, IPOP2 } : M_valE;
+	M_icode in { IRET } : M_valA;
 	# Other instructions don't need address
 ];
 
 ## Set read control signal
-bool mem_read = M_icode in { IMRMOVQ, IPOPQ, IRET };
+bool mem_read = M_icode in { IMRMOVQ, IPOP2, IRET };
 
 ## Set write control signal
 bool mem_write = M_icode in { IRMMOVQ, IPUSHQ, ICALL };
@@ -350,7 +352,7 @@
 bool F_bubble = 0;
 bool F_stall =
 	# Conditions for a load/use hazard
-	E_icode in { IMRMOVQ, IPOPQ } &&
+	E_icode in { IMRMOVQ, IPOP2 } &&
 	 E_dstM in { d_srcA, d_srcB } ||
 	# Stalling at fetch while ret passes through pipeline
 	IRET in { D_icode, E_icode, M_icode };
@@ -359,7 +361,7 @@
 # At most one of these can be true.
 bool D_stall = 
 	# Conditions for a load/use hazard
-	E_icode in { IMRMOVQ, IPOPQ } &&
+	E_icode in { IMRMOVQ, IPOP2 } &&
 	 E_dstM in { d_srcA, d_srcB };
 
 bool D_bubble =
@@ -367,7 +369,7 @@
 	(E_icode == IJXX && !e_Cnd) ||
 	# Stalling at fetch while ret passes through pipeline
 	# but not condition for a load/use hazard
-	!(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_srcB }) &&
+	!(E_icode in { IMRMOVQ, IPOP2 } && E_dstM in { d_srcA, d_srcB }) &&
 	# 1W: This condition will change
 	  IRET in { D_icode, E_icode, M_icode };
 
@@ -378,7 +380,7 @@
 	# Mispredicted branch
 	(E_icode == IJXX && !e_Cnd) ||
 	# Conditions for a load/use hazard
-	E_icode in { IMRMOVQ, IPOPQ } &&
+	E_icode in { IMRMOVQ, IPOP2 } &&
 	 E_dstM in { d_srcA, d_srcB};
 
 # Should I stall or inject a bubble into Pipeline Register M?

4.59

# 4.47
	jge Test1				# data[i + 1] - data[i] > 0 -> do nothing
	rmmovq %r10, (%rdx)   # data[i + 1] = data[i]				     
	subq %r9, %rdx	    # %rdx = %rdx - 8			             
	rmmovq %r11, (%rdx)	# data[i] = data[i + 1]				     
#######################################################################
# 4.48
	rrmovq %r11, %r12       # %r12 = data[i + 1]
	rrmovq %r10, %r13       # %r13 = data[i]
	cmovl %r10, %r12		# data[i + 1] < data[i] -> %r12 = data[i]
	cmovl %r11, %r13		# data[i + 1] < data[i] -> %r13 = data[i + 1]
	rmmovq %r12, (%rdx)     # data[i + 1] = %r12
	subq %r9, %rdx			# %rdx = %rdx - 8
	rmmovq %r13, (%rdx)     # data[i] = %r13
#######################################################################	
# 4.49
	cmovge %r12, %rbx		# data[i + 1] < data[i] -> %rbx = data[i + 1] - data[i] 否则 %rbx = 0
	subq %rbx, %r11         # %r11 = data[i+1] < data[i] : data[i] : data[i+1]
	rmmovq %r11, (%rdx)     # data[i+1] = %r11
	subq %r9, %rdx			# %rdx = %rdx - 8
	addq %rbx, %r10         # %r10 = data[i+1] < data[i] : data[i] : data[i+1]
	rmmovq %r10, (%rdx)     # data[i] = %r10

设 data[i + 1] > data[i] 的几率为 50%
平均:
三者分别执行的指令数 2.5, 7, 6
三者分别插入的气泡 1, 0, 0
三者分别需要的时钟周期 3.5, 7, 6

显然 4.47 性能更好

你可能感兴趣的:(CSAPP)