深入理解计算机系统arch lab

深入理解计算机系统arch lab_第1张图片

Arch lab

一:实验梗概

在这个lab中,分为PartA,PartB,PartC。在PartA中,你要编写Y86-64程序.在PartB中,你要给SEQ处理器添加iaddq指令。在PartC,你要优化你的程序。

file contains
misc YAS和YIS
seq SEQ处理器
pipe PIPE处理器
y86_code 书上的y86代码
ptest 测试脚本
Makefile Makefile
README README

工具

YAS
Y86-64汇编器,输入.ys结尾的y86 code,输出.yo结尾的object code。

./yas ./asum.ys       //会输出asum.yo文件

YIS
Y86-64指令模拟器,输入.yo结尾的object code,输出执行结果

./yis ./asum.yo     //会输出asum.yo程序情况

SEQ
SEQ+
PIPE
SEQ处理器对应于ssim(这个可执行文件),PIPE处理器对应于psim

-h help
-g gui模式
-t 输出处理器执行和ISA执行的差异(用来检测处理器是否遵守ISA)
./ssim -t <../a.yo     //ssim执行a.yo




PART A

在sim/misc文件里,有*examples.c**文件,里面有三个函数,sum,rsum,copy。编写这三个函数的y86-64code.


sum.ys

# sum.ys - iteratively sum the values of a linked list (Y86-64).
# Execution begins at address 0, written by peanwang
	.pos 0
	irmovq stack,%rsp	# set up the stack pointer
	call main
	halt			# result of sum_list is left in %rax

# Sample linked list; each node is { 8-byte value, 8-byte next pointer }
	.align 8
ele1:
	.quad 0x00a
	.quad ele2
ele2:
	.quad 0x0b0
	.quad ele3
ele3:
	.quad 0xc00
	.quad 0			# next == 0 terminates the list

# main: calls sum_list(ele1); for the sample list %rax ends up 0xcba
main:
	irmovq ele1,%rdi
	call sum_list
	ret

# long sum_list(list_ptr ls)
# In:       %rdi = ls (0 for an empty list)
# Out:      %rax = sum of the node values
# Clobbers: %rdi, %r10, condition codes
# Only caller-saved registers are used as scratch, so nothing has to be
# saved on the stack (the previous version overwrote %r13 and %r14).
sum_list:
	xorq %rax,%rax		# sum = 0
L2:
	andq %rdi,%rdi		# sets ZF when ls == 0, leaves ls unchanged
	je L4
	mrmovq (%rdi),%r10	# %r10 = ls->val
	addq %r10,%rax		# sum += ls->val
	mrmovq 8(%rdi),%rdi	# ls = ls->next
	jmp L2
L4:
	ret

# Stack starts here and grows to lower addresses
	.pos 0x200
stack:

深入理解计算机系统arch lab_第2张图片


rsum.ys

# rsum.ys - recursively sum the values of a linked list (Y86-64).
# Written by peanwang
	.pos 0
	irmovq stack,%rsp	# set up the stack pointer
	call main
	halt			# result of rsum_list is left in %rax

# Sample linked list; each node is { 8-byte value, 8-byte next pointer }
	.align 8
ele1:
	.quad 0x00a
	.quad ele2
ele2:
	.quad 0x0b0
	.quad ele3
ele3:
	.quad 0xc00
	.quad 0			# next == 0 terminates the list

# main: calls rsum_list(ele1); for the sample list %rax ends up 0xcba
main:
	irmovq ele1,%rdi
	call rsum_list
	ret

# long rsum_list(list_ptr ls)
# In:       %rdi = ls (0 for an empty list)
# Out:      %rax = ls->val + rsum_list(ls->next), or 0 when ls == 0
# Clobbers: %rdi, condition codes (%rbx is saved and restored)
# The null test uses andq on the pointer itself, so no scratch register is
# needed (the previous version overwrote %r14 on every call).
rsum_list:
	andq %rdi,%rdi		# sets ZF when ls == 0, leaves ls unchanged
	je L7
	pushq %rbx		# %rbx must survive the recursive call
	mrmovq (%rdi),%rbx	# %rbx = ls->val
	mrmovq 8(%rdi),%rdi	# argument for the recursion: ls->next
	call rsum_list
	addq %rbx,%rax		# result = rest + ls->val
	popq %rbx
	ret
L7:
	xorq %rax,%rax		# empty list sums to 0
	ret

# Stack starts here and grows to lower addresses
	.pos 0x200
stack:

深入理解计算机系统arch lab_第3张图片


copy.ys

# copy.ys - copy a block of words and return the XOR of the copied words.
# Written by peanwang
	.pos 0
	irmovq stack,%rsp	# set up the stack pointer
	call main
	halt			# result of copy_block is left in %rax

# Two three-word blocks
	.align 8
# Source block
src:
	.quad 0x00a
	.quad 0x0b0
	.quad 0xc00
# Destination block
dest:
	.quad 0x111
	.quad 0x222
	.quad 0x333

# main: calls copy_block(src, dest, 3); for the sample data %rax ends up 0xcba
main:
	irmovq src,%rdi
	irmovq dest,%rsi
	irmovq $3,%rdx
	call copy_block
	ret

# long copy_block(long *src, long *dest, long len)
# In:       %rdi = src, %rsi = dest, %rdx = len
# Out:      %rax = XOR of all words copied (0 when len <= 0)
# Clobbers: %rdi, %rsi, %rdx, %r8, %r9, %r10, condition codes
# Only caller-saved registers are used as scratch (the previous version
# overwrote %r12, %r13 and %r14 without saving them).
copy_block:
	irmovq $8,%r8		# size of one word
	irmovq $1,%r9		# loop decrement
	xorq %rax,%rax		# result = 0
	andq %rdx,%rdx		# len <= 0?
	jle L15			# nothing to copy
L13:
	mrmovq (%rdi),%r10	# val = *src
	rmmovq %r10,(%rsi)	# *dest = val
	xorq %r10,%rax		# result ^= val
	addq %r8,%rdi		# src++
	addq %r8,%rsi		# dest++
	subq %r9,%rdx		# len--; must stay last so jg sees its flags
	jg L13			# continue while len > 0
L15:
	ret

# Stack starts here and grows to lower addresses
	.pos 0x200
stack:

深入理解计算机系统arch lab_第4张图片



PART B

在sim/seq文件夹里,修改seq-full.hcl文件,添加iaddq指令
首先:写出iaddq指令描述

state do
fetch icode:ifun<-M1[PC]
rA,rB<-M1[PC+1]
valC<-M1[PC+2]
ValP<-PC+10
decode valB<-R[rB]
execute ValE<-ValB+ValC
memory
writeback R[rB]<-ValE
PC<-valP




seq-full.hcl

#/* $begin seq-all-hcl */
####################################################################
#  HCL Description of Control for Single Cycle Y86-64 Processor SEQ   #
#  Copyright (C) Randal E. Bryant, David R. O'Hallaron, 2010       #
####################################################################

## Your task is to implement the iaddq instruction
## The file contains a declaration of the icodes
## for iaddq (IIADDQ)
## Your job is to add the rest of the logic to make it work

####################################################################
#    C Include's.  Don't alter these                               #
####################################################################

# First include restored to <stdio.h>: the header name had been lost, and an
# #include directive with no header name is not valid C.
quote '#include <stdio.h>'
quote '#include "isa.h"'
quote '#include "sim.h"'
quote 'int sim_main(int argc, char *argv[]);'
quote 'word_t gen_pc(){return 0;}'
quote 'int main(int argc, char *argv[])'
quote '  {plusmode=0;return sim_main(argc,argv);}'

####################################################################
#    Declarations.  Do not change/remove/delete any of these       # 
####################################################################

##### Symbolic representation of Y86-64 Instruction Codes #############
wordsig INOP 	'I_NOP'
wordsig IHALT	'I_HALT'
wordsig IRRMOVQ	'I_RRMOVQ'
wordsig IIRMOVQ	'I_IRMOVQ'
wordsig IRMMOVQ	'I_RMMOVQ'
wordsig IMRMOVQ	'I_MRMOVQ'
wordsig IOPQ	'I_ALU'
wordsig IJXX	'I_JMP'
wordsig ICALL	'I_CALL'
wordsig IRET	'I_RET'
wordsig IPUSHQ	'I_PUSHQ'
wordsig IPOPQ	'I_POPQ'
# Instruction code for iaddq instruction
# (referenced by the fetch, decode and execute logic further down)
wordsig IIADDQ	'I_IADDQ'

##### Symbolic representations of Y86-64 function codes                 #####
wordsig FNONE    'F_NONE'        # Default function code

##### Symbolic representation of Y86-64 Registers referenced explicitly #####
wordsig RRSP     'REG_RSP'    	# Stack Pointer
wordsig RNONE    'REG_NONE'   	# Special value indicating "no register"

##### ALU Functions referenced explicitly                            #####
wordsig ALUADD	'A_ADD'		# ALU should add its arguments

##### Possible instruction status values                             #####
wordsig SAOK	'STAT_AOK'	# Normal execution
wordsig SADR	'STAT_ADR'	# Invalid memory address
wordsig SINS	'STAT_INS'	# Invalid instruction
wordsig SHLT	'STAT_HLT'	# Halt instruction encountered

##### Signals that can be referenced by control logic ####################

##### Fetch stage inputs		#####
wordsig pc 'pc'				# Program counter
##### Fetch stage computations		#####
wordsig imem_icode 'imem_icode'		# icode field from instruction memory
wordsig imem_ifun  'imem_ifun' 		# ifun field from instruction memory
wordsig icode	  'icode'		# Instruction control code
wordsig ifun	  'ifun'		# Instruction function
wordsig rA	  'ra'			# rA field from instruction
wordsig rB	  'rb'			# rB field from instruction
wordsig valC	  'valc'		# Constant from instruction
wordsig valP	  'valp'		# Address of following instruction
boolsig imem_error 'imem_error'		# Error signal from instruction memory
boolsig instr_valid 'instr_valid'	# Is fetched instruction valid?

##### Decode stage computations		#####
wordsig valA	'vala'			# Value from register A port
wordsig valB	'valb'			# Value from register B port

##### Execute stage computations	#####
wordsig valE	'vale'			# Value computed by ALU
boolsig Cnd	'cond'			# Branch test

##### Memory stage computations		#####
wordsig valM	'valm'			# Value read from memory
boolsig dmem_error 'dmem_error'		# Error signal from data memory


####################################################################
#    Control Signal Definitions.                                   #
####################################################################

################ Fetch Stage     ###################################

# Determine instruction code
# An instruction-memory error is treated as a nop
word icode = [
	imem_error: INOP;
	1: imem_icode;		# Default: get from instruction memory
];

# Determine instruction function
# An instruction-memory error yields the default function code
word ifun = [
	imem_error: FNONE;
	1: imem_ifun;		# Default: get from instruction memory
];

# Is the fetched icode one of the known instructions?
# IIADDQ is listed so that iaddq is not reported as SINS by Stat
bool instr_valid = icode in 
	{ INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
	       IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ ,IIADDQ };

# Does fetched instruction require a regid byte?
# iaddq names its destination in rB, so it carries a register byte
bool need_regids =
	icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ, 
		     IIRMOVQ, IRMMOVQ, IMRMOVQ,IIADDQ };

# Does fetched instruction require a constant word?
# iaddq carries its immediate operand in valC
bool need_valC =
	icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL,IIADDQ };

################ Decode Stage    ###################################

## What register should be used as the A source?
## iaddq does not appear here: its first operand is the constant valC
word srcA = [
	icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ  } : rA;
	icode in { IPOPQ, IRET } : RRSP;
	1 : RNONE; # Don't need register
];

## What register should be used as the B source?
## iaddq reads rB as the value the constant is added to
word srcB = [
	icode in { IOPQ, IRMMOVQ, IMRMOVQ ,IIADDQ } : rB;
	icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
	1 : RNONE;  # Don't need register
];

## What register should be used as the E destination?
## iaddq writes the ALU result back to rB; rrmovq writes rB only when Cnd holds
word dstE = [
	icode in { IRRMOVQ } && Cnd : rB;
	icode in { IIRMOVQ, IOPQ ,IIADDQ } : rB;
	icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
	1 : RNONE;  # Don't write any register
];
## What register should be used as the M destination?
## iaddq reads no memory, so it falls through to RNONE
word dstM = [
	icode in { IMRMOVQ, IPOPQ } : rA;
	1 : RNONE;  # Don't write any register
];

################ Execute Stage   ###################################

## Select input A to ALU
## iaddq feeds the instruction constant valC into input A
word aluA = [
	icode in { IRRMOVQ, IOPQ } : valA;
	icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ ,IIADDQ } : valC;
	icode in { ICALL, IPUSHQ } : -8;
	icode in { IRET, IPOPQ } : 8;
	# Other instructions don't need ALU
];

## Select input B to ALU
## iaddq feeds the register value valB (read from rB) into input B
word aluB = [
	icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, 
		      IPUSHQ, IRET, IPOPQ ,IIADDQ } : valB;
	icode in { IRRMOVQ, IIRMOVQ } : 0;
	# Other instructions don't need ALU
];

## Set the ALU function
## Only OPq selects the operation through ifun; everything else, including
## iaddq, takes the ALUADD default
word alufun = [
	icode == IOPQ : ifun;
	1 : ALUADD;
];

## Should the condition codes be updated?
## iaddq updates the condition codes, as OPq does
bool set_cc = icode in { IOPQ ,IIADDQ};

################ Memory Stage    ###################################

## Set read control signal
## iaddq is in neither the read nor the write set: it does not access memory
bool mem_read = icode in { IMRMOVQ, IPOPQ, IRET };

## Set write control signal
bool mem_write = icode in { IRMMOVQ, IPUSHQ, ICALL };

## Select memory address
## popq and ret use valA; the other memory instructions use the ALU result valE
word mem_addr = [
	icode in { IRMMOVQ, IPUSHQ, ICALL, IMRMOVQ } : valE;
	icode in { IPOPQ, IRET } : valA;
	# Other instructions don't need address
];

## Select memory input data
word mem_data = [
	# Value from register
	icode in { IRMMOVQ, IPUSHQ } : valA;
	# Return PC
	icode == ICALL : valP;
	# Default: Don't write anything
];

## Determine instruction status
## Cases are tried in order: memory errors first, then invalid instruction,
## then halt; anything else is normal execution
word Stat = [
	imem_error || dmem_error : SADR;
	!instr_valid: SINS;
	icode == IHALT : SHLT;
	1 : SAOK;
];

################ Program Counter Update ############################

## What address should instruction be fetched at
## iaddq matches none of the special cases and continues at valP

word new_pc = [
	# Call.  Use instruction constant
	icode == ICALL : valC;
	# Taken branch.  Use instruction constant
	icode == IJXX && Cnd : valC;
	# Completion of RET instruction.  Use value from stack
	icode == IRET : valM;
	# Default: Use incremented PC
	1 : valP;
];
#/* $end seq-all-hcl */



编译和测试

make VERSION=full

做测试
深入理解计算机系统arch lab_第5张图片
这里写图片描述
regression_test1 output:

./optest.pl -s ../seq/ssim 
Simulating with ../seq/ssim
  All 49 ISA Checks Succeed
./jtest.pl -s ../seq/ssim 
Simulating with ../seq/ssim
  All 64 ISA Checks Succeed
./ctest.pl -s ../seq/ssim 
Simulating with ../seq/ssim
  All 22 ISA Checks Succeed
./htest.pl -s ../seq/ssim 
Simulating with ../seq/ssim
  All 600 ISA Checks Succeed

regression_test2 output:

./optest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 58 ISA Checks Succeed
./jtest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 96 ISA Checks Succeed
./ctest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 22 ISA Checks Succeed
./htest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 756 ISA Checks Succeed

PartC

任务:修改ncopy.ys和pipe-full.hcl.尽所能提高ncopy.ys性能
我逻辑控制哪里每看明白(看了5遍了(っ °Д °;)っ),所以PartC我写的不好。


pipe-full.hcl修改
①:添加iaddq指令
②:修改预测分支器,修改成BTFNT(家庭作业)。(没做出来)


ncopy.ys修改
①:循环展开
②:避免加载使用冒险




我修改pipe-full(只添加了iaddq)。和PartB差不多。



ncopy.ys 避免了加载使用冒险 ,使用了iaddq指令

# You can modify this portion
# Registers: %rdi = src, %rsi = dst, %rdx = len, %rax = count of positive values
# Uses iaddq for every constant add; this assumes iaddq updates the
# condition codes the same way OPq does.
	# Loop header
	xorq %rax,%rax		# count = 0;
	andq %rdx,%rdx		# len <= 0?
	jle Done		# if so, goto Done:

Loop:	mrmovq (%rdi), %r10	# read val from src...
	iaddq $8, %rdi		# src++ (placed between the load and its first use)
	rmmovq %r10, (%rsi)	# ...and store it to dst
	andq %r10, %r10		# val <= 0?
	jle Npos		# if so, goto Npos:
	iaddq $1, %rax		# count++
Npos:	iaddq $8, %rsi		# dst++
	iaddq $-1, %rdx		# len--, also sets the flags tested below
	jg Loop			# if len > 0, goto Loop:




还是各种测试
深入理解计算机系统arch lab_第6张图片
深入理解计算机系统arch lab_第7张图片

深入理解计算机系统arch lab_第8张图片
深入理解计算机系统arch lab_第9张图片
正如你们所见,这样做,只能得零分(っ °Д °;)っ。



ncopy.ys 避免了加载使用冒险 ,使用了iaddq指令,二层循环展开

# You can modify this portion
# Copy loop unrolled by two.
# Registers: %rdi = src, %rsi = dst, %rdx = len, %rax = count of positive values
#            %rcx = limit = len - 1 - (elements already handled);
#                   limit > 0 means at least two elements are left
#            %r8  = val1, %r9 = src + 2 (kept while %rdi temporarily holds val2)
	# Loop header
	irmovq $-1,%rcx
	addq %rdx,%rcx		# limit = len - 1
	xorq %rax,%rax		# count = 0
	jmp L2
L4:
	rmmovq %r8, (%rsi)	# dst[0] = val1
	rmmovq %rdi, 8(%rsi)	# dst[1] = val2
	iaddq $16,%rsi		# dst += 2
	rrmovq %r9, %rdi	# src = old src + 2
L2:
	iaddq $0, %rcx		# limit > 0 ?
	jle L7
	mrmovq (%rdi), %r8	# val1 in %r8

	rrmovq %rdi,%r9
	iaddq $16,%r9		# src + 2 in %r9
	mrmovq 8(%rdi),%rdi	# val2 in %rdi (src is restored from %r9 at L4)
	iaddq $-2, %rcx		# limit -= 2
	iaddq $0, %r8		# val1 > 0 ?
	jle L3
	iaddq $1, %rax		# count++
L3:
	iaddq $0, %rdi		# val2 > 0 ?
	jle L4
	iaddq $1, %rax		# count++
	jmp L4
L7:
	# L7 is reached only through "jle L7", so the condition codes still
	# describe limit: 0 means exactly one element is left, negative means
	# none (this also covers len <= 0, where the parity of len says nothing).
	jl Done
	mrmovq (%rdi), %r8	# last val
	rmmovq %r8, (%rsi)	# *dst = val
	iaddq $0,%r8		# val > 0 ?
	jle Done
	iaddq $1, %rax		# count++

深入理解计算机系统arch lab_第10张图片
深入理解计算机系统arch lab_第11张图片
深入理解计算机系统arch lab_第12张图片
正如你们所见,还是只能得零分(っ °Д °;)っ。
分享两位大佬的:
46分
58.6.
真的很难QAQ

你可能感兴趣的:(csapp)