这个lab分为PartA、PartB、PartC三部分。在PartA中,你要编写Y86-64程序。在PartB中,你要给SEQ处理器添加iaddq指令。在PartC中,你要优化你的程序。
file | contains |
---|---|
misc | YAS和YIS |
seq | SEQ处理器 |
pipe | PIPE处理器 |
y86_code | 书上的y86代码 |
ptest | 测试脚本 |
Makefile | Makefile |
README | README |
YAS
Y86-64汇编器,输入.ys结尾的y86 code,输出.yo结尾的object code。
./yas ./asum.ys //会输出asum.yo文件
YIS
Y86-64指令模拟器,输入.yo结尾的object code,输出执行结果
./yis ./asum.yo //会输出asum.yo程序的执行结果(寄存器和内存的变化)
SEQ
SEQ+
PIPE
SEQ处理器对应于ssim(这个可执行文件),PIPE处理器对应于psim
-h | help |
---|---|
-g | gui模式 |
-t | 输出处理器执行和ISA执行的差异(用来检测处理器是否遵守ISA) |
./ssim -t <../a.yo //ssim执行a.yo
在sim/misc文件夹里,有examples.c文件,里面有三个函数:sum_list、rsum_list、copy_block。编写这三个函数的Y86-64代码。
# sum.ys - iteratively sum the elements of a linked list (written by peanwang)
# Y86-64 program; execution begins at address 0.
	.pos 0
	irmovq stack,%rsp	# set up the stack pointer
	call main		# run the test driver
	halt			# stop with the result left in %rax

# Sample linked list: each node is { value, next }
	.align 8
ele1:
	.quad 0x00a
	.quad ele2
ele2:
	.quad 0x0b0
	.quad ele3
ele3:
	.quad 0xc00
	.quad 0			# next == 0 terminates the list

# main: call sum_list(ele1); the sum is returned in %rax
main:
	irmovq ele1,%rdi
	call sum_list
	ret

# long sum_list(list_ptr ls)
# In:       %rdi = ls (pointer to the first node, 0 for an empty list)
# Out:      %rax = sum of all node values
# Clobbers: %rdi, %r8, condition codes
# Only caller-saved registers are used, so nothing has to be saved/restored.
sum_list:
	xorq %rax,%rax		# sum = 0
L2:
	andq %rdi,%rdi		# ls == 0? (sets ZF without a scratch register)
	je L4			# yes: end of list
	mrmovq (%rdi),%r8	# %r8 = ls->val
	addq %r8,%rax		# sum += ls->val
	mrmovq 8(%rdi),%rdi	# ls = ls->next
	jmp L2
L4:
	ret

# Stack starts here and grows to lower addresses
	.pos 0x200
stack:
# rsum.ys - recursively sum the elements of a linked list (written by peanwang)
# Y86-64 program; execution begins at address 0.
	.pos 0
	irmovq stack,%rsp	# set up the stack pointer
	call main		# run the test driver
	halt			# stop with the result left in %rax

# Sample linked list: each node is { value, next }
	.align 8
ele1:
	.quad 0x00a
	.quad ele2
ele2:
	.quad 0x0b0
	.quad ele3
ele3:
	.quad 0xc00
	.quad 0			# next == 0 terminates the list

# main: call rsum_list(ele1); the sum is returned in %rax
main:
	irmovq ele1,%rdi
	call rsum_list
	ret

# long rsum_list(list_ptr ls)
# In:       %rdi = ls (pointer to the first node, 0 for an empty list)
# Out:      %rax = sum of all node values
# Clobbers: %rdi, condition codes
# %rbx is callee-saved: it is pushed before use and popped before returning,
# so the current node's value survives the recursive call.
rsum_list:
	andq %rdi,%rdi		# ls == 0? (sets ZF without a scratch register)
	je L7			# yes: base case
	pushq %rbx		# save the caller's %rbx
	mrmovq (%rdi),%rbx	# %rbx = ls->val
	mrmovq 8(%rdi),%rdi	# argument for the recursion: ls->next
	call rsum_list		# %rax = rsum_list(ls->next)
	addq %rbx,%rax		# result = ls->val + rest
	popq %rbx		# restore the caller's %rbx
	ret
L7:
	xorq %rax,%rax		# empty list sums to 0
	ret

# Stack starts here and grows to lower addresses
	.pos 0x200
stack:
# copy.ys - copy a block of words and return the XOR checksum (written by peanwang)
# Y86-64 program; execution begins at address 0.
	.pos 0
	irmovq stack,%rsp	# set up the stack pointer
	call main		# run the test driver
	halt			# stop with the result left in %rax

# Two three-word blocks
	.align 8
# Source block
src:
	.quad 0x00a
	.quad 0x0b0
	.quad 0xc00
# Destination block
dest:
	.quad 0x111
	.quad 0x222
	.quad 0x333

# main: call copy_block(src, dest, 3); the checksum is returned in %rax
main:
	irmovq src,%rdi
	irmovq dest,%rsi
	irmovq $3,%rdx
	call copy_block
	ret

# long copy_block(long *src, long *dest, long len)
# In:       %rdi = src, %rsi = dest, %rdx = len (number of 8-byte words)
# Out:      %rax = XOR of all copied words (0 when len <= 0)
# Clobbers: %rcx, %rdx, %rdi, %rsi, %r8, %r9, condition codes
# Only caller-saved registers are used, so nothing has to be saved/restored.
copy_block:
	irmovq $8,%r8		# constant: size of one word
	irmovq $1,%r9		# constant: loop decrement
	xorq %rax,%rax		# result = 0
L13:
	andq %rdx,%rdx		# len <= 0?
	jle L15			# yes: done
	mrmovq (%rdi),%rcx	# val = *src
	rmmovq %rcx,(%rsi)	# *dest = val
	xorq %rcx,%rax		# result ^= val
	addq %r8,%rdi		# src++
	addq %r8,%rsi		# dest++
	subq %r9,%rdx		# len--
	jmp L13
L15:
	ret

# Stack starts here and grows to lower addresses
	.pos 0x200
stack:
在sim/seq文件夹里,修改seq-full.hcl文件,添加iaddq指令
首先:写出iaddq指令描述
stage | iaddq V, rB |
---|---|
fetch | icode:ifun <- M1[PC] |
 | rA:rB <- M1[PC+1] |
 | valC <- M8[PC+2] |
 | valP <- PC+10 |
decode | valB <- R[rB] |
execute | valE <- valB + valC |
 | Set CC |
memory | |
write back | R[rB] <- valE |
PC update | PC <- valP |
seq-full.hcl
#/* $begin seq-all-hcl */
####################################################################
# HCL Description of Control for Single Cycle Y86-64 Processor SEQ #
# Copyright (C) Randal E. Bryant, David R. O'Hallaron, 2010 #
####################################################################
## Your task is to implement the iaddq instruction
## The file contains a declaration of the icodes
## for iaddq (IIADDQ)
## Your job is to add the rest of the logic to make it work
####################################################################
# C Include's. Don't alter these #
####################################################################
# Text inside each quote is copied verbatim into the generated C file.
# The first include must name a header: a bare '#include ' does not compile.
quote '#include <stdio.h>'
quote '#include "isa.h"'
quote '#include "sim.h"'
quote 'int sim_main(int argc, char *argv[]);'
quote 'word_t gen_pc(){return 0;}'
quote 'int main(int argc, char *argv[])'
quote ' {plusmode=0;return sim_main(argc,argv);}'
####################################################################
# Declarations. Do not change/remove/delete any of these #
####################################################################
# Each wordsig/boolsig line below binds an HCL signal name to the quoted
# C-side name used by the simulator.
##### Symbolic representation of Y86-64 Instruction Codes #############
wordsig INOP 'I_NOP'
wordsig IHALT 'I_HALT'
wordsig IRRMOVQ 'I_RRMOVQ'
wordsig IIRMOVQ 'I_IRMOVQ'
wordsig IRMMOVQ 'I_RMMOVQ'
wordsig IMRMOVQ 'I_MRMOVQ'
wordsig IOPQ 'I_ALU'
wordsig IJXX 'I_JMP'
wordsig ICALL 'I_CALL'
wordsig IRET 'I_RET'
wordsig IPUSHQ 'I_PUSHQ'
wordsig IPOPQ 'I_POPQ'
# Instruction code for iaddq instruction
wordsig IIADDQ 'I_IADDQ'
##### Symbolic representations of Y86-64 function codes #####
wordsig FNONE 'F_NONE' # Default function code
##### Symbolic representation of Y86-64 Registers referenced explicitly #####
wordsig RRSP 'REG_RSP' # Stack Pointer
wordsig RNONE 'REG_NONE' # Special value indicating "no register"
##### ALU Functions referenced explicitly #####
wordsig ALUADD 'A_ADD' # ALU should add its arguments
##### Possible instruction status values #####
wordsig SAOK 'STAT_AOK' # Normal execution
wordsig SADR 'STAT_ADR' # Invalid memory address
wordsig SINS 'STAT_INS' # Invalid instruction
wordsig SHLT 'STAT_HLT' # Halt instruction encountered
##### Signals that can be referenced by control logic ####################
##### Fetch stage inputs #####
wordsig pc 'pc' # Program counter
##### Fetch stage computations #####
wordsig imem_icode 'imem_icode' # icode field from instruction memory
wordsig imem_ifun 'imem_ifun' # ifun field from instruction memory
wordsig icode 'icode' # Instruction control code
wordsig ifun 'ifun' # Instruction function
wordsig rA 'ra' # rA field from instruction
wordsig rB 'rb' # rB field from instruction
wordsig valC 'valc' # Constant from instruction
wordsig valP 'valp' # Address of following instruction
boolsig imem_error 'imem_error' # Error signal from instruction memory
boolsig instr_valid 'instr_valid' # Is fetched instruction valid?
##### Decode stage computations #####
wordsig valA 'vala' # Value from register A port
wordsig valB 'valb' # Value from register B port
##### Execute stage computations #####
wordsig valE 'vale' # Value computed by ALU
boolsig Cnd 'cond' # Branch test
##### Memory stage computations #####
wordsig valM 'valm' # Value read from memory
boolsig dmem_error 'dmem_error' # Error signal from data memory
####################################################################
# Control Signal Definitions. #
####################################################################
################ Fetch Stage ###################################

# Instruction code: fall back to INOP when instruction memory reports an error
word icode = [
	imem_error : INOP;
	1 : imem_icode;
];

# Instruction function: fall back to FNONE when instruction memory reports an error
word ifun = [
	imem_error : FNONE;
	1 : imem_ifun;
];

# Set of legal instruction codes; IIADDQ is a member so iaddq is accepted
bool instr_valid = icode in
	{ INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
	  IOPQ, IIADDQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ };

# Instructions that carry a register-specifier byte (iaddq names rB there)
bool need_regids =
	icode in { IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
		   IOPQ, IIADDQ, IPUSHQ, IPOPQ };

# Instructions that carry a constant word (the immediate operand for iaddq)
bool need_valC =
	icode in { IIRMOVQ, IIADDQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL };
################ Decode Stage ###################################

## Register read on port A (iaddq reads nothing here)
word srcA = [
	icode in { IOPQ, IRRMOVQ, IRMMOVQ, IPUSHQ } : rA;
	icode in { IRET, IPOPQ } : RRSP;
	1 : RNONE;	# No register needed
];

## Register read on port B (iaddq reads rB, the register it adds to)
word srcB = [
	icode in { IOPQ, IIADDQ, IRMMOVQ, IMRMOVQ } : rB;
	icode in { ICALL, IRET, IPUSHQ, IPOPQ } : RRSP;
	1 : RNONE;	# No register needed
];

## Destination for the ALU result valE (iaddq writes valE back to rB)
word dstE = [
	icode == IRRMOVQ && Cnd : rB;
	icode in { IOPQ, IIADDQ, IIRMOVQ } : rB;
	icode in { ICALL, IRET, IPUSHQ, IPOPQ } : RRSP;
	1 : RNONE;	# No register written
];

## Destination for the memory result valM
word dstM = [
	icode in { IPOPQ, IMRMOVQ } : rA;
	1 : RNONE;	# No register written
];
################ Execute Stage ###################################

## ALU input A (iaddq feeds its constant valC)
word aluA = [
	icode in { IOPQ, IRRMOVQ } : valA;
	icode in { IIADDQ, IIRMOVQ, IRMMOVQ, IMRMOVQ } : valC;
	icode in { IPUSHQ, ICALL } : -8;
	icode in { IPOPQ, IRET } : 8;
	# Remaining instructions do not use the ALU
];

## ALU input B (iaddq feeds valB, the current value of rB)
word aluB = [
	icode in { IOPQ, IIADDQ, IRMMOVQ, IMRMOVQ,
		   ICALL, IRET, IPUSHQ, IPOPQ } : valB;
	icode in { IIRMOVQ, IRRMOVQ } : 0;
	# Remaining instructions do not use the ALU
];

## ALU operation: OPq selects it through ifun, everything else adds
word alufun = [
	icode in { IOPQ } : ifun;
	1 : ALUADD;
];

## Condition codes are updated by OPq and by iaddq
bool set_cc = icode in { IIADDQ, IOPQ };
################ Memory Stage ###################################

## Instructions that read data memory (iaddq does not access memory)
bool mem_read = icode in { IRET, IPOPQ, IMRMOVQ };

## Instructions that write data memory
bool mem_write = icode in { ICALL, IPUSHQ, IRMMOVQ };

## Address presented to data memory
word mem_addr = [
	icode in { IRMMOVQ, IMRMOVQ, IPUSHQ, ICALL } : valE;
	icode in { IRET, IPOPQ } : valA;
	# Remaining instructions need no address
];

## Data written to memory
word mem_data = [
	# Register value
	icode in { IPUSHQ, IRMMOVQ } : valA;
	# Return address for call
	icode in { ICALL } : valP;
	# Default: nothing is written
];

## Instruction status; the first matching case wins
word Stat = [
	dmem_error || imem_error : SADR;
	!instr_valid : SINS;
	icode in { IHALT } : SHLT;
	1 : SAOK;
];
################ Program Counter Update ############################

## Address of the next instruction to fetch
word new_pc = [
	# call: jump to the constant in the instruction
	icode in { ICALL } : valC;
	# Conditional jump that is taken: jump to the constant in the instruction
	Cnd && icode == IJXX : valC;
	# ret: return address read from the stack
	icode in { IRET } : valM;
	# Everything else (including iaddq): fall through to the next instruction
	1 : valP;
];
#/* $end seq-all-hcl */
make VERSION=full
./optest.pl -s ../seq/ssim
Simulating with ../seq/ssim
All 49 ISA Checks Succeed
./jtest.pl -s ../seq/ssim
Simulating with ../seq/ssim
All 64 ISA Checks Succeed
./ctest.pl -s ../seq/ssim
Simulating with ../seq/ssim
All 22 ISA Checks Succeed
./htest.pl -s ../seq/ssim
Simulating with ../seq/ssim
All 600 ISA Checks Succeed
regression_test2 output:
./optest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
All 58 ISA Checks Succeed
./jtest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
All 96 ISA Checks Succeed
./ctest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
All 22 ISA Checks Succeed
./htest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
All 756 ISA Checks Succeed
任务:修改ncopy.ys和pipe-full.hcl.尽所能提高ncopy.ys性能
我逻辑控制那里没看明白(看了5遍了(っ °Д °;)っ),所以PartC我写得不好。
pipe-full.hcl修改
①:添加iaddq指令
②:修改分支预测器,改成BTFNT(backward taken, forward not-taken,家庭作业)。(没做出来)
ncopy.ys修改
①:循环展开
②:避免加载使用冒险
我修改pipe-full(只添加了iaddq)。和PartB差不多。
ncopy.ys 避免了加载使用冒险 ,使用了iaddq指令
# You can modify this portion
# Body of ncopy: copies len words from src to dst and counts the positive ones.
# Registers as used below: %rdi = src, %rsi = dst, %rdx = len,
# %rax = count, %r10 = scratch (loaded value and constants).
# The entry label and the Done label are outside this fragment.
# NOTE(review): this listing uses irmovq + addq/subq pairs and no iaddq.
# NOTE(review): rmmovq reads %r10 directly after the mrmovq that loads it;
# presumably that is a load/use stall on PIPE - confirm against the pipeline.
# Loop header
xorq %rax,%rax # count = 0;
andq %rdx,%rdx # len <= 0?
jle Done # if so, goto Done:
Loop: mrmovq (%rdi), %r10 # read val from src...
rmmovq %r10, (%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos # if so, goto Npos:
irmovq $1, %r10 # constant 1 (val is no longer needed)
addq %r10, %rax # count++
Npos: irmovq $1, %r10 # constant 1 again: %r10 may still hold val here
subq %r10, %rdx # len--
irmovq $8, %r10 # constant 8 = size of one word
addq %r10, %rdi # src++
addq %r10, %rsi # dst++
andq %rdx,%rdx # len > 0? (re-test: the addq above overwrote the flags)
jg Loop # if so, goto Loop:
正如你们所见,这样做,只能得零分(っ °Д °;)っ。
ncopy.ys 避免了加载使用冒险 ,使用了iaddq指令,二层循环展开
# You can modify this portion
# Body of ncopy, unrolled two elements per iteration.
# Requires iaddq to be implemented and to set the condition codes.
# Register roles:
#   %rdi = src cursor (holds val2 between the second load and label L4)
#   %rsi = dst cursor
#   %rdx = len on entry; reused for the last value in the odd tail
#   %rcx = limit = (elements still to copy) - 1
#   %r8  = val1
#   %r9  = src + 16, the src cursor for the next iteration
#   %rax = count of positive values
# The entry label and the Done label are outside this fragment.
# Loop header
	irmovq $-1,%rcx
	addq %rdx,%rcx		# limit = len - 1
	xorq %rax,%rax		# count = 0
	jmp L2
L4:
	rmmovq %r8, (%rsi)	# dst[0] = val1
	rmmovq %rdi, 8(%rsi)	# dst[1] = val2
	iaddq $16,%rsi		# dst += 2
	rrmovq %r9, %rdi	# restore the src cursor (already advanced by 2)
L2:
	iaddq $0, %rcx		# limit > 0, i.e. at least two elements left?
	jle L7			# no: handle the tail
	mrmovq (%rdi), %r8	# val1 = src[0]
	rrmovq %rdi,%r9
	iaddq $16,%r9		# %r9 = src + 2
	mrmovq 8(%rdi),%rdi	# val2 = src[1] (overwrites the src cursor)
	iaddq $-2, %rcx		# limit -= 2
	iaddq $0, %r8		# val1 > 0?
	jle L3
	iaddq $1, %rax		# count++
L3:
	iaddq $0, %rdi		# val2 > 0?
	jle L4
	iaddq $1, %rax		# count++
	jmp L4
L7:
	# Reached only through "jle L7", so the condition codes still describe
	# limit. limit == 0 means exactly one element is left; a negative limit
	# means nothing is left (even len, or len <= 0). Testing limit instead of
	# (len & 1) keeps a negative odd len from copying an element and avoids
	# using callee-saved %r14 as a scratch register.
	jne Done
	mrmovq (%rdi), %rdx	# val = src[0]
	rmmovq %rdx, (%rsi)	# dst[0] = val
	iaddq $0,%rdx		# val > 0?
	jle Done
	iaddq $1, %rax		# count++