uint64 ***cache; // cache[i][j][0]: valid bit; cache[i][j][1]: tag; cache[i][j][2]: LRU counter
,否则是miss eviction
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>

#include "cachelab.h"
/* Alias for unsigned long.  The name suggests 64 bits, which holds on LP64
 * targets - NOTE(review): confirm for other ABIs. */
typedef long unsigned int uint64;
/* Ad-hoc trace macros that print fixed marker strings. */
#define Debug() { printf("wwwwqqqqq\n"); };
#define IN() { printf("-------function in------\n"); }
#define OUT() { printf("-------function out------\n"); }
/* Cache geometry as stored by set_sEb_cache(): set-index bits, lines per
 * set, and block-offset bits. */
uint64 cache_s;
uint64 cache_E;
uint64 cache_b;
uint64 ***cache; // cache[i][j][0]: valid bit; cache[i][j][1]: tag; cache[i][j][2]: LRU counter
/* Totals handed to printSummary() by main(). */
int _hits = 0, _misses = 0, _evictions = 0;
/* Set to 1 by the -v option; read_file() then prints one line per access. */
int verbose = 0;
/* Result strings, indexed by the value LRU() returns. */
char* ans[3] = {"miss", "miss eviction", "hit"};
/* cache_alloc - calloc() wrapper that terminates the program on failure. */
static void *cache_alloc(size_t count, size_t size) {
    void *block = calloc(count, size);

    /* calloc(0, ...) may legitimately return NULL; only a non-empty request
     * that comes back NULL is an error. */
    if (block == NULL && count != 0) {
        fprintf(stderr, "set_sEb_cache: out of memory\n");
        exit(EXIT_FAILURE);
    }
    return block;
}

/*
 * set_sEb_cache - Record the cache geometry and allocate the simulated cache.
 *
 *   s: number of set-index bits (the cache gets 2^s sets)
 *   E: number of lines per set
 *   b: number of block-offset bits
 *
 * Every line is a 3-element array {valid bit, tag, LRU counter}.  All three
 * fields start at zero, so no later read sees indeterminate memory.  The
 * process exits with a message if s is too large to shift or an allocation
 * fails.  Release the storage with free_cache().
 */
void set_sEb_cache(uint64 s, uint64 E, uint64 b) {
    cache_s = s;
    cache_E = E;
    cache_b = b;

    if (s >= 8 * sizeof(uint64)) {
        fprintf(stderr, "set_sEb_cache: too many set index bits\n");
        exit(EXIT_FAILURE);
    }

    /* Shift a uint64 one: `1u << s` is only as wide as unsigned int. */
    uint64 set_count = (uint64)1 << cache_s;

    cache = cache_alloc(set_count, sizeof *cache);
    for (uint64 i = 0; i < set_count; i++) {
        cache[i] = cache_alloc(cache_E, sizeof *cache[i]);
        for (uint64 j = 0; j < cache_E; j++) {
            cache[i][j] = cache_alloc(3, sizeof *cache[i][j]);
        }
    }
}
void free_cache() {
for (uint64 i = 0; i < (1u << cache_s); i++) {
for (uint64 j = 0; j < cache_E; j++)
free(*(*(cache + i) + j));
free(*(cache + i));
/* NOTE(review): T is not referenced by any code visible here; it is kept in
 * case other code uses it. */
int T = 1;

/*
 * LRU - Simulate one access to `address` with least-recently-used replacement.
 *
 * Looks the address up in its set.  On a hit the line's age counter is reset.
 * On a miss the block is placed in the first invalid line, or, when the set
 * is full, in the line with the largest age counter.  Afterwards every line
 * in the set ages by one.  The global _hits / _misses / _evictions totals are
 * updated to match the outcome.
 *
 * Returns an index into ans[]: 0 = miss, 1 = miss with eviction, 2 = hit.
 *
 * Assumes set_sEb_cache() has been called with at least one line per set.
 */
int LRU(uint64 address) {
    uint64 tag = address >> (cache_s + cache_b);
    /* Build the set mask in uint64; `1u << cache_s` is too narrow. */
    uint64 set = (address >> cache_b) & (((uint64)1 << cache_s) - 1);
    uint64 **lines = cache[set];
    int result = 0;

    /* Is the block already cached? */
    for (uint64 j = 0; j < cache_E; j++) {
        if (lines[j][0] == 1 && lines[j][1] == tag) {
            lines[j][2] = 0;
            result = 2;
            break;
        }
    }

    /* Miss: choose a line to fill. */
    if (result != 2) {
        uint64 victim = 0;
        uint64 oldest = 0;

        for (uint64 j = 0; j < cache_E; j++) {
            if (lines[j][0] == 0) {
                /* A free line always wins; nothing needs to be evicted. */
                victim = j;
                break;
            }
            if (lines[j][2] > oldest) {
                victim = j;
                oldest = lines[j][2];
            }
        }
        if (lines[victim][0] == 1) {
            result = 1;
        }
        lines[victim][0] = 1;
        lines[victim][1] = tag;
        lines[victim][2] = 0;
    }

    /* Age every line in the set by one access. */
    for (uint64 j = 0; j < cache_E; j++) {
        lines[j][2]++;
    }

    if (result == 2) {
        _hits++;
    } else {
        _misses++;
        if (result == 1) {
            _evictions++;
        }
    }
    return result;
}
/*
 * printHelp - Print the command-line usage summary to stdout.
 *
 * Each option that takes a value is shown with its <num>/<file> placeholder,
 * so the usage line names the arguments it expects.
 */
void printHelp(void) {
    printf("Usage: ./csim-ref [-hv] -s <num> -E <num> -b <num> -t <file>\n");
    printf("  -h         Print this help message.\n");
    printf("  -v         Optional verbose flag.\n");
    printf("  -s <num>   Number of set index bits.\n");
    printf("  -E <num>   Number of lines per set.\n");
    printf("  -b <num>   Number of block offset bits.\n");
    printf("  -t <file>  Trace file.\n\n");
    printf("  linux> ./csim-ref -s 4 -E 1 -b 4 -t traces/yi.trace\n");
    printf("  linux> ./csim-ref -v -s 8 -E 2 -b 4 -t traces/yi.trace\n");
}
/*
 * parse_count - Convert an option value to uint64.
 *
 * Returns 0 when the text is missing, does not start with a digit (which
 * also rejects a leading '-'), or has trailing characters.  read_arg()
 * treats 0 as "not supplied".
 */
static uint64 parse_count(const char *text) {
    char *end = NULL;

    if (text == NULL || *text < '0' || *text > '9') {
        return 0;
    }
    uint64 value = strtoul(text, &end, 10);
    return (*end == '\0') ? value : 0;
}

/*
 * read_arg - Parse the command line and set up the simulated cache.
 *
 * Recognised options: -h (print help and exit), -v (verbose), -s <num>,
 * -E <num>, -b <num>, -t <file>.  If an option is unknown, or any of
 * s, E, b or the trace file is missing or zero, the usage text is printed
 * and the process exits with a failure status.
 *
 * On success the cache is allocated through set_sEb_cache() and the trace
 * file path is returned.  The path points into argv and is not copied.
 */
char* read_arg(int argc, char* argv[]) {
    uint64 s = 0, E = 0, b = 0;
    char *path = NULL; /* stays NULL when -t is absent */
    int opt;

    while ((opt = getopt(argc, argv, "hvs:E:b:t:")) != -1) {
        switch (opt) {
        case 'h':
            printHelp();
            exit(EXIT_SUCCESS);
        case 'v':
            verbose = 1;
            break;
        case 's':
            s = parse_count(optarg);
            break;
        case 'E':
            E = parse_count(optarg);
            break;
        case 'b':
            b = parse_count(optarg);
            break;
        case 't':
            path = optarg;
            break;
        default:
            printHelp();
            exit(EXIT_FAILURE);
        }
    }

    if (s == 0 || E == 0 || b == 0 || path == NULL) {
        printHelp();
        exit(EXIT_FAILURE);
    }

    set_sEb_cache(s, E, b);
    return path;
}
/*
 * read_file - Replay a memory trace through the cache simulator.
 *
 * Each record has the form "<op> <hex address>,<size>".  Instruction
 * fetches ('I') are skipped.  Every other record is one access, except 'M'
 * (modify), which is a load followed by a store and so makes two accesses.
 * With the verbose flag set, one line per record is printed showing the
 * outcome of each access.
 *
 * Exits with a failure status if the trace file cannot be opened.  Reading
 * stops at end of file or at the first record that does not parse.
 */
void read_file(char *path) {
    FILE *trace = (path != NULL) ? fopen(path, "r") : NULL;
    char operation;
    uint64 address;
    uint64 size;

    if (trace == NULL) {
        fprintf(stderr, "read_file: cannot open trace file\n");
        exit(EXIT_FAILURE);
    }

    /* Require all three fields; a partial match would reuse stale values. */
    while (fscanf(trace, " %c %lx,%lu", &operation, &address, &size) == 3) {
        if (operation == 'I') {
            continue;
        }

        /* Run the accesses as separate statements.  Calling LRU() twice
         * inside one printf argument list leaves their order unspecified. */
        int first = LRU(address);
        int second = (operation == 'M') ? LRU(address) : -1;

        if (verbose == 1) {
            if (operation == 'M') {
                printf("%c %lx,%lu %s %s\n", operation, address, size,
                       ans[first], ans[second]);
            } else {
                printf("%c %lx,%lu %s\n", operation, address, size,
                       ans[first]);
            }
        }
    }

    fclose(trace);
}
/*
 * main - Entry point of the cache simulator.
 *
 * Parses the command line (which also allocates the cache), replays the
 * trace file, frees the cache, and reports the hit / miss / eviction totals
 * through printSummary().
 */
int main(int argc, char* argv[]) {
    char *path = read_arg(argc, argv);

    read_file(path);
    free_cache();
    printSummary(_hits, _misses, _evictions);
    return 0;
}
linux> make
linux> ./test-trans -M 32 -N 32
(s = 5, E = 1, b = 5) E=1,因此每一个组只有一行。
/*
 * You can define additional transpose functions below. We've defined
 * a simple one below to help you get started.
 */

/*
 * transpose_32_32 - Blocked transpose, B = A^T, written for the 32 x 32 case.
 *
 * A has N rows and M columns; B has M rows and N columns.  The matrix is
 * processed in 8 x 8 tiles: one 8-element row segment of A is loaded into
 * scalars and then stored down the matching column of B.
 *
 * Both M and N must be multiples of 8.  Only scalar locals are used, because
 * the Cache Lab Part B rules do not allow local arrays in transpose
 * functions.
 */
void transpose_32_32(int M, int N, int A[N][M], int B[M][N]) {
    int i, j, r;
    int v0, v1, v2, v3, v4, v5, v6, v7;

    for (i = 0; i < N; i += 8) {          /* tile origin: row of A */
        for (j = 0; j < M; j += 8) {      /* tile origin: column of A */
            for (r = i; r < i + 8; r++) {
                v0 = A[r][j];
                v1 = A[r][j + 1];
                v2 = A[r][j + 2];
                v3 = A[r][j + 3];
                v4 = A[r][j + 4];
                v5 = A[r][j + 5];
                v6 = A[r][j + 6];
                v7 = A[r][j + 7];

                B[j][r] = v0;
                B[j + 1][r] = v1;
                B[j + 2][r] = v2;
                B[j + 3][r] = v3;
                B[j + 4][r] = v4;
                B[j + 5][r] = v5;
                B[j + 6][r] = v6;
                B[j + 7][r] = v7;
            }
        }
    }
}
/*
 * trans.c - Matrix transpose B = A^T
 *
 * Each transpose function must have a prototype of the form:
 * void trans(int M, int N, int A[N][M], int B[M][N]);
 *
 * A transpose function is evaluated by counting the number of misses
 * on a 1KB direct mapped cache with a block size of 32 bytes.
 */
#include "cachelab.h"
/* Correctness checker: returns 1 when B is the transpose of A, 0 otherwise. */
int is_transpose(int M, int N, int A[N][M], int B[M][N]);
/* Size-specific transposes selected by transpose_submit(). */
void transpose_32_32(int M, int N, int A[N][M], int B[M][N]);
void transpose_64_64(int M, int N, int A[N][M], int B[M][N]);
void transpose_61_67(int M, int N, int A[N][M], int B[M][N]);
/*
 * transpose_submit - This is the solution transpose function that you
 *     will be graded on for Part B of the assignment. Do not change
 *     the description string "Transpose submission", as the driver
 *     searches for that string to identify the transpose function to
 *     be graded.
 *
 * Dispatches to the routine written for each graded size (32 x 32, 64 x 64,
 * and M = 61, N = 67).  Any other size falls back to a plain element-wise
 * transpose so that B is always written.
 */
char transpose_submit_desc[] = "Transpose submission";
void transpose_submit(int M, int N, int A[N][M], int B[M][N]) {
    if (M == 32 && N == 32) {
        transpose_32_32(M, N, A, B);
    } else if (M == 64 && N == 64) {
        transpose_64_64(M, N, A, B);
    } else if (M == 61 && N == 67) {
        transpose_61_67(M, N, A, B);
    } else {
        /* No tuned routine for this shape: transpose it directly. */
        for (int i = 0; i < N; i++) {
            for (int j = 0; j < M; j++) {
                B[j][i] = A[i][j];
            }
        }
    }
}
/*
 * You can define additional transpose functions below. We've defined
 * a simple one below to help you get started.
 */

/*
 * transpose_32_32 - Blocked transpose, B = A^T, written for the 32 x 32 case.
 *
 * A has N rows and M columns; B has M rows and N columns.  The matrix is
 * processed in 8 x 8 tiles: one 8-element row segment of A is loaded into
 * scalars and then stored down the matching column of B.
 *
 * Both M and N must be multiples of 8.  Only scalar locals are used, because
 * the Cache Lab Part B rules do not allow local arrays in transpose
 * functions.
 */
void transpose_32_32(int M, int N, int A[N][M], int B[M][N]) {
    int i, j, r;
    int v0, v1, v2, v3, v4, v5, v6, v7;

    for (i = 0; i < N; i += 8) {          /* tile origin: row of A */
        for (j = 0; j < M; j += 8) {      /* tile origin: column of A */
            for (r = i; r < i + 8; r++) {
                v0 = A[r][j];
                v1 = A[r][j + 1];
                v2 = A[r][j + 2];
                v3 = A[r][j + 3];
                v4 = A[r][j + 4];
                v5 = A[r][j + 5];
                v6 = A[r][j + 6];
                v7 = A[r][j + 7];

                B[j][r] = v0;
                B[j + 1][r] = v1;
                B[j + 2][r] = v2;
                B[j + 3][r] = v3;
                B[j + 4][r] = v4;
                B[j + 5][r] = v5;
                B[j + 6][r] = v6;
                B[j + 7][r] = v7;
            }
        }
    }
}
/*
 * transpose_64_64 - Blocked transpose, B = A^T, used for the 64 x 64 case.
 *
 * A is N x M and B is M x N.  The work is split into 8 x 8 tiles; within a
 * tile each row segment of A is read into eight scalars before being written
 * to the corresponding column of B.
 *
 * Both M and N must be multiples of 8.  Scalars are used instead of a local
 * buffer array, which the Cache Lab Part B rules prohibit.
 */
void transpose_64_64(int M, int N, int A[N][M], int B[M][N]) {
    int row0, col0, row;
    int a0, a1, a2, a3, a4, a5, a6, a7;

    for (row0 = 0; row0 < N; row0 += 8) {
        for (col0 = 0; col0 < M; col0 += 8) {
            for (row = row0; row < row0 + 8; row++) {
                a0 = A[row][col0];
                a1 = A[row][col0 + 1];
                a2 = A[row][col0 + 2];
                a3 = A[row][col0 + 3];
                a4 = A[row][col0 + 4];
                a5 = A[row][col0 + 5];
                a6 = A[row][col0 + 6];
                a7 = A[row][col0 + 7];

                B[col0][row] = a0;
                B[col0 + 1][row] = a1;
                B[col0 + 2][row] = a2;
                B[col0 + 3][row] = a3;
                B[col0 + 4][row] = a4;
                B[col0 + 5][row] = a5;
                B[col0 + 6][row] = a6;
                B[col0 + 7][row] = a7;
            }
        }
    }
}
/*
 * transpose_61_67 - Blocked transpose, B = A^T, for sizes that are not a
 * multiple of the tile size (used for M = 61, N = 67).
 *
 * A is N x M and B is M x N.  The matrix is walked in 17 x 17 tiles; tiles on
 * the right and bottom edges are clipped to the matrix bounds, so any M and N
 * are handled.
 */
void transpose_61_67(int M, int N, int A[N][M], int B[M][N]) {
    enum { TILE = 17 };
    int i, j, r, c;

    for (i = 0; i < N; i += TILE) {
        for (j = 0; j < M; j += TILE) {
            for (r = i; r < i + TILE && r < N; r++) {
                for (c = j; c < j + TILE && c < M; c++) {
                    B[c][r] = A[r][c];
                }
            }
        }
    }
}
/*
 * trans - A simple baseline transpose function, not optimized for the cache.
 *
 * Copies every element A[i][j] to B[j][i], scanning A row by row.  A is
 * N x M and B is M x N.
 */
char trans_desc[] = "Simple row-wise scan transpose";
void trans(int M, int N, int A[N][M], int B[M][N])
{
    int i, j, tmp;

    for (i = 0; i < N; i++) {
        for (j = 0; j < M; j++) {
            tmp = A[i][j];
            B[j][i] = tmp;
        }
    }
}
* registerFunctions - This function registers your transpose
* functions with the driver. At runtime, the driver will
* evaluate each of the registered functions and summarize their
* performance. This is a handy way to experiment with different
* transpose strategies.
void registerFunctions()
/* Register your solution function */
registerTransFunction(transpose_submit, transpose_submit_desc);
/* Register any additional transpose functions */
registerTransFunction(trans, trans_desc);
/*
 * is_transpose - This helper function checks if B is the transpose of
 *     A. You can check the correctness of your transpose by calling
 *     it before returning from the transpose function.
 *
 * A is N x M and B is M x N.  Returns 1 when A[i][j] == B[j][i] for every
 * element, and 0 at the first mismatch.
 */
int is_transpose(int M, int N, int A[N][M], int B[M][N])
{
    int i, j;

    for (i = 0; i < N; i++) {
        for (j = 0; j < M; ++j) {
            if (A[i][j] != B[j][i]) {
                return 0;
            }
        }
    }
    return 1;
}