本次实验主要完成下列三个部分:
Part I 和Part II 两者的差距非常小,在完成了第一部分的基础上,只需简单加入生产者消费者模型即能够实现多线程的代理服务器。下面来介绍其基本原理,细节在代码中都有提及,最后将贴上完整代码。
代理服务器就像一个中介,它会帮助你访问Internet,将用户和Internet有机的连接在一起。代理服务器是用户的服务端,而又是最终服务器的客户端,其模式如下图所示。
现在,整个程序的思路就很清晰了。代理服务器需要做的就是从客户端得到请求,转发到服务器,从服务器得到数据后再返回给客户端。
简单的说,一般要从下面的请求中得到请求方式、主机名、端口号、文件路径以及HTTP的协议。
GET http://www.cmu.edu:8080/hub/index.html HTTP/1.0
有时候请求是指定端口的,有时则没有(默认为80)。分类讨论即可。
这一部分我不是特别理解,主要对HTTP协议概念不是很清晰,不过只要按照指导书中提及的,添加相应字符串即可。
这一步就是简单的利用书中的rio库进行文件传输即可。
这个模型就像有一个固定大小的水箱,生产者不断向水箱中加水,而消费者不断从水箱另一端取水。很显然,如果水箱满了,生产者便无法加水,反之如果水箱空了,那消费者则无法取水。
而在实际的运用中(本题中),这一模型就称之为 预线程化的并发服务器。其实道理很简单,还是用水箱做例子,首先创建 n个线程,这就类似给出了一个固定大小的水箱,如果这时接收到了客户的请求,那么“就向水箱中加水”,程序开始处理连接时,“就从水箱中把水取走”。这一过程会有个问题,当连接达到上限(水箱满了),此时就无法接收更多的连接;如果此时没有任何连接,那线程就会一直阻塞,直到“水箱中加入水”为止。
到这里,就可以写出多线程的代理服务器代码了。这里的sbuf.h
与书中(中文版705页)的一模一样,也可以在官网中获得。
#include "sbuf.h"
#include "csapp.h"
/* Recommended max cache and object sizes */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400
#define NTHREADS 4
#define SBUFSIZE 16
/* You won't lose style points for including this long line in your code */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
void doit(int fd);
void read_requesthdrs(rio_t *rp);
void build_header(char *header, char *hostname, char *path, rio_t *client_rio);
void parse_uri(char *uri, char *hostname, char *filepath, int *port);
int connect_endserver(char *hostname,int port,char *http_header);
void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg);
void *thread(void *vargp);
sbuf_t sbuf; /* Shared buffer of connected descriptors */
/*
 * main - entry point of the pre-threaded proxy.
 *
 * Expects exactly one argument, the port to listen on. Opens the
 * listening socket, initialises the shared descriptor buffer, starts
 * NTHREADS worker threads and then loops forever, handing every accepted
 * connection to the workers through sbuf.
 */
int main(int argc, char **argv) {
    int listenfd, connfd;
    char hostname[MAXLINE], port[MAXLINE];
    socklen_t clientlen;
    struct sockaddr_storage clientaddr;
    pthread_t tid;

    /* Check command-line args */
    if (argc != 2) {
        fprintf(stderr, "usage: %s <port>\n", argv[0]);
        exit(1);
    }
    printf("%s", user_agent_hdr);

    /*
     * Writing to a socket whose peer already hung up raises SIGPIPE, and
     * the default action terminates the whole process. A long-running
     * proxy must survive that, so the signal is ignored.
     * NOTE(review): relies on the Signal() wrapper from csapp.h.
     */
    Signal(SIGPIPE, SIG_IGN);

    /* Listening socket */
    listenfd = Open_listenfd(argv[1]);

    sbuf_init(&sbuf, SBUFSIZE);
    for (int i = 0; i < NTHREADS; i++) { /* Create worker threads */
        Pthread_create(&tid, NULL, thread, NULL);
    }

    while (1) {
        clientlen = sizeof(struct sockaddr_storage);
        connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen);
        /* The worker that removes connfd from sbuf also closes it. */
        sbuf_insert(&sbuf, connfd);
        Getnameinfo((SA *)&clientaddr, clientlen, hostname, MAXLINE, port, MAXLINE, 0);
        printf("Accepted connection from (%s, %s)\n", hostname, port);
    }
}
/*
 * doit - handle one HTTP transaction for the client connected on fd.
 *
 * Reads the request line, rejects anything but GET, splits the URI into
 * host / port / path, rebuilds the request headers, forwards the request
 * to the end server and relays the response back to the client.
 * The caller keeps ownership of fd and closes it.
 */
void doit(int fd) {
    int end_serverfd, port = 80; /* HTTP default port is 80 */
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    char filepath[MAXLINE], hostname[MAXLINE];
    char endserver_header[MAXLINE];
    rio_t client_rio, server_rio;
    ssize_t n;

    /* Read the request line; an empty read leaves buf unfilled, so stop. */
    Rio_readinitb(&client_rio, fd);
    if (Rio_readlineb(&client_rio, buf, MAXLINE) <= 0) {
        return;
    }
    /* Method and URI are required; version is parsed but not used. */
    if (sscanf(buf, "%s %s %s", method, uri, version) < 2) {
        clienterror(fd, buf, "400", "Bad request", "Proxy could not parse the request line");
        return;
    }
    if (strcasecmp(method, "GET")) {
        clienterror(fd, method, "501", "Not implemented", "Proxy does not implement this method");
        return;
    }

    /* Defaults in case the URI carries no path or no host part. */
    strcpy(filepath, "/");
    hostname[0] = '\0';

    /* Parse the uri to get hostname, file path and port */
    parse_uri(uri, hostname, filepath, &port);

    /* Build the http header which will be sent to the end server */
    build_header(endserver_header, hostname, filepath, &client_rio);

    end_serverfd = connect_endserver(hostname, port, endserver_header);
    if (end_serverfd < 0) {
        printf("Connection failed\n");
        return;
    }
    Rio_readinitb(&server_rio, end_serverfd);

    /* Write the http header to the end server */
    Rio_writen(end_serverfd, endserver_header, strlen(endserver_header));

    /*
     * Relay the response. rio_readlineb() returns -1 on error, so the
     * count must stay signed and the loop must stop on anything <= 0;
     * otherwise -1 would be used as a (huge) write length.
     * The lower-case rio_writen() is used so that a client that went away
     * only ends this transaction.
     * NOTE(review): assumes the CS:APP convention that capitalised Rio_*
     * wrappers terminate the process on error - confirm against csapp.c.
     */
    while ((n = rio_readlineb(&server_rio, buf, MAXLINE)) > 0) {
        printf("proxy received %zd bytes, then send to client %d\n", n, fd);
        if (rio_writen(fd, buf, (size_t)n) != n) {
            break;
        }
    }
    Close(end_serverfd);
}
/*
 * read_requesthdrs - read and echo request headers up to the blank line.
 *
 * The first header line is consumed silently; every following line
 * (including the terminating "\r\n") is printed to stdout. Reading also
 * stops at end of input, so a peer that closes the connection before
 * sending the blank line cannot make this loop forever.
 */
void read_requesthdrs(rio_t *rp) {
    char buf[MAXLINE];
    ssize_t n;

    n = Rio_readlineb(rp, buf, MAXLINE);
    while (n > 0 && strcmp(buf, "\r\n") != 0) {
        n = Rio_readlineb(rp, buf, MAXLINE);
        if (n > 0) {
            printf("%s", buf);
        }
    }
}
/*
 * parse_uri - split a request URI into host name, port and path.
 *
 * uri      : "[scheme://]host[:port][/path]"; it is not modified.
 * hostname : receives the host part (empty string if there is none).
 * filepath : receives the path starting at the first '/' after the
 *            host; "/" when the URI has no path.
 * port     : overwritten only when the URI carries a valid port
 *            (1..65535); otherwise the caller's value is kept, so the
 *            caller supplies the default.
 *
 * hostname and filepath must each be able to hold strlen(uri) + 1 bytes.
 */
void parse_uri(char *uri, char *hostname, char *filepath, int *port) {
    /* Skip "scheme://" when present. */
    const char *host_start = strstr(uri, "//");
    host_start = (host_start != NULL) ? host_start + 2 : uri;

    /* The host[:port] part ends at the first '/' or at end of string. */
    size_t authority_len = strcspn(host_start, "/");
    const char *path_start = host_start + authority_len;

    /* Only a ':' inside host[:port] separates a port; one in the path does not. */
    const char *colon = memchr(host_start, ':', authority_len);
    size_t host_len = (colon != NULL) ? (size_t)(colon - host_start) : authority_len;

    memcpy(hostname, host_start, host_len);
    hostname[host_len] = '\0';

    if (colon != NULL) {
        int parsed = 0;
        if (sscanf(colon + 1, "%d", &parsed) == 1 && parsed > 0 && parsed <= 65535) {
            *port = parsed;
        }
    }

    if (*path_start == '/') {
        strcpy(filepath, path_start);
    } else {
        strcpy(filepath, "/");
    }
}
/*
 * build_header - assemble the request that is sent to the end server.
 *
 * header     : output buffer; at most MAXLINE bytes are written (callers
 *              in this file pass MAXLINE-sized arrays).
 * hostname   : used for the Host header when the client did not send one.
 * path       : path placed in the "GET <path> HTTP/1.0" request line.
 * client_rio : positioned just after the client's request line; the
 *              client's headers are consumed up to the blank line.
 *
 * The client's Host header is kept as is. Its Connection,
 * Proxy-Connection and User-Agent headers are dropped and replaced by
 * the proxy's fixed values. Every other client header is forwarded
 * unchanged, as long as it fits.
 */
void build_header(char *header, char *hostname, char *path, rio_t *client_rio) {
    char buf[MAXLINE], request_hdr[MAXLINE], other_hdr[MAXLINE], host_hdr[MAXLINE];
    static const char *connection_key = "Connection";
    static const char *user_agent_key = "User-Agent";
    static const char *proxy_connection_key = "Proxy-Connection";
    size_t other_len = 0;

    /* Both buffers are appended to / tested below, so they must start empty. */
    other_hdr[0] = '\0';
    host_hdr[0] = '\0';

    /* request line */
    snprintf(request_hdr, sizeof request_hdr, "GET %s HTTP/1.0\r\n", path);

    /* collect the client's headers, replacing the ones the proxy owns */
    while (Rio_readlineb(client_rio, buf, MAXLINE) > 0) {
        if (strcmp(buf, "\r\n") == 0) {
            break;
        }
        if (!strncasecmp(buf, "Host", strlen("Host"))) {
            strcpy(host_hdr, buf);
            continue;
        }
        /* Forward the line only if it is NONE of the headers we override. */
        if (strncasecmp(buf, connection_key, strlen(connection_key)) != 0
            && strncasecmp(buf, proxy_connection_key, strlen(proxy_connection_key)) != 0
            && strncasecmp(buf, user_agent_key, strlen(user_agent_key)) != 0) {
            size_t len = strlen(buf);
            if (other_len + len < sizeof other_hdr) { /* skip lines that no longer fit */
                memcpy(other_hdr + other_len, buf, len + 1);
                other_len += len;
            }
        }
    }

    /* The client sent no Host header: derive one from the URI's host. */
    if (host_hdr[0] == '\0') {
        snprintf(host_hdr, sizeof host_hdr, "Host: %s\r\n", hostname);
    }

    /* Bounded so an oversized header set cannot overrun the caller's buffer. */
    snprintf(header, MAXLINE, "%s%s%s%s%s%s%s",
             request_hdr,
             host_hdr,
             "Connection: close\r\n",
             "Proxy-Connection: close\r\n",
             user_agent_hdr,
             other_hdr,
             "\r\n");
}
/*
 * connect_endserver - open a client connection to hostname:port.
 *
 * Returns the connected descriptor, or a negative value on failure so
 * the caller can report the error and drop just this request.
 * http_header is unused; it is kept for interface compatibility.
 *
 * NOTE(review): the lower-case open_clientfd() is called on purpose. In
 * CS:APP's csapp.c the capitalised Open_clientfd() wrapper exits the
 * process on failure, which would make the callers' "< 0" check
 * unreachable - confirm against the local csapp.c.
 */
int connect_endserver(char *hostname, int port, char *http_header) {
    char portstr[16];

    (void)http_header;
    snprintf(portstr, sizeof portstr, "%d", port);
    return open_clientfd(hostname, portstr);
}
/*
 * clienterror - send an HTTP error response with a small HTML body.
 *
 * fd       : connected client descriptor.
 * cause    : what triggered the error (echoed in the body).
 * errnum   : status code as text, e.g. "501".
 * shortmsg : reason phrase for the status line.
 * longmsg  : longer explanation shown in the body.
 *
 * The body is formatted in one bounded call; the original appended with
 * sprintf(body, "%s...", body), which passes overlapping source and
 * destination buffers. The HTML tags were reconstructed from the Tiny
 * web server's clienterror(), since the page extraction had stripped them.
 * NOTE(review): cause is echoed without HTML escaping.
 */
void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg) {
    char buf[MAXLINE], body[MAXLINE];
    int body_len;

    /* Build the HTTP response body */
    body_len = snprintf(body, sizeof body,
                        "<html><title>Tiny Error</title>"
                        "<body bgcolor=\"ffffff\">\r\n"
                        "%s: %s\r\n"
                        "<p>%s: %s\r\n"
                        "<hr><em>The Tiny Web Server</em>\r\n",
                        errnum, shortmsg, longmsg, cause);
    if (body_len < 0) {
        body_len = 0;
        body[0] = '\0';
    } else if ((size_t)body_len >= sizeof body) {
        body_len = (int)sizeof body - 1; /* output was truncated */
    }

    /* Print the HTTP response */
    snprintf(buf, sizeof buf, "HTTP/1.0 %s %s\r\n", errnum, shortmsg);
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Content-type: text/html\r\n");
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Content-length: %d\r\n\r\n", body_len);
    Rio_writen(fd, buf, strlen(buf));
    Rio_writen(fd, body, (size_t)body_len);
}
/*
 * thread - worker routine run by every thread created in main().
 *
 * Detaches itself, then repeatedly takes a connected descriptor out of
 * the shared buffer, services it with doit() and closes it.
 * Never returns; vargp is unused.
 */
void *thread(void *vargp) {
    (void)vargp;
    Pthread_detach(pthread_self());

    for (;;) {
        int client_fd = sbuf_remove(&sbuf);

        doit(client_fd); /* Service client */
        Close(client_fd);
    }
}
cache的作用,顾名思义,就是当用户多次访问同一个资源时,如果每次都从服务器获取数据,这是一件很麻烦的事情。如果代理服务器本地把频繁访问的数据都存储下来,那么就能很好的解决这个问题。
一组并发线程访问共享变量对象时,有些线程是只读对象(从cache中读取数据),有些是修改对象(修改cache中的内容)。本次实验采用读者优先,要求不让读者等待,除非此时一个写者正在访问共享对象。
举个不是很恰当的例子。你写了份作业,大家都要传阅抄你的,那么别人抄你作业的时候你不能把本子要回来,但你在修改错误时,别人没法把你作业拿走。不过会有个情况,当有个人抄完作业后,你发现了一处错误想要修改,而此时另一个人也在等你的作业,这时候你就不能修改而是等到他抄完后才能修改,这就是读者优先。写者优先同理。
这里的cache,需要完成一系列基本功能才能使之正常运行。
/*
 * cache.h - fixed-size object cache shared by the proxy's worker threads.
 *
 * The include guard is CACHE_H: identifiers containing a double
 * underscore are reserved for the implementation.
 */
#ifndef CACHE_H
#define CACHE_H

/* Recommended max cache and object sizes */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400
#define LRU_MAGIC_NUMBER 9999 /* LRU value given to a freshly stored block */
#define CACHE_OBJS_COUNT 10   /* number of blocks in the cache */

#include "csapp.h"

/* $begin cache */
/* One cache block, guarded by its own reader/writer semaphores. */
typedef struct {
    int readcnt;               /* number of active readers; initially 0 */
    int LRU;                   /* age counter; a smaller value is evicted first */
    int is_empty;              /* 1 while the block holds no object */
    sem_t mutex;               /* protects accesses to readcnt */
    sem_t w;                   /* protects accesses to the block's contents */
    char uri[MAXLINE];         /* URI the object was fetched for */
    char obj[MAX_OBJECT_SIZE]; /* object received from the server (NUL-terminated) */
} cache_block;
/* $end cache */

/* The whole cache: CACHE_OBJS_COUNT independent blocks. */
typedef struct {
    cache_block cacheobjs[CACHE_OBJS_COUNT];
} cache_t;

/* Mark every block empty and initialise its semaphores. */
void cache_init(cache_t *cache);
/* Index of the block that holds uri, or -1 when it is not cached. */
int cache_find(cache_t *cache, char *uri);
/* Index of the block the next object should be stored in. */
int cache_eviction(cache_t *cache);
/* Store the NUL-terminated object buf under uri. */
void cache_store(cache_t *cache, char *uri, char *buf);
/* Decrement the LRU value of every occupied block except block i. */
void cache_lru(cache_t *cache, int i);

/* Reader / writer entry and exit protocol for block i. */
void read_pre(cache_t *cache, int i);
void read_after(cache_t *cache, int i);
void write_pre(cache_t *cache, int i);
void write_after(cache_t *cache, int i);

#endif /* CACHE_H */
#include "csapp.h"
#include "cache.h"
/*
 * cache_init - put every block of the cache into its empty state:
 * no readers, LRU value 0, flagged empty, both semaphores set to 1.
 */
void cache_init(cache_t *cache) {
    cache_block *blk = cache->cacheobjs;
    cache_block *const end = blk + CACHE_OBJS_COUNT;

    for (; blk != end; ++blk) {
        blk->is_empty = 1;
        blk->LRU = 0;
        blk->readcnt = 0;
        sem_init(&blk->mutex, 0, 1);
        sem_init(&blk->w, 0, 1);
    }
}
/*
 * cache_find - look up uri in the cache.
 *
 * Returns the index of the occupied block whose uri matches, or -1 when
 * there is none. Each block is examined under its reader lock, and the
 * lock is always released before moving on or returning. The original
 * left the loop on a hit without calling read_after(), so the block's
 * reader count never dropped back to zero and any later writer on that
 * block waited forever.
 *
 * Because no lock is held on return, the block may be replaced before
 * the caller uses it; callers should re-check under read_pre().
 */
int cache_find(cache_t *cache, char *uri) {
    for (int i = 0; i < CACHE_OBJS_COUNT; i++) {
        int hit;

        read_pre(cache, i);
        hit = (cache->cacheobjs[i].is_empty == 0)
              && (strcmp(uri, cache->cacheobjs[i].uri) == 0);
        read_after(cache, i);

        if (hit) {
            return i;
        }
    }
    return -1; /* uri is not in the cache */
}
/*
 * cache_eviction - choose the block the next object should go into.
 *
 * Returns the index of the first empty block if there is one. Otherwise
 * it returns the block with the smallest LRU value. The original never
 * updated the running minimum, so it returned the last block whose LRU
 * was below LRU_MAGIC_NUMBER rather than the minimum. When no block is
 * below LRU_MAGIC_NUMBER, index 0 is returned.
 */
int cache_eviction(cache_t *cache) {
    int min = LRU_MAGIC_NUMBER;
    int minindex = 0;

    for (int i = 0; i < CACHE_OBJS_COUNT; i++) {
        int empty;
        int lru;

        /* Snapshot the two fields under the reader lock, decide afterwards. */
        read_pre(cache, i);
        empty = (cache->cacheobjs[i].is_empty == 1);
        lru = cache->cacheobjs[i].LRU;
        read_after(cache, i);

        if (empty) {
            return i;
        }
        if (lru < min) {
            min = lru;
            minindex = i;
        }
    }
    return minindex;
}
/*
 * cache_store - store the NUL-terminated object buf under uri.
 *
 * The target block comes from cache_eviction(). Input that would not fit
 * the block's uri or obj array is not cached at all, rather than
 * overrunning the array or storing a cut-off object.
 *
 * The block's writer lock is released BEFORE the other blocks are aged.
 * cache_lru() takes the writer lock of every other block in turn, so
 * calling it while still holding our own lock lets two concurrent stores
 * into different blocks wait on each other forever.
 */
void cache_store(cache_t *cache, char *uri, char *buf) {
    if (strlen(uri) >= sizeof cache->cacheobjs[0].uri
        || strlen(buf) >= sizeof cache->cacheobjs[0].obj) {
        return;
    }

    int i = cache_eviction(cache);

    write_pre(cache, i);
    strcpy(cache->cacheobjs[i].uri, uri);
    strcpy(cache->cacheobjs[i].obj, buf);
    cache->cacheobjs[i].is_empty = 0;
    cache->cacheobjs[i].LRU = LRU_MAGIC_NUMBER;
    write_after(cache, i);

    cache_lru(cache, i);
}
/* Decrement the LRU value of one block if it is occupied, under its writer lock. */
static void age_block(cache_t *cache, int slot) {
    write_pre(cache, slot);
    if (cache->cacheobjs[slot].is_empty == 0) {
        cache->cacheobjs[slot].LRU--;
    }
    write_after(cache, slot);
}

/*
 * cache_lru - age every occupied block except the one at index.
 * Blocks below index are handled first, then the blocks above it.
 */
void cache_lru(cache_t *cache, int index) {
    int slot = 0;

    while (slot < index) {
        age_block(cache, slot);
        slot++;
    }
    /* step over the block that was just written */
    for (slot++; slot < CACHE_OBJS_COUNT; slot++) {
        age_block(cache, slot);
    }
}
/*
 * read_pre - reader entry protocol for block i.
 * Increments the block's reader count under mutex; the reader that takes
 * the count from 0 to 1 also acquires w.
 */
void read_pre(cache_t *cache, int i) {
    cache_block *blk = &cache->cacheobjs[i];

    P(&blk->mutex);
    blk->readcnt += 1;
    if (blk->readcnt == 1) { /* first reader in */
        P(&blk->w);
    }
    V(&blk->mutex);
}
/*
 * read_after - reader exit protocol for block i.
 * Decrements the block's reader count under mutex; the reader that brings
 * the count back to 0 releases w.
 */
void read_after(cache_t *cache, int i) {
    cache_block *blk = &cache->cacheobjs[i];

    P(&blk->mutex);
    blk->readcnt -= 1;
    if (blk->readcnt == 0) { /* last reader out */
        V(&blk->w);
    }
    V(&blk->mutex);
}
/* write_pre - writer entry for block i: acquire the block's w semaphore. */
void write_pre(cache_t *cache, int i) {
    cache_block *blk = &cache->cacheobjs[i];

    P(&blk->w);
}
/* write_after - writer exit for block i: release the block's w semaphore. */
void write_after(cache_t *cache, int i) {
    cache_block *blk = &cache->cacheobjs[i];

    V(&blk->w);
}
#include "cache.h"
#include "sbuf.h"
#include "csapp.h"
/* Recommended max cache and object sizes */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400
#define LRU_MAGIC_NUMBER 9999
#define CACHE_OBJS_COUNT 10
#define SBUFSIZE 16
#define NTHREADS 4
/* You won't lose style points for including this long line in your code */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
void doit(int fd);
void read_requesthdrs(rio_t *rp);
void build_header(char *header, char *hostname, char *path, rio_t *client_rio);
void parse_uri(char *uri, char *hostname, char *filepath, int *port);
int connect_endserver(char *hostname,int port,char *http_header);
void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg);
void *thread(void *vargp);
sbuf_t sbuf; /* Shared buffer of connected descriptors */
cache_t cache; /* Shared cache */
/*
 * main - entry point of the caching, pre-threaded proxy.
 *
 * Expects exactly one argument, the port to listen on. Initialises the
 * shared cache, opens the listening socket, initialises the shared
 * descriptor buffer, starts NTHREADS worker threads and then loops
 * forever, handing every accepted connection to the workers through sbuf.
 */
int main(int argc, char **argv) {
    int listenfd, connfd;
    char hostname[MAXLINE], port[MAXLINE];
    socklen_t clientlen;
    struct sockaddr_storage clientaddr;
    pthread_t tid;

    /* Check command-line args */
    if (argc != 2) {
        fprintf(stderr, "usage: %s <port>\n", argv[0]);
        exit(1);
    }

    /*
     * Writing to a socket whose peer already hung up raises SIGPIPE, and
     * the default action terminates the whole process. A long-running
     * proxy must survive that, so the signal is ignored.
     * NOTE(review): relies on the Signal() wrapper from csapp.h.
     */
    Signal(SIGPIPE, SIG_IGN);

    cache_init(&cache);

    /* Listening socket */
    listenfd = Open_listenfd(argv[1]);

    sbuf_init(&sbuf, SBUFSIZE);
    for (int i = 0; i < NTHREADS; i++) { /* Create worker threads */
        Pthread_create(&tid, NULL, thread, NULL);
    }

    while (1) {
        clientlen = sizeof(struct sockaddr_storage);
        connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen);
        /* The worker that removes connfd from sbuf also closes it. */
        sbuf_insert(&sbuf, connfd);
        Getnameinfo((SA *)&clientaddr, clientlen, hostname, MAXLINE, port, MAXLINE, 0);
        printf("Accepted connection from (%s, %s)\n", hostname, port);
    }
}
/*
 * doit - handle one HTTP transaction for the client on connfd, using the
 * shared cache.
 *
 * A GET whose URI is cached is answered straight from the cache.
 * Otherwise the request is forwarded to the end server and the response
 * is relayed to the client. A response is stored in the cache only if it
 * was received completely, is smaller than MAX_OBJECT_SIZE and contains
 * no NUL byte (the cache stores objects as C strings).
 * The caller keeps ownership of connfd and closes it.
 */
void doit(int connfd) {
    int end_serverfd, port = 80; /* HTTP default port is 80 */
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    char filepath[MAXLINE], hostname[MAXLINE];
    char endserver_header[MAXLINE];
    char url_store[MAXLINE]; /* same size as uri, so the copy cannot overflow */
    rio_t client_rio, server_rio;

    /* Read the request line; an empty read leaves buf unfilled, so stop. */
    Rio_readinitb(&client_rio, connfd);
    if (Rio_readlineb(&client_rio, buf, MAXLINE) <= 0) {
        return;
    }
    /* Method and URI are required; version is parsed but not used. */
    if (sscanf(buf, "%s %s %s", method, uri, version) < 2) {
        clienterror(connfd, buf, "400", "Bad request", "Proxy could not parse the request line");
        return;
    }
    strcpy(url_store, uri); /* keep the original url as the cache key */

    if (strcasecmp(method, "GET")) {
        clienterror(connfd, method, "501", "Not implemented", "Proxy does not implement this method");
        return;
    }

    /*
     * Cache lookup. The index returned by cache_find() is only a hint:
     * the block may have been replaced in the meantime, so the match is
     * re-checked under the reader lock before the object is served.
     */
    int index = cache_find(&cache, url_store);
    if (index != -1) {
        int served = 0;

        read_pre(&cache, index);
        if (cache.cacheobjs[index].is_empty == 0
            && strcmp(cache.cacheobjs[index].uri, url_store) == 0) {
            Rio_writen(connfd, cache.cacheobjs[index].obj, strlen(cache.cacheobjs[index].obj));
            served = 1;
        }
        read_after(&cache, index);

        if (served) {
            return;
        }
    }

    /* Defaults in case the URI carries no path or no host part. */
    strcpy(filepath, "/");
    hostname[0] = '\0';

    /* Parse the uri to get hostname, file path and port */
    parse_uri(uri, hostname, filepath, &port);

    /* Build the http header which will be sent to the end server */
    build_header(endserver_header, hostname, filepath, &client_rio);

    end_serverfd = connect_endserver(hostname, port, endserver_header);
    if (end_serverfd < 0) {
        printf("Connection failed\n");
        return;
    }
    Rio_readinitb(&server_rio, end_serverfd);

    /* Write the http header to the end server */
    Rio_writen(end_serverfd, endserver_header, strlen(endserver_header));

    /*
     * Relay the response and collect a copy for the cache.
     * rio_readlineb() returns -1 on error, so the loop stops on anything
     * <= 0 instead of using -1 as a length. The copy is built with memcpy
     * at a tracked offset: strcat would need an initialised buffer and
     * would stop at the first NUL byte of binary content.
     * NOTE(review): lower-case rio_writen() is used so a vanished client
     * only ends this transaction; assumes the capitalised wrapper exits
     * on error - confirm against csapp.c.
     */
    char cache_buf[MAX_OBJECT_SIZE];
    size_t cached_len = 0;
    int cacheable = 1;
    int complete = 0;
    ssize_t n;

    for (;;) {
        n = rio_readlineb(&server_rio, buf, MAXLINE);
        if (n <= 0) {
            complete = (n == 0); /* clean EOF: the whole response was read */
            break;
        }
        printf("proxy received %zd bytes, then send to client %d\n", n, connfd);

        if (cacheable) {
            if (cached_len + (size_t)n < MAX_OBJECT_SIZE
                && memchr(buf, '\0', (size_t)n) == NULL) {
                memcpy(cache_buf + cached_len, buf, (size_t)n);
                cached_len += (size_t)n;
            } else {
                cacheable = 0; /* too large, or not representable as a C string */
            }
        }

        if (rio_writen(connfd, buf, (size_t)n) != n) {
            break; /* client went away; do not cache a partial object */
        }
    }
    Close(end_serverfd);

    /* store it in cache */
    if (complete && cacheable && cached_len > 0) {
        cache_buf[cached_len] = '\0';
        cache_store(&cache, url_store, cache_buf);
    }
}
/*
 * read_requesthdrs - read and echo request headers up to the blank line.
 *
 * The first header line is consumed silently; every following line
 * (including the terminating "\r\n") is printed to stdout. Reading also
 * stops at end of input, so a peer that closes the connection before
 * sending the blank line cannot make this loop forever.
 */
void read_requesthdrs(rio_t *rp) {
    char buf[MAXLINE];
    ssize_t n;

    n = Rio_readlineb(rp, buf, MAXLINE);
    while (n > 0 && strcmp(buf, "\r\n") != 0) {
        n = Rio_readlineb(rp, buf, MAXLINE);
        if (n > 0) {
            printf("%s", buf);
        }
    }
}
/*
 * parse_uri - split a request URI into host name, port and path.
 *
 * uri      : "[scheme://]host[:port][/path]"; it is not modified.
 * hostname : receives the host part (empty string if there is none).
 * filepath : receives the path starting at the first '/' after the
 *            host; "/" when the URI has no path.
 * port     : overwritten only when the URI carries a valid port
 *            (1..65535); otherwise the caller's value is kept, so the
 *            caller supplies the default.
 *
 * hostname and filepath must each be able to hold strlen(uri) + 1 bytes.
 */
void parse_uri(char *uri, char *hostname, char *filepath, int *port) {
    /* Skip "scheme://" when present. */
    const char *host_start = strstr(uri, "//");
    host_start = (host_start != NULL) ? host_start + 2 : uri;

    /* The host[:port] part ends at the first '/' or at end of string. */
    size_t authority_len = strcspn(host_start, "/");
    const char *path_start = host_start + authority_len;

    /* Only a ':' inside host[:port] separates a port; one in the path does not. */
    const char *colon = memchr(host_start, ':', authority_len);
    size_t host_len = (colon != NULL) ? (size_t)(colon - host_start) : authority_len;

    memcpy(hostname, host_start, host_len);
    hostname[host_len] = '\0';

    if (colon != NULL) {
        int parsed = 0;
        if (sscanf(colon + 1, "%d", &parsed) == 1 && parsed > 0 && parsed <= 65535) {
            *port = parsed;
        }
    }

    if (*path_start == '/') {
        strcpy(filepath, path_start);
    } else {
        strcpy(filepath, "/");
    }
}
/*
 * build_header - assemble the request that is sent to the end server.
 *
 * header     : output buffer; at most MAXLINE bytes are written (callers
 *              in this file pass MAXLINE-sized arrays).
 * hostname   : used for the Host header when the client did not send one.
 * path       : path placed in the "GET <path> HTTP/1.0" request line.
 * client_rio : positioned just after the client's request line; the
 *              client's headers are consumed up to the blank line.
 *
 * The client's Host header is kept as is. Its Connection,
 * Proxy-Connection and User-Agent headers are dropped and replaced by
 * the proxy's fixed values. Every other client header is forwarded
 * unchanged, as long as it fits.
 */
void build_header(char *header, char *hostname, char *path, rio_t *client_rio) {
    char buf[MAXLINE], request_hdr[MAXLINE], other_hdr[MAXLINE], host_hdr[MAXLINE];
    static const char *connection_key = "Connection";
    static const char *user_agent_key = "User-Agent";
    static const char *proxy_connection_key = "Proxy-Connection";
    size_t other_len = 0;

    /* Both buffers are appended to / tested below, so they must start empty. */
    other_hdr[0] = '\0';
    host_hdr[0] = '\0';

    /* request line */
    snprintf(request_hdr, sizeof request_hdr, "GET %s HTTP/1.0\r\n", path);

    /* collect the client's headers, replacing the ones the proxy owns */
    while (Rio_readlineb(client_rio, buf, MAXLINE) > 0) {
        if (strcmp(buf, "\r\n") == 0) {
            break;
        }
        if (!strncasecmp(buf, "Host", strlen("Host"))) {
            strcpy(host_hdr, buf);
            continue;
        }
        /* Forward the line only if it is NONE of the headers we override. */
        if (strncasecmp(buf, connection_key, strlen(connection_key)) != 0
            && strncasecmp(buf, proxy_connection_key, strlen(proxy_connection_key)) != 0
            && strncasecmp(buf, user_agent_key, strlen(user_agent_key)) != 0) {
            size_t len = strlen(buf);
            if (other_len + len < sizeof other_hdr) { /* skip lines that no longer fit */
                memcpy(other_hdr + other_len, buf, len + 1);
                other_len += len;
            }
        }
    }

    /* The client sent no Host header: derive one from the URI's host. */
    if (host_hdr[0] == '\0') {
        snprintf(host_hdr, sizeof host_hdr, "Host: %s\r\n", hostname);
    }

    /* Bounded so an oversized header set cannot overrun the caller's buffer. */
    snprintf(header, MAXLINE, "%s%s%s%s%s%s%s",
             request_hdr,
             host_hdr,
             "Connection: close\r\n",
             "Proxy-Connection: close\r\n",
             user_agent_hdr,
             other_hdr,
             "\r\n");
}
/*
 * connect_endserver - open a client connection to hostname:port.
 *
 * Returns the connected descriptor, or a negative value on failure so
 * the caller can report the error and drop just this request.
 * http_header is unused; it is kept for interface compatibility.
 *
 * NOTE(review): the lower-case open_clientfd() is called on purpose. In
 * CS:APP's csapp.c the capitalised Open_clientfd() wrapper exits the
 * process on failure, which would make the callers' "< 0" check
 * unreachable - confirm against the local csapp.c.
 */
int connect_endserver(char *hostname, int port, char *http_header) {
    char portstr[16];

    (void)http_header;
    snprintf(portstr, sizeof portstr, "%d", port);
    return open_clientfd(hostname, portstr);
}
/*
 * clienterror - send an HTTP error response with a small HTML body.
 *
 * fd       : connected client descriptor.
 * cause    : what triggered the error (echoed in the body).
 * errnum   : status code as text, e.g. "501".
 * shortmsg : reason phrase for the status line.
 * longmsg  : longer explanation shown in the body.
 *
 * The body is formatted in one bounded call; the original appended with
 * sprintf(body, "%s...", body), which passes overlapping source and
 * destination buffers. The HTML tags were reconstructed from the Tiny
 * web server's clienterror(), since the page extraction had stripped them.
 * NOTE(review): cause is echoed without HTML escaping.
 */
void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg) {
    char buf[MAXLINE], body[MAXLINE];
    int body_len;

    /* Build the HTTP response body */
    body_len = snprintf(body, sizeof body,
                        "<html><title>Tiny Error</title>"
                        "<body bgcolor=\"ffffff\">\r\n"
                        "%s: %s\r\n"
                        "<p>%s: %s\r\n"
                        "<hr><em>The Tiny Web Server</em>\r\n",
                        errnum, shortmsg, longmsg, cause);
    if (body_len < 0) {
        body_len = 0;
        body[0] = '\0';
    } else if ((size_t)body_len >= sizeof body) {
        body_len = (int)sizeof body - 1; /* output was truncated */
    }

    /* Print the HTTP response */
    snprintf(buf, sizeof buf, "HTTP/1.0 %s %s\r\n", errnum, shortmsg);
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Content-type: text/html\r\n");
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Content-length: %d\r\n\r\n", body_len);
    Rio_writen(fd, buf, strlen(buf));
    Rio_writen(fd, body, (size_t)body_len);
}
/*
 * thread - worker routine run by every thread created in main().
 *
 * Detaches itself, then repeatedly takes a connected descriptor out of
 * the shared buffer, services it with doit() and closes it.
 * Never returns; vargp is unused.
 */
void *thread(void *vargp) {
    (void)vargp;
    Pthread_detach(pthread_self());

    for (;;) {
        int client_fd = sbuf_remove(&sbuf);

        doit(client_fd); /* Service client */
        Close(client_fd);
    }
}
在添加了cache
和sbuf
后,Makefile也要进行相应的更改。
# Makefile for Proxy Lab
#
# You may modify this file any way you like (except for the handin
# rule). Your instructor will type "make" on your specific Makefile to
# build your proxy from sources.
#
# Each object lists every project header its source includes, so that
# editing sbuf.h or cache.h rebuilds the objects that depend on it.
# Recipe lines must start with a TAB character.
CC = gcc
CFLAGS = -g -Wall
LDFLAGS = -lpthread

all: proxy

csapp.o: csapp.c csapp.h
	$(CC) $(CFLAGS) -c csapp.c

proxy.o: proxy.c csapp.h sbuf.h cache.h
	$(CC) $(CFLAGS) -c proxy.c

sbuf.o: sbuf.c sbuf.h csapp.h
	$(CC) $(CFLAGS) -c sbuf.c

cache.o: cache.c cache.h csapp.h
	$(CC) $(CFLAGS) -c cache.c

proxy: proxy.o sbuf.o csapp.o cache.o
	$(CC) $(CFLAGS) proxy.o csapp.o sbuf.o cache.o -o proxy $(LDFLAGS)

# Creates a tarball in ../proxylab-handin.tar that you can then
# hand in. DO NOT MODIFY THIS!
handin:
	(make clean; cd ..; tar cvf $(USER)-proxylab-handin.tar proxylab-handout --exclude tiny --exclude nop-server.py --exclude proxy --exclude driver.sh --exclude port-for-user.pl --exclude free-port.sh --exclude ".*")

clean:
	rm -f *~ *.o proxy core *.tar *.zip *.gzip *.bzip *.gz
指导书中有这么一句话描述cache的性质:
If the entirety of the web server’s response is read before the maximum object size is exceeded, then the object can be cached. Using this scheme, the maximum amount of data your proxy will ever use for web objects is the following, where T is the maximum number of active connections:
MAX_CACHE_SIZE + T * MAX_OBJECT_SIZE
这里我百思不得其解,求解答!!!!
花了好几月时间终于看完了《CSAPP》,今天也算是圆满结束了。可以说受益匪浅,不过还有很多的不足需要去弥补,去学习。文中的一些例子和理解可能有很多不正确的地方,还请各位大佬纠正。