前些日子贴了两个关于断点续传的帖子,又看到firefox的downloadthemall插件的可视化界面里可以形象地看到一个文件被分成几块同时下载,不由得想到可以用断点续传来实现多线程下载。虽然涉及的http协议内容不算很多也不算很复杂,但是很显然有很多现成的http库可以使用,记得以前在svn项目中了解到libneon就是这样一个http lib。下面便是用它实现的一个超级简单的多线程下载工具,:)。 (最近比较喜欢写代码,也比较喜欢贴代码,以前一直看啊看,不想动手写)
/**
* file: rget.c
* author: rare
* date: 2008/12/07
* email: dux003#163.com
*/
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include "ne_session.h"
#include "ne_basic.h"
#include "ne_uri.h"
#include "ne_request.h"
//size of the fname buffers that hold the local file name
#define MAX_FNAME_LEN 256
#define MAX_DL_SIZE_PER_THRD (1024*500) //500 KiB (512000 bytes) per block. The parentheses matter: the macro is used inside division and multiplication expressions
//Parameters for one download thread. dlfile() heap-allocates one instance
//per thread; my_dl_thread_func() copies it and frees the allocation.
struct dl_thrd_para
{
ne_uri uri; //parsed URL; filled by a shallow memcpy of the caller's ne_uri
char fname[MAX_FNAME_LEN]; //name of the local file this block is written into
long start; //offset of the first byte of the block (also the seek position in the local file)
long end; //offset of the last byte of the block; dlfile() stores -1 for the final block
};
//Response-accept callback that get_file_size() registers to capture the
//file size from the response headers.
//  userdata: pointer to a long that receives the Content-Length value;
//            may be NULL, in which case nothing is stored
//  req:      request whose response headers are inspected
//  st:       status of the response; only code 200 is considered
//Returns 0 in every case (the body data is not wanted). The long behind
//userdata is left untouched when the status is not 200, or when the
//Content-Length header is missing, not a number, out of range or negative.
int my_ne_accept_fn(void *userdata, ne_request *req, const ne_status *st)
{
    long *pfsiz = (long *)userdata;
    const char *hdr;
    char *endp;
    long len;

    if (st->code != 200 || pfsiz == NULL)
        return 0; //ignore the data on error

    //The header may legitimately be absent; passing NULL to a conversion
    //function would crash.
    hdr = ne_get_response_header(req, "content-length");
    if (hdr == NULL)
        return 0;

    //strtol keeps the full long range and reports malformed input, unlike atoi.
    errno = 0;
    len = strtol(hdr, &endp, 10);
    if (endp == hdr || errno == ERANGE || len < 0)
        return 0;

    *pfsiz = len;
    return 0;
}
//Download thread entry point: fetches one byte range of the remote file and
//writes it into the local file starting at the same offset.
//  para: heap-allocated struct dl_thrd_para. Ownership passes to this
//        function, which copies the contents and frees the allocation.
//Always returns NULL; failures are only reported on stderr.
void* my_dl_thread_func(void* para)
{
    struct dl_thrd_para dtp;
    ne_content_range range = {0};
    ne_session *sess;
    int fd;

    memcpy(&dtp, para, sizeof(dtp));
    free(para);
    printf("fname:%s, start:%ld, end:%ld\n", dtp.fname, dtp.start, dtp.end);
    range.start = dtp.start;
    range.end = dtp.end;

    //ne_get_range() writes through a file descriptor, so open one directly
    //instead of mixing a buffered FILE* with fileno(). The file must already
    //exist (no O_CREAT), exactly as with the previous "rb+" open.
    fd = open(dtp.fname, O_WRONLY);
    if (fd < 0)
    {
        fprintf(stderr, "rget: Failed to open file.\n");
        return NULL;
    }
    //Each thread has its own descriptor, hence its own file offset.
    if (lseek(fd, (off_t)dtp.start, SEEK_SET) == (off_t)-1)
    {
        fprintf(stderr, "rget: Failed to seek to offset %ld.\n", dtp.start);
        close(fd);
        return NULL;
    }

    /* Initialize socket libraries */
    //NOTE(review): this runs concurrently in every thread; confirm against
    //the neon documentation that ne_sock_init() may be called this way.
    if (ne_sock_init())
    {
        fprintf(stderr, "rget: Failed to initialize socket libraries.\n");
        close(fd);
        return NULL;
    }

    sess = ne_session_create(dtp.uri.scheme, dtp.uri.host, dtp.uri.port);
    if (sess == NULL)
    {
        fprintf(stderr, "rget: Failed to create session.\n");
        close(fd);
        return NULL;
    }

    //A non-zero result means the block was not (completely) fetched; say so
    //instead of silently leaving a hole of zero bytes in the output file.
    if (ne_get_range(sess, dtp.uri.path, &range, fd))
    {
        fprintf(stderr, "rget: Failed to download block %ld-%ld: %s\n",
                dtp.start, dtp.end, ne_get_error(sess));
    }

    ne_session_destroy(sess);
    close(fd);
    return NULL;
}
//Send one HTTP GET for uri and return the file size that my_ne_accept_fn()
//extracted from the response headers.
//  uri: parsed URL; scheme, host, port and path are used
//Returns the value stored by the callback (the local size variable starts at
//0, so 0 is returned when the callback stored nothing), or -1 when the socket
//layer, session or request could not be set up or the request could not be
//sent. All error messages go to stderr.
//NOTE(review): the GET body is never read before ne_end_request(); a HEAD
//request might be the cleaner way to obtain only the headers - confirm
//against the neon documentation before changing the method.
long get_file_size(ne_uri *uri)
{
    long fsiz = 0;
    long result = -1;
    ne_session *sess;
    ne_request *req;

    /* Initialize socket libraries */
    if (ne_sock_init())
    {
        fprintf(stderr, "rget: Failed to initialize socket libraries.\n");
        return -1;
    }

    sess = ne_session_create(uri->scheme, uri->host, uri->port);
    if (sess == NULL)
    {
        fprintf(stderr, "rget: Failed to create session.\n");
        return -1;
    }

    req = ne_request_create(sess, "GET", uri->path);
    if (req == NULL)
    {
        fprintf(stderr, "rget: Failed to create request.\n");
        goto out_session;
    }

    //No block reader is installed: only the accept callback runs, and it
    //records the size into fsiz.
    ne_add_response_body_reader(req, my_ne_accept_fn, (int(*)(void*, const char*, size_t))NULL, (void*)&fsiz);
    if (ne_begin_request(req))
    {
        fprintf(stderr, "rget: Failed to send request.\n");
        goto out_request;
    }
    ne_end_request(req);
    result = fsiz;

out_request:
    ne_request_destroy(req);
out_session:
    ne_session_destroy(sess);
    return result;
}
//Create (or truncate) fname and fill it with fsiz zero bytes, so that each
//download thread can later seek to its own offset and write in place.
//  fname: path of the file to create (mode 0666, subject to the umask)
//  fsiz:  requested size in bytes; a value <= 0 produces an empty file
//Nothing is returned: if the file cannot be opened the function returns
//silently, and if writing fails the partially written file is removed.
void mkfile(const char *fname, long fsiz)
{
    char buf[1024] = {0};
    long done = 0; //same width as fsiz; an int counter overflows past 2 GiB
    int fd = open(fname, O_WRONLY|O_CREAT|O_TRUNC, 0666);

    if (fd < 0)
        return;

    while (done < fsiz)
    {
        long left = fsiz - done;
        size_t chunk = left < (long)sizeof(buf) ? (size_t)left : sizeof(buf);
        ssize_t ret = write(fd, buf, chunk);

        if (ret < 0 && errno == EINTR)
            continue; //interrupted before anything was written: retry

        if (ret <= 0)
        {
            //hard error, or no progress at all: give up instead of spinning
            close(fd);
            unlink(fname);
            return;
        }
        done += ret;
    }
    close(fd);
}
//Download the resource at uri into the already created file fname, using
//one thread per block of MAX_DL_SIZE_PER_THRD bytes, and wait for all of
//them to finish.
//  uri:   parsed URL; shallow-copied into each thread's parameter block
//  fname: local file name; must fit into MAX_FNAME_LEN including the NUL
//  fsiz:  size of the remote file in bytes; <= 0 is handled as a single
//         open-ended block starting at offset 0
//Nothing is returned; setup failures are reported on stderr.
//NOTE(review): the number of threads grows with the file size (one per
//block) and is not capped.
void dlfile(ne_uri *uri, const char *fname, long fsiz)
{
    long blocks;
    long started = 0;
    long i;
    pthread_t *tid;

    //Refuse a name that would not fit instead of overflowing the buffer.
    if (strlen(fname) >= MAX_FNAME_LEN)
    {
        fprintf(stderr, "rget: File name too long.\n");
        return;
    }

    //Round up so a size that is an exact multiple of the block size does not
    //get an extra, empty block starting at end-of-file; always use at least
    //one block.
    if (fsiz > 0)
        blocks = fsiz / MAX_DL_SIZE_PER_THRD + (fsiz % MAX_DL_SIZE_PER_THRD != 0);
    else
        blocks = 1;

    tid = malloc((size_t)blocks * sizeof *tid);
    if (tid == NULL)
    {
        fprintf(stderr, "rget: Out of memory.\n");
        return;
    }

    for (i = 0; i < blocks; i++)
    {
        int rc;
        struct dl_thrd_para *pdtp = malloc(sizeof *pdtp);

        if (pdtp == NULL)
        {
            fprintf(stderr, "rget: Out of memory.\n");
            break;
        }
        memcpy(&pdtp->uri, uri, sizeof(ne_uri));
        //Offsets are computed in long so they do not overflow int on large files.
        pdtp->start = i * (long)MAX_DL_SIZE_PER_THRD;
        if (i != blocks - 1)
            pdtp->end = pdtp->start + MAX_DL_SIZE_PER_THRD - 1;
        else
            pdtp->end = -1; //final block: open-ended
        strcpy(pdtp->fname, fname); //length verified above

        //On success the thread owns pdtp and frees it.
        rc = pthread_create(&tid[started], NULL, my_dl_thread_func, pdtp);
        if (rc != 0)
        {
            fprintf(stderr, "rget: Failed to create thread: %s\n", strerror(rc));
            free(pdtp);
            break;
        }
        started++;
    }

    //Join only the threads that were actually started.
    for (i = 0; i < started; i++)
    {
        pthread_join(tid[i], NULL);
    }
    free(tid);
}
//Program entry point: rget URL
//Parses the URL, asks the server for the file size, pre-allocates a
//temporary file named "<unix time>.tmp", downloads all blocks into it and
//finally renames it to the last path component of the URL.
//Returns 0 on success and -1 on a usage error, an unparsable URL, a failed
//size query or a failed rename.
int main(int argc, char* argv[])
{
    char fname[MAX_FNAME_LEN];
    long fsiz = 0;
    ne_uri uri = {0};
    const char *target;

    //more args can be designed to facilitate this utility
    if (argc != 2)
    {
        fprintf(stderr, "Usage: rget URL.\n");
        return -1;
    }
    if (ne_uri_parse(argv[1], &uri) || uri.host==NULL || uri.path==NULL)
    {
        fprintf(stderr, "Could not parse url %s\n", argv[1]);
        return -1;
    }

    /* Set defaults. */
    //scheme may now point at a string literal, so uri must not be handed to
    //any routine that frees its members.
    if (uri.scheme == NULL)
        uri.scheme = "http";
    if (uri.port == 0)
        uri.port = ne_uri_defaultport(uri.scheme);

    fsiz = get_file_size(&uri);
    if (fsiz < 0)
    {
        //get_file_size() already printed the reason; do not create files
        //for a transfer that cannot be started.
        fprintf(stderr, "rget: Could not determine the size of %s\n", argv[1]);
        return -1;
    }

    //create temp file for downloading
    snprintf(fname, sizeof fname, "%u.tmp", (unsigned int)time(NULL));
    mkfile(fname, fsiz);

    //download file
    dlfile(&uri, fname, fsiz);

    //rename temp file to target file
    target = basename(uri.path);
    if (rename(fname, target) != 0)
    {
        fprintf(stderr, "rget: Failed to rename %s to %s: %s\n",
                fname, target, strerror(errno));
        return -1;
    }
    return 0;
}
//build it
#! /bin/sh
gcc -g -o rget rget.c `neon-config --cflags` `neon-config --libs` -lpthread
#test it
./rget http://www.webdav.org/neon/neon-0.28.3.tar.gz
程序有时候能够成功,有时候会失败,暂时不想找bug了。不过我觉得从原理上来说,基本上是没错的了。
上面需要注意的一点是,http的range是从0开始的, 0~499表示第一个500字节
由于本文主要是讲如何通过断点续传实现多线程下载,所以libneon就不介绍了,大家可以到其官网上看文档。(不过好像不全,要注意看其头文件中的注释"/usr/include/neon")