这篇文章(http://blog.csdn.net/wangjintao1988/article/details/8003573)说明了实现思路,其中编码方式的转换是基于Java实现的。最近正在学习linux socket编程,因此想在linux下用C实现这个功能。字符编码从GB2312到ASCII字符的转换曾经尝试用iconv,但一直无法正确转换,于是自己写了一个函数。
这个code如下:down.c
/*
 * down.c - send a song query to box.zhangmen.baidu.com over a plain TCP
 * socket and dump the raw HTTP reply to stdout.
 *
 * Usage: ./down <music> <artist>
 */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <netdb.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <iconv.h>
#include <arpa/inet.h>   /* inet_ntoa */
#include <unistd.h>      /* close */

#define MAX_SIZE   1024  /* size of the request and receive buffers */
#define SEVER_PORT 80    /* HTTP port of the server */
#define FIELD_SIZE 256   /* size of each percent-encoded name buffer */

/*
 * Percent-encode every byte of inbuf as "%XX" (upper-case hex) so that it
 * can be placed in the query string of the GET request.
 *
 * The bytes are encoded exactly as received; no character-set conversion
 * is performed.
 *
 * inbuf    - bytes to encode
 * inbytes  - number of bytes in inbuf
 * outbuf   - receives the NUL-terminated encoded string
 * outbytes - capacity of outbuf in bytes, including the terminator
 *
 * Returns the length of the encoded string (3 * inbytes), or -1 if an
 * argument is invalid or outbuf is too small. On the "too small" path
 * outbuf is set to the empty string.
 */
int zh2b(char *inbuf, int inbytes, char *outbuf, int outbytes)
{
    static const char hex[] = "0123456789ABCDEF";
    int len = 0;
    int i;

    if (inbuf == NULL || outbuf == NULL || inbytes < 0 || outbytes < 1) {
        return -1;
    }

    /* Every input byte becomes three characters; one more for the NUL. */
    if (inbytes > (outbytes - 1) / 3) {
        outbuf[0] = '\0';
        return -1;
    }

    printf("\n===========ascii===========\n");
    for (i = 0; i < inbytes; i++) {
        /* Go through unsigned char so bytes >= 0x80 are not sign-extended. */
        unsigned char byte = (unsigned char)inbuf[i];

        outbuf[len++] = '%';
        outbuf[len++] = hex[byte >> 4];
        outbuf[len++] = hex[byte & 0x0F];
    }
    outbuf[len] = '\0';

    printf("\n===============end================\n");
    printf("%s\n", outbuf);
    return len;
}

/*
 * Build the GET request that is sent to the server.
 *
 * music    - percent-encoded song title
 * artist   - percent-encoded artist name
 * head_msg - receives the request text; must have room for at least
 *            MAX_SIZE bytes
 *
 * Header lines are terminated with CRLF. If the request does not fit in
 * MAX_SIZE bytes, head_msg is set to the empty string so the caller can
 * detect the failure.
 */
void obtion_head_msg(const char *music, const char *artist, char *head_msg)
{
    int n = snprintf(head_msg, MAX_SIZE,
        "GET /x?op=12&count=1&title=%s$$%s$$$$ HTTP/1.1\r\n"
        "Host: box.zhangmen.baidu.com\r\n"
        "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:29.0) Gecko/20100101 Firefox/29.0\r\n"
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n"
        "Accept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3\r\n"
        "Accept-Encoding: gzip, deflate\r\n"
        "Connection:close\r\n"
        "\r\n",
        music, artist);

    if (n < 0 || n >= MAX_SIZE) {
        head_msg[0] = '\0';
    }
}

/*
 * Encode the two command-line arguments, resolve the server, send the GET
 * request and copy everything the server returns to stdout.
 *
 * Returns 0 on success and 1 on any failure.
 */
int main(int argc, char *argv[])
{
    int sockfd = -1;
    int status = 1;
    ssize_t numbytes;
    size_t request_len;
    size_t sent;
    char buf[MAX_SIZE];
    struct hostent *h;
    char requestMsg[MAX_SIZE] = {0};
    char music[FIELD_SIZE];
    char artist[FIELD_SIZE];
    struct sockaddr_in sever_addr;

    if (argc != 3) {
        fprintf(stderr, "please input music info:example\n\n ./down music artist \n\n");
        exit(1);
    }

    if (zh2b(argv[1], (int)strlen(argv[1]), music, (int)sizeof music) < 0 ||
        zh2b(argv[2], (int)strlen(argv[2]), artist, (int)sizeof artist) < 0) {
        fprintf(stderr, "music or artist name is too long\n");
        exit(1);
    }
    printf("before:music:%s,artist:%s\n", argv[1], argv[2]);
    printf("after: music:%s,artist:%s\n", music, artist);

    h = gethostbyname("box.zhangmen.baidu.com");
    if (h == NULL) {
        fprintf(stderr, "gethostbyname: lookup failed (h_errno=%d)\n", h_errno);
        exit(1);
    }
    /* Only an IPv4 result can be copied into a sockaddr_in. */
    if (h->h_addrtype != AF_INET || h->h_addr_list[0] == NULL ||
        (size_t)h->h_length != sizeof sever_addr.sin_addr) {
        fprintf(stderr, "gethostbyname: no usable IPv4 address\n");
        exit(1);
    }

    memset(&sever_addr, 0, sizeof sever_addr);
    sever_addr.sin_family = AF_INET;
    sever_addr.sin_port = htons(SEVER_PORT);
    /* memcpy avoids casting the byte buffer to a struct in_addr pointer. */
    memcpy(&sever_addr.sin_addr, h->h_addr_list[0], sizeof sever_addr.sin_addr);

    printf("Host name :%s\n", h->h_name);
    printf("IP Address:%s", inet_ntoa(sever_addr.sin_addr));

    sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1) {
        perror("socket error\n");
        goto cleanup;
    }

    if (connect(sockfd, (struct sockaddr *)&sever_addr, sizeof sever_addr) == -1) {
        perror("connect error \n");
        goto cleanup;
    }

    obtion_head_msg(music, artist, requestMsg);
    if (requestMsg[0] == '\0') {
        fprintf(stderr, "request does not fit in %d bytes\n", MAX_SIZE);
        goto cleanup;
    }
    printf("%s\n", requestMsg);

    /* send() may accept only part of the buffer, so loop until all is sent. */
    request_len = strlen(requestMsg);
    sent = 0;
    while (sent < request_len) {
        ssize_t n = send(sockfd, requestMsg + sent, request_len - sent, 0);

        if (n <= 0) {
            perror("send error \n");
            goto cleanup;
        }
        sent += (size_t)n;
    }

    numbytes = recv(sockfd, buf, sizeof buf, 0);
    if (numbytes == -1) {
        perror("recv error\n");
        goto cleanup;
    }

    printf("\n\n=============================start html=======================\n\n");
    while (numbytes > 0) {
        /* The received bytes are not NUL-terminated; write exactly numbytes. */
        fwrite(buf, 1, (size_t)numbytes, stdout);
        numbytes = recv(sockfd, buf, sizeof buf, 0);
    }
    if (numbytes == -1) {
        perror("recv error\n");
        goto cleanup;
    }
    printf("\n\n=============================end html=======================\n\n");

    status = 0;

cleanup:
    if (sockfd != -1) {
        close(sockfd);
    }
    return status;
}
之后编译:gcc -o down down.c
./down 童话 光良
便可以下载百度服务器发送过来的html,可以根据其中的内容结合这篇文档http://blog.csdn.net/wangjintao1988/article/details/8003573,进行解析出歌曲的下载路径,然后再次用socket或者其他编程方式下载。
注意事项
1.服务器GET请求中,汉字必须先转换成URL编码(百分号编码):把每个字节写成“%”加两位大写十六进制数,如:童话(%E7%AB%A5%E8%AF%9D),光良(%E5%85%89%E8%89%AF),括号中是服务器识别的格式(示例中的字节是这些汉字的UTF-8编码)
2.GET请求的格式:
GET /x?op=12&count=1&title=%E7%AB%A5%E8%AF%9D$$%E5%85%89%E8%89%AF$$$$ HTTP/1.1
Host: box.zhangmen.baidu.com
User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:29.0) Gecko/20100101 Firefox/29.0
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
Accept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3
Accept-Encoding: gzip, deflate
Connection:close
这个可以在火狐浏览器中通过firebug插件查看你发送的GET请求,如下图:输入http://box.zhangmen.baidu.com/x?op=12&count=1&title=%E7%AB%A5%E8%AF%9D$$%E5%85%89%E8%89%AF$$$$