1 #include <stdlib.h> 2 #include <stdio.h> 3 #include <string.h> 4 #include "lyGetHttpResult.h" 5 #include "lyPublic/lyCodeConvert.c" 6 int main() 7 { 8 9 char szUrl[512] = ""; 10 char svData[1024 * 40] = ""; 11 char *szData = NULL; 12 FILE *fp; 13 char *p, *q,*q2,*p2; 14 char strFrom[100] = "", strTo[100] = ""; 15 char findStr[20] = "", andStr[20] = "</span> - ";//查找标记串 16 char outStr[100] = "",reStr[100] = ""; 17 char str[100] = "http://www.chazidian.com/jinyici/",str2[100] = ""; 18 int len, falg; 19 20 gets(strFrom);//初串 21 strcpy(reStr,strFrom); 22 CodeConvert(strFrom, str2, sizeof(str2), 2); 23 strcat(str,str2); 24 25 sprintf(szUrl, str); 26 szData = GetDataFromWeb(szUrl, NULL, NULL, 1, 5); 27 if(!szData) 28 return NULL; 29 30 CodeConvert(szData, svData, sizeof(svData), 1); 31 if(fopen("Text.txt", "r+") == NULL) 32 fp=fopen("Text.txt", "w+r"); 33 else 34 fp=fopen("Text.txt", "r+"); 35 fputs(svData, fp); 36 37 /* while(strstr(svData, strFrom) == NULL)//判断是否在本页,不在的话进入下一页 38 { 39 p = strstr(svData, "下一页"); 40 // q = p-60; 41 q=p; 42 while(*q != ':') 43 q--; 44 q+=2; 45 memset(szUrl, 0, sizeof(szUrl)); 46 len = 0; 47 while(q++ < p) 48 szUrl[len++]=*q; 49 szData = GetDataFromWeb(szUrl, NULL, NULL, 1, 5); 50 CodeConvert(szData, svData, sizeof(svData), 1); 51 } 52 */ 53 p = strstr(svData, strFrom);//找到起点 54 falg = 0; 55 if(*(p-1)=='/') 56 { 57 q = p - 1; 58 } 59 else 60 { 61 q = p - 1; 62 while(*q!='/') 63 { 64 q--; 65 } 66 p = q;//后 67 q2 = p;//前 68 p2=p-1;//前 69 while(*(--q2) != '/'); 70 p=q2+1;//后后 71 memset(strFrom,0,sizeof(strFrom)); 72 while(q2 < p2) 73 strFrom[falg++] = *(++q2); 74 } 75 76 while(*q!='"') 77 q--; 78 79 memset(szUrl, 0, sizeof(szUrl)); 80 len = 0; 81 while(++q < p) 82 szUrl[len++] = *q; 83 strcpy(findStr,strFrom);// 84 strcat(findStr,andStr);//设置查找串 85 puts(findStr); 86 CodeConvert(strFrom, strTo, sizeof(strTo), 2);//关键词转码 87 strcat(szUrl, strTo); 88 puts(szUrl); 89 szData = GetDataFromWeb(szUrl, NULL, NULL,1, 5); 90 CodeConvert(szData, svData, 
sizeof(svData), 1);//转码 91 // puts(svData); 92 p = strstr(svData, findStr); 93 len = strlen(outStr); 94 while(*p != '4') 95 { 96 if(*p != '<' && (*p < 'a'||*p > 'z') && *p != '/' && *p != '>' && *p != '-') 97 { 98 outStr[len++] = *p; 99 } 100 p++; 101 } 102 puts(outStr); 103 104 p = strstr(outStr,reStr);//去重 105 len = strlen(reStr); 106 if(p == &outStr[0]) 107 { 108 p+=len+2; 109 while(*p != '\0') 110 printf("%c",*(p++)); 111 } 112 else 113 { 114 q=outStr; 115 while(q != p) 116 printf("%c",*(q++)); 117 if(*q == *p) 118 q=p+len; 119 while(*q != '\0') 120 printf("%c",*(q++)); 121 } 122 free(szData); 123 szData = NULL; 124 125 /* if(fopen("Text.txt", "r+") == NULL) 126 fp=fopen("Text.txt", "w+r"); 127 else 128 fp=fopen("Text.txt", "r+"); 129 fputs(outStr, fp);*/ 130 131 /* char strFrom[100] = ""; 132 char strTo[100] = ""; 133 gets(strFrom); 134 if(CodeGbkToUnicode(strFrom,strTo,100,20)) 135 puts(strTo); 136 else 137 printf("NO~!\n"); 138 139 /* char strFrom[1024*40] = "http://www.chazidian.com/jinyici/", * strTo= NULL; 140 char szData[100] = "",svData[100] = ""; 141 char str[1024*40] ; 142 char szUrl[512] = ""; 143 FILE *fp; 144 gets(szData); 145 CodeConvert(szData, svData, sizeof(svData), 2); 146 strcat(strFrom,svData); 147 sprintf(szUrl, strFrom); 148 strTo = GetDataFromWeb(szUrl, NULL, NULL, 1, 5); 149 CodeConvert(strTo, str, sizeof(str), 1); 150 if(fopen("Text.txt", "r+") == NULL) 151 fp=fopen("Text.txt", "w+r"); 152 else 153 fp=fopen("Text.txt", "r+"); 154 fputs(str, fp); 155 puts(str);*/ 156 return 1; 157 }
早上写的版本是一页一页抓取的,当页数太多时会变得很慢。后来老韦建议我在写第二个网站的时候直接用网址检索,但是那个网站用的是内码,不能直接获取信息,可能还要先检索它的内码。由此我想到,前面这个查字典的网站是不是也可以用网址去检索?后来改了一下,还好可以,这样就变得快多啦。
改后的代码:
1 #include <stdlib.h> 2 #include <stdio.h> 3 #include <string.h> 4 #include "lyGetHttpResult.h" 5 #include "lyPublic/lyCodeConvert.c" 6 int main() 7 { 8 9 char szUrl[512] = ""; 10 char svData[1024 * 40] = ""; 11 char *szData = NULL; 12 // FILE *fp; 13 // char *p, *q,*q2,*p2; 14 char *p,*q; 15 char strFrom[100] = "", strTo[100] = ""; 16 char findStr[20] = "", andStr[20] = "</span> - ";//查找标记串 17 char outStr[100] = "",reStr[100] = ""; 18 char str[100] = "http://www.chazidian.com/jinyici/",str2[100] = ""; 19 int len; 20 21 gets(strFrom);//初串 22 strcpy(reStr,strFrom); 23 CodeConvert(strFrom, str2, sizeof(str2), 2);//先将汉字GBK转为UTF-8再接道网址后面 24 strcat(str,str2); 25 26 sprintf(szUrl, str); 27 szData = GetDataFromWeb(szUrl, NULL, NULL, 1, 5); 28 if(!szData) 29 return NULL; 30 31 CodeConvert(szData, svData, sizeof(svData), 1);//找汉字的时候是找GBK。,所以还要转回来 32 /* if(fopen("Text.txt", "r+") == NULL) 33 fp=fopen("Text.txt", "w+r"); 34 else 35 fp=fopen("Text.txt", "r+"); 36 fputs(svData, fp);*/ 37 strcpy(findStr,strFrom);// 38 strcat(findStr,andStr); 39 p = strstr(svData, findStr); 40 len = strlen(outStr); 41 while(*p != '4') 42 { 43 if(*p != '<' && (*p < 'a'||*p > 'z') && *p != '/' && *p != '>' && *p != '-') 44 { 45 outStr[len++] = *p; 46 } 47 p++; 48 } 49 puts(outStr); 50 51 p = strstr(outStr,reStr);//去重 52 len = strlen(reStr); 53 if(p == &outStr[0]) 54 { 55 p+=len+2; 56 while(*p != '\0') 57 printf("%c",*(p++)); 58 } 59 else 60 { 61 q=outStr; 62 while(q != p) 63 printf("%c",*(q++)); 64 if(*q == *p) 65 q=p+len; 66 while(*q != '\0') 67 printf("%c",*(q++)); 68 } 69 free(szData); 70 szData = NULL; 71 return 1; 72 }