初步实现了一个接口:
1 #include<stdio.h> 2 #include <stdlib.h> 3 #include <string.h> 4 #include "lyPublic/lyCodeConvert.h" 5 #define X_LONGSEN 500 6 #define Y_LONGWORD 100 7 struct node 8 { 9 char MWord[Y_LONGWORD]; //对应的最高权权值 10 int order; //权值、并作为标记是否有词 11 struct node *next[16]; 12 }; 13 14 typedef struct node node; 15 char z_Str[Y_LONGWORD]; 16 17 void insertTree(char *str, node *T, char *MaxWord, int num_max); 18 void findStr(char *str, node *T); 19 int findNum(char *str, node *T); 20 int SentenceTransform(char *FromWord,char *Tostr); 21 22 /* 23 函数功能: 24 将一句话里面的部分词 转化为 权值最高的词 25 变量说明: 26 FromWord 原句子 27 Tostr 转化后的句子 28 */ 29 30 int SentenceTransform(char *FromWord,char *Tostr) 31 { 32 //FILE *fp; 33 FILE *fq; 34 char GetSentence[X_LONGSEN] = ""; // 读取权值文档的句子 35 int lenGetSen, leWord, leSen; 36 char GetWord[Y_LONGWORD] = "" ; 37 char ToWord[Y_LONGWORD] = ""; 38 char strhan[Y_LONGWORD] = ""; 39 char MaxWord[Y_LONGWORD] = ""; //每一句的权值最高词 40 char hanMax[Y_LONGWORD] = "" ; 41 node *T; 42 int i, j, k, len, s, num, max_num, f; 43 int from, to, at; 44 int num_max; 45 int lenTostr = 0 , lenZ_Str; 46 47 T = (node *)malloc(sizeof(node)); 48 //初始化节点 49 memset (T->MWord, 0 ,sizeof(T->MWord)); 50 T->order = -1; 51 for(i = 0; i < 16; i++) 52 T->next[i] = NULL; 53 54 55 /* 56 打开权值文档,文档格式: 57 平凡28&平淡--62 平庸--5 平凡--82 平常--38] 58 平常83&平常--38 寻常--31] 59 贫困24&困顿--0 贫困--42 窘迫--4] 60 贫困24&贫困--42 贫寒--0 清贫--31 贫穷--7 穷苦--1] 61 说明: 62 第一个是一句话权值最高的词,紧跟着的就是权值 63 & 是分隔符 64 后面的是 近义词 的词和气权值大小 65 */ 66 67 68 69 // 读取权值文档,建立字典树 70 fq = fopen ("1.txt","r++"); 71 // fp = fopen ("jieguo.txt","w+r"); 72 while (fgets (GetSentence, 500, fq) != NULL) //读取权值文档,建立各个词对应的最高权值 73 { 74 lenGetSen = strlen(GetSentence); 75 leSen = 0; 76 memset (MaxWord, 0, sizeof(MaxWord)); 77 leWord = 0; 78 leWord = 0; 79 while(GetSentence[leSen] != '&' && (GetSentence[leSen] < '0' || GetSentence[leSen] >'9')) 80 MaxWord[leWord++] = GetSentence[leSen++]; 81 //取最高权词的权值 82 num_max = 0; 83 while(GetSentence[leSen] >= '0' && GetSentence[leSen] <= '9') 84 num_max = num_max*10 + GetSentence[leSen++] - '0'; 85 leSen++; 86 while (GetSentence[leSen] != ']' && leSen < lenGetSen) 87 { 88 memset (GetWord, 0, sizeof(GetWord)); 89 memset (ToWord, 0, sizeof(ToWord)); 90 leWord = 0; 91 while (GetSentence[leSen] != '-') 92 { 93 GetWord[leWord++] = GetSentence[leSen++]; 94 } 95 HanziToAnsi (GetWord, strlen(GetWord), ToWord, sizeof(ToWord)); 96 insertTree (ToWord, T, MaxWord, num_max); 97 while (GetSentence[leSen] == ' ' || ( GetSentence[leSen] >='0' && GetSentence[leSen] <= '9') || GetSentence[leSen] == '-') 98 leSen++; 99 } 100 } 101 102 103 //转化句子 104 len = strlen(FromWord); 105 at = 0; 106 for (i = 0; i < len;) 107 { 108 max_num = -1; 109 memset(strhan, 0, sizeof(strhan)); 110 for (j = i; j <= len; j+=2) 111 { 112 memset (GetWord, 0, sizeof(GetWord)); 113 s = 0; 114 num = -10; 115 //记录汉字 116 for (k = i; k < j; k++) 117 strhan[s++] = FromWord [k]; 118 119 //转码 120 HanziToAnsi(strhan, strlen(strhan), GetWord, sizeof(GetWord)); 121 122 if(strlen(strhan) != 0) 123 num = findNum(GetWord, T); 124 else 125 continue; 126 if (num > max_num) 127 { 128 max_num = num; 129 memset(hanMax, 0, sizeof(hanMax)); 130 strcpy(hanMax, strhan); 131 from = i; 132 to =j; 133 } 134 } 135 if(max_num != -1) 136 { 137 while(at < from) 138 { 139 //fputc(FromWord [at], fp); 140 Tostr[lenTostr++] = FromWord[at++]; 141 } 142 memset (GetWord, 0, sizeof(GetWord)); 143 memset(z_Str, 0, sizeof(z_Str)); 144 HanziToAnsi(hanMax, strlen(hanMax), GetWord, sizeof(GetWord)); 145 findStr(GetWord, T); 146 lenZ_Str = strlen(z_Str); 147 for(f = 0; f < lenZ_Str ;f++) 148 Tostr[lenTostr++] = z_Str[f]; 149 at = to; 150 i = to; 151 } 152 else 153 { 154 for(j = at; j < at+2; j++) 155 Tostr[lenTostr++] = FromWord[j]; 156 // fputc(FromWord[j], fp); 157 at += 2; 158 i += 2; 159 } 160 } 161 return 0; 162 } 163 164 void insertTree(char *str, node *T, char *MaxWord, int num_max) 165 { 166 int len, i, j, flag=0, id; 167 node *p, *q; 168 p = T; 169 len = strlen(str); 170 for (i = 0; i < len; i++) 171 { 172 if(str[i]>= 'a' && str[i] <= 'f')//当时abcdef时 转化为数字 173 id = str[i]- 'a' + 10; 174 else 175 id = str[i] - '0'; 176 if( p ->next[id] == NULL)//扩展节点 177 { 178 flag = 1; 179 q = (node *)malloc(sizeof(node)); 180 memset(q->MWord, 0, sizeof(q->MWord)); 181 q->order = -1; 182 for(j = 0;j < 16 ;j++) 183 q ->next[j] = NULL; 184 p->next[id] = q; 185 } 186 p = p->next[id]; 187 } 188 if(flag) 189 { 190 strcpy(p->MWord, MaxWord); 191 p->order = num_max; 192 } 193 else 194 { 195 if( p -> order == -1) 196 { 197 strcpy(p->MWord, MaxWord); 198 p->order = num_max ; 199 } 200 } 201 } 202 203 void findStr(char *str, node *T) 204 { 205 int len , i, id; 206 node *p; 207 p = T; 208 len = strlen(str); 209 for (i=0; i< len ; ++i) 210 { 211 if(str[i]>= 'a' && str[i] <= 'f') 212 id = str[i]- 'a' + 10; 213 else 214 id = str[i] - '0'; 215 if(p->next[id] == NULL) 216 return; 217 p = p->next[id]; 218 } 219 strcpy(z_Str, p->MWord); 220 } 221 222 223 int findNum(char *str, node *T) 224 { 225 int len, i, id; 226 node *p; 227 p = T; 228 len = strlen(str); 229 for(i = 0; i < len; i++) 230 { 231 if(str[i]>= 'a' && str[i] <= 'f') 232 id = str[i]- 'a' + 10; 233 else 234 id = str[i] - '0'; 235 if(p->next[id] == NULL) 236 return -1; 237 p = p->next[id]; 238 } 239 return p->order; 240 }
1 #include<stdio.h> 2 #include <stdlib.h> 3 #include <string.h> 4 #include "lyPublic/lyCodeConvert.h" 5 #define X_LONGSEN 500 6 #define Y_LONGWORD 100 7 struct node 8 { 9 char MWord[Y_LONGWORD]; //对应的最高权权值 10 int order; //权值、并作为标记是否有词 11 struct node *next[16]; 12 }; 13 14 typedef struct node node; 15 char z_Str[Y_LONGWORD]; 16 17 void insertTree(char *str, node *T, char *MaxWord, int num_max); 18 void findStr(char *str, node *T); 19 int findNum(char *str, node *T); 20 int SentenceTransform(char *FromWord,char *Tostr); 21 22 /* 23 函数功能: 24 将一句话里面的部分词 转化为 权值最高的词 25 变量说明: 26 FromWord 原句子 27 Tostr 转化后的句子 28 */ 29 30 int SentenceTransform(char *FromWord,char *Tostr) 31 { 32 //FILE *fp; 33 FILE *fq; 34 char GetSentence[X_LONGSEN] = ""; // 读取权值文档的句子 35 int lenGetSen, leWord, leSen; 36 char GetWord[Y_LONGWORD] = "" ; 37 char ToWord[Y_LONGWORD] = ""; 38 char strhan[Y_LONGWORD] = ""; 39 char MaxWord[Y_LONGWORD] = ""; //每一句的权值最高词 40 char hanMax[Y_LONGWORD] = "" ; 41 node *T; 42 int i, j, k, len, s, num, max_num, f; 43 int from, to, at; 44 int num_max; 45 int lenTostr = 0 , lenZ_Str; 46 47 T = (node *)malloc(sizeof(node)); 48 //初始化节点 49 memset (T->MWord, 0 ,sizeof(T->MWord)); 50 T->order = -1; 51 for(i = 0; i < 16; i++) 52 T->next[i] = NULL; 53 54 55 /* 56 打开权值文档,文档格式: 57 平凡28&平淡--62 平庸--5 平凡--82 平常--38] 58 平常83&平常--38 寻常--31] 59 贫困24&困顿--0 贫困--42 窘迫--4] 60 贫困24&贫困--42 贫寒--0 清贫--31 贫穷--7 穷苦--1] 61 说明: 62 第一个是一句话权值最高的词,紧跟着的就是权值 63 & 是分隔符 64 后面的是 近义词 的词和气权值大小 65 */ 66 67 68 69 // 读取权值文档,建立字典树 70 fq = fopen ("1.txt","r++"); 71 // fp = fopen ("jieguo.txt","w+r"); 72 while (fgets (GetSentence, 500, fq) != NULL) //读取权值文档,建立各个词对应的最高权值 73 { 74 lenGetSen = strlen(GetSentence); 75 leSen = 0; 76 memset (MaxWord, 0, sizeof(MaxWord)); 77 leWord = 0; 78 leWord = 0; 79 while(GetSentence[leSen] != '&' && (GetSentence[leSen] < '0' || GetSentence[leSen] >'9')) 80 MaxWord[leWord++] = GetSentence[leSen++]; 81 //取最高权词的权值 82 num_max = 0; 83 while(GetSentence[leSen] >= '0' && GetSentence[leSen] <= '9') 84 num_max = num_max*10 + GetSentence[leSen++] - '0'; 85 leSen++; 86 while (GetSentence[leSen] != ']' && leSen < lenGetSen) 87 { 88 memset (GetWord, 0, sizeof(GetWord)); 89 memset (ToWord, 0, sizeof(ToWord)); 90 leWord = 0; 91 while (GetSentence[leSen] != '-') 92 { 93 GetWord[leWord++] = GetSentence[leSen++]; 94 } 95 HanziToAnsi (GetWord, strlen(GetWord), ToWord, sizeof(ToWord)); 96 insertTree (ToWord, T, MaxWord, num_max); 97 while (GetSentence[leSen] == ' ' || ( GetSentence[leSen] >='0' && GetSentence[leSen] <= '9') || GetSentence[leSen] == '-') 98 leSen++; 99 } 100 } 101 102 103 //转化句子 104 len = strlen(FromWord); 105 at = 0; 106 for (i = 0; i < len;) 107 { 108 max_num = -1; 109 memset(strhan, 0, sizeof(strhan)); 110 for (j = i; j <= len; j+=2) 111 { 112 memset (GetWord, 0, sizeof(GetWord)); 113 s = 0; 114 num = -10; 115 //记录汉字 116 for (k = i; k < j; k++) 117 strhan[s++] = FromWord [k]; 118 119 //转码 120 HanziToAnsi(strhan, strlen(strhan), GetWord, sizeof(GetWord)); 121 122 if(strlen(strhan) != 0) 123 num = findNum(GetWord, T); 124 else 125 continue; 126 if (num > max_num) 127 { 128 max_num = num; 129 memset(hanMax, 0, sizeof(hanMax)); 130 strcpy(hanMax, strhan); 131 from = i; 132 to =j; 133 } 134 } 135 if(max_num != -1) 136 { 137 while(at < from) 138 { 139 //fputc(FromWord [at], fp); 140 Tostr[lenTostr++] = FromWord[at++]; 141 } 142 memset (GetWord, 0, sizeof(GetWord)); 143 memset(z_Str, 0, sizeof(z_Str)); 144 HanziToAnsi(hanMax, strlen(hanMax), GetWord, sizeof(GetWord)); 145 findStr(GetWord, T); 146 lenZ_Str = strlen(z_Str); 147 for(f = 0; f < lenZ_Str ;f++) 148 Tostr[lenTostr++] = z_Str[f]; 149 at = to; 150 i = to; 151 } 152 else 153 { 154 for(j = at; j < at+2; j++) 155 Tostr[lenTostr++] = FromWord[j]; 156 // fputc(FromWord[j], fp); 157 at += 2; 158 i += 2; 159 } 160 } 161 return 0; 162 } 163 164 void insertTree(char *str, node *T, char *MaxWord, int num_max) 165 { 166 int len, i, j, flag=0, id; 167 node *p, *q; 168 p = T; 169 len = strlen(str); 170 for (i = 0; i < len; i++) 171 { 172 if(str[i]>= 'a' && str[i] <= 'f')//当时abcdef时 转化为数字 173 id = str[i]- 'a' + 10; 174 else 175 id = str[i] - '0'; 176 if( p ->next[id] == NULL)//扩展节点 177 { 178 flag = 1; 179 q = (node *)malloc(sizeof(node)); 180 memset(q->MWord, 0, sizeof(q->MWord)); 181 q->order = -1; 182 for(j = 0;j < 16 ;j++) 183 q ->next[j] = NULL; 184 p->next[id] = q; 185 } 186 p = p->next[id]; 187 } 188 if(flag) 189 { 190 strcpy(p->MWord, MaxWord); 191 p->order = num_max; 192 } 193 else 194 { 195 if( p -> order == -1) 196 { 197 strcpy(p->MWord, MaxWord); 198 p->order = num_max ; 199 } 200 } 201 } 202 203 void findStr(char *str, node *T) 204 { 205 int len , i, id; 206 node *p; 207 p = T; 208 len = strlen(str); 209 for (i=0; i< len ; ++i) 210 { 211 if(str[i]>= 'a' && str[i] <= 'f') 212 id = str[i]- 'a' + 10; 213 else 214 id = str[i] - '0'; 215 if(p->next[id] == NULL) 216 return; 217 p = p->next[id]; 218 } 219 strcpy(z_Str, p->MWord); 220 } 221 222 223 int findNum(char *str, node *T) 224 { 225 int len, i, id; 226 node *p; 227 p = T; 228 len = strlen(str); 229 for(i = 0; i < len; i++) 230 { 231 if(str[i]>= 'a' && str[i] <= 'f') 232 id = str[i]- 'a' + 10; 233 else 234 id = str[i] - '0'; 235 if(p->next[id] == NULL) 236 return -1; 237 p = p->next[id]; 238 } 239 return p->order; 240 }
主函数:
#include<stdio.h> #include<string.h> #include "AnalysisWord.h" int main() { char strGetFromWeb[500] = ""; char strGetToWeb[500] = ""; while(gets(strGetFromWeb)) { SentenceTransform(strGetFromWeb,strGetToWeb); puts(strGetToWeb); memset(strGetToWeb, 0, sizeof(strGetToWeb)); } return 0; }