句子分析器

初步实现了一个接口:

  1 #include<stdio.h>

  2 #include <stdlib.h>

  3 #include <string.h>

  4 #include "lyPublic/lyCodeConvert.h"

  5 #define X_LONGSEN  500

  6 #define Y_LONGWORD 100

  7 struct node 

  8 {

  9     char MWord[Y_LONGWORD]; //对应的最高权权值

 10     int order; //权值、并作为标记是否有词

 11     struct node *next[16];

 12 };

 13 

 14 typedef struct node node;

 15 char z_Str[Y_LONGWORD];

 16 

 17 void insertTree(char *str, node *T, char *MaxWord, int num_max);

 18 void findStr(char *str, node *T);

 19 int findNum(char *str, node *T);

 20 int SentenceTransform(char *FromWord,char *Tostr);

 21 

 22 /*

 23 函数功能:

 24     将一句话里面的部分词 转化为 权值最高的词

 25 变量说明:

 26     FromWord 原句子

 27     Tostr 转化后的句子

 28 */

 29 

 30 int SentenceTransform(char *FromWord,char *Tostr)

 31 {

 32     //FILE *fp;  

 33     FILE *fq;

 34     char GetSentence[X_LONGSEN] = ""; // 读取权值文档的句子

 35     int lenGetSen, leWord, leSen; 

 36     char GetWord[Y_LONGWORD] = "" ;

 37     char ToWord[Y_LONGWORD] = "";

 38     char strhan[Y_LONGWORD] = "";

 39     char MaxWord[Y_LONGWORD] = ""; //每一句的权值最高词

 40     char hanMax[Y_LONGWORD] = "" ;

 41     node *T;

 42     int i, j, k, len, s, num, max_num, f;

 43     int from, to, at;

 44     int num_max;

 45     int lenTostr = 0 , lenZ_Str;

 46 

 47     T = (node *)malloc(sizeof(node));

 48         //初始化节点

 49     memset (T->MWord, 0 ,sizeof(T->MWord));

 50     T->order = -1;

 51     for(i = 0; i < 16; i++)

 52         T->next[i] = NULL;

 53 

 54 

 55 /*

 56     打开权值文档,文档格式:

 57         平凡28&平淡--62 平庸--5 平凡--82 平常--38]

 58         平常83&平常--38 寻常--31]

 59         贫困24&困顿--0 贫困--42 窘迫--4]

 60         贫困24&贫困--42 贫寒--0 清贫--31 贫穷--7 穷苦--1]

 61     说明:

 62         第一个是一句话权值最高的词,紧跟着的就是权值

 63         & 是分隔符

 64         后面的是 近义词 的词和气权值大小

 65 */

 66 

 67 

 68 

 69 //  读取权值文档,建立字典树

 70     fq = fopen ("1.txt","r++");

 71 //    fp = fopen ("jieguo.txt","w+r");

 72     while (fgets (GetSentence, 500, fq) != NULL) //读取权值文档,建立各个词对应的最高权值

 73     {

 74         lenGetSen = strlen(GetSentence);

 75         leSen = 0;

 76         memset (MaxWord, 0, sizeof(MaxWord));

 77         leWord = 0;

 78         leWord = 0;

 79         while(GetSentence[leSen] != '&' && (GetSentence[leSen] < '0' || GetSentence[leSen] >'9'))

 80             MaxWord[leWord++] = GetSentence[leSen++];

 81         //取最高权词的权值 

 82         num_max = 0; 

 83         while(GetSentence[leSen] >= '0' && GetSentence[leSen] <= '9')

 84             num_max = num_max*10 + GetSentence[leSen++] - '0';

 85         leSen++;

 86         while (GetSentence[leSen] != ']' && leSen < lenGetSen)

 87         {

 88             memset (GetWord, 0, sizeof(GetWord));

 89             memset (ToWord, 0, sizeof(ToWord));

 90             leWord = 0;

 91             while (GetSentence[leSen] != '-') 

 92             {

 93                 GetWord[leWord++] = GetSentence[leSen++];

 94             }

 95             HanziToAnsi (GetWord, strlen(GetWord), ToWord, sizeof(ToWord));

 96             insertTree (ToWord, T, MaxWord, num_max);

 97             while (GetSentence[leSen] == ' ' || ( GetSentence[leSen] >='0' && GetSentence[leSen] <= '9') || GetSentence[leSen] == '-')

 98                 leSen++;

 99         }

100     }

101 

102 

103     //转化句子

104     len = strlen(FromWord);

105     at = 0;

106     for (i = 0; i < len;)

107     {

108         max_num = -1;

109         memset(strhan, 0, sizeof(strhan));

110         for (j = i; j <= len; j+=2)

111         {

112             memset (GetWord, 0, sizeof(GetWord));

113             s = 0;

114             num = -10;

115             //记录汉字

116             for (k = i; k < j; k++)

117                 strhan[s++] = FromWord [k];

118 

119                 //转码

120             HanziToAnsi(strhan, strlen(strhan), GetWord, sizeof(GetWord));

121 

122             if(strlen(strhan) != 0)

123                 num = findNum(GetWord, T);

124             else

125                 continue;

126             if (num > max_num)

127             {

128                 max_num = num;

129                 memset(hanMax, 0, sizeof(hanMax));

130                 strcpy(hanMax, strhan);

131                 from = i;

132                 to =j;

133             }

134         }

135         if(max_num != -1)

136         {

137             while(at < from)

138             {

139                 //fputc(FromWord [at], fp);

140                 Tostr[lenTostr++] = FromWord[at++];

141             }

142             memset (GetWord, 0, sizeof(GetWord));

143             memset(z_Str, 0, sizeof(z_Str));

144             HanziToAnsi(hanMax, strlen(hanMax), GetWord, sizeof(GetWord));

145             findStr(GetWord, T);

146             lenZ_Str = strlen(z_Str);

147             for(f = 0; f < lenZ_Str ;f++)

148                 Tostr[lenTostr++] = z_Str[f];

149             at = to;

150             i = to;

151         }

152         else

153         {

154             for(j = at; j < at+2; j++)

155                 Tostr[lenTostr++] = FromWord[j];

156             //    fputc(FromWord[j], fp);

157             at += 2;

158             i += 2;

159         }

160     }

161     return 0;

162 }

163 

164 void insertTree(char *str, node *T, char *MaxWord, int num_max)

165 {

166     int len, i, j, flag=0, id;

167     node *p, *q;

168     p = T;

169     len = strlen(str);

170     for (i = 0; i < len; i++)

171     {

172         if(str[i]>= 'a' && str[i] <= 'f')//当时abcdef时 转化为数字

173             id = str[i]- 'a' + 10;

174         else

175             id = str[i] - '0';

176         if( p ->next[id] == NULL)//扩展节点

177         {

178             flag = 1;

179             q = (node *)malloc(sizeof(node));

180             memset(q->MWord, 0, sizeof(q->MWord));

181             q->order = -1;

182             for(j = 0;j < 16 ;j++)

183                 q ->next[j] = NULL;

184             p->next[id] = q;

185         }

186         p = p->next[id];

187     }

188     if(flag)

189     {

190         strcpy(p->MWord, MaxWord);

191         p->order = num_max;

192     }

193     else

194     {

195         if( p -> order == -1)

196         {

197             strcpy(p->MWord, MaxWord);

198             p->order = num_max ;

199         }

200     }

201 }

202 

203 void findStr(char *str, node *T)

204 {

205     int len , i, id;

206     node *p;

207     p = T;

208     len = strlen(str);

209     for (i=0; i< len ; ++i)

210     {

211         if(str[i]>= 'a' && str[i] <= 'f')

212             id = str[i]- 'a' + 10;

213         else

214             id = str[i] - '0';

215         if(p->next[id] == NULL)

216             return;

217         p = p->next[id];

218     }

219     strcpy(z_Str, p->MWord);

220 }

221 

222 

223 int findNum(char *str, node *T)

224 {

225     int len, i, id;

226     node *p;

227     p = T;

228     len = strlen(str);

229     for(i = 0; i < len; i++)

230     {

231         if(str[i]>= 'a' && str[i] <= 'f')

232             id = str[i]- 'a' + 10;

233         else

234             id = str[i] - '0';

235         if(p->next[id] == NULL)

236             return  -1;

237         p = p->next[id];

238     }

239     return p->order;

240 }
  1 #include<stdio.h>

  2 #include <stdlib.h>

  3 #include <string.h>

  4 #include "lyPublic/lyCodeConvert.h"

  5 #define X_LONGSEN  500

  6 #define Y_LONGWORD 100

  7 struct node 

  8 {

  9     char MWord[Y_LONGWORD]; //对应的最高权权值

 10     int order; //权值、并作为标记是否有词

 11     struct node *next[16];

 12 };

 13 

 14 typedef struct node node;

 15 char z_Str[Y_LONGWORD];

 16 

 17 void insertTree(char *str, node *T, char *MaxWord, int num_max);

 18 void findStr(char *str, node *T);

 19 int findNum(char *str, node *T);

 20 int SentenceTransform(char *FromWord,char *Tostr);

 21 

 22 /*

 23 函数功能:

 24     将一句话里面的部分词 转化为 权值最高的词

 25 变量说明:

 26     FromWord 原句子

 27     Tostr 转化后的句子

 28 */

 29 

 30 int SentenceTransform(char *FromWord,char *Tostr)

 31 {

 32     //FILE *fp;  

 33     FILE *fq;

 34     char GetSentence[X_LONGSEN] = ""; // 读取权值文档的句子

 35     int lenGetSen, leWord, leSen; 

 36     char GetWord[Y_LONGWORD] = "" ;

 37     char ToWord[Y_LONGWORD] = "";

 38     char strhan[Y_LONGWORD] = "";

 39     char MaxWord[Y_LONGWORD] = ""; //每一句的权值最高词

 40     char hanMax[Y_LONGWORD] = "" ;

 41     node *T;

 42     int i, j, k, len, s, num, max_num, f;

 43     int from, to, at;

 44     int num_max;

 45     int lenTostr = 0 , lenZ_Str;

 46 

 47     T = (node *)malloc(sizeof(node));

 48         //初始化节点

 49     memset (T->MWord, 0 ,sizeof(T->MWord));

 50     T->order = -1;

 51     for(i = 0; i < 16; i++)

 52         T->next[i] = NULL;

 53 

 54 

 55 /*

 56     打开权值文档,文档格式:

 57         平凡28&平淡--62 平庸--5 平凡--82 平常--38]

 58         平常83&平常--38 寻常--31]

 59         贫困24&困顿--0 贫困--42 窘迫--4]

 60         贫困24&贫困--42 贫寒--0 清贫--31 贫穷--7 穷苦--1]

 61     说明:

 62         第一个是一句话权值最高的词,紧跟着的就是权值

 63         & 是分隔符

 64         后面的是 近义词 的词和气权值大小

 65 */

 66 

 67 

 68 

 69 //  读取权值文档,建立字典树

 70     fq = fopen ("1.txt","r++");

 71 //    fp = fopen ("jieguo.txt","w+r");

 72     while (fgets (GetSentence, 500, fq) != NULL) //读取权值文档,建立各个词对应的最高权值

 73     {

 74         lenGetSen = strlen(GetSentence);

 75         leSen = 0;

 76         memset (MaxWord, 0, sizeof(MaxWord));

 77         leWord = 0;

 78         leWord = 0;

 79         while(GetSentence[leSen] != '&' && (GetSentence[leSen] < '0' || GetSentence[leSen] >'9'))

 80             MaxWord[leWord++] = GetSentence[leSen++];

 81         //取最高权词的权值 

 82         num_max = 0; 

 83         while(GetSentence[leSen] >= '0' && GetSentence[leSen] <= '9')

 84             num_max = num_max*10 + GetSentence[leSen++] - '0';

 85         leSen++;

 86         while (GetSentence[leSen] != ']' && leSen < lenGetSen)

 87         {

 88             memset (GetWord, 0, sizeof(GetWord));

 89             memset (ToWord, 0, sizeof(ToWord));

 90             leWord = 0;

 91             while (GetSentence[leSen] != '-') 

 92             {

 93                 GetWord[leWord++] = GetSentence[leSen++];

 94             }

 95             HanziToAnsi (GetWord, strlen(GetWord), ToWord, sizeof(ToWord));

 96             insertTree (ToWord, T, MaxWord, num_max);

 97             while (GetSentence[leSen] == ' ' || ( GetSentence[leSen] >='0' && GetSentence[leSen] <= '9') || GetSentence[leSen] == '-')

 98                 leSen++;

 99         }

100     }

101 

102 

103     //转化句子

104     len = strlen(FromWord);

105     at = 0;

106     for (i = 0; i < len;)

107     {

108         max_num = -1;

109         memset(strhan, 0, sizeof(strhan));

110         for (j = i; j <= len; j+=2)

111         {

112             memset (GetWord, 0, sizeof(GetWord));

113             s = 0;

114             num = -10;

115             //记录汉字

116             for (k = i; k < j; k++)

117                 strhan[s++] = FromWord [k];

118 

119                 //转码

120             HanziToAnsi(strhan, strlen(strhan), GetWord, sizeof(GetWord));

121 

122             if(strlen(strhan) != 0)

123                 num = findNum(GetWord, T);

124             else

125                 continue;

126             if (num > max_num)

127             {

128                 max_num = num;

129                 memset(hanMax, 0, sizeof(hanMax));

130                 strcpy(hanMax, strhan);

131                 from = i;

132                 to =j;

133             }

134         }

135         if(max_num != -1)

136         {

137             while(at < from)

138             {

139                 //fputc(FromWord [at], fp);

140                 Tostr[lenTostr++] = FromWord[at++];

141             }

142             memset (GetWord, 0, sizeof(GetWord));

143             memset(z_Str, 0, sizeof(z_Str));

144             HanziToAnsi(hanMax, strlen(hanMax), GetWord, sizeof(GetWord));

145             findStr(GetWord, T);

146             lenZ_Str = strlen(z_Str);

147             for(f = 0; f < lenZ_Str ;f++)

148                 Tostr[lenTostr++] = z_Str[f];

149             at = to;

150             i = to;

151         }

152         else

153         {

154             for(j = at; j < at+2; j++)

155                 Tostr[lenTostr++] = FromWord[j];

156             //    fputc(FromWord[j], fp);

157             at += 2;

158             i += 2;

159         }

160     }

161     return 0;

162 }

163 

164 void insertTree(char *str, node *T, char *MaxWord, int num_max)

165 {

166     int len, i, j, flag=0, id;

167     node *p, *q;

168     p = T;

169     len = strlen(str);

170     for (i = 0; i < len; i++)

171     {

172         if(str[i]>= 'a' && str[i] <= 'f')//当时abcdef时 转化为数字

173             id = str[i]- 'a' + 10;

174         else

175             id = str[i] - '0';

176         if( p ->next[id] == NULL)//扩展节点

177         {

178             flag = 1;

179             q = (node *)malloc(sizeof(node));

180             memset(q->MWord, 0, sizeof(q->MWord));

181             q->order = -1;

182             for(j = 0;j < 16 ;j++)

183                 q ->next[j] = NULL;

184             p->next[id] = q;

185         }

186         p = p->next[id];

187     }

188     if(flag)

189     {

190         strcpy(p->MWord, MaxWord);

191         p->order = num_max;

192     }

193     else

194     {

195         if( p -> order == -1)

196         {

197             strcpy(p->MWord, MaxWord);

198             p->order = num_max ;

199         }

200     }

201 }

202 

203 void findStr(char *str, node *T)

204 {

205     int len , i, id;

206     node *p;

207     p = T;

208     len = strlen(str);

209     for (i=0; i< len ; ++i)

210     {

211         if(str[i]>= 'a' && str[i] <= 'f')

212             id = str[i]- 'a' + 10;

213         else

214             id = str[i] - '0';

215         if(p->next[id] == NULL)

216             return;

217         p = p->next[id];

218     }

219     strcpy(z_Str, p->MWord);

220 }

221 

222 

223 int findNum(char *str, node *T)

224 {

225     int len, i, id;

226     node *p;

227     p = T;

228     len = strlen(str);

229     for(i = 0; i < len; i++)

230     {

231         if(str[i]>= 'a' && str[i] <= 'f')

232             id = str[i]- 'a' + 10;

233         else

234             id = str[i] - '0';

235         if(p->next[id] == NULL)

236             return  -1;

237         p = p->next[id];

238     }

239     return p->order;

240 }

主函数:

#include<stdio.h>

#include<string.h>

#include "AnalysisWord.h"



/*
 * Reads sentences from standard input, one per line, passes each through
 * SentenceTransform() and prints the result on its own line.
 *
 * NOTE(review): SentenceTransform() is given no capacity for its output
 * buffer, so a rewritten sentence longer than 499 bytes would overrun
 * strGetToWeb. Lines longer than 499 bytes are delivered by fgets in pieces.
 */
int main(void)
{
    char strGetFromWeb[500] = "";
    char strGetToWeb[500] = "";

    /* fgets bounds the read to the buffer; gets() cannot, and was removed
       from the language in C11. */
    while (fgets(strGetFromWeb, sizeof(strGetFromWeb), stdin) != NULL)
    {
        /* gets() dropped the line terminator, so drop it here as well. */
        strGetFromWeb[strcspn(strGetFromWeb, "\n")] = '\0';

        SentenceTransform(strGetFromWeb, strGetToWeb);
        puts(strGetToWeb);
        memset(strGetToWeb, 0, sizeof(strGetToWeb));
    }
    return 0;
}

 

 

你可能感兴趣的:(分析)