计算一篇文章中单词出现的频率,并输出出现频率最高的十五个单词

 
#include <stdio.h>
#include "string.h"
#define WORDNUM 65535 // number of rows in the distinct-word table
#define WORDLENGTH 15 // size in bytes of every word buffer
#define PRINTWORDNUM 15 // upper bound on the number of words printed by OutPutWordList
char differentword[WORDNUM][WORDLENGTH]={0};// table of the distinct words seen so far, one word per row
int differentCount[WORDNUM];// differentCount[i] is the occurrence count of differentword[i]
int iIndex = 0;// number of words read from the file so far (incremented in GetWordInfo)
int iWordCount =0;// number of rows currently in use in differentword / differentCount
// Forward declarations of the helpers defined further down in this file.
void GetWordInfo(FILE *fpRead);
void SetWordList(char word[WORDLENGTH]);
void OrderWordList();
void OutPutWordList();
/*
 * Runs the whole word-frequency job on the fixed input file D:\word.txt.
 *
 * Opens the file for reading, passes the stream to GetWordInfo() to collect
 * the words, closes the stream, then calls OrderWordList() and
 * OutPutWordList(). If the file cannot be opened, an error message is
 * printed and the function returns without doing anything else.
 */
void DealWithWord()
{
    /* The backslash has to be escaped: "\w" is not a valid escape sequence. */
    const char *path = "D:\\word.txt";
    FILE *fpRead = fopen(path, "r");

    if (fpRead == NULL)
    {
        printf("Cannot read file %s\n", path);
        return;
    }

    GetWordInfo(fpRead);
    /* All input has been consumed; release the stream before the remaining work. */
    fclose(fpRead);

    OrderWordList();
    OutPutWordList();
}
void GetWordInfo(FILE *fpRead)
{
int jIndex = 0;
int i = 0;
char ch;
char word[WORDLENGTH]={0};
while((ch=fgetc(fpRead))!=EOF)
{
   //putchar(ch);
   if ((ch >= 65 && ch <=90) ||(ch >= 97 && ch <=122))
   {
    if (jIndex < WORDLENGTH)
    {
     word[jIndex] = ch;
     jIndex ++;
    }
   }
   else
   {
    if (jIndex != 0)
    {
     SetWordList(word);
     jIndex = 0;
     iIndex ++;
    }
   }
}
}
void SetWordList(char word[WORDLENGTH])
{
int i;
int iEqual = 0;
if (iIndex == 0)
{
   strcpy(differentword[0],word);
   differentCount[0] = 0;
   iWordCount ++;
}
for (i = 0; i < iWordCount; i ++)
{
   if (strcmp(differentword[i],word) == 0)
   {
    differentCount[i] ++;
    iEqual = 1;
   }
}
if (iEqual == 0)
{
   strcpy(differentword[iWordCount],word);
   differentCount[iWordCount] ++;
   iWordCount ++;
}
for (i = 0; i {
   word[i] = '\0';
}
}
/*
 * Sorts the global word table by descending occurrence count.
 *
 * Uses a selection sort over the first iWordCount entries: for each position
 * the remaining entry with the largest count is located and swapped into
 * place. differentCount and differentword are swapped together so each word
 * keeps its own count.
 */
void OrderWordList()
{
    int i, j;
    int best;                     /* index of the largest count found in [i, iWordCount) */
    int tmpCount;
    char tmpWord[WORDLENGTH];

    for (i = 0; i < iWordCount; i++)
    {
        best = i;
        for (j = i + 1; j < iWordCount; j++)
        {
            if (differentCount[best] < differentCount[j])
            {
                best = j;
            }
        }

        if (best != i)
        {
            tmpCount = differentCount[best];
            differentCount[best] = differentCount[i];
            differentCount[i] = tmpCount;

            /* Rows have a fixed size, so swap them as whole WORDLENGTH-byte blocks. */
            memcpy(tmpWord, differentword[best], WORDLENGTH);
            memcpy(differentword[best], differentword[i], WORDLENGTH);
            memcpy(differentword[i], tmpWord, WORDLENGTH);
        }
    }
}
/*
 * Prints the leading entries of the global word table, one "word count" pair
 * per line. At most PRINTWORDNUM entries are printed, and never more than the
 * iWordCount entries currently stored.
 */
void OutPutWordList()
{
    int limit = (iWordCount < PRINTWORDNUM) ? iWordCount : PRINTWORDNUM;
    int rank = 0;

    while (rank < limit)
    {
        printf("%s %d\n", differentword[rank], differentCount[rank]);
        rank++;
    }
}
/*
 * Program entry point. The command-line arguments are not used.
 */
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    /* Count how often each word occurs in an English text and print the
       15 most frequent words. */
    DealWithWord();

    /* Block until a character is read from stdin (presumably to keep the
       console window open after the results are printed). */
    getchar();
    return 0;
}

 

计算一篇文章中单词出现的频率,并把输出频率最高的十五个单词输出来_第1张图片

你可能感兴趣的:(编程,file,null)