c - word counter (binary-tree)
Counts the ASCII words in a file; written in C, using a binary tree.
code:
word_counter.c:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define WORD_MAX_LEN 50
#define LAST_LETTER 1
#define LAST_OTHER 0

/**
 * count ascii words
 * @author kuchaguangjie
 * @date 2012-05-09 14:05:21
 * @mail kuchaguangjie@163.com
 */
struct word_count {
    char *word;                /* heap-allocated copy of the word, owned by the node */
    int count;                 /* number of times the word has been seen */
    struct word_count *left;   /* words that compare less than `word` */
    struct word_count *right;  /* words that compare greater than `word` */
};

/* Report an allocation failure and terminate the program. */
static void die_out_of_memory(void)
{
    fputs("word_counter: out of memory\n", stderr);
    exit(EXIT_FAILURE);
}

/* Return non-zero when c is an ASCII letter (a-z or A-Z). */
static int is_ascii_letter(int c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/*
 * Return a heap-allocated copy of word, or NULL when allocation fails.
 * Used instead of strdup(), which is not part of ISO C before C23.
 */
static char *word_dup(const char *word)
{
    size_t size = strlen(word) + 1;
    char *copy = malloc(size);

    if (copy != NULL) {
        memcpy(copy, word, size);
    }
    return copy;
}

/**
 * find word from FILE
 *
 * state machine of word found:
 *
 *   |---------------------------------------|
 *   | last_char   new_char   word_found     |
 *   |---------------------------------------|
 *   | letter      other      Y              |
 *   | letter      letter     N              |
 *   | other       other      N              |
 *   | other       letter     N              |
 *   |---------------------------------------|
 *
 * letter: a-z A-Z,
 * other:  any other char,
 *
 * @param fp
 *     pointer to FILE, must not be NULL
 * @param word
 *     the char array to store the word, must hold at least limit + 1 chars
 * @param limit
 *     max char count in the word; extra letters of a longer word are dropped
 *
 * @return
 *     1 -> a word was found before EOF, 0 -> EOF reached,
 *     when EOF is reached, word holds the trailing word found so far,
 *     or an empty string,
 */
int getword(FILE *fp, char *word, int limit)
{
    int i = 0;
    int last = LAST_OTHER;
    int c; /* int, not char: fgetc() returns an int so that EOF differs from every byte value */

    while ((c = fgetc(fp)) != EOF) {
        if (is_ascii_letter(c)) {
            last = LAST_LETTER;
            if (i < limit) {
                word[i++] = (char) c;
            }
        } else {
            if (last == LAST_LETTER) {
                /* letter -> other transition: the word is complete */
                word[i] = '\0';
                return 1;
            }
            last = LAST_OTHER;
        }
    }

    /* reach EOF */
    word[i] = '\0';
    return 0;
}

/**
 * alloc memory for a struct word_count
 *
 * @return
 *     pointer to an uninitialized node, or NULL when allocation fails
 */
struct word_count *word_alloc(void)
{
    struct word_count *node = malloc(sizeof *node);

    return node;
}

/**
 * binary-tree add
 *
 * Terminates the program with an error message when memory runs out.
 *
 * @param p
 *     pointer to the node on/under which to add/count the word, may be NULL
 * @param word
 *     the word to add; it is copied, so the caller may reuse its buffer
 *
 * @return
 *     the root of the (sub)tree after the word is added or counted
 */
struct word_count *btree_add(struct word_count *p, const char *word)
{
    int cmp;

    if (p == NULL) {
        p = word_alloc();
        if (p == NULL) {
            die_out_of_memory();
        }
        /* must duplicate the string: the caller overwrites its buffer for the next word */
        p->word = word_dup(word);
        if (p->word == NULL) {
            free(p);
            die_out_of_memory();
        }
        p->count = 1;
        p->left = NULL;
        p->right = NULL;
        return p;
    }

    cmp = strcmp(word, p->word);
    if (cmp == 0) {
        p->count++;
    } else if (cmp < 0) {
        p->left = btree_add(p->left, word);
    } else {
        p->right = btree_add(p->right, word);
    }

    /* must return the pointer: when the original pointer was NULL the caller needs the new node */
    return p;
}

/* print all nodes in order */
void treeprint(struct word_count *p)
{
    if (p != NULL) {
        treeprint(p->left);
        printf("%6d, %s\n", p->count, p->word);
        treeprint(p->right);
    }
}

/* Release every node of the tree together with the word each node owns. */
static void treefree(struct word_count *p)
{
    if (p == NULL) {
        return;
    }
    treefree(p->left);
    treefree(p->right);
    free(p->word);
    free(p);
}

/**
 * count the words of a file and print them in strcmp() order,
 * one "count, word" line per distinct word
 *
 * @param fp
 *     FILE pointer to read from; when NULL nothing is done
 */
void docount(FILE *fp)
{
    struct word_count *root = NULL;
    char word[WORD_MAX_LEN + 1];

    if (fp == NULL) {
        return;
    }

    while (getword(fp, word, WORD_MAX_LEN) == 1) {
        root = btree_add(root, word);
    }

    /* last word: the file may end directly after a letter */
    if (word[0] != '\0') {
        root = btree_add(root, word);
    }

    treeprint(root);
    treefree(root);
}

/**
 * program entry
 *
 * Counts the words of the file named by the first command-line argument,
 * or of the built-in default path when no argument is given.
 *
 * @return
 *     EXIT_SUCCESS when the file was processed, EXIT_FAILURE when it could not be opened
 */
int main(int argc, char *argv[])
{
    const char *fpath = "/home/eric/workspace/c_workplace/practise/word_counter.c";
    FILE *fp;

    if (argc > 1) {
        fpath = argv[1];
    }

    fp = fopen(fpath, "r");
    if (fp == NULL) {
        perror(fpath);
        return EXIT_FAILURE;
    }

    docount(fp);
    fclose(fp);
    return EXIT_SUCCESS;
}