轻松应用字典树

字典树(Trie),也叫单词查找树或键树,是一种树形结构,属于哈希树的变种。它是一种针对字符串进行操作的数据结构,典型应用是用于统计和排序大量的字符串,如在搜索引擎系统中用于文本词频统计。其主要思想是利用字符串的公共前缀来节约存储空间。字典树在插入和查询字符串的操作上具有较高的效率。

/*-------------字典树查询单词---------------*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

/* Number of child slots per node: one for each lowercase letter 'a'..'z'. */
#define ALPHABET_SIZE 26

/* A single node of the trie. */
typedef struct TrieNode
{
    /* Child links, indexed by letter offset from 'a'; NULL means no child. */
    struct TrieNode *children[ALPHABET_SIZE];
    /* The letter this node stands for; insert sets it when it creates the node. */
    char character;
    /* True when the path from the root to this node spells an inserted word. */
    bool isEndOfWord;

} TrieNode;

/* 创建新的节点 */
TrieNode *createTrieNode()
{
    TrieNode *node;
    node = malloc(sizeof(TrieNode));
    node->isEndOfWord = false;
    int i = 0;
    while (i < ALPHABET_SIZE)
    {
        node->children[i] = NULL;
        i++;
    }
    return node;
}

/*
 * Insert a word into the trie rooted at root.
 *
 * word:  NUL-terminated string. A trailing line ending ("\n" or "\r\n",
 *        as left behind by fgets) is accepted and is not part of the
 *        stored word; a word without a line ending is stored in full.
 * root:  root node of the trie; nodes are created below it as needed.
 *
 * The trie only has slots for the letters 'a'..'z'. A word that is empty
 * or contains any other character is ignored entirely, so no partial
 * path is created for it and no out-of-range child slot is touched.
 * A NULL root or word is ignored as well.
 */
void insert(TrieNode *root, char *word)
{
    if (root == NULL || word == NULL)
    {
        return;
    }

    /* Length of the word proper: everything before the first line-ending
     * character or the terminating NUL. */
    size_t length = strcspn(word, "\r\n");
    if (length == 0)
    {
        return;
    }

    /* Validate before touching the trie so a bad word leaves no trace. */
    for (size_t i = 0; i < length; i++)
    {
        if (word[i] < 'a' || word[i] > 'z')
        {
            return;
        }
    }

    TrieNode *node = root;
    for (size_t i = 0; i < length; i++)
    {
        int slot = word[i] - 'a';
        if (node->children[slot] == NULL)
        {
            TrieNode *child = createTrieNode();
            if (child == NULL)
            {
                return; /* allocation failed: give up on this word */
            }
            child->character = word[i];
            node->children[slot] = child;
        }
        node = node->children[slot];
    }
    node->isEndOfWord = true;
}

/*
 * Walk the trie from root along the letters of word.
 *
 * Returns the node reached after consuming every character of word (root
 * itself when word is empty). If the path does not exist, prints
 * "No possible words!!" to stdout and returns NULL.
 *
 * Characters outside 'a'..'z' have no child slot, so a word containing
 * one can never match; it is reported as not found instead of being used
 * as an array index. A NULL root or word is also reported as not found.
 */
TrieNode *search(TrieNode *root, char *word)
{
    TrieNode *node = (word != NULL) ? root : NULL;

    /* The loop stops as soon as the path breaks (node becomes NULL). */
    for (const char *p = word; node != NULL && *p != '\0'; p++)
    {
        if (*p >= 'a' && *p <= 'z')
        {
            node = node->children[*p - 'a'];
        }
        else
        {
            node = NULL;
        }
    }

    if (node == NULL)
    {
        printf("No possible words!!\n");
    }
    return node;
}

/*
 * Write the first len characters of chars to stdout, followed by a
 * newline. chars does not need to be NUL-terminated.
 */
void printArray(char chars[], int len)
{
    int pos = 0;
    while (pos < len)
    {
        putchar(chars[pos]);
        pos++;
    }
    putchar('\n');
}

/*
 * Print every word stored in the subtree rooted at node.
 *
 * node:      subtree root; a NULL node prints nothing.
 * prefix:    scratch buffer whose first filledLen characters are the
 *            letters leading up to node; it is overwritten from index
 *            filledLen onwards as the walk descends.
 * filledLen: number of characters of prefix that are already valid.
 *
 * node's own letter is stored at prefix[filledLen]; whenever a node is
 * marked as the end of a word the buffer built so far is printed on its
 * own line. Children are visited in slot order.
 */
void printPathsRecur(TrieNode *node, char prefix[], int filledLen)
{
    if (node != NULL)
    {
        int depth = filledLen + 1;
        prefix[filledLen] = node->character;

        if (node->isEndOfWord)
        {
            printArray(prefix, depth);
        }

        for (int slot = 0; slot != ALPHABET_SIZE; ++slot)
        {
            printPathsRecur(node->children[slot], prefix, depth);
        }
    }
}

/*
 * Print every word in the trie that starts with prefix.
 *
 * prefix: NUL-terminated lookup key. The buffer is reused as scratch
 *         space while the matches are printed, so it must be writable
 *         and large enough to hold the longest stored word.
 * root:   root node of the trie.
 *
 * An empty or NULL prefix (or a NULL root) prints nothing: there is no
 * last letter to resume from, and starting the walk at index -1 would
 * write before the start of the buffer. When the prefix is not in the
 * trie, search reports that and nothing else is printed.
 */
void traverse(char prefix[], TrieNode *root)
{
    if (prefix == NULL || root == NULL || prefix[0] == '\0')
    {
        return;
    }

    TrieNode *match = search(root, prefix);
    if (match == NULL)
    {
        return;
    }

    /* match is the node of the prefix's last letter, and printPathsRecur
     * stores that letter itself, so start one position before the end. */
    printPathsRecur(match, prefix, (int)strlen(prefix) - 1);
}

/* NOTE(review): presumably the number of lines in dictionary.txt - confirm. */
#define NUMBER_OF_WORDS (354935)
/* Size in bytes of a buffer that holds one word, including the NUL. */
#define INPUT_WORD_SIZE (100)

/*
 * Read one whitespace-delimited word from stdin into s.
 *
 * s: destination buffer of at least INPUT_WORD_SIZE (100) bytes; at most
 *    99 characters are stored, followed by a terminating NUL.
 *
 * Returns s. If nothing could be read (end of input or a read error), s
 * is set to the empty string so the caller never sees stale or
 * uninitialised data and can detect the condition. A NULL s is returned
 * unchanged.
 */
char *receiveInput(char *s)
{
    if (s == NULL)
    {
        return NULL;
    }

    /* scanf returns 1 only when a word was actually stored in s. */
    if (scanf("%99s", s) != 1)
    {
        s[0] = '\0';
    }
    return s;
}

/*
 * Program entry point: load dictionary.txt into a trie, then repeatedly
 * prompt for a keyword and print every stored word that starts with it.
 *
 * Each dictionary line is inserted as soon as it is read, so the number
 * of lines in the file is not limited and no per-word buffer has to be
 * kept around. The prompt loop ends when no more input can be read.
 *
 * Exits with status 1 if the dictionary cannot be opened or the trie
 * root cannot be allocated; returns 0 otherwise.
 */
int main(void)
{
    /* Open the dictionary file */
    FILE *fp = fopen("dictionary.txt", "r");
    if (fp == NULL)
    {
        fprintf(stderr, "Error while opening dictionary file");
        exit(1);
    }

    /* Build the trie, one dictionary line at a time */
    TrieNode *root = createTrieNode();
    if (root == NULL)
    {
        fprintf(stderr, "Out of memory while creating trie root\n");
        fclose(fp);
        exit(1);
    }

    char line[INPUT_WORD_SIZE];
    while (fgets(line, sizeof line, fp) != NULL)
    {
        insert(root, line);
    }
    fclose(fp);

    for (;;)
    {
        char str[INPUT_WORD_SIZE];

        printf("Enter keyword: ");

        /* Start from an empty buffer: if nothing is read (end of input
         * or read error) it stays empty and the loop ends instead of
         * spinning forever on stale data. */
        str[0] = '\0';
        receiveInput(str);
        if (str[0] == '\0')
        {
            break;
        }

        printf("\n==========================================================\n");
        printf("\n********************* Possible Words ********************\n");

        /* Look the keyword up in the trie and print the matches */
        traverse(str, root);

        printf("\n==========================================================\n");
    }

    return 0;
}

查询结果示例:

Enter keyword: cc

==========================================================

********************* Possible Words ********************
cc
ccesser
cchaddoorck
ccid
ccitt
cckw
ccm
ccw
ccws

==========================================================
Enter keyword: 

 

 

你可能感兴趣的:(数据结构和算法,算法,数据结构)