C语言 单词查找树 Trie树

      • 单词查找树简介
        • 代码实现

单词查找树简介

Trie树,是一种树形结构,是一种哈希树的变种。典型应用是用于统计,排序和保存大量的字符串(但不仅限于字符串),所以经常被搜索引擎系统用于文本词频统计。它的优点是:利用字符串的公共前缀来减少查询时间,最大限度地减少无谓的字符串比较,查询效率比哈希树高。

单词查找树的模拟过程可以看链接:Prefix tree

对于下图的Trie树, 总共有4个单词,abc, ad, efa, ef
C语言 单词查找树 Trie树_第1张图片

代码实现

节点:

 // Number of slots in each node's next[] array.
 #define CHARLENGTH 256

 // One trie node. The same type is also used as the handle for the whole
 // trie (TrieSTPtr), which points at the root node.
 typedef struct TrieNode {
    // Child links; insert() and searchTrie() index this array with the next
    // character of the key. A NULL slot means "no child for that character".
    struct TrieNode *next[CHARLENGTH];
    // Set to TRUE by insert() on the node where an inserted key ends.
    int isEndOfWord;
    int count;  // number of child nodes
    // Character passed to createTrieNode() when this node was created.
    char value;
} TrieNode, *TrieNodePtr, *TrieSTPtr;

插入一个节点:

/*
 * Insert the NUL-terminated string `key` into the trie rooted at `root`.
 *
 * Walks the trie one character at a time, creating any missing child node
 * with createTrieNode() and bumping the parent's `count` for each new child.
 * The node reached after the last character is marked as the end of a word.
 * Each character is also echoed to stdout with putchar() (trace output kept
 * from the original implementation).
 *
 * Does nothing when `root` or `key` is NULL. If a node cannot be allocated,
 * the function stops early: the nodes created so far stay in the trie but
 * the key is not marked as present.
 */
void insert(TrieSTPtr root, char *key) {
    if (root == NULL || key == NULL)
        return;

    TrieSTPtr node = root;
    for (int i = 0; key[i] != '\0'; i++) {
        /* Plain char may be signed; converting through unsigned char keeps
         * the index inside 0..255 for bytes >= 0x80 as well. */
        unsigned char slot = (unsigned char)key[i];

        putchar(key[i]);
        if (node->next[slot] == NULL) {
            TrieNodePtr child = createTrieNode(key[i]);
            if (child == NULL)
                return;  /* allocation failed: give up on this key */
            node->next[slot] = child;
            node->count++;
        }
        node = node->next[slot];
    }
    node->isEndOfWord = TRUE;
}

搜索一个单词:

/*
 * Report whether `str` was stored in the trie as a complete word.
 *
 * Follows one child link per character of `str`. Returns TRUE only when the
 * whole path exists and its final node is flagged isEndOfWord; a string that
 * is merely a prefix of stored words yields FALSE. Returns FALSE when `root`
 * or `str` is NULL.
 */
int searchTrie(TrieSTPtr root, char *str)
{
    if (root == NULL || str == NULL)
        return FALSE;

    TrieSTPtr node = root;
    /* Compare against the character '\0', not the pointer constant NULL. */
    for (int i = 0; str[i] != '\0'; i++) {
        /* unsigned char keeps the index in 0..255 even if char is signed. */
        node = node->next[(unsigned char)str[i]];
        if (node == NULL)
            return FALSE;
    }
    return (node->isEndOfWord == TRUE) ? TRUE : FALSE;
}

删除节点后返回路径上最近的一个键:
C语言 单词查找树 Trie树_第2张图片

删除节点,总共会有四种情况:
1. 字符串超出范围
2. 字符串刚好结束,节点没有子节点
3. 字符串已经结束,节点存在子节点(count != 0)
(1) 节点是单词的结尾(node->isEndOfWord == TRUE)
(2) 节点不是单词的结尾(node->isEndOfWord == FALSE)

/*
 * Remove `key` from the trie and prune nodes that are no longer needed.
 *
 * Parameters:
 *   root - node reached after consuming the first `d` characters of `key`;
 *          top-level callers pass the trie root.
 *   key  - NUL-terminated key to remove.
 *   d    - number of characters already consumed; top-level callers pass 0
 *          and must keep 0 <= d <= strlen(key).
 *
 * Returns NULL when `root` is NULL or when the node passed in was freed by
 * this call; otherwise returns the deepest node on the key's path that still
 * exists after the deletion.
 *
 * The node passed with d == 0 is treated as the trie root and is never
 * freed, so the caller's root pointer stays valid even when the trie becomes
 * empty. A key that is not stored leaves the trie unchanged.
 */
TrieSTPtr deleteKey(TrieSTPtr root, char *key, int d) {
    if (root == NULL)  /* case 1: walked past the stored path */
        return NULL;
    if (key == NULL)
        return root;

    if (key[d] != '\0') {
        /* unsigned char keeps the index in 0..255 even if char is signed. */
        unsigned char slot = (unsigned char)key[d];
        TrieSTPtr child = root->next[slot];

        /* No child for this character: the key is not stored, so there is
         * nothing to delete and this node must not be touched. */
        if (child == NULL)
            return root;

        TrieSTPtr survivor = deleteKey(child, key, d + 1);
        if (survivor != NULL)
            return survivor;  /* child still exists: pass the result up */

        /* The child was freed: drop the now-dangling link. */
        root->next[slot] = NULL;
        if (root->count != 0)
            root->count--;

        /* Prune this node too if it has no other children and no word ends
         * here; otherwise (case 3) it is still needed by another key. */
        if (d > 0 && root->count == 0 && root->isEndOfWord == FALSE) {
            free(root);
            return NULL;
        }
        return root;
    }

    /* End of the key: this node no longer terminates a word. */
    root->isEndOfWord = FALSE;
    if (d > 0 && root->count == 0) {  /* case 2: leaf node, remove it */
        free(root);
        return NULL;
    }
    return root;  /* case 3: node still has children, keep it */
}

完整代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Number of entries in the sample word list that main() inserts.
#define LENGTH 4
// Not referenced by any code shown in this file.
#define WORDLENGTH 3
// Number of slots in each node's next[] array.
#define CHARLENGTH 256

// Integer truth values used for isEndOfWord and for searchTrie()'s result.
#define TRUE 1
#define FALSE 0

// One trie node. The same type is also used as the handle for the whole
// trie (TrieSTPtr), which points at the root node.
typedef struct TrieNode {
    // Child links; insert() and searchTrie() index this array with the next
    // character of the key. A NULL slot means "no child for that character".
    struct TrieNode *next[CHARLENGTH];
    // Set to TRUE by insert() on the node where an inserted key ends.
    int isEndOfWord;
    int count;  // number of non-NULL entries in next[]
    // Character passed to createTrieNode() when this node was created.
    char value;
} TrieNode, *TrieNodePtr, *TrieSTPtr;


TrieNodePtr createTrieNode(char key) {
    TrieNodePtr t = (TrieNodePtr)malloc(sizeof(TrieNode));
    memset(t, 0, sizeof(TrieNode));
    //t->isEndOfWord = FALSE;
    //t->count=0;
    t->value = key;
    return t;
}

TrieSTPtr createTrie() {
    TrieSTPtr t = (TrieSTPtr)malloc(sizeof(TrieNode));
    memset(t, 0, sizeof(TrieNode));
    return t;
}

/*
 * Insert the NUL-terminated string `key` into the trie rooted at `root`.
 *
 * Walks the trie one character at a time, creating any missing child node
 * with createTrieNode() and bumping the parent's `count` for each new child.
 * The node reached after the last character is marked as the end of a word.
 * Each character is also echoed to stdout with putchar() (trace output kept
 * from the original implementation).
 *
 * Does nothing when `root` or `key` is NULL. If a node cannot be allocated,
 * the function stops early: the nodes created so far stay in the trie but
 * the key is not marked as present.
 */
void insert(TrieSTPtr root, char *key) {
    if (root == NULL || key == NULL)
        return;

    TrieSTPtr node = root;
    for (int i = 0; key[i] != '\0'; i++) {
        /* Plain char may be signed; converting through unsigned char keeps
         * the index inside 0..255 for bytes >= 0x80 as well. */
        unsigned char slot = (unsigned char)key[i];

        putchar(key[i]);
        if (node->next[slot] == NULL) {
            TrieNodePtr child = createTrieNode(key[i]);
            if (child == NULL)
                return;  /* allocation failed: give up on this key */
            node->next[slot] = child;
            node->count++;
        }
        node = node->next[slot];
    }
    node->isEndOfWord = TRUE;
}

/*
 * Free every node below `t`, leaving `t` itself allocated and childless.
 *
 * The node `t` is not freed here; the caller still owns it and must free()
 * it separately. Does nothing when `t` is NULL.
 */
void deleteTrie(TrieSTPtr t) {
    if (t == NULL)
        return;

    for (int i = 0; i < CHARLENGTH; i++) {
        TrieNodePtr child = t->next[i];
        if (child == NULL)
            continue;
        deleteTrie(child);  /* release the child's own subtree first */
        free(child);
        t->next[i] = NULL;
    }
    /* All children are gone; keep the bookkeeping consistent so the node
     * can be reused by insert()/deleteKey(). */
    t->count = 0;
}

/*
 * Report whether `str` was stored in the trie as a complete word.
 *
 * Follows one child link per character of `str`. Returns TRUE only when the
 * whole path exists and its final node is flagged isEndOfWord; a string that
 * is merely a prefix of stored words yields FALSE. Returns FALSE when `root`
 * or `str` is NULL.
 */
int searchTrie(TrieSTPtr root, char *str)
{
    if (root == NULL || str == NULL)
        return FALSE;

    TrieSTPtr node = root;
    /* Compare against the character '\0', not the pointer constant NULL. */
    for (int i = 0; str[i] != '\0'; i++) {
        /* unsigned char keeps the index in 0..255 even if char is signed. */
        node = node->next[(unsigned char)str[i]];
        if (node == NULL)
            return FALSE;
    }
    return (node->isEndOfWord == TRUE) ? TRUE : FALSE;
}

/*
 * Remove `key` from the trie and prune nodes that are no longer needed.
 *
 * Parameters:
 *   root - node reached after consuming the first `d` characters of `key`;
 *          top-level callers pass the trie root.
 *   key  - NUL-terminated key to remove.
 *   d    - number of characters already consumed; top-level callers pass 0
 *          and must keep 0 <= d <= strlen(key).
 *
 * Returns NULL when `root` is NULL or when the node passed in was freed by
 * this call; otherwise returns the deepest node on the key's path that still
 * exists after the deletion.
 *
 * The node passed with d == 0 is treated as the trie root and is never
 * freed, so the caller's root pointer stays valid even when the trie becomes
 * empty. A key that is not stored leaves the trie unchanged.
 */
TrieSTPtr deleteKey(TrieSTPtr root, char *key, int d) {
    if (root == NULL)  /* walked past the stored path */
        return NULL;
    if (key == NULL)
        return root;

    if (key[d] != '\0') {
        /* unsigned char keeps the index in 0..255 even if char is signed. */
        unsigned char slot = (unsigned char)key[d];
        TrieSTPtr child = root->next[slot];

        /* No child for this character: the key is not stored, so there is
         * nothing to delete and this node must not be touched. */
        if (child == NULL)
            return root;

        TrieSTPtr survivor = deleteKey(child, key, d + 1);
        if (survivor != NULL)
            return survivor;  /* child still exists: pass the result up */

        /* The child was freed: drop the now-dangling link. */
        root->next[slot] = NULL;
        if (root->count != 0)
            root->count--;

        /* Prune this node too if it has no other children and no word ends
         * here; otherwise it is still needed by another key. */
        if (d > 0 && root->count == 0 && root->isEndOfWord == FALSE) {
            free(root);
            return NULL;
        }
        return root;
    }

    /* End of the key: this node no longer terminates a word. */
    root->isEndOfWord = FALSE;
    if (d > 0 && root->count == 0) {  /* leaf node: remove it */
        free(root);
        return NULL;
    }
    return root;  /* node still has children, keep it */
}


/*
 * Demo driver: builds a trie from four sample words, deletes the key "ab",
 * prints whether "ape" is still present (1 or 0), then releases the trie.
 *
 * Returns 0 on success, 1 if the root node could not be allocated.
 */
int main(void) {
    char *name[LENGTH] = {
        "abc",
        "def",
        "ape",
        "ap",
    };
    TrieSTPtr root = createTrie();
    if (root == NULL) {
        return 1;
    }
    for (int i = 0; i < LENGTH; i++) {
        insert(root, name[i]);
    }
    deleteKey(root, "ab", 0);
    printf("%d\n", searchTrie(root, "ape"));
    /* deleteTrie() frees only the nodes below root, so the root node is
     * released here. */
    deleteTrie(root);
    free(root);
    return 0;
}

你可能感兴趣的:(c,算法,字符串)