拼写纠错-C#实现

好久没用 C# 写项目了,语法都忘了 :( 但项目是用 C# 写的,也只好重新学习咯。
拼写纠错有两个核心要素:数据字典和 BK 树,并用编辑距离来度量两个词之间的距离。纠错时,在构建好的 BK 树上查找与待纠错词的编辑距离不超过给定阈值 d 的节点集合,然后输出即可。
拼写纠错-C#实现_第1张图片

BK树类:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace PYcheck
{
    /// <summary>
    /// Computes the distance between two items stored in a <see cref="BKTree{T}"/>.
    /// </summary>
    /// <param name="o1">The first item.</param>
    /// <param name="o2">The second item.</param>
    /// <returns>
    /// The distance between the two items. The tree uses this value as a dictionary
    /// key for child edges and as the bound when pruning a search.
    /// </returns>
    public delegate int DistanceFunction(object o1, object o2);

    /// <summary>
    /// A single tree node: the stored item plus its children, keyed by the distance
    /// between this node's item and the child's item.
    /// </summary>
    /// <typeparam name="T">Type of the stored item.</typeparam>
    public class Node<T>
    {
        /// <summary>The item stored in this node.</summary>
        public T item;

        /// <summary>Child nodes, keyed by their distance to <see cref="item"/>.</summary>
        public Dictionary<int, Node<T>> children;

        /// <summary>Creates a node holding <paramref name="item"/> with no children.</summary>
        /// <param name="item">The item to store.</param>
        public Node(T item)
        {
            this.item = item;
            this.children = new Dictionary<int, Node<T>>();
        }
    }

    /// <summary>
    /// A BK-tree: items are organised by their pairwise distance so that all items
    /// within a given radius of a query can be collected without visiting every node.
    /// </summary>
    /// <remarks>
    /// The pruning performed by <see cref="Search"/> only visits children whose edge
    /// distance lies in [d - radius, d + radius]; this is only guaranteed to find every
    /// match when the supplied distance function satisfies the triangle inequality.
    /// </remarks>
    /// <typeparam name="T">Type of the stored items.</typeparam>
    class BKTree<T>
    {
        private Node<T> rootNode;
        private DistanceFunction distanceFunction;
        private int length;
        private int modCount;

        /// <summary>Creates a tree whose root holds <paramref name="t"/>.</summary>
        /// <param name="t">The item stored in the root node.</param>
        /// <param name="distanceFunction">Distance function used for all comparisons; must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="distanceFunction"/> is null.
        /// </exception>
        public BKTree(T t, DistanceFunction distanceFunction = null)
        {
            if (distanceFunction == null)
            {
                throw new ArgumentNullException("distanceFunction", "distanceFunction cannot be null. ");
            }

            rootNode = new Node<T>(t);
            this.distanceFunction = distanceFunction;
            // The root item is already stored, so the tree starts with one element.
            length = 1;
            modCount = 0;
        }

        /// <summary>Number of items currently stored in the tree (including the root).</summary>
        public int Count
        {
            get { return length; }
        }

        /// <summary>Inserts <paramref name="t"/> into the tree.</summary>
        /// <param name="t">The item to insert; must not be null.</param>
        /// <returns>
        /// <c>true</c> if the item was added; <c>false</c> if a node at distance 0 that
        /// is also <see cref="object.Equals(object)"/>-equal to it already exists.
        /// </returns>
        /// <exception cref="NullReferenceException"><paramref name="t"/> is null.</exception>
        public bool AddNode(T t)
        {
            if (t == null)
            {
                // Exception type kept as-is so existing callers that catch it keep working.
                throw new NullReferenceException();
            }

            Node<T> current = rootNode;
            while (true)
            {
                int distance = distanceFunction(current.item, t);
                if (distance == 0 && t.Equals(current.item))
                {
                    return false;
                }

                // Descend along the edge labelled with this distance; when there is no
                // such edge, the new item becomes that child.
                Node<T> next;
                if (!current.children.TryGetValue(distance, out next))
                {
                    current.children.Add(distance, new Node<T>(t));
                    length++;
                    modCount++;
                    return true;
                }

                current = next;
            }
        }

        /// <summary>
        /// Collects every stored item whose distance to <paramref name="t"/> is at most
        /// <paramref name="radius"/>.
        /// </summary>
        /// <param name="t">The query item.</param>
        /// <param name="radius">Maximum allowed distance (inclusive).</param>
        /// <returns>The set of matching items; empty when nothing matches.</returns>
        public HashSet<T> Search(T t, int radius)
        {
            HashSet<T> res = new HashSet<T>();
            if (rootNode != null)
            {
                Query(rootNode, t, radius, res);
            }
            return res;
        }

        /// <summary>
        /// Walks the subtree rooted at <paramref name="node"/> and adds every item within
        /// <paramref name="radius"/> of <paramref name="t"/> to <paramref name="res"/>.
        /// </summary>
        /// <param name="node">Root of the subtree to examine.</param>
        /// <param name="t">The query item.</param>
        /// <param name="radius">Maximum allowed distance (inclusive).</param>
        /// <param name="res">Set receiving the matching items.</param>
        private void Query(Node<T> node, T t, int radius, HashSet<T> res)
        {
            // Explicit stack instead of recursion so a very deep tree cannot overflow the call stack.
            Stack<Node<T>> pending = new Stack<Node<T>>();
            pending.Push(node);

            while (pending.Count > 0)
            {
                Node<T> current = pending.Pop();
                int distance = distanceFunction(current.item, t);
                if (distance <= radius)
                {
                    res.Add(current.item);
                }

                // Bounds are computed in 64-bit so that a large radius cannot overflow
                // and silently skip every child.
                long lowest = Math.Max((long)distance - radius, 0L);
                long highest = (long)distance + radius;

                // Filter the existing edges rather than probing every key in the range:
                // no exceptions for missing keys and no cost proportional to the radius.
                foreach (KeyValuePair<int, Node<T>> edge in current.children)
                {
                    if (edge.Key >= lowest && edge.Key <= highest)
                    {
                        pending.Push(edge.Value);
                    }
                }
            }
        }
    }
}

距离函数:

        /// <summary>
        /// Computes the Levenshtein (edit) distance between two strings: the minimum
        /// number of single-character insertions, deletions and substitutions needed
        /// to turn one into the other.
        /// </summary>
        /// <param name="obj1">The first string (typed as object to match the distance delegate).</param>
        /// <param name="obj2">The second string (typed as object to match the distance delegate).</param>
        /// <returns>The edit distance; 0 when the strings are identical.</returns>
        /// <exception cref="ArgumentException">
        /// Either argument is null or is not a <see cref="string"/>.
        /// </exception>
        static int LevenshteinDistance(object obj1, object obj2)
        {
            string shorter = obj1 as string;
            string longer = obj2 as string;
            if (shorter == null)
            {
                throw new ArgumentException("Argument must be a non-null string.", "obj1");
            }
            if (longer == null)
            {
                throw new ArgumentException("Argument must be a non-null string.", "obj2");
            }

            // The distance is symmetric, so order the inputs to keep the working rows
            // as short as possible.
            if (shorter.Length > longer.Length)
            {
                string swap = shorter;
                shorter = longer;
                longer = swap;
            }

            // An empty string is |other| insertions away. Because shorter is never
            // longer than longer, this also covers the case where both are empty.
            if (shorter.Length == 0)
            {
                return longer.Length;
            }

            // Only the previous and the current row of the DP table are ever read,
            // so two rows replace the full (n+1) x (m+1) matrix.
            int width = shorter.Length + 1;
            int[] previousRow = new int[width];
            int[] currentRow = new int[width];
            for (int col = 0; col < width; col++)
            {
                previousRow[col] = col;
            }

            for (int row = 1; row <= longer.Length; row++)
            {
                currentRow[0] = row;
                for (int col = 1; col < width; col++)
                {
                    int deletion = previousRow[col] + 1;
                    int insertion = currentRow[col - 1] + 1;
                    int substitution = previousRow[col - 1];
                    if (shorter[col - 1] != longer[row - 1])
                    {
                        substitution += 1;
                    }
                    currentRow[col] = Math.Min(Math.Min(insertion, deletion), substitution);
                }

                int[] recycled = previousRow;
                previousRow = currentRow;
                currentRow = recycled;
            }

            // After the final swap the last computed row is in previousRow.
            return previousRow[width - 1];
        }

你可能感兴趣的:(拼写纠错;BK树)