计算汉字相似程度

package com.thunisoft.in4.cm.associate.util;



import java.util.List;

import java.util.Map;



/**

 * 编辑距离算法

 * 

 * @author fanxf

 * 

 */

public class HanziParse {

    

    /**

     * 计算矢量距离 Levenshtein Distance(LD)

     * 

     * @param str1

     *            str1

     * @param str2

     *            str2

     * @return ld; 0:完全相等,非零代表相似距離

     */

    private static int distance(String str1, String str2) {



        int n = str1.length();

        int m = str2.length();

        if (n == 0) {

            return m;

        }

        if (m == 0) {

            return n;

        }

        /** 二维数组,存放中间距离 */

        int[][] distance = new int[n + 1][m + 1];

        /** 初始化数据 */

        int i = 0;

        for (i = 0; i <= n; i++) {

            distance[i][0] = i;

        }

        int j = 0;

        for (j = 0; j <= m; j++) {

            distance[0][j] = j;

        }

        /** 计算中间数据 */

        int cost = 0;

        for (i = 1; i <= n; i++) {

            char ch1 = str1.charAt(i - 1);

            for (j = 1; j <= m; j++) {

                char ch2 = str2.charAt(j - 1);

                if (ch1 == ch2) {

                    cost = 0;

                } else {

                    cost = 1;

                }

                /** 获取最小值 */

                distance[i][j] = min(distance[i - 1][j] + 1,

                        distance[i][j - 1] + 1, distance[i - 1][j - 1] + cost);

            }

        }

        return distance[n][m];

    }



    /**

     * 计算最小值

     * 

     * @param one

     * @param two

     * @param three

     * @return 返回最小值

     */

    private static int min(int one, int two, int three) {

        int min = one;

        if (two < min) {

            min = two;

        }

        if (three < min) {

            min = three;

        }

        return min;

    }



    /**

     * 计算相似度

     * 

     * @param str1

     *            str1 待匹配数据

     * @param str2

     *            str2 待匹配数据

     * @return sim,匹配相似度

     */

    public static float similarity(String str1, String str2) {

        int ld = distance(str1, str2);

        return 1 - (float) ld / Math.max(str1.length(), str2.length());

    }





    /**

     * 测试

     * 

     * @param args

     */

    public static void main(String[] args) {

         HanziParse ld = new HanziParse();

         float num = ld.similarity("vvv", "vvv");

         System.out.println(num);



    }

}

 

你可能感兴趣的:(计算)