字符串匹配并获取最接近的字串

package com.spellCheckPlugin;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;

import com.spellCheckPlugin.TestSimple.TempBean;

/**
 * Finds, among a fixed list of candidate names, the one closest to a given
 * string. Closeness is a similarity ratio derived from the edit distance
 * between the two strings after punctuation and whitespace are removed.
 */
public class TestSimple {
    /** Candidate names a target string is matched against, in priority order for ties. */
    private static final String[] arr = new String[] {"values", "values-rGB", "values-fr", "values-ar", "values-es", "values-de"};

    /** Punctuation and whitespace characters, which are ignored when comparing. Compiled once. */
    private static final Pattern IGNORED_CHARS = Pattern.compile("[\\pP\\p{Punct}\\s]");

    /** Prints the closest candidate for three sample inputs. */
    public static void main(String[] args) {
        System.out.println(getSimilarityLang("value"));
        System.out.println(getSimilarityLang("values-rGs"));
        System.out.println(getSimilarityLang("values-fs"));
    }

    /**
     * Returns the candidate from {@code arr} with the highest similarity ratio
     * to {@code targetStr}.
     *
     * <p>When several candidates share the highest ratio, the one that appears
     * first in {@code arr} wins, because {@link Collections#sort} is stable.
     *
     * @param targetStr the string to match; may be {@code null}
     * @return the closest candidate, or {@code null} if {@code targetStr} is
     *         {@code null} or empty
     */
    public static String getSimilarityLang(String targetStr) {
        if (targetStr == null || targetStr.isEmpty()) {
            return null;
        }
        ArrayList<TempBean> candidates = new ArrayList<TempBean>(arr.length);
        for (String lang : arr) {
            candidates.add(new TempBean(getSimilarityRatio(lang, targetStr), lang));
        }
        // Highest ratio first. Float.compare yields a consistent total order,
        // which hand-written '<' / '>' branches do not guarantee.
        Collections.sort(candidates, new Comparator<TempBean>() {
            @Override
            public int compare(TempBean o1, TempBean o2) {
                return Float.compare(o2.getSimilarityRatio(), o1.getSimilarityRatio());
            }
        });
        if (candidates.size() > 0) {
            return candidates.get(0).getTargetLang();
        }
        return null;
    }

    /**
     * Computes how similar two strings are, ignoring punctuation and whitespace.
     *
     * <p>The result is {@code 1 - distance / longerLength}, where
     * {@code distance} is the edit distance between the normalized strings:
     * {@code 1.0} means identical and {@code 0.0} means completely different.
     * If both strings are empty after normalization they are treated as
     * identical and {@code 1.0} is returned (the plain formula would divide
     * zero by zero and produce {@code NaN}).
     *
     * @param str    first string; must not be {@code null}
     * @param target second string; must not be {@code null}
     * @return the similarity ratio between 0.0 and 1.0
     * @throws NullPointerException if either argument is {@code null}
     */
    public static float getSimilarityRatio(String str, String target) {
        // Strip whitespace, line breaks and punctuation before comparing.
        String left = IGNORED_CHARS.matcher(str).replaceAll("");
        String right = IGNORED_CHARS.matcher(target).replaceAll("");
        int longest = Math.max(left.length(), right.length());
        if (longest == 0) {
            return 1.0f;
        }
        return 1 - (float) compare(left, right) / longest;
    }

    /**
     * Computes the edit distance between two strings: the minimum number of
     * single-character insertions, deletions and substitutions that turn
     * {@code str} into {@code target}.
     *
     * @param str    source string
     * @param target string to transform {@code str} into
     * @return the edit distance
     */
    private static int compare(String str, String target) {
        int n = str.length();
        int m = target.length();
        if (n == 0) {
            return m;
        }
        if (m == 0) {
            return n;
        }
        // d[i][j] = distance between the first i chars of str and the first j chars of target.
        int[][] d = new int[n + 1][m + 1];
        for (int i = 0; i <= n; i++) { // first column: delete i characters
            d[i][0] = i;
        }
        for (int j = 0; j <= m; j++) { // first row: insert j characters
            d[0][j] = j;
        }
        for (int i = 1; i <= n; i++) {
            char ch1 = str.charAt(i - 1);
            for (int j = 1; j <= m; j++) {
                // Substitution costs nothing when the characters already match.
                int cost = (ch1 == target.charAt(j - 1)) ? 0 : 1;
                // Cheapest of: cell above + 1, cell to the left + 1, diagonal + cost.
                d[i][j] = min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
            }
        }
        return d[n][m];
    }

    /** Returns the smallest of three ints. */
    private static int min(int one, int two, int three) {
        return Math.min(Math.min(one, two), three);
    }

    /** Pairs a candidate name with its similarity ratio to the target string. */
    static class TempBean {
        private float similarityRatio;
        private String targetLang;

        public TempBean(float similarityRatio, String targetLang) {
            super();
            this.similarityRatio = similarityRatio;
            this.targetLang = targetLang;
        }

        public TempBean() {
            super();
        }

        public float getSimilarityRatio() {
            return similarityRatio;
        }

        public void setSimilarityRatio(float similarityRatio) {
            this.similarityRatio = similarityRatio;
        }

        public String getTargetLang() {
            return targetLang;
        }

        public void setTargetLang(String targetLang) {
            this.targetLang = targetLang;
        }
    }
}

 

你可能感兴趣的:(字符串匹配并获取最接近的字串)