求2个字符串的最短编辑距离 java 实现

EditStepInfo.java:


import lombok.Getter;
import lombok.Setter;

import java.io.Serializable;
import java.util.List;


@Getter
@Setter
public class EditStepInfo implements Serializable {

    private String str1;
    private String str2;

    // str1和 str2 的最短编辑路基
    private int sed;

    private List steps;


    private String tempString;


}

StepVO.java:


import lombok.Getter;
import lombok.Setter;

import java.io.Serializable;


@Getter
@Setter
public class StepVO implements Serializable {

    public static final String OPT_TYPE_ADD = "add";
    public static final String OPT_TYPE_DELETE = "delete";
    public static final String OPT_TYPE_UPDATE = "UPDATE";

    /**
     * 当前变换描述
     */
    private String desc;

    private int optIndex;

    private Character optChar1;
    private Character optChar2;

    private String optType;

    /**
     * 当前变换成的字符串
     */
    private String tempString;

    public static void main(String[] args) {
        String s = "abcdf";
        Character c = s.charAt(0);
        System.out.println( c.toString() );
    }

}

ShortestEditDistanceTest.java:


import java.util.ArrayList;
import java.util.List;


public class ShortestEditDistanceTest {

    private static EditStepInfo[][] MAT_SED = null;

    public static void main(String[] args) {
        //   m o t h e r
        //   m o n s t e r
        String string1 = "calculate";
        String string2 = "caketakes";
        MAT_SED = new EditStepInfo[ string1.length() ][ string2.length() ];
        calculateShortestEditDistance(string1, string2);

        for( int i=0;i steps = editStepInfo.getSteps();
        if( steps == null || steps.size() == 0 ){
            System.out.println( "\ttempString = " + editStepInfo.getTempString() );
        }else {
            String tempString = editStepInfo.getStr1();
            for( StepVO step:steps ){
                String optType = step.getOptType();
                int optIndex = step.getOptIndex();
                Character optChar1 = step.getOptChar1();
                if( StepVO.OPT_TYPE_ADD.equals( optType ) ){
                    // 新增
                    // 0 1 2 3 4
                    tempString = addChar2String( tempString,optChar1,optIndex );
                }else if( StepVO.OPT_TYPE_DELETE.equals( optType ) ){
                    // 删除
                    // tempString = removeCharacter( tempString,optIndex );
                    tempString = updateCharacterInString( tempString,optIndex,' ' );
                }else if( StepVO.OPT_TYPE_UPDATE.equals( optType ) ){
                    // 修改
                    Character optChar2 = step.getOptChar2();
                    tempString = updateCharacterInString( tempString,optIndex,optChar2 );
                }
                System.out.println( "\t" + step.getDesc() + ":" + tempString  + " optIndex = " + optIndex);
            }
        }
    }

    private static String updateCharacterInString(String str, int updateIndex, Character character) {
        // 01234
        if( updateIndex < 0  ){
            updateIndex = 0;
        }else if( updateIndex >= str.length() ){
            updateIndex = str.length() - 1;
        }
        int length = str.length();
        String str_result = "";
        for( int i=0;i= str.length() ){
            removeIndex = str.length() - 1;
        }
        if( removeIndex == 0 ){
            return str.substring( 1 );
        }else if( removeIndex == str.length() - 1 ){
            return str.substring( 0,removeIndex );
        }else {
            return str.substring( 0,removeIndex  ) + str.substring( removeIndex + 1 );
        }
    }

    private static String addChar2String(String str,Character character, int addIndex) {
        if( addIndex > str.length() ){
            addIndex = str.length();
        }
        return str.substring( 0,addIndex ) + character + str.substring( addIndex );
    }

    private static EditStepInfo calculateShortestEditDistance( String string1,String string2 ){
        if( string1 == null || string1.length() == 0 ){
            EditStepInfo editStepInfo = new EditStepInfo();
            editStepInfo.setStr1( string1 );
            editStepInfo.setStr2( string2 );
            editStepInfo.setSed( string2.length() );
            return editStepInfo;
        }
        if( string2 == null || string2.length() == 0){
            EditStepInfo editStepInfo = new EditStepInfo();
            editStepInfo.setStr1( string1 );
            editStepInfo.setStr2( string2 );
            editStepInfo.setSed( string1.length() );
            return editStepInfo;
        }
        int len1 = string1.length();
        int len2 = string2.length();
        for( int i=0;i steps = new ArrayList<>();
                            editStepInfo.setSteps( steps );
                            editStepInfo.setTempString( str1 );
                            editStepInfo.setSed( steps.size() );
                        }else {
                            // str1 和 str2 的长度均为1,并且 str1 和 str2 不相等
                            // a
                            // b
                            List steps = new ArrayList<>();
                            StepVO step = new StepVO();
                            step.setDesc( "将 " + str1 + " 修改为 " + str2 );
                            step.setTempString( str2 );
                            step.setOptType( StepVO.OPT_TYPE_UPDATE );
                            step.setOptChar1( str1.charAt( 0 ) );
                            step.setOptIndex( 0 );
                            step.setOptChar2( str2.charAt( 0 ) );
                            steps.add( step );
                            editStepInfo.setSteps( steps );
                            editStepInfo.setTempString( getLastTempString( steps ) );
                            editStepInfo.setSed( steps.size() );
                        }
                    }else {
                        if( str2.contains( str1 ) ){
                            // str1 的长度为1,str2 的长度大于1,并且 str1 在 str2 中出现
                            //   a
                            // ..a...
                            //  组装编辑步骤信息
                            List steps = buildEditSteps(  str1.charAt(0),str2 );
                            editStepInfo.setSteps( steps );
                            editStepInfo.setTempString( getLastTempString( steps ) );
                            editStepInfo.setSed( steps.size() );
                        }else {
                            // str1 的长度为1,str2的长度大于1,并且str1在 str2中不存在
                            //   a
                            // ..b...
                            //  组装编辑步骤信息
                            List steps = buildEditSteps(str1.charAt(0), str2);
                            editStepInfo.setSteps( steps );
                            editStepInfo.setTempString( getLastTempString( steps ) );
                            editStepInfo.setSed( steps.size() );
                        }
                    }
                }else {
                    if( str2.length() == 1 ){
                        if( str1.contains( str2 ) ){
                            // ...a..
                            //    a
                            //  组装编辑步骤信息
                            List steps = buildEditSteps(str1, str2.charAt(0));
                            editStepInfo.setSteps( steps );
                            editStepInfo.setTempString( getLastTempString( steps ) );
                            editStepInfo.setSed( steps.size() );
                        }else {
                            // ...b..
                            //    a
                            //  组装编辑步骤信息
                            List steps = buildEditSteps(str1, str2.charAt(0));
                            editStepInfo.setSteps( steps );
                            editStepInfo.setTempString( getLastTempString( steps ) );
                            editStepInfo.setSed( steps.size() );
                        }
                    }else {
                        Character lastChar1 = getLastChar(str1);
                        Character lastChar2 = getLastChar(str2);
                        if( lastChar1.equals( lastChar2 ) ){
                            //    ------a
                            // ---------a
                            //  组装编辑步骤信息
                            EditStepInfo editStepInfo_prev = MAT_SED[i - 1][j - 1];
                            List steps = new ArrayList<>();
                            List steps_prev = editStepInfo_prev.getSteps();
                            String tempString = editStepInfo_prev.getTempString() + lastChar1;
                            if( steps_prev != null ){
                                steps.addAll( steps_prev );
                            }
                            editStepInfo.setSteps( steps );
                            editStepInfo.setTempString( tempString );
                            editStepInfo.setSed( steps.size() );
                        }else {
                            //    ----- a
                            // ........ b
                            // 1. str1 的 "-----" 部分转换为 str2,再删除 a
                            // 2. str1 转换为 str2 的 "........" 部分,再添加 b
                            // 3. str1 的 "-----" 部分转换为 str2 的 "........" 部分,再将a修改为 b
                            // 求 方法1、2、3中选一个最小的编辑步骤作为最终的编辑步骤
                            EditStepInfo editStepInfo_prev_1 = MAT_SED[i - 1][j];
                            EditStepInfo editStepInfo_prev_2 = MAT_SED[i][j - 1];
                            EditStepInfo editStepInfo_prev_3 = MAT_SED[i - 1][j - 1];
                            EditStepInfo editStepInfo_prev_min = editStepInfo_prev_1;
                            int minMethodNum = 1;
                            if( editStepInfo_prev_2.getSed() < editStepInfo_prev_min.getSed() ){
                                editStepInfo_prev_min = editStepInfo_prev_2;
                                minMethodNum = 2;
                            }
                            if( editStepInfo_prev_3.getSed() < editStepInfo_prev_min.getSed() ){
                                editStepInfo_prev_min = editStepInfo_prev_3;
                                minMethodNum = 3;
                            }
                            List steps = new ArrayList<>();
                            List steps_prev_min = editStepInfo_prev_min.getSteps();
                            if( steps_prev_min != null ){
                                steps.addAll( steps_prev_min );
                            }
                            StepVO step_last = null;
                            if( steps.size() > 0 ){
                                step_last = steps.get(steps.size() - 1);
                            }
                            String tempString = editStepInfo_prev_min.getTempString();
                            StepVO step = new StepVO();
                            if( minMethodNum == 1 ){
                                step.setDesc( "删除 " + lastChar1 );
                                step.setOptType( StepVO.OPT_TYPE_DELETE );
                                step.setOptChar1( lastChar1 );
                                step.setOptIndex( str1.length() - 1 );
                                // todo 估计有问题???
                                step.setTempString( tempString );
                            }else if( minMethodNum == 2 ){
                                step.setDesc( "添加 " + lastChar2 );
                                step.setOptType( StepVO.OPT_TYPE_ADD );
                                step.setOptChar1( lastChar2 );
                                if( step_last != null ){
                                    step.setOptIndex( step_last.getOptIndex() + 1 );
                                }else {
                                    step.setOptIndex( str1.length() );
                                }
                                // todo 估计有问题???
                                step.setTempString( tempString + lastChar2 );
                            }else if( minMethodNum == 3 ){
                                step.setDesc( "修改 " + lastChar1 + " 为 " + lastChar2 );
                                step.setOptType( StepVO.OPT_TYPE_UPDATE );
                                step.setOptChar1( lastChar1 );
                                step.setOptChar2( lastChar2 );
                                step.setOptIndex( str1.length() - 1 );
                                // todo 估计有问题???
                                step.setTempString( tempString + lastChar2 );
                            }
                            steps.add( step );
                            editStepInfo.setSteps( steps );
                            editStepInfo.setTempString( tempString );
                            editStepInfo.setSed( steps.size() );
                        }
                    }
                }
                MAT_SED[ i ][ j ] = editStepInfo;
            }
        }
        return MAT_SED[ string1.length() - 1 ][ string2.length() - 1 ];
    }

    private static String getLastTempString(List steps) {
        if( steps == null || steps.size() == 0 ){
            return null;
        }
        return steps.get( steps.size() - 1 ).getTempString();
    }

    /**
     * 组装将字符 srcChar 转换成字符串 targetString 的编辑步骤
     * @param srcChar 例如:a
     * @param targetString 例如:bcdefg
     * @return
     */
    private static List buildEditSteps(Character srcChar, String targetString) {
        boolean hasMeet = false;
        int length = targetString.length();
        List steps = new ArrayList<>();
        for( int i = 0;i < length;i++ ){
            Character char2 = targetString.charAt( i );
            if( hasMeet ){
                StepVO step = new StepVO();
                step.setDesc( "添加 " + char2 );
                step.setOptChar1( char2 );
                step.setOptIndex( i );
                step.setOptType( StepVO.OPT_TYPE_ADD );
                steps.add( step );
            }else {
                if( srcChar.equals( char2 ) ){
                    // do nothing
                    hasMeet = true;
                }else {
                    StepVO step = new StepVO();
                    step.setDesc( "添加 " + char2 );
                    step.setOptChar1( char2 );
                    step.setOptIndex( i );
                    step.setOptType( StepVO.OPT_TYPE_ADD );
                    steps.add( step );
                }
            }
        }
        if( !hasMeet ){
            // 此种情况只发生在 targetString 中不包含 srcChar 时
            StepVO step = new StepVO();
            step.setDesc( "删除 " + srcChar );
            step.setOptChar1( srcChar );
            step.setOptIndex( 0 );
            step.setOptType( StepVO.OPT_TYPE_DELETE );
            steps.add( 0,step );
        }
        //  设置 每个步骤生成的  tempString
        String tempString = String.valueOf( srcChar );
        for( StepVO step:steps ){
            String optType = step.getOptType();
            Character optChar1 = step.getOptChar1();
            if( StepVO.OPT_TYPE_DELETE.equals( optType ) ){
                tempString = tempString.replaceFirst( optChar1.toString(),"" );
            }else if( StepVO.OPT_TYPE_ADD.equals( optType ) ){
                tempString += optChar1;
            }
            step.setTempString( tempString );
        }
        return steps;
    }

    private static List buildEditSteps(String srcString, Character targetChar) {
        // abcdefg
        // c
        boolean hasMeet = false;
        int length = srcString.length();
        List steps = new ArrayList<>();
        for( int i = 0;i < length;i++ ){
            Character char1 = srcString.charAt( i );
            if( hasMeet ){
                StepVO step = new StepVO();
                step.setDesc( "删除 " + char1 );
                step.setOptChar1( char1 );
                step.setOptIndex( i );
                step.setOptType( StepVO.OPT_TYPE_DELETE );
                steps.add( step );
            }else {
                if( targetChar.equals( char1 ) ){
                    // do nothing
                    hasMeet  =true;
                }else {
                    StepVO step = new StepVO();
                    step.setDesc( "删除 " + char1 );
                    step.setOptChar1( char1 );
                    step.setOptIndex( i );
                    step.setOptType( StepVO.OPT_TYPE_DELETE );
                    steps.add( step );
                }
            }
        }
        if( !hasMeet ){
            StepVO step = new StepVO();
            step.setDesc( "添加 " + targetChar );
            step.setOptChar1( targetChar );
            step.setOptIndex( 0 );
            step.setOptType( StepVO.OPT_TYPE_ADD );
            steps.add( 0,step );
        }
        String tempString = srcString;
        for( StepVO step:steps ){
            String optType = step.getOptType();
            Character optChar1 = step.getOptChar1();
            if( StepVO.OPT_TYPE_ADD.equals( optType ) ){
                tempString += optChar1;
            }else if( StepVO.OPT_TYPE_DELETE.equals( optType ) ){
                tempString = tempString.replaceFirst( optChar1.toString(),"" );
            }
            step.setTempString( tempString );
        }
        return steps;
    }


    private static Character getLastChar(String str) {
        if( str == null || str.length() == 0 ){
            return null;
        }
        return  str.charAt(str.length() - 1);
    }
}

mother
monster
最小编辑距离:3
编辑步骤:
添加 n
添加 s
删除 h

你可能感兴趣的:(字符串相似度,算法,动态规划,java,字符串相似度,字符串最短编辑距离,动态规划算法,数据结构)