Apache Commons LevenshteinDistance莱文斯坦(相似度)算法

Apache Commons LevenshteinDistance 的构造函数 LevenshteinDistance(final Integer threshold):如果阈值不为 null,则距离计算将被限制在最大长度以内。

介绍

如果阈值不为 null,则距离计算将被限制在最大长度以内。

如果阈值为 null,则使用该算法的无限制版本。

LevenshteinDistance() 方法是一个构造函数。

语法

LevenshteinDistance 类的构造函数 LevenshteinDistance() 声明如下:

复制

public LevenshteinDistance(final Integer threshold)

参数

LevenshteinDistance() 方法具有以下参数:

  • Integer threshold - 如果此值为 null,则距离计算将不受限制。该值不能为负数。

以下代码演示如何使用 Apache Commons LevenshteinDistance 的构造函数 LevenshteinDistance(final Integer threshold)。

例 1

复制

import org.apache.commons.text.*;
import org.apache.commons.text.diff.*;
import org.apache.commons.text.similarity.*;
import org.apache.commons.text.translate.*;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * Command visitor that prints every command it is handed and keeps a running
 * tally of how many insert, keep and delete commands it has seen.
 *
 * <p>The type parameter is deliberately named {@code T}: calling it
 * {@code Character} would shadow {@link java.lang.Character} inside this class
 * and make the method signatures look as if they were bound to the JDK type.
 *
 * @param <T> type of the element carried by each visited command
 */
class ShowVisitor<T> implements CommandVisitor<T> {
    /** Number of insert commands visited so far. */
    private int inserts = 0;
    /** Number of keep commands visited so far. */
    private int keeps = 0;
    /** Number of delete commands visited so far. */
    private int deletes = 0;

    /**
     * Counts an insert command and prints the inserted element.
     *
     * @param element element carried by the insert command
     */
    public void visitInsertCommand(T element) {
        ++inserts;
        System.out.println(String.format("insert %s", element));
    }

    /**
     * Counts a keep command and prints the kept element.
     *
     * @param element element carried by the keep command
     */
    public void visitKeepCommand(T element) {
        ++keeps;
        System.out.println(String.format("keep   %s", element));
    }

    /**
     * Counts a delete command and prints the deleted element.
     *
     * @param element element carried by the delete command
     */
    public void visitDeleteCommand(T element) {
        ++deletes;
        System.out.println(String.format("delete %s", element));
    }

    /**
     * Prints the totals accumulated so far, in the order inserts, deletes, keeps.
     */
    public void printStats() {
        System.out.println(String.format("%d inserts, %d deletes, %d keeps", inserts, deletes, keeps));
    }
}

/**
 * Runnable collection of small Apache Commons Text demonstrations. Each
 * {@code ...Example} method prints a titled section to standard output and then
 * the results of a handful of library calls.
 */
public class CommonsTextExamples {
    /**
     * Runs every example in a fixed order.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        caseUtilsExample();
        stringEscapeUtilsExample();
        stringSubstitutorExample();
        wordUtilsExample();

        diffExample();
        translateExample();

        similaritiesExample();
        sentenceSimilarityExample();
        distancesExample();
        sentenceDistanceExample();
    }

    /**
     * Prints a blank line, the title {@code "Examples of <example>"} and an
     * underline of dashes as long as the title.
     *
     * @param example name of the feature being demonstrated
     */
    private static void printExampleHeader(String example) {
        String header = "Examples of " + example;
        System.out.println("\n" + header);

        // The underline is built with TextStringBuilder, so this helper doubles
        // as an example of that class.
        TextStringBuilder builder = new TextStringBuilder();
        System.out.println(builder.appendPadding(header.length(), '-').toString());
    }

    /**
     * Counts how often each token occurs in a sentence, splitting on single
     * space characters.
     *
     * @param sentence text to tokenize
     * @return a new mutable map from token to occurrence count
     */
    private static Map<CharSequence, Integer> countTokens(String sentence) {
        Map<CharSequence, Integer> counts = new HashMap<>();
        for (String token : sentence.split(" ")) {
            counts.merge(token, 1, Integer::sum);
        }
        return counts;
    }

    /**
     * Prints a dash-separated string converted by {@code CaseUtils.toCamelCase},
     * once with the boolean argument {@code true} and once with {@code false}.
     */
    public static void caseUtilsExample() {
        printExampleHeader("CaseUtils");

        String string = "java-programming-language";

        System.out.println(CaseUtils.toCamelCase(string, true, '-'));
        System.out.println(CaseUtils.toCamelCase(string, false, '-'));
    }

    /**
     * Prints the same string escaped for HTML 4, XML 1.1 and CSV, followed by a
     * string assembled with the {@code StringEscapeUtils} builder.
     */
    public static void stringEscapeUtilsExample() {
        printExampleHeader("StringEscapeUtils");

        String string = "Department, R&D";

        System.out.println(StringEscapeUtils.escapeHtml4(string));
        System.out.println(StringEscapeUtils.escapeXml11(string));
        System.out.println(StringEscapeUtils.escapeCsv(string));

        // The builder mixes text added with append() and text added with escape()
        System.out.println(StringEscapeUtils.builder(StringEscapeUtils.ESCAPE_HTML4).append("R&D dept: ")
                .escape(string).toString());
    }

    /**
     * Prints a template with {@code ${...}} placeholders resolved three ways:
     * through the static {@code replace} method, through a
     * {@code StringSubstitutor} instance, and through the interpolator returned
     * by {@code StringSubstitutor.createInterpolator()}.
     */
    public static void stringSubstitutorExample() {
        printExampleHeader("StringSubstitutor");

        Map<String, String> substitutions = new HashMap<>();
        substitutions.put("city", "London");
        substitutions.put("country", "England");

        // With static method
        System.out.println(StringSubstitutor.replace("${city} is the capital of ${country}", substitutions));

        // With StringSubstitutor object
        StringSubstitutor sub = new StringSubstitutor(substitutions);
        System.out.println(sub.replace("${city} is the capital of ${country}"));

        StringSubstitutor interpolator = StringSubstitutor.createInterpolator();
        System.out.println(interpolator.replace("Base64 encoder: ${base64Encoder:Secret password}"));
    }

    /**
     * Prints the results of {@code WordUtils} abbreviation, initials,
     * case-changing and wrapping calls on a few sample strings.
     */
    public static void wordUtilsExample() {
        printExampleHeader("WordUtils");

        String longString = "This is a very long string, from https://www.example.org";
        String allLower = "all lower but ONE";
        String allCapitalized = "All Capitalized But ONE";

        System.out.println("\nWordUtils: Abbreviation");
        // Take at least 9 characters, cutting to 12 characters if no space is found before
        System.out.println(WordUtils.abbreviate(longString, 9, 12, " ..."));
        // Take at least 10 characters, cutting to 12 characters if no space is found before
        System.out.println(WordUtils.abbreviate(longString, 10, 12, " ..."));
        // Take at least 10 characters, then cut on the first space wherever it is
        System.out.println(WordUtils.abbreviate(longString, 10, -1, " ..."));

        System.out.println("\nWordUtils: Initials");
        System.out.println(WordUtils.initials(allLower));
        System.out.println(WordUtils.initials(allCapitalized));

        System.out.println("\nWordUtils: Case change");
        // Doesn't lowercase the uppercase characters
        System.out.println(WordUtils.capitalize(allLower));
        // Lowercases everything, then capitalizes the first letter of each word
        System.out.println(WordUtils.capitalizeFully(allLower));
        // Lowercases the first letter of each word
        System.out.println(WordUtils.uncapitalize(allCapitalized));
        // Swaps the case of each character
        System.out.println(WordUtils.swapCase(allLower));

        System.out.println("\nWordUtils: Wrapping");
        // Line length is 10, uses '\n' as a line break, does not break words longer than the line
        System.out.println(WordUtils.wrap(longString, 10, "\n", false) + "\n");

        // Line length is 10, uses '\n' as a line break, breaks words longer than the line
        System.out.println(WordUtils.wrap(longString, 10, "\n", true) + "\n");

        // Line length is 10, uses '\n' as a line break, breaks words longer than the line, also breaks on commas
        System.out.println(WordUtils.wrap(longString, 10, "\n", true, ",") + "\n");
    }

    /**
     * Computes the edit script between two words with {@code StringsComparator},
     * prints its LCS length and modification count, then replays the script
     * through a {@code ShowVisitor} and prints the visitor's totals.
     */
    public static void diffExample() {
        printExampleHeader("diff");

        String s1 = "hyperspace";
        String s2 = "cyberscape";

        StringsComparator comparator = new StringsComparator(s1, s2);
        EditScript<Character> script = comparator.getScript();

        System.out.println(
                "Longest Common Subsequence length (number of \"keep\" commands): " + script.getLCSLength());
        System.out.println("Effective modifications (number of \"insert\" and \"delete\" commands): "
                + script.getModifications());

        ShowVisitor<Character> visitor = new ShowVisitor<>();
        script.visit(visitor);
        visitor.printStats();
    }

    /**
     * Prints a sentence translated through a {@code LookupTranslator} built from
     * a small replacement table, then the same sentence passed through
     * {@code UnicodeEscaper} and back through {@code UnicodeUnescaper}.
     */
    public static void translateExample() {
        printExampleHeader("translate");

        Map<CharSequence, CharSequence> translation = new HashMap<>();
        translation.put("e", "3");
        translation.put("l", "1");
        translation.put("t", "7");

        String s1 = "Let it be!";

        LookupTranslator lookupTranslator = new LookupTranslator(translation);
        System.out.println(lookupTranslator.translate(s1));

        UnicodeEscaper unicodeEscaper = new UnicodeEscaper();
        UnicodeUnescaper unicodeUnescaper = new UnicodeUnescaper();

        String unicodeString = unicodeEscaper.translate(s1);
        System.out.println(unicodeString);
        System.out.println(unicodeUnescaper.translate(unicodeString));
    }

    /**
     * Prints the Jaccard, Jaro-Winkler, longest-common-subsequence and fuzzy
     * score similarities computed for a pair of words.
     */
    public static void similaritiesExample() {
        printExampleHeader("similarities");

        String s1 = "hyperspace";
        String s2 = "cyberscape";

        JaccardSimilarity jaccard = new JaccardSimilarity();
        System.out.println("Jaccard similarity: " + jaccard.apply(s1, s2));

        JaroWinklerSimilarity jaroWinkler = new JaroWinklerSimilarity();
        System.out.println("Jaro-Winkler similarity: " + jaroWinkler.apply(s1, s2));

        LongestCommonSubsequence lcs = new LongestCommonSubsequence();
        System.out.println("Longest Common Subsequence similarity: " + lcs.apply(s1, s2));

        FuzzyScore fuzzyScore = new FuzzyScore(Locale.ENGLISH);
        System.out.println("Fuzzy score similarity: " + fuzzyScore.fuzzyScore(s1, s2));
        System.out.println("Fuzzy score similarity: " + fuzzyScore.fuzzyScore(s1, "space"));
    }

    /**
     * Prints the cosine similarity of two sentences represented as token-count
     * vectors, then prints it again after one more occurrence of
     * {@code "string"} has been added to the second vector.
     */
    public static void sentenceSimilarityExample() {
        printExampleHeader("sentence similarity");

        String s1 = "string similarity";
        String s2 = "string distance";

        Map<CharSequence, Integer> vector1 = countTokens(s1);
        Map<CharSequence, Integer> vector2 = countTokens(s2);

        CosineSimilarity cosine = new CosineSimilarity();
        System.out.println("Cosine similarity: " + cosine.cosineSimilarity(vector1, vector2));

        // Adding one repetition of "string" to vector2
        vector2.merge("string", 1, Integer::sum);
        System.out.println("Cosine similarity: " + cosine.cosineSimilarity(vector1, vector2));
    }

    /**
     * Prints the Hamming, Jaccard, Jaro-Winkler, longest-common-subsequence and
     * Levenshtein (unbounded, thresholded and detailed) distances computed for a
     * pair of words.
     */
    public static void distancesExample() {
        printExampleHeader("distances");

        String s1 = "hyperspace";
        String s2 = "cyberscape";

        HammingDistance hamming = new HammingDistance();
        // Requires the two strings to have the same length
        System.out.println("Hamming distance: " + hamming.apply(s1, s2));

        JaccardDistance jaccard = new JaccardDistance();
        System.out.println("Jaccard distance: " + jaccard.apply(s1, s2));

        JaroWinklerDistance jaroWinkler = new JaroWinklerDistance();
        // The result is wrong at the moment (see https://issues.apache.org/jira/browse/TEXT-104)
        System.out.println("Jaro-Winkler distance: " + jaroWinkler.apply(s1, s2));

        LongestCommonSubsequenceDistance lcs = new LongestCommonSubsequenceDistance();
        System.out.println("Longest Common Subsequence distance: " + lcs.apply(s1, s2));

        LevenshteinDistance levenshtein = new LevenshteinDistance();
        System.out.println("Levenshtein distance: " + levenshtein.apply(s1, s2));

        LevenshteinDistance levenshteinWithThreshold = new LevenshteinDistance(3);
        // Returns -1 since the actual distance, 4, is higher than the threshold
        System.out.println("Levenshtein distance: " + levenshteinWithThreshold.apply(s1, s2));

        LevenshteinDetailedDistance levenshteinDetailed = new LevenshteinDetailedDistance();
        System.out.println("Levenshtein detailed distance: " + levenshteinDetailed.apply(s1, s2));
    }

    /**
     * Prints the cosine distance between two sentences, then between the first
     * sentence and the second one with {@code " string"} appended.
     */
    public static void sentenceDistanceExample() {
        printExampleHeader("sentence distance");

        String s1 = "string similarity";
        String s2 = "string distance";

        CosineDistance cosine = new CosineDistance();
        System.out.println("Cosine distance: " + cosine.apply(s1, s2));
        System.out.println("Cosine distance: " + cosine.apply(s1, s2 + " string"));
    }
}

你可能感兴趣的:(apache,算法,java)