用 Java 实现一些距离度量

用 Java 实现几种常用的距离与相似度度量:余弦相似度、编辑距离、欧氏距离、曼哈顿距离以及皮尔逊相关系数。

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;

/**
 * Distance and similarity measures for numeric vectors and strings.
 *
 * <p>The list-based vector methods require both lists to have the same size. For the cosine,
 * Euclidean and Manhattan methods a size mismatch is logged as an error and reported by returning
 * {@code 0}; the Pearson methods throw instead.
 */
public class Distance {
    private static final Logger logger = LoggerFactory.getLogger(Distance.class);

    /**
     * Computes the cosine of the angle between two vectors (cosine similarity).
     *
     * <p>Despite the method name, a larger value means "more similar": {@code 1} for vectors
     * pointing the same way, {@code 0} for orthogonal vectors and {@code -1} for opposite vectors.
     * The result is only confined to {@code [0, 1]} when no component is negative.
     *
     * @param var1 vector of the first word
     * @param var2 vector of the second word
     * @return the cosine similarity, nominally in {@code [-1, 1]}; {@code 0} if the sizes differ;
     *     {@code NaN} if either vector has zero length (including empty lists), because the
     *     denominator is then zero
     */
    public double cosineDistance(List<Double> var1, List<Double> var2) {
        if (var1.size() != var2.size()) {
            logger.error("传入参数有误,请进行检测");
            return 0;
        }
        double dotProduct = 0;
        double squaredNorm1 = 0;
        double squaredNorm2 = 0;
        // Single pass: accumulate the dot product and both squared norms together.
        for (int index = 0; index < var1.size(); index++) {
            double a = var1.get(index);
            double b = var2.get(index);
            dotProduct += a * b;
            squaredNorm1 += a * a;
            squaredNorm2 += b * b;
        }
        double normProduct = Math.sqrt(squaredNorm1) * Math.sqrt(squaredNorm2);
        return dotProduct / normProduct;
    }

    /**
     * Computes the edit (Levenshtein) distance between two strings: the minimum number of
     * single-character insertions, deletions and substitutions that turn one into the other.
     *
     * <p>Characters are compared with {@link String#charAt(int)}, i.e. per UTF-16 code unit.
     * When the strings differ, the computed distance is also logged at info level.
     *
     * @param var1 first string
     * @param var2 second string
     * @return the edit distance; {@code 0} when the strings are equal
     * @throws NullPointerException if either argument is {@code null}
     */
    public double editDistance(String var1, String var2) {
        if (var1.equals(var2)) {
            return 0;
        }
        int len1 = var1.length();
        int len2 = var2.length();
        // dp[i][j] = edit distance between the first i chars of var1 and the first j chars of var2.
        int[][] dp = new int[len1 + 1][len2 + 1];
        // First column: deleting i characters of var1 yields the empty prefix of var2.
        for (int i = 1; i <= len1; i++) {
            dp[i][0] = i;
        }
        // First row: inserting j characters of var2. The bound must be var2's length; using
        // var1's length here overflows the row or leaves part of it uninitialised.
        for (int j = 1; j <= len2; j++) {
            dp[0][j] = j;
        }
        for (int i = 1; i <= len1; i++) {
            for (int j = 1; j <= len2; j++) {
                if (var1.charAt(i - 1) == var2.charAt(j - 1)) {
                    dp[i][j] = dp[i - 1][j - 1];
                } else {
                    // Cheapest of deletion, insertion and substitution.
                    dp[i][j] = Math.min(dp[i - 1][j] + 1,
                            Math.min(dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1));
                }
            }
        }
        int result = dp[len1][len2];
        logger.info("{}", result);
        return result;
    }

    /**
     * Computes the Euclidean (L2) distance between two vectors.
     *
     * @param var1 first vector
     * @param var2 second vector
     * @return the distance, from {@code 0} (identical) upwards; also {@code 0} when the sizes
     *     differ, which callers cannot tell apart from a true zero distance except via the
     *     logged error
     */
    public double euclideanDistance(List<Double> var1, List<Double> var2) {
        if (var1.size() != var2.size()) {
            logger.error("传入参数有误,请进行检测");
            return 0;
        }
        double sum = 0;
        for (int index = 0; index < var1.size(); index++) {
            double diff = var2.get(index) - var1.get(index);
            sum += diff * diff;
        }
        return Math.sqrt(sum);
    }

    /**
     * Computes the Manhattan (L1) distance between two vectors.
     *
     * @param var1 first vector
     * @param var2 second vector
     * @return the distance, from {@code 0} (identical) upwards; also {@code 0} when the sizes
     *     differ, which callers cannot tell apart from a true zero distance except via the
     *     logged error
     */
    public double manhattanDistance(List<Double> var1, List<Double> var2) {
        if (var1.size() != var2.size()) {
            logger.error("传入参数有误,请进行检测");
            return 0;
        }
        double sum = 0;
        for (int index = 0; index < var1.size(); index++) {
            sum += Math.abs(var2.get(index) - var1.get(index));
        }
        return sum;
    }

    /**
     * Min-max normalisation: maps {@code current} linearly so that {@code min} becomes 0 and
     * {@code max} becomes 1. Not called anywhere in this class.
     *
     * @param max upper bound of the source range
     * @param min lower bound of the source range
     * @param current value to rescale
     * @return {@code (current - min) / (max - min)}; non-finite when {@code max == min}
     */
    private static double getZeroOneResult(double max, double min, double current) {
        return (current - min) / (max - min);
    }

    // ---- Correlation coefficient ----

    /**
     * Computes the Pearson correlation coefficient of two equally sized series.
     *
     * <p>Copies both lists into primitive arrays and delegates to
     * {@link #getPearsonCorrelationScore(double[], double[])}.
     *
     * @param x first series
     * @param y second series
     * @return the correlation coefficient; {@code NaN} under the same conditions as the array
     *     overload
     * @throws IllegalArgumentException if the lists differ in size
     */
    public double getPearsonCorrelationScore(List<Double> x, List<Double> y) {
        if (x.size() != y.size()) {
            throw new IllegalArgumentException("数据不正确!");
        }
        double[] xData = new double[x.size()];
        double[] yData = new double[x.size()];
        for (int i = 0; i < x.size(); i++) {
            xData[i] = x.get(i);
            yData[i] = y.get(i);
        }
        return getPearsonCorrelationScore(xData, yData);
    }

    /**
     * Computes the Pearson correlation coefficient of two equally sized series: the sum of
     * products of deviations from the mean, divided by the product of the square roots of the
     * summed squared deviations.
     *
     * @param xData first series
     * @param yData second series
     * @return the correlation coefficient, nominally in {@code [-1, 1]}; {@code NaN} when the
     *     arrays are empty or either series is constant, because the denominator is then zero
     * @throws IllegalArgumentException if the arrays differ in length
     */
    public static double getPearsonCorrelationScore(double[] xData, double[] yData) {
        if (xData.length != yData.length) {
            throw new IllegalArgumentException("数据不正确!");
        }
        double xMeans = getMeans(xData);
        double yMeans = getMeans(yData);
        double numerator = generateNumerator(xData, xMeans, yData, yMeans);
        double denominator = generateDenominator(xData, xMeans, yData, yMeans);
        return numerator / denominator;
    }

    /** Sums the products of the paired deviations from the respective means. */
    private static double generateNumerator(double[] xData, double xMeans, double[] yData, double yMeans) {
        double numerator = 0.0;
        for (int i = 0; i < xData.length; i++) {
            numerator += (xData[i] - xMeans) * (yData[i] - yMeans);
        }
        return numerator;
    }

    /** Multiplies the square roots of each series' summed squared deviations from its mean. */
    private static double generateDenominator(double[] xData, double xMeans, double[] yData, double yMeans) {
        double xSum = 0.0;
        for (double value : xData) {
            xSum += (value - xMeans) * (value - xMeans);
        }
        double ySum = 0.0;
        for (double value : yData) {
            ySum += (value - yMeans) * (value - yMeans);
        }
        return Math.sqrt(xSum) * Math.sqrt(ySum);
    }

    /** Returns the arithmetic mean of {@code datas}; {@code NaN} for an empty array. */
    private static double getMeans(double[] datas) {
        double sum = 0.0;
        for (double value : datas) {
            sum += value;
        }
        return sum / datas.length;
    }

}

你可能感兴趣的:(java,工作记录)