java 相似度计算

java 相似度计算_第1张图片

 


/**
 * Reads the first column of an Excel workbook and reports every pair of cell
 * values whose Levenshtein-based similarity reaches a threshold.
 *
 * <p>The similarity of two strings is {@code 1 - editDistance / max(length)},
 * so {@code 1.0} means identical and {@code 0.0} means nothing in common.
 */
public class Same {

  /** Maximum number of rows (starting at row 0) read from the sheet. */
  private static final int MAX_ROWS = 900;

  /** Pairs with a similarity greater than or equal to this value are printed. */
  private static final double SIMILARITY_THRESHOLD = 0.80;

  /**
   * Loads the first column of the first sheet of {@code D:\1.xlsx} and prints
   * every pair of values whose similarity is at least
   * {@link #SIMILARITY_THRESHOLD}.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    // Read the data from the Excel file, then compute pairwise similarity.
    File file = new File("D:\\1.xlsx");
    try {
      List<String> values = readFirstColumn(file);
      printSimilarPairs(values);
    } catch (Exception e) {
      // Top-level boundary of a command-line tool: report the failure and exit.
      e.printStackTrace();
    }
  }

  /**
   * Reads column 0 of the first sheet as strings, for at most
   * {@link #MAX_ROWS} rows. Rows or cells that do not exist are skipped
   * instead of causing a {@link NullPointerException}.
   *
   * @param file the {@code .xlsx} workbook to read
   * @return the cell values in row order
   * @throws java.io.IOException if the file cannot be opened or parsed
   */
  private static List<String> readFirstColumn(File file) throws java.io.IOException {
    List<String> values = new ArrayList<>();
    // try-with-resources closes both the workbook and the underlying stream,
    // even when reading fails part-way through.
    try (FileInputStream in = new FileInputStream(file);
        XSSFWorkbook workbook = new XSSFWorkbook(in)) {
      Sheet sheet = workbook.getSheetAt(0);
      int lastRow = Math.min(sheet.getLastRowNum(), MAX_ROWS - 1);
      for (int i = 0; i <= lastRow; i++) {
        Row row = sheet.getRow(i);
        if (row == null) {
          continue; // the sheet has no row at this index
        }
        Cell cell = row.getCell(0);
        if (cell == null) {
          continue; // the row has no cell in the first column
        }
        // Force the cell to text so numeric cells can be read as strings.
        // NOTE(review): setCellType is deprecated in recent POI releases;
        // DataFormatter is the suggested replacement - confirm the POI version.
        cell.setCellType(CellType.STRING);
        values.add(cell.getStringCellValue());
      }
    }
    return values;
  }

  /**
   * Compares every unordered pair of values exactly once and prints the pairs
   * whose similarity is at least {@link #SIMILARITY_THRESHOLD}.
   *
   * @param values the strings to compare
   */
  private static void printSimilarPairs(List<String> values) {
    // The inner loop starts at i + 1 so that (a, b) and (b, a) are not both compared.
    for (int i = 0; i < values.size() - 1; i++) {
      String a = values.get(i);
      for (int j = i + 1; j < values.size(); j++) {
        String b = values.get(j);
        float levenshtein = Levenshtein(a, b);
        if (levenshtein >= SIMILARITY_THRESHOLD) {
          System.out.println(a+"<--->"+b+","+levenshtein+"\r\n");
        }
      }
    }
  }

  /**
   * Computes a normalized similarity between two strings based on their
   * Levenshtein edit distance.
   *
   * @param a first string, may be {@code null}
   * @param b second string, may be {@code null}
   * @return {@code 1} when both are {@code null}, both are empty, or they are
   *     equal; {@code 0} when exactly one is {@code null}; otherwise
   *     {@code 1 - editDistance / max(a.length(), b.length())}
   */
  public static float Levenshtein(String a, String b) {
    if (a == null && b == null) {
      return 1f;
    }
    if (a == null || b == null) {
      return 0F;
    }
    int longest = Math.max(a.length(), b.length());
    if (longest == 0) {
      // Both strings are empty: identical, and dividing by zero would give NaN.
      return 1f;
    }
    int editDistance = editDis(a, b);
    return 1 - ((float) editDistance / longest);
  }

  /**
   * Computes the Levenshtein edit distance (insertions, deletions and
   * substitutions, each costing 1) using a full dynamic-programming table.
   *
   * @param a first string, not {@code null}
   * @param b second string, not {@code null}
   * @return the minimum number of single-character edits turning {@code a} into {@code b}
   */
  private static int editDis(String a, String b) {
    int aLen = a.length();
    int bLen = b.length();

    // Turning an empty string into the other one takes one insertion per character.
    if (aLen == 0) {
      return bLen;
    }
    if (bLen == 0) {
      return aLen;
    }

    // v[i][j] is the distance between the first i chars of a and the first j chars of b.
    int[][] v = new int[aLen + 1][bLen + 1];
    for (int i = 0; i <= aLen; ++i) {
      for (int j = 0; j <= bLen; ++j) {
        if (i == 0) {
          v[i][j] = j;
        } else if (j == 0) {
          v[i][j] = i;
        } else if (a.charAt(i - 1) == b.charAt(j - 1)) {
          // Matching characters cost nothing extra.
          v[i][j] = v[i - 1][j - 1];
        } else {
          // Cheapest of substitution, insertion and deletion, plus one edit.
          v[i][j] = 1 + Math.min(v[i - 1][j - 1], Math.min(v[i][j - 1], v[i - 1][j]));
        }
      }
    }
    return v[aLen][bLen];
  }
}

你可能感兴趣的:(java,算法,jvm)