Python——计算序列相似度的算法(包括求最少交换步骤,最小交换距离)

程序功能

计算如何衡量两个数字序列之间的相似度中提及的相似度指标;求两个序列转换的最少交换步骤和最小交换距离。

代码

  • 位方差(location square deviation, LSD)
def location_square_deviation(lst_1, lst_2=None):
    n = len(lst_1)
    lst = lst_1.copy()
    if lst_2 is not None:
        if n != len(lst_2):
            return False
        for i in range(n):	# 以lst2为映射表,将lst1映射为lst可直接与[0,1,2,...]比较
            lst[lst_1.index(lst_2[i])] = i

    s = 0
    for i in range(n):
        s += (lst[i]-i) ** 2
    s /= n
    return s
  • 位均差(location mean deviation, LMD)
def location_mean_deviation(lst_1, lst_2=None):
    n = len(lst_1)
    lst = lst_1.copy()
    if lst_2 is not None:
        if n != len(lst_2):
            return False
        for i in range(n):
            lst[lst_1.index(lst_2[i])] = i

    s = 0
    for i in range(n):
        s += abs(lst[i]-i)
    s /= n
    return s
  • 交换差(swap deviation, SD)
def swap_deviation(lst_1, lst_2=None):
    n = len(lst_1)
    lst = lst_1.copy()
    if lst_2 is not None:
        if n != len(lst_2):
            return False
        for i in range(n):
            lst[lst_1.index(lst_2[i])] = i

    count = 0	# 计算序列中的循环数
    for i in range(n):
        if lst[i] == -1:
            continue
        p = i
        while lst[p] != -1:
            q = lst[p]
            lst[p] = -1
            p = q
        count += 1
    return n - count # 序列长减去循环数即为最小交换次数
  • 交换距离差(swap distance deviation, SDD)
def swap_distance_deviation(lst_1, lst_2=None):
    n = len(lst_1)
    lst = lst_1.copy()
    if lst_2 is not None:
        if n != len(lst_2):
            return False
        for i in range(n):
            lst[lst_1.index(lst_2[i])] = i

    swap_lst = []
    weight = 0
    while location_mean_deviation(lst) != 0:
        r_best = 0	# 最佳交换收益
        i_best = 0
        j_best = 0
        for i in range(n):
            for j in range(i+1, n):	# 遍历所有交换,寻找最佳交换步骤
            	# 交换收益r=交换后位均差的下降值ΔLMD(A,B)/交换距离(j-i)
            	# 令交换距离恒为1可求最少交换步骤&最少交换次数
                r = ((abs(lst[i]-i)+abs(lst[j]-j)) - (abs(lst[j]-i)+abs(lst[i]-j)))/(j-i)
                if r > r_best:
                    r_best = r
                    i_best = i
                    j_best = j
        lst[i_best], lst[j_best] = lst[j_best], lst[i_best]
        weight += (j_best-i_best)
        swap_lst.append((i_best, j_best))
    # return swap_lst # 求最小交换距离的步骤(交换距离为1则是求最少交换步骤)
    return weight
  • 值方差(value square deviation, VSD)
def value_square_deviation(lst_1, lst_2=None):
    n = len(lst_1)
    if lst_2 is not None:
        if n != len(lst_2):
            return False
    else:
        lst_2 = [i for i in range(n)]
    s = 0
    for i in range(n):
        s += (lst_1[i] - lst_2[i]) ** 2
    s /= n
    return s
  • 值均差(value mean deviation, VMD)
def value_mean_deviation(lst_1, lst_2=None):
    n = len(lst_1)
    if lst_2 is not None:
        if n != len(lst_2):
            return False
    else:
        lst_2 = [i for i in range(n)]

    s = 0
    for i in range(n):
        s += abs(lst_1[i] - lst_2[i])
    s /= n
    return s
  • 点积比(dot product ratio, DPR)
def dot_product_ratio(lst_1, lst_2=None):
    n = len(lst_1)
    if lst_2 is not None:
        if n != len(lst_2):
            return False
    else:
        lst_2 = [i for i in range(n)]

    s = 0
    max_s = 0
    for i in range(n):
        s += lst_1[i] * lst_2[i]
        max_s += lst_1[i] ** 2
    s /= max_s
    return s
  • 归一化点积比(normalization dot product ratio, NDPR)
def normalization_dot_product_ratio(lst_1, lst_2=None):
    n = len(lst_1)
    if lst_2 is not None:
        if n != len(lst_2):
            return False
    else:
        lst_2 = [i for i in range(n)]

    s = (2*n-1)/(n+1)*dot_product_ratio(lst_1, lst_2)-(n-2)/(n+1)
    return s

你可能感兴趣的:(算法)