基于欧氏距离比较相似度的 Python 实现

# -*- coding: utf-8 -*-
import xlrd
import xlwt
import unicodedata

# Open the source workbook once, at import time. read_sheet1() and
# read_sheet2() both read their sheets from this module-level object.
# NOTE(review): the path is hard-coded to one user's desktop.
workbook = xlrd.open_workbook(r'C:\Users\wangqiqi\Desktop\classify.xlsx')
# Read the first sheet of the workbook to get the data to be classified.
def read_sheet1():
    """Return one record per row of the workbook's first sheet.

    Each record is a dict whose keys 'mile', 'num', 'weight' and 'age'
    hold the values of the cells at column indices 3, 5, 6 and 7 of that
    row. Every row reported by ``nrows`` is included; no row is filtered
    out.
    """
    sheet = workbook.sheet_by_index(0)
    records = []
    for row_index in range(sheet.nrows):
        cells = sheet.row(row_index)  # fetch the row once, reuse for all columns
        records.append({
            'mile': cells[3].value,
            'num': cells[5].value,
            'weight': cells[6].value,
            'age': cells[7].value,
        })
    return records
# Read the second sheet of the workbook to get the already-classified data.
def read_sheet2():
    """Return one record per row of the workbook's second sheet.

    Each record is a dict whose keys 'mile', 'num', 'weight' and 'age'
    hold the values of the cells at column indices 3, 5, 6 and 7 of that
    row. Every row reported by ``nrows`` is included; no row is filtered
    out.
    """
    sheet = workbook.sheet_by_index(1)
    all_rows = (sheet.row(row_index) for row_index in range(sheet.nrows))
    return [
        {
            'mile': cells[3].value,
            'num': cells[5].value,
            'weight': cells[6].value,
            'age': cells[7].value,
        }
        for cells in all_rows
    ]
# Build the cell style used for the Excel output.
def set_style(name, height, bold=False):
    """Return an ``xlwt.XFStyle`` whose font uses the given settings.

    name   -- font name, e.g. 'Times New Roman'
    height -- value assigned as-is to the font's ``height`` attribute
    bold   -- whether the font is bold (default False)

    The font colour is always palette index 4. No borders are set.
    """
    cell_font = xlwt.Font()
    cell_font.name = name
    cell_font.height = height
    cell_font.bold = bold
    cell_font.color_index = 4

    cell_style = xlwt.XFStyle()
    cell_style.font = cell_font
    return cell_style
# Write the matching results to an Excel file.
def write_excel(n, mol):
    """Save the nearest-neighbour results as an .xls workbook.

    n   -- sequence of 0-based sheet2 row indices, one per sheet1 row
    mol -- sequence of the corresponding minimum distances

    Row 0 of the output holds the column titles. Each following row i
    holds: the 1-based sheet1 row number, the 1-based sheet2 row number
    (``n[i-1] + 1``) and the distance ``mol[i-1]``. One row is written per
    (index, distance) pair; if the two sequences differ in length the
    extra items of the longer one are ignored.
    """
    f = xlwt.Workbook()
    sheet1 = f.add_sheet(u'sheet1', cell_overwrite_ok=True)  # create the sheet

    # Every cell uses the same style, so build it once instead of per cell.
    style = set_style('Times New Roman', 220, True)

    row0 = [u'sheet1所在行', u'sheet2所在行', u'min_distance']
    for col, title in enumerate(row0):
        sheet1.write(0, col, title, style)

    # Iterate over the data actually supplied rather than a fixed row count.
    for i, (match_index, distance) in enumerate(zip(n, mol), 1):
        sheet1.write(i, 0, i, style)
        sheet1.write(i, 1, match_index + 1, style)
        sheet1.write(i, 2, distance, style)

    # Raw string: keeps the backslashes literal (a plain '\U...' is a
    # syntax error on Python 3).
    f.save(r'C:\Users\wangqiqi\Desktop\distancs.xls')  # save the file

# Dict keys that make up the feature vector of each row.
_FEATURE_KEYS = ('mile', 'num', 'weight', 'age')


def nearest_row(target, candidates):
    """Find the candidate closest to *target* by Euclidean distance.

    target     -- dict with numeric 'mile', 'num', 'weight', 'age' values
    candidates -- sequence of dicts with the same keys

    Returns ``(index, distance)`` for the closest candidate. When several
    candidates are equally close, the first one wins, so exactly one index
    is reported per target.

    Raises ValueError if *candidates* is empty.
    """
    best_index = None
    best_distance = None
    for index, candidate in enumerate(candidates):
        # Euclidean distance formula.
        squared = sum((target[key] - candidate[key]) ** 2
                      for key in _FEATURE_KEYS)
        distance = squared ** 0.5
        # Strict '<' keeps the earliest candidate on ties.
        if best_distance is None or distance < best_distance:
            best_index = index
            best_distance = distance
    if best_index is None:
        raise ValueError('no candidate rows to compare against')
    return best_index, best_distance


if __name__ == '__main__':
    l1 = read_sheet1()
    print(len(l1))
    l2 = read_sheet2()
    print(len(l2))

    mul = []  # minimum distance for each sheet1 row
    kl = []   # 0-based index of the matching sheet2 row, aligned with mul
    for i, row in enumerate(l1):
        k, mol = nearest_row(row, l2)
        kl.append(k)
        mul.append(mol)
        print(i + 1)  # progress: 1-based number of the row just processed

    write_excel(kl, mul)



你可能感兴趣的:(Python,统计)