# python-模糊查询 (Python fuzzy lookup)

# 主要用Levenshtein库 以及TF-IDF的想法
# 实现通过变压器名和线路名找到数据库中对应的记录


#! /usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "wxf"
# Email: [email protected]
# Date: 2018/5/11

import csv
import Levenshtein
import pandas as pd

# Load the two Excel workbooks this script compares and print the first rows of
# each so the column layout can be checked by eye.
tran_info = pd.read_excel('./贵州省平均功率因素统计.xlsx')
print(tran_info.head())
tran_find_table = pd.read_excel('./xxx.xlsx')
print(tran_find_table.head())

# Add a combined-name column: the '线路名字' column concatenated with the
# '配变名称' column.
# NOTE(review): the matching loops further down read the name to compare by
# position (index 18); presumably that is this new column -- confirm against
# the workbook's actual column count.
# NOTE(review): rows where either source cell is missing will presumably end up
# NaN here rather than a string -- confirm whether that matters for matching.
tran_info['组合名字'] = tran_info['线路名字'] + tran_info['配变名称']

# Print both column indexes for reference.
print(tran_find_table.columns)
print(tran_info.columns)

# Report 1 (patch1.csv): for every value in the '线路' column of
# tran_find_table, write the tran_info row whose column-18 value has the
# smallest Levenshtein distance to it.
#
# Only distances strictly below 10 count as a match; when nothing qualifies the
# row is still written, with distance 10 and empty tran_info fields.  On ties
# the first row encountered wins.  Note that the first output column is headed
# '匹配率' but holds the raw edit distance, not a percentage.
#
# tran_info rows are addressed by position (0, 4, 13, 18).
# NOTE(review): index 18 is presumably the combined '组合名字' column -- confirm.

# Materialise the row array and the stringified names once instead of once per
# query name.
tran_rows = tran_info.values
tran_names = [str(row[18]) for row in tran_rows]

# The context manager guarantees the CSV is flushed and closed even if a lookup
# raises part-way through.
with open('./patch1.csv', 'w', newline='') as patchfile:
    writer_patchfile = csv.writer(patchfile)
    writer_patchfile.writerow(['匹配率', '编号', '供电局名称', '变压器名称', 'cj_mp_id', '三相表匹配项名称'])

    for line_name in tran_find_table['线路'].values:
        sanxiangbiaomingchen = str(line_name)
        min_juli = 10  # exclusive upper bound on an acceptable distance
        best_row = None

        for t, tran_name in zip(tran_rows, tran_names):
            juli = Levenshtein.distance(tran_name, sanxiangbiaomingchen)
            if juli < min_juli:
                min_juli = juli
                best_row = t
                if min_juli == 0:
                    # Exact match: nothing can beat it, stop scanning.
                    break

        if best_row is None:
            bianhao = gongdianju = bianyaqibingchen = jiliangdian = ''
        else:
            bianhao = best_row[0]
            gongdianju = best_row[4]
            bianyaqibingchen = best_row[18]
            jiliangdian = best_row[13]

        writer_patchfile.writerow(
            [str(min_juli), bianhao, gongdianju, bianyaqibingchen, jiliangdian, sanxiangbiaomingchen]
        )

# Report 2 (patch2.csv): for every tran_info row, classify how well its
# column-18 value matches the names in tran_find_table.
#
#   * exact hit in the '线路' or '台区' column -> one row labelled '100%'
#     with 'NULL' as the matched name;
#   * otherwise, one row per '线路' value at Levenshtein distance 1, 2 or 3,
#     labelled '90%', '70%' and '60%' respectively.
#
# The original wrote '90%' for all three distances; the labels below follow the
# percentages used in the original's own print statements for each tier.
#
# tran_info rows are addressed by position (0, 4, 13, 18).
# NOTE(review): index 18 is presumably the combined '组合名字' column -- confirm.

# Edit distance -> label written in the first CSV column.
match_labels = {1: '90%', 2: '70%', 3: '60%'}

# Pull the comparison columns out once rather than once per tran_info row.
line_names = tran_find_table['线路'].values
area_names = tran_find_table['台区'].values

# The context manager closes the file (the original never closed patch2.csv).
with open('./patch2.csv', 'w', newline='') as patchfile:
    writer_patchfile = csv.writer(patchfile)
    writer_patchfile.writerow(['匹配率', '编号', '供电局名称', '变压器名称', 'cj_mp_id', '三相表匹配项名称'])

    for t in tran_info.values:
        if t[18] in line_names or t[18] in area_names:
            writer_patchfile.writerow(['100%', t[0], t[4], t[18], t[13], 'NULL'])
            continue

        tran_name = str(t[18])
        for line_name in line_names:
            juli = Levenshtein.distance(tran_name, str(line_name))
            label = match_labels.get(juli)
            if label is None:
                continue
            if juli == 3:
                # Kept from the original: the weakest tier is echoed to stdout.
                print('60%', t[0], t[4], t[18], t[13], '匹配:', str(line_name))
            writer_patchfile.writerow([label, t[0], t[4], t[18], t[13], str(line_name)])

# 你可能感兴趣的:(python-模糊查询)