from sqlalchemy import create_engine,Table,Column,Date,Integer,String,ForeignKey
from fuzzywuzzy import process
import os
import pymssql
import pymysql
import datetime
import time
import sys
import numpy
#import text2vec
from text2vec import Similarity
#from longconv import *
from gensim import similarities
import paramiko
from smb.SMBConnection import *
import csv
import re
import sqlalchemy
import pandas as pd
from sqlalchemy import MetaData,create_engine,Table,Integer
import socket
import pandas
#import gradio as gr
import datetime
from scipy.spatial.distance import pdist
from text2vec import Similarity
# Wall-clock start of the script; used later to print the elapsed time.
t1 = time.time()

# Chinese sentence-embedding model (CoSENT), scored with cosine similarity.
# Instantiated once at import time and shared by every ai_text() call.
sim_model = Similarity(
    model_name_or_path='shibing624/text2vec-base-chinese',
    similarity_type='cosine',
    embedding_type='sbert',
)
def ai_text(sentence1, sentence2):
    """Return the similarity score between two sentences.

    The score is whatever the module-level ``sim_model`` reports from
    ``get_score(sentence1, sentence2)``; the model is configured above
    with ``similarity_type='cosine'``.

    Args:
        sentence1: First text to compare.
        sentence2: Second text to compare.

    Returns:
        The score produced by ``sim_model.get_score``.
    """
    return sim_model.get_score(sentence1, sentence2)
def _best_match(name, candidates):
    """Return ``(id, name, score)`` of the candidate most similar to *name*.

    Each candidate is a row whose column 0 is treated as its id and whose
    column 1 is the text compared against *name* via ``ai_text``.  Only
    scores strictly greater than 0 count as a match; on ties the earliest
    candidate wins.  When nothing scores above 0 the result is
    ``(None, None, 0)``.
    """
    best_id = None
    best_name = None
    best_score = 0
    for candidate in candidates:
        score = ai_text(name, candidate[1])
        # Compare raw scores: rounding the running maximum here could
        # reject a later candidate that is genuinely better.
        if score > best_score:
            best_id, best_name, best_score = candidate[0], candidate[1], score
    return best_id, best_name, best_score


def db_conn(db_host, db_user, db_passwd, db_database, db_port):
    """Match vendor names against the WorldCheck name list and print results.

    Connects through ``pymssql``, loads every row of
    ``ODS_Legal_WorldCheck_Name`` and the first 10 rows (ordered by
    ``SMVendorID``) of ``ODS_Legal_Ariba_VendorData_New``, then for each
    vendor row prints: vendor column 0, vendor column 1, the id and name of
    the most similar WorldCheck row, and the similarity score rounded to two
    decimals.  Finally prints the seconds elapsed since the module-level
    ``t1`` timestamp.

    Args:
        db_host: Database server address.
        db_user: Login user name.
        db_passwd: Login password.
        db_database: Database name.
        db_port: TCP port of the server.

    Raises:
        Exception: If no usable cursor could be obtained.  Errors raised by
            ``pymssql`` itself propagate unchanged.
    """
    conn = pymssql.connect(server=db_host, user=db_user, password=db_passwd,
                           database=db_database, port=db_port)
    try:
        cur = conn.cursor()
        if not cur:
            # Message reads "database connection failed".
            raise Exception('数据库连接失败')
        print('succeed')

        cur.execute("""
select * from ODS_Legal_WorldCheck_Name""")
        wc = cur.fetchall()

        # The first result set is fully fetched, so the cursor can be reused.
        cur.execute(''' select * from
(select *, ROW_NUMBER() OVER (order by SMVendorID) AS ROWNUM
from ODS_Legal_Ariba_VendorData_New) t
where ROWNUM between 1 and 10 ''')
        av = cur.fetchall()
    finally:
        # Release the connection before the slow scoring loop, and also
        # when a query fails.
        conn.close()

    for vendor in av:
        # Best match is computed fresh for every vendor row, so a row with no
        # positive score prints None/None/0 instead of a stale or undefined
        # match from an earlier row.
        max_id, max_name, max_rs = _best_match(vendor[1], wc)
        print(vendor[0], vendor[1], max_id, max_name, round(max_rs, 2))

    t2 = time.time()
    print(t2 - t1)
if __name__ == '__main__':
    # Connection settings for the database holding the ODS_Legal_* tables.
    # NOTE(review): credentials are hard-coded in the source; consider
    # loading them from the environment or a config file instead.
    db_host = '10.111.*.****'
    db_port = '20001'
    db_database = 'ODS_***'
    db_user = '***dmin'
    db_passwd = '*******'

    db_conn(
        db_host=db_host,
        db_user=db_user,
        db_passwd=db_passwd,
        db_database=db_database,
        db_port=db_port,
    )