基于Python推荐系统

1、算法1:基于召唤师信息推荐

我们编写程序,对从带玩公司提供的接口抓取下来的数据进行处理:从这些数据中提取每个召唤师在上路、中路、下路ADC、辅助、打野五个位置的使用频率作为向量,通过比较欧氏距离,推荐出与该召唤师距离最小(即最相近)的一位召唤师(返回的信息包含其最常用的三位英雄,以及各位置的使用频次)。

#coding=utf-8
import sqlite3
import numpy
def jisuan(vec1,vec2):
    """Return the Euclidean distance between two numpy vectors.

    Both arguments must support element-wise subtraction (numpy arrays of
    the same shape).
    """
    difference = vec1 - vec2
    squared_total = numpy.sum(numpy.square(difference))
    return numpy.sqrt(squared_total)
def tuijian(usename1, id1,
            userlist_path=r'C:\Users\win10\Desktop\LOL\userlist.txt',
            db_path=r'C:\Users\win10\Desktop\LOL\shujuku.db'):
    """Recommend the summoner whose position-usage vector is closest.

    The user list is a comma-separated text file whose first three fields
    are read as ``name, qquin, id``.  The ``catalog`` table is read with
    ``select *``; column 1 is compared with the qquin and columns 9..13 are
    used as the five-component usage vector.

    Parameters:
        usename1: summoner name, compared with field 0 of each line.
        id1: id string, compared with field 2 of each line (surrounding
            whitespace / the trailing newline is ignored).
        userlist_path: path of the user list file.
        db_path: path of the SQLite database.

    Returns:
        The full ``catalog`` row (a tuple) of the nearest *other* summoner
        by Euclidean distance; the first such row wins on ties.  ``None``
        when the table holds no other row.

    Raises:
        ValueError: the name/id pair is not in the user list, or its qquin
            is not present in ``catalog``.
    """
    qquin = None
    with open(userlist_path, 'r') as userlist:
        for line in userlist:
            fields = line.split(',')
            if len(fields) < 3:
                # Blank or malformed line: nothing to compare against.
                continue
            # strip() so that an id sitting in the last field of the line
            # still matches despite its trailing newline.
            if usename1 == fields[0] and id1 == fields[2].strip():
                qquin = fields[1]  # no break: the last matching line wins
    if qquin is None:
        raise ValueError('summoner %r with id %r not found in %s'
                         % (usename1, id1, userlist_path))

    cx = sqlite3.connect(db_path)
    try:
        cu = cx.cursor()
        cu.execute("select * from catalog")
        alluser = cu.fetchall()
    finally:
        cx.close()

    self_index = None
    for index, row in enumerate(alluser):
        if row[1] == qquin:
            self_index = index
            break
    if self_index is None:
        raise ValueError('qquin %r not found in table catalog' % (qquin,))

    # One row per summoner, five usage frequencies per row.
    vectors = numpy.array([row[9:14] for row in alluser])
    deltas = vectors - vectors[self_index]
    distances = numpy.sqrt(numpy.sum(numpy.square(deltas), axis=1))

    # Track the winning index directly so the querying summoner can never
    # be returned, even when another row has an identical vector.
    best_index = None
    best_distance = None
    for index, distance in enumerate(distances):
        if index == self_index:
            continue
        if best_distance is None or distance < best_distance:
            best_index = index
            best_distance = distance
    if best_index is None:
        return None
    return alluser[best_index]
# Sample run: print the catalog row recommended for one summoner.
print(tuijian('杀你0没商量','9'))

2、算法2:基于英雄信息推荐

为各个英雄的位置、标签、上手难度,以及在法师、坦克等方面的能力建立向量,通过比较向量间欧氏距离的大小,为召唤师推荐三个与其最擅长使用的三个英雄最相似的英雄。

#coding=utf-8
import sqlite3
import numpy
import xlrd
def jisuan(vec1,vec2):
    """Euclidean distance between two equally-shaped numpy vectors."""
    delta = vec1 - vec2
    return numpy.sqrt(numpy.square(delta).sum())
# Spreadsheet columns that form a hero's feature vector: 6..18 and 20..25
# (column 19 is skipped, as in the original hand-written column list).
_HERO_FEATURE_COLUMNS = list(range(6, 19)) + list(range(20, 26))


def hero_tuijian(u_name, id1,
                 userlist_path=r'C:\Users\win10\Desktop\LOL\userlist.txt',
                 db_path=r'C:\Users\win10\Desktop\LOL\shujuku.db',
                 workbook_path=r'C:\Users\win10\Desktop\LOL\dataForHeroes.xlsx'):
    """Recommend heroes similar to a summoner's three favourite heroes.

    The summoner's qquin is looked up in the user list (fields
    ``name, qquin, id``), then the matching ``catalog`` row supplies three
    hero ids in columns 3, 5 and 7.  Every hero is a row of worksheet
    ``sheet1``: column 0 is compared with the hero id, column 1 is the
    value returned, and the columns in ``_HERO_FEATURE_COLUMNS`` form the
    feature vector.  Row 0 is never a candidate.

    Parameters:
        u_name: summoner name, compared with field 0 of each user-list line.
        id1: id string, compared with field 2 (whitespace-stripped).
        userlist_path / db_path / workbook_path: data file locations.

    Returns:
        A list with, for each favourite hero found in the sheet (in order
        hero 1, 2, 3), the column-1 value of every candidate row at minimal
        Euclidean distance.  The favourites themselves are never
        candidates; the same hero may appear more than once.

    Raises:
        ValueError: the summoner is not in the user list or not in
            ``catalog``.
    """
    qquin = None
    with open(userlist_path, 'r') as userlist:
        for line in userlist:
            fields = line.split(',')
            if len(fields) < 3:
                continue
            # strip() so an id in the last field matches despite the newline.
            if u_name == fields[0] and id1 == fields[2].strip():
                qquin = fields[1]  # no break: the last matching line wins
    if qquin is None:
        raise ValueError('summoner %r with id %r not found in %s'
                         % (u_name, id1, userlist_path))

    cx = sqlite3.connect(db_path)
    try:
        cu = cx.cursor()
        cu.execute("select * from catalog")
        alluser = cu.fetchall()
    finally:
        cx.close()

    a = None
    for row in alluser:
        if qquin == row[1]:
            a = row
            break
    if a is None:
        raise ValueError('qquin %r not found in table catalog' % (qquin,))
    favourite_ids = [float(a[3]), float(a[5]), float(a[7])]

    data = xlrd.open_workbook(workbook_path)
    table = data.sheet_by_name(u'sheet1')
    # Read every row once instead of re-fetching it for each column.
    rows = [table.row_values(i) for i in range(table.nrows)]

    # Sheet row of each favourite hero; None when the id is not in the sheet.
    favourite_rows = []
    for hero_id in favourite_ids:
        found = None
        for index, values in enumerate(rows):
            if hero_id == values[0]:
                found = index  # last matching row wins
        favourite_rows.append(found)

    listhero = []
    excluded = set(index for index in favourite_rows if index is not None)
    candidate_rows = [j for j in range(1, len(rows)) if j not in excluded]
    if not candidate_rows:
        return listhero
    candidates = numpy.array(
        [[rows[j][c] for c in _HERO_FEATURE_COLUMNS] for j in candidate_rows])

    for found in favourite_rows:
        if found is None:
            # Hero id absent from the sheet: nothing to compare against.
            continue
        target = numpy.array([rows[found][c] for c in _HERO_FEATURE_COLUMNS])
        distances = numpy.sqrt(
            numpy.sum(numpy.square(candidates - target), axis=1))
        best = distances.min()
        # Pair each distance with its real sheet row so the reported hero
        # is the one the distance was actually computed for.
        for j, distance in zip(candidate_rows, distances):
            if distance == best:
                listhero.append(rows[j][1])
    return listhero
# Sample run: print the heroes recommended for one summoner.
print(hero_tuijian('你怎么这么可爱啊','1'))

3、算法3:SVD主成分分析相似度

首先根据收集到的数据建立user-item的映射关系矩阵,使用numpy库中的SVD变换将该矩阵分解成U矩阵、sigma矩阵和V矩阵。其中sigma矩阵只有对角线上有数据,即一系列互不相关的奇异值(特征值)。可采取PCA的思路对这些特征值进行压缩:保留约90%的能量即可近似表示原矩阵。该方法是一种有损压缩,但会大大减少计算量。由原矩阵的转置、U矩阵和sigma矩阵的逆相乘,得到item-特征的映射矩阵。当对某一用户推荐item时,首先找到该用户对所有已用过的item的评价,再对其每一件没使用过的item进行评价。评价方法为相似度评价,相似度评价方法有三种:欧氏距离、皮尔逊距离、余弦距离。本次实验主要选用了余弦距离。对每一个没使用过的item,与该用户使用过的其他item逐一进行相似度比较,比较因子为已压缩过的特征。然后将用户对使用过商品的评分作为权值与商品相似度相乘并求和,再除以相似度之和,即为没使用过item的评分;按评分排序,取最高的几位作为推荐。


# coding: utf-8



from numpy import *  
from numpy import linalg as la #用到别名
import test3
def loadExData2():
    """Return a small hard-coded 11x11 sample rating matrix as nested lists.

    A fresh list is built on every call.  Presumably rows are users and
    columns are items with 0 meaning "not rated", matching how the
    estimators index their matrix (this loader is not called in the
    visible code, so confirm before relying on it).
    """
    sample_ratings = [
        [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
        [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
        [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
        [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
        [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
        [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
        [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
        [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
        [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
        [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
        [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0],
    ]
    return sample_ratings
def loadExData3():
    """Build the rating matrix (list of rows) from ``test3.loadExData()``.

    ``test3.loadExData()`` is expected to return a mapping whose values are
    themselves mappings keyed by the item-id strings listed below.  Each
    value becomes one row with one column per entry of ``items``, in that
    order.  An item that is missing (or stored as ``None``) becomes 0, the
    value the estimators treat as "not rated", so the result is always a
    purely numeric matrix.

    NOTE(review): rows follow the iteration order of the returned mapping,
    while recommend() selects a row by line number in userlist.txt;
    confirm the two orders agree.
    """
    data_temp = test3.loadExData()
    items = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10',
             '11', '12', '13', '14', '15', '16', '17', '18', '19', '20',
             '21', '22', '23', '24', '25', '26', '27', '28', '29', '30',
             '31', '32', '33', '34', '35', '36', '37', '38', '39', '40',
             '41', '42', '43', '44', '45', '48', '50', '51', '53', '54',
             '55', '56', '57', '58', '59', '60', '61', '62', '63', '64',
             '67', '68', '69', '72', '74', '75', '76', '77', '78', '79',
             '80', '81', '82', '83', '84', '85', '86', '89', '90', '91',
             '92', '96', '98', '99', '101', '102', '103', '104', '105', '106',
             '107', '110', '111', '112', '113', '114', '115', '117', '119', '120',
             '121', '122', '126', '127', '131', '133', '134', '136', '143', '150',
             '154', '157', '161', '163', '164', '201', '202', '203', '222', '223',
             '236', '238', '240', '245', '254', '266', '267', '268', '412', '420',
             '421', '427', '429', '432', '497', '498']
    data = []
    for ratings in data_temp.values():
        row = []
        for item in items:
            value = ratings.get(item)
            # A missing rating must not put None into the numeric matrix.
            row.append(0 if value is None else value)
        data.append(row)
    return data
def ecludSim(inA,inB):
    """Euclidean-distance similarity: 1 / (1 + ||inA - inB||).

    Identical inputs give 1.0; the value falls towards 0 as the 2-norm of
    the difference grows.
    """
    distance = la.norm(inA - inB)
    return 1.0/(1.0 + distance)
  
def pearsSim(inA,inB):
    """Pearson-correlation similarity rescaled from [-1, 1] to [0, 1].

    Inputs with fewer than three entries are treated as perfectly similar
    and yield 1.0 without computing a correlation.
    """
    if len(inA) >= 3:
        # corrcoef returns the 2x2 correlation matrix; [0][1] is r(inA, inB).
        correlation = corrcoef(inA, inB, rowvar = 0)[0][1]
        return 0.5+0.5*correlation
    return 1.0
  
def cosSim(inA,inB):
    """Cosine similarity rescaled from [-1, 1] to [0, 1].

    Accepts column matrices (as the estimators pass them) as well as row
    matrices or plain 1-D arrays; both inputs are flattened before the dot
    product is taken, so they must hold the same number of elements.

    Returns 0.5 (the rescaled value of cosine 0) when either vector has
    zero norm, instead of letting 0/0 produce nan that would then leak
    into the weighted score sums.
    """
    vecA = asarray(inA).ravel()
    vecB = asarray(inB).ravel()
    denom = la.norm(vecA)*la.norm(vecB)
    if denom == 0:
        return 0.5
    num = float(dot(vecA, vecB))
    return 0.5+0.5*(num/denom)
def standEst(dataMat, user, simMeas, item):
    """Estimate ``user``'s rating of ``item`` from item-to-item similarity.

    For every column the user has a non-zero rating in, the similarity
    between that column and ``item`` is computed with ``simMeas`` over the
    rows where both columns are positive (0 when there is no such row).
    The result is the similarity-weighted average of the user's ratings,
    or 0 when the similarities sum to 0.

    ``dataMat`` must be a numpy matrix (the ``.A`` attribute is used).
    """
    itemCount = shape(dataMat)[1]
    weightSum = 0.0
    weightedRatings = 0.0
    for other in range(itemCount):
        rating = dataMat[user,other]
        if rating == 0:
            # The user gave no rating to this column: nothing to weigh.
            continue
        ratedBoth = logical_and(dataMat[:,item].A>0, dataMat[:,other].A>0)
        raters = nonzero(ratedBoth)[0]  # row indices that rated both columns
        if len(raters) > 0:
            sim = simMeas(dataMat[raters,item], dataMat[raters,other])
        else:
            sim = 0
        weightSum += sim
        weightedRatings += sim * rating
    if weightSum == 0:
        return 0
    return weightedRatings/weightSum
def svdEst(dataMat, user, simMeas, item,n,xformedItems):
    """Estimate ``user``'s rating of ``item`` in the reduced feature space.

    ``xformedItems`` holds one row of features per item and is computed by
    the caller.  For each of the first ``n`` columns that the user rated
    (non-zero) and that is not ``item`` itself, the similarity between the
    two feature rows is taken with ``simMeas``; both rows are handed over
    transposed.  The result is the similarity-weighted average of the
    user's ratings, or 0 when the similarities sum to 0.
    """
    weightSum = 0.0
    weightedRatings = 0.0
    for other in range(n):
        rating = dataMat[user,other]
        if rating != 0 and other != item:
            sim = simMeas(xformedItems[item,:].T, xformedItems[other,:].T)
            weightSum += sim
            weightedRatings += sim * rating
    if weightSum == 0:
        return 0
    return weightedRatings/weightSum
def recommend(dataMat, userid, N=3, simMeas=cosSim, estMethod=svdEst,
              userlist_path=r'C:\Users\dell\Desktop\shujuji\userlist.txt'):
    """Return the indices of the N best-scoring unrated items for a user.

    The user is located by name in the user list (comma-separated fields
    ``name, qquin, id``); the zero-based line number of the match selects
    the row of ``dataMat``.  Every column whose value is 0 in that row is
    scored with ``estMethod`` and the highest scores win.

    Parameters:
        dataMat: numpy matrix of ratings, one row per user-list line.
        userid: user name, compared with field 0 of each line after a
            UTF-8 byte-order mark is removed.
        N: maximum number of items to return.
        simMeas: similarity function handed to the estimator.
        estMethod: ``standEst`` (called without SVD data) or an estimator
            with the ``svdEst`` signature.
        userlist_path: path of the user list file.

    Returns:
        A tuple of up to N column indices, best first, or the string
        ``'you rated everything'`` when the user has no unrated item.

    Raises:
        ValueError: ``userid`` is not present in the user list.
    """
    user = None
    with open(userlist_path, 'r') as usr:
        for index, line in enumerate(usr):
            fields = line.split(',')
            if len(fields) < 3:
                # Blank/malformed line: still counts as a row position.
                continue
            te = fields[0].replace('\xef\xbb\xbf', '')
            if te == userid:
                # Diagnostic output kept from the original implementation.
                print(fields[2] + fields[1])
                print(index)
                user = index
                break
    if user is None:
        # Falling through to the last line would silently score the wrong
        # user, so report the problem instead.
        raise ValueError('user %r not found in %s' % (userid, userlist_path))

    unratedItems = nonzero(dataMat[user,:].A==0)[1]  # column indices of unrated items
    if len(unratedItems) == 0:
        return 'you rated everything'
    n = shape(dataMat)[1]

    xformedItems = None
    if estMethod != standEst:
        U,Sigma,VT = la.svd(dataMat)
        # Keep at most 106 singular values, fewer for a smaller matrix.
        k = min(106, len(Sigma))
        SigK = asmatrix(eye(k)*Sigma[:k])  # diagonal matrix of the kept values
        xformedItems = dataMat.T * U[:,:k] * SigK.I  # one feature row per item

    itemScores = []
    for item in unratedItems:
        if estMethod==standEst:
            estimatedScore = estMethod(dataMat, user, simMeas, item)
        else:
            estimatedScore = estMethod(dataMat, user, simMeas, item, n, xformedItems)
        itemScores.append((item, estimatedScore))
    ranked = sorted(itemScores, key=lambda jj: jj[1], reverse=True)[:N]
    return tuple(item for item, _ in ranked)

# Build the rating matrix once at module level, then print the
# recommendation for one sample user using the SVD-based estimator
# (pass estMethod=standEst to compare with the plain item-based estimate).
myMat = asmatrix(loadExData3())
print(recommend(myMat, 'cao奈何桥断了', estMethod=svdEst))





你可能感兴趣的:(基于Python推荐系统)