前言
在github上想找案例推理实现代码,找到一个简介是基于CBR的鸡尾酒推荐代码,项目链接:vizkids/Case-Based-Reasoning-using-Python 。项目源文件里有几处错误,我修改了一下已经跑通了,大概的记录一下代码细节,但这个项目对我并没有帮助
# coding: utf-8
# time:2023.03.31
# reeditor: 早知晓
from xml.dom import minidom
from lxml import etree
import os
# 接收某鸡尾酒名称,返回鸡尾酒的原材料列表
def getCocktailIngredient(cocktailname):
    """Return the ingredient texts of the cocktail named *cocktailname*.

    Parses ``cocktails.xml`` from the current working directory, resolves the
    matching ``recipe`` element through ``getTitleRoot`` and collects the
    ``text`` of every child of each ``ingredients`` element, in document order.

    Args:
        cocktailname: Exact text of the recipe's ``title`` element.

    Returns:
        list: One entry per ingredient node (``None`` for a node without
        text). An empty list is returned when the lookup yields ``None``.
    """
    tree = etree.parse('cocktails.xml')
    titles = tree.getroot().findall('recipe/title')
    recipe = getTitleRoot(titles, cocktailname)
    if recipe is None:
        return []
    # Iterate the elements directly: Element.getchildren() is deprecated.
    return [
        item.text
        for section in recipe
        if section.tag == 'ingredients'
        for item in section
    ]
# 接收某鸡尾酒名称,返回鸡尾酒的制作步骤列表
def getCocktailPreparation(cocktailname):
    """Return the preparation step texts of the cocktail named *cocktailname*.

    Parses ``cocktails.xml`` from the current working directory, resolves the
    matching ``recipe`` element through ``getTitleRoot`` and collects the
    ``text`` of every child of each ``preparation`` element, in document order.

    Args:
        cocktailname: Exact text of the recipe's ``title`` element.

    Returns:
        list: One entry per step node (``None`` for a node without text).
        An empty list is returned when the lookup yields ``None``.
    """
    tree = etree.parse('cocktails.xml')
    titles = tree.getroot().findall('recipe/title')
    recipe = getTitleRoot(titles, cocktailname)
    if recipe is None:
        return []
    # Iterate the elements directly: Element.getchildren() is deprecated.
    return [
        step.text
        for section in recipe
        if section.tag == "preparation"
        for step in section
    ]
def getTitleRoot(titlelist,titleValue):
    """Return the parent element of the title whose text equals *titleValue*.

    Args:
        titlelist: Iterable of title nodes exposing ``.text`` and
            ``.getparent()``.
        titleValue: Title text to look for (exact, case-sensitive match).

    Returns:
        The parent of the matching title node. When several titles match,
        the last one in *titlelist* wins. ``None`` when nothing matches
        (previously this case raised ``UnboundLocalError``).
    """
    titleRoot = None
    for title in titlelist:
        if title.text == titleValue:
            titleRoot = title.getparent()
    return titleRoot
def getCocktailList():
    """Return a set holding the 2nd, 3rd and 4th ``title`` elements.

    ``cocktails.xml`` is parsed with minidom from the current working
    directory. The element at index 0 is not included, and an ``IndexError``
    is raised when the document has fewer than four ``title`` elements.
    """
    titles = minidom.parse('cocktails.xml').getElementsByTagName('title')
    return {titles[position] for position in (1, 2, 3)}
def get_xmltext(parent, subnode_name):
    """Return the serialized inner XML of the first *subnode_name* descendant.

    Args:
        parent: DOM node providing ``getElementsByTagName``.
        subnode_name: Tag name to search for below *parent*.

    Returns:
        str: Concatenation of ``toxml()`` for every child of the first match.

    Raises:
        IndexError: If *parent* has no descendant named *subnode_name*.
    """
    first_match = parent.getElementsByTagName(subnode_name)[0]
    pieces = []
    for child in first_match.childNodes:
        pieces.append(child.toxml())
    return "".join(pieces)
# 将用户输入选项保存至input.txt文件中
def putCocktails(fruits,vegetables,alcoholicLiqueurs,nonalcoholicLiqueurs,tasteEnhancers,others):
    """Persist the user's ingredient selection to ``input.txt``.

    One ``category:value`` line is written per category, in a fixed order.
    The first five arguments are formatted with ``%s`` as given; *others*
    is an iterable of strings that is joined with commas.

    Args:
        fruits: Value for the ``fruit`` line.
        vegetables: Value for the ``vegetables`` line.
        alcoholicLiqueurs: Value for the ``alcoholicLiqueurs`` line.
        nonalcoholicLiqueurs: Value for the ``nonalcoholicLiqueurs`` line.
        tasteEnhancers: Value for the ``tasteEnhancers`` line.
        others: Iterable of strings for the ``others`` line.
    """
    # Named so the builtin ``list`` is no longer shadowed.
    others_joined = ",".join(others)
    lines = [
        "fruit:%s" % fruits,
        "vegetables:%s" % vegetables,
        "alcoholicLiqueurs:%s" % alcoholicLiqueurs,
        "nonalcoholicLiqueurs:%s" % nonalcoholicLiqueurs,
        "tasteEnhancers:%s" % tasteEnhancers,
        "others:%s" % others_joined,
    ]
    # The context manager closes the file even if a write fails.
    with open("input.txt", "w") as text_file:
        for line in lines:
            text_file.write(line + '\n')
# 获取每个鸡尾酒的信息,包括名称、原材料,存入cocktaillib文件中
def xmlParse():
    """Build ``cocktail_recipe_lib`` from ``cocktails.xml``.

    For every child of the XML root, the text of its first child (the title)
    is followed by one ``category:food`` entry for each ingredient node whose
    ``food`` attribute appears in one of the category lists below. The
    entries of a recipe are written comma-separated on one line.

    Ingredient nodes without a ``food`` attribute, or with a value found in
    no category, are skipped (a missing attribute previously raised
    ``KeyError``).
    """
    root = etree.parse("cocktails.xml").getroot()

    list_alchohol=['white rum','kirsch','Cremant','cognac','champagne','vodka','martini','Noilly_Prat','Whiskey','Benedictine','pastis','rum','malibu rum','dry white wine','Porto','dark rum','White martini','Angostura bitter','pisang ambon','cava','Prosecco','Amber rum','lemon liqueur','sparkling wine','campari','vermouth','Creme de café','triple sec','white wine','red martini','plum brandy','rice wine','calvados','cachaca']
    list_fruits=['lemon','raspberry','citrus fruit','strawberry','kiwi fruit','orange','apricot','currant/black currant','lime','"lime zest(outer skin of lime)"','blood orange','pineapple apple','grapefruit','banana','litchi','berry','melon']
    list_veggies=['cucumbers','lime','wasabi','lemongrass','ginger','mint','tomato','coriander','guava']
    list_liqueur=['Lemonade','Blue curacao','syrup','passion fruit syrup','Orange juice','cointreau','grenadine','pineapple juice','coconut','lime juice','Apricot juice','Creme de cassis','apple juice','grapefruit juice','Hard cider','Cranberry juice','Banana juice','Mango juice','Passion fruit juice','rice milk','coconut milk','Tamarin juice','coffee','apple cider','apricot liqueur','worcestershire sauce','"Currant syrup(blackcurrant juice)"','orgeat syrup','grand marnier','litchi juice','berry juice','strawberry juice','coffee liqueur','coca-cola']
    list_te=['sugar','cane sugar','sour cream','tabasco sauce','light whipping cream','egg','cinnamon','nutmeg','brown sugar','anise basil','powdered sugar','granulated sugar','vanilla sugar','celery salt','pepper','milk','salt']
    list_others=['ice cube','sparkling water','ice cream','soda water','sparkling mineral water']

    # (prefix, known names, compare lower-cased?). Checked in this order and
    # the first hit wins, e.g. 'lime' is classified as fruit, not vegetable.
    # Only the "others" category is matched case-insensitively.
    categories = (
        ('alcoholicLiqueurs:', list_alchohol, False),
        ('fruit:', list_fruits, False),
        ('vegetables:', list_veggies, False),
        ('nonalcoholicLiqueurs:', list_liqueur, False),
        ('tasteEnhancers:', list_te, False),
        ('others:', list_others, True),
    )

    library = []  # one list per recipe: [title, "category:food", ...]
    for recipe_node in root:
        # Positional access: child 0 is used as the title, child 1 as the
        # container of the ingredient nodes.
        entry = [recipe_node[0].text]
        for ingredient in recipe_node[1]:
            food = ingredient.get("food")
            if food is None:
                continue
            for prefix, names, fold_case in categories:
                candidate = food.lower() if fold_case else food
                if candidate in names:
                    entry.append(prefix + food)
                    break
        library.append(entry)

    with open("cocktail_recipe_lib", "w") as lib_file:
        for entry in library:
            lib_file.write(','.join(entry))
            lib_file.write('\n')
# 计算用户选择的原材料与原有鸡尾酒的相似度
def getSimilarityScore():
    """Rank the stored cocktails against the user's selection.

    Reads, from the current working directory:

    * ``cocktail_recipe_lib`` - one recipe per line,
      ``name,category:ingredient,...``;
    * ``alcoholicLiqueurs``, ``nonalcoholicLiqueurs``, ``fruit``,
      ``vegetables``, ``tasteEnhancers``, ``others`` - similarity tables
      with rows ``ingredientA,ingredientB,similarity``;
    * ``input.txt`` - the user's selection, ``category:ing1,ing2,...``.

    For each recipe ingredient the highest similarity to any selected
    ingredient of the same category is kept. The per-category values are
    summed, divided by the number of comma-separated fields after the name
    and scaled to an integer percentage. The three best recipes are written
    to ``output.txt`` as ``name,NN% ,strength`` where strength is ``Nil``,
    ``Moderate`` or ``High`` depending on the number of alcoholic
    comparisons performed (0, 1-2, 3 or more).
    """
    def read_lines(filepath):
        # Context manager guarantees the handle is closed.
        with open(filepath) as handle:
            return handle.readlines()

    def read_table(filepath):
        # Split every row once instead of on every lookup.
        return [row.strip().split(',') for row in read_lines(filepath)]

    recipes = read_lines("cocktail_recipe_lib")
    # Kept in a local mapping: the data is no longer injected into the module
    # globals (which also shadowed the builtin ``input``) and the table is
    # selected by key instead of eval() on text read from a file.
    tables = {}
    for category in ("alcoholicLiqueurs", "nonalcoholicLiqueurs", "fruit",
                     "vegetables", "tasteEnhancers", "others"):
        tables[category] = read_table(category)
    user_lines = read_lines("input.txt")

    def getscore(ing, cing, itype):
        """Return the raw similarity text for the pair, or None if absent."""
        for recs in tables.get(itype, ()):
            if len(recs) < 3:
                continue  # blank or malformed row
            if (recs[0] == ing and recs[1] == cing) or (recs[0] == cing and recs[1] == ing):
                return recs[2]
        return None

    # Parse the user's selection once; lines that do not split into exactly
    # "category:values" are ignored.
    selections = []
    for line in user_lines:
        try:
            itype, ing = line.strip().split(":")
        except ValueError:
            continue
        selections.append((itype, ing.split(',')))

    ranked = []  # (name, percentage, alcoholic comparison count)
    for recipe in recipes:
        comp_list = recipe.strip().split(',')
        score = {}
        alcohol_content = 0
        for comps in comp_list:
            try:
                ctype, cing = comps.split(':')
            except ValueError:
                continue  # the recipe name has no "category:" prefix
            # NOTE(review): the value is reset for every recipe ingredient,
            # so with several ingredients of one category only the last one
            # contributes - confirm this is intended.
            best = 0
            for itype, wanted in selections:
                if itype != ctype:
                    continue
                try:
                    for sub_ing in wanted:
                        if sub_ing == '':
                            continue
                        scr = getscore(sub_ing, cing, itype)
                        if ctype == "alcoholicLiqueurs":
                            alcohol_content += 1
                        if scr is not None:
                            # Compare and store as float; storing the raw
                            # string made the next comparison raise TypeError.
                            value = float(scr)
                            if best < value:
                                best = value
                except ValueError:
                    # Non-numeric similarity: give up on this input line.
                    continue
            score[ctype] = best

        final_score = sum(score.values())
        ingredient_count = len(comp_list) - 1
        if ingredient_count > 0:
            percent = int(final_score / ingredient_count * 100)
        else:
            percent = 0
        ranked.append((comp_list[0], percent, alcohol_content))

    # Stable sort, best match first.
    best_matches = sorted(ranked, key=lambda item: float(item[1]), reverse=True)

    if os.path.exists("output.txt"):
        os.remove("output.txt")
    with open("output.txt", "w") as out_file:
        for name, percent, alcohol_hits in best_matches[0:3]:
            if alcohol_hits == 0:
                strength = "Nil"
            elif 0 < alcohol_hits < 3:
                strength = "Moderate"
            else:
                strength = "High"
            out_file.write(name + "," + str(percent) + "% ," + strength)
            out_file.write("\n")
def retrieval():
    """Run the retrieval step: rebuild the recipe library, then score it.

    ``xmlParse`` is called first and ``getSimilarityScore`` second; nothing
    is returned.
    """
    xmlParse()
    getSimilarityScore()
# 创建 adapt.txt 文件,写入一个鸡尾酒相关信息的xml形式
def writeAdaptDetails(cName,cIngredient,cPreparation):
    """Write one cocktail to ``adapt.txt`` as a ``<recipe>`` XML fragment.

    The tag literals had been reduced to empty strings, so only blank lines
    and bare text were written. They are restored here: ``recipe``,
    ``title``, ``ingredients`` and ``preparation`` are the names the readers
    in this module look up.
    NOTE(review): the child tag names ``ingredient`` and ``step`` are assumed
    - confirm against the existing entries of cocktails.xml.

    Args:
        cName: Cocktail name; surrounding whitespace is stripped.
        cIngredient: Comma-separated ingredients.
        cPreparation: Preparation text; split into steps on ``.``.

    Text is XML-escaped, and fragments that are empty after stripping (for
    example the one after a trailing ``.``) are not written.
    """
    from xml.sax.saxutils import escape  # local import keeps the block self-contained

    ingredients = [part.strip() for part in cIngredient.split(",")]
    steps = [part.strip() for part in cPreparation.split(".")]

    lines = ["<recipe>", "<title>" + escape(cName.strip()) + "</title>", "<ingredients>"]
    lines.extend("<ingredient>" + escape(item) + "</ingredient>" for item in ingredients if item)
    lines.append("</ingredients>")
    lines.append("<preparation>")
    lines.extend("<step>" + escape(step) + "</step>" for step in steps if step)
    lines.append("</preparation>")
    lines.append("</recipe>")

    with open("adapt.txt", "w") as text_file:
        for line in lines:
            text_file.write(line + '\n')
# 处理list,删掉英文字符[]
def listToStringWithoutBrackets(list1):
    """Return ``str(list1)`` with every ``[`` and ``]`` character removed.

    Brackets are dropped wherever they occur, including inside nested lists
    and inside the elements' own representations.
    """
    rendered = str(list1)
    for bracket in ('[', ']'):
        rendered = rendered.replace(bracket, '')
    return rendered
# 向xml文件中插入一条新的鸡尾酒信息
def adaptXML():
    """Append the recipe stored in ``adapt.txt`` to ``cocktails.xml``.

    The last non-blank line of ``cocktails.xml`` is taken to be the closing
    tag of the root element. It is dropped, the lines of ``adapt.txt`` are
    appended, and the closing ``</recipes>`` tag is written back (the
    literal had been reduced to an empty string, which left the document
    without its root end tag).

    Both files are handled through context managers so the rewritten
    document is flushed and closed; the handles were previously left open.
    """
    with open("cocktails.xml", "r") as xml_file:
        lines = xml_file.readlines()

    # Ignore trailing blank lines so that the line removed next really is
    # the root closing tag.
    while lines and not lines[-1].strip():
        lines.pop()
    if lines:
        lines.pop()  # drop the closing tag of the root element

    with open("adapt.txt", "r") as adapt_file:
        lines.extend(adapt_file.readlines())

    # Keep the closing tag on a line of its own so a later call can remove it.
    if lines and not lines[-1].endswith("\n"):
        lines[-1] += "\n"

    # "w" truncates the file; the whole document is written again.
    with open("cocktails.xml", "w") as xml_file:
        xml_file.writelines(lines)
        xml_file.write("</recipes>")
系统实现的重点就是计算用户选择的鸡尾酒组成成分与已知鸡尾酒的相似度。该系统采用的实现方法是:手动保存各类组成成分之间的相似值文件(alcoholicLiqueurs、fruit、nonalcoholicLiqueurs、others、tasteEnhancers、vegetables),然后遍历已知鸡尾酒的组成成分,保存相同类型组成成分的最大相似值,计算每个鸡尾酒所有组成成分最大值之和,最终获得所有鸡尾酒的相似值,并把相似值排名前三的鸡尾酒推荐给用户。
1、home 界面。
2、推荐鸡尾酒界面。
3、展示鸡尾酒和制作步骤。
(1)下载代码中引用的包,xml、lxml、os、flask;
(2)将cocktailList.py内容更新;
(3)运行routes.py,在浏览器中打开运行窗口弹出的网址,即为鸡尾酒推荐系统的web界面,可以调试一下。
关键实现部分(相似度计算)我用不上,所以这个项目白运行了,继续找别的代码,fighting!!!搞科研,找工作,oh my god!好焦虑!!! *100