'''
Created on 2020年1月7日
@author: myz
'''
import tkinter
import tkinter.font as tkFont
import threading
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
# --- Main window -----------------------------------------------------------
root = tkinter.Tk()
root.title('两种算法对比')
root.geometry('900x900')

# Text typed into the prediction entry box; read by the models' predict().
input_var = tkinter.StringVar()

# Output panes: one per model for the evaluation results, plus a shared one
# for single predictions.
bayes_text = tkinter.Text(root, width=61, height=25)
decision_text = tkinter.Text(root, width=61, height=25)
predict_text = tkinter.Text(root, width=100, height=20)

# Font used by the heading labels.
ft1 = tkFont.Font(size=20, slant=tkFont.ITALIC)

# Placeholder data frame; the __main__ block replaces it with the CSV data.
dff = pd.DataFrame()

# Latest accuracy scores, overwritten by the worker threads:
# score1/score2 = decision tree (train/test), score3/score4 = bayes (train/test).
score1 = score2 = score3 = score4 = 0.0

# Latest single predictions, overwritten by each model's predict().
y_pred_bayes = []
y_pred_decisiontree = []
'''
贝叶斯多项式模型线程
'''
class bayes(threading.Thread):
    """Background thread that trains and evaluates a multinomial naive Bayes model.

    Every sample is the string ``str(item_id) + str(time)`` built from one row
    of ``dff``; its label is that row's ``behavior_type``.  Samples are turned
    into TF-IDF features before being passed to the classifier.

    Results are written to the module-level ``bayes_text`` widget and to the
    module-level ``score3`` / ``score4`` / ``y_pred_bayes`` variables.
    """

    def __init__(self, dff):
        """Store the data and create the (still unfitted) model objects.

        Args:
            dff: DataFrame providing the ``behavior_type``, ``time`` and
                ``item_id`` columns read by :meth:`run`.
        """
        super(bayes, self).__init__()
        self.dff = dff
        self.clf = MultinomialNB(fit_prior=True, alpha=6.0)
        self.tfidf = TfidfVectorizer(max_features=4000, lowercase=False,
                                     ngram_range=(2, 4))
        self.train_data = []
        self.test_data = []
        self.train_class = []
        self.test_class = []
        # Set by run() once both the vectorizer and the classifier are fitted.
        self._model_ready = threading.Event()

    @staticmethod
    def _build_samples(dff):
        """Return one ``str(item_id) + str(time)`` string per row of *dff*."""
        return [str(item) + str(when)
                for item, when in zip(dff['item_id'], dff['time'])]

    def run(self):
        """Train on 75% of the rows, score on the rest and show the results."""
        global score3
        global score4

        labels = list(self.dff['behavior_type'])
        samples = self._build_samples(self.dff)

        # Split the data into a training set and a test set.
        (self.train_data, self.test_data,
         self.train_class, self.test_class) = train_test_split(
            samples, labels, shuffle=True, test_size=0.25)

        # Feature extraction.  The vocabulary and IDF weights are learned from
        # the training set only; the test set must merely be transformed,
        # otherwise its columns would not match the features the classifier
        # was trained on.
        tf_train_data = self.tfidf.fit_transform(self.train_data)
        tf_test_data = self.tfidf.transform(self.test_data)

        # Fit the naive Bayes classifier and predict the held-out samples.
        self.clf.fit(tf_train_data, self.train_class)
        self._model_ready.set()
        y_pred = self.clf.predict(tf_test_data)

        score3 = self.clf.score(tf_train_data, self.train_class)
        score4 = self.clf.score(tf_test_data, self.test_class)
        # classification_report expects (y_true, y_pred); the reverse order
        # would swap precision and recall in the report.
        report = classification_report(self.test_class, y_pred)

        bayes_text.insert(tkinter.INSERT, '贝叶斯训练集分数{}\n'.format(score3))
        bayes_text.insert(tkinter.INSERT, '贝叶斯测试集分数{}\n'.format(score4))
        print('bayes训练集分数为:{}'.format(score3))
        print('bayes测试集分数为:{}'.format(score4))
        print('分类报告{}'.format(report))
        # The report lists per-label precision/recall plus the averages.
        bayes_text.insert(tkinter.INSERT, '贝叶斯分类报告{}\n'.format(report))

    def predict(self):
        """Predict the behaviour for the text currently held by ``input_var``.

        The prediction is stored in the module-level ``y_pred_bayes`` (which
        is what the GUI callback reads) and also returned.

        Returns:
            The array returned by the classifier's ``predict`` for the single
            input sample.

        Raises:
            RuntimeError: if called before :meth:`run` has fitted the model.
        """
        global y_pred_bayes
        if not self._model_ready.is_set():
            raise RuntimeError('bayes model has not been trained yet')
        # Reuse the vectorizer fitted in run(); refitting it here is
        # unnecessary and would only repeat the work on the training set.
        features = self.tfidf.transform([input_var.get()])
        y_pred_bayes = self.clf.predict(features)
        print('预测为:{}'.format(y_pred_bayes))
        return y_pred_bayes
'''
决策树模型线程
'''
class DecisionTree(threading.Thread):
    """Background thread that trains and evaluates a decision-tree classifier.

    Every sample is the string ``str(item_id) + str(time)`` built from one row
    of ``dff``; its label is that row's ``behavior_type``.  Samples are turned
    into TF-IDF features before being passed to the classifier.

    Results are written to the module-level ``decision_text`` widget and to
    the module-level ``score1`` / ``score2`` / ``y_pred_decisiontree``
    variables.
    """

    def __init__(self, dff):
        """Store the data and create the (still unfitted) model objects.

        Args:
            dff: DataFrame providing the ``behavior_type``, ``time`` and
                ``item_id`` columns read by :meth:`run`.
        """
        super(DecisionTree, self).__init__()
        self.dff = dff
        # Cap the depth of the tree at 30 levels.
        self.model = DecisionTreeClassifier(max_depth=30)
        self.tfidf = TfidfVectorizer(max_features=4000, lowercase=False,
                                     ngram_range=(2, 4))
        self.train_data2 = []
        self.test_data2 = []
        self.train_class2 = []
        self.test_class2 = []
        # Set by run() once both the vectorizer and the classifier are fitted.
        self._model_ready = threading.Event()

    @staticmethod
    def _build_samples(dff):
        """Return one ``str(item_id) + str(time)`` string per row of *dff*."""
        return [str(item) + str(when)
                for item, when in zip(dff['item_id'], dff['time'])]

    def run(self):
        """Train on 75% of the rows, score on the rest and show the results."""
        global score1
        global score2

        labels = list(self.dff['behavior_type'])
        samples = self._build_samples(self.dff)

        # Split the data into a training set and a test set.
        (self.train_data2, self.test_data2,
         self.train_class2, self.test_class2) = train_test_split(
            samples, labels, shuffle=True, test_size=0.25)

        # Feature extraction.  The vocabulary and IDF weights are learned from
        # the training set only; the test set must merely be transformed,
        # otherwise its columns would not match the features the classifier
        # was trained on.
        tf_train_data2 = self.tfidf.fit_transform(self.train_data2)
        tf_test_data2 = self.tfidf.transform(self.test_data2)

        self.model.fit(tf_train_data2, self.train_class2)
        self._model_ready.set()
        y_pred = self.model.predict(tf_test_data2)

        score1 = self.model.score(tf_train_data2, self.train_class2)
        score2 = self.model.score(tf_test_data2, self.test_class2)
        # classification_report expects (y_true, y_pred); the reverse order
        # would swap precision and recall in the report.
        report = classification_report(self.test_class2, y_pred)

        decision_text.insert(tkinter.INSERT, '决策树训练集分数{}\n'.format(score1))
        decision_text.insert(tkinter.INSERT, '决策树测试集分数{}\n'.format(score2))
        print('决策树模型训练集分数为:{}'.format(score1))
        print('决策树模型测试集分数为:{}'.format(score2))
        print('分类报告{}'.format(report))
        decision_text.insert(tkinter.INSERT, '决策树分类报告\n{}'.format(report))

    def predict(self):
        """Predict the behaviour for the text currently held by ``input_var``.

        The prediction is stored in the module-level ``y_pred_decisiontree``
        (which is what the GUI callback reads) and also returned.

        Returns:
            The array returned by the classifier's ``predict`` for the single
            input sample.

        Raises:
            RuntimeError: if called before :meth:`run` has fitted the model.
        """
        global y_pred_decisiontree
        if not self._model_ready.is_set():
            raise RuntimeError('decision tree model has not been trained yet')
        # Reuse the vectorizer fitted in run(); refitting it here is
        # unnecessary and would only repeat the work on the training set.
        features = self.tfidf.transform([input_var.get()])
        y_pred_decisiontree = self.model.predict(features)
        print('预测为:{}'.format(y_pred_decisiontree))
        return y_pred_decisiontree
def interface():
    """Lay out every widget in the root window, then run the Tk event loop.

    The call does not return until ``root.mainloop()`` exits.
    """
    # Row 0: main heading.
    tkinter.Label(root, text='预测用户行为', font=ft1).grid(
        row=0, columnspan=8, pady=8)

    # Row 1: one "evaluate" button per model; `command` is the click handler.
    tkinter.Button(root, text='bayes模型', command=bayes_start).grid(
        row=1, column=0, columnspan=4, pady=2)
    tkinter.Button(root, text='decision_tree模型', command=decision_tree).grid(
        row=1, column=4, columnspan=4, pady=2)

    # Row 2: the two evaluation output panes, side by side.
    bayes_text.grid(row=2, column=0, columnspan=4, padx=8)
    decision_text.grid(row=2, column=4, columnspan=4, padx=8)

    # Row 3: heading of the prediction area.
    tkinter.Label(root, text='预测用户行为', font=ft1).grid(
        row=3, columnspan=8, pady=8)

    # Row 4: input entry bound to input_var, with the bayes predict button.
    tkinter.Entry(root, textvariable=input_var, width=100).grid(
        row=4, column=0, pady=10, columnspan=7, ipady=7)
    tkinter.Button(root, text='bayes预测', command=bayes_predict).grid(
        row=4, column=7, pady=10)

    # Row 5 onwards: prediction output pane and the decision-tree button.
    predict_text.grid(row=5, column=0, rowspan=10, columnspan=7, padx=8)
    tkinter.Button(root, text='decision_tree预测', command=decision_predict).grid(
        row=5, column=7)

    root.mainloop()
def bayes_start():
    """Button callback: start the naive Bayes evaluation thread ``m1``.

    A ``threading.Thread`` may be started only once, so a repeated click is
    answered with a notice in ``bayes_text`` instead of letting
    ``m1.start()`` raise ``RuntimeError``.
    """
    # Thread.ident stays None until the thread has been started.
    if m1.ident is not None:
        bayes_text.insert(tkinter.INSERT, '贝叶斯模型评估已启动过,无需重复启动\n')
        return
    print('启动贝叶斯模型评估')
    bayes_text.insert(tkinter.INSERT, '启动贝叶斯模型评估\n')
    m1.start()
def decision_tree():
    """Button callback: start the decision-tree evaluation thread ``m2``.

    A ``threading.Thread`` may be started only once, so a repeated click is
    answered with a notice in ``decision_text`` instead of letting
    ``m2.start()`` raise ``RuntimeError``.
    """
    # Thread.ident stays None until the thread has been started.
    if m2.ident is not None:
        decision_text.insert(tkinter.INSERT, '决策树模型评估已启动过,无需重复启动\n')
        return
    print('启动决策树模型评估')
    decision_text.insert(tkinter.INSERT, '启动决策树模型评估\n')
    m2.start()
def bayes_predict():
    """Button callback: run the naive Bayes prediction and display it.

    ``m1.predict()`` stores its result in the module-level ``y_pred_bayes``;
    that value is then appended to ``predict_text``.
    """
    print('开始bayes预测')
    m1.predict()
    result_line = '消费者针对此商品,在此时的行为预测为:{}\n'.format(y_pred_bayes)
    for line in ('开始贝叶斯预测\n', result_line):
        predict_text.insert(tkinter.INSERT, line)
def decision_predict():
    """Button callback: run the decision-tree prediction and display it.

    ``m2.predict()`` stores its result in the module-level
    ``y_pred_decisiontree``; that value is then appended to ``predict_text``.
    """
    print('开始决策树预测')
    m2.predict()
    result_line = '消费者针对此商品,在此时的行为预测为:{}\n'.format(y_pred_decisiontree)
    for line in ('开始决策树预测\n', result_line):
        predict_text.insert(tkinter.INSERT, line)
if __name__ == '__main__':
    # Load the data set, create one (not yet started) worker thread per
    # model, then hand control over to the GUI.
    dff = pd.read_csv('clean_user33.csv')
    m1, m2 = bayes(dff), DecisionTree(dff)
    interface()