检索式聊天机器人依据制定的答复规则,利用排序学习技术和深度(正则)匹配技术,在语料库中找到适合当前输入的最佳回复。该方法只能以固定的语言模式(设定的规则)进行回复,无法实现词语的多样组合,比较死板,应用领域受限;同时语料库越庞大,回复速度会越慢。
内存中设定语料库作为测试语料,通过简单的搜索判断问题是否在语料库中,若在语料库中,则随机回复一个对应的答案,若问题不在语料库中,则回复规定的答案。
#-*-coding:utf-8-*-
import random
# Inputs that count as a greeting; the reply to a greeting is drawn from the
# same list.
greetings = ['你好','嗨','嘿','哈喽','您好','好久不见','Hi','Hello','Hey']
# Questions the bot recognises (exact match on the whole input only).
questions = ["最近过得如何?","最近工作怎么样呀?","工作还顺利吗?"]
# Candidate answers to any recognised question.
responses = ['最近过得不错!','又涨工资了,嘿嘿!','很忙,很忙,但是很开心!你呢?','你最近怎么样呀?']
# Input that ends the conversation.
FAREWELL = '再见'
# Reply used when the input is not in the corpus.
FALLBACK_REPLY = "别逼逼了,我不知道你在说啥!!!!"


def get_reply(question):
    """Return the bot's reply to *question* by exact corpus lookup.

    Args:
        question: The raw line typed by the user.

    Returns:
        A random greeting when *question* is a known greeting, a random
        answer when it is a known question, ``None`` when it is the farewell
        word (the caller should stop), and the fallback reply otherwise.
    """
    # The random choice is made per call so that every turn can get a
    # different reply; choosing once up front would repeat the same sentence
    # for the whole session.
    if question in greetings:
        return random.choice(greetings)
    if question in questions:
        return random.choice(responses)
    if question == FAREWELL:
        return None
    return FALLBACK_REPLY


if __name__ == "__main__":
    # Read-reply loop: keep answering until the user says goodbye.
    while True:
        reply = get_reply(input('测试爸爸说>'))
        if reply is None:
            break
        print(reply)
在简单判断问题是否在语料库中的基础上,对问题进行分析,即利用自然语言处理技术,进行语法分析,提取关键词,将关键词与语料库中的语料进行对比,增加了回复的准确率和机器人的应对能力。
import nltk
import jieba
import pynlpir
from nltk import word_tokenize
import random
#下载分词器punkt
# nltk.download('punkt')
# Words that mark the input as a greeting; the reply to a greeting is drawn
# from the same list.
greetings = ['你好','嗨','嘿','哈喽','您好','好久不见','Hi','Hello','Hey']
# Keywords that mark the input as a "how are things going" question.
questions = ["如何","怎么样","顺利","工作"]
# Candidate answers to any input containing a question keyword.
responses = ['最近过得不错!','又涨工资了,嘿嘿!','很忙,很忙,但是很开心!你呢?','你最近怎么样呀?']
# Input that ends the conversation.
FAREWELL = '再见'
# Reply used when no keyword matches.
FALLBACK_REPLY = "别逼逼了,我不知道你在说啥!!!!"


def segment(text):
    """Split *text* into a list of words using jieba's full mode.

    NOTE: nltk's ``word_tokenize`` is aimed at English text and is not
    suitable for Chinese, which is why jieba is used here.
    """
    # jieba.cut yields tokens lazily; joining on spaces and splitting again
    # materialises them as a list and drops empty/whitespace-only tokens.
    return " ".join(jieba.cut(text, cut_all=True)).split()


def get_keyword_reply(question, tokens):
    """Return the bot's reply based on the keywords found in the input.

    Args:
        question: The raw line typed by the user.
        tokens: The words obtained by segmenting *question*.

    Returns:
        A random greeting when any token is a greeting word, a random answer
        when any token is a question keyword, ``None`` when *question* is the
        farewell word (the caller should stop), and the fallback reply
        otherwise.
    """
    words = set(tokens)
    # The random choice is made per call so that every turn can get a
    # different reply instead of one sentence fixed for the whole session.
    if not words.isdisjoint(greetings):
        return random.choice(greetings)
    if not words.isdisjoint(questions):
        return random.choice(responses)
    if question == FAREWELL:
        return None
    return FALLBACK_REPLY


if __name__ == "__main__":
    # Read-reply loop: keep answering until the user says goodbye.
    while True:
        question = input('测试爸爸说>')
        reply = get_keyword_reply(question, segment(question))
        if reply is None:
            break
        print(reply)
robot_server.py
import socketserver
import nltk, jieba, pynlpir
import random
class Robot(socketserver.BaseRequestHandler):
    """Request handler that chats with one client using keyword matching.

    After sending a welcome line, every message received from the client is
    segmented with jieba and answered with a random greeting, a random
    answer, or a fallback sentence, until the client sends ``q`` or
    disconnects.
    """

    # Words that mark a message as a greeting; greeting replies are drawn
    # from the same list.
    GREETINGS = ['你好','嗨','嘿','哈喽','您好','好久不见','Hi','Hello','Hey']
    # Keywords that mark a message as a question.
    QUESTIONS = ["如何","怎么样","顺利","工作"]
    # Candidate answers to any message containing a question keyword.
    RESPONSES = ['最近过得不错!','又涨工资了,嘿嘿!','很忙,很忙,但是很开心!你呢?','你最近怎么样呀?']
    # First line sent to every client right after it connects.
    WELCOME = "你好,我是robot~"
    # Reply used when no keyword matches.
    FALLBACK = "别逼逼了,我不知道你在说啥!!!"
    # Message with which the client ends the session.
    QUIT_COMMAND = 'q'

    def handle(self):
        """Serve one client connection until it quits or disconnects."""
        conn = self.request
        conn.sendall(self.WELCOME.encode("utf-8"))
        while True:
            # Read at most 1024 bytes of the client's next message.
            rec_request = conn.recv(1024)
            if not rec_request:
                # An empty read means the peer closed the connection; without
                # this check the loop would keep replying to a dead socket.
                break
            # Malformed UTF-8 from the network must not crash the handler,
            # so undecodable bytes are replaced instead of raising.
            rec_str = str(rec_request, encoding="utf-8", errors="replace")
            if rec_str == self.QUIT_COMMAND:
                break
            # Full-mode segmentation; join/split turns the generator into a
            # list and drops empty/whitespace-only tokens.
            tokens = " ".join(jieba.cut(rec_str, cut_all=True)).split()
            conn.sendall(self._choose_reply(tokens).encode("utf-8"))

    @classmethod
    def _choose_reply(cls, tokens):
        """Pick the reply text for a message that was segmented into *tokens*."""
        words = set(tokens)
        # Chosen per message so that consecutive replies can differ.
        if not words.isdisjoint(cls.GREETINGS):
            return random.choice(cls.GREETINGS)
        if not words.isdisjoint(cls.QUESTIONS):
            return random.choice(cls.RESPONSES)
        return cls.FALLBACK
if __name__ == "__main__":
    # Listen on localhost:8080 and hand every incoming connection to Robot.
    listen_address = ("127.0.0.1", 8080)
    server = socketserver.ThreadingTCPServer(listen_address, Robot)
    server.serve_forever()
client_chat.py
import socket
class Chat(object):
    """Interactive console client for the robot chat server."""

    def chat(self, host="127.0.0.1", port=8080):
        """Connect to the server and relay console input until the user quits.

        Args:
            host: Address of the chat server.
            port: TCP port of the chat server.

        The session ends when the user types ``q`` (which is also forwarded
        to the server) or when the server closes the connection.
        """
        # The context manager guarantees the socket is closed on every exit
        # path, including exceptions.
        with socket.socket() as s:
            # connect() takes the address as a (host, port) tuple.
            s.connect((host, port))
            # The server speaks first; read up to 1024 bytes and decode the
            # UTF-8 payload for display.
            rec_bytes = s.recv(1024)
            rec_str = str(rec_bytes, encoding="utf-8")
            print("Response Infomation: {}".format(rec_str))
            while True:
                input_chat = input("你有什么问题>>")
                if not input_chat:
                    # Sending zero bytes delivers nothing to the server, so
                    # no reply would ever arrive and recv() would block
                    # forever; ask again instead.
                    continue
                # sendall() requires bytes, so encode the text first.
                s.sendall(bytes(input_chat, encoding='utf-8'))
                if input_chat == 'q':
                    break
                rec_bytes = s.recv(1024)
                if not rec_bytes:
                    # An empty read means the server closed the connection.
                    break
                rec_str = str(rec_bytes, encoding='utf-8')
                print("Response Infomation: {}".format(rec_str))
if __name__ == "__main__":
    # Start an interactive chat session against the default server address.
    Chat().chat()
[参考文献]
[1]https://www.cnblogs.com/veitch-623/p/6828608.html?utm_source=itdadao&utm_medium=referral
[2]https://www.cnblogs.com/aylin/p/5572104.html