检索式聊天机器人及Socket实现对话功能

1 检索式聊天机器人

检索式聊天机器人依据制定的答复规则,利用排序学习技术和深度(正则)匹配技术,在语料库中为当前输入的问题找到最佳回复。该方法只能以固定的语言模式(设定的规则)进行回复,无法实现词语的多样组合,比较死板,应用领域受限;同时语料库越庞大,回复速度会越慢。

1.1 回复指定内容

内存中设定语料库作为测试语料,通过简单的搜索判断问题是否在语料库中,若在语料库中,则随机回复一个对应的答案,若问题不在语料库中,则回复规定的答案。

#-*-coding:utf-8-*-
import random 
# In-memory test corpus.
# Inputs treated as greetings; a greeting is answered with a random greeting.
greetings = ['你好','嗨','嘿','哈喽','您好','好久不见','Hi','Hello','Hey']
# Questions the bot recognises (exact match only).
questions = ["最近过得如何?","最近工作怎么样呀?","工作还顺利吗?"]
# Candidate answers for any recognised question.
responses = ['最近过得不错!','又涨工资了,嘿嘿!','很忙,很忙,但是很开心!你呢?','你最近怎么样呀?']


def get_reply(question):
	"""Return the bot's reply to *question*, or None to end the chat.

	The lookup is an exact string match against the corpus:

	* a known greeting  -> a random entry of ``greetings``
	* a known question  -> a random entry of ``responses``
	* '再见'             -> ``None`` (the caller should stop the loop)
	* anything else     -> the fixed fallback sentence
	"""
	if question in greetings:
		# Draw on every call; drawing once at start-up would make the bot
		# repeat the same greeting for the whole session.
		return random.choice(greetings)
	if question in questions:
		return random.choice(responses)
	if question == '再见':
		return None
	return "别逼逼了,我不知道你在说啥!!!!"


def main():
	"""Read questions from stdin and print replies until the user says '再见'."""
	while True:
		try:
			question = input('测试爸爸说>')
		except EOFError:
			# stdin was closed (Ctrl-D / end of piped input): stop quietly.
			break
		reply = get_reply(question)
		if reply is None:
			break
		print(reply)


if __name__ == "__main__":
	main()

1.2 提取关键词,回复指定内容

在简单判断问题是否在语料库中的基础上,对问题进行分析,即利用自然语言处理技术对问题进行分词、提取关键词,再将关键词与语料库中的语料进行对比,从而提高回复的准确率和机器人的应对能力。

import nltk
import jieba
import pynlpir
from nltk import word_tokenize
import random
#下载分词器punkt
# nltk.download('punkt')
# In-memory test corpus.
# Keywords that mark the input as a greeting.
greetings = ['你好','嗨','嘿','哈喽','您好','好久不见','Hi','Hello','Hey']
# Keywords that mark the input as a "how are things going" question.
questions = ["如何","怎么样","顺利","工作"]
# Candidate answers for any recognised question.
responses = ['最近过得不错!','又涨工资了,嘿嘿!','很忙,很忙,但是很开心!你呢?','你最近怎么样呀?']


def extract_keywords(text):
	"""Segment *text* with jieba in full mode and return the tokens as a list.

	nltk's ``word_tokenize`` targets English text and does not suit Chinese,
	so jieba is used for segmentation instead.
	"""
	# jieba.cut returns a generator; joining on spaces and splitting again
	# materialises it and drops whitespace-only tokens.
	return " ".join(jieba.cut(text, cut_all=True)).split()


def get_reply(question, keywords):
	"""Return the bot's reply, or None to end the chat.

	Args:
		question: the raw user input; only used to detect the '再见' command.
		keywords: iterable of tokens extracted from *question*.

	Returns:
		A random greeting if any keyword is a greeting, a random response if
		any keyword is a question keyword, ``None`` for '再见', otherwise the
		fixed fallback sentence.
	"""
	keywords = set(keywords)
	if not keywords.isdisjoint(greetings):
		# Draw on every call; drawing once at start-up would make the bot
		# repeat the same reply for the whole session.
		return random.choice(greetings)
	if not keywords.isdisjoint(questions):
		return random.choice(responses)
	if question == '再见':
		return None
	return "别逼逼了,我不知道你在说啥!!!!"


def main():
	"""Read questions from stdin and print replies until the user says '再见'."""
	while True:
		try:
			question = input('测试爸爸说>')
		except EOFError:
			# stdin was closed (Ctrl-D / end of piped input): stop quietly.
			break
		reply = get_reply(question, extract_keywords(question))
		if reply is None:
			break
		print(reply)


if __name__ == "__main__":
	main()

2 Socket提取关键词聊天机器人

2.1 机器人服务端

robot_server.py

import socketserver
import nltk, jieba, pynlpir
import random

class Robot(socketserver.BaseRequestHandler):
	"""Request handler implementing the keyword-matching chat robot.

	One instance handles one client connection: it sends a greeting, then
	answers every received message until the client sends 'q' or disconnects.
	"""

	# Keywords that mark a message as a greeting.
	greetings = ['你好','嗨','嘿','哈喽','您好','好久不见','Hi','Hello','Hey']
	# Keywords that mark a message as a "how are things going" question.
	questions = ["如何","怎么样","顺利","工作"]
	# Candidate answers for any recognised question.
	responses = ['最近过得不错!','又涨工资了,嘿嘿!','很忙,很忙,但是很开心!你呢?','你最近怎么样呀?']

	@classmethod
	def _choose_reply(cls, keywords):
		"""Return the reply text for a message segmented into *keywords*.

		A greeting keyword yields a random greeting, a question keyword a
		random response, and anything else the fixed fallback sentence.
		"""
		keywords = set(keywords)
		if not keywords.isdisjoint(cls.greetings):
			# Draw per message; drawing once per connection would repeat the
			# same reply for the whole conversation.
			return random.choice(cls.greetings)
		if not keywords.isdisjoint(cls.questions):
			return random.choice(cls.responses)
		return "别逼逼了,我不知道你在说啥!!!"

	def handle(self):
		"""Serve one client connection until it sends 'q' or disconnects."""
		conn = self.request
		# Greet the client as soon as it connects.
		conn.sendall(bytes("你好,我是robot~", encoding="utf-8"))
		while True:
			rec_request = conn.recv(1024)
			if not rec_request:
				# recv() returns b'' once the peer has closed the connection;
				# without this check the loop would keep replying to nobody.
				break
			# errors="replace": a multi-byte character cut at the 1024-byte
			# boundary (or invalid input) must not kill the handler.
			rec_str = str(rec_request, encoding="utf-8", errors="replace")
			if rec_str == 'q':
				# Quit command: the client does not wait for an answer.
				break
			# Segment in jieba full mode; join + split turns the generator
			# into a list and drops whitespace-only tokens.
			cut_list = " ".join(jieba.cut(rec_str, cut_all=True)).split()
			conn.sendall(bytes(self._choose_reply(cut_list), encoding="utf-8"))
if __name__ == "__main__":
	# Listen on localhost:8080 and hand each connection to a Robot handler;
	# serve_forever() blocks until the process is stopped.
	listen_addr = ("127.0.0.1", 8080)
	server = socketserver.ThreadingTCPServer(listen_addr, Robot)
	server.serve_forever()

2.2 机器人客户端

client_chat.py

import socket

class Chat(object):
	"""Interactive console client for the chat-robot server."""

	def chat(self, host="127.0.0.1", port=8080):
		"""Connect to the robot server and chat until the user enters 'q'.

		Args:
			host: IP address or host name of the robot server.
			port: TCP port of the robot server.

		The server's greeting and every reply are printed to stdout. The
		loop also ends if the server closes the connection.
		"""
		# The with-block closes the socket on every exit path.
		with socket.socket() as s:
			# connect() takes the address as a (host, port) tuple.
			s.connect((host, port))
			# The server speaks first; recv reads at most 1024 bytes.
			rec_bytes = s.recv(1024)
			print("Response Infomation: {}".format(str(rec_bytes, encoding="utf-8")))
			while True:
				input_chat = input("你有什么问题>>")
				if not input_chat:
					# Sending zero bytes transmits nothing, so the server
					# would never answer and recv() below would block forever.
					continue
				# sendall() needs bytes, so encode the typed text first.
				s.sendall(bytes(input_chat, encoding='utf-8'))
				if input_chat == 'q':
					# The server ends the session on 'q' without replying.
					break
				rec_bytes = s.recv(1024)
				if not rec_bytes:
					# b'' means the server closed the connection.
					break
				print("Response Infomation: {}".format(str(rec_bytes, encoding='utf-8')))

if __name__ == "__main__":
	# Start an interactive chat session against the default server address.
	Chat().chat()

[参考文献]
[1]https://www.cnblogs.com/veitch-623/p/6828608.html?utm_source=itdadao&utm_medium=referral
[2]https://www.cnblogs.com/aylin/p/5572104.html


你可能感兴趣的:(#,自然语言处理)