基于TensorFlow 2.0做了一个伪强化学习课程作业,可以实现两个人工智能不断对弈,并从中汲取经验,彼此对抗升级。最近事情比较多,直接把源码扔这供以后参考吧。
import tensorflow as tf
import numpy as np
#一号选手(模拟AI)
model = tf.keras.models.Sequential([
tf.keras.layers.Dense(9,input_shape=[9]),
tf.keras.layers.Dense(30, activation='relu'),
tf.keras.layers.Dense(20, activation='relu'),
tf.keras.layers.Dense(20, activation='relu'),
tf.keras.layers.Dense(9, activation='softmax')
])
#二号选手(模拟玩家)
model2 = tf.keras.models.Sequential([
tf.keras.layers.Dense(9,input_shape=[9]),
tf.keras.layers.Dense(30, activation='sigmoid'),
tf.keras.layers.Dense(60, activation='sigmoid'),
tf.keras.layers.Dense(30, activation='sigmoid'),
tf.keras.layers.Dense(9, activation='softmax')
])
#模型训练方法
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
def transformData(allGame):#训练数据对调
transGame=allGame.copy()
for game in transGame:
for x in game:
if x==0:
x=1
else:
x=0
return transGame
#判断游戏是否结束和获胜者
def winGame(game):
for i in range(3):
if game[3*i]==game[3*i+1] and game[3*i+1]==game[3*i+2]:
if game[3*i]==1:
#print("玩家胜1")
return 0,1
if game[3*i]==0:
#print("AI胜")
return 0,0
if game[i]== game[3+i]== game[6+i]:
if game[i]==1:
#print("玩家胜2")
return 0,1
if game[i]==0:
#print("AI胜")
return 0,0
if game[0]==game[4]==game[8]:
if game[0]==1:
#print("玩家胜3")
return 0,1
if game[0]==0:
#print("AI胜")
return 0,0
if game[2]==game[4]==game[6]:
if game[2]==1:
#print("玩家胜4")
return 0,1
if game[2]==0:
#print("AI胜")
return 0,0
if game.count(-1)==0:
#print("平局!")
return 0,-1
return 1,-1
#展示棋盘现状
def showGame(game):
for i in range(3):
s=""
for j in range(3):
if game[3*i+j]==-1:
s+=" . "
if game[3*i+j]==1:
s+=" o "
if game[3*i+j]==0:
s+=" x "
print(s)
#一号选手的学习过程
def ML_Fit(allGame,allAction):
a_game=np.array(allGame)
a_action=np.array(allAction)
model.fit(a_game,a_action,epochs=200,verbose=0)
prediction=model.predict(np.array([game]))
#print(prediction,np.argmax(prediction))
return np.argmax(prediction)
#二号选手的学习过程
def ML_Fit_self(allGame,allAction):
a_game=np.array(allGame)
a_action=np.array(allAction)
model.fit(a_game,a_action,epochs=200,verbose=0)
prediction=model2.predict(np.array([game]))
#print(prediction,np.argmax(prediction))
return np.argmax(prediction)
#一号选手实战
def AI(game):
for i in range(9):
if game[i]==-1:
oneStep=game.copy()
oneStep[i]=0
if winGame(oneStep)[1]==0:
return i
#单步会输,优先围堵。
for i in range(9):
if game[i]==-1:
oneStep=game.copy()
oneStep[i]=1
if winGame(oneStep)[1]==1:
return i
prediction=model.predict(np.array([game]))
#print(prediction,np.argmax(prediction))
while game[np.argmax(prediction)]!=-1:
prediction[0][np.argmax(prediction)]=-1
return np.argmax(prediction)
#二号选手实战
def AI_self(game):
#不允许出现低级错误,单步能胜直接执行。
for i in range(9):
if game[i]==-1:
oneStep=game.copy()
oneStep[i]=1
if winGame(oneStep)[1]==1:
return i
#单步会输,优先围堵。
for i in range(9):
if game[i]==-1:
oneStep=game.copy()
oneStep[i]=0
if winGame(oneStep)[1]==0:
return i
prediction=model2.predict(np.array([game]))
#print(prediction,np.argmax(prediction))
while game[np.argmax(prediction)]!=-1:#已有棋子,则选择第二高的值
prediction[0][np.argmax(prediction)]=-1
return np.argmax(prediction)
epochs=200#总训练胜场盘数
winTime=0#一号选手胜场数
winTime_self=0#二号选手胜场数
noWinner=0
i=0
#一号选手的教材
all_allGame=[]#存储棋盘数据
all_allAction=[]#存储棋盘动作
#二号选手的教材
all_allGame_self=[]
all_allAction_self=[]
#while winTime100:
all_allGame.pop(0)
all_allAction.pop(0)
print("删除旧样本")
if len(all_allGame_self)>100:
print("删除旧样本")
all_allGame_self.pop(0)
all_allAction_self.pop(0)
m=AI_self(game)
if game[m]!=-1:
print("已有落子!请重下")
continue
#print("假想敌下")
game[m]=1
allGame_self+=[game]
allAction_self+=[m]
#showGame(game)
#print("------------")
if winGame(game)[0]==0:
break
#print("实战AI下")
n=AI(game);
allGame+=[game]
allAction+=[n]
game[n]=0
#showGame(game)
#print(allGame,allAction)
if winGame(game)[1]!=1:#一号选手后手获胜或平局就学一号的下法
all_allGame+=allGame
all_allAction+=allAction
if winGame(game)[1]==0:
winTime+=1
print("AI获胜!")
ML_Fit(all_allGame,all_allAction)
if winGame(game)[1]==1:#二号选手先手获胜就学二号的下法
winTime_self+=1
all_allGame_self+=allGame_self
all_allAction_self+=allAction_self
ML_Fit(transformData(all_allGame_self),all_allAction_self)
ML_Fit_self(all_allGame_self,all_allAction_self)
if winGame(game)[1]==-1:
noWinner+=1
print("平局")
print("当前盘数:",i)
#输出获胜次数
print("AI获胜次数:",winTime,"模拟AI获胜次数:",winTime_self,"平局次数:",noWinner)
#系统提示训练对局结束,下面和真人较量!
import winsound
winsound.PlaySound("SystemHand", winsound.SND_ALIAS)
# 一号选手
allGame=[]
allAction=[]
game=[-1 for i in range(9)]
while winGame(game)[0]:
m=int(input())
if game[m]!=-1:
print("已有落子!请重下")
continue
print("玩家下")
game[m]=1
showGame(game)
print("------------")
if winGame(game)[0]==0:
break
print("AI下")
n=AI(game);
game[n]=0
showGame(game)
if winGame(game)[1]==1:
print("玩家获胜")
if winGame(game)[1]==0:
print("AI获胜")
if winGame(game)[1]==-1:
print("平局!")
#二号选手
game=[-1 for i in range(9)]
while winGame(game)[0]:
print("AI_self下")
n=AI_self(game);
game[n]=1
showGame(game)
if winGame(game)[0]==0:
break
print("------------")
m=int(input())
while game[m]!=-1:
m=int(input())
print("已有落子!请重下")
print("玩家下")
game[m]=0
showGame(game)
if winGame(game)[0]==0:
break
if winGame(game)[1]==1:
print("AI_self获胜")
if winGame(game)[1]==0:
print("玩家获胜")
if winGame(game)[1]==-1:
print("平局!")