Unity's ML-Agents toolkit lets an AI behave like a human player even in a moderately complex, changing environment, which came as a great surprise. This post shares the machine-learning setup I used.
The scenario is simple: the AI has to pick up green objects scattered among many others, the same basic requirement as an AI that must approach certain attack targets or selectively collect useful items. The previous post covered setting up the environment, so this one goes straight to the code.
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using MLAgents;
using System;

public class NPCAgent : Agent
{
    public GameObject ground;
    public GameObject redcube;
    public GameObject greencube;

    RayPerception rayPer;
    Rigidbody rd;

    public override void InitializeAgent()
    {
        base.InitializeAgent();
        rayPer = GetComponent<RayPerception>();
        rd = GetComponent<Rigidbody>();
    }

    public override void CollectObservations()
    {
        // Cast five rays up to 12 units long and report what each one hits.
        float rayDistance = 12f;
        float[] rayAngles = { 20f, 60f, 90f, 120f, 160f };
        string[] detectableObjects = { "Cube+", "Cube-", "wall" };
        // How far the episode has progressed, normalized to [0, 1].
        AddVectorObs(GetStepCount() / (float)agentParameters.maxStep);
        AddVectorObs(rayPer.Perceive(rayDistance, rayAngles, detectableObjects, 0f, 0f));
    }

    public override void AgentAction(float[] vectorAction, string textAction)
    {
        // Optional per-step penalty to encourage faster collection:
        // AddReward(-1f / agentParameters.maxStep);
        MoveAgent(vectorAction);
    }

    private void MoveAgent(float[] vectorAction)
    {
        // Safeguard: end the episode if no green cubes are left in the scene.
        // (Collected cubes are respawned rather than destroyed, so in practice
        // episodes end when maxStep is reached.)
        if (GameObject.FindGameObjectsWithTag("Cube+").Length <= 0)
        {
            Done();
            return;
        }
        // Continuous action 0 drives forward/backward, action 1 turns.
        Vector3 dirToGo = transform.forward * Mathf.Clamp(vectorAction[0], -1f, 1f);
        Vector3 rotateDir = transform.up * Mathf.Clamp(vectorAction[1], -1f, 1f);
        transform.Rotate(rotateDir, Time.deltaTime * 150f);
        rd.AddForce(dirToGo * 1.5f, ForceMode.VelocityChange);
    }

    void OnCollisionEnter(Collision col)
    {
        if (col.gameObject.CompareTag("Cube+"))
        {
            // Collected a green cube: reward it and respawn the cube elsewhere.
            SetReward(1f);
            col.gameObject.transform.position = new Vector3(UnityEngine.Random.Range(-40f, 40f), 0, UnityEngine.Random.Range(-40f, 40f));
            Debug.Log("ok successful!");
        }
        if (col.gameObject.CompareTag("Cube-"))
        {
            // Touched a poisonous red cube: penalize and respawn the cube.
            SetReward(-1f);
            col.gameObject.transform.position = new Vector3(UnityEngine.Random.Range(-40f, 40f), 0, UnityEngine.Random.Range(-40f, 40f));
            Debug.Log("sorry!");
        }
        if (col.gameObject.CompareTag("wall"))
        {
            // Small penalty for bumping into walls.
            SetReward(-0.1f);
        }
    }

    public override void AgentReset()
    {
        // Drop the agent at a random position and heading, and zero its velocity.
        transform.position = new Vector3(UnityEngine.Random.Range(-40f, 40f), 0.45f, UnityEngine.Random.Range(-40f, 40f));
        transform.rotation = Quaternion.Euler(0f, UnityEngine.Random.Range(0f, 360f), 0f);
        rd.velocity = Vector3.zero;
        // Scatter all green and red cubes to fresh random positions.
        GameObject[] greens = GameObject.FindGameObjectsWithTag("Cube+");
        for (int i = 0; i < greens.Length; i++)
        {
            greens[i].transform.position = new Vector3(UnityEngine.Random.Range(-40f, 40f), 0, UnityEngine.Random.Range(-40f, 40f));
        }
        GameObject[] reds = GameObject.FindGameObjectsWithTag("Cube-");
        for (int i = 0; i < reds.Length; i++)
        {
            reds[i].transform.position = new Vector3(UnityEngine.Random.Range(-40f, 40f), 0, UnityEngine.Random.Range(-40f, 40f));
        }
    }
}
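Before training, the Brain asset this agent references must be configured in the Inspector to match the code above: a continuous vector action of size 2 (one value for forward thrust, one for turning) and a vector observation whose size follows from the ray setup. A minimal sketch of that arithmetic, with a helper name of my own invention:

// Hypothetical helper (not part of ML-Agents): computes the value to enter
// as the Brain's Vector Observation Space Size. Each ray contributes one
// float per detectable tag plus a "hit nothing" flag and a normalized hit
// distance; CollectObservations adds one more float for the step fraction.
static int ObservationSize(int rayCount, int tagCount)
{
    return 1 + rayCount * (tagCount + 2); // 1 + 5 * (3 + 2) = 26 here
}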
Touching a green cube earns the agent +1, touching a poisonous red cube costs -1, and bumping into a wall costs -0.1: these rewards are the signal the AI is trained on.
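One refinement, hinted at by the commented-out AddReward line in AgentAction above, is a small negative reward on every step so the agent learns to collect cubes quickly instead of wandering. A sketch of the uncommented version:

public override void AgentAction(float[] vectorAction, string textAction)
{
    // Existential penalty: over a full episode of maxStep steps this sums
    // to -1, so dawdling costs as much as touching one red cube.
    AddReward(-1f / agentParameters.maxStep);
    MoveAgent(vectorAction);
}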
The observations include the distance between the agent and each object its rays can see, which biases the trained brain toward choosing nearby objects. That said, my final training result is not a scripted robot that always sweeps outward from its own position: like a human, the agent sometimes picks a different direction and angle for its next move, even though most of the time it handles nearby objects first. The distance values come from the Perceive method of ML-Agents' RayPerception component:
public override List<float> Perceive(float rayDistance,
    float[] rayAngles, string[] detectableObjects,
    float startOffset, float endOffset)
{
    perceptionBuffer.Clear();
    // For each ray, a sublist stores categorical information on the detected
    // object along with the object's distance.
    foreach (float angle in rayAngles)
    {
        endPosition = transform.TransformDirection(
            PolarToCartesian(rayDistance, angle));
        endPosition.y = endOffset;
        if (Application.isEditor)
        {
            Debug.DrawRay(transform.position + new Vector3(0f, startOffset, 0f),
                endPosition, Color.black, 0.01f, true);
        }

        // One flag per detectable tag, a "hit nothing" flag, and the
        // normalized hit distance.
        float[] subList = new float[detectableObjects.Length + 2];
        if (Physics.SphereCast(transform.position +
            new Vector3(0f, startOffset, 0f), 0.5f,
            endPosition, out hit, rayDistance))
        {
            for (int i = 0; i < detectableObjects.Length; i++)
            {
                if (hit.collider.gameObject.CompareTag(detectableObjects[i]))
                {
                    subList[i] = 1;
                    subList[detectableObjects.Length + 1] = hit.distance / rayDistance;
                    break;
                }
            }
        }
        else
        {
            // Nothing within rayDistance: set the "hit nothing" flag.
            subList[detectableObjects.Length] = 1f;
        }
        perceptionBuffer.AddRange(subList);
    }
    return perceptionBuffer;
}
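To make the returned layout concrete: with the five angles and three tags used above, the buffer holds five sublists of five floats each. Below is a hypothetical helper of my own (not part of ML-Agents) that reads one ray's sublist back out of Perceive's return value:

// Hypothetical helper; assumes using System.Collections.Generic and UnityEngine.
// Layout per ray: [one flag per tag..., "hit nothing" flag, normalized distance].
static void PrintRay(List<float> buffer, int rayIndex, string[] tags)
{
    int stride = tags.Length + 2;
    int offset = rayIndex * stride;
    if (buffer[offset + tags.Length] == 1f)
    {
        Debug.Log("ray " + rayIndex + ": nothing hit");
        return;
    }
    for (int i = 0; i < tags.Length; i++)
    {
        if (buffer[offset + i] == 1f)
        {
            Debug.Log("ray " + rayIndex + ": " + tags[i] + " at " +
                buffer[offset + stride - 1] + " of rayDistance");
        }
    }
}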