CS188 Project 2

Question 1: Reflex Agent

class ReflexAgent(Agent):
    """
    A reflex agent chooses an action at each choice point by examining
    its alternatives via a state evaluation function.

    The code below is provided as a guide.  You are welcome to change
    it in any way you see fit, so long as you don't touch our method
    headers.
    """


    def getAction(self, gameState):
        """
        You do not need to change this method, but you're welcome to.

        getAction chooses among the best options according to the evaluation function.

        Just like in the previous project, getAction takes a GameState and returns
        some Directions.X for some X in the set {NORTH, SOUTH, WEST, EAST, STOP}
        """
        # Collect legal moves and successor states
        legalMoves = gameState.getLegalActions()

        # Choose one of the best actions
        scores = [self.evaluationFunction(gameState, action) for action in legalMoves]
        bestScore = max(scores)
        bestIndices = [index for index in range(len(scores)) if scores[index] == bestScore]
        chosenIndex = random.choice(bestIndices) # Pick randomly among the best

        "Add more of your code here if you want to"

        return legalMoves[chosenIndex]

    def evaluationFunction(self, currentGameState, action):
        """
        Design a better evaluation function here.

        The evaluation function takes in the current and proposed successor
        GameStates (pacman.py) and returns a number, where higher numbers are better.

        The code below extracts some useful information from the state, like the
        remaining food (newFood) and Pacman position after moving (newPos).
        newScaredTimes holds the number of moves that each ghost will remain
        scared because of Pacman having eaten a power pellet.

        Print out these variables to see what you're getting, then combine them
        to create a masterful evaluation function.
        """
        # Useful information you can extract from a GameState (pacman.py)
        successorGameState = currentGameState.generatePacmanSuccessor(action)
        newPos = successorGameState.getPacmanPosition()
        newFood = successorGameState.getFood()
        newGhostStates = successorGameState.getGhostStates()
        newScaredTimes = [ghostState.scaredTimer for ghostState in newGhostStates]

        "*** YOUR CODE HERE ***"
        # Return a value in [-1, 1]: -1 for stepping onto an active ghost,
        # +1 for eating food, otherwise a distance-based score in between.

        newFood = newFood.asList()
        ghostPos = [ghost.getPosition() for ghost in newGhostStates]
        scared = min(newScaredTimes) > 0

        # If the ghosts are not scared and the successor position lands on a
        # ghost, the move is fatal: return the lowest value.
        if not scared and (newPos in ghostPos):
            return -1.0

        # If the move eats a food pellet, return the highest value.
        if newPos in currentGameState.getFood().asList():
            return 1.0

        # Otherwise, prefer positions close to food and far from ghosts.
        closestFoodDist = min(util.manhattanDistance(food, newPos) for food in newFood)
        closestGhostDist = min(util.manhattanDistance(ghost, newPos) for ghost in ghostPos)

        return 1.0 / closestFoodDist - 1.0 / closestGhostDist
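
The reciprocal-distance trade-off is easy to sanity-check outside the framework. A minimal sketch (manhattan_distance is reimplemented here because util.py is project-specific, and the positions are made up):

def manhattan_distance(a, b):
    # Same formula as util.manhattanDistance: |x1 - x2| + |y1 - y2|.
    return abs(a[0] - b[0]) + abs(a[1] - b[1])

def reflex_score(pos, food_positions, ghost_positions):
    # The same scoring rule as above: closer food raises the score,
    # closer ghosts lower it.
    closest_food = min(manhattan_distance(f, pos) for f in food_positions)
    closest_ghost = min(manhattan_distance(g, pos) for g in ghost_positions)
    return 1.0 / closest_food - 1.0 / closest_ghost

print(reflex_score((1, 1), [(2, 1)], [(4, 1)]))  # 1.0 - 1/3 ~ 0.667: food near, ghost far
print(reflex_score((1, 1), [(3, 1)], [(3, 1)]))  # 0.5 - 0.5 = 0.0: equidistant, neutral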

Question 2: Minimax

class MinimaxAgent(MultiAgentSearchAgent):
    """
    Your minimax agent (question 2)
    """

    def getAction(self, gameState):
        """
        Returns the minimax action from the current gameState using self.depth
        and self.evaluationFunction.

        Here are some method calls that might be useful when implementing minimax.

        gameState.getLegalActions(agentIndex):
        Returns a list of legal actions for an agent
        agentIndex=0 means Pacman, ghosts are >= 1

        gameState.generateSuccessor(agentIndex, action):
        Returns the successor game state after an agent takes an action

        gameState.getNumAgents():
        Returns the total number of agents in the game

        gameState.isWin():
        Returns whether or not the game state is a winning state

        gameState.isLose():
        Returns whether or not the game state is a losing state
        """
        "*** YOUR CODE HERE ***"
        GhostIndex = [i for i in range(1, gameState.getNumAgents())]

        def term(state, d):
            return state.isWin() or state.isLose() or d == self.depth

        def min_value(state, d, ghost):  # minimizer: one of possibly several ghosts

            if term(state, d):
                return self.evaluationFunction(state)

            "Value for a Min node; there may be multiple ghosts"
            v = float('inf')
            for action in state.getLegalActions(ghost):
                if ghost == GhostIndex[-1]:  # last ghost: back to Pacman, one ply deeper
                    v = min(v, max_value(state.generateSuccessor(ghost, action), d + 1))
                else:  # otherwise the next ghost minimizes at the same depth
                    v = min(v, min_value(state.generateSuccessor(ghost, action), d, ghost + 1))
            return v

        def max_value(state, d):  # maximizer: Pacman

            if term(state, d):
                return self.evaluationFunction(state)

            "Value for a Max node"
            v = -float('inf')
            for action in state.getLegalActions(0):
                v = max(v, min_value(state.generateSuccessor(0, action), d, 1))
            return v

        "Select action for Max node"
        res = [(action, min_value(gameState.generateSuccessor(0, action), 0, 1))
               for action in gameState.getLegalActions(0)]
        res.sort(key=lambda k: k[1])

        return res[-1][0]

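The recursion structure can be checked on a hand-built tree instead of a GameState. A minimal sketch (the tree shape and leaf values are made up; agent 0 maximizes and every other agent minimizes, and in the real agent the depth counter d only increases when the last ghost hands control back to Pacman):

def minimax(node, agent, num_agents):
    if isinstance(node, (int, float)):  # leaf: an evaluation-function value
        return node
    next_agent = (agent + 1) % num_agents
    values = [minimax(child, next_agent, num_agents) for child in node]
    return max(values) if agent == 0 else min(values)

# Pacman (max) picks between two moves; one ghost (min) replies to each.
tree = [[3, 12], [8, 2]]
print(minimax(tree, agent=0, num_agents=2))  # max(min(3, 12), min(8, 2)) = 3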

Question 3: Alpha-Beta Pruning

class AlphaBetaAgent(MultiAgentSearchAgent):
    """
    Your minimax agent with alpha-beta pruning (question 3)
    """

    def getAction(self, gameState):
        """
        Returns the minimax action using self.depth and self.evaluationFunction
        """
        "*** YOUR CODE HERE ***"
        GhostIndex = [i for i in range(1, gameState.getNumAgents())]
        inf = float('inf')

        def term(state, d):
            return state.isWin() or state.isLose() or d == self.depth

        def min_value(state, d, ghost, A, B):  # minimizer

            if term(state, d):
                return self.evaluationFunction(state)

            v = inf
            for action in state.getLegalActions(ghost):
                if ghost == GhostIndex[-1]:  # next is maximizer with pacman
                    v = min(v, max_value(state.generateSuccessor(ghost, action), d + 1, A, B))
                else:  # next is minimizer with next-ghost
                    v = min(v, min_value(state.generateSuccessor(ghost, action), d, ghost + 1, A, B))

                if v < A:  # prune: the maximizer above already has a better option
                    return v
                B = min(B, v)

            return v

        def max_value(state, d, A, B):  # maximizer

            if term(state, d):
                return self.evaluationFunction(state)

            v = -inf
            for action in state.getLegalActions(0):
                v = max(v, min_value(state.generateSuccessor(0, action), d, 1, A, B))

                if v > B:  # prune: the minimizer above already has a better option
                    return v
                A = max(A, v)

            return v

        def alphabeta(state):  # root maximizer: track the best action, not just the value

            v = -inf
            act = None
            A = -inf
            B = inf

            for action in state.getLegalActions(0):
                tmp = min_value(state.generateSuccessor(0, action), 0, 1, A, B)

                if tmp > v:  # same as v = max(v, tmp), but remembers the action
                    v = tmp
                    act = action

                # No pruning test at the root: B stays +inf here, so v > B can
                # never hold. Only the lower bound A needs updating.
                A = max(A, v)

            return act

        return alphabeta(gameState)

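Threading the (A, B) bounds through the same toy tree makes the saving visible. A minimal sketch (tree and values are made up; note the strict comparisons v < A and v > B, matching the code above):

def alphabeta_value(node, agent, num_agents, A, B, leaves):
    if isinstance(node, (int, float)):  # leaf: an evaluation-function value
        leaves[0] += 1
        return node
    next_agent = (agent + 1) % num_agents
    if agent == 0:  # maximizer
        v = -float('inf')
        for child in node:
            v = max(v, alphabeta_value(child, next_agent, num_agents, A, B, leaves))
            if v > B:
                return v
            A = max(A, v)
        return v
    v = float('inf')  # minimizer
    for child in node:
        v = min(v, alphabeta_value(child, next_agent, num_agents, A, B, leaves))
        if v < A:
            return v
        B = min(B, v)
    return v

leaves = [0]
tree = [[3, 12], [2, 8, 14]]  # minimax value is max(3, 2) = 3
print(alphabeta_value(tree, 0, 2, -float('inf'), float('inf'), leaves))  # 3
print(leaves[0])  # 3 of 5 leaves evaluated: after seeing 2 < A = 3, leaves 8 and 14 are pruned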

Question 4: Expectimax

class ExpectimaxAgent(MultiAgentSearchAgent):
    """
      Your expectimax agent (question 4)
    """

    def getAction(self, gameState):
        """
        Returns the expectimax action using self.depth and self.evaluationFunction

        All ghosts should be modeled as choosing uniformly at random from their
        legal moves.
        """
        "*** YOUR CODE HERE ***"
        GhostIndex = [i for i in range(1, gameState.getNumAgents())]

        def term(state, d):
            return state.isWin() or state.isLose() or d == self.depth

        def exp_value(state, d, ghost):  # chance node: ghosts move uniformly at random

            if term(state, d):
                return self.evaluationFunction(state)

            v = 0
            actions = state.getLegalActions(ghost)
            prob = 1.0 / len(actions)  # uniform probability over legal moves

            for action in actions:
                if ghost == GhostIndex[-1]:  # last ghost: back to Pacman, one ply deeper
                    v += prob * max_value(state.generateSuccessor(ghost, action), d + 1)
                else:  # otherwise the next ghost takes its expectation at the same depth
                    v += prob * exp_value(state.generateSuccessor(ghost, action), d, ghost + 1)
            return v

        def max_value(state, d):  # maximizer: Pacman

            if term(state, d):
                return self.evaluationFunction(state)

            v = -float('inf')
            for action in state.getLegalActions(0):
                v = max(v, exp_value(state.generateSuccessor(0, action), d, 1))
            return v

        res = [(action, exp_value(gameState.generateSuccessor(0, action), 0, 1))
               for action in gameState.getLegalActions(0)]
        res.sort(key=lambda k: k[1])

        return res[-1][0]

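Swapping min for a uniform average is the whole change from minimax, and it shifts the backed-up value. A minimal sketch on the same made-up tree (a minimizing ghost gives max(min(3, 12), min(8, 2)) = 3, while a uniformly random ghost gives max(avg(3, 12), avg(8, 2)) = 7.5):

def expectimax(node, agent, num_agents):
    if isinstance(node, (int, float)):  # leaf: an evaluation-function value
        return node
    next_agent = (agent + 1) % num_agents
    values = [expectimax(child, next_agent, num_agents) for child in node]
    if agent == 0:
        return max(values)            # Pacman still maximizes
    return sum(values) / len(values)  # chance node: uniform over legal moves

tree = [[3, 12], [8, 2]]
print(expectimax(tree, agent=0, num_agents=2))  # max(7.5, 5.0) = 7.5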

Question 5: Evaluation Function

def betterEvaluationFunction(currentGameState):
    """
    Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable
    evaluation function (question 5).

    DESCRIPTION: evaluate the current state (not a successor) as the game
    score plus the reciprocal of the Manhattan distance to the closest
    remaining food pellet; the food term vanishes when no pellets remain.
    """
    "*** YOUR CODE HERE ***"

    newPos = currentGameState.getPacmanPosition()
    newFood = currentGameState.getFood().asList()
    newGhostStates = currentGameState.getGhostStates()
    newScaredTimes = [ghostState.scaredTimer for ghostState in newGhostStates]  # unused in this baseline; see the sketch below

    # Evaluate the current game state directly: the score, plus the reciprocal
    # of the distance to the closest food pellet (the food term is 0 when no
    # pellets remain, since 1.0 / inf == 0.0).
    score = currentGameState.getScore()
    foodDist = float("inf")
    for food in newFood:
        foodDist = min(foodDist, util.manhattanDistance(food, newPos))
    score += 1.0 / foodDist

    return score
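
The unused newGhostStates and newScaredTimes above invite obvious extensions. One possible richer version, a sketch only (richerEvaluationFunction is hypothetical, not the graded solution, and the weight 2.0 and the +1 smoothing are arbitrary choices that would need tuning against the autograder):

def richerEvaluationFunction(currentGameState):
    """Sketch: game score, plus closeness to food, minus closeness to
    active ghosts, plus closeness to scared (edible) ghosts."""
    pos = currentGameState.getPacmanPosition()
    food = currentGameState.getFood().asList()

    score = currentGameState.getScore()
    if food:
        score += 1.0 / min(util.manhattanDistance(f, pos) for f in food)

    for ghost in currentGameState.getGhostStates():
        dist = util.manhattanDistance(ghost.getPosition(), pos)
        if ghost.scaredTimer > 0:
            score += 2.0 / (dist + 1)  # reward chasing scared ghosts
        else:
            score -= 2.0 / (dist + 1)  # penalize proximity to active ghosts

    return score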
