python求解纳什均衡

背景知识

双人纳什均衡求解问题,假设:

  • 玩家 0 0 0,行动 { 0 , ⋯   , m − 1 } \{0, \cdots, m-1\} {0,,m1},收益 A ∈ R + m × n A \in \reals_{+}^{m \times n} AR+m×n,策略 p ∈ R + m × 1 p \in \reals_{+}^{m \times 1} pR+m×1
  • 玩家 1 1 1,行动 { m , ⋯   , m + n − 1 } \{m, \cdots, m+n-1\} {m,,m+n1},收益 B ∈ R + m × n B \in \reals_{+}^{m \times n} BR+m×n,策略 q ∈ R + n × 1 q \in \reals_{+}^{n \times 1} qR+n×1

最优反应条件:

  • A q ∈ { 0 , u } m Aq \in \{0, u\}^{m} Aq{0,u}m
  • B T p ∈ { 0 , v } n B^Tp \in \{0, v\}^{n} BTp{0,v}n

polyhedron:

  • P † : { ( p , v ) ∣ 1 T p = 1 , p ≽ 0 , B T p ≼ v } P^{\dag}: \{ (p,v) \mid 1^Tp = 1, p \succcurlyeq 0, B^Tp \preccurlyeq v \} P:{(p,v)1Tp=1,p0,BTpv}
  • Q † : { ( q , u ) ∣ 1 T q = 1 , q ≽ 0 , A q ≼ u } Q^{\dag}: \{ (q,u) \mid 1^Tq = 1, q \succcurlyeq 0, Aq \preccurlyeq u \} Q:{(q,u)1Tq=1,q0,Aqu}

polytope:

  • P : { p ∣ p ≽ 0 , B T p ≼ 1 } P: \{ p \mid p \succcurlyeq 0, B^Tp \preccurlyeq 1 \} P:{pp0,BTp1}
  • Q : { q ∣ q ≽ 0 , A q ≼ 1 } Q: \{ q \mid q \succcurlyeq 0, Aq \preccurlyeq 1 \} Q:{qq0,Aq1}

只需求解原点以外的顶点。

算法概述

单独求解 P P P的顶点和 Q Q Q的顶点,然后匹配二者的顶点,根据:

  • ( A q ) i < 1 (Aq)_i < 1 (Aq)i<1当且仅当 p i = 0 p_i = 0 pi=0
  • ( B T p ) i < 1 (B^Tp)_i < 1 (BTp)i<1当且仅当 q j = 0 q_j = 0 qj=0

拓展延申

混合策略 k k k个分量非零称作 k k k支撑。
假设问题非退化,对于任意策略,如果策略是 k k k支撑,那么策略最优反应是 k k k支撑。
如果问题非退化,对于任意纳什均衡,如果 p p p k k k支撑,那么 q q q k k k支撑,如果 q q q k k k支撑,那么 p p p k k k支撑。

复杂度:

  • support enumeration: O ( 4 min ⁡ { m , n } ) \Omicron\left(4^{\min\{m,n\}}\right) O(4min{m,n})
  • vertex enumeration: O ( 2. 6 min ⁡ { m , n } ) \Omicron\left(2.6^{\min\{m,n\}}\right) O(2.6min{m,n})(这是高维多面体的性质)

代码

数据格式

http://www.gambit-project.org/gambit14/formats.html

import re
import os
import sys
import time

import nash2
import nash3

if os.name == 'nt':
    os.system('color')


def load_nfg(nfg_path):
    nfg_comment_list = []
    nfg_player_list = []
    nfg_action_list = []
    nfg_payoff_list = []

    with open(nfg_path, 'rt') as nfg_file:
        first_line = nfg_file.readline()
        assert first_line.startswith('NFG 1 R')

        first_line = first_line.lstrip('NFG 1 R').lstrip()
        while not first_line.count('"') >= 1:
            first_line = nfg_file.readline().rstrip('\n')

        comment_line = first_line.lstrip('"')
        comment_line = comment_line.replace('\\"', '\\x22')

        while not '"' in comment_line:
            nfg_comment_list.append(comment_line)

            comment_line = nfg_file.readline().rstrip('\n')
            comment_line = comment_line.replace('\\"', '\\x22')

        meta_line = comment_line[comment_line.find('"') + 1 :]
        while not (meta_line.count('{') >= 2 and meta_line.count('}') >= 2):
            meta_line += nfg_file.readline().rstrip('\n')

        string_pattern = re.compile(r'\{([^\}]+)\}')
        player_pattern = re.compile(r'"([^"]+)"')
        action_pattern = re.compile(r'([0-9]+)')

        meta_string = string_pattern.findall(meta_line)
        player_string = meta_string[0]
        action_string = meta_string[1]

        for player in player_pattern.findall(player_string):
            nfg_player_list.append(player)

        for action in action_pattern.findall(action_string):
            nfg_action_list.append(int(action))

        payoff_line = None
        while not payoff_line:
            payoff_line = nfg_file.readline().rstrip('\n')

        for payoff in payoff_line.strip().split():
            nfg_payoff_list.append(int(payoff))

    return len(nfg_player_list), nfg_action_list, nfg_payoff_list


def dump_ne(ne_path, ne_list):
    with open(ne_path, 'wt') as ne_file:
        for ne_record in ne_list:
            ne_file.write(repr(tuple(ne_record)).lstrip('(').rstrip(')'))
            ne_file.write('\n')


def solve_ne(nfg_path, ne_path):
    player_number, action_list, payoff_list = load_nfg(nfg_path)
    if player_number == 2:
        ne_list = nash2.solve_ne_two_mixed(action_list, payoff_list)
    else:
        ne_list = nash3.solve_ne_many_pure(
            player_number, action_list, payoff_list
        )
    dump_ne(ne_path, ne_list)


def load_ne(ne_path):
    ne_list = []
    component_pattern = re.compile(r'[^,]+')
    with open(ne_path, 'rt') as ne_file:
        while (
            ne_line := ne_file.readline().rstrip('\n').lstrip('[').rstrip(']')
        ):
            ne_record = []
            for component_string in component_pattern.findall(ne_line):
                ne_component = eval(component_string)
                if abs(ne_component) < 1e-8:
                    ne_component = 0.0
                ne_record.append(ne_component)
            ne_list.append(tuple(ne_record))
    ne_list.sort()
    return ne_list


def compare_ne(ne_path, ne_tgt_path):
    ne_list = load_ne(ne_path)
    ne_tgt_list = load_ne(ne_tgt_path)
    if len(ne_list) == len(ne_tgt_list):
        for ne_record, ne_tgt_record in zip(ne_list, ne_tgt_list):
            for ne_component, ne_tgt_component in zip(ne_record, ne_tgt_record):
                if abs(ne_component - ne_tgt_component) > 1e-8:
                    return False
    return True


def log_one(ansi, stem, comment):
    print(
        '{} \033[{}m[{}]\033[0m {}'.format(
            time.strftime('%H:%M:%S'), ansi, stem, comment
        )
    )


def deploy_all(nfg_dir, ne_dir, stem_list):
    for stem in stem_list:
        timestamp_before = time.time()
        solve_ne(
            os.path.join(nfg_dir, '{}.nfg'.format(stem)),
            os.path.join(ne_dir, '{}.ne'.format(stem)),
        )
        timestamp_after = time.time()

        ansi = 93
        log_one(
            ansi,
            stem,
            'solve_ne in {:.2f}s'.format(timestamp_after - timestamp_before),
        )


def difftest_all(ne_dir, ne_tgt_dir, stem_list):
    for stem in stem_list:
        is_same = compare_ne(
            os.path.join(ne_dir, '{}.ne'.format(stem)),
            os.path.join(ne_tgt_dir, '{}.ne'.format(stem)),
        )

        if is_same:
            ansi = 92
        else:
            ansi = 91
        log_one(
            ansi,
            stem,
            'compare_ne as {:s}'.format('good' if is_same else 'bad'),
        )


def deploy():
    curr_dir, _ = os.path.split(__file__)

    nfg_dir = os.path.join(curr_dir, 'input')
    ne_dir = os.path.join(curr_dir, 'output')
    stem_list = []

    for nfg_name in os.listdir(nfg_dir):
        if nfg_name.endswith('.nfg'):
            stem_list.append(nfg_name.rstrip('.nfg'))

    deploy_all(nfg_dir, ne_dir, stem_list)


def difftest():
    curr_dir, _ = os.path.split(__file__)

    nfg_dir = os.path.join(curr_dir, 'example_input')
    ne_dir = os.path.join(curr_dir, 'example_output')
    ne_tgt_dir = os.path.join(curr_dir, 'example_input')
    stem_list = []

    for nfg_name in os.listdir(nfg_dir):
        if nfg_name.endswith('.nfg'):
            stem_list.append(nfg_name.rstrip('.nfg'))

    stem_list.sort()
    deploy_all(nfg_dir, ne_dir, stem_list)
    difftest_all(ne_dir, ne_tgt_dir, stem_list)


if __name__ == '__main__':
    supported_routines = {'deploy': deploy, 'difftest': difftest}
    if len(sys.argv) > 1 and (routine := sys.argv[1]) in supported_routines:
        supported_routines[routine]()
    else:
        difftest()
        deploy()

双人 混合策略 纳什均衡

import numpy as np

from scipy.optimize import linprog
from scipy.spatial import HalfspaceIntersection


def get_bimatrix(action_list, payoff_list):
    m, n = action_list

    A = [[None for j in range(n)] for i in range(m)]
    B = [[None for j in range(n)] for i in range(m)]
    for j in range(n):
        for i in range(m):
            A[i][j] = payoff_list[(i + j * m) * 2 + 0]
            B[i][j] = payoff_list[(i + j * m) * 2 + 1]

    A = np.array(A)
    B = np.array(B)

    A -= A.min()
    B -= B.min()

    return m, n, A, B


def get_polytope(mat):
    num, dim = mat.shape
    empathy_polyhedron = np.concatenate((mat, -np.ones((num, 1))), axis=1)
    sanity_polyhedron = np.concatenate(
        (-np.identity(dim), np.zeros((dim, 1))), axis=1
    )
    return np.concatenate((empathy_polyhedron, sanity_polyhedron), axis=0)


def get_feasible(mat):
    num, dim = mat.shape
    A_ub = np.concatenate(
        (
            np.concatenate((mat, -np.identity(dim)), axis=0),
            np.concatenate(
                (np.linalg.norm(mat, axis=1, keepdims=True), np.ones((dim, 1))),
                axis=0,
            ),
        ),
        axis=1,
    )
    b_ub = np.concatenate((np.ones((num, 1)), np.zeros((dim, 1))), axis=0)
    c = np.zeros((dim + 1,))
    c[dim] = -1
    result = linprog(
        c=c, A_ub=A_ub, b_ub=b_ub, A_eq=None, b_eq=None, bounds=(0, None)
    )
    assert result.status == 0
    return result.x[:-1]


def get_hyperplanes(polytope, vertex):
    A_slack = polytope[:, :-1]
    b_slack = polytope[:, -1:]
    (hyperplanes,) = np.nonzero(
        np.isclose(np.ravel(A_slack @ vertex.reshape(-1, 1) + b_slack), 0)
    )
    return hyperplanes.tolist()


def get_vertecies(mat):
    polytope = get_polytope(mat)
    feasible = get_feasible(mat)
    intersection = HalfspaceIntersection(polytope, feasible)

    vertecies = []
    for vertex in intersection.intersections:
        if vertex.max() < np.inf and not np.all(np.isclose(vertex, 0)):
            vertecies.append((vertex, get_hyperplanes(polytope, vertex)))
    return vertecies


def solve_ne_two_mixed(action_list, payoff_list):
    '''
    算法:多面体顶点。
    '''

    m, n, A, B = get_bimatrix(action_list, payoff_list)

    q_vertecies = get_vertecies(A)
    p_vertecies = get_vertecies(B.T)

    mixed_list = []
    for p_vertex, m_hyperplanes in p_vertecies:
        for q_vertex, n_hyperplanes in q_vertecies:
            m_labels = set(m_hyperplanes)
            n_labels = set(map(lambda i: (i + n) % (m + n), n_hyperplanes))
            if len(m_labels | n_labels) == m + n:
                p = p_vertex / p_vertex.sum()
                q = q_vertex / q_vertex.sum()
                mixed = p.tolist() + q.tolist()
                mixed_list.append(mixed)
    return mixed_list

多人 纯策略 纳什均衡

def get_state_number(action_list):
    state_number = 1
    for action_number in action_list:
        state_number *= action_number
    return state_number


def get_state_list(player_number, action_list):
    state_number = get_state_number(action_list)

    state_list = []

    state = [0 for _ in range(player_number)]
    for state_index in range(state_number):
        state_list.append([element for element in state])
        for player in range(player_number):
            state[player] += 1
            if state[player] < action_list[player]:
                break
            else:
                state[player] = 0

    return state_list


def get_player_state_list(player_number, action_list, player):
    player_state_number = get_state_number(action_list) // action_list[player]

    player_state_list = []

    player_state = [0 for _ in range(player_number)]
    player_state[player] = None
    for player_state_index in range(player_state_number):
        player_state_list.append([element for element in player_state])
        for enemy in range(player_number):
            if enemy != player:
                player_state[enemy] += 1
                if player_state[enemy] < action_list[enemy]:
                    break
                else:
                    player_state[enemy] = 0
            else:
                continue

    return player_state_list


def solve_ne_many_pure(player_number, action_list, payoff_list):
    '''
    算法:共同最优反应。
    '''

    state_payoff = {}
    state_response = {}
    for state_index, state in enumerate(
        get_state_list(player_number, action_list)
    ):
        state_payoff[tuple(state)] = payoff_list[
            player_number * state_index : player_number * (state_index + 1)
        ]
        state_response[tuple(state)] = [False for _ in range(player_number)]

    for player in range(player_number):
        for player_state in get_player_state_list(
            player_number, action_list, player
        ):
            player_state_best_payoff = -float('inf')
            for player_choice in range(action_list[player]):
                player_state[player] = player_choice
                if (
                    player_state_best_payoff
                    < state_payoff[tuple(player_state)][player]
                ):
                    player_state_best_payoff = state_payoff[
                        tuple(player_state)
                    ][player]
                player_state[player] = None
            for player_choice in range(action_list[player]):
                player_state[player] = player_choice
                if (
                    player_state_best_payoff
                    <= state_payoff[tuple(player_state)][player]
                ):
                    state_response[tuple(player_state)][player] = True
                player_state[player] = None

    pure_list = []
    for state in get_state_list(player_number, action_list):
        if all(state_response[tuple(state)]):
            pure = []
            for player in range(player_number):
                player_pure = [0 for _ in range(action_list[player])]
                player_pure[state[player]] = 1
                pure += player_pure
            pure_list.append(pure)
    return pure_list

你可能感兴趣的:(python)