双人纳什均衡求解问题,假设:
最优反应条件:
polyhedron:
polytope:
只需求解原点以外的顶点。
单独求解 P P P的顶点和 Q Q Q的顶点,然后匹配二者的顶点,根据:
混合策略 k k k个分量非零称作 k k k支撑。
假设问题非退化,对于任意策略,如果策略是 k k k支撑,那么策略最优反应是 k k k支撑。
如果问题非退化,对于任意纳什均衡,如果 p p p是 k k k支撑,那么 q q q是 k k k支撑,如果 q q q是 k k k支撑,那么 p p p是 k k k支撑。
复杂度:
http://www.gambit-project.org/gambit14/formats.html
import re
import os
import sys
import time
import nash2
import nash3
if os.name == 'nt':
os.system('color')
def load_nfg(nfg_path):
nfg_comment_list = []
nfg_player_list = []
nfg_action_list = []
nfg_payoff_list = []
with open(nfg_path, 'rt') as nfg_file:
first_line = nfg_file.readline()
assert first_line.startswith('NFG 1 R')
first_line = first_line.lstrip('NFG 1 R').lstrip()
while not first_line.count('"') >= 1:
first_line = nfg_file.readline().rstrip('\n')
comment_line = first_line.lstrip('"')
comment_line = comment_line.replace('\\"', '\\x22')
while not '"' in comment_line:
nfg_comment_list.append(comment_line)
comment_line = nfg_file.readline().rstrip('\n')
comment_line = comment_line.replace('\\"', '\\x22')
meta_line = comment_line[comment_line.find('"') + 1 :]
while not (meta_line.count('{') >= 2 and meta_line.count('}') >= 2):
meta_line += nfg_file.readline().rstrip('\n')
string_pattern = re.compile(r'\{([^\}]+)\}')
player_pattern = re.compile(r'"([^"]+)"')
action_pattern = re.compile(r'([0-9]+)')
meta_string = string_pattern.findall(meta_line)
player_string = meta_string[0]
action_string = meta_string[1]
for player in player_pattern.findall(player_string):
nfg_player_list.append(player)
for action in action_pattern.findall(action_string):
nfg_action_list.append(int(action))
payoff_line = None
while not payoff_line:
payoff_line = nfg_file.readline().rstrip('\n')
for payoff in payoff_line.strip().split():
nfg_payoff_list.append(int(payoff))
return len(nfg_player_list), nfg_action_list, nfg_payoff_list
def dump_ne(ne_path, ne_list):
with open(ne_path, 'wt') as ne_file:
for ne_record in ne_list:
ne_file.write(repr(tuple(ne_record)).lstrip('(').rstrip(')'))
ne_file.write('\n')
def solve_ne(nfg_path, ne_path):
player_number, action_list, payoff_list = load_nfg(nfg_path)
if player_number == 2:
ne_list = nash2.solve_ne_two_mixed(action_list, payoff_list)
else:
ne_list = nash3.solve_ne_many_pure(
player_number, action_list, payoff_list
)
dump_ne(ne_path, ne_list)
def load_ne(ne_path):
ne_list = []
component_pattern = re.compile(r'[^,]+')
with open(ne_path, 'rt') as ne_file:
while (
ne_line := ne_file.readline().rstrip('\n').lstrip('[').rstrip(']')
):
ne_record = []
for component_string in component_pattern.findall(ne_line):
ne_component = eval(component_string)
if abs(ne_component) < 1e-8:
ne_component = 0.0
ne_record.append(ne_component)
ne_list.append(tuple(ne_record))
ne_list.sort()
return ne_list
def compare_ne(ne_path, ne_tgt_path):
ne_list = load_ne(ne_path)
ne_tgt_list = load_ne(ne_tgt_path)
if len(ne_list) == len(ne_tgt_list):
for ne_record, ne_tgt_record in zip(ne_list, ne_tgt_list):
for ne_component, ne_tgt_component in zip(ne_record, ne_tgt_record):
if abs(ne_component - ne_tgt_component) > 1e-8:
return False
return True
def log_one(ansi, stem, comment):
print(
'{} \033[{}m[{}]\033[0m {}'.format(
time.strftime('%H:%M:%S'), ansi, stem, comment
)
)
def deploy_all(nfg_dir, ne_dir, stem_list):
for stem in stem_list:
timestamp_before = time.time()
solve_ne(
os.path.join(nfg_dir, '{}.nfg'.format(stem)),
os.path.join(ne_dir, '{}.ne'.format(stem)),
)
timestamp_after = time.time()
ansi = 93
log_one(
ansi,
stem,
'solve_ne in {:.2f}s'.format(timestamp_after - timestamp_before),
)
def difftest_all(ne_dir, ne_tgt_dir, stem_list):
for stem in stem_list:
is_same = compare_ne(
os.path.join(ne_dir, '{}.ne'.format(stem)),
os.path.join(ne_tgt_dir, '{}.ne'.format(stem)),
)
if is_same:
ansi = 92
else:
ansi = 91
log_one(
ansi,
stem,
'compare_ne as {:s}'.format('good' if is_same else 'bad'),
)
def deploy():
curr_dir, _ = os.path.split(__file__)
nfg_dir = os.path.join(curr_dir, 'input')
ne_dir = os.path.join(curr_dir, 'output')
stem_list = []
for nfg_name in os.listdir(nfg_dir):
if nfg_name.endswith('.nfg'):
stem_list.append(nfg_name.rstrip('.nfg'))
deploy_all(nfg_dir, ne_dir, stem_list)
def difftest():
curr_dir, _ = os.path.split(__file__)
nfg_dir = os.path.join(curr_dir, 'example_input')
ne_dir = os.path.join(curr_dir, 'example_output')
ne_tgt_dir = os.path.join(curr_dir, 'example_input')
stem_list = []
for nfg_name in os.listdir(nfg_dir):
if nfg_name.endswith('.nfg'):
stem_list.append(nfg_name.rstrip('.nfg'))
stem_list.sort()
deploy_all(nfg_dir, ne_dir, stem_list)
difftest_all(ne_dir, ne_tgt_dir, stem_list)
if __name__ == '__main__':
supported_routines = {'deploy': deploy, 'difftest': difftest}
if len(sys.argv) > 1 and (routine := sys.argv[1]) in supported_routines:
supported_routines[routine]()
else:
difftest()
deploy()
import numpy as np
from scipy.optimize import linprog
from scipy.spatial import HalfspaceIntersection
def get_bimatrix(action_list, payoff_list):
m, n = action_list
A = [[None for j in range(n)] for i in range(m)]
B = [[None for j in range(n)] for i in range(m)]
for j in range(n):
for i in range(m):
A[i][j] = payoff_list[(i + j * m) * 2 + 0]
B[i][j] = payoff_list[(i + j * m) * 2 + 1]
A = np.array(A)
B = np.array(B)
A -= A.min()
B -= B.min()
return m, n, A, B
def get_polytope(mat):
num, dim = mat.shape
empathy_polyhedron = np.concatenate((mat, -np.ones((num, 1))), axis=1)
sanity_polyhedron = np.concatenate(
(-np.identity(dim), np.zeros((dim, 1))), axis=1
)
return np.concatenate((empathy_polyhedron, sanity_polyhedron), axis=0)
def get_feasible(mat):
num, dim = mat.shape
A_ub = np.concatenate(
(
np.concatenate((mat, -np.identity(dim)), axis=0),
np.concatenate(
(np.linalg.norm(mat, axis=1, keepdims=True), np.ones((dim, 1))),
axis=0,
),
),
axis=1,
)
b_ub = np.concatenate((np.ones((num, 1)), np.zeros((dim, 1))), axis=0)
c = np.zeros((dim + 1,))
c[dim] = -1
result = linprog(
c=c, A_ub=A_ub, b_ub=b_ub, A_eq=None, b_eq=None, bounds=(0, None)
)
assert result.status == 0
return result.x[:-1]
def get_hyperplanes(polytope, vertex):
A_slack = polytope[:, :-1]
b_slack = polytope[:, -1:]
(hyperplanes,) = np.nonzero(
np.isclose(np.ravel(A_slack @ vertex.reshape(-1, 1) + b_slack), 0)
)
return hyperplanes.tolist()
def get_vertecies(mat):
polytope = get_polytope(mat)
feasible = get_feasible(mat)
intersection = HalfspaceIntersection(polytope, feasible)
vertecies = []
for vertex in intersection.intersections:
if vertex.max() < np.inf and not np.all(np.isclose(vertex, 0)):
vertecies.append((vertex, get_hyperplanes(polytope, vertex)))
return vertecies
def solve_ne_two_mixed(action_list, payoff_list):
'''
算法:多面体顶点。
'''
m, n, A, B = get_bimatrix(action_list, payoff_list)
q_vertecies = get_vertecies(A)
p_vertecies = get_vertecies(B.T)
mixed_list = []
for p_vertex, m_hyperplanes in p_vertecies:
for q_vertex, n_hyperplanes in q_vertecies:
m_labels = set(m_hyperplanes)
n_labels = set(map(lambda i: (i + n) % (m + n), n_hyperplanes))
if len(m_labels | n_labels) == m + n:
p = p_vertex / p_vertex.sum()
q = q_vertex / q_vertex.sum()
mixed = p.tolist() + q.tolist()
mixed_list.append(mixed)
return mixed_list
def get_state_number(action_list):
state_number = 1
for action_number in action_list:
state_number *= action_number
return state_number
def get_state_list(player_number, action_list):
state_number = get_state_number(action_list)
state_list = []
state = [0 for _ in range(player_number)]
for state_index in range(state_number):
state_list.append([element for element in state])
for player in range(player_number):
state[player] += 1
if state[player] < action_list[player]:
break
else:
state[player] = 0
return state_list
def get_player_state_list(player_number, action_list, player):
player_state_number = get_state_number(action_list) // action_list[player]
player_state_list = []
player_state = [0 for _ in range(player_number)]
player_state[player] = None
for player_state_index in range(player_state_number):
player_state_list.append([element for element in player_state])
for enemy in range(player_number):
if enemy != player:
player_state[enemy] += 1
if player_state[enemy] < action_list[enemy]:
break
else:
player_state[enemy] = 0
else:
continue
return player_state_list
def solve_ne_many_pure(player_number, action_list, payoff_list):
'''
算法:共同最优反应。
'''
state_payoff = {}
state_response = {}
for state_index, state in enumerate(
get_state_list(player_number, action_list)
):
state_payoff[tuple(state)] = payoff_list[
player_number * state_index : player_number * (state_index + 1)
]
state_response[tuple(state)] = [False for _ in range(player_number)]
for player in range(player_number):
for player_state in get_player_state_list(
player_number, action_list, player
):
player_state_best_payoff = -float('inf')
for player_choice in range(action_list[player]):
player_state[player] = player_choice
if (
player_state_best_payoff
< state_payoff[tuple(player_state)][player]
):
player_state_best_payoff = state_payoff[
tuple(player_state)
][player]
player_state[player] = None
for player_choice in range(action_list[player]):
player_state[player] = player_choice
if (
player_state_best_payoff
<= state_payoff[tuple(player_state)][player]
):
state_response[tuple(player_state)][player] = True
player_state[player] = None
pure_list = []
for state in get_state_list(player_number, action_list):
if all(state_response[tuple(state)]):
pure = []
for player in range(player_number):
player_pure = [0 for _ in range(action_list[player])]
player_pure[state[player]] = 1
pure += player_pure
pure_list.append(pure)
return pure_list