-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplay_tournament.py
58 lines (46 loc) · 2.71 KB
/
play_tournament.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from agents.BNN_PIMC_Player import BNNPIMCPlayer
from agents.PIMC_Player import PIMCPlayer
from agents.RandomPlayer import RandomPlayer
from game_environment import SchafkopfEnv
def main():
    """Play a four-seat Schafkopf tournament and print the reward statistics.

    Seats 0 and 1 are taken by one shared ``PIMCPlayer`` instance, seats 2 and
    3 by one shared ``BNNPIMCPlayer`` instance. For every (i, j) index pairing
    two rounds of ``games_per_round`` games are played, each round on a fresh
    ``SchafkopfEnv(seed=1)``. The per-seat reward totals are printed after
    each round, and the per-game, per-seat average of seats 2+3 versus seats
    0+1 is printed after both rounds.
    """
    # NOTE(review): the block nesting below was reconstructed from a source
    # whose indentation had been flattened -- confirm against the original.
    games_per_round = 100

    pimc = PIMCPlayer(10, 40, RandomPlayer())
    bnn_pimc = BNNPIMCPlayer(10, 40, RandomPlayer())
    participants = [pimc, pimc, bnn_pimc, bnn_pimc]
    participant_count = len(participants)

    # 2 seats per team * 2 rounds * games per round.
    normaliser = 2 * 2 * games_per_round

    for i in range(participant_count):
        for j in range(i + 1, participant_count):
            # NOTE(review): the seating is always participants[0..3]; i and j
            # only show up in the printed label, so every pairing replays the
            # same match-up -- confirm whether that is intended.
            seating = [participants[seat] for seat in range(4)]
            totals = [0, 0, 0, 0]

            # Two rounds per pairing. A seat-swapping variant ([p1, p1, p2, p2]
            # first, then [p2, p2, p1, p1] with the totals reversed) is
            # disabled, so both rounds use the identical seating.
            for _ in range(2):
                print(' ')
                env = SchafkopfEnv(seed=1)

                for game_nr in range(1, games_per_round + 1):
                    state, reward, terminal = env.reset()
                    while not terminal:
                        mover = seating[state["game_state"].current_player]
                        action, prob = mover.act(state)
                        state, reward, terminal = env.step(action, prob)

                    # Add the finished game's reward to each seat's total.
                    totals = [total + reward[seat] for seat, total in enumerate(totals)]

                    # Progress marker: one dot per 100 completed games.
                    if game_nr % 100 == 0:
                        print('.', end='')
                    # Debug aid: env.print_game() dumps the finished game.

                print(totals)

            back_seats_avg = (totals[2] + totals[3]) / normaliser
            front_seats_avg = (totals[0] + totals[1]) / normaliser
            print("player " + str(i) + " vs. player " + str(j) + " = "
                  + str(back_seats_avg) + " to " + str(front_seats_avg))
# Start the tournament only when this file is executed as a script; importing
# the module must not trigger a run.
if __name__ == '__main__':
    main()