Skip to content

Commit

Permalink
Update Deep CFR implementations to implement Policy
Browse files Browse the repository at this point in the history
The `player_id` argument is not needed, but it still has to be present
in order to implement `Policy`. For example, without it `PolicyBot`
breaks, as it passes three arguments (self, the game state, and the player id).
  • Loading branch information
plamentotev committed Jun 30, 2024
1 parent 82b5aac commit c749781
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion open_spiel/python/algorithms/deep_cfr.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ def _sample_action_from_advantage(self, state, player):

return advantages, matched_regrets

def action_probabilities(self, state):
def action_probabilities(self, state, player_id=None):
"""Returns action probabilities dict for a single batch."""
cur_player = state.current_player()
legal_actions = state.legal_actions(cur_player)
Expand Down
2 changes: 1 addition & 1 deletion open_spiel/python/algorithms/deep_cfr_tf2.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ def _sample_action_from_advantage(self, state, player):
info_state, legal_actions_mask, player)
return advantages.numpy(), matched_regrets.numpy()

def action_probabilities(self, state):
def action_probabilities(self, state, player_id=None):
"""Returns action probabilities dict for a single batch."""
cur_player = state.current_player()
legal_actions = state.legal_actions(cur_player)
Expand Down
2 changes: 1 addition & 1 deletion open_spiel/python/jax/deep_cfr.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ def _sample_action_from_advantage(self, state, player):
info_state, legal_actions_mask, self._params_adv_network[player])
return advantages, matched_regrets

def action_probabilities(self, state):
def action_probabilities(self, state, player_id=None):
"""Returns action probabilities dict for a single batch."""
cur_player = state.current_player()
legal_actions = state.legal_actions(cur_player)
Expand Down
2 changes: 1 addition & 1 deletion open_spiel/python/pytorch/deep_cfr.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ def _sample_action_from_advantage(self, state, player):
matched_regrets[max(legal_actions, key=lambda a: raw_advantages[a])] = 1
return advantages, matched_regrets

def action_probabilities(self, state):
def action_probabilities(self, state, player_id=None):
"""Computes action probabilities for the current player in state.
Args:
Expand Down

0 comments on commit c749781

Please sign in to comment.