Commit 5685591
just eval for now
Kaiyotech committed Aug 30, 2023
1 parent 62d72a1 commit 5685591
Showing 3 changed files with 313 additions and 8 deletions.
6 changes: 3 additions & 3 deletions learner_gp.py
@@ -52,10 +52,10 @@
ent_coef=0.01,
)

run_id = "gp_run_2v2_3.64"
run_id = "gp_run_2v2_3.65"
wandb.login(key=os.environ["WANDB_KEY"])
logger = wandb.init(dir="./wandb_store",
name="GP_Run_2v2_3.64",
name="GP_Run_2v2_3.65",
project="Opti",
entity="kaiyotech",
id=run_id,
@@ -155,7 +155,7 @@
disable_gradient_logging=True,
)

alg.load("GP_saves/Opti_1693321581.5721147/Opti_62250/checkpoint.pt")
alg.load("GP_saves/Opti_1693337736.1739774/Opti_62430/checkpoint.pt")

alg.agent.optimizer.param_groups[0]["lr"] = logger.config.actor_lr
alg.agent.optimizer.param_groups[1]["lr"] = logger.config.critic_lr
301 changes: 301 additions & 0 deletions random_eval_matchmaker.py
@@ -0,0 +1,301 @@
import numpy as np
import itertools

from rocket_learn.agent.discrete_policy import DiscretePolicy
from rocket_learn.matchmaker.base_matchmaker import BaseMatchmaker
from rocket_learn.rollout_generator.redis.utils import get_rating, get_ratings, get_pretrained_ratings, LATEST_RATING_ID
from rocket_learn.utils.util import probability_NvsM
from rocket_learn.agent.types import PretrainedAgents


class RandomEvalMatchmaker(BaseMatchmaker):
# From JPK314; just didn't feel like updating rocket-learn right now to use it
def __init__(self, sigma_target=1, pretrained_agents: PretrainedAgents = None, non_latest_version_prob=[1, 0, 0, 0],
past_version_prob=1, full_team_trainings=0, full_team_evaluations=0, force_non_latest_orange=False,
showmatch=False,
orange_agent_text_file=None):
"""
:param sigma_target: The sigma value at or below which an agent is no longer preferentially selected for eval matches
:param pretrained_agents: a configuration dict for how and how often to use pretrained agents in matchups.
:param non_latest_version_prob: An array such that the ith index is the probability that i agents in a training matchup are not the latest version of the model.
:param past_version_prob: The probability that a non-latest agent in the matchup is a past version of the model.
:param full_team_trainings: The probability that a match uses all agents of the same type on a given team in training.
:param full_team_evaluations: The probability that a match uses all agents of the same type on a given team in evals.
:param force_non_latest_orange: A boolean that, if true, ensures the first player in the list is the latest agent (if one exists in the match).
:param showmatch: If true, forces full-team matchups and the force_non_latest_orange behavior, and swaps the latest agents for the peak-rated past version in pretrained training matchups.
:param orange_agent_text_file: Optional path to a text file to which the orange team's agent key is written (used for showmatches).
"""
self.orange_agents_text_file = orange_agent_text_file
self.showmatch = showmatch
self.sigma_target = sigma_target
self.non_latest_version_prob = np.array(
non_latest_version_prob) / sum(non_latest_version_prob)
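# Example: non_latest_version_prob=[0.85, 0.1, 0.05, 0.0] (the worker_gp.py setting) already
# sums to 1, so after normalization a training matchup has an 85% chance of drawing zero
# non-latest agents, 10% of one, and 5% of two (before the full-team / half-team caps below).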
self.past_version_prob = 1 if pretrained_agents is None else past_version_prob
self.full_team_trainings = full_team_trainings or showmatch
self.full_team_evaluations = full_team_evaluations or showmatch
self.force_non_latest_orange = force_non_latest_orange or showmatch
if pretrained_agents is not None:
self.consider_pretrained = True
pretrained_agents_keys, pretrained_agents_values = zip(
*pretrained_agents.items())
self.pretrained_agents = pretrained_agents_keys
pretrained_probs = [p["prob"] for p in pretrained_agents_values]
self.pretrained_probs = np.array(
pretrained_probs) / sum(pretrained_probs)
self.pretrained_evals = [p["eval"]
for p in pretrained_agents_values]
self.pretrained_p_deterministic_training = [
p["p_deterministic_training"] if p["p_deterministic_training"] is not None else 1 for p in pretrained_agents_values]
self.pretrained_keys = [p["key"] for p in pretrained_agents_values]
self.pretrained_eval_keys = [k for i, k in enumerate(
self.pretrained_keys) if self.pretrained_evals[i]]
else:
self.consider_pretrained = False

def generate_matchup(self, redis, n_agents, evaluate):
full_team_match = np.random.random() < (
self.full_team_evaluations if evaluate else self.full_team_trainings)
if evaluate:
n_agents = np.random.choice([2, 4, 6])
if not evaluate:
n_non_latest = np.random.choice(
len(self.non_latest_version_prob), p=self.non_latest_version_prob)
if full_team_match:
# We make either one or zero non-latest picks
n_non_latest = int(n_non_latest > 0)
else:
# We only want a maximum of half the agents to be non-latest in training matchups
n_non_latest = min(n_agents // 2, n_non_latest)
n_past_version = np.random.binomial(
n_non_latest, self.past_version_prob)
if self.consider_pretrained:
n_each_pretrained = np.random.multinomial(
n_non_latest-n_past_version, self.pretrained_probs)

per_team = n_agents // 2
gamemode = f"{per_team}v{per_team}"
gamemode = '1v0' if gamemode == '0v0' else gamemode
latest_id = redis.get(LATEST_RATING_ID).decode("utf-8")
latest_key = f"{latest_id}-stochastic"

# This is the version of the most recent model (NOT just eval models)
# Doing this instead of int(redis.get(VERSION_LATEST)) because the latest model is whatever is currently in the rollout worker
latest_version = -1

# This is a training match with all latest agents, no further logic necessary
if not evaluate and n_non_latest == 0:
rating = get_rating(gamemode, latest_key, redis)
return [latest_version] * n_agents, [rating] * n_agents, False, n_agents // 2, n_agents // 2

past_version_ratings = get_ratings(gamemode, redis)
# This is the rating of the most recent eval model
latest_rating = past_version_ratings[latest_key]
past_version_ratings_keys, past_version_ratings_values = zip(
*past_version_ratings.items())

# We also have the pretrained agents' ratings (the ones that have eval set to true, that is)
if self.consider_pretrained:
pretrained_ratings = get_pretrained_ratings(gamemode, redis)
# Actually, we need all the ratings, but we'll split out the ones just to be used in the eval pool
pretrained_ratings_items = pretrained_ratings.items()
if pretrained_ratings.items():
pretrained_ratings_keys, pretrained_ratings_values = zip(
*pretrained_ratings_items)
else:
pretrained_ratings_keys = ()
pretrained_ratings_values = ()
pretrained_ratings_items_eval = [p for p in pretrained_ratings_items if "-".join(
p[0].split("-")[:-1]) in self.pretrained_eval_keys or p[0] in self.pretrained_eval_keys]
if pretrained_ratings_items_eval:
pretrained_ratings_keys_eval, pretrained_ratings_values_eval = zip(
*pretrained_ratings_items_eval)
else:
pretrained_ratings_keys_eval = ()
pretrained_ratings_values_eval = ()
else:
pretrained_ratings = []
pretrained_ratings_keys_eval = ()
pretrained_ratings_values_eval = ()

all_ratings_keys = past_version_ratings_keys + pretrained_ratings_keys_eval
all_ratings_values = past_version_ratings_values + pretrained_ratings_values_eval

if evaluate and len(all_ratings_keys) < 2:
# Can't run evaluation game, not enough agents
return [latest_version] * n_agents, [latest_rating] * n_agents, False, n_agents // 2, n_agents // 2

if evaluate: # Evaluation game, try to find agents with high sigma
sigmas = np.array(
[r.sigma for r in all_ratings_values])
probs = np.clip(sigmas - self.sigma_target, a_min=0, a_max=None)
s = probs.sum()
if s == 0: # No versions with high sigma available
if np.random.normal(0, self.sigma_target) > 1:
# Some chance of doing a match with random versions, so they might correct themselves
probs = np.ones_like(probs) / len(probs)
else:
return [latest_version] * n_agents, [latest_rating] * n_agents, False, n_agents // 2, n_agents // 2
else:
probs /= s
chosen_first_idx = np.random.choice(len(probs), p=probs)
chosen_first_key = all_ratings_keys[chosen_first_idx]
chosen_first_rating = all_ratings_values[chosen_first_idx]
else:
chosen_first_key = latest_version
chosen_first_rating = latest_rating

if full_team_match:
versions = [chosen_first_key] * per_team
ratings = [chosen_first_rating] * per_team
else:
versions = [chosen_first_key]
ratings = [chosen_first_rating]

if not evaluate:
# In a training match, we also need to add in the pretrained agents we have already decided upon
has_pretrained = n_non_latest - n_past_version > 0
if full_team_match and has_pretrained:
pretrained_idx = list(n_each_pretrained).index(1)
use_deterministic = np.random.random(
) < self.pretrained_p_deterministic_training[pretrained_idx]
pretrained_agent = self.pretrained_agents[pretrained_idx]
if isinstance(pretrained_agent, DiscretePolicy):
pretrained_key = self.pretrained_keys[pretrained_idx] + (
"-deterministic" if use_deterministic else "-stochastic")
else:
pretrained_key = self.pretrained_keys[pretrained_idx]
versions += [pretrained_key] * per_team
ratings += [pretrained_ratings_values[pretrained_ratings_keys.index(
pretrained_key)]] * per_team
if self.force_non_latest_orange or np.random.random() < 0.5:
# replace latest with peak
if self.showmatch:
assert full_team_match
sorted_ratings = {k: v for k, v in
sorted(past_version_ratings.items(), key=lambda item: item[1].mu)}
peak_version = list(sorted_ratings)[-1]
for i in range(per_team):
versions[i] = peak_version
if self.orange_agents_text_file is not None:
with open(self.orange_agents_text_file, 'w') as file:
file.write(versions[per_team])
return versions, ratings, False, n_agents // 2, n_agents // 2
else:
mid = len(versions) // 2
return (versions[mid:] + versions[:mid]), (ratings[mid:] + ratings[:mid]), False, n_agents // 2, n_agents // 2
elif has_pretrained:
for i, n_agent in enumerate(n_each_pretrained):
pretrained_agent = self.pretrained_agents[i]
if isinstance(pretrained_agent, DiscretePolicy):
n_deterministic = np.random.binomial(
n_agent, self.pretrained_p_deterministic_training[i])
versions += [self.pretrained_keys[i] +
"-deterministic"] * n_deterministic
versions += [self.pretrained_keys[i] +
"-stochastic"] * (n_agent - n_deterministic)
ratings += [pretrained_ratings_values[pretrained_ratings_keys.index(
self.pretrained_keys[i] + "-deterministic")]] * n_deterministic
ratings += [pretrained_ratings_values[pretrained_ratings_keys.index(
self.pretrained_keys[i] + "-stochastic")]] * (n_agent - n_deterministic)
else:
versions += [self.pretrained_keys[i]] * n_agent
ratings += [pretrained_ratings_values[pretrained_ratings_keys.index(
self.pretrained_keys[i])]] * n_agent

# We also need to add the agents that are latest. If full_team_match is true then at this point in execution we must have n_past_version == 1, so there is nothing to add here
if not full_team_match:
versions += [latest_version] * (n_agents - n_non_latest - 1)
ratings += [latest_rating] * (n_agents - n_non_latest - 1)

# At this point in execution, ratings contains all the information necessary to randomly fill the remaining spots with players via matchmaking pool.
# If evaluate, this pool includes pretrained. If not, this pool is only past versions.

ratings_values_pool = all_ratings_values if evaluate else past_version_ratings_values
ratings_keys_pool = all_ratings_keys if evaluate else past_version_ratings_keys
probs = np.zeros(len(ratings_values_pool))

# The point of this matchmaker is to use fully random evals - no concept of fairness. Therefore we will weight all agents with equal probability of being chosen.
# In evaluation matches where full_team_match is true, we don't want to have our remaining pick be the same agent as chosen_first.
# Outside of evaluation matches, nothing changes.
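# (For evals this means every non-excluded pool entry gets weight 0.5 * (1 - 0.5) = 0.25,
# i.e. a uniform random pick once the probabilities are normalized.)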
for i, rating in enumerate(ratings_values_pool):
if evaluate and full_team_match and ratings_keys_pool[i] == versions[0]:
p = 0
elif evaluate:
p = 0.5
else:
p = probability_NvsM(
[rating] * per_team, [ratings[0]] * per_team)
probs[i] = p * (1 - p)

probs /= probs.sum()

if full_team_match:
chosen_idx = np.random.choice(len(probs), p=probs)
versions += [ratings_keys_pool[chosen_idx]] * per_team
ratings += [ratings_values_pool[chosen_idx]] * per_team
if self.force_non_latest_orange or np.random.random() < 0.5:
return versions, ratings, evaluate, n_agents // 2, n_agents // 2
else:
mid = len(versions) // 2
return (versions[mid:] + versions[:mid]), (ratings[mid:] + ratings[:mid]), evaluate, n_agents // 2, n_agents // 2
else:
chosen_idxs = np.random.choice(
len(probs), size=n_agents - 1 if evaluate else n_past_version, p=probs)
for chosen_idx in chosen_idxs:
versions += [ratings_keys_pool[chosen_idx]]
ratings += [ratings_values_pool[chosen_idx]]

# We now have all the versions and ratings that will be used in parallel lists. Same as above - if evaluate, no fairness weighting.
# Otherwise, weight all permutations of matches by fairness and pick one
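# The fairness weight is p * (1 - p): it peaks at 0.25 for an even matchup (p = 0.5) and
# shrinks for lopsided ones (p = 0.9 gives 0.09), so training games favor balanced teams.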
if len(versions) != n_agents:
print("what??")
matchups = []
qualities = []
for team1 in itertools.combinations(range(n_agents), per_team):
team2 = [idx for idx in range(n_agents) if idx not in team1]
team1_ratings_keys = [versions[v] for v in team1]
team1_ratings_values = [ratings[v] for v in team1]
team2_ratings_keys = [versions[v] for v in team2]
team2_ratings_values = [ratings[v] for v in team2]
# Don't want identical teams facing each other in evals
if sorted(str(k) for k in team1_ratings_keys) == sorted(str(k) for k in team2_ratings_keys):
p = 0
elif evaluate:
p = 0.5
else:
p = probability_NvsM(
team1_ratings_values, team2_ratings_values)
matchups.append(list(team1))
qualities.append(p * (1 - p))
qualities = np.array(qualities)
s = qualities.sum()
if s == 0:
# Bad luck, just do regular match
return [latest_version] * n_agents, [latest_rating] * n_agents, False, n_agents // 2, n_agents // 2

# Pick a matchup, randomize ordering within team, and reorder versions and ratings accordingly
# Due to the way combinations works, randomizing which team team1 is on (orange or blue) doesn't change anything
matchup_idx = np.random.choice(len(matchups), p=qualities / s)
team1 = matchups[matchup_idx]
team2 = [idx for idx in range(n_agents) if idx not in team1]
np.random.shuffle(team1)
np.random.shuffle(team2)
if self.force_non_latest_orange and latest_version in versions:
latest_version_idx = versions.index(latest_version)
if latest_version_idx in team1:
blue_team = team1
orange_team = team2
else:
blue_team = team2
orange_team = team1
# Swap the latest version car to be in position 0
temp_idx = blue_team.index(latest_version_idx)
temp = blue_team[temp_idx]
blue_team[temp_idx] = blue_team[0]
blue_team[0] = temp
team1 = blue_team
team2 = orange_team

ordering = team1 + team2
versions = [versions[idx] for idx in ordering]
ratings = [ratings[idx] for idx in ordering]

return versions, ratings, evaluate, n_agents // 2, n_agents // 2
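
A rough usage sketch of the new matchmaker (the Redis connection details, n_agents, and the standalone call below are illustrative assumptions; the actual wiring lives in worker_gp.py and rocket-learn's rollout worker):

from redis import Redis

from random_eval_matchmaker import RandomEvalMatchmaker

redis = Redis(host="127.0.0.1", port=6379)
matchmaker = RandomEvalMatchmaker(sigma_target=0.5, past_version_prob=0,
                                  full_team_trainings=1, full_team_evaluations=1,
                                  non_latest_version_prob=[0.85, 0.1, 0.05, 0.0])

# evaluate=True ignores the requested n_agents and rolls a random 1v1 / 2v2 / 3v3
# from the rating pool stored in Redis.
versions, ratings, is_eval, n_blue, n_orange = matchmaker.generate_matchup(
    redis, n_agents=4, evaluate=True)
# versions and ratings are parallel lists, blue team first then orange;
# n_blue and n_orange are the per-team player counts.
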
14 changes: 9 additions & 5 deletions worker_gp.py
@@ -17,6 +17,7 @@
from mybots_statesets import EndKickoff
import Constants_gp
import os
+from random_eval_matchmaker import RandomEvalMatchmaker

from pretrained_agents.necto.necto_v1 import NectoV1
from pretrained_agents.nexto.nexto_v2 import NextoV2
@@ -131,8 +132,8 @@
local = True
auto_minimize = True
game_speed = 100
-evaluation_prob = 0.02
-past_version_prob = 1 # 0.5 # 0.1
+evaluation_prob = 1
+past_version_prob = 0 # 0.5 # 0.1
non_latest_version_prob = [0.85, 0.1, 0.05, 0.0] # this includes past_version and pretrained
deterministic_streamer = True
force_old_deterministic = True
@@ -156,9 +157,12 @@

pretrained_agents = Constants_gp.pretrained_agents

-matchmaker = Matchmaker(sigma_target=0.5, pretrained_agents=pretrained_agents, past_version_prob=past_version_prob,
-full_team_trainings=0.8, full_team_evaluations=1, force_non_latest_orange=False,
-non_latest_version_prob=non_latest_version_prob)
+# matchmaker = Matchmaker(sigma_target=0.5, pretrained_agents=pretrained_agents, past_version_prob=past_version_prob,
+# full_team_trainings=0.8, full_team_evaluations=1, force_non_latest_orange=False,
+# non_latest_version_prob=non_latest_version_prob)
+
+matchmaker = RandomEvalMatchmaker(sigma_target=0.5, past_version_prob=0, full_team_trainings=1, full_team_evaluations=1,
+force_non_latest_orange=False, non_latest_version_prob=non_latest_version_prob)


# terminals = [GoalScoredCondition(),
