Commit

attempt to add boost option to selector
Kaiyotech committed Feb 1, 2023
1 parent 055d5b6 commit c758dbd
Showing 5 changed files with 26 additions and 23 deletions.
15 changes: 8 additions & 7 deletions CoyoteObs.py
@@ -628,7 +628,8 @@ def add_boosts_to_obs(self, obs, player_car: PhysicsObject, inverted: bool):
# obs.extend(self.create_boost_packet(player_car, i, inverted))

def add_players_to_obs(self, obs: List, state: GameState, player: PlayerData, ball: PhysicsObject,
- prev_act: np.ndarray, inverted: bool, previous_model_action, zero_other_players: bool):
+ prev_act: np.ndarray, inverted: bool, previous_model_action, zero_other_players: bool,
+ zero_boost: bool):

# player_data = self.create_player_packet(player, player.inverted_car_data
# if inverted else player.car_data, ball, prev_act, previous_model_action)
@@ -641,11 +642,11 @@ def add_players_to_obs(self, obs: List, state: GameState, player: PlayerData, ba
player.inverted_car_data.angular_velocity if inverted else player.car_data.angular_velocity,
player.inverted_car_data.forward() if inverted else player.car_data.forward(),
player.inverted_car_data.up() if inverted else player.inverted_car_data.up(),
- player.boost_amount, player.on_ground, player.has_jump, player.has_flip,
+ 0 if zero_boost else player.boost_amount, player.on_ground, player.has_jump, player.has_flip,
player.is_demoed, demo_timer, self.POS_STD, self.VEL_STD, self.ANG_STD,
ball.position, ball.linear_velocity, prev_act, self.boosttimes[cid],
self.jumptimes[cid], self.airtimes[cid], self.fliptimes[cid], self.handbrakes[cid],
- self.flipdirs[cid][0], self.flipdirs[cid][1],
+ self.flipdirs[cid][0], self.flipdirs[cid][1]
)
else:
player_data = self.create_player_packet_njit(
@@ -654,9 +655,9 @@ def add_players_to_obs(self, obs: List, state: GameState, player: PlayerData, ba
player.inverted_car_data.angular_velocity if inverted else player.car_data.angular_velocity,
player.inverted_car_data.forward() if inverted else player.car_data.forward(),
player.inverted_car_data.up() if inverted else player.inverted_car_data.up(),
- player.boost_amount, player.on_ground, player.has_jump, player.has_flip,
+ 0 if zero_boost else player.boost_amount, player.on_ground, player.has_jump, player.has_flip,
player.is_demoed, demo_timer, self.POS_STD, self.VEL_STD, self.ANG_STD,
- ball.position, ball.linear_velocity, prev_act,
+ ball.position, ball.linear_velocity, prev_act
)

if self.stack_size != 0:
@@ -792,7 +793,7 @@ def model_add_action_to_stack(self, new_action: np.ndarray, car_id: int):
stack.insert(0, new_action[0] / self.model_action_size)

def build_obs(self, player: PlayerData, state: GameState, previous_action: np.ndarray,
- previous_model_action: np.ndarray = None, obs_info=None) -> Any:
+ previous_model_action: np.ndarray = None, obs_info=None, zero_boost: bool = False,) -> Any:

if self.any_timers:
self._update_addl_timers(player, state, previous_action)
@@ -823,7 +824,7 @@ def build_obs(self, player: PlayerData, state: GameState, previous_action: np.nd
obs = []
players_data = []
player_dat = self.add_players_to_obs(players_data, state, player, ball, previous_action, inverted,
- previous_model_action, self.zero_other_cars)
+ previous_model_action, self.zero_other_cars, zero_boost)
obs.extend(player_dat)
obs.extend(self.create_ball_packet(ball))
if not self.embed_players and not self.remove_other_cars:
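
The net effect of the CoyoteObs.py changes is a single flag threaded from build_obs into add_players_to_obs: when zero_boost is set, the sub-model is shown a boost amount of 0 while the rest of the player packet is unchanged. A minimal sketch of that pattern, using simplified stand-in names rather than the real CoyoteObs class:

    import numpy as np

    def build_player_packet(boost_amount: float, on_ground: bool, zero_boost: bool = False) -> np.ndarray:
        # Mirrors the `0 if zero_boost else player.boost_amount` change above:
        # the selected sub-model observes zero boost when boosting is disabled.
        observed_boost = 0.0 if zero_boost else boost_amount
        return np.array([observed_boost, float(on_ground)], dtype=np.float32)

    # e.g. build_player_packet(0.87, True, zero_boost=True) -> array([0., 1.], dtype=float32)
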
12 changes: 7 additions & 5 deletions CoyoteParser.py
@@ -116,7 +116,7 @@ def get_model_action_space() -> int:
def get_model_action_size(self) -> int:
return len(self._lookup_table)

- def parse_actions(self, actions: Any, state: GameState) -> np.ndarray:
+ def parse_actions(self, actions: Any, state: GameState, zero_boost: bool = False) -> np.ndarray:
# hacky pass through to allow multiple types of agent actions while still parsing nectos

# strip out fillers, pass through 8sets, get look up table values, recombine
@@ -139,7 +139,8 @@ def parse_actions(self, actions: Any, state: GameState) -> np.ndarray:
parsed_actions.append(self._lookup_table[stripped_action])
else:
parsed_actions.append(action)
-
+ if zero_boost:
+     parsed_actions[6] = 0
return np.asarray(parsed_actions)


@@ -466,7 +467,7 @@ def make_lookup_table(num_models):
return actions

def get_action_space(self) -> gym.spaces.Space:
- return Discrete(len(self._lookup_table))
+ return Discrete(len(self._lookup_table), 1)

@staticmethod
def get_model_action_space() -> int:
@@ -485,7 +486,8 @@ def parse_actions(self, actions: Any, state: GameState) -> np.ndarray:
for i, action in enumerate(actions):
# if self.prev_model[i] != action:
# self.prev_action[i] = None
- action = int(action)  # change ndarray [0.] to 0
+ zero_boost = bool(action[1])  # boost action 1 means no boost usage
+ action = int(action[0])  # change ndarray [0.] to 0
player = state.players[i]
# override state for recovery

@@ -495,7 +497,7 @@ def parse_actions(self, actions: Any, state: GameState) -> np.ndarray:
newstate = override_abs_state(player, state, action)

obs = self.models[action][1].build_obs(
- player, newstate, self.prev_actions[i], obs_info=self.obs_info)
+ player, newstate, self.prev_actions[i], obs_info=self.obs_info, zero_boost=zero_boost)
parse_action = self.models[action][0].act(obs)[0]
if self.selection_listener is not None and i == 0: # only call for first player
self.selection_listener.on_selection(self.sub_model_names[action], parse_action)
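
Taken together, the CoyoteParser.py changes treat each selector action as a pair: index 0 chooses the sub-model and index 1 is read as a zero-boost flag, which is passed into the chosen model's build_obs and on to the low-level parser, where boost is element 6 of the 8-element controller action (throttle, steer, pitch, yaw, roll, jump, boost, handbrake). A rough sketch of that flow, using hypothetical models/obs_builders containers in place of self.models; it illustrates the intent rather than reproducing the repository's code:

    import numpy as np

    BOOST_IDX = 6  # boost channel in the 8-element controller action

    def select_and_act(action: np.ndarray, models, obs_builders, player, state, prev_action) -> np.ndarray:
        model_idx = int(action[0])    # which sub-model to run
        zero_boost = bool(action[1])  # 1 means "do not use boost"

        # The sub-model plans as if it had no boost available...
        obs = obs_builders[model_idx].build_obs(player, state, prev_action, zero_boost=zero_boost)
        controls = np.asarray(models[model_idx].act(obs))

        # ...and the boost channel of its controller output is masked as well.
        if zero_boost:
            controls[BOOST_IDX] = 0
        return controls
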
6 changes: 3 additions & 3 deletions learner_recovery.py
@@ -55,10 +55,10 @@
ent_coef=0.01,
)

- run_id = "recovery_run10.00"
+ run_id = "recovery_run10.01"
wandb.login(key=os.environ["WANDB_KEY"])
logger = wandb.init(dir="./wandb_store",
- name="Recovery_Run10.00",
+ name="Recovery_Run10.01",
project="Opti",
entity="kaiyotech",
id=run_id,
@@ -167,7 +167,7 @@
disable_gradient_logging=True,
)

- # alg.load("recovery_saves/Opti_1675199748.2565064/Opti_180/checkpoint.pt")
+ alg.load("recovery_saves/Opti_1675257994.1376092/Opti_30/checkpoint.pt")
alg.agent.optimizer.param_groups[0]["lr"] = logger.config.actor_lr
alg.agent.optimizer.param_groups[1]["lr"] = logger.config.critic_lr

12 changes: 6 additions & 6 deletions learner_selector.py
@@ -55,10 +55,10 @@
ent_coef=0.01,
)

- run_id = "selector_run_6.07"
+ run_id = "selector_run_test1"
wandb.login(key=os.environ["WANDB_KEY"])
logger = wandb.init(dir="./wandb_store",
- name="Selector_Run_6.07",
+ name="Selector_Run_test1",
project="Opti",
entity="kaiyotech",
id=run_id,
@@ -123,13 +123,13 @@

actor = Sequential(Linear(input_size, 256), LeakyReLU(), Linear(256, 256), LeakyReLU(), Linear(256, 128),
LeakyReLU(),
- Linear(128, action_size))
+ Linear(128, action_size + 1))

critic = Opti(embedder=Sequential(Linear(35, 128), LeakyReLU(), Linear(128, 35 * 5)), net=critic)

actor = Opti(embedder=Sequential(Linear(35, 128), LeakyReLU(), Linear(128, 35 * 5)), net=actor)

- actor = DiscretePolicy(actor, shape=(action_size,))
+ actor = DiscretePolicy(actor, shape=(action_size, 1))

optim = torch.optim.Adam([
{"params": actor.parameters(), "lr": logger.config.actor_lr},
@@ -140,7 +140,7 @@
print(f"Gamma is: {gamma}")
count_parameters(agent)

- action_dict = { i: k for i,k in enumerate(Constants_selector.SUB_MODEL_NAMES) }
+ action_dict = {i: k for i, k in enumerate(Constants_selector.SUB_MODEL_NAMES)}
alg = PPO(
rollout_gen,
agent,
@@ -157,7 +157,7 @@
num_actions=action_size,
)

- alg.load("Selector_saves/Opti_1673548000.4596217/Opti_2525/checkpoint.pt")
+ # alg.load("Selector_saves/Opti_1673548000.4596217/Opti_2525/checkpoint.pt")
alg.agent.optimizer.param_groups[0]["lr"] = logger.config.actor_lr
alg.agent.optimizer.param_groups[1]["lr"] = logger.config.critic_lr

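
In learner_selector.py the actor's final Linear layer gains one extra output (action_size + 1) and DiscretePolicy is constructed with shape=(action_size, 1). One plausible reading of that extra logit is a zero-boost head alongside the categorical sub-model choice; the sketch below assumes exactly that split and may not match how rocket-learn's DiscretePolicy actually interprets the shape argument:

    import torch
    from torch import nn
    from torch.distributions import Bernoulli, Categorical

    class SelectorHead(nn.Module):
        # Illustrative two-part head: a categorical sub-model choice plus a
        # Bernoulli "disable boost" flag driven by the one extra logit.
        def __init__(self, feature_dim: int, action_size: int):
            super().__init__()
            self.out = nn.Linear(feature_dim, action_size + 1)

        def forward(self, features: torch.Tensor):
            logits = self.out(features)
            model_dist = Categorical(logits=logits[..., :-1])
            boost_off_dist = Bernoulli(logits=logits[..., -1])
            return model_dist, boost_off_dist
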
4 changes: 2 additions & 2 deletions submodels/submodel_agent.py
@@ -57,7 +57,7 @@ def __init__(self, filename, parser=CoyoteAction()):
torch.set_num_threads(1)
self.action_parser = parser

- def act(self, state, deterministic=True):
+ def act(self, state, deterministic=True, zero_boost=False):
with torch.no_grad():
all_actions = []
dist = get_action_distribution(state, self.actor)
@@ -73,4 +73,4 @@ def act(self, state, deterministic=True):
padded_actions.append(action)

all_actions = padded_actions
- return self.action_parser.parse_actions(all_actions, state)
+ return self.action_parser.parse_actions(all_actions, state, zero_boost)
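
With this pass-through, a caller can suppress boost for whichever sub-model was selected; the flag only takes effect inside the final parse_actions call. A minimal usage sketch, with a hypothetical obs coming from the rollout loop:

    # `agent` is a submodels.submodel_agent instance loaded from a checkpoint.
    controls = agent.act(obs, zero_boost=True)
    # `controls` is whatever CoyoteAction.parse_actions returns; with
    # zero_boost=True it zeroes the boost entry before returning.
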
