Skip to content

Commit

Permalink
roll back ceiling pinch to step 1020, big reward changes, terminal change
Browse files Browse the repository at this point in the history
  • Loading branch information
Kaiyotech committed Dec 20, 2022
1 parent 273dee5 commit 605d622
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 28 deletions.
12 changes: 6 additions & 6 deletions learner_ceil_pinch.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from rocket_learn.utils.stat_trackers.common_trackers import Speed, Demos, TimeoutRate, Touch, EpisodeLength, Boost, \
BehindBall, TouchHeight, DistToBall, AirTouch, AirTouchHeight, BallHeight, BallSpeed, CarOnGround, GoalSpeed,\
MaxGoalSpeed
from my_stattrackers import GoalSpeedTop5perc

# ideas for models:
# get to ball as fast as possible, sometimes with no boost, rewards exist
Expand Down Expand Up @@ -51,10 +52,10 @@
ent_coef=0.01,
)

run_id = "ceil_pinch_run1"
run_id = "ceil_pinch_run1.02"
wandb.login(key=os.environ["WANDB_KEY"])
logger = wandb.init(dir="./wandb_store",
name="Ceil_Pinch_Run1",
name="Ceil_Pinch_Run1.02",
project="Opti",
entity="kaiyotech",
id=run_id,
Expand All @@ -67,7 +68,7 @@
stat_trackers = [
Speed(normalize=True), Demos(), TimeoutRate(), Touch(), EpisodeLength(), Boost(), BehindBall(), TouchHeight(),
DistToBall(), AirTouch(), AirTouchHeight(), BallHeight(), BallSpeed(normalize=True), CarOnGround(),
GoalSpeed(), MaxGoalSpeed(),
GoalSpeed(), MaxGoalSpeed(), GoalSpeedTop5perc(),
]

rollout_gen = RedisRolloutGenerator("Opti_ceil_pinch",
Expand All @@ -82,12 +83,11 @@
velocity_pb_w=0.025,
velocity_bg_w=2,
acel_ball_w=2,
exit_velocity_w=3,
team_spirit=0,
cons_air_touches_w=3,
jump_touch_w=1,
wall_touch_w=0.5,
goal_speed_exp=1.75,
touch_height_exp=1.3
),
lambda: CoyoteAction(),
save_every=logger.config.save_every * 3,
Expand Down Expand Up @@ -131,7 +131,7 @@
disable_gradient_logging=True,
)

alg.load("ceil_pinch_saves/Opti_1665659392.7073987/Opti_2570/checkpoint.pt")
alg.load("ceil_pinch_saves/Opti_1664986050.1984107/Opti_1020/checkpoint.pt")
alg.agent.optimizer.param_groups[0]["lr"] = logger.config.actor_lr
alg.agent.optimizer.param_groups[1]["lr"] = logger.config.critic_lr

Expand Down
8 changes: 4 additions & 4 deletions learner_recovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
fps = 120 / frame_skip
gamma = np.exp(np.log(0.5) / (fps * half_life_seconds))
config = dict(
actor_lr=0,
actor_lr=1e-4,
critic_lr=1e-4,
n_steps=Constants_recovery.STEP_SIZE,
batch_size=200_000,
Expand All @@ -55,10 +55,10 @@
ent_coef=0.01,
)

run_id = "recovery_run4.07"
run_id = "recovery_run4.08"
wandb.login(key=os.environ["WANDB_KEY"])
logger = wandb.init(dir="./wandb_store",
name="Recovery_Run4.07",
name="Recovery_Run4.08",
project="Opti",
entity="kaiyotech",
id=run_id,
Expand Down Expand Up @@ -136,7 +136,7 @@
disable_gradient_logging=True,
)

alg.load("recovery_saves/Opti_1671375787.1485393/Opti_3330/checkpoint.pt")
alg.load("recovery_saves/Opti_1671444731.3342474/Opti_3500/checkpoint.pt")
alg.agent.optimizer.param_groups[0]["lr"] = logger.config.actor_lr
alg.agent.optimizer.param_groups[1]["lr"] = logger.config.critic_lr

Expand Down
4 changes: 2 additions & 2 deletions rewards.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ def pre_step(self, state: GameState):
if self.backboard_bounce and not self.floor_bounce:
player_rewards[self.blue_toucher] += self.double_tap_w
if self.blue_touch_height > GOAL_HEIGHT:
player_rewards[self.blue_toucher] += self.aerial_goal_w
player_rewards[self.blue_toucher] += self.aerial_goal_w * (goal_speed / (CAR_MAX_SPEED * 1.25))
player_rewards[:mid] += self.team_spirit * goal_reward
elif self.orange_touch_timer < self.touch_timeout and self.zero_sum:
player_rewards[mid:] -= goal_reward
Expand All @@ -452,7 +452,7 @@ def pre_step(self, state: GameState):
if self.backboard_bounce and not self.floor_bounce:
player_rewards[self.orange_toucher] += self.double_tap_w
if self.orange_touch_height > GOAL_HEIGHT:
player_rewards[self.orange_toucher] += self.aerial_goal_w
player_rewards[self.orange_toucher] += self.aerial_goal_w * (goal_speed / (CAR_MAX_SPEED * 1.25))
player_rewards[mid:] += self.team_spirit * goal_reward

elif self.blue_touch_timer < self.touch_timeout and self.zero_sum:
Expand Down
33 changes: 17 additions & 16 deletions worker_ceil_pinch.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,24 @@
from torch import set_num_threads
import Constants_ceil_pinch
import os
set_num_threads(1)

set_num_threads(1)

if __name__ == "__main__":
rew = ZeroSumReward(zero_sum=Constants_ceil_pinch.ZERO_SUM,
goal_w=0,
aerial_goal_w=10,
double_tap_w=20,
concede_w=-10,
velocity_pb_w=0.025,
velocity_bg_w=2,
acel_ball_w=2,
team_spirit=0,
cons_air_touches_w=3,
jump_touch_w=1,
wall_touch_w=0.5,
goal_speed_exp=1.75,
touch_height_exp=1.3)
goal_w=0,
aerial_goal_w=10,
double_tap_w=20,
concede_w=-10,
velocity_pb_w=0.025,
velocity_bg_w=2,
acel_ball_w=2,
exit_velocity_w=3,
team_spirit=0,
cons_air_touches_w=3,
jump_touch_w=1,
wall_touch_w=0.5,
)
frame_skip = Constants_ceil_pinch.FRAME_SKIP
fps = 120 // frame_skip
name = "Default"
Expand Down Expand Up @@ -63,7 +63,6 @@
streamer_mode = True
evaluation_prob = 0
game_speed = 1
deterministic_streamer = True
auto_minimize = False

match = Match(
Expand All @@ -77,7 +76,9 @@
terminal_conditions=[GoalScoredCondition(),
BallTouchGroundCondition(min_time_sec=0,
tick_skip=Constants_ceil_pinch.FRAME_SKIP,
time_after_ground_sec=1),
time_after_ground_sec=0,
check_towards_goal=True,
y_distance_goal=1500),
],
reward_function=rew,
tick_skip=frame_skip,
Expand Down

0 comments on commit 605d622

Please sign in to comment.