From d7fb525936e6b610749125543124461d1e6321cc Mon Sep 17 00:00:00 2001 From: Kaiyotech <93724202+Kaiyotech@users.noreply.github.com> Date: Tue, 27 Sep 2022 09:51:44 -0400 Subject: [PATCH] change to zero mean on both kickoff and pinch, freeze actor. --- Constants_kickoff.py | 2 +- Constants_pinch.py | 2 +- learner_kickoff.py | 4 ++-- learner_pinch.py | 10 +++++----- worker_pinch.py | 6 +++--- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Constants_kickoff.py b/Constants_kickoff.py index 4ca06ab..af30a30 100644 --- a/Constants_kickoff.py +++ b/Constants_kickoff.py @@ -1,5 +1,5 @@ FRAME_SKIP = 4 TIME_HORIZON = 4 # horizon in seconds T_STEP = FRAME_SKIP / 120 # real time per rollout step -ZERO_SUM = False +ZERO_SUM = True STEP_SIZE = 1_000_000 diff --git a/Constants_pinch.py b/Constants_pinch.py index ec19fa3..46b7d32 100644 --- a/Constants_pinch.py +++ b/Constants_pinch.py @@ -1,6 +1,6 @@ FRAME_SKIP = 4 TIME_HORIZON = 4 # horizon in seconds T_STEP = FRAME_SKIP / 120 # real time per rollout step -ZERO_SUM = False +ZERO_SUM = True STEP_SIZE = 1_000_000 DB_NUM = 1 diff --git a/learner_kickoff.py b/learner_kickoff.py index 8241b7c..6637a5c 100644 --- a/learner_kickoff.py +++ b/learner_kickoff.py @@ -39,7 +39,7 @@ fps = 120 / frame_skip gamma = np.exp(np.log(0.5) / (fps * half_life_seconds)) config = dict( - actor_lr=1e-4, + actor_lr=0, critic_lr=1e-4, n_steps=STEP_SIZE, batch_size=100_000, @@ -128,7 +128,7 @@ disable_gradient_logging=True, ) - alg.load("kickoff_saves/Opti_1664201562.2237294/Opti_1640/checkpoint.pt") + alg.load("kickoff_saves/Opti_1664218733.7430687/Opti_1800/checkpoint.pt") alg.agent.optimizer.param_groups[0]["lr"] = logger.config.actor_lr alg.agent.optimizer.param_groups[1]["lr"] = logger.config.critic_lr diff --git a/learner_pinch.py b/learner_pinch.py index f043c5c..2d16f84 100644 --- a/learner_pinch.py +++ b/learner_pinch.py @@ -39,7 +39,7 @@ fps = 120 / frame_skip gamma = np.exp(np.log(0.5) / (fps * half_life_seconds)) config = dict( - actor_lr=1e-4, + actor_lr=0, critic_lr=1e-4, n_steps=Constants_pinch.STEP_SIZE, batch_size=100_000, @@ -79,11 +79,11 @@ concede_w=-10, velocity_pb_w=0.025, velocity_bg_w=0.5, - acel_ball_w=3, - punish_low_touch_w=-0.1, # increase later + acel_ball_w=5, + punish_low_touch_w=-0.5, # increase later team_spirit=1, cons_air_touches_w=1, - jump_touch_w=1.5, + jump_touch_w=1, wall_touch_w=1, ), lambda: CoyoteAction(), @@ -128,7 +128,7 @@ disable_gradient_logging=True, ) - alg.load("pinch_saves/Opti_1664165982.0115736/Opti_2070/checkpoint.pt") + alg.load("pinch_saves/Opti_1664215027.7179081/Opti_2550/checkpoint.pt") alg.agent.optimizer.param_groups[0]["lr"] = logger.config.actor_lr alg.agent.optimizer.param_groups[1]["lr"] = logger.config.critic_lr diff --git a/worker_pinch.py b/worker_pinch.py index f7cd902..54b11be 100644 --- a/worker_pinch.py +++ b/worker_pinch.py @@ -24,11 +24,11 @@ concede_w=-10, velocity_pb_w=0.025, velocity_bg_w=0.5, - acel_ball_w=3, - punish_low_touch_w=-0.1, # increase later + acel_ball_w=5, + punish_low_touch_w=-0.5, # increase later team_spirit=1, cons_air_touches_w=1, - jump_touch_w=1.5, + jump_touch_w=1, wall_touch_w=1) frame_skip = Constants_pinch.FRAME_SKIP fps = 120 // frame_skip