Commit f8a87c1

changes to add new rewards and helpers for dtap. Godspeed Opti. Increased time horizon from 2 to 4. Starting from scratch, 50 freeze.
Kaiyotech committed May 24, 2023
1 parent e877114 commit f8a87c1
Showing 6 changed files with 83 additions and 44 deletions.
Constants_dtap.py (2 changes: 1 addition & 1 deletion)

@@ -2,7 +2,7 @@
 from pretrained_agents.KBB.kbb import KBB

 FRAME_SKIP = 4
-TIME_HORIZON = 2  # horizon in seconds
+TIME_HORIZON = 4  # horizon in seconds
 T_STEP = FRAME_SKIP / 120  # real time per rollout step
 ZERO_SUM = False
 STEP_SIZE = 500_000
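For downstream context: if the learner derives its discount factor from these constants in the usual half-life style (an assumption; the derivation itself is not part of this diff), doubling TIME_HORIZON roughly halves the per-step discount (1 - gamma):

    import numpy as np

    FRAME_SKIP = 4
    TIME_HORIZON = 4           # horizon in seconds (was 2)
    T_STEP = FRAME_SKIP / 120  # real time per rollout step

    # Assumed half-life style discount: gamma ** (TIME_HORIZON / T_STEP) == 0.5
    gamma = np.exp(np.log(0.5) / (TIME_HORIZON / T_STEP))
    print(round(gamma, 5))  # 0.99424 now, vs 0.98851 with TIME_HORIZON = 2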
CoyoteObs.py (16 changes: 15 additions & 1 deletion)

@@ -77,9 +77,11 @@ def __init__(self, tick_skip=8, team_size=3, expanding: bool = True, extra_boost
                  mask_aerial_opp=False,
                  selector_infinite_boost=None,
                  doubletap_indicator=False,
+                 dtap_dict=None,
                  ):
         super().__init__()
         self.doubletap_indicator = doubletap_indicator
+        self.dtap_dict = dtap_dict
         if self.doubletap_indicator:
             self.floor_bounce = False
             self.backboard_bounce = False
@@ -279,6 +281,14 @@ def pre_step(self, state: GameState):
             ball_near_wall = abs(state.ball.position[1]) > (BACK_WALL_Y - BALL_RADIUS * 2)
             if not touched and ball_near_wall and ball_bounced_backboard:
                 self.backboard_bounce = True
+                self.dtap_dict["ball_hit_bb"] = False
+
+            if touched and not self.dtap_dict["hit_towards_bb"]:
+                self.dtap_dict["hit_towards_bb"] = True
+
+            if touched and self.dtap_dict["hit_towards_bb"] and self.dtap_dict["ball_hit_bb"]:
+                self.dtap_dict["hit_towards_goal"] = True
+
         self.prev_ball_vel = np.array(state.ball.linear_velocity)

     def _update_timers(self, state: GameState):
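Taken together, the flags the obs builder flips here form a tiny state machine that the reward function reads on the same step. A standalone sketch of the progression the flags encode (illustrative only; the touch and bounce arguments stand in for the checks above):

    # Illustrative dtap state machine (not the repo's code).
    # Progression for a backboard double tap:
    #   hit_towards_bb -> ball_hit_bb -> hit_towards_goal
    dtap_status = {"hit_towards_bb": False, "ball_hit_bb": False, "hit_towards_goal": False}

    def advance(touched: bool, bounced_off_backboard: bool) -> None:
        if touched and not dtap_status["hit_towards_bb"]:
            dtap_status["hit_towards_bb"] = True    # first touch sends the ball at the backboard
        elif bounced_off_backboard and dtap_status["hit_towards_bb"]:
            dtap_status["ball_hit_bb"] = True       # rebound off the backboard
        elif touched and dtap_status["ball_hit_bb"]:
            dtap_status["hit_towards_goal"] = True  # second touch, the actual shot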
@@ -743,7 +753,11 @@ def add_players_to_obs(self, obs: List, state: GameState, player: PlayerData, ba
                                 )

         if self.doubletap_indicator:
-            player_data.extend(list([int(self.backboard_bounce), int(self.floor_bounce)]))
+            player_data.extend(list([int(self.backboard_bounce),
+                                     int(self.floor_bounce),
+                                     int(self.dtap_dict["hit_towards_bb"]),
+                                     int(self.dtap_dict["hit_towards_goal"]),
+                                     ]))

         if self.stack_size != 0:
             if self.selector:
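The indicator block now appends four flags per player instead of two, which changes the obs length and is part of why the network input sizes are resized in learner_dtap.py below. A toy version of just that extension (values assumed for illustration):

    backboard_bounce, floor_bounce = True, False
    dtap_dict = {"hit_towards_bb": True, "hit_towards_goal": False}

    player_data = []
    player_data.extend([int(backboard_bounce),
                        int(floor_bounce),
                        int(dtap_dict["hit_towards_bb"]),
                        int(dtap_dict["hit_towards_goal"])])
    print(player_data)  # [1, 0, 1, 0] -- four indicator values per player, up from two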
learner_dtap.py (44 changes: 24 additions & 20 deletions)

@@ -51,10 +51,10 @@
         ent_coef=0.01,
     )

-    run_id = "dtap_run3.00"
+    run_id = "dtap_runtest4.00"
     wandb.login(key=os.environ["WANDB_KEY"])
     logger = wandb.init(dir="./wandb_store",
-                        name="Dtap_Run3.00",
+                        name="Dtap_Runtest4.00",
                         project="Opti",
                         entity="kaiyotech",
                         id=run_id,
@@ -72,30 +72,34 @@
         GoalSpeed()
     ]

+    dtap_status = {"hit_towards_bb": False,
+                   "ball_hit_bb": False,
+                   "hit_towards_goal": False,
+                   }
+
     rollout_gen = RedisRolloutGenerator("Opti_Dtap",
                                         redis,
                                         lambda: CoyoteObsBuilder(expanding=True, tick_skip=Constants_dtap.FRAME_SKIP,
                                                                  team_size=3, extra_boost_info=False,
                                                                  embed_players=False,
-                                                                 add_jumptime=True,
-                                                                 add_airtime=True,
-                                                                 add_fliptime=True,
-                                                                 add_boosttime=True,
-                                                                 add_handbrake=True,
                                                                  doubletap_indicator=True,
+                                                                 dtap_dict=dtap_status,
                                                                  ),
                                         lambda: ZeroSumReward(zero_sum=Constants_dtap.ZERO_SUM,
                                                               concede_w=-10,
                                                               double_tap_w=10,
-                                                              velocity_bg_w=0,
-                                                              velocity_pb_w=0,
-                                                              acel_ball_w=0,
-                                                              jump_touch_w=0,
-                                                              wall_touch_w=0,
+                                                              velocity_bg_w=0.1,
+                                                              velocity_pb_w=0.1,
+                                                              acel_ball_w=1,
+                                                              jump_touch_w=1,
+                                                              wall_touch_w=1,
                                                               backboard_bounce_rew=1,
                                                               tick_skip=Constants_dtap.FRAME_SKIP,
                                                               flatten_wall_height=True,
-                                                              double_tap_floor_mult=0.5,
+                                                              double_tap_floor_mult=0.8,
+                                                              dtap_dict=dtap_status,
+                                                              fancy_dtap=True,
+                                                              dtap_helper_w=0.1,
                                                               ),
                                         lambda: CoyoteAction(),
                                         save_every=logger.config.save_every * 3,
@@ -107,13 +111,13 @@
                                         max_age=1,
                                         )

-    critic = Sequential(Linear(231, 256), LeakyReLU(), Linear(256, 128), LeakyReLU(),
-                        Linear(128, 128), LeakyReLU(),
-                        Linear(128, 1))
+    critic = Sequential(Linear(226, 256), LeakyReLU(), Linear(256, 256), LeakyReLU(),
+                        Linear(256, 256), LeakyReLU(),
+                        Linear(256, 1))

-    actor = Sequential(Linear(231, 96), LeakyReLU(), Linear(96, 96), LeakyReLU(),
-                       Linear(96, 96), LeakyReLU(),
-                       Linear(96, 373))
+    actor = Sequential(Linear(226, 256), LeakyReLU(), Linear(256, 256), LeakyReLU(),
+                       Linear(256, 128), LeakyReLU(),
+                       Linear(128, 373))

     actor = DiscretePolicy(actor, (373,))
@@ -140,7 +144,7 @@
         disable_gradient_logging=True,
     )

-    # alg.load("GP_saves/Opti_1682795258.7251265/Opti_41230/checkpoint.pt")
+    # alg.load("dtap_saves/Opti_1683834997.1134317/Opti_11780/checkpoint.pt")

     alg.agent.optimizer.param_groups[0]["lr"] = logger.config.actor_lr
     alg.agent.optimizer.param_groups[1]["lr"] = logger.config.critic_lr
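The first Linear's in_features has to match the obs builder's new output length (231 -> 226 in this commit). A quick standalone check of the resized critic (sketch only; 226 is taken from the diff, not re-derived):

    import torch
    from torch.nn import Linear, LeakyReLU, Sequential

    critic = Sequential(Linear(226, 256), LeakyReLU(), Linear(256, 256), LeakyReLU(),
                        Linear(256, 256), LeakyReLU(),
                        Linear(256, 1))

    obs = torch.zeros(1, 226)  # assumed obs length after this commit
    print(critic(obs).shape)   # torch.Size([1, 1]); a size mismatch would raise here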
rewards.py (22 changes: 21 additions & 1 deletion)

@@ -137,7 +137,12 @@ def __init__(
             flatten_wall_height=False,
             backboard_bounce_rew=0,
             double_tap_floor_mult=0,
+            dtap_dict=None,
+            fancy_dtap=False,
+            dtap_helper_w=0,
     ):
+        self.dtap_helper_w = dtap_helper_w
+        self.fancy_dtap = fancy_dtap
         self.double_tap_floor_mult = double_tap_floor_mult
         self.backboard_bounce_rew = backboard_bounce_rew
         self.vel_po_mult_ss = vel_po_mult_ss
@@ -251,6 +256,7 @@ def __init__(
         self.backboard_bounce = False
         self.floor_bounce = False
         self.got_reset = []
+        self.dtap_dict = dtap_dict
         # for aerial goal
         self.blue_touch_height = -1
         self.orange_touch_height = -1
@@ -447,10 +453,24 @@ def pre_step(self, state: GameState):
                 norm_pos_diff = pos_diff / np.linalg.norm(pos_diff)
                 norm_vel = vel / BALL_MAX_SPEED
                 vel_bg_reward = float(np.dot(norm_pos_diff, norm_vel))
-                player_rewards[i] += self.velocity_bg_w * vel_bg_reward
+                if not self.fancy_dtap or (self.fancy_dtap and self.dtap_dict["hit_towards_goal"]):
+                    player_rewards[i] += self.velocity_bg_w * vel_bg_reward
+                    # no vel_bg reward unless hit towards goal when doing fancy dtap
                 if self.got_reset[i] and player.has_jump and not player.on_ground:
                     player_rewards[i] += self.has_flip_reset_vbg_w * vel_bg_reward

+            # fancy_dtap portion
+            if self.fancy_dtap and self.dtap_dict["hit_towards_bb"] and not self.dtap_dict["ball_hit_bb"]:
+                # dtap_helper - ball towards y, negative z?, x towards center, mostly y is high
+                objective = np.array([state.ball.position[0] / 2, BACK_WALL_Y, 1200])  # dirty
+                vel = state.ball.linear_velocity
+                pos_diff = objective - state.ball.position
+                pos_diff[1] = pos_diff[1] * 5  # mostly care about y
+                norm_pos_diff = pos_diff / np.linalg.norm(pos_diff)
+                norm_vel = vel / BALL_MAX_SPEED
+                dtap_help_rew = float(np.dot(norm_pos_diff, norm_vel))
+                player_rewards[i] += self.dtap_helper_w * dtap_help_rew
+
             # distance ball from halfway (for kickoffs)
             # 1 at max oppo wall, 0 at midfield, -1 at our wall
             if player.team_num == BLUE_TEAM:
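The new helper term is an alignment score: the ball's normalized velocity dotted with a unit vector toward a target point high on the backboard, with the y component overweighted. A standalone version (illustrative; BACK_WALL_Y = 5120 and BALL_MAX_SPEED = 6000 are the standard rlgym field constants):

    import numpy as np

    BACK_WALL_Y = 5120      # uu
    BALL_MAX_SPEED = 6000   # uu/s

    def dtap_helper(ball_pos: np.ndarray, ball_vel: np.ndarray) -> float:
        # Target: halfway to center in x, the back wall in y, fixed height in z.
        objective = np.array([ball_pos[0] / 2, BACK_WALL_Y, 1200])
        pos_diff = objective - ball_pos
        pos_diff[1] *= 5  # mostly care about progress in y
        norm_pos_diff = pos_diff / np.linalg.norm(pos_diff)
        return float(np.dot(norm_pos_diff, ball_vel / BALL_MAX_SPEED))

    # A ball at midfield moving straight at the backboard scores ~0.5:
    print(dtap_helper(np.array([0., 0., 500.]), np.array([0., 3000., 200.])))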
setter.py (8 changes: 4 additions & 4 deletions)

@@ -15,7 +15,7 @@


 class CoyoteSetter(DynamicGMSetter):
-    def __init__(self, mode, end_object_choice=None, simulator=False):
+    def __init__(self, mode, end_object_choice=None, simulator=False, dtap_dict=None):
         if simulator:
             from rlgym_sim.utils.state_setters import DefaultState
             from rlgym_sim.utils.state_setters.random_state import RandomState

@@ -257,10 +257,10 @@ def __init__(self, mode, end_object_choice=None, simulator=False):
             self.setters.append(
                 WeightedSampleSetter(
                     (AugmentSetter(ReplaySetter(double_tap_replays[i], defender_front_goal_weight=0,
-                                                random_boost=True),
-                                   True, False, False),
+                                                random_boost=True, dtap_dict=dtap_dict, initial_state_dict=(0, 0, 0)),
+                                   True, False, False),
                      AugmentSetter(ReplaySetter(easy_double_tap_replays[i], defender_front_goal_weight=0,
-                                                random_boost=True),
+                                                random_boost=True, dtap_dict=dtap_dict, initial_state_dict=(1, 0, 0)),
                                    True, False, False),
                      ), (0.2, 0.8))
             )
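initial_state_dict is a three-tuple that lines up with the three dtap flags, so the easy replay set plausibly starts episodes with hit_towards_bb already set. A hypothetical helper showing that seeding (ReplaySetter's real internals are not part of this diff):

    def seed_dtap_dict(dtap_dict: dict, initial_state: tuple) -> None:
        # Hypothetical mapping of (hit_towards_bb, ball_hit_bb, hit_towards_goal)
        # onto the shared status dict at episode reset.
        for key, value in zip(("hit_towards_bb", "ball_hit_bb", "hit_towards_goal"),
                              initial_state):
            dtap_dict[key] = bool(value)

    dtap_status = {"hit_towards_bb": False, "ball_hit_bb": False, "hit_towards_goal": False}
    seed_dtap_dict(dtap_status, (1, 0, 0))  # easy replays: ball already sent at the backboard
    print(dtap_status["hit_towards_bb"])    # True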
worker_dtap.py (35 changes: 18 additions & 17 deletions)

@@ -23,18 +23,27 @@
 set_num_threads(1)

 if __name__ == "__main__":
+
+    dtap_status = {"hit_towards_bb": False,
+                   "ball_hit_bb": False,
+                   "hit_towards_goal": False,
+                   }
+
     rew = ZeroSumReward(zero_sum=Constants_dtap.ZERO_SUM,
                         concede_w=-10,
                         double_tap_w=10,
-                        velocity_bg_w=0,
-                        velocity_pb_w=0,
-                        acel_ball_w=0,
-                        jump_touch_w=0,
-                        wall_touch_w=0,
+                        velocity_bg_w=0.1,
+                        velocity_pb_w=0.1,
+                        acel_ball_w=1,
+                        jump_touch_w=1,
+                        wall_touch_w=1,
                         backboard_bounce_rew=1,
                         tick_skip=Constants_dtap.FRAME_SKIP,
                         flatten_wall_height=True,
-                        double_tap_floor_mult=0.5,
+                        double_tap_floor_mult=0.8,
+                        dtap_dict=dtap_status,
+                        fancy_dtap=True,
+                        dtap_helper_w=0.1,
                         )
     frame_skip = Constants_dtap.FRAME_SKIP
     fps = 120 // frame_skip
@@ -97,7 +106,7 @@
     from rlgym.utils.terminal_conditions.common_conditions import GoalScoredCondition, TimeoutCondition, \
         NoTouchTimeoutCondition

-    setter = CoyoteSetter(mode="doubletap", simulator=simulator)
+    setter = CoyoteSetter(mode="doubletap", simulator=simulator, dtap_dict=dtap_status,)

     match = Match(
         game_speed=game_speed,
@@ -107,12 +116,8 @@
         obs_builder=CoyoteObsBuilder(expanding=True, tick_skip=Constants_dtap.FRAME_SKIP, team_size=team_size,
                                      extra_boost_info=False, embed_players=False,
                                      infinite_boost_odds=infinite_boost_odds,
-                                     add_jumptime=True,
-                                     add_airtime=True,
-                                     add_fliptime=True,
-                                     add_boosttime=True,
-                                     add_handbrake=True,
                                      doubletap_indicator=True,
+                                     dtap_dict=dtap_status,
                                      ),
         action_parser=CoyoteAction(),
         terminal_conditions=[GoalScoredCondition(),
@@ -133,12 +138,8 @@
         obs_builder=CoyoteObsBuilder(expanding=True, tick_skip=Constants_dtap.FRAME_SKIP, team_size=team_size,
                                      extra_boost_info=False, embed_players=False,
                                      infinite_boost_odds=infinite_boost_odds,
-                                     add_jumptime=True,
-                                     add_airtime=True,
-                                     add_fliptime=True,
-                                     add_boosttime=True,
-                                     add_handbrake=True,
                                      doubletap_indicator=True,
+                                     dtap_dict=dtap_status,
                                      ),
         action_parser=CoyoteAction(),
         terminal_conditions=[GoalScoredCondition(),
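All of this wiring relies on dtap_status being one shared dict object: the setter seeds it, the obs builder mutates it each pre_step, and the reward reads it in the same step. A minimal illustration of that aliasing (plain Python, no repo code):

    dtap_status = {"hit_towards_bb": False, "ball_hit_bb": False, "hit_towards_goal": False}

    obs_view = dtap_status     # CoyoteObsBuilder(dtap_dict=dtap_status)
    reward_view = dtap_status  # ZeroSumReward(dtap_dict=dtap_status)

    obs_view["hit_towards_bb"] = True     # the obs builder flips a flag...
    print(reward_view["hit_towards_bb"])  # True -- ...and the reward sees it immediately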
