Skip to content

Commit

Permalink
flick roll back to 14910 and add angle exit velocity mult, make no z …
Browse files Browse the repository at this point in the history
…exit, min goal speed, terminal attacker close to goal.
  • Loading branch information
Kaiyotech committed Dec 12, 2022
1 parent c2a2807 commit 3426a76
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions rewards.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ def __init__(
self.exit_rewarded = [False] * 6
self.last_touch_car = None
self.launch_angle_car = [None] * 6
self.exit_vel_save = [None] * 6

def pre_step(self, state: GameState):
if state != self.current_state:
Expand Down Expand Up @@ -265,6 +266,7 @@ def pre_step(self, state: GameState):
else:
if last.ball_touched:
self.launch_angle_car[i] = last.car_data.forward()[:-1] / np.linalg.norm(last.car_data.forward()[:-1])
self.exit_vel_save[i] = state.ball.linear_velocity[:-1] / np.linalg.norm(state.ball.linear_velocity[:-1])
if self.kickoff_timer - self.last_touch_time > self.exit_vel_arm_time_steps and not self.exit_rewarded[i] and self.last_touch_car == i:
self.exit_rewarded[i] = True
# rewards 1 for a 120 kph flick (3332 uu/s), 11 for a 6000 uu/s flick (max speed)
Expand All @@ -277,9 +279,11 @@ def pre_step(self, state: GameState):
ang_mult = 1
if self.exit_vel_angle_w != 0:
# 0.785 rad is ~45 degrees; the angle is capped there and normalised to [0, 1]
unit_vector_2 = state.ball.linear_velocity[:-1] / np.linalg.norm(state.ball.linear_velocity[:-1])
dot_product = np.dot(self.launch_angle_car[i], unit_vector_2)

dot_product = np.dot(self.launch_angle_car[i], self.exit_vel_save[i])
angle = min(np.arccos(dot_product), 0.785) / 0.785
if np.isnan(angle): # rare enough to just avoid in the data
angle = 0
ang_mult = self.exit_velocity_w * max(angle, 0.1) # 0.1 is a small mult to still reward 0 angle
vel_mult = self.exit_velocity_w * 0.5 * ((xy_norm_ball_vel ** 5) / (3332 ** 5) + ((xy_norm_ball_vel ** 2) / (3332 ** 2)))
player_rewards[i] += vel_mult * req_reset * ang_mult
Expand Down Expand Up @@ -476,6 +480,7 @@ def reset(self, initial_state: GameState):
self.exit_rewarded = [False] * 6
self.last_touch_car = None
self.launch_angle_car = [None] * 6
self.exit_vel_save = [None] * 6
self.previous_action = np.asarray([-1] * len(initial_state.players))

def get_reward(self, player: PlayerData, state: GameState, previous_action: np.ndarray, previous_model_action: np.ndarray) -> float:
Expand Down

0 comments on commit 3426a76

Please sign in to comment.