lots of stuff
Kaiyotech committed Sep 8, 2022
1 parent ee3e684 commit 59d319c
Showing 7 changed files with 367 additions and 22 deletions.
2 changes: 1 addition & 1 deletion Constants.py → Constants_kickoff.py
@@ -1,4 +1,4 @@
 FRAME_SKIP = 8
-TIME_HORIZON = 6 # horizon in seconds
+TIME_HORIZON = 2 # horizon in seconds
 T_STEP = FRAME_SKIP / 120 # real time per rollout step
 ZERO_SUM = False
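
For context, here is a quick sketch of how these two constants feed the discount factor that both learner scripts compute from them (the expression mirrors the gamma = np.exp(np.log(0.5) / (fps * half_life_seconds)) line in learner.py and learner_kickoff.py; the printed values are just illustrative arithmetic):

import numpy as np

FRAME_SKIP = 8
TIME_HORIZON = 2  # half-life of future reward, in seconds

fps = 120 / FRAME_SKIP                             # 15 policy steps per second
gamma = np.exp(np.log(0.5) / (fps * TIME_HORIZON))

# TIME_HORIZON = 2 gives gamma ≈ 0.977, versus ≈ 0.992 for the old 6-second
# horizon, so the kickoff learner discounts future reward much more sharply.
print(gamma)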
15 changes: 9 additions & 6 deletions learner.py
@@ -13,7 +13,7 @@
 from CoyoteParser import CoyoteAction
 import numpy as np
 from rewards import ZeroSumReward
-from Constants import FRAME_SKIP, TIME_HORIZON, ZERO_SUM
+from Constants_kickoff import FRAME_SKIP, TIME_HORIZON, ZERO_SUM
 
 from utils.misc import count_parameters

@@ -31,6 +31,7 @@
 # lix reset?
 # normal play as well as possible, rewards exist
 # aerial play without pinch, rewards exist
+# kickoff, 5 second terminal, reward ball distance into opp half
 set_num_threads(1)

if __name__ == "__main__":
@@ -51,14 +52,15 @@
 ent_coef=0.01,
 )
 
-run_id = "test1"
+run_id = "kickoff_test1"
 wandb.login(key=os.environ["WANDB_KEY"])
 logger = wandb.init(dir="./wandb_store",
-name="cAIyote",
-project="cAIyoteV1",
+name="Valger_kickoff",
+project="Valger",
 entity="kaiyotech",
 id=run_id,
 config=config,
+settings=wandb.Settings(_disable_stats=True, _disable_meta=True),
 )
 redis = Redis(username="user1", password=os.environ["redis_user1_key"], db=1) # host="192.168.0.201",
 redis.delete("worker-ids")
@@ -69,7 +71,7 @@
 GoalSpeed(), MaxGoalSpeed(),
 ]
 
-rollout_gen = RedisRolloutGenerator("cAIyote",
+rollout_gen = RedisRolloutGenerator("Valger_kickoff",
 redis,
 lambda: CoyoteObsBuilder(expanding=True, tick_skip=FRAME_SKIP, team_size=3),
 lambda: ZeroSumReward(zero_sum=ZERO_SUM),
@@ -114,10 +116,11 @@
 gamma=logger.config.gamma,
 logger=logger,
 zero_grads_with_none=True,
+disable_gradient_logging=True,
 )
 
 # alg.load("model_saves/")
 alg.agent.optimizer.param_groups[0]["lr"] = logger.config.actor_lr
 alg.agent.optimizer.param_groups[1]["lr"] = logger.config.critic_lr
 
-alg.run(iterations_per_save=logger.config.save_every, save_dir="model_saves")
+alg.run(iterations_per_save=logger.config.save_every, save_dir="kickoff_saves")
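
The two param_groups[...]["lr"] assignments above matter when resuming via alg.load(...): a restored Adam state would otherwise keep whatever learning rates it was saved with. A minimal, self-contained sketch of that pattern, using toy modules and rates rather than the actual Coyote networks:

import torch
from torch.nn import Linear

actor, critic = Linear(4, 2), Linear(4, 1)  # hypothetical stand-ins for the real networks

optim = torch.optim.Adam([
    {"params": actor.parameters(), "lr": 2e-4},
    {"params": critic.parameters(), "lr": 2e-4},
])

# ...imagine the optimizer state was just loaded from a checkpoint here...
optim.param_groups[0]["lr"] = 1e-4  # actor group
optim.param_groups[1]["lr"] = 1e-4  # critic group
print([g["lr"] for g in optim.param_groups])  # the new rates apply from the next step()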
135 changes: 135 additions & 0 deletions learner_kickoff.py
@@ -0,0 +1,135 @@
import wandb
import torch.jit

from torch.nn import Linear, Sequential, LeakyReLU

from redis import Redis
from rocket_learn.agent.actor_critic_agent import ActorCriticAgent
from rocket_learn.agent.discrete_policy import DiscretePolicy
from rocket_learn.ppo import PPO
from rocket_learn.rollout_generator.redis.redis_rollout_generator import RedisRolloutGenerator
from CoyoteObs import CoyoteObsBuilder

from CoyoteParser import CoyoteAction
import numpy as np
from rewards import ZeroSumReward
from Constants_kickoff import FRAME_SKIP, TIME_HORIZON, ZERO_SUM

from utils.misc import count_parameters

import os
from torch import set_num_threads
from rocket_learn.utils.stat_trackers.common_trackers import Speed, Demos, TimeoutRate, Touch, EpisodeLength, Boost, \
BehindBall, TouchHeight, DistToBall, AirTouch, AirTouchHeight, BallHeight, BallSpeed, CarOnGround, GoalSpeed,\
MaxGoalSpeed
# TODO profile everything before starting to make sure everything is as fast as possible

# ideas for models:
# get to ball as fast as possible, sometimes with no boost, rewards exist
# pinches (ceiling and kuxir and team?), score in as few touches as possible with high velocity
# half flip, wavedash, wall dash, how to do this one?
# lix reset?
# normal play as well as possible, rewards exist
# aerial play without pinch, rewards exist
# kickoff, 5 second terminal, reward ball distance into opp half
set_num_threads(1)

if __name__ == "__main__":
frame_skip = FRAME_SKIP
half_life_seconds = TIME_HORIZON
fps = 120 / frame_skip
gamma = np.exp(np.log(0.5) / (fps * half_life_seconds))
config = dict(
actor_lr=2e-4,
critic_lr=2e-4,
n_steps=100_000,
batch_size=100_000,
minibatch_size=50_000,
epochs=50,
gamma=gamma,
save_every=100,
model_every=1000,
ent_coef=0.01,
)

run_id = "kickoff_test1"
wandb.login(key=os.environ["WANDB_KEY"])
logger = wandb.init(dir="./wandb_store",
name="Valger_kickoff",
project="Valger",
entity="kaiyotech",
id=run_id,
config=config,
settings=wandb.Settings(_disable_stats=True, _disable_meta=True),
)
redis = Redis(username="user1", password=os.environ["redis_user1_key"], db=1) # host="192.168.0.201",
redis.delete("worker-ids")

stat_trackers = [
Speed(normalize=True), Demos(), TimeoutRate(), Touch(), EpisodeLength(), Boost(), BehindBall(), TouchHeight(),
DistToBall(), AirTouch(), AirTouchHeight(), BallHeight(), BallSpeed(normalize=True), CarOnGround(),
GoalSpeed(), MaxGoalSpeed(),
]

rollout_gen = RedisRolloutGenerator("Valger_kickoff",
redis,
lambda: CoyoteObsBuilder(expanding=True, tick_skip=FRAME_SKIP, team_size=3),
lambda: ZeroSumReward(zero_sum=ZERO_SUM,
goal_w=10,
concede_w=-10,
velocity_pb_w=0.01,
boost_gain_w=1,
demo_w=5,
got_demoed_w=-5,
kickoff_w=0.1,
ball_opp_half_w=0.05,
team_spirit=0),
lambda: CoyoteAction(),
save_every=logger.config.save_every,
model_every=logger.config.model_every,
logger=logger,
clear=True, # TODO check this
stat_trackers=stat_trackers,
# gamemodes=("1v1", "2v2", "3v3"),
max_age=1,
)

critic = Sequential(Linear(247, 512), LeakyReLU(), Linear(512, 512), LeakyReLU(),
Linear(512, 512), LeakyReLU(), Linear(512, 512), LeakyReLU(), Linear(512, 512),
LeakyReLU(), Linear(512, 512), LeakyReLU(),
Linear(512, 1))

actor = Sequential(Linear(247, 512), LeakyReLU(), Linear(512, 512), LeakyReLU(), Linear(512, 512), LeakyReLU(),
Linear(512, 512), LeakyReLU(), Linear(512, 91))

actor = DiscretePolicy(actor, (91,))

optim = torch.optim.Adam([
{"params": actor.parameters(), "lr": logger.config.actor_lr},
{"params": critic.parameters(), "lr": logger.config.critic_lr}
])

agent = ActorCriticAgent(actor=actor, critic=critic, optimizer=optim)
print(f"Gamma is: {gamma}")
count_parameters(agent)

alg = PPO(
rollout_gen,
agent,
ent_coef=logger.config.ent_coef,
n_steps=logger.config.n_steps,
batch_size=logger.config.batch_size,
minibatch_size=logger.config.minibatch_size,
epochs=logger.config.epochs,
gamma=logger.config.gamma,
logger=logger,
zero_grads_with_none=True,
disable_gradient_logging=True,
)

# alg.load("model_saves/")
alg.agent.optimizer.param_groups[0]["lr"] = logger.config.actor_lr
alg.agent.optimizer.param_groups[1]["lr"] = logger.config.critic_lr

alg.run(iterations_per_save=logger.config.save_every, save_dir="kickoff_saves")
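
For intuition about the discrete head: the actor ends in Linear(512, 91), and DiscretePolicy(actor, (91,)) treats those 91 logits as a single categorical action distribution. Below is a standalone conceptual sketch in plain torch, not rocket_learn's actual DiscretePolicy implementation; what each of the 91 indices means is left to CoyoteAction:

import torch
from torch.distributions import Categorical
from torch.nn import Linear, Sequential, LeakyReLU

# Same head shape as the actor above: 247-dim observation in, 91 logits out.
actor = Sequential(Linear(247, 512), LeakyReLU(), Linear(512, 512), LeakyReLU(), Linear(512, 512), LeakyReLU(),
                   Linear(512, 512), LeakyReLU(), Linear(512, 91))

obs = torch.randn(1, 247)              # dummy observation
dist = Categorical(logits=actor(obs))  # categorical over the 91 discrete actions
action = dist.sample()                 # index that CoyoteAction would map to controls
log_prob = dist.log_prob(action)       # what PPO's probability ratio is built from
print(action.item(), log_prob.item())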
38 changes: 24 additions & 14 deletions rewards.py
@@ -1,9 +1,9 @@
 from rlgym.utils.common_values import BLUE_TEAM, BLUE_GOAL_BACK, ORANGE_GOAL_BACK, ORANGE_TEAM, BALL_MAX_SPEED, \
-CAR_MAX_SPEED, BALL_RADIUS, GOAL_HEIGHT, CEILING_Z
+CAR_MAX_SPEED, BALL_RADIUS, GOAL_HEIGHT, CEILING_Z, BACK_NET_Y, BACK_WALL_Y
 import numpy as np
 from rlgym.utils.gamestates import PlayerData, GameState
 from rlgym.utils.reward_functions import RewardFunction
-from Constants import FRAME_SKIP
+from Constants_kickoff import FRAME_SKIP
 
 from numpy.linalg import norm
 
@@ -47,26 +47,27 @@ class ZeroSumReward(RewardFunction):
 # (https://github.com/Rolv-Arild/Necto/blob/master/training/reward.py)
 def __init__(
 self,
-goal_w=5, # go to 10 after working
-concede_w=-5,
-velocity_pb_w=0.01,
-velocity_bg_w=0.05,
-touch_grass_w=-0.005,
-acel_ball_w=1.5,
-boost_gain_w=1.5,
+goal_w=0, # go to 10 after working
+concede_w=0,
+velocity_pb_w=0, # 0.01,
+velocity_bg_w=0, # 0.05,
+touch_grass_w=0, # -0.005,
+acel_ball_w=0, # 1.5,
+boost_gain_w=0, # 1.5,
 punish_boost=False, # punish once they start wasting and understand the game a bit
-jump_touch_w=3,
+jump_touch_w=0, # 3,
 cons_air_touches_w=0, # 6,
-wall_touch_w=0.25,
-demo_w=3, # 6,
-got_demoed_w=-3, # -6,
-kickoff_w=0.1,
+wall_touch_w=0, # 0.25,
+demo_w=0, # 3, # 6,
+got_demoed_w=0, # -3, # -6,
+kickoff_w=0, # 0.1,
 double_tap_w=0,
 aerial_goal_w=0,
 flip_reset_w=0,
 flip_reset_goal_w=0,
 punish_low_touch_w=0,
 punish_ceiling_pinch_w=0,
+ball_opp_half_w=0,
 tick_skip=FRAME_SKIP,
 team_spirit=0, # increase as they learn
 zero_sum=True,
@@ -98,6 +99,7 @@ def __init__(
 self.flip_reset_goal_w = flip_reset_goal_w
 self.punish_low_touch_w = punish_low_touch_w
 self.punish_ceiling_pinch_w = punish_ceiling_pinch_w
+self.ball_opp_half_w = ball_opp_half_w
 self.rewards = None
 self.current_state = None
 self.last_state = None
@@ -216,6 +218,14 @@ def pre_step(self, state: GameState):
 vel_bg_reward = float(np.dot(norm_pos_diff, norm_vel))
 player_rewards[i] += self.velocity_bg_w * vel_bg_reward
 
+# distance ball from halfway (for kickoffs)
+# 1 at max oppo wall, 0 at midfield, -1 at our wall
+if player.team_num == BLUE_TEAM:
+    objective = BACK_WALL_Y - BALL_RADIUS
+else:
+    objective = -BACK_WALL_Y + BALL_RADIUS
+player_rewards[i] += self.ball_opp_half_w * (1 + (state.ball.position[1] - objective) / objective)
+
 # boost
 # don't punish or reward boost when above approx single jump height
 if player.car_data.position[2] < 2 * BALL_RADIUS:
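A quick sanity check of the new ball_opp_half term, using the same expression as the added lines above. BACK_WALL_Y ≈ 5120 and BALL_RADIUS ≈ 92.75 are assumed here (RLGym's usual constants); the weight 0.05 is the value passed in learner_kickoff.py:

BACK_WALL_Y, BALL_RADIUS = 5120, 92.75   # assumed values
objective = BACK_WALL_Y - BALL_RADIUS    # blue team's target y

def ball_opp_half(ball_y, w=0.05):
    # mirrors: w * (1 + (ball_y - objective) / objective)
    return w * (1 + (ball_y - objective) / objective)

print(ball_opp_half(objective))   # ball on the opponent wall -> 0.05 (full weight)
print(ball_opp_half(0.0))         # ball at midfield           -> 0.0
print(ball_opp_half(-objective))  # ball on our own wall       -> -0.05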
91 changes: 91 additions & 0 deletions utils/replay_parser.py
@@ -0,0 +1,91 @@
import numpy as np
import os
from rlgym.utils.common_values import CEILING_Z, BALL_RADIUS, GOAL_HEIGHT
from rlgym.utils.math import euler_to_rotation, cosine_similarity


# curate aerial states with ball and at least one car above 750
def parse_aerial(file_name, _num_cars):
data = np.load(file_name)
output = []
ball_positions = data[:, BALL_POSITION]
for _i, ball_state in enumerate(ball_positions):
if ball_state[2] > GOAL_HEIGHT + 100:
cars = np.split(data[_i][9:], _num_cars)
for _j in range(_num_cars):
car_pos = cars[_j][CAR_POS]
if np.linalg.norm(ball_state - car_pos) < 5 * BALL_RADIUS:
output.append(data[_i])
print(f"Created {len(output)} aerial states from {file_name}")
output_file = f"aerial_{file_name}"
if os.path.exists(output_file):
os.remove(output_file)
np.save(output_file, output)


# curate flip reset states and save in flip_reset_1v1.npy, etc
def parse_flip_resets(file_name, _num_cars):
data = np.load(file_name)
output = []
ball_positions = data[:, BALL_POSITION]
for _i, ball_state in enumerate(ball_positions):
if ball_state[2] > CEILING_Z - ((CEILING_Z - GOAL_HEIGHT) / 2):
cars = np.split(data[_i][9:], _num_cars)
for _j in range(_num_cars):
car_rot = cars[_j][CAR_ROT]
car_theta = euler_to_rotation(car_rot)
car_up = car_theta[:, 2]
car_pos = cars[_j][CAR_POS]
if np.linalg.norm(ball_state - car_pos) < 3 * BALL_RADIUS \
and cosine_similarity(ball_state - car_pos, -car_up) > 0.7:
output.append(data[_i])
print(f"Created {len(output)} flip reset states from {file_name}")
output_file = f"flip_resets_{file_name}"
if os.path.exists(output_file):
os.remove(output_file)
np.save(output_file, output)


# curate possible ceiling shot states
def parse_ceiling_shots(file_name, _num_cars):
data = np.load(file_name)
output = []
up = [0, 0, 1]
for _i, state in enumerate(data):
cars = np.split(state[9:], _num_cars)
for _j in range(_num_cars):
car_rot = cars[_j][CAR_ROT]
car_theta = euler_to_rotation(car_rot)
car_up = car_theta[:, 2]
car_pos = cars[_j][CAR_POS]
if cosine_similarity(up, -car_up) > 0.9 and car_pos[2] > CEILING_Z - 50:
output.append(data[_i])

print(f"Created {len(output)} car ceiling states from {file_name}")
output_file = f"flip_resets_{file_name}"
if os.path.exists(output_file):
os.remove(output_file)
np.save(output_file, output)


BALL_POSITION = slice(0, 3)
BALL_LIN_VEL = slice(3, 6)
BALL_ANG_VEL = slice(6, 9)
CAR_POS = slice(0, 3)
CAR_ROT = slice(3, 6)
CAR_LIN_VEL = slice(6, 9)
CAR_ANG_VEL = slice(9, 12)
CAR_BOOST = slice(12, 13)

input_files = ['ssl_1v1.npy', 'ssl_2v2.npy', 'ssl_3v3.npy']
for i, file in enumerate(input_files):
num_cars = (i + 1) * 2
parse_aerial(file, num_cars)
parse_flip_resets(file, num_cars)
parse_ceiling_shots(file, num_cars)
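
For reference, a small sketch of the row layout these slice constants imply: the ball block first (9 floats), then 13 floats per car, so a full row should be 9 + 13 * num_cars wide. That width is an assumption inferred from the slices and the np.split calls above, not something stated elsewhere in the repo:

import numpy as np

BALL_FLOATS, CAR_FLOATS = 9, 13  # assumed, per the slice constants above

def unpack_row(row, num_cars):
    # Split one saved state row into the ball block and one block per car.
    row = np.asarray(row)
    assert row.shape[0] == BALL_FLOATS + CAR_FLOATS * num_cars, "unexpected row width"
    ball = row[:BALL_FLOATS]
    cars = np.split(row[BALL_FLOATS:], num_cars)
    return ball, cars

# e.g. a 1v1 row would be 9 + 13 * 2 = 35 floats wide
ball, cars = unpack_row(np.zeros(35), num_cars=2)
print(ball.shape, [c.shape for c in cars])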






2 changes: 1 addition & 1 deletion worker.py
@@ -13,7 +13,7 @@
 from rewards import ZeroSumReward
 from pretrained_agents.necto.necto_v1 import NectoV1
 from torch import set_num_threads
-from Constants import FRAME_SKIP, ZERO_SUM
+from Constants_kickoff import FRAME_SKIP, ZERO_SUM
 from pretrained_agents.nexto.nexto_v2 import NextoV2
 import os
 set_num_threads(1)
