
Commit

half-flip ready to run, starting from recovery recovery_saves/Opti_1675223912.912102/Opti_430/checkpoint.pt
Kaiyotech committed Feb 8, 2023
1 parent 0191b32 commit 36dc828
Showing 4 changed files with 298 additions and 0 deletions.
6 changes: 6 additions & 0 deletions Constants_half_flip.py
@@ -0,0 +1,6 @@
FRAME_SKIP = 4
TIME_HORIZON = 2 # horizon in seconds
T_STEP = FRAME_SKIP / 120 # real time per rollout step
ZERO_SUM = False
STEP_SIZE = 500_000
DB_NUM = 11
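
These constants drive the PPO discount in learner_half_flip.py below: gamma is chosen so the discounted return has a half-life of TIME_HORIZON seconds of real time at the rollout step rate. A minimal sketch of that relationship using the values from this file (the ~0.9885 figure is just the evaluated result, not something the training code prints):

import numpy as np

FRAME_SKIP = 4
TIME_HORIZON = 2  # seconds
fps = 120 / FRAME_SKIP  # 30 policy steps per second of game time
gamma = np.exp(np.log(0.5) / (fps * TIME_HORIZON))  # ~0.9885
print(gamma)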
4 changes: 4 additions & 0 deletions CoyoteObs.py
@@ -37,9 +37,11 @@ def __init__(self, tick_skip=8, team_size=3, expanding: bool = True, extra_boost
                 add_airtime=False,
                 add_boosttime=False,
                 dodge_deadzone=0.8,
                 flip_dir=True,
                 end_object: PhysicsObject = None,
                 ):
        super().__init__()
        self.flip_dir = flip_dir
        self.end_object = end_object
        assert add_boosttime == add_airtime == add_fliptime == add_jumptime == add_handbrake, "All timers must match"
        self.obs_info = obs_info
@@ -658,6 +660,8 @@ def add_players_to_obs(self, obs: List, state: GameState, player: PlayerData, ba
                self.jumptimes[cid], self.airtimes[cid], self.fliptimes[cid], self.handbrakes[cid],
                self.flipdirs[cid][0], self.flipdirs[cid][1]
            )
            if not self.flip_dir:  # drop the flip-direction x/y entries when the flag is disabled
                player_data = player_data[:-2]
        else:
            player_data = self.create_player_packet_njit(
                player.inverted_car_data.position if inverted else player.car_data.position,
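
The new flip_dir flag controls whether the two flip-direction entries stay in each per-player packet; both scripts below build CoyoteObsBuilder with flip_dir=False. A minimal, self-contained sketch of the trimming the added lines perform (the packet values here are made up, not the builder's real layout):

player_packet = [0.25, 0.0, 0.5, 1.0, -1.0]  # toy packet: timer features, then flip-direction x and y
flip_dir = False
if not flip_dir:
    player_packet = player_packet[:-2]  # drop the flip-direction x/y entries
print(player_packet)  # [0.25, 0.0, 0.5]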
155 changes: 155 additions & 0 deletions learner_half_flip.py
@@ -0,0 +1,155 @@
import wandb
import torch.jit

from torch.nn import Linear, Sequential, LeakyReLU

from redis import Redis
from rocket_learn.agent.actor_critic_agent import ActorCriticAgent
from rocket_learn.agent.discrete_policy import DiscretePolicy
from rocket_learn.ppo import PPO
from rocket_learn.rollout_generator.redis.redis_rollout_generator import RedisRolloutGenerator
from CoyoteObs import CoyoteObsBuilder

from CoyoteParser import CoyoteAction
import numpy as np
from rewards import ZeroSumReward
import Constants_half_flip
from agent import MaskIndices

from utils.misc import count_parameters

import random

import os
from torch import set_num_threads
from rocket_learn.utils.stat_trackers.common_trackers import Speed, Demos, TimeoutRate, Touch, EpisodeLength, Boost, \
    BehindBall, TouchHeight, DistToBall, AirTouch, AirTouchHeight, BallHeight, BallSpeed, CarOnGround, GoalSpeed, \
    MaxGoalSpeed
from my_stattrackers import GoalSpeedTop5perc

# ideas for models:
# get to ball as fast as possible, sometimes with no boost, rewards exist
# pinches (ceiling and kuxir and team?), score in as few touches as possible with high velocity
# half flip, wavedash, wall dash, how to do this one?
# lix reset?
# normal play as well as possible, rewards exist
# aerial play without pinch, rewards exist
# kickoff, 5 second terminal, reward ball distance into opp half
set_num_threads(1)

if __name__ == "__main__":
    frame_skip = Constants_half_flip.FRAME_SKIP
    half_life_seconds = Constants_half_flip.TIME_HORIZON
    fps = 120 / frame_skip
    gamma = np.exp(np.log(0.5) / (fps * half_life_seconds))
    config = dict(
        actor_lr=1e-4,
        critic_lr=1e-4,
        n_steps=Constants_half_flip.STEP_SIZE,
        batch_size=100_000,
        minibatch_size=None,
        epochs=30,
        gamma=gamma,
        save_every=10,
        model_every=1000,
        ent_coef=0.01,
    )

    run_id = "halfflip_run1.00"
    wandb.login(key=os.environ["WANDB_KEY"])
    logger = wandb.init(dir="./wandb_store",
                        name="Halfflip_Run1.00",
                        project="Opti",
                        entity="kaiyotech",
                        id=run_id,
                        config=config,
                        settings=wandb.Settings(_disable_stats=True, _disable_meta=True),
                        resume=True,
                        )
    redis = Redis(username="user1", password=os.environ["redis_user1_key"],
                  db=Constants_half_flip.DB_NUM)  # host="192.168.0.201",
    redis.delete("worker-ids")

    stat_trackers = [
        Speed(normalize=True), Touch(), EpisodeLength(), Boost(),
        DistToBall(), CarOnGround(),
    ]
    state = random.getstate()
    rollout_gen = RedisRolloutGenerator("Halfflip",
                                        redis,
                                        lambda: CoyoteObsBuilder(expanding=True,
                                                                 tick_skip=Constants_half_flip.FRAME_SKIP,
                                                                 team_size=3, extra_boost_info=False,
                                                                 embed_players=False,
                                                                 add_jumptime=True,
                                                                 add_airtime=True,
                                                                 add_fliptime=True,
                                                                 add_boosttime=True,
                                                                 add_handbrake=True,
                                                                 flip_dir=False),
                                        lambda: ZeroSumReward(zero_sum=Constants_half_flip.ZERO_SUM,
                                                              velocity_pb_w=0.01,
                                                              boost_gain_w=0.35,
                                                              boost_spend_w=3,
                                                              punish_boost=True,
                                                              touch_ball_w=2,
                                                              boost_remain_touch_w=1.5,
                                                              touch_grass_w=-0.01,
                                                              supersonic_bonus_vpb_w=0,
                                                              zero_touch_grass_if_ss=False,
                                                              turtle_w=0,
                                                              final_reward_ball_dist_w=1,
                                                              final_reward_boost_w=0.2,
                                                              tick_skip=frame_skip
                                                              ),
                                        lambda: CoyoteAction(),
                                        save_every=logger.config.save_every * 3,
                                        model_every=logger.config.model_every,
                                        logger=logger,
                                        clear=False,
                                        stat_trackers=stat_trackers,
                                        # gamemodes=("1v1", "2v2", "3v3"),
                                        max_age=1,
                                        )

    critic = Sequential(Linear(227, 256), LeakyReLU(), Linear(256, 256), LeakyReLU(),
                        Linear(256, 128), LeakyReLU(),
                        Linear(128, 1))

    actor = Sequential(Linear(227, 128), LeakyReLU(), Linear(128, 128), LeakyReLU(),
                       Linear(128, 128), LeakyReLU(),
                       Linear(128, 373))

    actor = DiscretePolicy(actor, (373,))

    optim = torch.optim.Adam([
        {"params": actor.parameters(), "lr": logger.config.actor_lr},
        {"params": critic.parameters(), "lr": logger.config.critic_lr}
    ])

    agent = ActorCriticAgent(actor=actor, critic=critic, optimizer=optim)
    print(f"Gamma is: {gamma}")
    count_parameters(agent)

    alg = PPO(
        rollout_gen,
        agent,
        ent_coef=logger.config.ent_coef,
        n_steps=logger.config.n_steps,
        batch_size=logger.config.batch_size,
        minibatch_size=logger.config.minibatch_size,
        epochs=logger.config.epochs,
        gamma=logger.config.gamma,
        logger=logger,
        zero_grads_with_none=True,
        disable_gradient_logging=True,
    )

    # resume from the Opti recovery checkpoint named in the commit message
    alg.load("recovery_saves/Opti_1675223912.912102/Opti_430/checkpoint.pt")
    # restore the configured learning rates after the optimizer state is loaded
    alg.agent.optimizer.param_groups[0]["lr"] = logger.config.actor_lr
    alg.agent.optimizer.param_groups[1]["lr"] = logger.config.critic_lr

    # keep the policy frozen for the first 10 iterations
    alg.freeze_policy(10)

    alg.run(iterations_per_save=logger.config.save_every, save_dir="recovery_ball_saves")
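
count_parameters(agent) prints the real sizes at startup; for reference, a rough by-hand count of the two networks above (a minimal sketch assuming standard torch.nn.Linear layers with bias; LeakyReLU adds no parameters, and mlp_params is an illustrative helper, not part of utils.misc):

def mlp_params(sizes):
    # weights plus biases for each Linear(in, out) in the stack
    return sum(i * o + o for i, o in zip(sizes[:-1], sizes[1:]))

print(mlp_params([227, 256, 256, 128, 1]))    # critic: 157,185 parameters
print(mlp_params([227, 128, 128, 128, 373]))  # actor: 110,325 parameters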
133 changes: 133 additions & 0 deletions worker_half_flip.py
@@ -0,0 +1,133 @@
import sys
from redis import Redis
from redis.retry import Retry
from redis.backoff import ExponentialBackoff
from redis.exceptions import ConnectionError, TimeoutError
from rlgym.envs import Match
from CoyoteObs import CoyoteObsBuilder
from rlgym.utils.terminal_conditions.common_conditions import GoalScoredCondition, TimeoutCondition, \
    BallTouchedCondition
from mybots_terminals import BallTouchGroundCondition, PlayerTwoTouch, AttackerTouchCloseGoal, ReachObject
from rocket_learn.rollout_generator.redis.redis_rollout_worker import RedisRolloutWorker
from CoyoteParser import CoyoteAction
from rewards import ZeroSumReward
from torch import set_num_threads
from setter import CoyoteSetter
from mybots_statesets import HalfFlip
import Constants_half_flip
import os

set_num_threads(1)

if __name__ == "__main__":
    frame_skip = Constants_half_flip.FRAME_SKIP
    rew = ZeroSumReward(zero_sum=Constants_half_flip.ZERO_SUM,
                        velocity_pb_w=0.01,
                        boost_gain_w=0.35,
                        boost_spend_w=3,
                        punish_boost=True,
                        touch_ball_w=2,
                        boost_remain_touch_w=1.5,
                        touch_grass_w=-0.01,
                        supersonic_bonus_vpb_w=0,
                        zero_touch_grass_if_ss=False,
                        turtle_w=0,
                        final_reward_ball_dist_w=1,
                        final_reward_boost_w=0.2,
                        tick_skip=frame_skip
                        )

    fps = 120 // frame_skip
    name = "Default"
    send_gamestate = False
    streamer_mode = False
    local = True
    auto_minimize = True
    game_speed = 100
    evaluation_prob = 0
    past_version_prob = 0.1
    deterministic_streamer = False
    force_old_deterministic = False
    gamemode_weights = {'1v1': 1, '2v2': 0, '3v3': 0}
    team_size = 3
    dynamic_game = True
    host = "127.0.0.1"
    if len(sys.argv) > 1:
        host = sys.argv[1]
        if host != "127.0.0.1" and host != "localhost":
            local = False
    if len(sys.argv) > 2:
        name = sys.argv[2]
    # if len(sys.argv) > 3 and not dynamic_game:
    #     team_size = int(sys.argv[3])
    if len(sys.argv) > 3:
        if sys.argv[3] == 'GAMESTATE':
            send_gamestate = True
        elif sys.argv[3] == 'STREAMER':
            streamer_mode = True
            evaluation_prob = 0
            game_speed = 1
            auto_minimize = False
            gamemode_weights = {'1v1': 1, '2v2': 0, '3v3': 0}

    match = Match(
        game_speed=game_speed,
        spawn_opponents=True,
        team_size=team_size,
        state_setter=HalfFlip(zero_boost_weight=0.5, zero_ball_vel_weight=0.5, ball_vel_mult=2),
        obs_builder=CoyoteObsBuilder(expanding=True,
                                     tick_skip=Constants_half_flip.FRAME_SKIP,
                                     team_size=3, extra_boost_info=False,
                                     embed_players=False,
                                     add_jumptime=True,
                                     add_airtime=True,
                                     add_fliptime=True,
                                     add_boosttime=True,
                                     add_handbrake=True,
                                     flip_dir=False),
        action_parser=CoyoteAction(),
        terminal_conditions=[GoalScoredCondition(),
                             TimeoutCondition(fps * 100),
                             # TimeoutCondition(fps * 2),
                             BallTouchedCondition(),
                             ],
        reward_function=rew,
        tick_skip=frame_skip,
    )

    # local Redis
    if local:
        r = Redis(host=host,
                  username="user1",
                  password=os.environ["redis_user1_key"],
                  db=Constants_half_flip.DB_NUM,
                  )

    # remote Redis
    else:
        # noinspection PyArgumentList
        r = Redis(host=host,
                  username="user1",
                  password=os.environ["redis_user1_key"],
                  retry_on_error=[ConnectionError, TimeoutError],
                  retry=Retry(ExponentialBackoff(cap=10, base=1), 25),
                  db=Constants_half_flip.DB_NUM,
                  )

    RedisRolloutWorker(r, name, match,
                       past_version_prob=past_version_prob,
                       sigma_target=2,
                       evaluation_prob=evaluation_prob,
                       force_paging=False,
                       dynamic_gm=dynamic_game,
                       send_obs=True,
                       auto_minimize=auto_minimize,
                       send_gamestates=send_gamestate,
                       gamemode_weights=gamemode_weights,  # default 1/3
                       streamer_mode=streamer_mode,
                       deterministic_streamer=deterministic_streamer,
                       force_old_deterministic=force_old_deterministic,
                       # testing
                       batch_mode=True,
                       step_size=Constants_half_flip.STEP_SIZE,
                       ).run()
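
Based on the sys.argv handling above, the worker takes an optional Redis host, worker name, and mode flag, so a hypothetical launch looks like python worker_half_flip.py <host> <worker_name> STREAMER (or GAMESTATE); with no arguments it connects to Redis on 127.0.0.1 as "Default" with the default game_speed of 100.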
