experiment.py
"""This file runs an experiment based on parameters specified in a configuration file."""
import time
import traceback
import sys
import argparse
import numpy as np
from tqdm import tqdm
import krpc
from utils.sweeper import Sweeper
from utils.helpers import validate_output_folder
from env.rolling_payload import RollingPayloadEnv, RollingPayloadEnvContinuous
from agent.algorithms import CDiscQAgent, BangBangAgent, LinearAgent, QuadraticAgent
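
# These maps translate the 'env_name'/'agent_name' strings in a config file
# into the class names imported above; run_experiment_one_config resolves them
# with getattr(sys.modules[__name__], ...). Note that 'Dummy' maps to DummyEnv,
# which is not imported here, so selecting it would fail at lookup.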
env_map = {
    'RollingPayload': 'RollingPayloadEnv',
    'RollingPayloadContinuous': 'RollingPayloadEnvContinuous',
    'Dummy': 'DummyEnv'
}
agent_map = {
    'CDiscQ': 'CDiscQAgent',
    'BangBang': 'BangBangAgent',
    'Linear': 'LinearAgent',
    'Quadratic': 'QuadraticAgent',
}


def process_observation(raw_obs):
    """Hook for preprocessing raw observations before they reach the agent;
    currently the identity function."""
    return raw_obs


def log_data(interval, current_timestep, current_run,
             exp_type, log, env, agent, centered_values, save_weights,
             exp_name, exp_id, nonlinear):
    """Logs the agent's weights and average-reward estimate at regular
    intervals. (Several of the arguments are currently unused.)"""
    if save_weights:
        index = current_timestep // interval
        log['weights'][current_run][index] = agent.weights
        log['avgrew'][current_run][index] = agent.avg_reward


def save_final_weights(nonlinear, run_idx, log, agent, exp_name, exp_id):
    """Saves the final learned parameters of a run: nonlinear agents save
    their trained model to disk; otherwise the final weights and
    average-reward estimate are stored in the log."""
    if nonlinear:
        agent.save_trained_model(f'{exp_name}_{exp_id}_{run_idx}')
    else:
        if hasattr(agent, "weights"):
            log['weights_final'][run_idx] = agent.weights
        if hasattr(agent, "avg_reward"):
            log['avgrew_final'][run_idx] = agent.avg_reward


def print_experiment_summary(log, exp_type):
    """Prints the reward rate over the entire experiment and over the last
    50% and 10% of steps, averaged across runs."""
    if exp_type == 'control':
        num_steps = log['reward'].shape[1]
        tqdm.write('RewardRate_total\t= %f' % np.mean(log['reward']))
        tqdm.write('RewardRate_last50%%\t= %f\n' % np.mean(log['reward'][:, num_steps // 2:]))
        tqdm.write('RewardRate_last10%%\t= %f\n' % np.mean(log['reward'][:, num_steps // 10 * 9:]))


def run_experiment_one_config(config):
    """
    Runs N independent experiments for a particular parameter configuration.

    Args:
        config: a dictionary of all the experiment parameters
    Returns:
        log: a dictionary of quantities of interest
    """
    exp_name = config['exp_name']
    exp_type = config['exp_type']
    env_name = config['env_name']
    agent_name = config['agent_name']
    num_runs = config['num_runs']
    max_steps = config['num_max_steps']
    eval_every_n_steps = config['eval_every_n_steps']
    save_weights = config.get('save_weights', 0)
    num_weights = config['num_weights']
    store_max_action_values = config.get('store_max_action_values', False)

    log = {'reward': np.zeros((num_runs, max_steps + 1), dtype=np.float32),
           'roll_rate': np.zeros((num_runs, max_steps + 1), dtype=np.float32),
           'action': np.zeros((num_runs, max_steps + 1), dtype=np.float32),
           'weights_final': np.zeros((num_runs, num_weights), dtype=np.float32),
           'avgrew_final': np.zeros(num_runs, dtype=np.float32),
           }
    if save_weights:
        log['avgrew'] = np.zeros((num_runs, max_steps // eval_every_n_steps + 1), dtype=np.float32)
        log['weights'] = np.zeros((num_runs, max_steps // eval_every_n_steps + 1,
                                   num_weights), dtype=np.float32)
    if store_max_action_values:
        log['max_value_per_step'] = np.zeros((num_runs, max_steps // 10 + 1), dtype=np.float32)

    assert env_name in env_map, f'{env_name} not found.'
    assert agent_name in agent_map, f'{agent_name} not found.'

    for run in range(num_runs):
        config['rng_seed'] = run
        agent = getattr(sys.modules[__name__], agent_map[agent_name])(**config)
        env = getattr(sys.modules[__name__], env_map[env_name])(krpc.connect(name="Tracker"), **config)
        obs = env.reset(seed=config['rng_seed'])
        action = agent.start(process_observation(obs))

        for t in tqdm(range(max_steps + 1)):
            # logging relevant data at regular intervals
            if t % eval_every_n_steps == 0:
                log_data(interval=eval_every_n_steps, current_timestep=t,
                         current_run=run, exp_type=exp_type, log=log,
                         env=env, agent=agent,
                         save_weights=save_weights, nonlinear=False,
                         exp_name=exp_name, exp_id=config['exp_id'],
                         centered_values=None)
            # the environment and agent step
            next_obs, reward, term_flag = env.step(action)
            action = agent.step(reward, process_observation(next_obs), term_flag)
            # if t % 10 == 0:
            #     print(action, reward, term_flag, next_obs)
            # logging the reward at each step
            log['reward'][run][t] = reward
            # logging some data for debugging
            log['action'][run][t] = action
            log['roll_rate'][run][t] = next_obs[0]
            # log['angle'][run][t] = np.arctan2(next_obs[0], next_obs[1])  # this is the *next* angle
            time.sleep(0.1)  # pace the loop for the real-time kRPC-connected environment
            # print(np.rad2deg(np.arctan2(next_obs[0], next_obs[1])), env.roll())

        save_final_weights(nonlinear=False,
                           run_idx=run, log=log, agent=agent,
                           exp_name=exp_name, exp_id=config['exp_id'])

    print_experiment_summary(log, exp_type)
    return log
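
# A hypothetical minimal direct call, for illustration only; the chosen agent
# and environment constructors receive **config, so a real config also needs
# whatever additional keys (step sizes, feature settings, etc.) those classes
# expect:
#
#     config = {'exp_name': 'test', 'exp_id': 0, 'exp_type': 'control',
#               'env_name': 'RollingPayload', 'agent_name': 'BangBang',
#               'num_runs': 1, 'num_max_steps': 1000,
#               'eval_every_n_steps': 100, 'num_weights': 8}
#     log = run_experiment_one_config(config)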


parser = argparse.ArgumentParser(description="Run an experiment based on parameters specified in a configuration file")
parser.add_argument('--config-file',  # required=True,
                    default='config_files/pendulum/test.json',
                    help='location of the config file for the experiment (e.g., config_files/test_config.json)')
parser.add_argument('--cfg-start', type=int, default=0)
parser.add_argument('--cfg-end', type=int, default=-1)
parser.add_argument('--output-path', default='results/test_exp/')
args = parser.parse_args()
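
# Example invocation (paths and indices are illustrative):
#   python experiment.py --config-file config_files/pendulum/test.json \
#       --cfg-start 0 --cfg-end 4 --output-path results/test_exp/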
print(args.config_file, args.output_path)
path = validate_output_folder(args.output_path)
sweeper = Sweeper(args.config_file)
cfg_start_idx = args.cfg_start
cfg_end_idx = args.cfg_end if args.cfg_end != -1 else sweeper.total_combinations
print(f'\n\nRunning configurations {cfg_start_idx} to {cfg_end_idx}...\n\n')
start_time = time.time()
for i in range(cfg_start_idx, cfg_end_idx):
    config = sweeper.get_one_config(i)
    config['exp_id'] = i
    config['output_folder'] = path
    # print(f'Starting at: {time.localtime(start_time)}')
    print(config)
    try:
        log = run_experiment_one_config(config)
        log['params'] = config
    except Exception:
        print('\n***\n')
        print(traceback.format_exc())
        print('***\nException occurred with this parameter configuration; moving on now\n***\n')
    else:
        filename = f"{config['exp_name']}_{config['exp_id']}"
        print(f'Saving experiment log in: {filename}.npy\n**********\n')
        np.save(f'{path}{filename}', log)
    finally:
        print("Time elapsed: {:.2f} minutes\n\n".format((time.time() - start_time) / 60))
        time.sleep(0.5)

end_time = time.time()
print("Total time elapsed: {:.2f} minutes".format((end_time - start_time) / 60))