Skip to content

Commit f62da50

Browse files
committed
Fixed issues raised in original repo
1 parent f75c0f1 commit f62da50

File tree

4 files changed

+20
-17
lines changed

4 files changed

+20
-17
lines changed

monitor.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from gym.core import Wrapper
55
import time
66
from glob import glob
7-
import csv
7+
import csv, uuid, os, pandas
88
import os.path as osp
99
import json
1010
import numpy as np
@@ -74,7 +74,7 @@ def step(self, action):
7474
self.logger.writerow(epinfo)
7575
self.f.flush()
7676
if "episode" not in info:
77-
info["episoide"] = {}
77+
info["episode"] = {}
7878
info['episode'].update(epinfo)
7979
self.total_steps += 1
8080
return (ob, rew, done, info)
@@ -160,4 +160,4 @@ def test_monitor():
160160
last_logline = pandas.read_csv(f, index_col=None)
161161
assert set(last_logline.keys()) == {'l', 't', 'r'}, "Incorrect keys in monitor logline"
162162
f.close()
163-
os.remove(mon_file)
163+
os.remove(mon_file)

mpi_util.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,10 @@ def guess_available_gpus(n_gpus=None):
4343
if n_gpus is not None:
4444
return list(range(n_gpus))
4545
if 'CUDA_VISIBLE_DEVICES' in os.environ:
46-
cuda_visible_divices = os.environ['CUDA_VISIBLE_DEVICES']
47-
cuda_visible_divices = cuda_visible_divices.split(',')
48-
return [int(n) for n in cuda_visible_divices]
49-
if 'RCALL_NUM_GPU' not in os.environ:
46+
cuda_visible_devices = os.environ['CUDA_VISIBLE_DEVICES']
47+
cuda_visible_devices = cuda_visible_devices.split(',')
48+
return [int(n) for n in cuda_visible_devices]
49+
if 'RCALL_NUM_GPU' in os.environ:
5050
n_gpus = int(os.environ['RCALL_NUM_GPU'])
5151
return list(range(n_gpus))
5252
nvidia_dir = '/proc/driver/nvidia/gpus/'

ppo_agent.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,8 @@ def update(self):
353353
(self.stochpol.ph_new, self.I.buf_news),
354354
])
355355

356-
verbose = True
356+
#verbose = True
357+
verbose = False
357358
if verbose and self.is_log_leader:
358359
samples = np.prod(self.I.buf_advs.shape)
359360
logger.info("buffer shape %s, samples_per_mpi=%i, mini_per_mpi=%i, samples=%i, mini=%i " % (
@@ -446,9 +447,9 @@ def step(self):
446447
sli = slice(l * self.I.lump_stride, (l + 1) * self.I.lump_stride)
447448
memsli = slice(None) if self.I.mem_state is NO_STATES else sli
448449
dict_obs = self.stochpol.ensure_observation_is_dict(obs)
449-
with logger.ProfileKV("policy_inference"):
450+
# with logger.ProfileKV("policy_inference"):
450451
#Calls the policy and value function on current observation.
451-
acs, vpreds_int, vpreds_ext, nlps, self.I.mem_state[memsli], ent = self.stochpol.call(dict_obs, news, self.I.mem_state[memsli],
452+
acs, vpreds_int, vpreds_ext, nlps, self.I.mem_state[memsli], ent = self.stochpol.call(dict_obs, news, self.I.mem_state[memsli],
452453
update_obs_stats=self.update_ob_stats_every_step)
453454
self.env_step(l, acs)
454455

@@ -476,8 +477,8 @@ def step(self):
476477
for k in self.stochpol.ph_ob_keys:
477478
self.I.buf_ob_last[k][sli] = dict_nextobs[k]
478479
self.I.buf_new_last[sli] = nextnews
479-
with logger.ProfileKV("policy_inference"):
480-
_, self.I.buf_vpred_int_last[sli], self.I.buf_vpred_ext_last[sli], _, _, _ = self.stochpol.call(dict_nextobs, nextnews, self.I.mem_state[memsli], update_obs_stats=False)
480+
# with logger.ProfileKV("policy_inference"):
481+
_, self.I.buf_vpred_int_last[sli], self.I.buf_vpred_ext_last[sli], _, _, _ = self.stochpol.call(dict_nextobs, nextnews, self.I.mem_state[memsli], update_obs_stats=False)
481482
self.I.buf_rews_ext[sli, t] = rews
482483

483484
#Calculate the intrinsic rewards for the rollout.

replayer.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import pickle
66
import sys
77

8-
import exptag
8+
#import exptag
99
import ipdb
1010
import numpy as np
1111
from atari_wrappers import make_atari, wrap_deepmind
@@ -204,7 +204,9 @@ def draw_frame_i(i):
204204

205205

206206
args = parser.parse_args().__dict__
207-
folder = exptag.get_last_experiment_folder_by_tag(args['tag'])
207+
#folder = exptag.get_last_experiment_folder_by_tag(args['tag'])
208+
# The 'tag' argument is used directly as the experiment folder.
209+
folder = args['tag']
208210

209211
def date_from_folder(folder):
210212
assert folder.startswith('openai-')
@@ -235,12 +237,12 @@ def date_from_folder(folder):
235237
env.reset()
236238
un_env = env.unwrapped
237239
rend_shape = un_env.render(mode='rgb_array').shape
238-
episodes = EpisodeIterator(filenames).iterate()
240+
episodes = EpisodeIterator(filenames)
239241
if args['kind'] == 'movie':
240242
import imageio
241243
import time
242-
for i, episode in enumerate(episodes):
243-
filename = os.path.expanduser('~/tmp/movie_{}.mp4'.format(time.time()))
244+
for i, episode in enumerate(episodes.iterate()):
245+
filename = os.path.expanduser('~/rnd_movies/movie_{}.mp4'.format(time.time()))
244246
imageio.mimwrite(filename, episode["obs"], fps=30)
245247
print(filename)
246248

0 commit comments

Comments
 (0)