@@ -353,7 +353,8 @@ def update(self):
             (self.stochpol.ph_new, self.I.buf_news),
         ])

-        verbose = True
+        #verbose = True
+        verbose = False
         if verbose and self.is_log_leader:
             samples = np.prod(self.I.buf_advs.shape)
             logger.info("buffer shape %s, samples_per_mpi=%i, mini_per_mpi=%i, samples=%i, mini=%i" % (
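This hunk keeps the diagnostic log line but turns it off by default: `verbose` flips from `True` to `False`, so the log-leader rank no longer prints the buffer shape and sample counts on every update. For intuition, the sample count the guarded block reports is just the product of the advantage buffer's dimensions; a minimal sketch, with a hypothetical buffer shape and minibatch count:

```python
import numpy as np

# Hypothetical shape: (nenvs, nsteps) = (128, 128) per MPI worker.
buf_advs_shape = (128, 128)
samples = int(np.prod(buf_advs_shape))     # samples_per_mpi
nminibatches = 4                           # illustrative value
print(samples, samples // nminibatches)    # 16384 4096
```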
@@ -446,9 +447,9 @@ def step(self):
             sli = slice(l * self.I.lump_stride, (l + 1) * self.I.lump_stride)
             memsli = slice(None) if self.I.mem_state is NO_STATES else sli
             dict_obs = self.stochpol.ensure_observation_is_dict(obs)
-            with logger.ProfileKV("policy_inference"):
+            # with logger.ProfileKV("policy_inference"):
                 #Calls the policy and value function on current observation.
-                acs, vpreds_int, vpreds_ext, nlps, self.I.mem_state[memsli], ent = self.stochpol.call(dict_obs, news, self.I.mem_state[memsli],
+            acs, vpreds_int, vpreds_ext, nlps, self.I.mem_state[memsli], ent = self.stochpol.call(dict_obs, news, self.I.mem_state[memsli],
                                                                                                   update_obs_stats=self.update_ob_stats_every_step)
             self.env_step(l, acs)

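Both profiling hunks comment out the `with logger.ProfileKV("policy_inference"):` wrapper. Because removing a `with` statement dedents its body, the `-`/`+` pair for the `self.stochpol.call(...)` line differs only in leading whitespace. In the baselines-style logger this code appears to use, `ProfileKV` is a context manager that accumulates the wall-clock time of the enclosed block under a named key; a minimal sketch of that pattern, assuming that behavior (the real helper feeds the logger's key/value machinery rather than a class-level dict):

```python
import time
from collections import defaultdict

class ProfileKV:
    """Accumulate wall-clock time spent inside the block under a named key."""
    totals = defaultdict(float)

    def __init__(self, name):
        self.key = "wait_" + name

    def __enter__(self):
        self.t0 = time.time()

    def __exit__(self, exc_type, exc_val, exc_tb):
        ProfileKV.totals[self.key] += time.time() - self.t0

with ProfileKV("policy_inference"):
    time.sleep(0.01)          # stand-in for the policy forward pass
print(ProfileKV.totals)       # e.g. {'wait_policy_inference': 0.0101...}
```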
@@ -476,8 +477,8 @@ def step(self):
                 for k in self.stochpol.ph_ob_keys:
                     self.I.buf_ob_last[k][sli] = dict_nextobs[k]
                 self.I.buf_new_last[sli] = nextnews
-                with logger.ProfileKV("policy_inference"):
-                    _, self.I.buf_vpred_int_last[sli], self.I.buf_vpred_ext_last[sli], _, _, _ = self.stochpol.call(dict_nextobs, nextnews, self.I.mem_state[memsli], update_obs_stats=False)
+                # with logger.ProfileKV("policy_inference"):
+                _, self.I.buf_vpred_int_last[sli], self.I.buf_vpred_ext_last[sli], _, _, _ = self.stochpol.call(dict_nextobs, nextnews, self.I.mem_state[memsli], update_obs_stats=False)
                 self.I.buf_rews_ext[sli, t] = rews

             #Calculate the intrinsic rewards for the rollout.
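The call in this last hunk evaluates the value heads on the observation *after* the final rollout step, filling `buf_vpred_int_last` / `buf_vpred_ext_last`; these last-step value predictions are what bootstrap the returns at the rollout boundary. A sketch of that bootstrapping idea (function and variable names are illustrative, not the agent's actual return computation):

```python
import numpy as np

def bootstrapped_returns(rews, news, nextnew, vpred_last, gamma=0.99):
    """rews[t]: reward at step t; news[t]: 1 if the episode reset at step t;
    nextnew: 1 if it reset right after the last step; vpred_last: value
    estimate of the observation following the final step."""
    T = len(rews)
    rets = np.zeros(T)
    nextval = (1.0 - nextnew) * vpred_last   # bootstrap from the last value
    for t in reversed(range(T)):
        rets[t] = rews[t] + gamma * nextval
        nextval = (1.0 - news[t]) * rets[t]  # cut the return at episode resets
    return rets

print(bootstrapped_returns(np.ones(3), np.zeros(3), 0, vpred_last=5.0))
```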