Merge pull request #2 from character-ai/tpu_tune

Update config for TPU inference of repr
character-ai · Apr 15, 2024 · a2b402e · a2b402e
2 parents 6ee2d53 + 41abd08
commit a2b402e
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 570 deletions.
diff --git a/muzero/config.py b/muzero/config.py
@@ -38,7 +38,7 @@ class TrainConfig:
     batchsize: int = 512
     total_training_steps: int = 1_000_000
     log_period: int = 10
-    ckpt_save_interval_steps: int = 50
+    ckpt_save_interval_steps: int = 100
     # TEST:
     # batchsize: int = 8
     # log_period: int = 1
@@ -65,15 +65,15 @@ class ReplayConfig:
 class InferenceConfig:
     """training configuration for MZ."""
 
-    dyna_batch_size: int = 16
-    repr_batch_size: int = 8
-    dyna_time_out: float = 0.0005
+    dyna_batch_size: int = 32
+    repr_batch_size: int = 16
+    dyna_time_out: float = 0.00001
     repr_time_out: float = 0.001
     dyna_update_interval: int = (
-        75000  # repr_update_interval * num_simulations in mcts/utils.py
+        5000  # repr_update_interval * num_simulations in mcts/utils.py
     )
     repr_update_interval: int = 1500
-    dyna_actor_per_replica: int = 80
+    dyna_actor_per_replica: int = 50
     repr_actor_per_replica: int = 80
 
 

diff --git a/muzero/ray_inference.py b/muzero/ray_inference.py
@@ -237,6 +237,7 @@ def dyna_and_pred(params, embedding, action):
         if latest_step is None:
             latest_step = 0
             print(f"need to load actor latest_ckpt_step={latest_step}")
+        self.step = latest_step
         while True:
             try:
                 restored = self._ckpt_manager.restore(latest_step)