rollout accepts a list of models of length nroll

aftersomemath · aftersomemath · commit 943eb6bc7e8b · 2024-11-25T21:29:14.000-05:00
diff --git a/doc/changelog.rst b/doc/changelog.rst
@@ -16,6 +16,7 @@ Python bindings
 - Added ``bind`` method and removed id attribute from :ref:`mjSpec` objects. Using ids is error prone in scenarios of repeated attachment and
   detachment. Python users are encouraged to use names for unique identification of model elements.
 - Removed ``nroll`` argument from :ref:`rollout<PyRollout>` because its value can always be inferred.
+- :ref:`rollout<PyRollout>` now accepts either a single MjModel or a list of MjModel instances of length ``nroll``.
 
 Bug fixes
 ^^^^^^^^^
diff --git a/doc/python.rst b/doc/python.rst
@@ -700,6 +700,7 @@ states and sensor values. The basic usage form is
 
    state, sensordata = rollout.rollout(model, data, initial_state, control)
 
+``model`` is either a single instance of MjModel or a list of compatible MjModel instances of length ``nroll``.
 ``initial_state`` is an ``nroll x nstate`` array, with ``nroll`` initial states of size ``nstate``, where
 ``nstate = mj_stateSize(model, mjtState.mjSTATE_FULLPHYSICS)`` is the size of the
 :ref:`full physics state<geFullPhysics>`. ``control`` is a ``nroll x nstep x ncontrol`` array of controls. Controls are
diff --git a/python/mujoco/rollout.cc b/python/mujoco/rollout.cc
@@ -37,7 +37,7 @@ const auto rollout_doc = R"(
 Roll out open-loop trajectories from initial states, get resulting states and sensor values.
 
   input arguments (required):
-    model              instance of MjModel
+    model              list of MjModel instances of length nroll
     data               associated instance of MjData
     nstep              integer, number of steps to be taken for each trajectory
     control_spec       specification of controls, ncontrol = mj_stateSize(m, control_spec)
@@ -54,18 +54,18 @@ Roll out open-loop trajectories from initial states, get resulting states and se
 // C-style rollout function, assumes all arguments are valid
 // all input fields of d are initialised, contents at call time do not matter
 // after returning, d will contain the last step of the last rollout
-void _unsafe_rollout(const mjModel* m, mjData* d, int nroll, int nstep, unsigned int control_spec,
+void _unsafe_rollout(const mjModel** m, mjData* d, int nroll, int nstep, unsigned int control_spec,
                      const mjtNum* state0, const mjtNum* warmstart0, const mjtNum* control,
                      mjtNum* state, mjtNum* sensordata) {
   // sizes
-  int nstate = mj_stateSize(m, mjSTATE_FULLPHYSICS);
-  int ncontrol = mj_stateSize(m, control_spec);
-  int nv = m->nv, nbody = m->nbody, neq = m->neq;
-  int nsensordata = m->nsensordata;
+  int nstate = mj_stateSize(m[0], mjSTATE_FULLPHYSICS);
+  int ncontrol = mj_stateSize(m[0], control_spec);
+  int nv = m[0]->nv, nbody = m[0]->nbody, neq = m[0]->neq;
+  int nsensordata = m[0]->nsensordata;
 
   // clear user inputs if unspecified
   if (!(control_spec & mjSTATE_CTRL)) {
-    mju_zero(d->ctrl, m->nu);
+    mju_zero(d->ctrl, m[0]->nu);
   }
   if (!(control_spec & mjSTATE_QFRC_APPLIED)) {
     mju_zero(d->qfrc_applied, nv);
@@ -75,26 +75,26 @@ void _unsafe_rollout(const mjModel* m, mjData* d, int nroll, int nstep, unsigned
   }
   if (!(control_spec & mjSTATE_MOCAP_POS)) {
     for (int i = 0; i < nbody; i++) {
-      int id = m->body_mocapid[i];
-      if (id >= 0) mju_copy3(d->mocap_pos+3*id, m->body_pos+3*i);
+      int id = m[0]->body_mocapid[i];
+      if (id >= 0) mju_copy3(d->mocap_pos+3*id, m[0]->body_pos+3*i);
     }
   }
   if (!(control_spec & mjSTATE_MOCAP_QUAT)) {
     for (int i = 0; i < nbody; i++) {
-      int id = m->body_mocapid[i];
-      if (id >= 0) mju_copy4(d->mocap_quat+4*id, m->body_quat+4*i);
+      int id = m[0]->body_mocapid[i];
+      if (id >= 0) mju_copy4(d->mocap_quat+4*id, m[0]->body_quat+4*i);
     }
   }
   if (!(control_spec & mjSTATE_EQ_ACTIVE)) {
     for (int i = 0; i < neq; i++) {
-      d->eq_active[i] = m->eq_active0[i];
+      d->eq_active[i] = m[0]->eq_active0[i];
     }
   }
 
   // loop over rollouts
   for (int r = 0; r < nroll; r++) {
     // set initial state
-    mj_setState(m, d, state0 + r*nstate, mjSTATE_FULLPHYSICS);
+    mj_setState(m[r], d, state0 + r*nstate, mjSTATE_FULLPHYSICS);
 
     // set warmstart accelerations
     if (warmstart0) {
@@ -124,7 +124,7 @@ void _unsafe_rollout(const mjModel* m, mjData* d, int nroll, int nstep, unsigned
         for (; t < nstep; t++) {
           int step = r*nstep + t;
           if (state) {
-            mj_getState(m, d, state + step*nstate, mjSTATE_FULLPHYSICS);
+            mj_getState(m[r], d, state + step*nstate, mjSTATE_FULLPHYSICS);
           }
           if (sensordata) {
             mju_copy(sensordata + step*nsensordata, d->sensordata, nsensordata);
@@ -137,15 +137,15 @@ void _unsafe_rollout(const mjModel* m, mjData* d, int nroll, int nstep, unsigned
 
       // controls
       if (control) {
-        mj_setState(m, d, control + step*ncontrol, control_spec);
+        mj_setState(m[r], d, control + step*ncontrol, control_spec);
       }
 
       // step
-      mj_step(m, d);
+      mj_step(m[r], d);
 
       // copy out new state
       if (state) {
-        mj_getState(m, d, state + step*nstate, mjSTATE_FULLPHYSICS);
+        mj_getState(m[r], d, state + step*nstate, mjSTATE_FULLPHYSICS);
       }
 
       // copy out sensor values
@@ -188,15 +188,20 @@ PYBIND11_MODULE(_rollout, pymodule) {
   // get subsequent states and corresponding sensor values
   pymodule.def(
       "rollout",
-      [](const MjModelWrapper& m, MjDataWrapper& d,
+      [](py::list m, MjDataWrapper& d,
          int nstep, unsigned int control_spec,
          const PyCArray state0,
          std::optional<const PyCArray> warmstart0,
          std::optional<const PyCArray> control,
          std::optional<const PyCArray> state,
          std::optional<const PyCArray> sensordata
          ) {
-        const raw::MjModel* model = m.get();
+        // get raw pointers
+        int nroll = state0.shape(0);
+        const raw::MjModel* model_ptrs[nroll];
+        for (int r = 0; r < nroll; r++) {
+          model_ptrs[r] = m[r].cast<const MjModelWrapper*>()->get();
+        }
         raw::MjData* data = d.get();
 
         // check that some steps need to be taken, return if not
@@ -205,19 +210,17 @@ PYBIND11_MODULE(_rollout, pymodule) {
         }
 
         // get sizes
-        int nstate = mj_stateSize(model, mjSTATE_FULLPHYSICS);
-        int ncontrol = mj_stateSize(model, control_spec);
-        int nroll = state0.shape(0);
+        int nstate = mj_stateSize(model_ptrs[0], mjSTATE_FULLPHYSICS);
+        int ncontrol = mj_stateSize(model_ptrs[0], control_spec);
 
-        // get raw pointers
         mjtNum* state0_ptr = get_array_ptr(state0, "state0", nroll, 1, nstate);
         mjtNum* warmstart0_ptr = get_array_ptr(warmstart0, "warmstart0", nroll,
-                                               1, model->nv);
+                                               1, model_ptrs[0]->nv);
         mjtNum* control_ptr = get_array_ptr(control, "control", nroll,
                                             nstep, ncontrol);
         mjtNum* state_ptr = get_array_ptr(state, "state", nroll, nstep, nstate);
         mjtNum* sensordata_ptr = get_array_ptr(sensordata, "sensordata", nroll,
-                                               nstep, model->nsensordata);
+                                               nstep, model_ptrs[0]->nsensordata);
 
         // perform rollouts
         {
@@ -226,7 +229,7 @@ PYBIND11_MODULE(_rollout, pymodule) {
 
           // call unsafe rollout function
           InterceptMjErrors(_unsafe_rollout)(
-              model, data, nroll, nstep, control_spec, state0_ptr,
+              model_ptrs, data, nroll, nstep, control_spec, state0_ptr,
               warmstart0_ptr, control_ptr, state_ptr, sensordata_ptr);
         }
       },
diff --git a/python/mujoco/rollout.py b/python/mujoco/rollout.py
@@ -14,15 +14,15 @@
 # ==============================================================================
 """Roll out open-loop trajectories from initial states, get subsequent states and sensor values."""
 
-from typing import Optional
+from typing import Optional, Union
 
 import mujoco
 from mujoco import _rollout
 import numpy as np
 from numpy import typing as npt
 
 
-def rollout(model: mujoco.MjModel,
+def rollout(model: Union[mujoco.MjModel, list[mujoco.MjModel]],
             data: mujoco.MjData,
             initial_state: npt.ArrayLike,
             control: Optional[npt.ArrayLike] = None,
@@ -41,7 +41,7 @@ def rollout(model: mujoco.MjModel,
   Allocates outputs if none are given.
 
   Args:
-    model: An mjModel instance.
+    model: An MjModel, or a list of nroll MjModel whose nstate, ncontrol, nv and nsensordata all match.
     data: An associated mjData instance.
     initial_state: Array of initial states from which to roll out trajectories.
       ([nroll or 1] x nstate)
@@ -90,6 +90,7 @@ def rollout(model: mujoco.MjModel,
       state=state,
       sensordata=sensordata)
 
+
   # check number of dimensions
   _check_number_of_dimensions(2,
                               initial_state=initial_state,
@@ -108,29 +109,49 @@ def rollout(model: mujoco.MjModel,
   state = _ensure_3d(state)
   sensordata = _ensure_3d(sensordata)
 
-  # check trailing dimensions
-  nstate = mujoco.mj_stateSize(model, mujoco.mjtState.mjSTATE_FULLPHYSICS.value)
-  _check_trailing_dimension(nstate, initial_state=initial_state, state=state)
-  ncontrol = mujoco.mj_stateSize(model, control_spec)
-  _check_trailing_dimension(ncontrol, control=control)
-  _check_trailing_dimension(model.nv, initial_warmstart=initial_warmstart)
-  _check_trailing_dimension(model.nsensordata, sensordata=sensordata)
-
   # infer nroll, check for incompatibilities
   nroll = _infer_dimension(0, 1,
                            initial_state=initial_state,
                            initial_warmstart=initial_warmstart,
                            control=control,
                            state=state,
                            sensordata=sensordata)
+  if isinstance(model, list) and nroll == 1:
+    nroll = len(model)
+
+  if isinstance(model, list) and len(model) != nroll:
+    raise ValueError(f'nroll inferred as {nroll} '
+                     f'but model is length {len(model)}')
+  elif not isinstance(model, list):
+    model = [model] # Use a length 1 list to simplify code below
 
   # infer nstep, check for incompatibilities
   nstep = _infer_dimension(1, nstep or 1,
                            control=control,
                            state=state,
                            sensordata=sensordata)
 
-  # tile input arrays if required (singleton expansion)
+  # get nstate/ncontrol/nv/nsensordata
+  # check that they are equal across models
+  nstate = mujoco.mj_stateSize(model[0], mujoco.mjtState.mjSTATE_FULLPHYSICS.value)
+  ncontrol = mujoco.mj_stateSize(model[0], control_spec)
+  nv = model[0].nv
+  nsensordata = model[0].nsensordata
+  for m in model[1:]:
+    if (nstate != mujoco.mj_stateSize(m, mujoco.mjtState.mjSTATE_FULLPHYSICS.value)
+        or ncontrol != mujoco.mj_stateSize(m, control_spec)
+        or nv != m.nv
+        or nsensordata != m.nsensordata):
+      raise ValueError('models are not compatible')
+
+  # check trailing dimensions
+  _check_trailing_dimension(nstate, initial_state=initial_state, state=state)
+  _check_trailing_dimension(ncontrol, control=control)
+  _check_trailing_dimension(nv, initial_warmstart=initial_warmstart)
+  _check_trailing_dimension(nsensordata, sensordata=sensordata)
+
+  # tile input arrays/lists if required (singleton expansion)
+  model = model*nroll if len(model) == 1 else model
   initial_state = _tile_if_required(initial_state, nroll)
   initial_warmstart = _tile_if_required(initial_warmstart, nroll)
   control = _tile_if_required(control, nroll, nstep)
@@ -139,7 +160,7 @@ def rollout(model: mujoco.MjModel,
   if state is None:
     state = np.empty((nroll, nstep, nstate))
   if sensordata is None:
-    sensordata = np.empty((nroll, nstep, model.nsensordata))
+    sensordata = np.empty((nroll, nstep, nsensordata))
 
   # call rollout
   _rollout.rollout(model, data, nstep, control_spec, initial_state,
diff --git a/python/mujoco/rollout_test.py b/python/mujoco/rollout_test.py
@@ -334,6 +334,34 @@ def test_multi_rollout(self, model_name):
     np.testing.assert_array_equal(state, py_state)
     np.testing.assert_array_equal(sensordata, py_sensordata)
 
+  @parameterized.parameters(ALL_MODELS.keys())
+  def test_multi_model(self, model_name):
+    nroll = 3  # number of initial states and models
+    nstep = 3  # number of timesteps
+
+    spec = mujoco.MjSpec.from_string(ALL_MODELS[model_name])
+
+    if len(spec.bodies) > 1:
+      model = []
+      for i in range(nroll):
+        body = spec.bodies[1]
+        assert body.name != 'world'
+        body.pos = body.pos + i
+        model.append(spec.compile())
+    else:
+      model = [spec.compile() for i in range(nroll)]
+
+    nstate = mujoco.mj_stateSize(model[0], mujoco.mjtState.mjSTATE_FULLPHYSICS)
+    data = mujoco.MjData(model[0])
+
+    initial_state = np.random.randn(nroll, nstate)
+    control = np.random.randn(nroll, nstep, model[0].nu)
+    state, sensordata = rollout.rollout(model, data, initial_state, control)
+
+    py_state, py_sensordata = py_rollout(model, data, initial_state, control)
+    np.testing.assert_array_equal(state, py_state)
+    np.testing.assert_array_equal(sensordata, py_sensordata)
+
   @parameterized.parameters(ALL_MODELS.keys())
   def test_multi_rollout_fixed_ctrl_infer_from_output(self, model_name):
     model = mujoco.MjModel.from_xml_string(ALL_MODELS[model_name])
@@ -430,8 +458,9 @@ def test_threading(self):
     def thread_initializer():
       thread_local.data = mujoco.MjData(model)
 
+    model_list = [model]*nroll
     def call_rollout(initial_state, control, state, sensordata):
-      rollout.rollout(model, thread_local.data, initial_state, control,
+      rollout.rollout(model_list, thread_local.data, initial_state, control,
                       skip_checks=True,
                       nstep=nstep, state=state, sensordata=sensordata)
 
@@ -677,13 +706,17 @@ def py_rollout(model, data, initial_state, control,
   control = ensure_3d(control)
   nroll = initial_state.shape[0]
   nstep = control.shape[1]
-  nstate = mujoco.mj_stateSize(model, mujoco.mjtState.mjSTATE_FULLPHYSICS)
+
+  if isinstance(model, mujoco.MjModel):
+    model = [model]*nroll
+
+  nstate = mujoco.mj_stateSize(model[0], mujoco.mjtState.mjSTATE_FULLPHYSICS)
 
   state = np.empty((nroll, nstep, nstate))
-  sensordata = np.empty((nroll, nstep, model.nsensordata))
+  sensordata = np.empty((nroll, nstep, model[0].nsensordata))
   for r in range(nroll):
     state_r, sensordata_r = one_rollout(
-        model, data, initial_state[r], control[r], control_spec
+        model[r], data, initial_state[r], control[r], control_spec
     )
     state[r] = state_r
     sensordata[r] = sensordata_r