Skip to content
Merged
Show file tree
Hide file tree
Changes from 41 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
69eec9c
docs(benchmarks): add benchmark integration guide and standardize ben…
pkooij Apr 2, 2026
5ad4c8f
refactor(envs): move dispatch logic from factory into EnvConfig subcl…
pkooij Apr 3, 2026
bfa0a0f
docs(benchmarks): clean up adding-benchmarks guide for clarity
pkooij Apr 3, 2026
75d5e5b
fix link
pkooij Apr 3, 2026
7abe5f7
fix task count
pkooij Apr 3, 2026
1fad71c
fix: enable SmolVLA eval on LIBERO with custom camera mappings
pkooij Apr 7, 2026
d8e0eaa
fix: use direct AutoresetMode import for gymnasium compat
pkooij Apr 7, 2026
0ea6aac
fix: handle gymnasium < 1.0 without AutoresetMode
pkooij Apr 7, 2026
27bbb6b
refactor: revert policy changes, keep env-only camera mapping fixes
pkooij Apr 7, 2026
8e07cab
Update docs/source/env_processor.mdx
pkooij Apr 7, 2026
fd99209
feat(envs): lazy env init + AsyncVectorEnv as default for n_envs > 1
pkooij Apr 3, 2026
dbc8c2e
fix: close envs between tasks to prevent worker process accumulation
pkooij Apr 7, 2026
aebc5e2
fix(eval): use task_description instead of task for language conditio…
pkooij Apr 7, 2026
8a778c0
docs: update adding_benchmarks for async env changes
pkooij Apr 7, 2026
5ec6119
feat(eval): batch_size=auto + faster env loading
pkooij Apr 7, 2026
2c32c04
docs: add evaluation guide and update benchmarks doc
pkooij Apr 7, 2026
43abbcc
docs(evaluation): remove benchmark table, rename section header
pkooij Apr 7, 2026
03e1901
perf(eval): shared memory, observation passthrough, task prefetch
pkooij Apr 7, 2026
12023f4
style: ruff format
pkooij Apr 7, 2026
9a6ab6a
chore: revert env_processor.mdx changes (not part of this PR)
pkooij Apr 7, 2026
6e6f76d
ci(benchmarks): add isolated integration tests for libero and metaworld
pkooij Apr 7, 2026
61e2be8
ci(benchmarks): pin action hashes and use uv sync --locked
pkooij Apr 7, 2026
07350f9
ci(benchmarks): trigger only on envs/ or lerobot_eval.py changes
pkooij Apr 7, 2026
dfd09c0
fix(ci): set LIBERO_DATA_FOLDER to bypass interactive stdin prompt
pkooij Apr 8, 2026
42ef36e
docs(benchmarks): add CI smoke test step to adding_benchmarks guide
pkooij Apr 8, 2026
841cbb0
fix(ci): pre-create libero config in Dockerfile to bypass stdin prompt
pkooij Apr 8, 2026
c24687d
fix(ci): use shell to create libero config instead of multiline pytho…
pkooij Apr 8, 2026
2420d20
fix(ci): point libero config to bundled package init_files
pkooij Apr 8, 2026
58a5bcb
fix(ci): add smolvla extra to benchmark Dockerfiles
pkooij Apr 8, 2026
f3853c9
fix(eval): render_frame covers _LazyAsyncVectorEnv
pkooij Apr 8, 2026
e35b485
refactor(envs): remove unused _get_sub_env_attr helper
pkooij Apr 8, 2026
28d353e
chore: apply prettier formatting to docs
pkooij Apr 8, 2026
527463c
docs(env_processor): remove deprecated add_envs_task from pipeline ex…
pkooij Apr 8, 2026
606ed97
refactor(envs): remove __del__ from _LazyAsyncVectorEnv
pkooij Apr 8, 2026
93b99e4
fix(eval): prefetch next task's workers after close to avoid GPU memo…
pkooij Apr 8, 2026
fe05e50
refactor(envs): move _LazyAsyncVectorEnv to utils and apply to metaworld
pkooij Apr 8, 2026
c8c2e88
chore: remove out-of-scope benchmark/CI/docs files from PR
pkooij Apr 8, 2026
f4bc9b5
chore: restore adding_benchmarks + test_dispatch, drop env_processor …
pkooij Apr 8, 2026
5bc90c7
docs(adding_benchmarks): remove CI smoke test step (coming in separat…
pkooij Apr 8, 2026
566a77b
refactor(envs): remove unused add_envs_task
pkooij Apr 8, 2026
973bb7c
style: fix prettier formatting in env_processor.mdx
pkooij Apr 8, 2026
c3fa286
fix(eval): catch AttributeError and NotImplementedError explicitly fo…
pkooij Apr 8, 2026
44534d5
fix(envs): use forkserver context and close envs in test to prevent d…
pkooij Apr 8, 2026
7c9a676
fix(envs): default use_async_envs=False in create_envs and make_env
pkooij Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions docs/source/adding_benchmarks.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ During evaluation, data moves through four stages:
1. gym.Env ──→ raw observations (numpy dicts)

2. Preprocessing ──→ standard LeRobot keys + task description
(preprocess_observation, add_envs_task in envs/utils.py)
(preprocess_observation in envs/utils.py, env.call("task_description"))

3. Processors ──→ env-specific then policy-specific transforms
(env_preprocessor, policy_preprocessor)
Expand Down Expand Up @@ -161,6 +161,8 @@ class MyBenchmarkEnv(gym.Env):
...
```

**GPU-based simulators (e.g. MuJoCo with EGL rendering):** If your simulator allocates GPU/EGL contexts during `__init__`, defer that allocation to a `_ensure_env()` helper called on first `reset()`/`step()`. This avoids inheriting stale GPU handles when `AsyncVectorEnv` spawns worker processes. See `LiberoEnv._ensure_env()` for the pattern.

Also provide a factory function that returns the nested dict structure:

```python
Expand Down Expand Up @@ -207,7 +209,7 @@ class MyBenchmarkEnvConfig(EnvConfig):
def gym_kwargs(self) -> dict:
return {"obs_type": self.obs_type, "render_mode": self.render_mode}

def create_envs(self, n_envs: int, use_async_envs: bool = False):
def create_envs(self, n_envs: int, use_async_envs: bool = True):
"""Override for multi-task benchmarks or custom env creation."""
from lerobot.envs.<benchmark> import create_<benchmark>_envs
return create_<benchmark>_envs(task=self.task, n_envs=n_envs, ...)
Expand Down Expand Up @@ -299,7 +301,7 @@ After completing the steps above, confirm that everything works:

1. **Install** — `pip install -e ".[mybenchmark]"` and verify the dependency group installs cleanly.
2. **Smoke test env creation** — call `make_env()` with your config in Python, check that the returned dict has the expected `{suite: {task_id: VectorEnv}}` shape, and that `reset()` returns observations with the right keys.
3. **Run a full eval** — `lerobot-eval --env.type=<name> --env.task=<task> --eval.n_episodes=1 --eval.batch_size=1 --policy.path=<any_compatible_policy>` to exercise the full pipeline end-to-end.
3. **Run a full eval** — `lerobot-eval --env.type=<name> --env.task=<task> --eval.n_episodes=1 --policy.path=<any_compatible_policy>` to exercise the full pipeline end-to-end. (`batch_size` defaults to auto-tuning based on CPU cores; pass `--eval.batch_size=1` to force a single environment.)
4. **Check success detection** — verify that `info["is_success"]` flips to `True` when the task is actually completed. This is what the eval loop uses to compute success rates.

## Writing a benchmark doc page
Expand All @@ -311,7 +313,7 @@ Each benchmark `.mdx` page should include:
- **Overview image or GIF.**
- **Available tasks** — table of task suites with counts and brief descriptions.
- **Installation** — `pip install -e ".[<benchmark>]"` plus any extra steps (env vars, system packages).
- **Evaluation** — recommended `lerobot-eval` command with `n_episodes` and `batch_size` for reproducible results. Include single-task and multi-task examples if applicable.
- **Evaluation** — recommended `lerobot-eval` command with `n_episodes` for reproducible results. `batch_size` defaults to auto; only specify it if needed. Include single-task and multi-task examples if applicable.
- **Policy inputs and outputs** — observation keys with shapes, action space description.
- **Recommended evaluation episodes** — how many episodes per task is standard.
- **Training** — example `lerobot-train` command.
Expand Down
99 changes: 62 additions & 37 deletions docs/source/env_processor.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ policy_preprocessor = NormalizerProcessorStep(stats=dataset_stats)

The same policy can work with different environment processors, and the same environment processor can work with different policies:

```python
````python
# Use SmolVLA policy with LIBERO environment
# Use SmolVLA policy with LIBERO environment
libero_preprocessor, libero_postprocessor = make_env_pre_post_processors(
Expand All @@ -102,7 +102,20 @@ libero_preprocessor, libero_postprocessor = make_env_pre_post_processors(
policy_cfg=act_cfg,
)
act_preprocessor, act_postprocessor = make_pre_post_processors(act_cfg)
```
```python
# Use SmolVLA policy with LIBERO environment
libero_preprocessor, libero_postprocessor = make_env_pre_post_processors(
env_cfg=libero_cfg,
policy_cfg=smolvla_cfg,
)
smolvla_preprocessor, smolvla_postprocessor = make_pre_post_processors(smolvla_cfg)

# Or use ACT policy with the same LIBERO environment
libero_preprocessor, libero_postprocessor = make_env_pre_post_processors(
env_cfg=libero_cfg,
policy_cfg=act_cfg,
)
act_preprocessor, act_postprocessor = make_pre_post_processors(act_cfg)

### 3. **Easier Experimentation**

Expand Down Expand Up @@ -132,7 +145,7 @@ class LiberoVelocityProcessorStep(ObservationProcessorStep):
state = torch.cat([eef_pos, eef_axisangle, eef_vel,
gripper_pos, gripper_vel], dim=-1) # 14D
return state
```
````

### 4. **Cleaner Environment Code**

Expand All @@ -157,38 +170,54 @@ observation = {

### Factory Function

The `make_env_pre_post_processors` function delegates to `env_cfg.get_env_processors()`:
The `make_env_pre_post_processors` function follows the same pattern as `make_pre_post_processors` for policies:

```python
from lerobot.envs.factory import make_env_pre_post_processors
from lerobot.envs.configs import LiberoEnv, PushtEnv

# For LIBERO: Returns LiberoProcessorStep in preprocessor
libero_cfg = LiberoEnv(task="libero_spatial", camera_name=["agentview"])
env_preprocessor, env_postprocessor = make_env_pre_post_processors(libero_cfg, policy_cfg)
env_preprocessor, env_postprocessor = make_env_pre_post_processors(libero_cfg)

# For other environments: Returns identity processors (no-op)
pusht_cfg = PushtEnv()
env_preprocessor, env_postprocessor = make_env_pre_post_processors(pusht_cfg, policy_cfg)
env_preprocessor, env_postprocessor = make_env_pre_post_processors(pusht_cfg)
```

### How It Works

Each `EnvConfig` subclass can override `get_env_processors()` to return benchmark-specific
processor pipelines. The base class returns identity (no-op) processors by default.
### Implementation in `envs/factory.py`

```python
# In your EnvConfig subclass:
def get_env_processors(self):
from lerobot.processor.pipeline import PolicyProcessorPipeline
return (
PolicyProcessorPipeline(steps=[MyProcessorStep()]),
PolicyProcessorPipeline(steps=[]),
)
```
def make_env_pre_post_processors(
env_cfg: EnvConfig,
) -> tuple[
PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
]:
"""
Create preprocessor and postprocessor pipelines for environment observations.

Args:
env_cfg: The configuration of the environment.

The factory function `make_env_pre_post_processors` simply delegates to this method,
with a special case for `XVLAConfig` policies which override the env processors entirely.
Returns:
A tuple containing:
- preprocessor: Pipeline that processes environment observations
- postprocessor: Pipeline that processes environment outputs
"""
# For LIBERO environments, add the LiberoProcessorStep to preprocessor
if isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type:
preprocessor = PolicyProcessorPipeline(steps=[LiberoProcessorStep()])
else:
# For all other environments, return an identity preprocessor
preprocessor = PolicyProcessorPipeline(steps=[])

# Postprocessor is currently identity for all environments
# Future: Could add environment-specific action transformations
postprocessor = PolicyProcessorPipeline(steps=[])

return preprocessor, postprocessor
```

### Integration in Evaluation

Expand All @@ -209,10 +238,7 @@ def eval_main(cfg: EvalPipelineConfig):
)

# Create environment processors (NEW!)
env_preprocessor, env_postprocessor = make_env_pre_post_processors(
env_cfg=cfg.env,
policy_cfg=cfg.policy,
)
env_preprocessor, env_postprocessor = make_env_pre_post_processors(env_cfg=cfg.env)

# Run evaluation with both processor types
eval_policy_all(
Expand Down Expand Up @@ -319,19 +345,18 @@ class MyEnvProcessorStep(ObservationProcessorStep):
### 2. Update Your `EnvConfig` Subclass

```python
# In src/lerobot/envs/configs.py
@EnvConfig.register_subclass("myenv")
@dataclass
class MyEnvConfig(EnvConfig):
# ... task/features/gym kwargs ...

def get_env_processors(self):
from lerobot.processor.pipeline import PolicyProcessorPipeline

return (
PolicyProcessorPipeline(steps=[MyEnvProcessorStep()]),
PolicyProcessorPipeline(steps=[]),
)
# In src/lerobot/envs/factory.py

def make_env_pre_post_processors(env_cfg: EnvConfig):
if isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type:
preprocessor = PolicyProcessorPipeline(steps=[LiberoProcessorStep()])
elif isinstance(env_cfg, MyEnvConfig) or "myenv" in env_cfg.type:
preprocessor = PolicyProcessorPipeline(steps=[MyEnvProcessorStep()])
else:
preprocessor = PolicyProcessorPipeline(steps=[])

postprocessor = PolicyProcessorPipeline(steps=[])
return preprocessor, postprocessor
```

### 3. Use in Evaluation
Expand Down
2 changes: 1 addition & 1 deletion docs/source/metaworld.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Meta-World is an open-source simulation benchmark for **multi-task and meta reinforcement learning** in continuous-control robotic manipulation. It bundles 50 diverse manipulation tasks using everyday objects and a common tabletop Sawyer arm, providing a standardized playground to test whether algorithms can learn many different tasks and generalize quickly to new ones.

- Paper: [Meta-World: A Benchmark and Evaluation for Multi-Task and Meta Reinforcement Learning](https://arxiv.org/abs/1910.10897)
- Paper: [Meta-World: A Benchmark and Evaluation for Multi-Task and Meta Reinforcement Learning](https://arxiv.org/abs/1910.10897)
- GitHub: [Farama-Foundation/Metaworld](https://github.com/Farama-Foundation/Metaworld)
- Project website: [metaworld.farama.org](https://metaworld.farama.org)

Expand Down
27 changes: 17 additions & 10 deletions src/lerobot/configs/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,27 @@ class WandBConfig:
class EvalConfig:
n_episodes: int = 50
# `batch_size` specifies the number of environments to use in a gym.vector.VectorEnv.
batch_size: int = 50
# Set to 0 for auto-tuning based on available CPU cores and n_episodes.
batch_size: int = 0
# `use_async_envs` specifies whether to use asynchronous environments (multiprocessing).
use_async_envs: bool = False
# Defaults to True; automatically downgraded to SyncVectorEnv when batch_size=1.
use_async_envs: bool = True

def __post_init__(self) -> None:
    """Resolve the batch_size=0 "auto" sentinel, then reconcile it with n_episodes."""
    # 0 is a sentinel meaning "auto-tune from CPU cores" — see _auto_batch_size().
    if self.batch_size == 0:
        self.batch_size = self._auto_batch_size()
    # More envs than episodes would instantiate workers that never run an episode.
    if self.batch_size > self.n_episodes:
        raise ValueError(
            "The eval batch size is greater than the number of eval episodes "
            f"({self.batch_size} > {self.n_episodes}). As a result, {self.batch_size} "
            f"eval environments will be instantiated, but only {self.n_episodes} will be used. "
            "This might significantly slow down evaluation. To fix this, you should update your command "
            f"to increase the number of episodes to match the batch size (e.g. `eval.n_episodes={self.batch_size}`), "
            f"or lower the batch size (e.g. `eval.batch_size={self.n_episodes}`)."
        )
        # NOTE(review): unreachable — the raise above always fires first. This looks
        # like diff residue where the silent clamp replaced the raise; confirm which
        # of the two behaviors (raise vs. clamp) is intended before merging.
        self.batch_size = self.n_episodes

def _auto_batch_size(self) -> int:
"""Pick batch_size based on CPU cores, capped by n_episodes."""
import math
import os

cpu_cores = os.cpu_count() or 4
# Each async env worker needs ~1 core; leave headroom for main process + inference.
by_cpu = max(1, math.floor(cpu_cores * 0.7))
return min(by_cpu, self.n_episodes, 64)


@dataclass
Expand Down
27 changes: 20 additions & 7 deletions src/lerobot/envs/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@
)


def _make_vec_env_cls(use_async: bool, n_envs: int):
    """Return the right VectorEnv constructor.

    Async (multiprocessing) vectorization only pays off with more than one env;
    a single env always gets the in-process SyncVectorEnv.
    """
    wants_parallel_workers = use_async and n_envs > 1
    return gym.vector.AsyncVectorEnv if wants_parallel_workers else gym.vector.SyncVectorEnv


@dataclass
class EnvConfig(draccus.ChoiceRegistry, abc.ABC):
task: str | None = None
Expand Down Expand Up @@ -75,13 +82,14 @@ def gym_kwargs(self) -> dict:
def create_envs(
self,
n_envs: int,
use_async_envs: bool = False,
use_async_envs: bool = True,
) -> dict[str, dict[int, gym.vector.VectorEnv]]:
"""Create {suite: {task_id: VectorEnv}}.

Default: single-task env via gym.make(). Multi-task benchmarks override.
AsyncVectorEnv is the default for n_envs > 1; auto-downgraded to Sync for n_envs=1.
"""
env_cls = gym.vector.AsyncVectorEnv if use_async_envs else gym.vector.SyncVectorEnv
env_cls = gym.vector.AsyncVectorEnv if (use_async_envs and n_envs > 1) else gym.vector.SyncVectorEnv

if self.gym_id not in gym_registry:
print(f"gym id '{self.gym_id}' not found, attempting to import '{self.package_name}'...")
Expand Down Expand Up @@ -394,17 +402,22 @@ def __post_init__(self):

@property
def gym_kwargs(self) -> dict:
kwargs: dict[str, Any] = {"obs_type": self.obs_type, "render_mode": self.render_mode}
kwargs: dict[str, Any] = {
"obs_type": self.obs_type,
"render_mode": self.render_mode,
"observation_height": self.observation_height,
"observation_width": self.observation_width,
}
if self.task_ids is not None:
kwargs["task_ids"] = self.task_ids
return kwargs

def create_envs(self, n_envs: int, use_async_envs: bool = False):
def create_envs(self, n_envs: int, use_async_envs: bool = True):
from lerobot.envs.libero import create_libero_envs

if self.task is None:
raise ValueError("LiberoEnv requires a task to be specified")
env_cls = gym.vector.AsyncVectorEnv if use_async_envs else gym.vector.SyncVectorEnv
env_cls = _make_vec_env_cls(use_async_envs, n_envs)
return create_libero_envs(
task=self.task,
n_envs=n_envs,
Expand Down Expand Up @@ -468,12 +481,12 @@ def gym_kwargs(self) -> dict:
"render_mode": self.render_mode,
}

def create_envs(self, n_envs: int, use_async_envs: bool = False):
def create_envs(self, n_envs: int, use_async_envs: bool = True):
from lerobot.envs.metaworld import create_metaworld_envs

if self.task is None:
raise ValueError("MetaWorld requires a task to be specified")
env_cls = gym.vector.AsyncVectorEnv if use_async_envs else gym.vector.SyncVectorEnv
env_cls = _make_vec_env_cls(use_async_envs, n_envs)
return create_metaworld_envs(
task=self.task,
n_envs=n_envs,
Expand Down
2 changes: 1 addition & 1 deletion src/lerobot/envs/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def make_env_pre_post_processors(
def make_env(
cfg: EnvConfig | str,
n_envs: int = 1,
use_async_envs: bool = False,
use_async_envs: bool = True,
hub_cache_dir: str | None = None,
trust_remote_code: bool = False,
) -> dict[str, dict[int, gym.vector.VectorEnv]]:
Expand Down
Loading
Loading