Skip to content
Merged
Show file tree
Hide file tree
Changes from 41 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
69eec9c
docs(benchmarks): add benchmark integration guide and standardize ben…
pkooij Apr 2, 2026
5ad4c8f
refactor(envs): move dispatch logic from factory into EnvConfig subcl…
pkooij Apr 3, 2026
bfa0a0f
docs(benchmarks): clean up adding-benchmarks guide for clarity
pkooij Apr 3, 2026
75d5e5b
fix link
pkooij Apr 3, 2026
7abe5f7
fix task count
pkooij Apr 3, 2026
1fad71c
fix: enable SmolVLA eval on LIBERO with custom camera mappings
pkooij Apr 7, 2026
d8e0eaa
fix: use direct AutoresetMode import for gymnasium compat
pkooij Apr 7, 2026
0ea6aac
fix: handle gymnasium < 1.0 without AutoresetMode
pkooij Apr 7, 2026
27bbb6b
refactor: revert policy changes, keep env-only camera mapping fixes
pkooij Apr 7, 2026
8e07cab
Update docs/source/env_processor.mdx
pkooij Apr 7, 2026
fd99209
feat(envs): lazy env init + AsyncVectorEnv as default for n_envs > 1
pkooij Apr 3, 2026
dbc8c2e
fix: close envs between tasks to prevent worker process accumulation
pkooij Apr 7, 2026
aebc5e2
fix(eval): use task_description instead of task for language conditio…
pkooij Apr 7, 2026
8a778c0
docs: update adding_benchmarks for async env changes
pkooij Apr 7, 2026
5ec6119
feat(eval): batch_size=auto + faster env loading
pkooij Apr 7, 2026
2c32c04
docs: add evaluation guide and update benchmarks doc
pkooij Apr 7, 2026
43abbcc
docs(evaluation): remove benchmark table, rename section header
pkooij Apr 7, 2026
03e1901
perf(eval): shared memory, observation passthrough, task prefetch
pkooij Apr 7, 2026
12023f4
style: ruff format
pkooij Apr 7, 2026
9a6ab6a
chore: revert env_processor.mdx changes (not part of this PR)
pkooij Apr 7, 2026
6e6f76d
ci(benchmarks): add isolated integration tests for libero and metaworld
pkooij Apr 7, 2026
61e2be8
ci(benchmarks): pin action hashes and use uv sync --locked
pkooij Apr 7, 2026
07350f9
ci(benchmarks): trigger only on envs/ or lerobot_eval.py changes
pkooij Apr 7, 2026
dfd09c0
fix(ci): set LIBERO_DATA_FOLDER to bypass interactive stdin prompt
pkooij Apr 8, 2026
42ef36e
docs(benchmarks): add CI smoke test step to adding_benchmarks guide
pkooij Apr 8, 2026
841cbb0
fix(ci): pre-create libero config in Dockerfile to bypass stdin prompt
pkooij Apr 8, 2026
c24687d
fix(ci): use shell to create libero config instead of multiline pytho…
pkooij Apr 8, 2026
2420d20
fix(ci): point libero config to bundled package init_files
pkooij Apr 8, 2026
58a5bcb
fix(ci): add smolvla extra to benchmark Dockerfiles
pkooij Apr 8, 2026
f3853c9
fix(eval): render_frame covers _LazyAsyncVectorEnv
pkooij Apr 8, 2026
e35b485
refactor(envs): remove unused _get_sub_env_attr helper
pkooij Apr 8, 2026
28d353e
chore: apply prettier formatting to docs
pkooij Apr 8, 2026
527463c
docs(env_processor): remove deprecated add_envs_task from pipeline ex…
pkooij Apr 8, 2026
606ed97
refactor(envs): remove __del__ from _LazyAsyncVectorEnv
pkooij Apr 8, 2026
93b99e4
fix(eval): prefetch next task's workers after close to avoid GPU memo…
pkooij Apr 8, 2026
fe05e50
refactor(envs): move _LazyAsyncVectorEnv to utils and apply to metaworld
pkooij Apr 8, 2026
c8c2e88
chore: remove out-of-scope benchmark/CI/docs files from PR
pkooij Apr 8, 2026
f4bc9b5
chore: restore adding_benchmarks + test_dispatch, drop env_processor …
pkooij Apr 8, 2026
5bc90c7
docs(adding_benchmarks): remove CI smoke test step (coming in separat…
pkooij Apr 8, 2026
566a77b
refactor(envs): remove unused add_envs_task
pkooij Apr 8, 2026
973bb7c
style: fix prettier formatting in env_processor.mdx
pkooij Apr 8, 2026
c3fa286
fix(eval): catch AttributeError and NotImplementedError explicitly fo…
pkooij Apr 8, 2026
44534d5
fix(envs): use forkserver context and close envs in test to prevent d…
pkooij Apr 8, 2026
7c9a676
fix(envs): default use_async_envs=False in create_envs and make_env
pkooij Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions docs/source/adding_benchmarks.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ During evaluation, data moves through four stages:
1. gym.Env ──→ raw observations (numpy dicts)

2. Preprocessing ──→ standard LeRobot keys + task description
(preprocess_observation, add_envs_task in envs/utils.py)
(preprocess_observation in envs/utils.py, env.call("task_description"))

3. Processors ──→ env-specific then policy-specific transforms
(env_preprocessor, policy_preprocessor)
Expand Down Expand Up @@ -161,6 +161,8 @@ class MyBenchmarkEnv(gym.Env):
...
```

**GPU-based simulators (e.g. MuJoCo with EGL rendering):** If your simulator allocates GPU/EGL contexts during `__init__`, defer that allocation to a `_ensure_env()` helper called on first `reset()`/`step()`. This avoids inheriting stale GPU handles when `AsyncVectorEnv` spawns worker processes. See `LiberoEnv._ensure_env()` for the pattern.

Also provide a factory function that returns the nested dict structure:

```python
Expand Down Expand Up @@ -207,7 +209,7 @@ class MyBenchmarkEnvConfig(EnvConfig):
def gym_kwargs(self) -> dict:
return {"obs_type": self.obs_type, "render_mode": self.render_mode}

def create_envs(self, n_envs: int, use_async_envs: bool = False):
def create_envs(self, n_envs: int, use_async_envs: bool = True):
"""Override for multi-task benchmarks or custom env creation."""
from lerobot.envs.<benchmark> import create_<benchmark>_envs
return create_<benchmark>_envs(task=self.task, n_envs=n_envs, ...)
Expand Down Expand Up @@ -299,7 +301,7 @@ After completing the steps above, confirm that everything works:

1. **Install** — `pip install -e ".[mybenchmark]"` and verify the dependency group installs cleanly.
2. **Smoke test env creation** — call `make_env()` with your config in Python, check that the returned dict has the expected `{suite: {task_id: VectorEnv}}` shape, and that `reset()` returns observations with the right keys.
3. **Run a full eval** — `lerobot-eval --env.type=<name> --env.task=<task> --eval.n_episodes=1 --eval.batch_size=1 --policy.path=<any_compatible_policy>` to exercise the full pipeline end-to-end.
3. **Run a full eval** — `lerobot-eval --env.type=<name> --env.task=<task> --eval.n_episodes=1 --policy.path=<any_compatible_policy>` to exercise the full pipeline end-to-end. (`batch_size` defaults to auto-tuning based on CPU cores; pass `--eval.batch_size=1` to force a single environment.)
4. **Check success detection** — verify that `info["is_success"]` flips to `True` when the task is actually completed. This is what the eval loop uses to compute success rates.

## Writing a benchmark doc page
Expand All @@ -311,7 +313,7 @@ Each benchmark `.mdx` page should include:
- **Overview image or GIF.**
- **Available tasks** — table of task suites with counts and brief descriptions.
- **Installation** — `pip install -e ".[<benchmark>]"` plus any extra steps (env vars, system packages).
- **Evaluation** — recommended `lerobot-eval` command with `n_episodes` and `batch_size` for reproducible results. Include single-task and multi-task examples if applicable.
- **Evaluation** — recommended `lerobot-eval` command with `n_episodes` for reproducible results. `batch_size` defaults to auto; only specify it if needed. Include single-task and multi-task examples if applicable.
- **Policy inputs and outputs** — observation keys with shapes, action space description.
- **Recommended evaluation episodes** — how many episodes per task is standard.
- **Training** — example `lerobot-train` command.
Expand Down
99 changes: 62 additions & 37 deletions docs/source/env_processor.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ policy_preprocessor = NormalizerProcessorStep(stats=dataset_stats)

The same policy can work with different environment processors, and the same environment processor can work with different policies:

```python
````python
# Use SmolVLA policy with LIBERO environment
# Use SmolVLA policy with LIBERO environment
libero_preprocessor, libero_postprocessor = make_env_pre_post_processors(
Expand All @@ -102,7 +102,20 @@ libero_preprocessor, libero_postprocessor = make_env_pre_post_processors(
policy_cfg=act_cfg,
)
act_preprocessor, act_postprocessor = make_pre_post_processors(act_cfg)
```
```python
# Use SmolVLA policy with LIBERO environment
libero_preprocessor, libero_postprocessor = make_env_pre_post_processors(
env_cfg=libero_cfg,
policy_cfg=smolvla_cfg,
)
smolvla_preprocessor, smolvla_postprocessor = make_pre_post_processors(smolvla_cfg)

# Or use ACT policy with the same LIBERO environment
libero_preprocessor, libero_postprocessor = make_env_pre_post_processors(
env_cfg=libero_cfg,
policy_cfg=act_cfg,
)
act_preprocessor, act_postprocessor = make_pre_post_processors(act_cfg)

### 3. **Easier Experimentation**

Expand Down Expand Up @@ -132,7 +145,7 @@ class LiberoVelocityProcessorStep(ObservationProcessorStep):
state = torch.cat([eef_pos, eef_axisangle, eef_vel,
gripper_pos, gripper_vel], dim=-1) # 14D
return state
```
````

### 4. **Cleaner Environment Code**

Expand All @@ -157,38 +170,54 @@ observation = {

### Factory Function

The `make_env_pre_post_processors` function delegates to `env_cfg.get_env_processors()`:
The `make_env_pre_post_processors` function follows the same pattern as `make_pre_post_processors` for policies:

```python
from lerobot.envs.factory import make_env_pre_post_processors
from lerobot.envs.configs import LiberoEnv, PushtEnv

# For LIBERO: Returns LiberoProcessorStep in preprocessor
libero_cfg = LiberoEnv(task="libero_spatial", camera_name=["agentview"])
env_preprocessor, env_postprocessor = make_env_pre_post_processors(libero_cfg, policy_cfg)
env_preprocessor, env_postprocessor = make_env_pre_post_processors(libero_cfg)

# For other environments: Returns identity processors (no-op)
pusht_cfg = PushtEnv()
env_preprocessor, env_postprocessor = make_env_pre_post_processors(pusht_cfg, policy_cfg)
env_preprocessor, env_postprocessor = make_env_pre_post_processors(pusht_cfg)
```

### How It Works

Each `EnvConfig` subclass can override `get_env_processors()` to return benchmark-specific
processor pipelines. The base class returns identity (no-op) processors by default.
### Implementation in `envs/factory.py`

```python
# In your EnvConfig subclass:
def get_env_processors(self):
from lerobot.processor.pipeline import PolicyProcessorPipeline
return (
PolicyProcessorPipeline(steps=[MyProcessorStep()]),
PolicyProcessorPipeline(steps=[]),
)
```
def make_env_pre_post_processors(
env_cfg: EnvConfig,
) -> tuple[
PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
]:
"""
Create preprocessor and postprocessor pipelines for environment observations.

Args:
env_cfg: The configuration of the environment.

The factory function `make_env_pre_post_processors` simply delegates to this method,
with a special case for `XVLAConfig` policies which override the env processors entirely.
Returns:
A tuple containing:
- preprocessor: Pipeline that processes environment observations
- postprocessor: Pipeline that processes environment outputs
"""
# For LIBERO environments, add the LiberoProcessorStep to preprocessor
if isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type:
preprocessor = PolicyProcessorPipeline(steps=[LiberoProcessorStep()])
else:
# For all other environments, return an identity preprocessor
preprocessor = PolicyProcessorPipeline(steps=[])

# Postprocessor is currently identity for all environments
# Future: Could add environment-specific action transformations
postprocessor = PolicyProcessorPipeline(steps=[])

return preprocessor, postprocessor
```

### Integration in Evaluation

Expand All @@ -209,10 +238,7 @@ def eval_main(cfg: EvalPipelineConfig):
)

# Create environment processors (NEW!)
env_preprocessor, env_postprocessor = make_env_pre_post_processors(
env_cfg=cfg.env,
policy_cfg=cfg.policy,
)
env_preprocessor, env_postprocessor = make_env_pre_post_processors(env_cfg=cfg.env)

# Run evaluation with both processor types
eval_policy_all(
Expand Down Expand Up @@ -319,19 +345,18 @@ class MyEnvProcessorStep(ObservationProcessorStep):
### 2. Update Your `EnvConfig` Subclass

```python
# In src/lerobot/envs/configs.py
@EnvConfig.register_subclass("myenv")
@dataclass
class MyEnvConfig(EnvConfig):
# ... task/features/gym kwargs ...

def get_env_processors(self):
from lerobot.processor.pipeline import PolicyProcessorPipeline

return (
PolicyProcessorPipeline(steps=[MyEnvProcessorStep()]),
PolicyProcessorPipeline(steps=[]),
)
# In src/lerobot/envs/factory.py

def make_env_pre_post_processors(env_cfg: EnvConfig):
if isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type:
preprocessor = PolicyProcessorPipeline(steps=[LiberoProcessorStep()])
elif isinstance(env_cfg, MyEnvConfig) or "myenv" in env_cfg.type:
preprocessor = PolicyProcessorPipeline(steps=[MyEnvProcessorStep()])
else:
preprocessor = PolicyProcessorPipeline(steps=[])

postprocessor = PolicyProcessorPipeline(steps=[])
return preprocessor, postprocessor
```

### 3. Use in Evaluation
Expand Down
2 changes: 1 addition & 1 deletion docs/source/metaworld.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Meta-World is an open-source simulation benchmark for **multi-task and meta reinforcement learning** in continuous-control robotic manipulation. It bundles 50 diverse manipulation tasks using everyday objects and a common tabletop Sawyer arm, providing a standardized playground to test whether algorithms can learn many different tasks and generalize quickly to new ones.

- Paper: [Meta-World: A Benchmark and Evaluation for Multi-Task and Meta Reinforcement Learning](https://arxiv.org/abs/1910.10897)
- Paper: [Meta-World: A Benchmark and Evaluation for Multi-Task and Meta Reinforcement Learning](https://arxiv.org/abs/1910.10897)
- GitHub: [Farama-Foundation/Metaworld](https://github.com/Farama-Foundation/Metaworld)
- Project website: [metaworld.farama.org](https://metaworld.farama.org)

Expand Down
27 changes: 17 additions & 10 deletions src/lerobot/configs/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,27 @@ class WandBConfig:
class EvalConfig:
n_episodes: int = 50
# `batch_size` specifies the number of environments to use in a gym.vector.VectorEnv.
batch_size: int = 50
# Set to 0 for auto-tuning based on available CPU cores and n_episodes.
batch_size: int = 0
# `use_async_envs` specifies whether to use asynchronous environments (multiprocessing).
use_async_envs: bool = False
# Defaults to True; automatically downgraded to SyncVectorEnv when batch_size=1.
use_async_envs: bool = True

def __post_init__(self) -> None:
    """Resolve the batch_size=0 "auto" sentinel, then reconcile it with n_episodes."""
    # 0 is a sentinel meaning "auto-tune from CPU cores" — see _auto_batch_size().
    if self.batch_size == 0:
        self.batch_size = self._auto_batch_size()
    # More envs than episodes would instantiate workers that never run an episode.
    if self.batch_size > self.n_episodes:
        raise ValueError(
            "The eval batch size is greater than the number of eval episodes "
            f"({self.batch_size} > {self.n_episodes}). As a result, {self.batch_size} "
            f"eval environments will be instantiated, but only {self.n_episodes} will be used. "
            "This might significantly slow down evaluation. To fix this, you should update your command "
            f"to increase the number of episodes to match the batch size (e.g. `eval.n_episodes={self.batch_size}`), "
            f"or lower the batch size (e.g. `eval.batch_size={self.n_episodes}`)."
        )
        # NOTE(review): unreachable — the raise above always fires first. This looks
        # like diff residue where the silent clamp replaced the raise; confirm which
        # of the two behaviors (raise vs. clamp) is intended before merging.
        self.batch_size = self.n_episodes

def _auto_batch_size(self) -> int:
"""Pick batch_size based on CPU cores, capped by n_episodes."""
import math
import os

cpu_cores = os.cpu_count() or 4
# Each async env worker needs ~1 core; leave headroom for main process + inference.
by_cpu = max(1, math.floor(cpu_cores * 0.7))
return min(by_cpu, self.n_episodes, 64)


@dataclass
Expand Down
27 changes: 20 additions & 7 deletions src/lerobot/envs/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@
)


def _make_vec_env_cls(use_async: bool, n_envs: int):
    """Return the right VectorEnv constructor.

    Async (multiprocessing) vectorization only pays off with more than one env;
    a single env always gets the in-process SyncVectorEnv.
    """
    wants_parallel_workers = use_async and n_envs > 1
    return gym.vector.AsyncVectorEnv if wants_parallel_workers else gym.vector.SyncVectorEnv


@dataclass
class EnvConfig(draccus.ChoiceRegistry, abc.ABC):
task: str | None = None
Expand Down Expand Up @@ -75,13 +82,14 @@ def gym_kwargs(self) -> dict:
def create_envs(
self,
n_envs: int,
use_async_envs: bool = False,
use_async_envs: bool = True,
) -> dict[str, dict[int, gym.vector.VectorEnv]]:
"""Create {suite: {task_id: VectorEnv}}.

Default: single-task env via gym.make(). Multi-task benchmarks override.
AsyncVectorEnv is the default for n_envs > 1; auto-downgraded to Sync for n_envs=1.
"""
env_cls = gym.vector.AsyncVectorEnv if use_async_envs else gym.vector.SyncVectorEnv
env_cls = gym.vector.AsyncVectorEnv if (use_async_envs and n_envs > 1) else gym.vector.SyncVectorEnv

if self.gym_id not in gym_registry:
print(f"gym id '{self.gym_id}' not found, attempting to import '{self.package_name}'...")
Expand Down Expand Up @@ -394,17 +402,22 @@ def __post_init__(self):

@property
def gym_kwargs(self) -> dict:
kwargs: dict[str, Any] = {"obs_type": self.obs_type, "render_mode": self.render_mode}
kwargs: dict[str, Any] = {
"obs_type": self.obs_type,
"render_mode": self.render_mode,
"observation_height": self.observation_height,
"observation_width": self.observation_width,
}
if self.task_ids is not None:
kwargs["task_ids"] = self.task_ids
return kwargs

def create_envs(self, n_envs: int, use_async_envs: bool = False):
def create_envs(self, n_envs: int, use_async_envs: bool = True):
from lerobot.envs.libero import create_libero_envs

if self.task is None:
raise ValueError("LiberoEnv requires a task to be specified")
env_cls = gym.vector.AsyncVectorEnv if use_async_envs else gym.vector.SyncVectorEnv
env_cls = _make_vec_env_cls(use_async_envs, n_envs)
return create_libero_envs(
task=self.task,
n_envs=n_envs,
Expand Down Expand Up @@ -468,12 +481,12 @@ def gym_kwargs(self) -> dict:
"render_mode": self.render_mode,
}

def create_envs(self, n_envs: int, use_async_envs: bool = False):
def create_envs(self, n_envs: int, use_async_envs: bool = True):
from lerobot.envs.metaworld import create_metaworld_envs

if self.task is None:
raise ValueError("MetaWorld requires a task to be specified")
env_cls = gym.vector.AsyncVectorEnv if use_async_envs else gym.vector.SyncVectorEnv
env_cls = _make_vec_env_cls(use_async_envs, n_envs)
return create_metaworld_envs(
task=self.task,
n_envs=n_envs,
Expand Down
2 changes: 1 addition & 1 deletion src/lerobot/envs/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def make_env_pre_post_processors(
def make_env(
cfg: EnvConfig | str,
n_envs: int = 1,
use_async_envs: bool = False,
use_async_envs: bool = True,
hub_cache_dir: str | None = None,
trust_remote_code: bool = False,
) -> dict[str, dict[int, gym.vector.VectorEnv]]:
Expand Down
Loading
Loading