From 39bb3a34c66d78e6f0b9a37409792320f3f5b487 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Wed, 8 Apr 2026 10:11:58 +0200 Subject: [PATCH 1/3] feat(envs): add LIBERO-plus robustness benchmark integration - Add import fallback in libero.py for LIBERO-plus nested package structure (github.com/sylvestf/LIBERO-plus installs under a deeper module path than the original hf-libero wheel) - Register LiberoPlusEnv config subclass (inherits LiberoEnv fully; only the env type name and default suite differ) - Add libero_plus optional dep group in pyproject.toml pointing to the LIBERO-plus GitHub repo - Add docs/source/libero_plus.mdx with install guide, task suite table, perturbation dimensions, eval commands, and dataset reference - Add docker/Dockerfile.benchmark.libero_plus for isolated CI image - Add libero-plus-integration-test CI job to benchmark_tests.yml Dataset: pepijn223/libero_plus_lerobot is already v3.0 (no conversion needed). Dataset card is missing and should be added separately on the Hub. 
Eval smoke-test (requires Linux + GPU): lerobot-eval \ --policy.path=pepijn223/smolvla_libero \ --env.type=libero_plus \ --env.task=libero_spatial \ --eval.batch_size=1 --eval.n_episodes=1 \ --eval.use_async_envs=false --policy.device=cuda \ '--env.camera_name_mapping={"agentview_image":"camera1","robot0_eye_in_hand_image":"camera2"}' \ --policy.empty_cameras=1 Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/benchmark_tests.yml | 94 +++++++++++++ docker/Dockerfile.benchmark.libero_plus | 88 ++++++++++++ docs/source/_toctree.yml | 2 + docs/source/libero_plus.mdx | 171 ++++++++++++++++++++++++ pyproject.toml | 6 + src/lerobot/envs/configs.py | 23 ++++ src/lerobot/envs/libero.py | 11 +- 7 files changed, 393 insertions(+), 2 deletions(-) create mode 100644 docker/Dockerfile.benchmark.libero_plus create mode 100644 docs/source/libero_plus.mdx diff --git a/.github/workflows/benchmark_tests.yml b/.github/workflows/benchmark_tests.yml index afdc518141..088080db80 100644 --- a/.github/workflows/benchmark_tests.yml +++ b/.github/workflows/benchmark_tests.yml @@ -216,6 +216,100 @@ jobs: path: /tmp/libero-train-smoke-artifacts/eval/ if-no-files-found: warn + # ── LIBERO-plus ─────────────────────────────────────────────────────────── + # Isolated image: lerobot[libero_plus] only (LIBERO-plus from GitHub, mujoco) + libero-plus-integration-test: + name: LIBERO-plus — build image + 1-episode eval + runs-on: + group: aws-g6-4xlarge-plus + env: + HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }} + + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + lfs: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses] + with: + cache-binary: false + + - name: Build LIBERO-plus benchmark image + uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses] + with: + context: . 
+ file: docker/Dockerfile.benchmark.libero_plus + push: false + load: true + tags: lerobot-benchmark-libero-plus:ci + cache-from: type=local,src=/tmp/.buildx-cache-libero-plus + cache-to: type=local,dest=/tmp/.buildx-cache-libero-plus,mode=max + + - name: Login to Hugging Face + if: env.HF_USER_TOKEN != '' + run: | + docker run --rm \ + -e HF_HOME=/tmp/hf \ + lerobot-benchmark-libero-plus:ci \ + bash -c "hf auth login --token '$HF_USER_TOKEN' --add-to-git-credential && hf auth whoami" + + - name: Run LIBERO-plus smoke eval (1 episode) + run: | + docker run --name libero-plus-eval --gpus all \ + --shm-size=4g \ + -e HF_HOME=/tmp/hf \ + -e HF_USER_TOKEN="${HF_USER_TOKEN}" \ + -e HF_HUB_DOWNLOAD_TIMEOUT=300 \ + lerobot-benchmark-libero-plus:ci \ + bash -c " + hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true + lerobot-eval \ + --policy.path=pepijn223/smolvla_libero \ + --env.type=libero_plus \ + --env.task=libero_spatial \ + --eval.batch_size=1 \ + --eval.n_episodes=1 \ + --eval.use_async_envs=false \ + --policy.device=cuda \ + '--env.camera_name_mapping={\"agentview_image\": \"camera1\", \"robot0_eye_in_hand_image\": \"camera2\"}' \ + --policy.empty_cameras=1 \ + --output_dir=/tmp/eval-artifacts + " + + - name: Copy LIBERO-plus artifacts from container + if: always() + run: | + mkdir -p /tmp/libero-plus-artifacts + docker cp libero-plus-eval:/tmp/eval-artifacts/. 
/tmp/libero-plus-artifacts/ 2>/dev/null || true + docker rm -f libero-plus-eval || true + + - name: Parse LIBERO-plus eval metrics + if: always() + run: | + python3 scripts/ci/parse_eval_metrics.py \ + --artifacts-dir /tmp/libero-plus-artifacts \ + --env libero_plus \ + --task libero_spatial \ + --policy pepijn223/smolvla_libero + + - name: Upload LIBERO-plus rollout video + if: always() + uses: actions/upload-artifact@v4 + with: + name: libero-plus-rollout-video + path: /tmp/libero-plus-artifacts/videos/ + if-no-files-found: warn + + - name: Upload LIBERO-plus eval metrics + if: always() + uses: actions/upload-artifact@v4 + with: + name: libero-plus-metrics + path: /tmp/libero-plus-artifacts/metrics.json + if-no-files-found: warn + # ── METAWORLD ───────────────────────────────────────────────────────────── # Isolated image: lerobot[metaworld] only (metaworld==3.0.0, mujoco>=3 chain) metaworld-integration-test: diff --git a/docker/Dockerfile.benchmark.libero_plus b/docker/Dockerfile.benchmark.libero_plus new file mode 100644 index 0000000000..c7d81988f8 --- /dev/null +++ b/docker/Dockerfile.benchmark.libero_plus @@ -0,0 +1,88 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Isolated benchmark image for LIBERO-plus integration tests. +# Installs only lerobot[libero_plus] (LIBERO-plus from GitHub, dm-control, mujoco). 
+# +# Build: docker build -f docker/Dockerfile.benchmark.libero_plus -t lerobot-benchmark-libero-plus . +# Run: docker run --gpus all --rm lerobot-benchmark-libero-plus lerobot-eval ... + +ARG CUDA_VERSION=12.4.1 +ARG OS_VERSION=22.04 +FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION} + +ARG PYTHON_VERSION=3.12 + +ENV DEBIAN_FRONTEND=noninteractive \ + MUJOCO_GL=egl \ + PATH=/lerobot/.venv/bin:$PATH \ + CUDA_VISIBLE_DEVICES=0 \ + DEVICE=cuda + +# System deps — same set as Dockerfile.internal plus LIBERO-plus extras +RUN apt-get update && apt-get install -y --no-install-recommends \ + software-properties-common build-essential git curl \ + libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \ + libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \ + cmake pkg-config ninja-build \ + libexpat1 libfontconfig1-dev libmagickwand-dev \ + && add-apt-repository -y ppa:deadsnakes/ppa \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + python${PYTHON_VERSION} \ + python${PYTHON_VERSION}-venv \ + python${PYTHON_VERSION}-dev \ + && curl -LsSf https://astral.sh/uv/install.sh | sh \ + && mv /root/.local/bin/uv /usr/local/bin/uv \ + && useradd --create-home --shell /bin/bash user_lerobot \ + && usermod -aG sudo user_lerobot \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +WORKDIR /lerobot +RUN chown -R user_lerobot:user_lerobot /lerobot +USER user_lerobot + +ENV HOME=/home/user_lerobot \ + HF_HOME=/home/user_lerobot/.cache/huggingface \ + HF_LEROBOT_HOME=/home/user_lerobot/.cache/huggingface/lerobot \ + TORCH_HOME=/home/user_lerobot/.cache/torch \ + TRITON_CACHE_DIR=/home/user_lerobot/.cache/triton + +RUN uv venv --python python${PYTHON_VERSION} + +# Install only lerobot[libero_plus] — isolated from hf-libero and metaworld dep trees +COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml uv.lock README.md MANIFEST.in ./ +COPY --chown=user_lerobot:user_lerobot src/ src/ + +RUN uv sync --extra libero_plus --extra smolvla 
--no-cache + +# Pre-download libero assets so nothing is fetched at runtime (CI timeout risk). +# libero/libero/__init__.py prompts with input() when ~/.libero/config.yaml is +# missing; write the config first so any import is non-interactive. +RUN LIBERO_DIR=$(python${PYTHON_VERSION} -c \ + "import importlib.util, os; s=importlib.util.find_spec('libero'); \ + print(os.path.join(os.path.dirname(s.origin), 'libero'))") && \ + mkdir -p /home/user_lerobot/.libero && \ + python${PYTHON_VERSION} -c "\ +from huggingface_hub import snapshot_download; \ +snapshot_download(repo_id='lerobot/libero-assets', repo_type='dataset', \ + local_dir='/home/user_lerobot/.libero/assets')" && \ + printf "assets: /home/user_lerobot/.libero/assets\nbddl_files: ${LIBERO_DIR}/bddl_files\ndatasets: ${LIBERO_DIR}/../datasets\ninit_states: ${LIBERO_DIR}/init_files\n" \ + > /home/user_lerobot/.libero/config.yaml + +RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas + +COPY --chown=user_lerobot:user_lerobot . . + +CMD ["/bin/bash"] diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 3dcba5993e..16f14fc926 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -77,6 +77,8 @@ title: Adding a New Benchmark - local: libero title: LIBERO + - local: libero_plus + title: LIBERO-plus - local: metaworld title: Meta-World - local: envhub_isaaclab_arena diff --git a/docs/source/libero_plus.mdx b/docs/source/libero_plus.mdx new file mode 100644 index 0000000000..a378042d53 --- /dev/null +++ b/docs/source/libero_plus.mdx @@ -0,0 +1,171 @@ +# LIBERO-plus + +LIBERO-plus is a **robustness benchmark** for Vision-Language-Action (VLA) models built on top of [LIBERO](./libero). It systematically stress-tests policies by applying **seven independent perturbation dimensions** to the original LIBERO task set, exposing failure modes that standard benchmarks miss. 
+ +- Paper: [LIBERO-plus: A Robustness Benchmark for VLA Models](https://github.com/sylvestf/LIBERO-plus) +- GitHub: [sylvestf/LIBERO-plus](https://github.com/sylvestf/LIBERO-plus) +- Dataset: [pepijn223/libero_plus_lerobot](https://huggingface.co/datasets/pepijn223/libero_plus_lerobot) + +## Perturbation dimensions + +LIBERO-plus creates ~10 000 task variants by perturbing each original LIBERO task along these axes: + +| Dimension | What changes | +| --------------------- | ----------------------------------------------------- | +| Objects layout | Target position, presence of confounding objects | +| Camera viewpoints | Camera position, orientation, field-of-view | +| Robot initial states | Manipulator start pose | +| Language instructions | LLM-rewritten task description (paraphrase / synonym) | +| Light conditions | Intensity, direction, color, shadow | +| Background textures | Scene surface and object appearance | +| Sensor noise | Photometric distortions and image degradation | + +## Available task suites + +LIBERO-plus covers the same five suites as LIBERO: + +| Suite | CLI name | Tasks | Max steps | +| -------------- | ---------------- | ----- | --------- | +| LIBERO-Spatial | `libero_spatial` | 10 | 280 | +| LIBERO-Object | `libero_object` | 10 | 280 | +| LIBERO-Goal | `libero_goal` | 10 | 300 | +| LIBERO-90 | `libero_90` | 90 | 400 | +| LIBERO-Long | `libero_10` | 10 | 520 | + +## Installation + +### System dependencies (Linux only) + +```bash +sudo apt install libexpat1 libfontconfig1-dev libmagickwand-dev +``` + +### Python package + +```bash +pip install -e ".[libero_plus]" +``` + +This installs LIBERO-plus directly from its GitHub repository. Because MuJoCo is required, only Linux is supported. + + +Set the MuJoCo rendering backend before running evaluation: + +```bash +export MUJOCO_GL=egl # headless / HPC / cloud +``` + + + +### Download LIBERO-plus assets + +LIBERO-plus ships its extended asset pack separately. 
Download `assets.zip` from the [Hugging Face dataset](https://huggingface.co/datasets/Sylvest/LIBERO-plus/tree/main) and extract it into the LIBERO-plus package directory: + +```bash +# After installing the package, find where it was installed: +python -c "import libero; print(libero.__file__)" +# Then extract assets.zip into <libero_package_dir>/libero/assets/ +``` + +## Evaluation + +### Minimal smoke-test (1 episode, no async) + +```bash +lerobot-eval \ + --policy.path=pepijn223/smolvla_libero \ + --env.type=libero_plus \ + --env.task=libero_spatial \ + --eval.batch_size=1 \ + --eval.n_episodes=1 \ + --eval.use_async_envs=false \ + --policy.device=cuda \ + --env.camera_name_mapping='{"agentview_image": "camera1", "robot0_eye_in_hand_image": "camera2"}' \ + --policy.empty_cameras=1 +``` + +### Full robustness benchmark (recommended) + +```bash +lerobot-eval \ + --policy.path=<your_policy_repo_id> \ + --env.type=libero_plus \ + --env.task=libero_spatial,libero_object,libero_goal,libero_10 \ + --eval.batch_size=1 \ + --eval.n_episodes=10 \ + --env.max_parallel_tasks=1 +``` + +### Key CLI flags + +| Flag | Description | +| --------------------------- | ---------------------------------------------------------------- | +| `--env.type=libero_plus` | Selects LIBERO-plus environment (same gym interface as `libero`) | +| `--env.task` | Suite name(s), comma-separated | +| `--env.task_ids` | Restrict to specific task indices, e.g.
`[0,1,2]` | +| `--env.camera_name_mapping` | JSON dict remapping raw camera names to policy input keys | +| `--env.control_mode` | `relative` (default) or `absolute` | +| `--eval.use_async_envs` | `true` for parallel rollouts (default), `false` for debugging | +| `--policy.empty_cameras` | Number of camera slots without observations (policy-specific) | + +### Camera name mapping + +By default, LIBERO cameras are mapped as: + +| Raw camera name | LeRobot key | +| -------------------------- | --------------------------- | +| `agentview_image` | `observation.images.image` | +| `robot0_eye_in_hand_image` | `observation.images.image2` | + +If your policy was trained with different key names, pass a JSON remapping: + +```bash +--env.camera_name_mapping='{"agentview_image": "camera1", "robot0_eye_in_hand_image": "camera2"}' +``` + +## Policy inputs and outputs + +**Observations (after `LiberoProcessorStep`):** + +- `observation.state` — 8-dim proprioceptive vector: `[eef_pos(3), eef_axis_angle(3), gripper_qpos(2)]` +- `observation.images.<camera_key>` — camera image(s), flipped 180° to match VLA convention + +**Actions:** + +- `Box(-1, 1, shape=(7,))` — 6D end-effector delta + 1D gripper + +## Dataset + +A LeRobot-format training dataset for LIBERO-plus is available at: + +- [pepijn223/libero_plus_lerobot](https://huggingface.co/datasets/pepijn223/libero_plus_lerobot) + +### Example training command + +```bash +lerobot-train \ + --policy.type=smolvla \ + --policy.repo_id=${HF_USER}/smolvla_libero_plus \ + --policy.load_vlm_weights=true \ + --dataset.repo_id=pepijn223/libero_plus_lerobot \ + --env.type=libero_plus \ + --env.task=libero_spatial \ + --output_dir=./outputs/ \ + --steps=100000 \ + --batch_size=4 \ + --eval.batch_size=1 \ + --eval.n_episodes=1 \ + --eval_freq=1000 +``` + +## Relationship to LIBERO + +LIBERO-plus is a drop-in extension of LIBERO: + +- Same Python gym interface (`LiberoEnv`, `LiberoProcessorStep`) +- Same camera names and observation/action format +- Same
task suite names +- Installs under the same `libero` Python package name (different GitHub repo) +- The only code difference in LeRobot is a try/except import fallback in `libero.py` that handles the slightly different package nesting in LIBERO-plus + +To use the original LIBERO benchmark, see [LIBERO](./libero) and use `--env.type=libero`. diff --git a/pyproject.toml b/pyproject.toml index 79409a2002..192db40b3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -175,6 +175,11 @@ video_benchmark = ["scikit-image>=0.23.2,<0.26.0", "pandas>=2.2.2,<2.4.0"] aloha = ["gym-aloha>=0.1.2,<0.2.0", "lerobot[scipy-dep]"] pusht = ["gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead libero = ["lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0; sys_platform == 'linux'", "lerobot[scipy-dep]"] +libero_plus = [ + "lerobot[transformers-dep]", + "libero @ git+https://github.com/sylvestf/LIBERO-plus.git@main ; sys_platform == 'linux'", + "lerobot[scipy-dep]", +] metaworld = ["metaworld==3.0.0", "lerobot[scipy-dep]"] # All @@ -205,6 +210,7 @@ all = [ "lerobot[pusht]", "lerobot[phone]", "lerobot[libero]; sys_platform == 'linux'", + "lerobot[libero_plus]; sys_platform == 'linux'", "lerobot[metaworld]", "lerobot[sarm]", "lerobot[peft]", diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py index c9c43757d5..46bddcc12d 100644 --- a/src/lerobot/envs/configs.py +++ b/src/lerobot/envs/configs.py @@ -445,6 +445,29 @@ def get_env_processors(self): ) +@EnvConfig.register_subclass("libero_plus") +@dataclass +class LiberoPlusEnv(LiberoEnv): + """Config for LIBERO-plus robustness benchmark evaluation. + + LIBERO-plus extends LIBERO with 7 perturbation dimensions (camera viewpoints, + object layouts, robot initial states, language instructions, lighting, background + textures, sensor noise) producing ~10k task variants. 
+ + The gym interface is identical to LIBERO so this class reuses ``LiberoEnv`` + entirely — only the registered name and default task suite differ. + + Install:: + + pip install -e ".[libero_plus]" + + See Also: + https://github.com/sylvestf/LIBERO-plus + """ + + task: str = "libero_spatial" + + @EnvConfig.register_subclass("metaworld") @dataclass class MetaworldEnv(EnvConfig): diff --git a/src/lerobot/envs/libero.py b/src/lerobot/envs/libero.py index 1b814db524..4e1da6007f 100644 --- a/src/lerobot/envs/libero.py +++ b/src/lerobot/envs/libero.py @@ -26,8 +26,15 @@ import numpy as np import torch from gymnasium import spaces -from libero.libero import benchmark, get_libero_path -from libero.libero.envs import OffScreenRenderEnv + +try: + from libero.libero import benchmark, get_libero_path + from libero.libero.envs import OffScreenRenderEnv +except ImportError: + # LIBERO-plus installs with an extra package nesting level. + # See: https://github.com/sylvestf/LIBERO-plus + from libero.libero.libero import benchmark, get_libero_path # type: ignore[no-redef] + from libero.libero.libero.envs import OffScreenRenderEnv # type: ignore[no-redef] from lerobot.envs.utils import _LazyAsyncVectorEnv from lerobot.types import RobotObservation From 097c4a0fb575fe9fa417ea2337a22202f7074cb7 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Fri, 10 Apr 2026 14:33:20 +0200 Subject: [PATCH 2/3] ci: trigger benchmark CI after workflow update From 2ce1e94ab7bcaa182f460fcd619e78364668dac8 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Fri, 10 Apr 2026 14:42:56 +0200 Subject: [PATCH 3/3] =?UTF-8?q?ci:=20retrigger=20after=20uv=20sync=20?= =?UTF-8?q?=E2=86=92=20uv=20pip=20install=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit