From 39bb3a34c66d78e6f0b9a37409792320f3f5b487 Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Wed, 8 Apr 2026 10:11:58 +0200
Subject: [PATCH 1/3] feat(envs): add LIBERO-plus robustness benchmark
 integration

- Add import fallback in libero.py for LIBERO-plus nested package structure
  (github.com/sylvestf/LIBERO-plus installs under a deeper module path than
  the original hf-libero wheel)
- Register LiberoPlusEnv config subclass (inherits LiberoEnv fully; only
  the env type name and default suite differ)
- Add libero_plus optional dep group in pyproject.toml pointing to the
  LIBERO-plus GitHub repo
- Add docs/source/libero_plus.mdx with install guide, task suite table,
  perturbation dimensions, eval commands, and dataset reference
- Add docker/Dockerfile.benchmark.libero_plus for isolated CI image
- Add libero-plus-integration-test CI job to benchmark_tests.yml

Dataset: pepijn223/libero_plus_lerobot is already v3.0 (no conversion needed).
Dataset card is missing and should be added separately on the Hub.

Eval smoke-test (requires Linux + GPU):
  lerobot-eval \
    --policy.path=pepijn223/smolvla_libero \
    --env.type=libero_plus \
    --env.task=libero_spatial \
    --eval.batch_size=1 --eval.n_episodes=1 \
    --eval.use_async_envs=false --policy.device=cuda \
    '--env.camera_name_mapping={"agentview_image":"camera1","robot0_eye_in_hand_image":"camera2"}' \
    --policy.empty_cameras=1

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .github/workflows/benchmark_tests.yml   |  94 +++++++++++++
 docker/Dockerfile.benchmark.libero_plus |  88 ++++++++++++
 docs/source/_toctree.yml                |   2 +
 docs/source/libero_plus.mdx             | 171 ++++++++++++++++++++++++
 pyproject.toml                          |   6 +
 src/lerobot/envs/configs.py             |  23 ++++
 src/lerobot/envs/libero.py              |  11 +-
 7 files changed, 393 insertions(+), 2 deletions(-)
 create mode 100644 docker/Dockerfile.benchmark.libero_plus
 create mode 100644 docs/source/libero_plus.mdx

diff --git a/.github/workflows/benchmark_tests.yml b/.github/workflows/benchmark_tests.yml
index afdc518141..088080db80 100644
--- a/.github/workflows/benchmark_tests.yml
+++ b/.github/workflows/benchmark_tests.yml
@@ -216,6 +216,100 @@ jobs:
           path: /tmp/libero-train-smoke-artifacts/eval/
           if-no-files-found: warn
 
+  # ── LIBERO-plus ───────────────────────────────────────────────────────────
+  # Isolated image: lerobot[libero_plus] only (LIBERO-plus from GitHub, mujoco)
+  libero-plus-integration-test:
+    name: LIBERO-plus — build image + 1-episode eval
+    runs-on:
+      group: aws-g6-4xlarge-plus
+    env:
+      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
+
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+          lfs: true
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
+        with:
+          cache-binary: false
+
+      - name: Build LIBERO-plus benchmark image
+        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
+        with:
+          context: .
+          file: docker/Dockerfile.benchmark.libero_plus
+          push: false
+          load: true
+          tags: lerobot-benchmark-libero-plus:ci
+          cache-from: type=local,src=/tmp/.buildx-cache-libero-plus
+          cache-to: type=local,dest=/tmp/.buildx-cache-libero-plus,mode=max
+
+      - name: Login to Hugging Face
+        if: env.HF_USER_TOKEN != ''
+        run: |
+          docker run --rm \
+            -e HF_HOME=/tmp/hf \
+            lerobot-benchmark-libero-plus:ci \
+            bash -c "hf auth login --token '$HF_USER_TOKEN' --add-to-git-credential && hf auth whoami"
+
+      - name: Run LIBERO-plus smoke eval (1 episode)
+        run: |
+          docker run --name libero-plus-eval --gpus all \
+            --shm-size=4g \
+            -e HF_HOME=/tmp/hf \
+            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
+            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
+            lerobot-benchmark-libero-plus:ci \
+            bash -c "
+              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
+              lerobot-eval \
+                --policy.path=pepijn223/smolvla_libero \
+                --env.type=libero_plus \
+                --env.task=libero_spatial \
+                --eval.batch_size=1 \
+                --eval.n_episodes=1 \
+                --eval.use_async_envs=false \
+                --policy.device=cuda \
+                '--env.camera_name_mapping={\"agentview_image\": \"camera1\", \"robot0_eye_in_hand_image\": \"camera2\"}' \
+                --policy.empty_cameras=1 \
+                --output_dir=/tmp/eval-artifacts
+            "
+
+      - name: Copy LIBERO-plus artifacts from container
+        if: always()
+        run: |
+          mkdir -p /tmp/libero-plus-artifacts
+          docker cp libero-plus-eval:/tmp/eval-artifacts/. /tmp/libero-plus-artifacts/ 2>/dev/null || true
+          docker rm -f libero-plus-eval || true
+
+      - name: Parse LIBERO-plus eval metrics
+        if: always()
+        run: |
+          python3 scripts/ci/parse_eval_metrics.py \
+            --artifacts-dir /tmp/libero-plus-artifacts \
+            --env libero_plus \
+            --task libero_spatial \
+            --policy pepijn223/smolvla_libero
+
+      - name: Upload LIBERO-plus rollout video
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: libero-plus-rollout-video
+          path: /tmp/libero-plus-artifacts/videos/
+          if-no-files-found: warn
+
+      - name: Upload LIBERO-plus eval metrics
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: libero-plus-metrics
+          path: /tmp/libero-plus-artifacts/metrics.json
+          if-no-files-found: warn
+
   # ── METAWORLD ─────────────────────────────────────────────────────────────
   # Isolated image: lerobot[metaworld] only (metaworld==3.0.0, mujoco>=3 chain)
   metaworld-integration-test:
diff --git a/docker/Dockerfile.benchmark.libero_plus b/docker/Dockerfile.benchmark.libero_plus
new file mode 100644
index 0000000000..c7d81988f8
--- /dev/null
+++ b/docker/Dockerfile.benchmark.libero_plus
@@ -0,0 +1,88 @@
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Isolated benchmark image for LIBERO-plus integration tests.
+# Installs only the libero_plus and smolvla extras of lerobot (LIBERO-plus from GitHub, dm-control, mujoco).
+#
+# Build:  docker build -f docker/Dockerfile.benchmark.libero_plus -t lerobot-benchmark-libero-plus .
+# Run:    docker run --gpus all --rm lerobot-benchmark-libero-plus lerobot-eval ...
+
+ARG CUDA_VERSION=12.4.1
+ARG OS_VERSION=22.04
+FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION}
+
+ARG PYTHON_VERSION=3.12
+
+ENV DEBIAN_FRONTEND=noninteractive \
+    MUJOCO_GL=egl \
+    PATH=/lerobot/.venv/bin:$PATH \
+    CUDA_VISIBLE_DEVICES=0 \
+    DEVICE=cuda
+
+# System deps — same set as Dockerfile.internal plus LIBERO-plus extras
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    software-properties-common build-essential git curl \
+    libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \
+    libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \
+    cmake pkg-config ninja-build \
+    libexpat1 libfontconfig1-dev libmagickwand-dev \
+    && add-apt-repository -y ppa:deadsnakes/ppa \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+       python${PYTHON_VERSION} \
+       python${PYTHON_VERSION}-venv \
+       python${PYTHON_VERSION}-dev \
+    && curl -LsSf https://astral.sh/uv/install.sh | sh \
+    && mv /root/.local/bin/uv /usr/local/bin/uv \
+    && useradd --create-home --shell /bin/bash user_lerobot \
+    && usermod -aG sudo user_lerobot \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /lerobot
+RUN chown -R user_lerobot:user_lerobot /lerobot
+USER user_lerobot
+
+ENV HOME=/home/user_lerobot \
+    HF_HOME=/home/user_lerobot/.cache/huggingface \
+    HF_LEROBOT_HOME=/home/user_lerobot/.cache/huggingface/lerobot \
+    TORCH_HOME=/home/user_lerobot/.cache/torch \
+    TRITON_CACHE_DIR=/home/user_lerobot/.cache/triton
+
+RUN uv venv --python python${PYTHON_VERSION}
+
+# Install only the libero_plus and smolvla extras — isolated from hf-libero and metaworld dep trees
+COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml uv.lock README.md MANIFEST.in ./
+COPY --chown=user_lerobot:user_lerobot src/ src/
+
+RUN uv sync --extra libero_plus --extra smolvla --no-cache
+
+# Pre-download libero assets so nothing is fetched at runtime (CI timeout risk).
+# libero/libero/__init__.py prompts with input() when ~/.libero/config.yaml is
+# missing; write the config first so any import is non-interactive.
+RUN LIBERO_DIR=$(python${PYTHON_VERSION} -c \
+      "import importlib.util, os; s=importlib.util.find_spec('libero'); \
+       print(os.path.join(os.path.dirname(s.origin), 'libero'))") && \
+    mkdir -p /home/user_lerobot/.libero && \
+    python${PYTHON_VERSION} -c "\
+from huggingface_hub import snapshot_download; \
+snapshot_download(repo_id='lerobot/libero-assets', repo_type='dataset', \
+                  local_dir='/home/user_lerobot/.libero/assets')" && \
+    printf "assets: /home/user_lerobot/.libero/assets\nbddl_files: ${LIBERO_DIR}/bddl_files\ndatasets: ${LIBERO_DIR}/../datasets\ninit_states: ${LIBERO_DIR}/init_files\n" \
+    > /home/user_lerobot/.libero/config.yaml
+
+RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas
+
+COPY --chown=user_lerobot:user_lerobot . .
+
+CMD ["/bin/bash"]
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index 3dcba5993e..16f14fc926 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -77,6 +77,8 @@
     title: Adding a New Benchmark
   - local: libero
     title: LIBERO
+  - local: libero_plus
+    title: LIBERO-plus
   - local: metaworld
     title: Meta-World
   - local: envhub_isaaclab_arena
diff --git a/docs/source/libero_plus.mdx b/docs/source/libero_plus.mdx
new file mode 100644
index 0000000000..a378042d53
--- /dev/null
+++ b/docs/source/libero_plus.mdx
@@ -0,0 +1,171 @@
+# LIBERO-plus
+
+LIBERO-plus is a **robustness benchmark** for Vision-Language-Action (VLA) models built on top of [LIBERO](./libero). It systematically stress-tests policies by applying **seven independent perturbation dimensions** to the original LIBERO task set, exposing failure modes that standard benchmarks miss.
+
+- Paper: [LIBERO-plus: A Robustness Benchmark for VLA Models](https://github.com/sylvestf/LIBERO-plus)
+- GitHub: [sylvestf/LIBERO-plus](https://github.com/sylvestf/LIBERO-plus)
+- Dataset: [pepijn223/libero_plus_lerobot](https://huggingface.co/datasets/pepijn223/libero_plus_lerobot)
+
+## Perturbation dimensions
+
+LIBERO-plus creates ~10 000 task variants by perturbing each original LIBERO task along these axes:
+
+| Dimension             | What changes                                          |
+| --------------------- | ----------------------------------------------------- |
+| Objects layout        | Target position, presence of confounding objects      |
+| Camera viewpoints     | Camera position, orientation, field-of-view           |
+| Robot initial states  | Manipulator start pose                                |
+| Language instructions | LLM-rewritten task description (paraphrase / synonym) |
+| Light conditions      | Intensity, direction, color, shadow                   |
+| Background textures   | Scene surface and object appearance                   |
+| Sensor noise          | Photometric distortions and image degradation         |
+
+## Available task suites
+
+LIBERO-plus covers the same five suites as LIBERO:
+
+| Suite          | CLI name         | Tasks | Max steps |
+| -------------- | ---------------- | ----- | --------- |
+| LIBERO-Spatial | `libero_spatial` | 10    | 280       |
+| LIBERO-Object  | `libero_object`  | 10    | 280       |
+| LIBERO-Goal    | `libero_goal`    | 10    | 300       |
+| LIBERO-90      | `libero_90`      | 90    | 400       |
+| LIBERO-Long    | `libero_10`      | 10    | 520       |
+
+## Installation
+
+### System dependencies (Linux only)
+
+```bash
+sudo apt install libexpat1 libfontconfig1-dev libmagickwand-dev
+```
+
+### Python package
+
+```bash
+pip install -e ".[libero_plus]"
+```
+
+This installs LIBERO-plus directly from its GitHub repository. Because MuJoCo is required, only Linux is supported.
+
+<Tip>
+Set the MuJoCo rendering backend before running evaluation:
+
+```bash
+export MUJOCO_GL=egl   # headless / HPC / cloud
+```
+
+</Tip>
+
+### Download LIBERO-plus assets
+
+LIBERO-plus ships its extended asset pack separately. Download `assets.zip` from the [Hugging Face dataset](https://huggingface.co/datasets/Sylvest/LIBERO-plus/tree/main) and extract it into the LIBERO-plus package directory:
+
+```bash
+# After installing the package, find where it was installed:
+python -c "import libero; print(libero.__file__)"
+# Then extract assets.zip into <package_root>/libero/assets/
+```
+
+## Evaluation
+
+### Minimal smoke-test (1 episode, no async)
+
+```bash
+lerobot-eval \
+    --policy.path=pepijn223/smolvla_libero \
+    --env.type=libero_plus \
+    --env.task=libero_spatial \
+    --eval.batch_size=1 \
+    --eval.n_episodes=1 \
+    --eval.use_async_envs=false \
+    --policy.device=cuda \
+    --env.camera_name_mapping='{"agentview_image": "camera1", "robot0_eye_in_hand_image": "camera2"}' \
+    --policy.empty_cameras=1
+```
+
+### Full robustness benchmark (recommended)
+
+```bash
+lerobot-eval \
+    --policy.path=<your-policy-id> \
+    --env.type=libero_plus \
+    --env.task=libero_spatial,libero_object,libero_goal,libero_10 \
+    --eval.batch_size=1 \
+    --eval.n_episodes=10 \
+    --env.max_parallel_tasks=1
+```
+
+### Key CLI flags
+
+| Flag                        | Description                                                      |
+| --------------------------- | ---------------------------------------------------------------- |
+| `--env.type=libero_plus`    | Selects LIBERO-plus environment (same gym interface as `libero`) |
+| `--env.task`                | Suite name(s), comma-separated                                   |
+| `--env.task_ids`            | Restrict to specific task indices, e.g. `[0,1,2]`                |
+| `--env.camera_name_mapping` | JSON dict remapping raw camera names to policy input keys        |
+| `--env.control_mode`        | `relative` (default) or `absolute`                               |
+| `--eval.use_async_envs`     | `true` for parallel rollouts (default), `false` for debugging    |
+| `--policy.empty_cameras`    | Number of camera slots without observations (policy-specific)    |
+
+### Camera name mapping
+
+By default, LIBERO cameras are mapped as:
+
+| Raw camera name            | LeRobot key                 |
+| -------------------------- | --------------------------- |
+| `agentview_image`          | `observation.images.image`  |
+| `robot0_eye_in_hand_image` | `observation.images.image2` |
+
+If your policy was trained with different key names, pass a JSON remapping:
+
+```bash
+--env.camera_name_mapping='{"agentview_image": "camera1", "robot0_eye_in_hand_image": "camera2"}'
+```
+
+## Policy inputs and outputs
+
+**Observations (after `LiberoProcessorStep`):**
+
+- `observation.state` — 8-dim proprioceptive vector: `[eef_pos(3), eef_axis_angle(3), gripper_qpos(2)]`
+- `observation.images.<name>` — camera image(s), flipped 180° to match VLA convention
+
+**Actions:**
+
+- `Box(-1, 1, shape=(7,))` — 6D end-effector delta + 1D gripper
+
+## Dataset
+
+A LeRobot-format training dataset for LIBERO-plus is available at:
+
+- [pepijn223/libero_plus_lerobot](https://huggingface.co/datasets/pepijn223/libero_plus_lerobot)
+
+### Example training command
+
+```bash
+lerobot-train \
+    --policy.type=smolvla \
+    --policy.repo_id=${HF_USER}/smolvla_libero_plus \
+    --policy.load_vlm_weights=true \
+    --dataset.repo_id=pepijn223/libero_plus_lerobot \
+    --env.type=libero_plus \
+    --env.task=libero_spatial \
+    --output_dir=./outputs/ \
+    --steps=100000 \
+    --batch_size=4 \
+    --eval.batch_size=1 \
+    --eval.n_episodes=1 \
+    --eval_freq=1000
+```
+
+## Relationship to LIBERO
+
+LIBERO-plus is a drop-in extension of LIBERO:
+
+- Same Python gym interface (`LiberoEnv`, `LiberoProcessorStep`)
+- Same camera names and observation/action format
+- Same task suite names
+- Installs under the same `libero` Python package name (different GitHub repo)
+- The only code differences in LeRobot are the `libero_plus` env config (a thin `LiberoEnv` subclass) and a try/except import fallback in `libero.py` that handles the slightly different package nesting in LIBERO-plus
+
+To use the original LIBERO benchmark, see [LIBERO](./libero) and use `--env.type=libero`.
diff --git a/pyproject.toml b/pyproject.toml
index 79409a2002..192db40b3c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -175,6 +175,11 @@ video_benchmark = ["scikit-image>=0.23.2,<0.26.0", "pandas>=2.2.2,<2.4.0"]
 aloha = ["gym-aloha>=0.1.2,<0.2.0", "lerobot[scipy-dep]"]
 pusht = ["gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead
 libero = ["lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0; sys_platform == 'linux'", "lerobot[scipy-dep]"]
+libero_plus = [
+    "lerobot[transformers-dep]",
+    "libero @ git+https://github.com/sylvestf/LIBERO-plus.git@main ; sys_platform == 'linux'",
+    "lerobot[scipy-dep]",
+]
 metaworld = ["metaworld==3.0.0", "lerobot[scipy-dep]"]
 
 # All
@@ -205,6 +210,7 @@ all = [
     "lerobot[pusht]",
     "lerobot[phone]",
     "lerobot[libero]; sys_platform == 'linux'",
+    "lerobot[libero_plus]; sys_platform == 'linux'",
     "lerobot[metaworld]",
     "lerobot[sarm]",
     "lerobot[peft]",
diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py
index c9c43757d5..46bddcc12d 100644
--- a/src/lerobot/envs/configs.py
+++ b/src/lerobot/envs/configs.py
@@ -445,6 +445,29 @@ def get_env_processors(self):
         )
 
 
+@EnvConfig.register_subclass("libero_plus")
+@dataclass
+class LiberoPlusEnv(LiberoEnv):
+    """Config for LIBERO-plus robustness benchmark evaluation.
+
+    LIBERO-plus extends LIBERO with 7 perturbation dimensions (camera viewpoints,
+    object layouts, robot initial states, language instructions, lighting, background
+    textures, sensor noise) producing ~10k task variants.
+
+    The gym interface is identical to LIBERO, so everything is inherited from ``LiberoEnv``;
+    only the registered type (``--env.type=libero_plus``) and the default suite differ.
+
+    Install::
+
+        pip install -e ".[libero_plus]"
+
+    See Also:
+        https://github.com/sylvestf/LIBERO-plus
+    """
+
+    task: str = "libero_spatial"  # default task suite (CLI: --env.task)
+
+
 @EnvConfig.register_subclass("metaworld")
 @dataclass
 class MetaworldEnv(EnvConfig):
diff --git a/src/lerobot/envs/libero.py b/src/lerobot/envs/libero.py
index 1b814db524..4e1da6007f 100644
--- a/src/lerobot/envs/libero.py
+++ b/src/lerobot/envs/libero.py
@@ -26,8 +26,15 @@
 import numpy as np
 import torch
 from gymnasium import spaces
-from libero.libero import benchmark, get_libero_path
-from libero.libero.envs import OffScreenRenderEnv
+
+try:
+    from libero.libero import benchmark, get_libero_path
+    from libero.libero.envs import OffScreenRenderEnv
+except ImportError:
+    # LIBERO-plus (https://github.com/sylvestf/LIBERO-plus) nests the package one level deeper.
+    # NOTE: any ImportError raised by the imports above also falls through to this path.
+    from libero.libero.libero import benchmark, get_libero_path  # type: ignore[no-redef]
+    from libero.libero.libero.envs import OffScreenRenderEnv  # type: ignore[no-redef]
 
 from lerobot.envs.utils import _LazyAsyncVectorEnv
 from lerobot.types import RobotObservation

From 097c4a0fb575fe9fa417ea2337a22202f7074cb7 Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Fri, 10 Apr 2026 14:33:20 +0200
Subject: [PATCH 2/3] ci: trigger benchmark CI after workflow update


From 2ce1e94ab7bcaa182f460fcd619e78364668dac8 Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Fri, 10 Apr 2026 14:42:56 +0200
Subject: [PATCH 3/3] =?UTF-8?q?ci:=20retrigger=20after=20uv=20sync=20?=
 =?UTF-8?q?=E2=86=92=20uv=20pip=20install=20fix?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit