Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions .github/workflows/benchmark_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,100 @@ jobs:
path: /tmp/libero-train-smoke-artifacts/eval/
if-no-files-found: warn

# ── LIBERO-plus ───────────────────────────────────────────────────────────
# Isolated image built from docker/Dockerfile.benchmark.libero_plus
# (lerobot[libero_plus] + lerobot[smolvla]; LIBERO-plus itself comes from GitHub).
# The job builds the image, runs a single-episode smoke eval on a GPU runner,
# then copies the eval artifacts out of the container and uploads them.
libero-plus-integration-test:
  name: LIBERO-plus — build image + 1-episode eval
  runs-on:
    group: aws-g6-4xlarge-plus
  env:
    # May be empty when the secret is unavailable; the login step is skipped then.
    HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}

  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false
        lfs: true

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
      with:
        cache-binary: false

    # `load: true` makes the built image available to the `docker run` steps below.
    - name: Build LIBERO-plus benchmark image
      uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
      with:
        context: .
        file: docker/Dockerfile.benchmark.libero_plus
        push: false
        load: true
        tags: lerobot-benchmark-libero-plus:ci
        cache-from: type=local,src=/tmp/.buildx-cache-libero-plus
        cache-to: type=local,dest=/tmp/.buildx-cache-libero-plus,mode=max

    # Token sanity check only: the container is removed (`--rm`), so nothing it
    # writes persists; the eval step logs in again inside its own container.
    # The token is forwarded with `-e` and expanded by the shell *inside* the
    # container (single-quoted on the host), so the secret never appears in the
    # host-side `docker run` command line and quotes in the token cannot break
    # the command.
    - name: Login to Hugging Face
      if: env.HF_USER_TOKEN != ''
      run: |
        docker run --rm \
          -e HF_HOME=/tmp/hf \
          -e HF_USER_TOKEN \
          lerobot-benchmark-libero-plus:ci \
          bash -c 'hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential && hf auth whoami'

    # The container is named (and not `--rm`) so the next step can `docker cp`
    # the artifacts out of it. Login is best-effort (`|| true`); the step's exit
    # status is that of `lerobot-eval`.
    - name: Run LIBERO-plus smoke eval (1 episode)
      run: |
        docker run --name libero-plus-eval --gpus all \
          --shm-size=4g \
          -e HF_HOME=/tmp/hf \
          -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
          -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
          lerobot-benchmark-libero-plus:ci \
          bash -c "
            hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
            lerobot-eval \
              --policy.path=pepijn223/smolvla_libero \
              --env.type=libero_plus \
              --env.task=libero_spatial \
              --eval.batch_size=1 \
              --eval.n_episodes=1 \
              --eval.use_async_envs=false \
              --policy.device=cuda \
              '--env.camera_name_mapping={\"agentview_image\": \"camera1\", \"robot0_eye_in_hand_image\": \"camera2\"}' \
              --policy.empty_cameras=1 \
              --output_dir=/tmp/eval-artifacts
          "

    # Runs even when the eval failed; copy errors are ignored so that the
    # container is always removed afterwards.
    - name: Copy LIBERO-plus artifacts from container
      if: always()
      run: |
        mkdir -p /tmp/libero-plus-artifacts
        docker cp libero-plus-eval:/tmp/eval-artifacts/. /tmp/libero-plus-artifacts/ 2>/dev/null || true
        docker rm -f libero-plus-eval || true

    - name: Parse LIBERO-plus eval metrics
      if: always()
      run: |
        python3 scripts/ci/parse_eval_metrics.py \
          --artifacts-dir /tmp/libero-plus-artifacts \
          --env libero_plus \
          --task libero_spatial \
          --policy pepijn223/smolvla_libero

    - name: Upload LIBERO-plus rollout video
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: libero-plus-rollout-video
        path: /tmp/libero-plus-artifacts/videos/
        if-no-files-found: warn

    - name: Upload LIBERO-plus eval metrics
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: libero-plus-metrics
        path: /tmp/libero-plus-artifacts/metrics.json
        if-no-files-found: warn

# ── METAWORLD ─────────────────────────────────────────────────────────────
# Isolated image: lerobot[metaworld] only (metaworld==3.0.0, mujoco>=3 chain)
metaworld-integration-test:
Expand Down
88 changes: 88 additions & 0 deletions docker/Dockerfile.benchmark.libero_plus
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Isolated benchmark image for LIBERO-plus integration tests.
# Installs the lerobot `libero_plus` and `smolvla` extras (LIBERO-plus itself is
# pulled from GitHub); see the `uv sync` step below.
#
# Build: docker build -f docker/Dockerfile.benchmark.libero_plus -t lerobot-benchmark-libero-plus .
# Run: docker run --gpus all --rm lerobot-benchmark-libero-plus lerobot-eval ...

# Base image is parameterised so the CUDA / Ubuntu versions can be overridden at build time.
ARG CUDA_VERSION=12.4.1
ARG OS_VERSION=22.04
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION}

# Declared after FROM so it is visible to the RUN instructions of this stage.
ARG PYTHON_VERSION=3.12

# MUJOCO_GL=egl selects the headless rendering backend; PATH puts the project
# virtualenv's bin directory (created further down with `uv venv`) first.
ENV DEBIAN_FRONTEND=noninteractive \
MUJOCO_GL=egl \
PATH=/lerobot/.venv/bin:$PATH \
CUDA_VISIBLE_DEVICES=0 \
DEVICE=cuda

# System deps — same set as Dockerfile.internal plus LIBERO-plus extras
# (libexpat1, libfontconfig1-dev, libmagickwand-dev). The same layer also adds the
# deadsnakes PPA for python${PYTHON_VERSION}, installs uv into /usr/local/bin and
# creates the unprivileged `user_lerobot` account used for the rest of the build.
RUN apt-get update && apt-get install -y --no-install-recommends \
software-properties-common build-essential git curl \
libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \
libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \
cmake pkg-config ninja-build \
libexpat1 libfontconfig1-dev libmagickwand-dev \
&& add-apt-repository -y ppa:deadsnakes/ppa \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
python${PYTHON_VERSION} \
python${PYTHON_VERSION}-venv \
python${PYTHON_VERSION}-dev \
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
&& mv /root/.local/bin/uv /usr/local/bin/uv \
&& useradd --create-home --shell /bin/bash user_lerobot \
&& usermod -aG sudo user_lerobot \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

# Everything from here on runs as user_lerobot inside /lerobot.
WORKDIR /lerobot
RUN chown -R user_lerobot:user_lerobot /lerobot
USER user_lerobot

# Keep all Hugging Face / torch / triton caches under the user's home directory.
ENV HOME=/home/user_lerobot \
HF_HOME=/home/user_lerobot/.cache/huggingface \
HF_LEROBOT_HOME=/home/user_lerobot/.cache/huggingface/lerobot \
TORCH_HOME=/home/user_lerobot/.cache/torch \
TRITON_CACHE_DIR=/home/user_lerobot/.cache/triton

RUN uv venv --python python${PYTHON_VERSION}

# Install the `libero_plus` and `smolvla` extras only — isolated from the
# hf-libero and metaworld dep trees. Only the packaging metadata and src/ are
# copied at this point; the rest of the repository is copied at the end.
COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml uv.lock README.md MANIFEST.in ./
COPY --chown=user_lerobot:user_lerobot src/ src/

RUN uv sync --extra libero_plus --extra smolvla --no-cache

# Pre-download libero assets so nothing is fetched at runtime (CI timeout risk).
# libero/libero/__init__.py prompts with input() when ~/.libero/config.yaml is
# missing; write the config first so any import is non-interactive.
# Steps: locate the installed `libero` package via find_spec, download the
# `lerobot/libero-assets` dataset into ~/.libero/assets, then write config.yaml
# pointing at those assets and at the package's bddl_files / init_files dirs.
# NOTE(review): assumes find_spec('libero') yields a spec with a non-None
# `origin` (i.e. a regular package, not a namespace package) — confirm.
RUN LIBERO_DIR=$(python${PYTHON_VERSION} -c \
"import importlib.util, os; s=importlib.util.find_spec('libero'); \
print(os.path.join(os.path.dirname(s.origin), 'libero'))") && \
mkdir -p /home/user_lerobot/.libero && \
python${PYTHON_VERSION} -c "\
from huggingface_hub import snapshot_download; \
snapshot_download(repo_id='lerobot/libero-assets', repo_type='dataset', \
local_dir='/home/user_lerobot/.libero/assets')" && \
printf "assets: /home/user_lerobot/.libero/assets\nbddl_files: ${LIBERO_DIR}/bddl_files\ndatasets: ${LIBERO_DIR}/../datasets\ninit_states: ${LIBERO_DIR}/init_files\n" \
> /home/user_lerobot/.libero/config.yaml

# Mark the ptxas binary bundled with the triton package as executable.
# NOTE(review): presumably it is installed without the exec bit — confirm.
RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas

# Copy the remainder of the repository on top of the installed environment.
COPY --chown=user_lerobot:user_lerobot . .

CMD ["/bin/bash"]
2 changes: 2 additions & 0 deletions docs/source/_toctree.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@
title: Adding a New Benchmark
- local: libero
title: LIBERO
- local: libero_plus
title: LIBERO-plus
- local: metaworld
title: Meta-World
- local: envhub_isaaclab_arena
Expand Down
171 changes: 171 additions & 0 deletions docs/source/libero_plus.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
# LIBERO-plus

LIBERO-plus is a **robustness benchmark** for Vision-Language-Action (VLA) models built on top of [LIBERO](./libero). It systematically stress-tests policies by applying **seven independent perturbation dimensions** to the original LIBERO task set, exposing failure modes that standard benchmarks miss.

- Paper: [LIBERO-Plus: In-depth Robustness Analysis of Vision-Language-Action Models](https://arxiv.org/abs/2510.13626)
- GitHub: [sylvestf/LIBERO-plus](https://github.com/sylvestf/LIBERO-plus)
- Dataset: [pepijn223/libero_plus_lerobot](https://huggingface.co/datasets/pepijn223/libero_plus_lerobot)

## Perturbation dimensions

LIBERO-plus creates ~10 000 task variants by perturbing each original LIBERO task along these axes:

| Dimension | What changes |
| --------------------- | ----------------------------------------------------- |
| Objects layout | Target position, presence of confounding objects |
| Camera viewpoints | Camera position, orientation, field-of-view |
| Robot initial states | Manipulator start pose |
| Language instructions | LLM-rewritten task description (paraphrase / synonym) |
| Light conditions | Intensity, direction, color, shadow |
| Background textures | Scene surface and object appearance |
| Sensor noise | Photometric distortions and image degradation |

## Available task suites

LIBERO-plus covers the same five suites as LIBERO:

| Suite | CLI name | Tasks | Max steps |
| -------------- | ---------------- | ----- | --------- |
| LIBERO-Spatial | `libero_spatial` | 10 | 280 |
| LIBERO-Object | `libero_object` | 10 | 280 |
| LIBERO-Goal | `libero_goal` | 10 | 300 |
| LIBERO-90 | `libero_90` | 90 | 400 |
| LIBERO-Long | `libero_10` | 10 | 520 |

## Installation

### System dependencies (Linux only)

```bash
sudo apt install libexpat1 libfontconfig1-dev libmagickwand-dev
```

### Python package

```bash
pip install -e ".[libero_plus]"
```

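This installs LIBERO-plus directly from its GitHub repository. The dependency is declared with a `sys_platform == 'linux'` marker, so it is only installed on Linux; on other platforms the extra resolves without the simulator.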

<Tip>
Set the MuJoCo rendering backend before running evaluation:

```bash
export MUJOCO_GL=egl # headless / HPC / cloud
```

</Tip>

### Download LIBERO-plus assets

LIBERO-plus ships its extended asset pack separately. Download `assets.zip` from the [Hugging Face dataset](https://huggingface.co/datasets/Sylvest/LIBERO-plus/tree/main) and extract it into the LIBERO-plus package directory:

```bash
# After installing the package, find where it was installed:
python -c "import libero; print(libero.__file__)"
# Then extract assets.zip into <package_root>/libero/assets/
```

## Evaluation

### Minimal smoke-test (1 episode, no async)

```bash
lerobot-eval \
--policy.path=pepijn223/smolvla_libero \
--env.type=libero_plus \
--env.task=libero_spatial \
--eval.batch_size=1 \
--eval.n_episodes=1 \
--eval.use_async_envs=false \
--policy.device=cuda \
--env.camera_name_mapping='{"agentview_image": "camera1", "robot0_eye_in_hand_image": "camera2"}' \
--policy.empty_cameras=1
```

### Full robustness benchmark (recommended)

```bash
lerobot-eval \
--policy.path=<your-policy-id> \
--env.type=libero_plus \
--env.task=libero_spatial,libero_object,libero_goal,libero_10 \
--eval.batch_size=1 \
--eval.n_episodes=10 \
--env.max_parallel_tasks=1
```

### Key CLI flags

| Flag | Description |
| --------------------------- | ---------------------------------------------------------------- |
| `--env.type=libero_plus` | Selects LIBERO-plus environment (same gym interface as `libero`) |
| `--env.task` | Suite name(s), comma-separated |
| `--env.task_ids` | Restrict to specific task indices, e.g. `[0,1,2]` |
| `--env.camera_name_mapping` | JSON dict remapping raw camera names to policy input keys |
| `--env.control_mode` | `relative` (default) or `absolute` |
| `--eval.use_async_envs` | `true` for parallel rollouts (default), `false` for debugging |
| `--policy.empty_cameras` | Number of camera slots without observations (policy-specific) |

### Camera name mapping

By default, LIBERO cameras are mapped as:

| Raw camera name | LeRobot key |
| -------------------------- | --------------------------- |
| `agentview_image` | `observation.images.image` |
| `robot0_eye_in_hand_image` | `observation.images.image2` |

If your policy was trained with different key names, pass a JSON remapping:

```bash
--env.camera_name_mapping='{"agentview_image": "camera1", "robot0_eye_in_hand_image": "camera2"}'
```

## Policy inputs and outputs

**Observations (after `LiberoProcessorStep`):**

- `observation.state` — 8-dim proprioceptive vector: `[eef_pos(3), eef_axis_angle(3), gripper_qpos(2)]`
- `observation.images.<name>` — camera image(s), flipped 180° to match VLA convention

**Actions:**

- `Box(-1, 1, shape=(7,))` — 6D end-effector delta + 1D gripper

## Dataset

A LeRobot-format training dataset for LIBERO-plus is available at:

- [pepijn223/libero_plus_lerobot](https://huggingface.co/datasets/pepijn223/libero_plus_lerobot)

### Example training command

```bash
lerobot-train \
--policy.type=smolvla \
--policy.repo_id=${HF_USER}/smolvla_libero_plus \
--policy.load_vlm_weights=true \
--dataset.repo_id=pepijn223/libero_plus_lerobot \
--env.type=libero_plus \
--env.task=libero_spatial \
--output_dir=./outputs/ \
--steps=100000 \
--batch_size=4 \
--eval.batch_size=1 \
--eval.n_episodes=1 \
--eval_freq=1000
```

## Relationship to LIBERO

LIBERO-plus is a drop-in extension of LIBERO:

- Same Python gym interface (`LiberoEnv`, `LiberoProcessorStep`)
- Same camera names and observation/action format
- Same task suite names
- Installs under the same `libero` Python package name (different GitHub repo)
- The only code difference in LeRobot is a try/except import fallback in `libero.py` that handles the slightly different package nesting in LIBERO-plus

To use the original LIBERO benchmark, see [LIBERO](./libero) and use `--env.type=libero`.
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ video_benchmark = ["scikit-image>=0.23.2,<0.26.0", "pandas>=2.2.2,<2.4.0"]
aloha = ["gym-aloha>=0.1.2,<0.2.0", "lerobot[scipy-dep]"]
pusht = ["gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead
libero = ["lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0; sys_platform == 'linux'", "lerobot[scipy-dep]"]
# LIBERO-plus benchmark: the `libero` distribution is installed straight from the
# sylvestf/LIBERO-plus GitHub repository, and only on Linux (environment marker).
# NOTE(review): the requirement tracks the moving `main` branch, so installs are
# not reproducible — consider pinning to a tag or commit SHA.
# NOTE(review): presumably provides the same top-level `libero` package as
# `hf-libero` from the `libero` extra — verify both extras can coexist before
# installing them together.
libero_plus = [
"lerobot[transformers-dep]",
"libero @ git+https://github.com/sylvestf/LIBERO-plus.git@main ; sys_platform == 'linux'",
"lerobot[scipy-dep]",
]
metaworld = ["metaworld==3.0.0", "lerobot[scipy-dep]"]

# All
Expand Down Expand Up @@ -205,6 +210,7 @@ all = [
"lerobot[pusht]",
"lerobot[phone]",
"lerobot[libero]; sys_platform == 'linux'",
"lerobot[libero_plus]; sys_platform == 'linux'",
"lerobot[metaworld]",
"lerobot[sarm]",
"lerobot[peft]",
Expand Down
Loading