From 7de92a91fc394df6c1a196f8bbd21cdea475ea1f Mon Sep 17 00:00:00 2001 From: CarolinePascal Date: Tue, 7 Apr 2026 15:01:56 +0200 Subject: [PATCH 1/2] fix(one shot load): adding metadata loading when reading from a dataset after writing --- src/lerobot/datasets/dataset_reader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lerobot/datasets/dataset_reader.py b/src/lerobot/datasets/dataset_reader.py index 3720a50847..210dcc453c 100644 --- a/src/lerobot/datasets/dataset_reader.py +++ b/src/lerobot/datasets/dataset_reader.py @@ -98,6 +98,7 @@ def try_load(self) -> bool: def load_and_activate(self) -> None: """Load HF dataset from disk and build index mapping. Call after data is on disk.""" + self._meta._load_metadata() self.hf_dataset = self._load_hf_dataset() self._build_index_mapping() From bc1076050c6d63b387d4c5df7fbb23b91cd4d9ea Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Fri, 10 Apr 2026 07:43:00 +0000 Subject: [PATCH 2/2] refactor(one shot load): move metadata reload to ensure_readable() on LeRobotDatasetMetadata Move the metadata reload from DatasetReader.load_and_activate() to a new public ensure_readable() method on LeRobotDatasetMetadata, called from LeRobotDataset._ensure_reader(). This places lifecycle management in the right layer: metadata owns its readiness check, the dataset orchestrates the write-to-read transition, and the reader stays clean. Also adds a regression test using delta_timestamps to exercise the meta.episodes access path in the create -> write -> finalize -> read flow. Co-authored-by: Steven Palma Co-authored-by: Claude Opus 4.6 --- src/lerobot/datasets/dataset_metadata.py | 10 ++++++++++ src/lerobot/datasets/dataset_reader.py | 1 - src/lerobot/datasets/lerobot_dataset.py | 1 + tests/datasets/test_lerobot_dataset.py | 25 ++++++++++++++++++++++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/lerobot/datasets/dataset_metadata.py b/src/lerobot/datasets/dataset_metadata.py index 65dbc9c4a1..d79f4bfba5 100644 --- a/src/lerobot/datasets/dataset_metadata.py +++ b/src/lerobot/datasets/dataset_metadata.py @@ -180,6 +180,16 @@ def _load_metadata(self): self.episodes = load_episodes(self.root) self.stats = load_stats(self.root) + def ensure_readable(self) -> None: + """Guarantee metadata is fully loaded for read operations. + + Idempotent — when metadata is already in memory this is a single + ``is None`` check. Call this before transitioning from write to + read mode on the same instance. + """ + if self.episodes is None: + self._load_metadata() + def _pull_from_repo( self, allow_patterns: list[str] | str | None = None, diff --git a/src/lerobot/datasets/dataset_reader.py b/src/lerobot/datasets/dataset_reader.py index 210dcc453c..3720a50847 100644 --- a/src/lerobot/datasets/dataset_reader.py +++ b/src/lerobot/datasets/dataset_reader.py @@ -98,7 +98,6 @@ def try_load(self) -> bool: def load_and_activate(self) -> None: """Load HF dataset from disk and build index mapping. Call after data is on disk.""" - self._meta._load_metadata() self.hf_dataset = self._load_hf_dataset() self._build_index_mapping() diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py index f719222fd6..80fbd59ccd 100644 --- a/src/lerobot/datasets/lerobot_dataset.py +++ b/src/lerobot/datasets/lerobot_dataset.py @@ -275,6 +275,7 @@ def _require_writer(self, method_name: str) -> None: def _ensure_reader(self) -> DatasetReader: """Lazily create the reader on first access.""" if self.reader is None: + self.meta.ensure_readable() self.reader = DatasetReader( meta=self.meta, root=self.root, diff --git a/tests/datasets/test_lerobot_dataset.py b/tests/datasets/test_lerobot_dataset.py index a8aa47ed29..5c3c24f99e 100644 --- a/tests/datasets/test_lerobot_dataset.py +++ b/tests/datasets/test_lerobot_dataset.py @@ -535,6 +535,31 @@ def test_getitem_works_after_finalize(tmp_path): assert "task" in item +def test_getitem_after_finalize_with_delta_timestamps(tmp_path): + """After finalize(), dataset[0] works when delta_timestamps require episode metadata. + + Regression test for https://github.com/huggingface/lerobot/pull/3305. + The create -> write -> finalize -> read path left meta.episodes as None + because the write path flushes episodes to disk without updating them + in memory. Features that access meta.episodes (video decoding, + delta_timestamps) would crash with a TypeError. + """ + dataset = LeRobotDataset.create( + repo_id=DUMMY_REPO_ID, fps=DEFAULT_FPS, features=SIMPLE_FEATURES, root=tmp_path / "ds" + ) + for _ in range(5): + dataset.add_frame(_make_frame()) + dataset.save_episode() + dataset.finalize() + + # Set delta_timestamps so get_item() accesses meta.episodes via _get_query_indices + dataset.delta_timestamps = {"state": [0.0]} + + item = dataset[0] + assert "state" in item + assert "state_is_pad" in item + + # ── Property delegation ──────────────────────────────────────────────