Skip to content
Open
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
c750514
feat: Kornia GPU augmentation backend for detection training
Borda Mar 25, 2026
17043fe
fix(pre-commit): 🎨 auto format pre-commit hooks
pre-commit-ci[bot] Mar 25, 2026
eee9c01
Apply suggestions from code review
Borda Mar 25, 2026
c982914
fix: wire gpu_postprocess through build_o365_raw to prevent double no…
Borda Mar 26, 2026
2b6a7d0
fix: add _kornia_setup_done sentinel; promote auto fallback log to WA…
Borda Mar 26, 2026
c12bdda
fix: blur_limit min 3; docstrings for gpu_postprocess and unpack_boxes
Borda Mar 26, 2026
51754ff
test: add coverage for blur_limit edge, o365 guard, sentinel re-run, …
Borda Mar 26, 2026
55d4bc6
fix: GPU path falls back to AUG_CONFIG not {} when aug_config=None
Borda Mar 26, 2026
e969361
Apply suggestions from code review
Borda Mar 26, 2026
de9017c
fix(pre-commit): 🎨 auto format pre-commit hooks
pre-commit-ci[bot] Mar 26, 2026
b517771
Merge branch 'develop' into aug/kornia
Borda Mar 26, 2026
c31f834
fix: square gaussian kernel; device-move Kornia modules; pre-resolve …
Borda Mar 26, 2026
2aa18f5
fix: add missing kornia import in o365.py has_kornia check
Borda Mar 26, 2026
5ab9bab
Merge branch 'develop' into aug/kornia
Borda Apr 9, 2026
26c7c80
Merge branch 'develop' into aug/kornia
Borda Apr 9, 2026
212df68
fix: add `# type: ignore` to kornia imports; update mypy ignores for …
Borda Apr 9, 2026
b8e80e1
refactor: replace `torch.cuda.is_available` with `_has_cuda_device` a…
Borda Apr 9, 2026
ea5d30d
refactor: standardize variable naming for clarity; update kornia impo…
Borda Apr 9, 2026
d504dca
refactor: consolidate backend resolution; fix fork-unsafe CUDA detection
Borda Apr 9, 2026
38d719d
refactor: remove mutable args mutation in build_coco()
Borda Apr 9, 2026
c6cbd0c
fix: add type: ignore[import-not-found] to kornia imports in module_d…
Borda Apr 9, 2026
2e658a7
docs: document gpu_postprocess in make_coco_transforms* top-level doc…
Borda Apr 9, 2026
c5ec7dc
lint: modernise typing and fix F401 noqa annotations
Borda Apr 9, 2026
300e270
fix(tests): update torch.cuda.is_available patch target in test_coco.py
Borda Apr 9, 2026
19a6ee2
fix: build_coco now validates 'gpu' backend via _resolve_runtime_augm…
Borda Apr 9, 2026
77bea4f
Update src/rfdetr/training/module_data.py
Borda Apr 9, 2026
fb5a618
fix: validate augmentation_backend and fix _make_affine translate
Borda Apr 9, 2026
57925d2
fix(test): correct auto+no-CUDA patch target in test_yolo
Borda Apr 9, 2026
956b71e
Merge branch 'develop' into aug/kornia
Borda Apr 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- `augmentation_backend` field on `TrainConfig` (`"cpu"` / `"auto"` / `"gpu"`): opt-in GPU-side augmentation via [Kornia](https://kornia.readthedocs.io) applied in `RFDETRDataModule.on_after_batch_transfer` after the batch is resident on the GPU. CPU path is unchanged and remains the default. Install with `pip install 'rfdetr[kornia]'`. Phase 1 supports detection only; segmentation mask support is planned for Phase 2.
- `BuilderArgs` — a `@runtime_checkable` `typing.Protocol` documenting the minimum attribute set consumed by `build_model()`, `build_backbone()`, `build_transformer()`, and `build_criterion_and_postprocessors()`. Enables static type-checker support for custom builder integrations. Exported from `rfdetr.models`. (#841)
- `build_model_from_config(model_config, train_config=None, defaults=MODEL_DEFAULTS)` — config-native alternative to `build_model(build_namespace(mc, tc))`; accepts Pydantic config objects directly and constructs the internal namespace automatically. Exported from `rfdetr.models`. (#845)
- `build_criterion_from_config(model_config, train_config, defaults=MODEL_DEFAULTS)` — config-native alternative to `build_criterion_and_postprocessors(build_namespace(mc, tc))`; returns a `(SetCriterion, PostProcess)` tuple. Exported from `rfdetr.models`. (#845)
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ trt = [
"tensorrt>=8.6.1",
"polygraphy",
]
kornia = [
"kornia>=0.7,<1", # GPU-side augmentation via on_after_batch_transfer
]
loggers = [
"tensorboard>=2.13.0",
"protobuf>=3.20.0,<4.0.0", # Pins protobuf below 4.x to avoid TensorBoard descriptor crash with protobuf>=4 (see #844)
Expand Down Expand Up @@ -214,6 +217,7 @@ overrides = [
"requests", "requests.*",
"seaborn", "seaborn.*",
"tqdm", "tqdm.*",
"kornia", "kornia.*",
], ignore_missing_imports = true },
# Modules with pre-existing type errors — ignored until incrementally fixed.
{ module = [
Expand Down
1 change: 1 addition & 0 deletions src/rfdetr/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ class TrainConfig(BaseModel):
eval_interval: int = 1
log_per_class_metrics: bool = True
aug_config: Optional[Dict[str, Any]] = None
augmentation_backend: Literal["cpu", "auto", "gpu"] = "cpu"

@model_validator(mode="after")
def _warn_deprecated_train_config_fields(self) -> "TrainConfig":
Expand Down
21 changes: 21 additions & 0 deletions src/rfdetr/datasets/aug_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,27 @@
"YourCustomTransform", # Add here
}
```
## Kornia GPU Backend
When ``augmentation_backend="auto"`` or ``"gpu"`` is set in ``TrainConfig``, augmentations
run on the GPU via Kornia instead of Albumentations.
**Supported transforms** (all presets):
| Preset key | Kornia equivalent | Notes |
|---|---|---|
| ``HorizontalFlip`` | ``K.RandomHorizontalFlip`` | Direct |
| ``VerticalFlip`` | ``K.RandomVerticalFlip`` | Direct |
| ``Rotate`` | ``K.RandomRotation`` | ``limit`` may be scalar or tuple |
| ``Affine`` | ``K.RandomAffine`` | ``translate_percent`` treated as fraction |
| ``ColorJitter`` | ``K.ColorJiggle`` | Same multiplicative semantics |
| ``RandomBrightnessContrast`` | ``K.ColorJiggle`` | ``brightness_limit`` / ``contrast_limit`` direct |
| ``GaussianBlur`` | ``K.RandomGaussianBlur`` | ``blur_limit`` rounded up to odd; ``sigma=(0.1, 2.0)`` |
| ``GaussNoise`` | ``K.RandomGaussianNoise`` | Upper bound of ``std_range`` used as fixed std |
**Phase 1 limitation**: Segmentation models (``segmentation_head=True``) skip GPU augmentation;
CPU Albumentations are used instead. Mask support is planned for Phase 2.
"""

# ---------------------------------------------------------------------------
Expand Down
120 changes: 94 additions & 26 deletions src/rfdetr/datasets/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"""

from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any

import torch
import torch.utils.data
Expand All @@ -44,7 +44,7 @@ def compute_multi_scale_scales(
expanded_scales: bool = False,
patch_size: int = 16,
num_windows: int = 4,
) -> List[int]:
) -> list[int]:
# round to the nearest multiple of 4*patch_size to enable both patching and windowing
base_num_patches_per_window = resolution // (patch_size * num_windows)
offsets = [-3, -2, -1, 0, 1, 2, 3, 4] if not expanded_scales else [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5]
Expand Down Expand Up @@ -74,7 +74,7 @@ def _is_rle(segmentation: Any) -> bool:
return isinstance(segmentation, dict) and "counts" in segmentation and "size" in segmentation


def convert_coco_poly_to_mask(segmentations: List[Any], height: int, width: int) -> torch.Tensor:
def convert_coco_poly_to_mask(segmentations: list[Any], height: int, width: int) -> torch.Tensor:
"""Convert COCO segmentation annotations to a binary mask tensor of shape ``[N, H, W]``.

Supports both polygon and RLE (Run-Length Encoding) annotation formats.
Expand Down Expand Up @@ -174,13 +174,13 @@ class CocoDetection(torchvision.datasets.CocoDetection):

def __init__(
self,
img_folder: Union[str, Path],
ann_file: Union[str, Path],
transforms: Optional[Any],
img_folder: str | Path,
ann_file: str | Path,
transforms: Any | None,
include_masks: bool = False,
remap_category_ids: bool = False,
) -> None:
super(CocoDetection, self).__init__(img_folder, ann_file)
super().__init__(img_folder, ann_file)
self._transforms = transforms
self.include_masks = include_masks
if remap_category_ids:
Expand All @@ -189,14 +189,14 @@ def __init__(
# Reverse mapping from contiguous label indices back to COCO category_id
self.label2cat = {label: cat_id for cat_id, label in self.cat2label.items()}
# Expose label-to-category mapping on the underlying COCO API object for evaluators
setattr(self.coco, "label2cat", self.label2cat)
self.coco.label2cat = self.label2cat
else:
self.cat2label = None
self.label2cat = None
self.prepare = ConvertCoco(include_masks=include_masks, cat2label=self.cat2label)

def __getitem__(self, idx: int) -> Tuple[Any, Any]:
img, target = super(CocoDetection, self).__getitem__(idx)
def __getitem__(self, idx: int) -> tuple[Any, Any]:
img, target = super().__getitem__(idx)
image_id = self.ids[idx]
target = {"image_id": image_id, "annotations": target}
img, target = self.prepare(img, target)
Expand All @@ -207,7 +207,7 @@ def __getitem__(self, idx: int) -> Tuple[Any, Any]:
return img, target


class ConvertCoco(object):
class ConvertCoco:
"""Convert a raw COCO annotation dict into model-ready tensors.

Accepts the ``(image, target)`` pair produced by
Expand Down Expand Up @@ -237,11 +237,11 @@ class ConvertCoco(object):
that labels stay within the model's output range.
"""

def __init__(self, include_masks: bool = False, cat2label: Optional[Dict[int, int]] = None) -> None:
def __init__(self, include_masks: bool = False, cat2label: dict[int, int] | None = None) -> None:
self.include_masks = include_masks
self.cat2label = cat2label

def __call__(self, image: Image.Image, target: Dict[str, Any]) -> Tuple[Image.Image, Dict[str, Any]]:
def __call__(self, image: Image.Image, target: dict[str, Any]) -> tuple[Image.Image, dict[str, Any]]:
w, h = image.size

image_id = target["image_id"]
Expand All @@ -258,7 +258,7 @@ def __call__(self, image: Image.Image, target: Dict[str, Any]) -> Tuple[Image.Im
boxes[:, 0::2].clamp_(min=0, max=w)
boxes[:, 1::2].clamp_(min=0, max=h)

classes: List[int] = []
classes: list[int] = []
for obj in anno:
category_id = obj["category_id"]
if getattr(self, "cat2label", None) is not None:
Expand All @@ -283,7 +283,7 @@ def __call__(self, image: Image.Image, target: Dict[str, Any]) -> Tuple[Image.Im

# for conversion to coco api
area = torch.tensor([obj["area"] for obj in anno])
iscrowd = torch.tensor([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno])
iscrowd = torch.tensor([obj.get("iscrowd", 0) for obj in anno])
target["area"] = area[keep]
target["iscrowd"] = iscrowd[keep]

Expand All @@ -308,11 +308,11 @@ def __call__(self, image: Image.Image, target: Dict[str, Any]) -> Tuple[Image.Im


def _build_train_resize_config(
scales: List[int],
scales: list[int],
*,
square: bool,
max_size: Optional[int] = None,
) -> List[Dict[str, Any]]:
max_size: int | None = None,
) -> list[dict[str, Any]]:
"""Build the training resize pipeline as an Albumentations config list.

Expresses the ``RandomSelect(resize_a, Compose([resize_b1, crop, resize_b2]))``
Expand All @@ -337,12 +337,12 @@ def _build_train_resize_config(
A single-element list containing a ``OneOf`` config entry.
"""
if square:
option_a: Dict[str, Any] = {
option_a: dict[str, Any] = {
"OneOf": {
"transforms": [{"Resize": {"height": s, "width": s}} for s in scales],
}
}
option_b: Dict[str, Any] = {
option_b: dict[str, Any] = {
"Sequential": {
"transforms": [
{"SmallestMaxSize": {"max_size": [400, 500, 600]}},
Expand Down Expand Up @@ -391,7 +391,8 @@ def make_coco_transforms(
skip_random_resize: bool = False,
patch_size: int = 16,
num_windows: int = 4,
aug_config: Optional[Dict[str, Dict[str, Any]]] = None,
aug_config: dict[str, dict[str, Any]] | None = None,
gpu_postprocess: bool = False,
) -> Compose:
"""Build the standard COCO transform pipeline for a given dataset split.

Expand All @@ -405,6 +406,11 @@ def make_coco_transforms(
normalisation. For ``"val"``, ``"test"``, and ``"val_speed"`` only resize and
normalisation are applied — no augmentation.

When *gpu_postprocess* is ``True``, both the Albumentations augmentation
wrappers and the ``Normalize`` step are omitted from the ``"train"`` pipeline.
The ``RFDETRDataModule`` then applies augmentation and normalization on the
device in ``on_after_batch_transfer`` instead.

Args:
image_set: Dataset split identifier — ``"train"``, ``"val"``, ``"test"``,
or ``"val_speed"``.
Expand All @@ -425,6 +431,10 @@ def make_coco_transforms(
:class:`~rfdetr.datasets.transforms.AlbumentationsWrapper`. Falls back
to the default :data:`~rfdetr.datasets.aug_config.AUG_CONFIG` when
``None``.
gpu_postprocess: When ``True``, skip Albumentations augmentation wrappers and
``Normalize`` from the CPU pipeline. The ``RFDETRDataModule`` then applies
both augmentation and normalization on the GPU in
``on_after_batch_transfer``. Has no effect on val/test splits.

Returns:
A :class:`torchvision.transforms.v2.Compose` pipeline ready to be passed
Expand All @@ -450,8 +460,14 @@ def make_coco_transforms(
resize_wrappers = AlbumentationsWrapper.from_config(
_build_train_resize_config(scales, square=False, max_size=1333)
)
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
return Compose([*resize_wrappers, *aug_wrappers, to_image, to_float, normalize])
pipeline = [*resize_wrappers]
if not gpu_postprocess:
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
pipeline += [*aug_wrappers]
pipeline += [to_image, to_float]
if not gpu_postprocess:
pipeline += [normalize]
return Compose(pipeline)

if image_set in ("val", "test"):
resize_wrappers = AlbumentationsWrapper.from_config(
Expand All @@ -476,7 +492,8 @@ def make_coco_transforms_square_div_64(
skip_random_resize: bool = False,
patch_size: int = 16,
num_windows: int = 4,
aug_config: Optional[Dict[str, Dict[str, Any]]] = None,
aug_config: dict[str, dict[str, Any]] | None = None,
gpu_postprocess: bool = False,
) -> Compose:
"""
Create COCO transforms with square resizing where the output size is divisible by 64.
Expand All @@ -486,6 +503,11 @@ def make_coco_transforms_square_div_64(
divisible by 64. It supports multi-scale training and optional random resizing and
cropping for the training split.

When *gpu_postprocess* is ``True``, both the Albumentations augmentation
wrappers and the ``Normalize`` step are omitted from the ``"train"`` pipeline.
The ``RFDETRDataModule`` then applies augmentation and normalization on the
device in ``on_after_batch_transfer`` instead.

Args:
image_set: Dataset split identifier. Expected values are "train", "val",
"test", or "val_speed". Each split uses a slightly different transform
Expand All @@ -506,6 +528,10 @@ def make_coco_transforms_square_div_64(
aug_config: Augmentation configuration dictionary compatible with
:class:`~rfdetr.datasets.transforms.AlbumentationsWrapper`. If ``None``,
the default :data:`~rfdetr.datasets.aug_config.AUG_CONFIG` is used.
gpu_postprocess: When ``True``, skip Albumentations augmentation wrappers and
``Normalize`` from the CPU pipeline. The ``RFDETRDataModule`` then applies
both augmentation and normalization on the GPU in
``on_after_batch_transfer``. Has no effect on val/test splits.

Returns:
A ``Compose`` object containing the composed image transforms appropriate
Expand All @@ -526,8 +552,14 @@ def make_coco_transforms_square_div_64(
if image_set == "train":
resolved_aug_config = aug_config if aug_config is not None else AUG_CONFIG
resize_wrappers = AlbumentationsWrapper.from_config(_build_train_resize_config(scales, square=True))
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
return Compose([*resize_wrappers, *aug_wrappers, to_image, to_float, normalize])
pipeline = [*resize_wrappers]
if not gpu_postprocess:
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
pipeline += [*aug_wrappers]
pipeline += [to_image, to_float]
if not gpu_postprocess:
pipeline += [normalize]
return Compose(pipeline)

if image_set in ("val", "test", "val_speed"):
resize_wrappers = AlbumentationsWrapper.from_config([{"Resize": {"height": resolution, "width": resolution}}])
Expand All @@ -554,6 +586,22 @@ def build_coco(image_set: str, args: Any, resolution: int) -> CocoDetection:
square_resize_div_64 = getattr(args, "square_resize_div_64", False)
include_masks = getattr(args, "segmentation_head", False)
aug_config = getattr(args, "aug_config", None)
augmentation_backend = getattr(args, "augmentation_backend", "cpu")
resolved_augmentation_backend = augmentation_backend
if include_masks and augmentation_backend != "cpu":
logger.warning(
"Segmentation training does not currently support GPU postprocess transforms; "
"forcing augmentation_backend='cpu' to retain CPU transforms and normalization."
)
resolved_augmentation_backend = "cpu"
if resolved_augmentation_backend != "cpu":
resolved_augmentation_backend = _resolve_runtime_augmentation_backend(resolved_augmentation_backend)
if resolved_augmentation_backend == "cpu":
logger.warning(
"augmentation_backend='auto' resolved to 'cpu' because CUDA or kornia is unavailable; "
"disabling GPU postprocess transforms and retaining CPU normalization."
)
gpu_postprocess = resolved_augmentation_backend != "cpu" and not include_masks

if square_resize_div_64:
logger.info(f"Building COCO {image_set} dataset with square resize at resolution {resolution}")
Expand All @@ -569,6 +617,7 @@ def build_coco(image_set: str, args: Any, resolution: int) -> CocoDetection:
patch_size=args.patch_size,
num_windows=args.num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
)
Expand All @@ -586,12 +635,27 @@ def build_coco(image_set: str, args: Any, resolution: int) -> CocoDetection:
patch_size=args.patch_size,
num_windows=args.num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
)
return dataset


def _resolve_runtime_augmentation_backend(backend: str) -> str:
    """Map an ``augmentation_backend`` setting to the backend actually used.

    This is a thin indirection around
    :func:`rfdetr.datasets.kornia_transforms.resolve_augmentation_backend`,
    retained so existing callers (e.g. in ``yolo.py``) keep working.

    An ``"auto"`` value resolves to ``"gpu"`` only when both CUDA and Kornia
    are available, and to ``"cpu"`` otherwise; explicit ``"cpu"`` / ``"gpu"``
    values are passed through unchanged.
    """
    # Imported lazily so that importing this module never requires kornia.
    from rfdetr.datasets.kornia_transforms import resolve_augmentation_backend

    return resolve_augmentation_backend(backend)


def build_roboflow_from_coco(image_set: str, args: Any, resolution: int) -> CocoDetection:
"""Build a Roboflow COCO-format dataset.

Expand All @@ -618,6 +682,8 @@ def build_roboflow_from_coco(image_set: str, args: Any, resolution: int) -> Coco
patch_size = getattr(args, "patch_size", 16)
num_windows = getattr(args, "num_windows", 4)
aug_config = getattr(args, "aug_config", None)
resolved_augmentation_backend = _resolve_runtime_augmentation_backend(getattr(args, "augmentation_backend", "cpu"))
gpu_postprocess = resolved_augmentation_backend != "cpu" and not include_masks

if square_resize_div_64:
logger.info(f"Building Roboflow {image_set} dataset with square resize at resolution {resolution}")
Expand All @@ -633,6 +699,7 @@ def build_roboflow_from_coco(image_set: str, args: Any, resolution: int) -> Coco
patch_size=patch_size,
num_windows=num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
remap_category_ids=True,
Expand All @@ -651,6 +718,7 @@ def build_roboflow_from_coco(image_set: str, args: Any, resolution: int) -> Coco
patch_size=patch_size,
num_windows=num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
remap_category_ids=True,
Expand Down
Loading
Loading