Skip to content
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
c750514
feat: Kornia GPU augmentation backend for detection training
Borda Mar 25, 2026
17043fe
fix(pre-commit): 🎨 auto format pre-commit hooks
pre-commit-ci[bot] Mar 25, 2026
eee9c01
Apply suggestions from code review
Borda Mar 25, 2026
c982914
fix: wire gpu_postprocess through build_o365_raw to prevent double no…
Borda Mar 26, 2026
2b6a7d0
fix: add _kornia_setup_done sentinel; promote auto fallback log to WA…
Borda Mar 26, 2026
c12bdda
fix: blur_limit min 3; docstrings for gpu_postprocess and unpack_boxes
Borda Mar 26, 2026
51754ff
test: add coverage for blur_limit edge, o365 guard, sentinel re-run, …
Borda Mar 26, 2026
55d4bc6
fix: GPU path falls back to AUG_CONFIG not {} when aug_config=None
Borda Mar 26, 2026
e969361
Apply suggestions from code review
Borda Mar 26, 2026
de9017c
fix(pre-commit): 🎨 auto format pre-commit hooks
pre-commit-ci[bot] Mar 26, 2026
b517771
Merge branch 'develop' into aug/kornia
Borda Mar 26, 2026
c31f834
fix: square gaussian kernel; device-move Kornia modules; pre-resolve …
Borda Mar 26, 2026
2aa18f5
fix: add missing kornia import in o365.py has_kornia check
Borda Mar 26, 2026
5ab9bab
Merge branch 'develop' into aug/kornia
Borda Apr 9, 2026
26c7c80
Merge branch 'develop' into aug/kornia
Borda Apr 9, 2026
212df68
fix: add `# type: ignore` to kornia imports; update mypy ignores for …
Borda Apr 9, 2026
b8e80e1
refactor: replace `torch.cuda.is_available` with `_has_cuda_device` a…
Borda Apr 9, 2026
ea5d30d
refactor: standardize variable naming for clarity; update kornia impo…
Borda Apr 9, 2026
d504dca
refactor: consolidate backend resolution; fix fork-unsafe CUDA detection
Borda Apr 9, 2026
38d719d
refactor: remove mutable args mutation in build_coco()
Borda Apr 9, 2026
c6cbd0c
fix: add type: ignore[import-not-found] to kornia imports in module_d…
Borda Apr 9, 2026
2e658a7
docs: document gpu_postprocess in make_coco_transforms* top-level doc…
Borda Apr 9, 2026
c5ec7dc
lint: modernise typing and fix F401 noqa annotations
Borda Apr 9, 2026
300e270
fix(tests): update torch.cuda.is_available patch target in test_coco.py
Borda Apr 9, 2026
19a6ee2
fix: build_coco now validates 'gpu' backend via _resolve_runtime_augm…
Borda Apr 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- `augmentation_backend` field on `TrainConfig` (`"cpu"` / `"auto"` / `"gpu"`): opt-in GPU-side augmentation via [Kornia](https://kornia.readthedocs.io) applied in `RFDETRDataModule.on_after_batch_transfer` after the batch is resident on the GPU. CPU path is unchanged and remains the default. Install with `pip install 'rfdetr[kornia]'`. Phase 1 supports detection only; segmentation mask support is planned for Phase 2.
- `BuilderArgs` — a `@runtime_checkable` `typing.Protocol` documenting the minimum attribute set consumed by `build_model()`, `build_backbone()`, `build_transformer()`, and `build_criterion_and_postprocessors()`. Enables static type-checker support for custom builder integrations. Exported from `rfdetr.models`. (#841)
- `build_model_from_config(model_config, train_config=None, defaults=MODEL_DEFAULTS)` — config-native alternative to `build_model(build_namespace(mc, tc))`; accepts Pydantic config objects directly and constructs the internal namespace automatically. Exported from `rfdetr.models`. (#845)
- `build_criterion_from_config(model_config, train_config, defaults=MODEL_DEFAULTS)` — config-native alternative to `build_criterion_and_postprocessors(build_namespace(mc, tc))`; returns a `(SetCriterion, PostProcess)` tuple. Exported from `rfdetr.models`. (#845)
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ trt = [
"tensorrt>=8.6.1",
"polygraphy",
]
kornia = [
"kornia>=0.7,<1", # GPU-side augmentation via on_after_batch_transfer
]
loggers = [
"tensorboard>=2.13.0",
"protobuf>=3.20.0,<4.0.0", # Pins protobuf below 4.x to avoid TensorBoard descriptor crash with protobuf>=4 (see #844)
Expand Down
1 change: 1 addition & 0 deletions src/rfdetr/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ class TrainConfig(BaseModel):
eval_interval: int = 1
log_per_class_metrics: bool = True
aug_config: Optional[Dict[str, Any]] = None
augmentation_backend: Literal["cpu", "auto", "gpu"] = "cpu"

@model_validator(mode="after")
def _warn_deprecated_train_config_fields(self) -> "TrainConfig":
Expand Down
21 changes: 21 additions & 0 deletions src/rfdetr/datasets/aug_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,27 @@
"YourCustomTransform", # Add here
}
```

## Kornia GPU Backend

When ``augmentation_backend="auto"`` or ``"gpu"`` is set in ``TrainConfig``, augmentations
run on the GPU via Kornia instead of Albumentations.

**Supported transforms** (all presets):

| Preset key | Kornia equivalent | Notes |
|---|---|---|
| ``HorizontalFlip`` | ``K.RandomHorizontalFlip`` | Direct |
| ``VerticalFlip`` | ``K.RandomVerticalFlip`` | Direct |
| ``Rotate`` | ``K.RandomRotation`` | ``limit`` may be scalar or tuple |
| ``Affine`` | ``K.RandomAffine`` | ``translate_percent`` treated as fraction |
| ``ColorJitter`` | ``K.ColorJiggle`` | Same multiplicative semantics |
| ``RandomBrightnessContrast`` | ``K.ColorJiggle`` | ``brightness_limit`` / ``contrast_limit`` direct |
| ``GaussianBlur`` | ``K.RandomGaussianBlur`` | ``blur_limit`` rounded up to odd; ``sigma=(0.1, 2.0)`` |
| ``GaussNoise`` | ``K.RandomGaussianNoise`` | Upper bound of ``std_range`` used as fixed std |

**Phase 1 limitation**: Segmentation models (``segmentation_head=True``) skip GPU augmentation;
CPU Albumentations are used instead. Mask support is planned for Phase 2.
"""

# ---------------------------------------------------------------------------
Expand Down
61 changes: 57 additions & 4 deletions src/rfdetr/datasets/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ def make_coco_transforms(
patch_size: int = 16,
num_windows: int = 4,
aug_config: Optional[Dict[str, Dict[str, Any]]] = None,
gpu_postprocess: bool = False,
) -> Compose:
"""Build the standard COCO transform pipeline for a given dataset split.

Expand Down Expand Up @@ -425,6 +426,10 @@ def make_coco_transforms(
:class:`~rfdetr.datasets.transforms.AlbumentationsWrapper`. Falls back
to the default :data:`~rfdetr.datasets.aug_config.AUG_CONFIG` when
``None``.
gpu_postprocess: When ``True``, omit the Albumentations augmentation wrappers
and ``Normalize`` from the CPU pipeline. The ``RFDETRDataModule`` then applies
both augmentation and normalization on the GPU in
``on_after_batch_transfer``. Has no effect on val/test splits.

Returns:
A :class:`torchvision.transforms.v2.Compose` pipeline ready to be passed
Expand All @@ -450,8 +455,14 @@ def make_coco_transforms(
resize_wrappers = AlbumentationsWrapper.from_config(
_build_train_resize_config(scales, square=False, max_size=1333)
)
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
return Compose([*resize_wrappers, *aug_wrappers, to_image, to_float, normalize])
pipeline = [*resize_wrappers]
if not gpu_postprocess:
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
pipeline += [*aug_wrappers]
pipeline += [to_image, to_float]
if not gpu_postprocess:
pipeline += [normalize]
return Compose(pipeline)

if image_set in ("val", "test"):
resize_wrappers = AlbumentationsWrapper.from_config(
Expand All @@ -477,6 +488,7 @@ def make_coco_transforms_square_div_64(
patch_size: int = 16,
num_windows: int = 4,
aug_config: Optional[Dict[str, Dict[str, Any]]] = None,
gpu_postprocess: bool = False,
) -> Compose:
"""
Create COCO transforms with square resizing where the output size is divisible by 64.
Expand Down Expand Up @@ -506,6 +518,10 @@ def make_coco_transforms_square_div_64(
aug_config: Augmentation configuration dictionary compatible with
:class:`~rfdetr.datasets.transforms.AlbumentationsWrapper`. If ``None``,
the default :data:`~rfdetr.datasets.aug_config.AUG_CONFIG` is used.
gpu_postprocess: When ``True``, omit the Albumentations augmentation wrappers
and ``Normalize`` from the CPU pipeline. The ``RFDETRDataModule`` then applies
both augmentation and normalization on the GPU in
``on_after_batch_transfer``. Has no effect on val/test splits.

Returns:
A ``Compose`` object containing the composed image transforms appropriate
Expand All @@ -526,8 +542,14 @@ def make_coco_transforms_square_div_64(
if image_set == "train":
resolved_aug_config = aug_config if aug_config is not None else AUG_CONFIG
resize_wrappers = AlbumentationsWrapper.from_config(_build_train_resize_config(scales, square=True))
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
return Compose([*resize_wrappers, *aug_wrappers, to_image, to_float, normalize])
pipeline = [*resize_wrappers]
if not gpu_postprocess:
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
pipeline += [*aug_wrappers]
pipeline += [to_image, to_float]
if not gpu_postprocess:
pipeline += [normalize]
return Compose(pipeline)

if image_set in ("val", "test", "val_speed"):
resize_wrappers = AlbumentationsWrapper.from_config([{"Resize": {"height": resolution, "width": resolution}}])
Expand All @@ -554,6 +576,32 @@ def build_coco(image_set: str, args: Any, resolution: int) -> CocoDetection:
square_resize_div_64 = getattr(args, "square_resize_div_64", False)
include_masks = getattr(args, "segmentation_head", False)
aug_config = getattr(args, "aug_config", None)
augmentation_backend = getattr(args, "augmentation_backend", "cpu")
resolved_augmentation_backend = augmentation_backend
if include_masks and augmentation_backend != "cpu":
logger.warning(
"Segmentation training does not currently support GPU postprocess transforms; "
"forcing augmentation_backend='cpu' to retain CPU transforms and normalization."
)
resolved_augmentation_backend = "cpu"
if hasattr(args, "augmentation_backend"):
setattr(args, "augmentation_backend", "cpu")
if resolved_augmentation_backend == "auto":
gpu_available = torch.cuda.is_available()
if gpu_available:
try:
import kornia # type: ignore[import-not-found]
except ImportError:
gpu_available = False
if not gpu_available:
logger.warning(
"augmentation_backend='auto' resolved to 'cpu' because CUDA or kornia is unavailable; "
"disabling GPU postprocess transforms and retaining CPU normalization."
)
resolved_augmentation_backend = "cpu"
if hasattr(args, "augmentation_backend"):
setattr(args, "augmentation_backend", "cpu")
gpu_postprocess = resolved_augmentation_backend != "cpu" and not include_masks

if square_resize_div_64:
logger.info(f"Building COCO {image_set} dataset with square resize at resolution {resolution}")
Expand All @@ -569,6 +617,7 @@ def build_coco(image_set: str, args: Any, resolution: int) -> CocoDetection:
patch_size=args.patch_size,
num_windows=args.num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
)
Expand All @@ -586,6 +635,7 @@ def build_coco(image_set: str, args: Any, resolution: int) -> CocoDetection:
patch_size=args.patch_size,
num_windows=args.num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
)
Expand Down Expand Up @@ -618,6 +668,7 @@ def build_roboflow_from_coco(image_set: str, args: Any, resolution: int) -> Coco
patch_size = getattr(args, "patch_size", 16)
num_windows = getattr(args, "num_windows", 4)
aug_config = getattr(args, "aug_config", None)
gpu_postprocess = getattr(args, "augmentation_backend", "cpu") != "cpu" and not include_masks

if square_resize_div_64:
logger.info(f"Building Roboflow {image_set} dataset with square resize at resolution {resolution}")
Expand All @@ -633,6 +684,7 @@ def build_roboflow_from_coco(image_set: str, args: Any, resolution: int) -> Coco
patch_size=patch_size,
num_windows=num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
remap_category_ids=True,
Expand All @@ -651,6 +703,7 @@ def build_roboflow_from_coco(image_set: str, args: Any, resolution: int) -> Coco
patch_size=patch_size,
num_windows=num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
remap_category_ids=True,
Expand Down
Loading
Loading