Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- `augmentation_backend` field on `TrainConfig` (`"cpu"` / `"auto"` / `"gpu"`): opt-in GPU-side augmentation via [Kornia](https://kornia.readthedocs.io) applied in `RFDETRDataModule.on_after_batch_transfer` after the batch is resident on the GPU. CPU path is unchanged and remains the default. Install with `pip install 'rfdetr[kornia]'`. Phase 1 supports detection only; segmentation mask support is planned for Phase 2.
- `RFDETR.predict(shape=...)` — optional `(height, width)` tuple overrides the default inference resolution; useful for matching the resolution used when exporting the model. Both dimensions must be positive integers divisible by 14. (closes #682)
- `BuilderArgs` — a `@runtime_checkable` `typing.Protocol` documenting the minimum attribute set consumed by `build_model()`, `build_backbone()`, `build_transformer()`, and `build_criterion_and_postprocessors()`. Enables static type-checker support for custom builder integrations. Exported from `rfdetr.models`.
- `build_model_from_config(model_config, train_config=None, defaults=MODEL_DEFAULTS)` — config-native alternative to `build_model(build_namespace(mc, tc))`; accepts Pydantic config objects directly and constructs the internal namespace automatically. Exported from `rfdetr.models`.
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ trt = [
"tensorrt>=8.6.1",
"polygraphy",
]
kornia = [
"kornia>=0.7,<1", # GPU-side augmentation via on_after_batch_transfer
]
loggers = [
"tensorboard>=2.13.0",
"protobuf>=3.20.0,<4.0.0", # Pins protobuf below 4.x to avoid TensorBoard descriptor crash with protobuf>=4 (see #844)
Expand Down
1 change: 1 addition & 0 deletions src/rfdetr/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,7 @@ class TrainConfig(BaseModel):
eval_interval: int = 1
log_per_class_metrics: bool = True
aug_config: Optional[Dict[str, Any]] = None
augmentation_backend: Literal["cpu", "auto", "gpu"] = "cpu"

@model_validator(mode="after")
def _warn_deprecated_train_config_fields(self) -> "TrainConfig":
Expand Down
21 changes: 21 additions & 0 deletions src/rfdetr/datasets/aug_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,27 @@
"YourCustomTransform", # Add here
}
```
## Kornia GPU Backend
When ``augmentation_backend="auto"`` or ``"gpu"`` is set in ``TrainConfig``, augmentations
run on the GPU via Kornia instead of Albumentations.
**Supported transforms** (all presets):

| Preset key | Kornia equivalent | Notes |
|---|---|---|
| ``HorizontalFlip`` | ``K.RandomHorizontalFlip`` | Direct |
| ``VerticalFlip`` | ``K.RandomVerticalFlip`` | Direct |
| ``Rotate`` | ``K.RandomRotation`` | ``limit`` may be scalar or tuple |
| ``Affine`` | ``K.RandomAffine`` | ``translate_percent`` treated as fraction |
| ``ColorJitter`` | ``K.ColorJiggle`` | Same multiplicative semantics |
| ``RandomBrightnessContrast`` | ``K.ColorJiggle`` | ``brightness_limit`` / ``contrast_limit`` direct |
| ``GaussianBlur`` | ``K.RandomGaussianBlur`` | ``blur_limit`` rounded up to odd; ``sigma=(0.1, 2.0)`` |
| ``GaussNoise`` | ``K.RandomGaussianNoise`` | Upper bound of ``std_range`` used as fixed std |

**Phase 1 limitation**: Segmentation models (``segmentation_head=True``) skip GPU augmentation;
CPU Albumentations are used instead. Mask support is planned for Phase 2.
"""

# ---------------------------------------------------------------------------
Expand Down
61 changes: 57 additions & 4 deletions src/rfdetr/datasets/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ def make_coco_transforms(
patch_size: int = 16,
num_windows: int = 4,
aug_config: Optional[Dict[str, Dict[str, Any]]] = None,
gpu_postprocess: bool = False,
) -> Compose:
"""Build the standard COCO transform pipeline for a given dataset split.

Expand Down Expand Up @@ -373,6 +374,10 @@ def make_coco_transforms(
:class:`~rfdetr.datasets.transforms.AlbumentationsWrapper`. Falls back
to the default :data:`~rfdetr.datasets.aug_config.AUG_CONFIG` when
``None``.
gpu_postprocess: When ``True``, skip Albumentations augmentation wrappers and
``Normalize`` from the CPU pipeline. The ``RFDETRDataModule`` then applies
both augmentation and normalization on the GPU in
``on_after_batch_transfer``. Has no effect on val/test splits.

Returns:
A :class:`torchvision.transforms.v2.Compose` pipeline ready to be passed
Expand All @@ -398,8 +403,14 @@ def make_coco_transforms(
resize_wrappers = AlbumentationsWrapper.from_config(
_build_train_resize_config(scales, square=False, max_size=1333)
)
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
return Compose([*resize_wrappers, *aug_wrappers, to_image, to_float, normalize])
pipeline = [*resize_wrappers]
if not gpu_postprocess:
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
pipeline += [*aug_wrappers]
pipeline += [to_image, to_float]
if not gpu_postprocess:
pipeline += [normalize]
return Compose(pipeline)

if image_set in ("val", "test"):
resize_wrappers = AlbumentationsWrapper.from_config(
Expand All @@ -425,6 +436,7 @@ def make_coco_transforms_square_div_64(
patch_size: int = 16,
num_windows: int = 4,
aug_config: Optional[Dict[str, Dict[str, Any]]] = None,
gpu_postprocess: bool = False,
) -> Compose:
"""
Create COCO transforms with square resizing where the output size is divisible by 64.
Expand Down Expand Up @@ -454,6 +466,10 @@ def make_coco_transforms_square_div_64(
aug_config: Augmentation configuration dictionary compatible with
:class:`~rfdetr.datasets.transforms.AlbumentationsWrapper`. If ``None``,
the default :data:`~rfdetr.datasets.aug_config.AUG_CONFIG` is used.
gpu_postprocess: When ``True``, skip Albumentations augmentation wrappers and
``Normalize`` from the CPU pipeline. The ``RFDETRDataModule`` then applies
both augmentation and normalization on the GPU in
``on_after_batch_transfer``. Has no effect on val/test splits.

Returns:
A ``Compose`` object containing the composed image transforms appropriate
Expand All @@ -474,8 +490,14 @@ def make_coco_transforms_square_div_64(
if image_set == "train":
resolved_aug_config = aug_config if aug_config is not None else AUG_CONFIG
resize_wrappers = AlbumentationsWrapper.from_config(_build_train_resize_config(scales, square=True))
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
return Compose([*resize_wrappers, *aug_wrappers, to_image, to_float, normalize])
pipeline = [*resize_wrappers]
if not gpu_postprocess:
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
pipeline += [*aug_wrappers]
pipeline += [to_image, to_float]
if not gpu_postprocess:
pipeline += [normalize]
return Compose(pipeline)

if image_set in ("val", "test", "val_speed"):
resize_wrappers = AlbumentationsWrapper.from_config([{"Resize": {"height": resolution, "width": resolution}}])
Expand All @@ -502,6 +524,32 @@ def build_coco(image_set: str, args: Any, resolution: int) -> CocoDetection:
square_resize_div_64 = getattr(args, "square_resize_div_64", False)
include_masks = getattr(args, "segmentation_head", False)
aug_config = getattr(args, "aug_config", None)
augmentation_backend = getattr(args, "augmentation_backend", "cpu")
resolved_augmentation_backend = augmentation_backend
if include_masks and augmentation_backend != "cpu":
logger.warning(
"Segmentation training does not currently support GPU postprocess transforms; "
"forcing augmentation_backend='cpu' to retain CPU transforms and normalization."
)
resolved_augmentation_backend = "cpu"
if hasattr(args, "augmentation_backend"):
setattr(args, "augmentation_backend", "cpu")
if resolved_augmentation_backend == "auto":
gpu_available = torch.cuda.is_available()
if gpu_available:
try:
import kornia # type: ignore[import-not-found]
except ImportError:
gpu_available = False
if not gpu_available:
logger.warning(
"augmentation_backend='auto' resolved to 'cpu' because CUDA or kornia is unavailable; "
"disabling GPU postprocess transforms and retaining CPU normalization."
)
resolved_augmentation_backend = "cpu"
if hasattr(args, "augmentation_backend"):
setattr(args, "augmentation_backend", "cpu")
gpu_postprocess = resolved_augmentation_backend != "cpu" and not include_masks

if square_resize_div_64:
logger.info(f"Building COCO {image_set} dataset with square resize at resolution {resolution}")
Expand All @@ -517,6 +565,7 @@ def build_coco(image_set: str, args: Any, resolution: int) -> CocoDetection:
patch_size=args.patch_size,
num_windows=args.num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
)
Expand All @@ -534,6 +583,7 @@ def build_coco(image_set: str, args: Any, resolution: int) -> CocoDetection:
patch_size=args.patch_size,
num_windows=args.num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
)
Expand Down Expand Up @@ -566,6 +616,7 @@ def build_roboflow_from_coco(image_set: str, args: Any, resolution: int) -> Coco
patch_size = getattr(args, "patch_size", 16)
num_windows = getattr(args, "num_windows", 4)
aug_config = getattr(args, "aug_config", None)
gpu_postprocess = getattr(args, "augmentation_backend", "cpu") != "cpu" and not include_masks

if square_resize_div_64:
logger.info(f"Building Roboflow {image_set} dataset with square resize at resolution {resolution}")
Expand All @@ -581,6 +632,7 @@ def build_roboflow_from_coco(image_set: str, args: Any, resolution: int) -> Coco
patch_size=patch_size,
num_windows=num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
remap_category_ids=True,
Expand All @@ -599,6 +651,7 @@ def build_roboflow_from_coco(image_set: str, args: Any, resolution: int) -> Coco
patch_size=patch_size,
num_windows=num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
remap_category_ids=True,
Expand Down
Loading
Loading