Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- `augmentation_backend` field on `TrainConfig` (`"cpu"` / `"auto"` / `"gpu"`): opt-in GPU-side augmentation via [Kornia](https://kornia.readthedocs.io) applied in `RFDETRDataModule.on_after_batch_transfer` after the batch is resident on the GPU. CPU path is unchanged and remains the default. Install with `pip install 'rfdetr[kornia]'`. Phase 1 supports detection only; segmentation mask support is planned for Phase 2.
- `RFDETR.predict(shape=...)` — optional `(height, width)` tuple overrides the default inference resolution; useful for matching the resolution used when exporting the model. Both dimensions must be positive integers divisible by 14. (closes #682)
- `BuilderArgs` — a `@runtime_checkable` `typing.Protocol` documenting the minimum attribute set consumed by `build_model()`, `build_backbone()`, `build_transformer()`, and `build_criterion_and_postprocessors()`. Enables static type-checker support for custom builder integrations. Exported from `rfdetr.models`.
- `build_model_from_config(model_config, train_config=None, defaults=MODEL_DEFAULTS)` — config-native alternative to `build_model(build_namespace(mc, tc))`; accepts Pydantic config objects directly and constructs the internal namespace automatically. Exported from `rfdetr.models`.
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ trt = [
"tensorrt>=8.6.1",
"polygraphy",
]
kornia = [
"kornia>=0.7,<1", # GPU-side augmentation via on_after_batch_transfer
]
loggers = [
"tensorboard>=2.13.0",
"protobuf>=3.20.0,<4.0.0", # Pins protobuf below 4.x to avoid TensorBoard descriptor crash with protobuf>=4 (see #844)
Expand Down
1 change: 1 addition & 0 deletions src/rfdetr/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,7 @@ class TrainConfig(BaseModel):
eval_interval: int = 1
log_per_class_metrics: bool = True
aug_config: Optional[Dict[str, Any]] = None
augmentation_backend: Literal["cpu", "auto", "gpu"] = "cpu"

@model_validator(mode="after")
def _warn_deprecated_train_config_fields(self) -> "TrainConfig":
Expand Down
21 changes: 21 additions & 0 deletions src/rfdetr/datasets/aug_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,27 @@
"YourCustomTransform", # Add here
}
```
## Kornia GPU Backend
When ``augmentation_backend="auto"`` or ``"gpu"`` is set in ``TrainConfig``, augmentations
run on the GPU via Kornia instead of Albumentations.
**Supported transforms** (all presets):

| Preset key | Kornia equivalent | Notes |
|---|---|---|
| ``HorizontalFlip`` | ``K.RandomHorizontalFlip`` | Direct |
| ``VerticalFlip`` | ``K.RandomVerticalFlip`` | Direct |
| ``Rotate`` | ``K.RandomRotation`` | ``limit`` may be scalar or tuple |
| ``Affine`` | ``K.RandomAffine`` | ``translate_percent`` treated as fraction |
| ``ColorJitter`` | ``K.ColorJiggle`` | Same multiplicative semantics |
| ``RandomBrightnessContrast`` | ``K.ColorJiggle`` | ``brightness_limit`` / ``contrast_limit`` direct |
| ``GaussianBlur`` | ``K.RandomGaussianBlur`` | ``blur_limit`` rounded up to odd; ``sigma=(0.1, 2.0)`` |
| ``GaussNoise`` | ``K.RandomGaussianNoise`` | Upper bound of ``std_range`` used as fixed std |

**Phase 1 limitation**: Segmentation models (``segmentation_head=True``) skip GPU augmentation;
CPU Albumentations are used instead. Mask support is planned for Phase 2.
"""

# ---------------------------------------------------------------------------
Expand Down
42 changes: 38 additions & 4 deletions src/rfdetr/datasets/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ def make_coco_transforms(
patch_size: int = 16,
num_windows: int = 4,
aug_config: Optional[Dict[str, Dict[str, Any]]] = None,
gpu_postprocess: bool = False,
) -> Compose:
"""Build the standard COCO transform pipeline for a given dataset split.

Expand Down Expand Up @@ -373,6 +374,10 @@ def make_coco_transforms(
:class:`~rfdetr.datasets.transforms.AlbumentationsWrapper`. Falls back
to the default :data:`~rfdetr.datasets.aug_config.AUG_CONFIG` when
``None``.
gpu_postprocess: When ``True``, skip Albumentations augmentation wrappers and
``Normalize`` from the CPU pipeline. The ``RFDETRDataModule`` then applies
both augmentation and normalization on the GPU in
``on_after_batch_transfer``. Has no effect on val/test splits.

Returns:
A :class:`torchvision.transforms.v2.Compose` pipeline ready to be passed
Expand All @@ -398,8 +403,14 @@ def make_coco_transforms(
resize_wrappers = AlbumentationsWrapper.from_config(
_build_train_resize_config(scales, square=False, max_size=1333)
)
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
return Compose([*resize_wrappers, *aug_wrappers, to_image, to_float, normalize])
pipeline = [*resize_wrappers]
if not gpu_postprocess:
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
pipeline += [*aug_wrappers]
pipeline += [to_image, to_float]
if not gpu_postprocess:
pipeline += [normalize]
return Compose(pipeline)

if image_set in ("val", "test"):
resize_wrappers = AlbumentationsWrapper.from_config(
Expand All @@ -425,6 +436,7 @@ def make_coco_transforms_square_div_64(
patch_size: int = 16,
num_windows: int = 4,
aug_config: Optional[Dict[str, Dict[str, Any]]] = None,
gpu_postprocess: bool = False,
) -> Compose:
"""
Create COCO transforms with square resizing where the output size is divisible by 64.
Expand Down Expand Up @@ -454,6 +466,10 @@ def make_coco_transforms_square_div_64(
aug_config: Augmentation configuration dictionary compatible with
:class:`~rfdetr.datasets.transforms.AlbumentationsWrapper`. If ``None``,
the default :data:`~rfdetr.datasets.aug_config.AUG_CONFIG` is used.
gpu_postprocess: When ``True``, skip Albumentations augmentation wrappers and
``Normalize`` from the CPU pipeline. The ``RFDETRDataModule`` then applies
both augmentation and normalization on the GPU in
``on_after_batch_transfer``. Has no effect on val/test splits.

Returns:
A ``Compose`` object containing the composed image transforms appropriate
Expand All @@ -474,8 +490,14 @@ def make_coco_transforms_square_div_64(
if image_set == "train":
resolved_aug_config = aug_config if aug_config is not None else AUG_CONFIG
resize_wrappers = AlbumentationsWrapper.from_config(_build_train_resize_config(scales, square=True))
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
return Compose([*resize_wrappers, *aug_wrappers, to_image, to_float, normalize])
pipeline = [*resize_wrappers]
if not gpu_postprocess:
aug_wrappers = AlbumentationsWrapper.from_config(resolved_aug_config)
pipeline += [*aug_wrappers]
pipeline += [to_image, to_float]
if not gpu_postprocess:
pipeline += [normalize]
return Compose(pipeline)

if image_set in ("val", "test", "val_speed"):
resize_wrappers = AlbumentationsWrapper.from_config([{"Resize": {"height": resolution, "width": resolution}}])
Expand All @@ -502,6 +524,13 @@ def build_coco(image_set: str, args: Any, resolution: int) -> CocoDetection:
square_resize_div_64 = getattr(args, "square_resize_div_64", False)
include_masks = getattr(args, "segmentation_head", False)
aug_config = getattr(args, "aug_config", None)
augmentation_backend = getattr(args, "augmentation_backend", "cpu")
if include_masks and augmentation_backend != "cpu":
logger.warning(
"Segmentation training does not currently support GPU postprocess transforms; "
"forcing augmentation_backend='cpu' to retain CPU transforms and normalization."
)
gpu_postprocess = augmentation_backend != "cpu" and not include_masks

if square_resize_div_64:
logger.info(f"Building COCO {image_set} dataset with square resize at resolution {resolution}")
Expand All @@ -517,6 +546,7 @@ def build_coco(image_set: str, args: Any, resolution: int) -> CocoDetection:
patch_size=args.patch_size,
num_windows=args.num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
)
Expand All @@ -534,6 +564,7 @@ def build_coco(image_set: str, args: Any, resolution: int) -> CocoDetection:
patch_size=args.patch_size,
num_windows=args.num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
)
Expand Down Expand Up @@ -566,6 +597,7 @@ def build_roboflow_from_coco(image_set: str, args: Any, resolution: int) -> Coco
patch_size = getattr(args, "patch_size", 16)
num_windows = getattr(args, "num_windows", 4)
aug_config = getattr(args, "aug_config", None)
gpu_postprocess = getattr(args, "augmentation_backend", "cpu") != "cpu" and not include_masks

if square_resize_div_64:
logger.info(f"Building Roboflow {image_set} dataset with square resize at resolution {resolution}")
Expand All @@ -581,6 +613,7 @@ def build_roboflow_from_coco(image_set: str, args: Any, resolution: int) -> Coco
patch_size=patch_size,
num_windows=num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
remap_category_ids=True,
Expand All @@ -599,6 +632,7 @@ def build_roboflow_from_coco(image_set: str, args: Any, resolution: int) -> Coco
patch_size=patch_size,
num_windows=num_windows,
aug_config=aug_config,
gpu_postprocess=gpu_postprocess,
),
include_masks=include_masks,
remap_category_ids=True,
Expand Down
Loading
Loading