diff --git a/docs/learn/export.md b/docs/learn/export.md
index f7db203d1..b46adccd6 100644
--- a/docs/learn/export.md
+++ b/docs/learn/export.md
@@ -51,6 +51,7 @@ The `export()` method accepts several parameters to customize the export process
 | `force`         | `False`    | Deprecated and ignored.                                                                                                |
 | `shape`         | `None`     | Input shape as tuple `(height, width)`. Must be divisible by 14. If not provided, uses the model's default resolution. |
 | `batch_size`    | `1`        | Batch size for the exported model.                                                                                     |
+| `tensorrt`      | `False`    | When `True`, convert the ONNX model to a TensorRT `.engine` file. Requires TensorRT (`trtexec`) to be installed.       |
 
 ## Advanced Export Examples
 
@@ -118,6 +119,20 @@ If you want lower latency on NVIDIA GPUs, you can convert the exported ONNX mode
 - Install TensorRT (`trtexec` must be available in your `PATH`)
 - Export an ONNX model first (for example: `output/inference_model.onnx`)
 
+### Export Directly to TensorRT
+
+Pass `tensorrt=True` to `export()` to export ONNX and convert to a TensorRT engine in one step:
+
+```python
+from rfdetr import RFDETRMedium
+
+model = RFDETRMedium(pretrain_weights="<path/to/checkpoint.pth>")
+
+model.export(tensorrt=True)
+```
+
+This exports `output/inference_model.onnx` first and then produces `output/inference_model.engine`.
+
 ### Python API Conversion
 
 ```python
@@ -131,10 +146,78 @@ args = Namespace(
     dry_run=False,
 )
 
-trtexec("output/inference_model.onnx", args)
+engine_path = trtexec("output/inference_model.onnx", args)
+```
+
+`trtexec` returns the path to the generated `.engine` file. If `profile=True`, it also writes an Nsight Systems report (`.nsys-rep`).
+
+## Run Inference with `inference-models`
+
+[`inference-models`](https://github.com/roboflow/inference/tree/main/inference_models) is the
+recommended library for running RF-DETR inference. It supports multiple backends — PyTorch,
+ONNX, and TensorRT — with automatic backend selection and a unified API.
+
+### Installation
+
+```bash
+# CPU / PyTorch only
+pip install inference-models
+
+# With TensorRT support (NVIDIA GPU required)
+pip install "inference-models[trt-cu12]"  # CUDA 12.x
+```
+
+See the [inference-models installation guide](https://inference-models.roboflow.com/getting-started/installation/)
+for all installation options including Jetson and CUDA 11.x.
+
+### Load a Pre-trained RF-DETR Model
+
+```python
+import cv2
+from inference_models import AutoModel
+
+# Automatically selects the best available backend for your environment
+model = AutoModel.from_pretrained("rfdetr-base")
+
+image = cv2.imread("image.jpg")
+predictions = model(image)
+
+# Convert to supervision Detections
+detections = predictions[0].to_supervision()
+print(detections)
+```
+
+### Load a Local RF-DETR Checkpoint
+
+```python
+import cv2
+from inference_models import AutoModel
+
+# Load from a local .pth checkpoint (same file used by rfdetr for training)
+model = AutoModel.from_pretrained(
+    "/path/to/checkpoint.pth",
+    model_type="rfdetr-base",  # specify the architecture variant
+)
+
+image = cv2.imread("image.jpg")
+predictions = model(image)
+```
+
+### Force TensorRT Backend
+
+```python
+import cv2
+from inference_models import AutoModel, BackendType
+
+# Explicitly request TensorRT — requires TRT to be installed
+model = AutoModel.from_pretrained("rfdetr-base", backend=BackendType.TRT)
+
+image = cv2.imread("image.jpg")
+predictions = model(image)
 ```
 
-This produces `output/inference_model.engine`. If `profile=True`, it also writes an Nsight Systems report (`.nsys-rep`).
+`AutoModel.from_pretrained` accepts `backend="onnx"`, `backend="torch"`, or
+`backend="trt"` to override automatic backend selection.
 
 ## Using the Exported Model
 
@@ -174,5 +257,6 @@ boxes, labels = outputs
 After exporting your model, you may want to:
 
 - [Deploy to Roboflow](deploy.md) for cloud-based inference and workflow integration
-- Use the ONNX model with TensorRT for optimized GPU inference
+- Use [`inference-models`](https://github.com/roboflow/inference/tree/main/inference_models) for
+    multi-backend inference (PyTorch, ONNX, TensorRT) with automatic backend selection
 - Integrate with edge deployment frameworks like ONNX Runtime or OpenVINO
diff --git a/src/rfdetr/detr.py b/src/rfdetr/detr.py
index 3f75a0a08..b634fe2a0 100644
--- a/src/rfdetr/detr.py
+++ b/src/rfdetr/detr.py
@@ -554,9 +554,10 @@ def export(
         batch_size: int = 1,
         dynamic_batch: bool = False,
         patch_size: int | None = None,
+        tensorrt: bool = False,
         **kwargs,
     ) -> None:
-        """Export the trained model to ONNX format.
+        """Export the trained model to ONNX format, and optionally to TensorRT.
 
         See the `ONNX export documentation <https://rfdetr.roboflow.com/learn/export/>`_
         for more information.
@@ -578,6 +579,9 @@ def export(
                 ``model_config.patch_size`` (typically 14 or 16). When provided
                 explicitly it must match the instantiated model's patch size.
                 Shape divisibility is validated against ``patch_size * num_windows``.
+            tensorrt: When ``True``, convert the exported ONNX model to a TensorRT
+                ``.engine`` file using ``trtexec``.  Requires TensorRT to be installed
+                and ``trtexec`` available in ``PATH``.
             **kwargs: Additional keyword arguments forwarded to export_onnx.
 
         """
@@ -666,6 +670,16 @@ def export(
 
         logger.info(f"Successfully exported ONNX model to: {output_file}")
 
+        if tensorrt:
+            from argparse import Namespace
+
+            from rfdetr.export.tensorrt import trtexec
+
+            logger.info("Converting ONNX model to TensorRT engine")
+            trt_args = Namespace(verbose=verbose, profile=False, dry_run=False)
+            engine_file = trtexec(output_file, trt_args)
+            logger.info(f"Successfully exported TensorRT engine to: {engine_file}")
+
         logger.info("ONNX export completed successfully")
         self.model.model = self.model.model.to(device)
 
diff --git a/src/rfdetr/export/tensorrt.py b/src/rfdetr/export/tensorrt.py
index 595e7a4a7..4721b999c 100644
--- a/src/rfdetr/export/tensorrt.py
+++ b/src/rfdetr/export/tensorrt.py
@@ -9,6 +9,15 @@
 
 """
 TensorRT export helpers: trtexec invocation and output parsing.
+
+For TensorRT inference, use the `inference-models` library which provides
+multi-backend RF-DETR support (PyTorch, ONNX, TensorRT) with automatic backend
+selection:
+
+    from inference_models import AutoModel
+    model = AutoModel.from_pretrained("rfdetr-base")
+
+See https://github.com/roboflow/inference/tree/main/inference_models for details.
 """
 
 import os
@@ -32,7 +41,17 @@ def run_command_shell(command, dry_run: bool = False) -> subprocess.CompletedPro
         raise
 
 
-def trtexec(onnx_dir: str, args) -> None:
+def trtexec(onnx_dir: str, args) -> str:
+    """Convert an ONNX model to a TensorRT engine using trtexec.
+
+    Args:
+        onnx_dir: Path to the input ONNX file.
+        args: Namespace with ``verbose`` (bool), ``profile`` (bool), and
+            ``dry_run`` (bool) attributes.
+
+    Returns:
+        Path to the generated ``.engine`` file.
+    """
     engine_dir = onnx_dir.replace(".onnx", ".engine")
 
     # Base trtexec command
@@ -59,6 +78,7 @@ def trtexec(onnx_dir: str, args) -> None:
 
     output = run_command_shell(command, args.dry_run)
     parse_trtexec_output(output.stdout)
+    return engine_dir
 
 
 def parse_trtexec_output(output_text):
diff --git a/tests/export/test_tensorrt_export.py b/tests/export/test_tensorrt_export.py
index e55eda1a8..d3ce56b3f 100644
--- a/tests/export/test_tensorrt_export.py
+++ b/tests/export/test_tensorrt_export.py
@@ -26,3 +26,22 @@ def _fake_run(command, shell, capture_output, text, check):
 
     assert result.returncode == 0
     assert any("CUDA_VISIBLE_DEVICES=" in message for message in logged_messages)
+
+
+def test_trtexec_returns_engine_path(monkeypatch) -> None:
+    """trtexec should return the .engine file path derived from the .onnx path."""
+    fake_result = subprocess.CompletedProcess("cmd", 0, stdout="", stderr="")
+
+    monkeypatch.setattr(
+        tensorrt_export,
+        "run_command_shell",
+        lambda command, dry_run: fake_result,
+    )
+    monkeypatch.setattr(tensorrt_export, "parse_trtexec_output", lambda text: {})
+
+    from argparse import Namespace
+
+    args = Namespace(verbose=False, profile=False, dry_run=False)
+    result = tensorrt_export.trtexec("output/inference_model.onnx", args)
+
+    assert result == "output/inference_model.engine"
diff --git a/tests/models/test_export.py b/tests/models/test_export.py
index ee72c421d..7468dfaf8 100644
--- a/tests/models/test_export.py
+++ b/tests/models/test_export.py
@@ -576,6 +576,80 @@ def test_dynamic_batch_forwards_dynamic_axes(
             f"expected keys {expected_names}, got {set(dynamic_axes.keys())}"
         )
 
+    def test_tensorrt_flag_calls_trtexec(self, output_dir: str) -> None:
+        """When tensorrt=True, main() must call trtexec with the ONNX output path."""
+        trtexec_calls: list[str] = []
+
+        def fake_trtexec(onnx_path: str, args) -> str:
+            trtexec_calls.append(onnx_path)
+            return onnx_path.replace(".onnx", ".engine")
+
+        args = self._make_args(output_dir=output_dir, tensorrt=True)
+        onnx_output = str(args.output_dir) + "/inference_model.onnx"
+
+        mock_model = MagicMock()
+        mock_model.parameters.return_value = []
+        mock_model.backbone.parameters.return_value = []
+        mock_model.backbone.__getitem__.return_value.projector.parameters.return_value = []
+        mock_model.backbone.__getitem__.return_value.encoder.parameters.return_value = []
+        mock_model.transformer.parameters.return_value = []
+        mock_model.to.return_value = mock_model
+        mock_model.cpu.return_value = mock_model
+        mock_model.eval.return_value = mock_model
+        mock_model.return_value = {
+            "pred_boxes": torch.zeros(1, 300, 4),
+            "pred_logits": torch.zeros(1, 300, 90),
+        }
+        mock_tensor = MagicMock()
+        mock_tensor.to.return_value = mock_tensor
+        mock_tensor.cpu.return_value = mock_tensor
+
+        with (
+            patch.object(_cli_export_module, "build_model", return_value=(mock_model, MagicMock(), MagicMock())),
+            patch.object(_cli_export_module, "make_infer_image", return_value=mock_tensor),
+            patch.object(_cli_export_module, "export_onnx", return_value=onnx_output),
+            patch.object(_cli_export_module, "trtexec", side_effect=fake_trtexec),
+            patch.object(_cli_export_module, "get_rank", return_value=0),
+        ):
+            _cli_export_module.main(args)
+
+        assert len(trtexec_calls) == 1, "trtexec should be called exactly once"
+        assert trtexec_calls[0] == onnx_output, f"trtexec called with {trtexec_calls[0]!r}, expected {onnx_output!r}"
+
+    def test_tensorrt_false_does_not_call_trtexec(self, output_dir: str) -> None:
+        """When tensorrt=False (default), main() must not call trtexec."""
+        trtexec_calls: list[str] = []
+
+        def fake_trtexec(onnx_path: str, args) -> str:
+            trtexec_calls.append(onnx_path)
+            return onnx_path.replace(".onnx", ".engine")
+
+        args = self._make_args(output_dir=output_dir, tensorrt=False)
+
+        mock_model = MagicMock()
+        mock_model.parameters.return_value = []
+        mock_model.backbone.parameters.return_value = []
+        mock_model.backbone.__getitem__.return_value.projector.parameters.return_value = []
+        mock_model.backbone.__getitem__.return_value.encoder.parameters.return_value = []
+        mock_model.transformer.parameters.return_value = []
+        mock_model.to.return_value = mock_model
+        mock_model.cpu.return_value = mock_model
+        mock_model.eval.return_value = mock_model
+        mock_tensor = MagicMock()
+        mock_tensor.to.return_value = mock_tensor
+        mock_tensor.cpu.return_value = mock_tensor
+
+        with (
+            patch.object(_cli_export_module, "build_model", return_value=(mock_model, MagicMock(), MagicMock())),
+            patch.object(_cli_export_module, "make_infer_image", return_value=mock_tensor),
+            patch.object(_cli_export_module, "export_onnx", return_value=str(args.output_dir) + "/model.onnx"),
+            patch.object(_cli_export_module, "trtexec", side_effect=fake_trtexec),
+            patch.object(_cli_export_module, "get_rank", return_value=0),
+        ):
+            _cli_export_module.main(args)
+
+        assert len(trtexec_calls) == 0, "trtexec must not be called when tensorrt=False"
+
 
 class TestExportPatchSize:
     """RFDETR.export() patch_size validation and shape-divisibility tests."""