Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions sab/clock_watch.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,56 @@ def enable_persistence(enable: bool) -> None:
run(["sudo", "nvidia-smi", "-pm", "1" if enable else "0"])


class CPUFrequencyMonitor:
"""Monitors CPU frequency drift during a benchmark run via /proc/cpuinfo."""

def __init__(self, tolerance_mhz: float = 50.0):
self._tolerance_mhz = tolerance_mhz
self._baseline_freqs: list[float] | None = None
self._end_freqs: list[float] | None = None
self._drifted = False

@staticmethod
def _read_cpu_frequencies() -> list[float]:
freqs = []
with open("/proc/cpuinfo") as f:
for line in f:
if line.startswith("cpu MHz"):
freqs.append(float(line.split(":")[1].strip()))
return freqs

def __enter__(self):
try:
self._baseline_freqs = self._read_cpu_frequencies()
except OSError:
self._baseline_freqs = None
return self

def __exit__(self, exc_type, exc_val, exc_tb):
if self._baseline_freqs is None:
return
try:
self._end_freqs = self._read_cpu_frequencies()
except OSError:
return
for before, after in zip(self._baseline_freqs, self._end_freqs):
if abs(before - after) > self._tolerance_mhz:
self._drifted = True
return

def did_drift(self) -> bool:
return self._drifted

def get_summary(self) -> dict | None:
if self._baseline_freqs is None or self._end_freqs is None:
return None
return {
"baseline_mean_mhz": sum(self._baseline_freqs) / len(self._baseline_freqs),
"end_mean_mhz": sum(self._end_freqs) / len(self._end_freqs),
"max_drift_mhz": max(abs(b - e) for b, e in zip(self._baseline_freqs, self._end_freqs)),
}


def main():
signal.signal(signal.SIGINT, lambda *_: sys.exit(0)) # clean Ctrl-C
print("🟢 Watching for any GPU clock changes (press Ctrl-C to quit)")
Expand Down
6 changes: 5 additions & 1 deletion sab/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from tqdm import tqdm
import time

from sab.onnx_inference import ONNXInferenceCPU


def evaluate(inference, image_dir: str, annotations_file_path: str, class_mapping: dict[int, str]|None=None, buffer_time: float=0.0, output_file_name: str|None=None, max_images: int|None=None, max_dets: int=100):
predictions = []
Expand All @@ -29,7 +31,9 @@ def evaluate(inference, image_dir: str, annotations_file_path: str, class_mappin

image = Image.open(image_path).convert("RGB")
initial_shape = image.size
image = TF.to_tensor(image).cuda()
image = TF.to_tensor(image)
if not isinstance(inference, ONNXInferenceCPU):
image = image.cuda()

if inference.prediction_type == "bbox":
xyxy, class_id, score = inference.infer(image)
Expand Down
4 changes: 2 additions & 2 deletions sab/models/benchmark_dfine.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA
from sab.trt_inference import TRTInference
from sab.models.utils import ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results

Expand All @@ -28,7 +28,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes, labels, scores


class DFINEONNXInference(ONNXInference):
class DFINEONNXInference(ONNXInferenceCUDA):
def __init__(self, model_path: str, image_input_name: str|None="images"):
super().__init__(model_path, image_input_name)

Expand Down
4 changes: 2 additions & 2 deletions sab/models/benchmark_lwdetr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
import fire

from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA
from sab.trt_inference import TRTInference
from sab.models.utils import cxcywh_to_xyxy, ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results

Expand Down Expand Up @@ -39,7 +39,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes.contiguous(), labels.contiguous(), scores.contiguous()


class LWDETRONNXInference(ONNXInference):
class LWDETRONNXInference(ONNXInferenceCUDA):
def preprocess(self, input_image: torch.Tensor) -> tuple[torch.Tensor, dict]:
return preprocess_image(input_image, self.image_input_shape)

Expand Down
27 changes: 25 additions & 2 deletions sab/models/benchmark_rfdetr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA, ONNXInferenceCPU
from sab.trt_inference import TRTInference
from sab.models.utils import cxcywh_to_xyxy, ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results

Expand Down Expand Up @@ -45,14 +45,22 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes.contiguous(), labels.contiguous(), scores.contiguous()


class RFDETRONNXInference(ONNXInference):
class RFDETRONNXInference(ONNXInferenceCUDA):
    """CUDA ONNX-Runtime benchmark wrapper for RF-DETR detection models.

    Delegates both steps to the module-level helpers so the pre/post logic is
    shared with the other RF-DETR inference variants.
    """

    def preprocess(self, input_image: torch.Tensor) -> tuple[torch.Tensor, dict]:
        # Delegate to the shared helper; self.image_input_shape comes from the base class.
        target_shape = self.image_input_shape
        return preprocess_image(input_image, target_shape)

    def postprocess(self, outputs: dict[str, torch.Tensor], metadata: dict) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        # Delegate raw-output decoding to the shared helper.
        return postprocess_output(outputs, metadata)


class RFDETRONNXCPUInference(ONNXInferenceCPU):
    """CPU ONNX-Runtime benchmark wrapper for RF-DETR detection models.

    Same pre/post pipeline as the CUDA variant; only the execution backend
    (via the base class) differs.
    """

    def preprocess(self, input_image: torch.Tensor) -> tuple[torch.Tensor, dict]:
        # Shared helper handles resizing/metadata; shape is supplied by the base class.
        shape = self.image_input_shape
        return preprocess_image(input_image, shape)

    def postprocess(self, outputs: dict[str, torch.Tensor], metadata: dict) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        # Shared helper decodes the raw outputs into (boxes, labels, scores).
        return postprocess_output(outputs, metadata)


class RFDETRTRTInference(TRTInference):
def __init__(self, model_path: str, image_input_name: str|None=None):
super().__init__(model_path, image_input_name, use_cuda_graph=True)
Expand All @@ -78,6 +86,11 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
needs_fp16=True,
buffer_time=buffer_time,
),
ArtifactBenchmarkRequest(
onnx_path="rf-detr-nano.onnx",
inference_class=RFDETRONNXCPUInference,
buffer_time=buffer_time,
),
ArtifactBenchmarkRequest(
onnx_path="rf-detr-small.onnx",
inference_class=RFDETRTRTInference,
Expand All @@ -90,6 +103,11 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
needs_fp16=True,
buffer_time=buffer_time,
),
ArtifactBenchmarkRequest(
onnx_path="rf-detr-small.onnx",
inference_class=RFDETRONNXCPUInference,
buffer_time=buffer_time,
),
ArtifactBenchmarkRequest(
onnx_path="rf-detr-medium.onnx",
inference_class=RFDETRTRTInference,
Expand All @@ -102,6 +120,11 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
needs_fp16=True,
buffer_time=buffer_time,
),
ArtifactBenchmarkRequest(
onnx_path="rf-detr-medium.onnx",
inference_class=RFDETRONNXCPUInference,
buffer_time=buffer_time,
),
]

results = run_benchmark_on_artifacts(requests, image_dir, annotations_file_path)
Expand Down
4 changes: 2 additions & 2 deletions sab/models/benchmark_rfdetr_seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA
from sab.trt_inference import TRTInference
from sab.models.utils import cxcywh_to_xyxy, ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results

Expand Down Expand Up @@ -56,7 +56,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes.contiguous(), labels.contiguous(), scores.contiguous(), masks.contiguous()


class RFDETRSegONNXInference(ONNXInference):
class RFDETRSegONNXInference(ONNXInferenceCUDA):
def __init__(self, model_path: str, image_input_name: str|None=None):
super().__init__(model_path, image_input_name, prediction_type="segm")

Expand Down
4 changes: 2 additions & 2 deletions sab/models/benchmark_rtdetr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA
from sab.trt_inference import TRTInference
from sab.models.utils import ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results

Expand All @@ -28,7 +28,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes, labels, scores


class RTDETRONNXInference(ONNXInference):
class RTDETRONNXInference(ONNXInferenceCUDA):
def __init__(self, model_path: str, image_input_name: str|None="images"):
super().__init__(model_path, image_input_name)

Expand Down
42 changes: 40 additions & 2 deletions sab/models/benchmark_yolov11.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA, ONNXInferenceCPU
from sab.trt_inference import TRTInference
from sab.models.utils import ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results

Expand Down Expand Up @@ -79,7 +79,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes, labels, scores


class YOLOv11ONNXInference(ONNXInference):
class YOLOv11ONNXInference(ONNXInferenceCUDA):
# reference: https://github.com/ultralytics/ultralytics/blob/3c88bebc9514a4d7f70b771811ddfe3a625ef14d/examples/YOLOv8-OpenCV-ONNX-Python/main.py#L23C57-L31
def preprocess(self, input_image: torch.Tensor) -> tuple[torch.Tensor, dict]:
return preprocess_image(input_image, self.image_input_shape)
Expand All @@ -88,6 +88,14 @@ def postprocess(self, outputs: dict[str, torch.Tensor], metadata: dict) -> tuple
return postprocess_output(outputs, metadata)


class YOLOv11ONNXCPUInference(ONNXInferenceCPU):
    """CPU ONNX-Runtime benchmark wrapper for YOLOv11 detection models.

    Thin adapter: both hooks forward to the module-level helpers shared with
    the CUDA and TensorRT YOLOv11 variants.
    """

    def preprocess(self, input_image: torch.Tensor) -> tuple[torch.Tensor, dict]:
        # self.image_input_shape is provided by the ONNX inference base class.
        shape = self.image_input_shape
        return preprocess_image(input_image, shape)

    def postprocess(self, outputs: dict[str, torch.Tensor], metadata: dict) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        # Forward raw-output decoding to the shared helper.
        return postprocess_output(outputs, metadata)


class YOLOv11TRTInference(TRTInference):
def __init__(self, model_path: str, image_input_name: str|None=None):
super().__init__(model_path, image_input_name, use_cuda_graph=False)
Expand Down Expand Up @@ -115,6 +123,12 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11n_nms_conf_0.01.onnx",
inference_class=YOLOv11ONNXCPUInference,
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11s_nms_conf_0.01.onnx",
inference_class=YOLOv11TRTInference,
Expand All @@ -129,6 +143,12 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11s_nms_conf_0.01.onnx",
inference_class=YOLOv11ONNXCPUInference,
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11m_nms_conf_0.01.onnx",
inference_class=YOLOv11TRTInference,
Expand All @@ -143,6 +163,12 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11m_nms_conf_0.01.onnx",
inference_class=YOLOv11ONNXCPUInference,
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11l_nms_conf_0.01.onnx",
inference_class=YOLOv11TRTInference,
Expand All @@ -157,6 +183,12 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11l_nms_conf_0.01.onnx",
inference_class=YOLOv11ONNXCPUInference,
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11x_nms_conf_0.01.onnx",
inference_class=YOLOv11TRTInference,
Expand All @@ -171,6 +203,12 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11x_nms_conf_0.01.onnx",
inference_class=YOLOv11ONNXCPUInference,
buffer_time=buffer_time,
needs_class_remapping=True,
),
]

results = run_benchmark_on_artifacts(requests, image_dir, annotations_file_path)
Expand Down
4 changes: 2 additions & 2 deletions sab/models/benchmark_yolov11_seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA
from sab.trt_inference import TRTInference
from sab.models.utils import ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results
from sab.models.graph_surgery import fuse_yolo_mask_postprocessing_into_onnx
Expand Down Expand Up @@ -107,7 +107,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes, labels, scores, masks


class YOLOv11SegONNXInference(ONNXInference):
class YOLOv11SegONNXInference(ONNXInferenceCUDA):
def __init__(self, model_path: str, image_input_name: str|None=None):
super().__init__(model_path, image_input_name, prediction_type="segm")

Expand Down
4 changes: 2 additions & 2 deletions sab/models/benchmark_yolov8.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA
from sab.trt_inference import TRTInference
from sab.models.utils import ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results

Expand Down Expand Up @@ -79,7 +79,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes, labels, scores


class YOLOv8ONNXInference(ONNXInference):
class YOLOv8ONNXInference(ONNXInferenceCUDA):
# reference: https://github.com/ultralytics/ultralytics/blob/3c88bebc9514a4d7f70b771811ddfe3a625ef14d/examples/YOLOv8-OpenCV-ONNX-Python/main.py#L23C57-L31
def preprocess(self, input_image: torch.Tensor) -> tuple[torch.Tensor, dict]:
return preprocess_image(input_image, self.image_input_shape)
Expand Down
4 changes: 2 additions & 2 deletions sab/models/benchmark_yolov8_seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA
from sab.trt_inference import TRTInference
from sab.models.utils import ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results
from sab.models.graph_surgery import fuse_yolo_mask_postprocessing_into_onnx
Expand Down Expand Up @@ -107,7 +107,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes, labels, scores, masks


class YOLOv8SegONNXInference(ONNXInference):
class YOLOv8SegONNXInference(ONNXInferenceCUDA):
def __init__(self, model_path: str, image_input_name: str|None=None):
super().__init__(model_path, image_input_name, prediction_type="segm")

Expand Down
Loading