Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions sab/clock_watch.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,56 @@ def enable_persistence(enable: bool) -> None:
run(["sudo", "nvidia-smi", "-pm", "1" if enable else "0"])


class CPUFrequencyMonitor:
"""Monitors CPU frequency drift during a benchmark run via /proc/cpuinfo."""

def __init__(self, tolerance_mhz: float = 50.0):
self._tolerance_mhz = tolerance_mhz
self._baseline_freqs: list[float] | None = None
self._end_freqs: list[float] | None = None
self._drifted = False

@staticmethod
def _read_cpu_frequencies() -> list[float]:
freqs = []
with open("/proc/cpuinfo") as f:
for line in f:
if line.startswith("cpu MHz"):
freqs.append(float(line.split(":")[1].strip()))
return freqs

def __enter__(self):
try:
self._baseline_freqs = self._read_cpu_frequencies()
except OSError:
self._baseline_freqs = None
return self

def __exit__(self, exc_type, exc_val, exc_tb):
if self._baseline_freqs is None:
return
try:
self._end_freqs = self._read_cpu_frequencies()
except OSError:
return
for before, after in zip(self._baseline_freqs, self._end_freqs):
if abs(before - after) > self._tolerance_mhz:
self._drifted = True
return

def did_drift(self) -> bool:
return self._drifted

def get_summary(self) -> dict | None:
if self._baseline_freqs is None or self._end_freqs is None:
return None
return {
"baseline_mean_mhz": sum(self._baseline_freqs) / len(self._baseline_freqs),
"end_mean_mhz": sum(self._end_freqs) / len(self._end_freqs),
"max_drift_mhz": max(abs(b - e) for b, e in zip(self._baseline_freqs, self._end_freqs)),
}


def main():
signal.signal(signal.SIGINT, lambda *_: sys.exit(0)) # clean Ctrl-C
print("🟢 Watching for any GPU clock changes (press Ctrl-C to quit)")
Expand Down
6 changes: 5 additions & 1 deletion sab/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from tqdm import tqdm
import time

from sab.onnx_inference import ONNXInferenceCPU


def evaluate(inference, image_dir: str, annotations_file_path: str, class_mapping: dict[int, str]|None=None, buffer_time: float=0.0, output_file_name: str|None=None, max_images: int|None=None, max_dets: int=100):
predictions = []
Expand All @@ -29,7 +31,9 @@ def evaluate(inference, image_dir: str, annotations_file_path: str, class_mappin

image = Image.open(image_path).convert("RGB")
initial_shape = image.size
image = TF.to_tensor(image).cuda()
image = TF.to_tensor(image)
if not isinstance(inference, ONNXInferenceCPU):
image = image.cuda()

if inference.prediction_type == "bbox":
xyxy, class_id, score = inference.infer(image)
Expand Down
4 changes: 2 additions & 2 deletions sab/models/benchmark_dfine.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA
from sab.trt_inference import TRTInference
from sab.models.utils import ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results

Expand All @@ -28,7 +28,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes, labels, scores


class DFINEONNXInference(ONNXInference):
class DFINEONNXInference(ONNXInferenceCUDA):
def __init__(self, model_path: str, image_input_name: str|None="images"):
super().__init__(model_path, image_input_name)

Expand Down
4 changes: 2 additions & 2 deletions sab/models/benchmark_lwdetr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
import fire

from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA
from sab.trt_inference import TRTInference
from sab.models.utils import cxcywh_to_xyxy, ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results

Expand Down Expand Up @@ -39,7 +39,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes.contiguous(), labels.contiguous(), scores.contiguous()


class LWDETRONNXInference(ONNXInference):
class LWDETRONNXInference(ONNXInferenceCUDA):
def preprocess(self, input_image: torch.Tensor) -> tuple[torch.Tensor, dict]:
return preprocess_image(input_image, self.image_input_shape)

Expand Down
27 changes: 25 additions & 2 deletions sab/models/benchmark_rfdetr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA, ONNXInferenceCPU
from sab.trt_inference import TRTInference
from sab.models.utils import cxcywh_to_xyxy, ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results

Expand Down Expand Up @@ -45,14 +45,22 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes.contiguous(), labels.contiguous(), scores.contiguous()


class RFDETRONNXInference(ONNXInference):
class RFDETRONNXInference(ONNXInferenceCUDA):
    """CUDA ONNX-Runtime benchmark wrapper for RF-DETR detection models.

    Delegates both steps to the module-level helpers so the pre/post logic is
    shared with the other RF-DETR inference variants.
    """

    def preprocess(self, input_image: torch.Tensor) -> tuple[torch.Tensor, dict]:
        # Delegate to the shared helper; self.image_input_shape comes from the base class.
        target_shape = self.image_input_shape
        return preprocess_image(input_image, target_shape)

    def postprocess(self, outputs: dict[str, torch.Tensor], metadata: dict) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        # Delegate raw-output decoding to the shared helper.
        return postprocess_output(outputs, metadata)


class RFDETRONNXCPUInference(ONNXInferenceCPU):
    """CPU ONNX-Runtime benchmark wrapper for RF-DETR detection models.

    Same pre/post pipeline as the CUDA variant; only the execution backend
    (via the base class) differs.
    """

    def preprocess(self, input_image: torch.Tensor) -> tuple[torch.Tensor, dict]:
        # Shared helper handles resizing/metadata; shape is supplied by the base class.
        shape = self.image_input_shape
        return preprocess_image(input_image, shape)

    def postprocess(self, outputs: dict[str, torch.Tensor], metadata: dict) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        # Shared helper decodes the raw outputs into (boxes, labels, scores).
        return postprocess_output(outputs, metadata)


class RFDETRTRTInference(TRTInference):
def __init__(self, model_path: str, image_input_name: str|None=None):
super().__init__(model_path, image_input_name, use_cuda_graph=True)
Expand All @@ -78,6 +86,11 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
needs_fp16=True,
buffer_time=buffer_time,
),
ArtifactBenchmarkRequest(
onnx_path="rf-detr-nano.onnx",
inference_class=RFDETRONNXCPUInference,
buffer_time=buffer_time,
),
ArtifactBenchmarkRequest(
onnx_path="rf-detr-small.onnx",
inference_class=RFDETRTRTInference,
Expand All @@ -90,6 +103,11 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
needs_fp16=True,
buffer_time=buffer_time,
),
ArtifactBenchmarkRequest(
onnx_path="rf-detr-small.onnx",
inference_class=RFDETRONNXCPUInference,
buffer_time=buffer_time,
),
ArtifactBenchmarkRequest(
onnx_path="rf-detr-medium.onnx",
inference_class=RFDETRTRTInference,
Expand All @@ -102,6 +120,11 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
needs_fp16=True,
buffer_time=buffer_time,
),
ArtifactBenchmarkRequest(
onnx_path="rf-detr-medium.onnx",
inference_class=RFDETRONNXCPUInference,
buffer_time=buffer_time,
),
]

results = run_benchmark_on_artifacts(requests, image_dir, annotations_file_path)
Expand Down
4 changes: 2 additions & 2 deletions sab/models/benchmark_rfdetr_seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA
from sab.trt_inference import TRTInference
from sab.models.utils import cxcywh_to_xyxy, ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results

Expand Down Expand Up @@ -56,7 +56,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes.contiguous(), labels.contiguous(), scores.contiguous(), masks.contiguous()


class RFDETRSegONNXInference(ONNXInference):
class RFDETRSegONNXInference(ONNXInferenceCUDA):
def __init__(self, model_path: str, image_input_name: str|None=None):
super().__init__(model_path, image_input_name, prediction_type="segm")

Expand Down
4 changes: 2 additions & 2 deletions sab/models/benchmark_rtdetr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA
from sab.trt_inference import TRTInference
from sab.models.utils import ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results

Expand All @@ -28,7 +28,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes, labels, scores


class RTDETRONNXInference(ONNXInference):
class RTDETRONNXInference(ONNXInferenceCUDA):
def __init__(self, model_path: str, image_input_name: str|None="images"):
super().__init__(model_path, image_input_name)

Expand Down
42 changes: 40 additions & 2 deletions sab/models/benchmark_yolov11.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA, ONNXInferenceCPU
from sab.trt_inference import TRTInference
from sab.models.utils import ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results

Expand Down Expand Up @@ -79,7 +79,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes, labels, scores


class YOLOv11ONNXInference(ONNXInference):
class YOLOv11ONNXInference(ONNXInferenceCUDA):
# reference: https://github.com/ultralytics/ultralytics/blob/3c88bebc9514a4d7f70b771811ddfe3a625ef14d/examples/YOLOv8-OpenCV-ONNX-Python/main.py#L23C57-L31
def preprocess(self, input_image: torch.Tensor) -> tuple[torch.Tensor, dict]:
return preprocess_image(input_image, self.image_input_shape)
Expand All @@ -88,6 +88,14 @@ def postprocess(self, outputs: dict[str, torch.Tensor], metadata: dict) -> tuple
return postprocess_output(outputs, metadata)


class YOLOv11ONNXCPUInference(ONNXInferenceCPU):
    """CPU ONNX-Runtime benchmark wrapper for YOLOv11 detection models.

    Thin adapter: both hooks forward to the module-level helpers shared with
    the CUDA and TensorRT YOLOv11 variants.
    """

    def preprocess(self, input_image: torch.Tensor) -> tuple[torch.Tensor, dict]:
        # self.image_input_shape is provided by the ONNX inference base class.
        shape = self.image_input_shape
        return preprocess_image(input_image, shape)

    def postprocess(self, outputs: dict[str, torch.Tensor], metadata: dict) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        # Forward raw-output decoding to the shared helper.
        return postprocess_output(outputs, metadata)


class YOLOv11TRTInference(TRTInference):
def __init__(self, model_path: str, image_input_name: str|None=None):
super().__init__(model_path, image_input_name, use_cuda_graph=False)
Expand Down Expand Up @@ -115,6 +123,12 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11n_nms_conf_0.01.onnx",
inference_class=YOLOv11ONNXCPUInference,
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11s_nms_conf_0.01.onnx",
inference_class=YOLOv11TRTInference,
Expand All @@ -129,6 +143,12 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11s_nms_conf_0.01.onnx",
inference_class=YOLOv11ONNXCPUInference,
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11m_nms_conf_0.01.onnx",
inference_class=YOLOv11TRTInference,
Expand All @@ -143,6 +163,12 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11m_nms_conf_0.01.onnx",
inference_class=YOLOv11ONNXCPUInference,
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11l_nms_conf_0.01.onnx",
inference_class=YOLOv11TRTInference,
Expand All @@ -157,6 +183,12 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11l_nms_conf_0.01.onnx",
inference_class=YOLOv11ONNXCPUInference,
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11x_nms_conf_0.01.onnx",
inference_class=YOLOv11TRTInference,
Expand All @@ -171,6 +203,12 @@ def main(image_dir: str, annotations_file_path: str, buffer_time: float = 0.0, o
buffer_time=buffer_time,
needs_class_remapping=True,
),
ArtifactBenchmarkRequest(
onnx_path="yolo11x_nms_conf_0.01.onnx",
inference_class=YOLOv11ONNXCPUInference,
buffer_time=buffer_time,
needs_class_remapping=True,
),
]

results = run_benchmark_on_artifacts(requests, image_dir, annotations_file_path)
Expand Down
4 changes: 2 additions & 2 deletions sab/models/benchmark_yolov11_seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA
from sab.trt_inference import TRTInference
from sab.models.utils import ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results
from sab.models.graph_surgery import fuse_yolo_mask_postprocessing_into_onnx
Expand Down Expand Up @@ -107,7 +107,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes, labels, scores, masks


class YOLOv11SegONNXInference(ONNXInference):
class YOLOv11SegONNXInference(ONNXInferenceCUDA):
def __init__(self, model_path: str, image_input_name: str|None=None):
super().__init__(model_path, image_input_name, prediction_type="segm")

Expand Down
4 changes: 2 additions & 2 deletions sab/models/benchmark_yolov8.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA
from sab.trt_inference import TRTInference
from sab.models.utils import ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results

Expand Down Expand Up @@ -79,7 +79,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes, labels, scores


class YOLOv8ONNXInference(ONNXInference):
class YOLOv8ONNXInference(ONNXInferenceCUDA):
# reference: https://github.com/ultralytics/ultralytics/blob/3c88bebc9514a4d7f70b771811ddfe3a625ef14d/examples/YOLOv8-OpenCV-ONNX-Python/main.py#L23C57-L31
def preprocess(self, input_image: torch.Tensor) -> tuple[torch.Tensor, dict]:
return preprocess_image(input_image, self.image_input_shape)
Expand Down
4 changes: 2 additions & 2 deletions sab/models/benchmark_yolov8_seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import fire


from sab.onnx_inference import ONNXInference
from sab.onnx_inference import ONNXInferenceCUDA
from sab.trt_inference import TRTInference
from sab.models.utils import ArtifactBenchmarkRequest, run_benchmark_on_artifacts, pretty_print_results
from sab.models.graph_surgery import fuse_yolo_mask_postprocessing_into_onnx
Expand Down Expand Up @@ -107,7 +107,7 @@ def postprocess_output(outputs: dict[str, torch.Tensor], metadata: dict) -> tupl
return bboxes, labels, scores, masks


class YOLOv8SegONNXInference(ONNXInference):
class YOLOv8SegONNXInference(ONNXInferenceCUDA):
def __init__(self, model_path: str, image_input_name: str|None=None):
super().__init__(model_path, image_input_name, prediction_type="segm")

Expand Down
Loading