diff --git a/src/supervision/detection/core.py b/src/supervision/detection/core.py index baabc56854..976819a6c9 100644 --- a/src/supervision/detection/core.py +++ b/src/supervision/detection/core.py @@ -995,423 +995,6 @@ def from_lmm( ValueError: If the LMM is invalid, required arguments are missing, or disallowed arguments are provided. ValueError: If the specified LMM is not supported. - - !!! example "PaliGemma" - ```python - - import supervision as sv - - paligemma_result = " cat" - detections = sv.Detections.from_lmm( - sv.LMM.PALIGEMMA, - paligemma_result, - resolution_wh=(1000, 1000), - classes=['cat', 'dog'] - ) - detections.xyxy - # array([[250., 250., 750., 750.]]) - - detections.class_id - # array([0]) - - detections.data - # {'class_name': array(['cat'], dtype='" - } - }, - { - "text": "Detect all the cats and dogs in the image..." - } - ] - } - ] - } - ``` - To get the best results from Google Gemini 2.0, use the following prompt. - - ``` - Detect all the cats and dogs in the image. The box_2d should be - [ymin, xmin, ymax, xmax] normalized to 0-1000. - ``` - - ```python - import supervision as sv - - gemini_response_text = \"\"\"```json - [ - {"box_2d": [543, 40, 728, 200], "label": "cat", "id": 1}, - {"box_2d": [653, 352, 820, 522], "label": "dog", "id": 2} - ] - ```\"\"\" - - detections = sv.Detections.from_lmm( - sv.LMM.GOOGLE_GEMINI_2_0, - gemini_response_text, - resolution_wh=(1000, 1000), - classes=['cat', 'dog'], - ) - - detections.xyxy - # array([[543., 40., 728., 200.], [653., 352., 820., 522.]]) - - detections.data - # {'class_name': array(['cat', 'dog'], dtype='\\n<|ref|>The giraffe at the front<|/ref|> - ``` - - **For visual grounding, use the following user prompt:** - - ``` - \\n<|grounding|>Detect the giraffes - ``` - - ```python - from PIL import Image - import supervision as sv - - deepseek_vl2_result = "<|ref|>The giraffe at the back<|/ref|><|det|>[[580, 270, 999, 904]]<|/det|><|ref|>The giraffe at the front<|/ref|><|det|>[[26, 31, 632, 998]]<|/det|><|end▁of▁sentence|>" - - detections = sv.Detections.from_vlm( - vlm=sv.VLM.DEEPSEEK_VL_2, result=deepseek_vl2_result, resolution_wh=image.size - ) - - detections.xyxy - # array([[ 420, 293, 724, 982], - # [ 18, 33, 458, 1084]]) - - detections.class_id - # array([0, 1]) - - detections.data - # {'class_name': array(['The giraffe at the back', 'The giraffe at the front'], dtype='\\n<|ref|>The giraffe at the front<|/ref|> - ``` - - **For visual grounding, use the following user prompt:** - - ``` - \\n<|grounding|>Detect the giraffes - ``` - - ```python - from PIL import Image - import supervision as sv - - deepseek_vl2_result = "<|ref|>The giraffe at the back<|/ref|><|det|>[[580, 270, 999, 904]]<|/det|><|ref|>The giraffe at the front<|/ref|><|det|>[[26, 31, 632, 998]]<|/det|><|end▁of▁sentence|>" - - detections = sv.Detections.from_vlm( - vlm=sv.VLM.DEEPSEEK_VL_2, result=deepseek_vl2_result, resolution_wh=image.size - ) - - detections.xyxy - # array([[ 420, 293, 724, 982], - # [ 18, 33, 458, 1084]]) - - detections.class_id - # array([0, 1]) - - detections.data - # {'class_name': array(['The giraffe at the back', 'The giraffe at the front'], dtype=' Detections: def is_empty(self) -> bool: """ - Returns `True` if the `Detections` object is considered empty. + Returns `True` if the `Detections` object is considered empty, + i.e. contains no detections. This check is based solely on the + number of bounding boxes, making it robust to optional fields + (such as `tracker_id`) being empty arrays rather than `None`. """ - empty_detections = Detections.empty() - empty_detections.data = self.data - empty_detections.metadata = self.metadata - return bool(self == empty_detections) + return len(self) == 0 @classmethod def merge(cls, detections_list: list[Detections]) -> Detections: @@ -2268,7 +1462,7 @@ def __getitem__( ```python import supervision as sv - detections = sv.Detections() + detections = sv.Detections(...) first_detection = detections[0] first_10_detections = detections[0:10]