# SORT

## Overview
SORT is a classic online tracking-by-detection method that predicts object motion with a Kalman filter and matches predicted tracks to detections with the Hungarian algorithm, using Intersection over Union (IoU) as the association cost. The tracker relies only on geometric cues from bounding boxes, without appearance features, so it runs extremely fast and scales to hundreds of frames per second on typical hardware. Detections from a strong CNN detector feed SORT, which updates each track's state with a constant-velocity motion model and prunes stale tracks. Because SORT has no explicit re-identification or appearance cues, it is prone to identity switches and fragmented tracks under long occlusions or heavy crowding.
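The core per-frame step is an assignment problem: each predicted track box is matched to at most one detection so that total IoU is maximized, and pairs below a minimum IoU stay unmatched. The sketch below illustrates that step in isolation; it is not the library's implementation, and the `iou_matrix` and `associate` helpers, the example boxes, and the 0.3 threshold are illustrative assumptions.

```python
import numpy as np
from scipy.optimize import linear_sum_assignment


def iou_matrix(tracks: np.ndarray, detections: np.ndarray) -> np.ndarray:
    """Pairwise IoU between predicted track boxes and detections (xyxy)."""
    ious = np.zeros((len(tracks), len(detections)))
    for t, (tx1, ty1, tx2, ty2) in enumerate(tracks):
        for d, (dx1, dy1, dx2, dy2) in enumerate(detections):
            ix1, iy1 = max(tx1, dx1), max(ty1, dy1)
            ix2, iy2 = min(tx2, dx2), min(ty2, dy2)
            inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
            union = (tx2 - tx1) * (ty2 - ty1) + (dx2 - dx1) * (dy2 - dy1) - inter
            ious[t, d] = inter / union if union > 0 else 0.0
    return ious


def associate(tracks, detections, iou_threshold=0.3):
    """Hungarian matching on IoU; pairs below the threshold remain unmatched."""
    ious = iou_matrix(tracks, detections)
    track_idx, det_idx = linear_sum_assignment(-ious)  # maximize total IoU
    matches = [(t, d) for t, d in zip(track_idx, det_idx) if ious[t, d] >= iou_threshold]
    unmatched_tracks = set(range(len(tracks))) - {t for t, _ in matches}
    unmatched_dets = set(range(len(detections))) - {d for _, d in matches}
    return matches, unmatched_tracks, unmatched_dets


# Example: the first track clearly overlaps the first detection; the rest stay unmatched.
predicted = np.array([[10, 10, 50, 50], [200, 200, 240, 240]], dtype=float)
observed = np.array([[12, 11, 52, 49], [400, 400, 440, 440]], dtype=float)
print(associate(predicted, observed, iou_threshold=0.3))
```

Unmatched detections become candidate new tracks, while tracks that stay unmatched for too long are pruned.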
## Benchmarks
For comparisons with other trackers, plus full details on the datasets and evaluation metrics used, see the benchmarks page.
| Dataset | HOTA | IDF1 | MOTA |
|---|---|---|---|
| MOT17 | 58.4 | 69.9 | 67.2 |
| SportsMOT | 70.9 | 68.9 | 95.7 |
| SoccerNet | 81.6 | 76.2 | 95.1 |
## Run on video, webcam, or RTSP stream
These examples use OpenCV for decoding and display. Replace <SOURCE_VIDEO_PATH>, <WEBCAM_INDEX>, and <RTSP_STREAM_URL> with your inputs. <WEBCAM_INDEX> is an integer (not a string), usually 0 for the default camera.
Video file:

```python
import cv2
import supervision as sv
from rfdetr import RFDETRMedium
from trackers import SORTTracker

tracker = SORTTracker()
model = RFDETRMedium()
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()

video_capture = cv2.VideoCapture("<SOURCE_VIDEO_PATH>")
if not video_capture.isOpened():
    raise RuntimeError("Failed to open video source")

while True:
    success, frame_bgr = video_capture.read()
    if not success:
        break

    # RF-DETR expects RGB input; OpenCV decodes frames as BGR.
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    detections = model.predict(frame_rgb)
    detections = tracker.update(detections)

    annotated_frame = box_annotator.annotate(frame_bgr, detections)
    annotated_frame = label_annotator.annotate(
        annotated_frame, detections, labels=detections.tracker_id
    )

    cv2.imshow("RF-DETR + SORT", annotated_frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

video_capture.release()
cv2.destroyAllWindows()
```
Webcam:

```python
import cv2
import supervision as sv
from rfdetr import RFDETRMedium
from trackers import SORTTracker

tracker = SORTTracker()
model = RFDETRMedium()
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()

# Pass the camera index as an integer, e.g. cv2.VideoCapture(0).
video_capture = cv2.VideoCapture(<WEBCAM_INDEX>)
if not video_capture.isOpened():
    raise RuntimeError("Failed to open webcam")

while True:
    success, frame_bgr = video_capture.read()
    if not success:
        break

    # RF-DETR expects RGB input; OpenCV decodes frames as BGR.
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    detections = model.predict(frame_rgb)
    detections = tracker.update(detections)

    annotated_frame = box_annotator.annotate(frame_bgr, detections)
    annotated_frame = label_annotator.annotate(
        annotated_frame, detections, labels=detections.tracker_id
    )

    cv2.imshow("RF-DETR + SORT", annotated_frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

video_capture.release()
cv2.destroyAllWindows()
```
RTSP stream:

```python
import cv2
import supervision as sv
from rfdetr import RFDETRMedium
from trackers import SORTTracker

tracker = SORTTracker()
model = RFDETRMedium()
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()

video_capture = cv2.VideoCapture("<RTSP_STREAM_URL>")
if not video_capture.isOpened():
    raise RuntimeError("Failed to open RTSP stream")

while True:
    success, frame_bgr = video_capture.read()
    if not success:
        break

    # RF-DETR expects RGB input; OpenCV decodes frames as BGR.
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    detections = model.predict(frame_rgb)
    detections = tracker.update(detections)

    annotated_frame = box_annotator.annotate(frame_bgr, detections)
    annotated_frame = label_annotator.annotate(
        annotated_frame, detections, labels=detections.tracker_id
    )

    cv2.imshow("RF-DETR + SORT", annotated_frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

video_capture.release()
cv2.destroyAllWindows()
```
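If you want to save the tracked output instead of displaying it, the same detector, tracker, and annotators can be driven by supervision's `process_video` helper, which reads a source video and writes an annotated copy. This is a sketch rather than part of the examples above; `<TARGET_VIDEO_PATH>` is a placeholder for the output file path.

```python
import cv2
import supervision as sv
from rfdetr import RFDETRMedium
from trackers import SORTTracker

tracker = SORTTracker()
model = RFDETRMedium()
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()


def callback(frame_bgr, _index):
    # process_video supplies BGR frames; RF-DETR expects RGB.
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    detections = model.predict(frame_rgb)
    detections = tracker.update(detections)
    annotated = box_annotator.annotate(frame_bgr.copy(), detections)
    return label_annotator.annotate(annotated, detections, labels=detections.tracker_id)


sv.process_video(
    source_path="<SOURCE_VIDEO_PATH>",
    target_path="<TARGET_VIDEO_PATH>",
    callback=callback,
)
```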
## API
### trackers.core.sort.tracker.SORTTracker

Bases: `BaseTracker`
Implements SORT (Simple Online and Realtime Tracking).
SORT is a pragmatic approach to multiple object tracking with a focus on simplicity and speed. It uses a Kalman filter for motion prediction and the Hungarian algorithm or simple IOU matching for data association.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `lost_track_buffer` | `int` | Number of frames to buffer when a track is lost. Increasing `lost_track_buffer` enhances occlusion handling, significantly improving tracking through occlusions, but may increase the possibility of ID switching for objects with similar appearance. | `30` |
| `frame_rate` | `float` | Frame rate of the video (frames per second). Used to calculate the maximum time a track can be lost. | `30.0` |
| `track_activation_threshold` | `float` | Detection confidence threshold for track activation. Only detections with confidence above this threshold will create new tracks. Increasing this threshold reduces false positives but may miss real objects with low confidence. | `0.25` |
| `minimum_consecutive_frames` | `int` | Number of consecutive frames that an object must be tracked before it is considered a 'valid' track. Increasing this value suppresses accidental tracks created by false or duplicate detections, but risks missing short-lived objects. | `3` |
| `minimum_iou_threshold` | `float` | IOU threshold for associating detections to existing tracks. | `0.3` |
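For reference, the parameters above map one-to-one onto keyword arguments of the constructor. The sketch below simply spells out the documented defaults as a starting point for tuning:

```python
from trackers import SORTTracker

# Documented defaults, written out explicitly for clarity.
tracker = SORTTracker(
    lost_track_buffer=30,
    frame_rate=30.0,
    track_activation_threshold=0.25,
    minimum_consecutive_frames=3,
    minimum_iou_threshold=0.3,
)
```

Raising `track_activation_threshold` and `minimum_consecutive_frames` trades recall for fewer spurious tracks, while a larger `lost_track_buffer` keeps identities alive through longer occlusions.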
#### update(detections)
Updates the tracker state with new detections.
Performs Kalman filter prediction, associates detections with existing trackers based on IOU, updates matched trackers, and initializes new trackers for unmatched high-confidence detections.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `detections` | `Detections` | The latest set of object detections from a frame. | *required* |
Returns:

| Type | Description |
|---|---|
| `Detections` | A copy of the input detections, augmented with assigned tracker IDs. |
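As a small, self-contained illustration of what `update` returns, the sketch below feeds a couple of synthetic detections through a tracker for several frames and reads the assigned IDs back from the returned `Detections`. The boxes, confidences, and class IDs are made up, and when an ID first appears depends on `track_activation_threshold` and `minimum_consecutive_frames`.

```python
import numpy as np
import supervision as sv
from trackers import SORTTracker

tracker = SORTTracker()

# Two synthetic detections standing in for real detector output.
detections = sv.Detections(
    xyxy=np.array([[10.0, 10.0, 50.0, 50.0], [200.0, 200.0, 260.0, 260.0]]),
    confidence=np.array([0.9, 0.8]),
    class_id=np.array([0, 0]),
)

# Feed the same detections for a few "frames" so the tracks get confirmed.
for _ in range(5):
    tracked = tracker.update(detections)

for xyxy, tracker_id in zip(tracked.xyxy, tracked.tracker_id):
    print(f"track {tracker_id}: {xyxy}")
```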
#### reset()
Resets the tracker's internal state.
Clears all active tracks and resets the track ID counter.
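A typical use of `reset()` is between independent clips or streams processed by the same tracker instance, so that stale tracks are discarded and ID numbering restarts. A minimal sketch, with `clip_a.mp4` and `clip_b.mp4` as placeholder paths:

```python
from trackers import SORTTracker

tracker = SORTTracker()

for video_path in ["clip_a.mp4", "clip_b.mp4"]:  # placeholder paths
    # ... run the per-frame detect -> tracker.update() loop on video_path ...
    tracker.reset()  # drop all active tracks and restart the track ID counter
```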