
ByteTrack

Overview

ByteTrack builds on the same Kalman filter plus Hungarian algorithm framework as SORT, but changes the data association strategy to use nearly every detection box regardless of confidence score. It runs two-stage matching: high-confidence detections are matched to tracks first, then low-confidence detections are matched to any remaining unmatched tracks by IoU. This reduces missed tracks and fragmentation for occluded or weakly detected objects while retaining SORT's simplicity and high frame rates. Because it recovers valid low-score detections instead of discarding them, ByteTrack achieved state-of-the-art results on standard MOT benchmarks at real-time speeds.
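In concrete terms, the two-stage association can be sketched as follows. This is a simplified illustration rather than this library's implementation: greedy IoU matching stands in for the Hungarian algorithm, and Kalman prediction and track lifecycle management are omitted. All names (byte_associate, greedy_match) are hypothetical.

```python
import numpy as np

def iou_matrix(a, b):
    """Pairwise IoU between two sets of [x1, y1, x2, y2] boxes."""
    a = np.asarray(a, dtype=float)[:, None, :]   # (N, 1, 4)
    b = np.asarray(b, dtype=float)[None, :, :]   # (1, M, 4)
    x1 = np.maximum(a[..., 0], b[..., 0])
    y1 = np.maximum(a[..., 1], b[..., 1])
    x2 = np.minimum(a[..., 2], b[..., 2])
    y2 = np.minimum(a[..., 3], b[..., 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    area_a = (a[..., 2] - a[..., 0]) * (a[..., 3] - a[..., 1])
    area_b = (b[..., 2] - b[..., 0]) * (b[..., 3] - b[..., 1])
    return inter / (area_a + area_b - inter + 1e-9)

def greedy_match(track_boxes, det_boxes, iou_thresh):
    """Greedy IoU matching (a stand-in for the Hungarian algorithm)."""
    if len(track_boxes) == 0 or len(det_boxes) == 0:
        return [], list(range(len(track_boxes))), list(range(len(det_boxes)))
    ious = iou_matrix(track_boxes, det_boxes)
    matches, used_t, used_d = [], set(), set()
    for t, d in sorted(np.ndindex(ious.shape), key=lambda td: -ious[td]):
        if ious[t, d] < iou_thresh:
            break  # pairs are sorted by IoU, so the rest are below threshold too
        if t not in used_t and d not in used_d:
            matches.append((t, d))
            used_t.add(t)
            used_d.add(d)
    unmatched_t = [t for t in range(len(track_boxes)) if t not in used_t]
    unmatched_d = [d for d in range(len(det_boxes)) if d not in used_d]
    return matches, unmatched_t, unmatched_d

def byte_associate(track_boxes, det_boxes, det_scores,
                   high_thresh=0.6, iou_thresh=0.1):
    """BYTE association: match high-confidence detections first, then try
    low-confidence detections against the tracks left unmatched."""
    det_boxes = np.asarray(det_boxes, dtype=float)
    det_scores = np.asarray(det_scores, dtype=float)
    high_idx = np.flatnonzero(det_scores >= high_thresh)
    low_idx = np.flatnonzero(det_scores < high_thresh)

    # Stage 1: high-confidence detections vs. all tracks.
    stage1, leftover_tracks, _ = greedy_match(
        track_boxes, det_boxes[high_idx], iou_thresh)
    matches = [(t, int(high_idx[d])) for t, d in stage1]

    # Stage 2: low-confidence detections vs. tracks still unmatched.
    leftover_boxes = [track_boxes[t] for t in leftover_tracks]
    stage2, _, _ = greedy_match(leftover_boxes, det_boxes[low_idx], iou_thresh)
    matches += [(leftover_tracks[t], int(low_idx[d])) for t, d in stage2]
    return matches

tracks = [[0, 0, 10, 10], [20, 0, 30, 10]]   # last known track boxes
dets = [[0, 0, 10, 10], [21, 0, 31, 10]]     # current detections
# Detection 1 only scores 0.3, but stage 2 still recovers it for track 1.
print(byte_associate(tracks, dets, [0.9, 0.3]))  # [(0, 0), (1, 1)]
```

The key design point is that low-score boxes never create new tracks; they are only allowed to keep an existing track alive.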

Benchmarks

For comparisons with other trackers, plus full details on the datasets and evaluation metrics used, see the benchmarks page.

Dataset     HOTA   IDF1   MOTA
MOT17       60.1   73.2   74.1
SportsMOT   73.0   72.5   96.4
SoccerNet   84.0   78.1   97.8

Run on video, webcam, or RTSP stream

These examples use OpenCV for decoding and display. Replace <SOURCE_VIDEO_PATH>, <WEBCAM_INDEX>, and <RTSP_STREAM_URL> with your inputs. <WEBCAM_INDEX> is usually 0 for the default camera.

Video file:

import cv2
import supervision as sv
from rfdetr import RFDETRMedium
from trackers import ByteTrackTracker

tracker = ByteTrackTracker()
model = RFDETRMedium()

box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()

video_capture = cv2.VideoCapture("<SOURCE_VIDEO_PATH>")
if not video_capture.isOpened():
    raise RuntimeError("Failed to open video source")

while True:
    success, frame_bgr = video_capture.read()
    if not success:
        break

    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    detections = model.predict(frame_rgb)
    detections = tracker.update(detections)

    annotated_frame = box_annotator.annotate(frame_bgr, detections)
    labels = [str(tracker_id) for tracker_id in detections.tracker_id]
    annotated_frame = label_annotator.annotate(annotated_frame, detections, labels=labels)

    cv2.imshow("RF-DETR + ByteTrack", annotated_frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

video_capture.release()
cv2.destroyAllWindows()

Webcam:

import cv2
import supervision as sv
from rfdetr import RFDETRMedium
from trackers import ByteTrackTracker

tracker = ByteTrackTracker()
model = RFDETRMedium()

box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()

video_capture = cv2.VideoCapture(<WEBCAM_INDEX>)  # integer device index, e.g. 0, not a string
if not video_capture.isOpened():
    raise RuntimeError("Failed to open webcam")

while True:
    success, frame_bgr = video_capture.read()
    if not success:
        break

    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    detections = model.predict(frame_rgb)
    detections = tracker.update(detections)

    annotated_frame = box_annotator.annotate(frame_bgr, detections)
    labels = [str(tracker_id) for tracker_id in detections.tracker_id]
    annotated_frame = label_annotator.annotate(annotated_frame, detections, labels=labels)

    cv2.imshow("RF-DETR + ByteTrack", annotated_frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

video_capture.release()
cv2.destroyAllWindows()

RTSP stream:

import cv2
import supervision as sv
from rfdetr import RFDETRMedium
from trackers import ByteTrackTracker

tracker = ByteTrackTracker()
model = RFDETRMedium()

box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()

video_capture = cv2.VideoCapture("<RTSP_STREAM_URL>")
if not video_capture.isOpened():
    raise RuntimeError("Failed to open RTSP stream")

while True:
    success, frame_bgr = video_capture.read()
    if not success:
        break

    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    detections = model.predict(frame_rgb)
    detections = tracker.update(detections)

    annotated_frame = box_annotator.annotate(frame_bgr, detections)
    labels = [str(tracker_id) for tracker_id in detections.tracker_id]
    annotated_frame = label_annotator.annotate(annotated_frame, detections, labels=labels)

    cv2.imshow("RF-DETR + ByteTrack", annotated_frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

video_capture.release()
cv2.destroyAllWindows()

API

trackers.core.bytetrack.tracker.ByteTrackTracker

Bases: BaseTracker

Implements ByteTrack.

ByteTrack is a simple, effective, and generic multi-object tracking method that improves upon tracking-by-detection by associating every detection box instead of discarding low-score ones. This makes it more robust to occlusions. It uses a two-stage association process and builds on established techniques like the Kalman Filter for motion prediction and the Hungarian algorithm for data association.

Parameters:

lost_track_buffer (int, default 30)
Number of frames to buffer when a track is lost. Increasing lost_track_buffer improves tracking through occlusions, but raises the chance of ID switches for objects that genuinely leave the scene.

frame_rate (float, default 30.0)
Frame rate of the video in frames per second. Used to calculate the maximum time a track can remain lost.

track_activation_threshold (float, default 0.7)
Detection confidence threshold for track activation. Only detections with confidence above this threshold create new tracks. Raising it can reduce false positives but may miss real objects detected with low confidence.

minimum_consecutive_frames (int, default 2)
Number of consecutive frames an object must be tracked before it is considered a 'valid'/'active' track. Increasing minimum_consecutive_frames prevents accidental tracks from false or double detections, but risks missing shorter tracks. Until a track becomes valid, it is assigned -1 as its tracker_id.

minimum_iou_threshold (float, default 0.1)
IoU threshold for associating detections with existing tracks. Pairs whose IoU falls below this threshold are not associated. A higher value associates only boxes with more overlapping area.

high_conf_det_threshold (float, default 0.6)
Threshold for classifying a detection as high-confidence. A higher value classifies only higher-confidence detections as 'high probability' per the ByteTrack algorithm; these are used in the first association stage.
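As a concrete note on how lost_track_buffer and frame_rate combine: ByteTrack-style trackers commonly define the buffer at a nominal 30 FPS and rescale it by the actual frame rate, so the lost-track window covers the same wall-clock time at any FPS. The formula below follows the reference ByteTrack implementation and is an assumption about this library, not taken from its source:

```python
def max_frames_lost(lost_track_buffer: int = 30, frame_rate: float = 30.0) -> int:
    # The buffer is specified at a nominal 30 FPS and rescaled to the video's
    # actual frame rate, keeping the lost-track window constant in seconds.
    return int(frame_rate / 30.0 * lost_track_buffer)

print(max_frames_lost())          # 30 frames at the default 30 FPS (1 second)
print(max_frames_lost(30, 60.0))  # 60 frames at 60 FPS (still 1 second)
```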

update(detections)

Updates the tracker state with new detections.

Performs Kalman Filter prediction, associates detections with existing tracks based on IoU, updates matched tracks, and initializes new tracks for unmatched high-confidence detections.

Parameters:

detections (Detections, required)
The latest set of object detections from a frame.

Returns:

Detections
A copy of the input detections, augmented with an assigned tracker_id for each successfully tracked object. Detections not associated with a track will not have a tracker_id. The order of the detections is not guaranteed to be the same as the input detections.
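Because provisional tracks carry -1 as tracker_id until they reach minimum_consecutive_frames, display code often converts ids to strings and blanks out provisional ones before passing them as labels. A minimal sketch (ids_to_labels is a hypothetical helper, not part of this library):

```python
import numpy as np

def ids_to_labels(tracker_ids):
    # Turn integer track ids into display strings; hide provisional (-1) tracks.
    return ["" if i == -1 else f"#{i}" for i in np.asarray(tracker_ids)]

print(ids_to_labels([3, -1, 7]))  # ['#3', '', '#7']
```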

reset()

Resets the tracker's internal state.

Clears all active tracks and resets the track ID counter.
