# File: object_detection.py
import datetime
from ultralytics import YOLO
import cv2
from helper import create_video_writer
# define some constants
# detections whose confidence is below this value are skipped
CONFIDENCE_THRESHOLD = 0.8
# colour used for the bounding boxes drawn with cv2.rectangle
GREEN = (0, 255, 0)
# input video to read and annotated video to write
VIDEO_PATH = "2.mp4"
OUTPUT_PATH = "output.mp4"

# initialize the video capture object
video_cap = cv2.VideoCapture(VIDEO_PATH)
# fail fast: without this check an unreadable input is only noticed when the
# first read() fails, and the script ends silently with an empty output file
if not video_cap.isOpened():
    raise OSError(f"Could not open video source: {VIDEO_PATH!r}")

# initialize the video writer object
writer = create_video_writer(video_cap, OUTPUT_PATH)

# load the pre-trained YOLOv8n model
model = YOLO("yolov8n.pt")
# Process the video frame by frame: detect objects, draw a box around every
# confident detection, overlay the FPS, then display and save the frame.
# The try/finally guarantees that the capture, the writer and the preview
# window are released even when the loop is interrupted by an exception
# (including Ctrl+C); otherwise the output file may be left unfinalised.
try:
    while True:
        # start time to compute the fps
        start = datetime.datetime.now()

        ret, frame = video_cap.read()
        # if there are no more frames to process, break out of the loop
        if not ret:
            break

        # run the YOLO model on the frame
        detections = model(frame)[0]

        # loop over the detections
        for data in detections.boxes.data.tolist():
            # index 4 of each row is read as the confidence (i.e., probability)
            confidence = data[4]

            # filter out weak detections by ensuring the
            # confidence is at least the minimum confidence
            if float(confidence) < CONFIDENCE_THRESHOLD:
                continue

            # the first four values are used as the box corners;
            # draw the bounding box on the frame
            xmin, ymin, xmax, ymax = (int(value) for value in data[:4])
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), GREEN, 2)

        # end time to compute the fps
        end = datetime.datetime.now()

        # show the time it took to process 1 frame
        total = (end - start).total_seconds()
        print(f"Time to process 1 frame: {total * 1000:.0f} milliseconds")

        # calculate the frame per second and draw it on the frame;
        # a zero elapsed time (coarse clock resolution) must not crash the run
        fps_value = 1 / total if total > 0 else float("inf")
        fps = f"FPS: {fps_value:.2f}"
        cv2.putText(frame, fps, (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 8)

        # show the frame to our screen and append it to the output video
        cv2.imshow("Frame", frame)
        writer.write(frame)

        # stop early when the user presses "q"
        if cv2.waitKey(1) == ord("q"):
            break
finally:
    video_cap.release()
    writer.release()
    cv2.destroyAllWindows()
# File: object_detection_tracking.py
import datetime
from ultralytics import YOLO
import cv2
from helper import create_video_writer
from deep_sort_realtime.deepsort_tracker import DeepSort
# define some constants
# detections whose confidence is below this value are not passed to the tracker
CONFIDENCE_THRESHOLD = 0.8
# colour used for the track boxes and the label background
GREEN = (0, 255, 0)
# colour used for the track id text
WHITE = (255, 255, 255)
# input video to read and annotated video to write
VIDEO_PATH = "2.mp4"
OUTPUT_PATH = "output.mp4"

# initialize the video capture object
video_cap = cv2.VideoCapture(VIDEO_PATH)
# fail fast: without this check an unreadable input is only noticed when the
# first read() fails, and the script ends silently with an empty output file
if not video_cap.isOpened():
    raise OSError(f"Could not open video source: {VIDEO_PATH!r}")

# initialize the video writer object
writer = create_video_writer(video_cap, OUTPUT_PATH)

# load the pre-trained YOLOv8n model
model = YOLO("yolov8n.pt")

# initialize the DeepSort tracker
# NOTE(review): max_age presumably bounds how long an unmatched track is kept
# before it is dropped - confirm against the deep_sort_realtime documentation
tracker = DeepSort(max_age=50)
# Process the video frame by frame: detect objects, feed the confident
# detections to the tracker, draw every confirmed track with its id, overlay
# the FPS, then display and save the frame.
# The try/finally guarantees that the capture, the writer and the preview
# window are released even when the loop is interrupted by an exception
# (including Ctrl+C); otherwise the output file may be left unfinalised.
try:
    while True:
        # start time to compute the fps
        start = datetime.datetime.now()

        ret, frame = video_cap.read()
        # if there are no more frames to process, break out of the loop
        if not ret:
            break

        # run the YOLO model on the frame
        detections = model(frame)[0]

        # initialize the list of bounding boxes and confidences
        results = []

        ######################################
        # DETECTION
        ######################################

        # loop over the detections
        for data in detections.boxes.data.tolist():
            # index 4 of each row is read as the confidence (i.e., probability)
            confidence = data[4]

            # filter out weak detections by ensuring the
            # confidence is at least the minimum confidence
            if float(confidence) < CONFIDENCE_THRESHOLD:
                continue

            # get the bounding box corners and the class id
            xmin, ymin, xmax, ymax = (int(value) for value in data[:4])
            class_id = int(data[5])

            # add the bounding box (x, y, w, h), confidence and class id
            # to the results list
            results.append(
                [[xmin, ymin, xmax - xmin, ymax - ymin], confidence, class_id]
            )

        ######################################
        # TRACKING
        ######################################

        # update the tracker with the new detections
        tracks = tracker.update_tracks(results, frame=frame)

        # loop over the tracks
        for track in tracks:
            # if the track is not confirmed, ignore it
            if not track.is_confirmed():
                continue

            # get the track id and the bounding box
            track_id = track.track_id
            ltrb = track.to_ltrb()
            xmin, ymin, xmax, ymax = (int(value) for value in ltrb[:4])

            # draw the bounding box, then a small filled label above its
            # top-left corner containing the track id
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), GREEN, 2)
            cv2.rectangle(frame, (xmin, ymin - 20), (xmin + 20, ymin), GREEN, -1)
            cv2.putText(frame, str(track_id), (xmin + 5, ymin - 8),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, WHITE, 2)

        # end time to compute the fps
        end = datetime.datetime.now()

        # show the time it took to process 1 frame (computed once and reused)
        total = (end - start).total_seconds()
        print(f"Time to process 1 frame: {total * 1000:.0f} milliseconds")

        # calculate the frame per second and draw it on the frame;
        # a zero elapsed time (coarse clock resolution) must not crash the run
        fps_value = 1 / total if total > 0 else float("inf")
        fps = f"FPS: {fps_value:.2f}"
        cv2.putText(frame, fps, (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 8)

        # show the frame to our screen and append it to the output video
        cv2.imshow("Frame", frame)
        writer.write(frame)

        # stop early when the user presses "q"
        if cv2.waitKey(1) == ord("q"):
            break
finally:
    video_cap.release()
    writer.release()
    cv2.destroyAllWindows()