yolov8_opencv.py
import numpy as np
import os
import cv2
import time
from ultralytics import YOLO
# define some parameters
CONFIDENCE = 0.5
font_scale = 1
thickness = 1
# loading the YOLOv8 model with the default weight file
model = YOLO("yolov8n.pt")
# loading all the class labels (objects)
labels = open("data/coco.names").read().strip().split("\n")
# generating colors for each object for later plotting
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")
path_name = "images/dog.jpg"
image = cv2.imread(path_name)
file_name = os.path.basename(path_name) # "dog.jpg"
filename, ext = file_name.split(".") # "dog", "jpg"
# measure how much it took in seconds
start = time.perf_counter()
# run inference on the image
# see: https://docs.ultralytics.com/modes/predict/#arguments for full list of arguments
results = model.predict(image, conf=CONFIDENCE)[0]
time_took = time.perf_counter() - start
print(f"Time took: {time_took:.2f}s")
print(results.boxes.data)
# loop over the detections
for data in results.boxes.data.tolist():
# get the bounding box coordinates, confidence, and class id
xmin, ymin, xmax, ymax, confidence, class_id = data
# converting the coordinates and the class id to integers
xmin = int(xmin)
ymin = int(ymin)
xmax = int(xmax)
ymax = int(ymax)
class_id = int(class_id)
# draw a bounding box rectangle and label on the image
color = [int(c) for c in colors[class_id]]
cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color=color, thickness=thickness)
text = f"{labels[class_id]}: {confidence:.2f}"
# calculate text width & height to draw the transparent boxes as background of the text
(text_width, text_height) = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness)[0]
text_offset_x = xmin
text_offset_y = ymin - 5
box_coords = ((text_offset_x, text_offset_y), (text_offset_x + text_width + 2, text_offset_y - text_height))
overlay = image.copy()
cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
# add opacity (transparency to the box)
image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
# now put the text (label: confidence %)
cv2.putText(image, text, (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX,
fontScale=font_scale, color=(0, 0, 0), thickness=thickness)
# display output image
cv2.imshow("Image", image)
cv2.waitKey(0)
# save output image to disk
cv2.imwrite(filename + "_yolo8." + ext, image)
import cv2
import numpy as np
import time
import sys
from ultralytics import YOLO
CONFIDENCE = 0.5
font_scale = 1
thickness = 1
labels = open("data/coco.names").read().strip().split("\n")
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")
model = YOLO("yolov8n.pt")
cap = cv2.VideoCapture(0)
_, image = cap.read()
h, w = image.shape[:2]
fourcc = cv2.VideoWriter_fourcc(*"XVID")
out = cv2.VideoWriter("output.avi", fourcc, 20.0, (w, h))
while True:
_, image = cap.read()
start = time.perf_counter()
# run inference on the image
# see: https://docs.ultralytics.com/modes/predict/#arguments for full list of arguments
results = model.predict(image, conf=CONFIDENCE)[0]
time_took = time.perf_counter() - start
print("Time took:", time_took)
# loop over the detections
for data in results.boxes.data.tolist():
# get the bounding box coordinates, confidence, and class id
xmin, ymin, xmax, ymax, confidence, class_id = data
# converting the coordinates and the class id to integers
xmin = int(xmin)
ymin = int(ymin)
xmax = int(xmax)
ymax = int(ymax)
class_id = int(class_id)
# draw a bounding box rectangle and label on the image
color = [int(c) for c in colors[class_id]]
cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color=color, thickness=thickness)
text = f"{labels[class_id]}: {confidence:.2f}"
# calculate text width & height to draw the transparent boxes as background of the text
(text_width, text_height) = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness)[0]
text_offset_x = xmin
text_offset_y = ymin - 5
box_coords = ((text_offset_x, text_offset_y), (text_offset_x + text_width + 2, text_offset_y - text_height))
overlay = image.copy()
cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
# add opacity (transparency to the box)
image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
# now put the text (label: confidence %)
cv2.putText(image, text, (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX,
fontScale=font_scale, color=(0, 0, 0), thickness=thickness)
# end time to compute the fps
end = time.perf_counter()
# calculate the frame per second and draw it on the frame
fps = f"FPS: {1 / (end - start):.2f}"
cv2.putText(image, fps, (50, 50),
cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 6)
out.write(image)
cv2.imshow("image", image)
if ord("q") == cv2.waitKey(1):
break
cap.release()
cv2.destroyAllWindows()
import cv2
import numpy as np
import time
import sys
from ultralytics import YOLO
# define some parameters
CONFIDENCE = 0.5
font_scale = 1
thickness = 1
labels = open("data/coco.names").read().strip().split("\n")
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")
# loading the YOLOv8 model with the default weight file
model = YOLO("yolov8n.pt")
# read the file from the command line
video_file = sys.argv[1]
cap = cv2.VideoCapture(video_file)
_, image = cap.read()
h, w = image.shape[:2]
fourcc = cv2.VideoWriter_fourcc(*"XVID")
out = cv2.VideoWriter("output.avi", fourcc, 20.0, (w, h))
while True:
_, image = cap.read()
start = time.perf_counter()
results = model.predict(image, conf=CONFIDENCE)[0]
time_took = time.perf_counter() - start
print("Time took:", time_took)
# loop over the detections
for data in results.boxes.data.tolist():
# get the bounding box coordinates, confidence, and class id
xmin, ymin, xmax, ymax, confidence, class_id = data
# converting the coordinates and the class id to integers
xmin = int(xmin)
ymin = int(ymin)
xmax = int(xmax)
ymax = int(ymax)
class_id = int(class_id)
# draw a bounding box rectangle and label on the image
color = [int(c) for c in colors[class_id]]
cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color=color, thickness=thickness)
text = f"{labels[class_id]}: {confidence:.2f}"
# calculate text width & height to draw the transparent boxes as background of the text
(text_width, text_height) = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness)[0]
text_offset_x = xmin
text_offset_y = ymin - 5
box_coords = ((text_offset_x, text_offset_y), (text_offset_x + text_width + 2, text_offset_y - text_height))
try:
overlay = image.copy()
except:
break
cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
# add opacity (transparency to the box)
image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
# now put the text (label: confidence %)
cv2.putText(image, text, (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX,
fontScale=font_scale, color=(0, 0, 0), thickness=thickness)
# end time to compute the fps
end = time.perf_counter()
# calculate the frame per second and draw it on the frame
fps = f"FPS: {1 / (end - start):.2f}"
cv2.putText(image, fps, (50, 50),
cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 6)
out.write(image)
cv2.imshow("image", image)
if ord("q") == cv2.waitKey(1):
break
cap.release()
cv2.destroyAllWindows()
yolo_opencv.py
import cv2
import numpy as np
import time
import sys
import os
CONFIDENCE = 0.5
SCORE_THRESHOLD = 0.5
IOU_THRESHOLD = 0.5
# the neural network configuration
config_path = "cfg/yolov3.cfg"
# the YOLO net weights file
weights_path = "weights/yolov3.weights"
# loading all the class labels (objects)
labels = open("data/coco.names").read().strip().split("\n")
# generating colors for each object for later plotting
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")
# load the YOLO network
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
# path_name = "images/city_scene.jpg"
path_name = sys.argv[1]
image = cv2.imread(path_name)
file_name = os.path.basename(path_name)
filename, ext = file_name.split(".")
h, w = image.shape[:2]
# create 4D blob
blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)
# sets the blob as the input of the network
net.setInput(blob)
# get all the layer names
ln = net.getLayerNames()
try:
ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
except IndexError:
# in case getUnconnectedOutLayers() returns 1D array when CUDA isn't available
ln = [ln[i - 1] for i in net.getUnconnectedOutLayers()]
# feed forward (inference) and get the network output
# measure how much it took in seconds
start = time.perf_counter()
layer_outputs = net.forward(ln)
time_took = time.perf_counter() - start
print(f"Time took: {time_took:.2f}s")
boxes, confidences, class_ids = [], [], []
# loop over each of the layer outputs
for output in layer_outputs:
# loop over each of the object detections
for detection in output:
# extract the class id (label) and confidence (as a probability) of
# the current object detection
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
# discard weak predictions by ensuring the detected
# probability is greater than the minimum probability
if confidence > CONFIDENCE:
# scale the bounding box coordinates back relative to the
# size of the image, keeping in mind that YOLO actually
# returns the center (x, y)-coordinates of the bounding
# box followed by the boxes' width and height
box = detection[:4] * np.array([w, h, w, h])
(centerX, centerY, width, height) = box.astype("int")
# use the center (x, y)-coordinates to derive the top and
# and left corner of the bounding box
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
# update our list of bounding box coordinates, confidences,
# and class IDs
boxes.append([x, y, int(width), int(height)])
confidences.append(float(confidence))
class_ids.append(class_id)
# perform the non maximum suppression given the scores defined before
idxs = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESHOLD, IOU_THRESHOLD)
font_scale = 1
thickness = 1
# ensure at least one detection exists
if len(idxs) > 0:
# loop over the indexes we are keeping
for i in idxs.flatten():
# extract the bounding box coordinates
x, y = boxes[i][0], boxes[i][1]
w, h = boxes[i][2], boxes[i][3]
# draw a bounding box rectangle and label on the image
color = [int(c) for c in colors[class_ids[i]]]
cv2.rectangle(image, (x, y), (x + w, y + h), color=color, thickness=thickness)
text = f"{labels[class_ids[i]]}: {confidences[i]:.2f}"
# calculate text width & height to draw the transparent boxes as background of the text
(text_width, text_height) = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness)[0]
text_offset_x = x
text_offset_y = y - 5
box_coords = ((text_offset_x, text_offset_y), (text_offset_x + text_width + 2, text_offset_y - text_height))
overlay = image.copy()
cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
# add opacity (transparency to the box)
image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
# now put the text (label: confidence %)
cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
fontScale=font_scale, color=(0, 0, 0), thickness=thickness)
# cv2.imshow("image", image)
# if cv2.waitKey(0) == ord("q"):
# pass
cv2.imwrite(filename + "_yolo3." + ext, image)
live_yolo_opencv.py
import cv2
import numpy as np
import time
CONFIDENCE = 0.5
SCORE_THRESHOLD = 0.5
IOU_THRESHOLD = 0.5
config_path = "cfg/yolov3.cfg"
weights_path = "weights/yolov3.weights"
font_scale = 1
thickness = 1
LABELS = open("data/coco.names").read().strip().split("\n")
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
ln = net.getLayerNames()
try:
ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
except IndexError:
# in case getUnconnectedOutLayers() returns 1D array when CUDA isn't available
ln = [ln[i - 1] for i in net.getUnconnectedOutLayers()]
cap = cv2.VideoCapture(0)
while True:
_, image = cap.read()
h, w = image.shape[:2]
blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)
net.setInput(blob)
start = time.perf_counter()
layer_outputs = net.forward(ln)
time_took = time.perf_counter() - start
print("Time took:", time_took)
boxes, confidences, class_ids = [], [], []
# loop over each of the layer outputs
for output in layer_outputs:
# loop over each of the object detections
for detection in output:
# extract the class id (label) and confidence (as a probability) of
# the current object detection
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
# discard weak predictions by ensuring the detected
# probability is greater than the minimum probability
if confidence > CONFIDENCE:
# scale the bounding box coordinates back relative to the
# size of the image, keeping in mind that YOLO actually
# returns the center (x, y)-coordinates of the bounding
# box followed by the boxes' width and height
box = detection[:4] * np.array([w, h, w, h])
(centerX, centerY, width, height) = box.astype("int")
# use the center (x, y)-coordinates to derive the top and
# and left corner of the bounding box
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
# update our list of bounding box coordinates, confidences,
# and class IDs
boxes.append([x, y, int(width), int(height)])
confidences.append(float(confidence))
class_ids.append(class_id)
# perform the non maximum suppression given the scores defined before
idxs = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESHOLD, IOU_THRESHOLD)
font_scale = 1
thickness = 1
# ensure at least one detection exists
if len(idxs) > 0:
# loop over the indexes we are keeping
for i in idxs.flatten():
# extract the bounding box coordinates
x, y = boxes[i][0], boxes[i][1]
w, h = boxes[i][2], boxes[i][3]
# draw a bounding box rectangle and label on the image
color = [int(c) for c in colors[class_ids[i]]]
cv2.rectangle(image, (x, y), (x + w, y + h), color=color, thickness=thickness)
text = f"{labels[class_ids[i]]}: {confidences[i]:.2f}"
# calculate text width & height to draw the transparent boxes as background of the text
(text_width, text_height) = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness)[0]
text_offset_x = x
text_offset_y = y - 5
box_coords = ((text_offset_x, text_offset_y), (text_offset_x + text_width + 2, text_offset_y - text_height))
overlay = image.copy()
cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
# add opacity (transparency to the box)
image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
# now put the text (label: confidence %)
cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
fontScale=font_scale, color=(0, 0, 0), thickness=thickness)
cv2.imshow("image", image)
if ord("q") == cv2.waitKey(1):
break
cap.release()
cv2.destroyAllWindows()
read_video.py
import cv2
import numpy as np
import time
import sys
CONFIDENCE = 0.5
SCORE_THRESHOLD = 0.5
IOU_THRESHOLD = 0.5
config_path = "cfg/yolov3.cfg"
weights_path = "weights/yolov3.weights"
font_scale = 1
thickness = 1
labels = open("data/coco.names").read().strip().split("\n")
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
ln = net.getLayerNames()
try:
ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
except IndexError:
# in case getUnconnectedOutLayers() returns 1D array when CUDA isn't available
ln = [ln[i - 1] for i in net.getUnconnectedOutLayers()]
# read the file from the command line
video_file = sys.argv[1]
cap = cv2.VideoCapture(video_file)
_, image = cap.read()
h, w = image.shape[:2]
fourcc = cv2.VideoWriter_fourcc(*"XVID")
out = cv2.VideoWriter("output.avi", fourcc, 20.0, (w, h))
while True:
_, image = cap.read()
h, w = image.shape[:2]
blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)
net.setInput(blob)
start = time.perf_counter()
layer_outputs = net.forward(ln)
time_took = time.perf_counter() - start
print("Time took:", time_took)
boxes, confidences, class_ids = [], [], []
# loop over each of the layer outputs
for output in layer_outputs:
# loop over each of the object detections
for detection in output:
# extract the class id (label) and confidence (as a probability) of
# the current object detection
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
# discard weak predictions by ensuring the detected
# probability is greater than the minimum probability
if confidence > CONFIDENCE:
# scale the bounding box coordinates back relative to the
# size of the image, keeping in mind that YOLO actually
# returns the center (x, y)-coordinates of the bounding
# box followed by the boxes' width and height
box = detection[:4] * np.array([w, h, w, h])
(centerX, centerY, width, height) = box.astype("int")
# use the center (x, y)-coordinates to derive the top and
# and left corner of the bounding box
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
# update our list of bounding box coordinates, confidences,
# and class IDs
boxes.append([x, y, int(width), int(height)])
confidences.append(float(confidence))
class_ids.append(class_id)
# perform the non maximum suppression given the scores defined before
idxs = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESHOLD, IOU_THRESHOLD)
font_scale = 1
thickness = 1
# ensure at least one detection exists
if len(idxs) > 0:
# loop over the indexes we are keeping
for i in idxs.flatten():
# extract the bounding box coordinates
x, y = boxes[i][0], boxes[i][1]
w, h = boxes[i][2], boxes[i][3]
# draw a bounding box rectangle and label on the image
color = [int(c) for c in colors[class_ids[i]]]
cv2.rectangle(image, (x, y), (x + w, y + h), color=color, thickness=thickness)
text = f"{labels[class_ids[i]]}: {confidences[i]:.2f}"
# calculate text width & height to draw the transparent boxes as background of the text
(text_width, text_height) = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness)[0]
text_offset_x = x
text_offset_y = y - 5
box_coords = ((text_offset_x, text_offset_y), (text_offset_x + text_width + 2, text_offset_y - text_height))
overlay = image.copy()
cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
# add opacity (transparency to the box)
image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
# now put the text (label: confidence %)
cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
fontScale=font_scale, color=(0, 0, 0), thickness=thickness)
out.write(image)
cv2.imshow("image", image)
if ord("q") == cv2.waitKey(1):
break
cap.release()
cv2.destroyAllWindows()
yolo.py (PyTorch) requires darknet.py and utils.py.
import cv2
import matplotlib.pyplot as plt
from utils import *
from darknet import Darknet
# Set the NMS Threshold
score_threshold = 0.6
# Set the IoU threshold
iou_threshold = 0.4
cfg_file = "cfg/yolov3.cfg"
weight_file = "weights/yolov3.weights"
namesfile = "data/coco.names"
m = Darknet(cfg_file)
m.load_weights(weight_file)
class_names = load_class_names(namesfile)
# m.print_network()
original_image = cv2.imread("images/city_scene.jpg")
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
img = cv2.resize(original_image, (m.width, m.height))
# detect the objects
boxes = detect_objects(m, img, iou_threshold, score_threshold)
# plot the image with the bounding boxes and corresponding object class labels
plot_boxes(original_image, boxes, class_names, plot_labels=True)