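# PP-YOLOv5s ONNX inference demo: letterbox the input image to 640x640, run the
# exported model with onnxruntime, decode the three YOLOv5 detection heads
# (strides 8/16/32), apply NMS, rescale boxes to the original image, and
# visualize the detections.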
import sys

sys.path.append(R"../0_ppyolov5")

import time

import cv2
import numpy as np
import onnx  # imported for parity with the export toolchain; not used directly below
import onnxruntime
import paddle  # imported for parity with the export toolchain; not used directly below
import torch
import torchvision

def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None):
    """Draw boxes and class/score labels on img (in place) and return it."""
    for i in range(len(boxes)):
        box = boxes[i]
        cls_id = int(cls_ids[i])
        score = scores[i]
        if score < conf:
            continue
        x0 = int(box[0])
        y0 = int(box[1])
        x1 = int(box[2])
        y1 = int(box[3])

        color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist()
        text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100)
        txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255)
        font = cv2.FONT_HERSHEY_SIMPLEX

        txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
        cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)

        # Filled background behind the label text
        txt_bk_color = (_COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist()
        cv2.rectangle(
            img,
            (x0, y0 + 1),
            (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])),
            txt_bk_color,
            -1
        )
        cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1)

    return img
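# Example call (hypothetical values): vis(image, boxes=np.array([[10, 10, 100, 100]]),
# scores=np.array([0.9]), cls_ids=np.array([0]), class_names=COCO_CLASSES)
# draws one "person" box; boxes are expected in (x1, y1, x2, y2) pixel format.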
_COLORS = np.array(
    [
        0.000, 0.447, 0.741, 0.850, 0.325, 0.098, 0.929, 0.694, 0.125,
        0.494, 0.184, 0.556, 0.466, 0.674, 0.188, 0.301, 0.745, 0.933,
        0.635, 0.078, 0.184, 0.300, 0.300, 0.300, 0.600, 0.600, 0.600,
        1.000, 0.000, 0.000, 1.000, 0.500, 0.000, 0.749, 0.749, 0.000,
        0.000, 1.000, 0.000, 0.000, 0.000, 1.000, 0.667, 0.000, 1.000,
        0.333, 0.333, 0.000, 0.333, 0.667, 0.000, 0.333, 1.000, 0.000,
        0.667, 0.333, 0.000, 0.667, 0.667, 0.000, 0.667, 1.000, 0.000,
        1.000, 0.333, 0.000, 1.000, 0.667, 0.000, 1.000, 1.000, 0.000,
        0.000, 0.333, 0.500, 0.000, 0.667, 0.500, 0.000, 1.000, 0.500,
        0.333, 0.000, 0.500, 0.333, 0.333, 0.500, 0.333, 0.667, 0.500,
        0.333, 1.000, 0.500, 0.667, 0.000, 0.500, 0.667, 0.333, 0.500,
        0.667, 0.667, 0.500, 0.667, 1.000, 0.500, 1.000, 0.000, 0.500,
        1.000, 0.333, 0.500, 1.000, 0.667, 0.500, 1.000, 1.000, 0.500,
        0.000, 0.333, 1.000, 0.000, 0.667, 1.000, 0.000, 1.000, 1.000,
        0.333, 0.000, 1.000, 0.333, 0.333, 1.000, 0.333, 0.667, 1.000,
        0.333, 1.000, 1.000, 0.667, 0.000, 1.000, 0.667, 0.333, 1.000,
        0.667, 0.667, 1.000, 0.667, 1.000, 1.000, 1.000, 0.000, 1.000,
        1.000, 0.333, 1.000, 1.000, 0.667, 1.000, 0.333, 0.000, 0.000,
        0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 0.000, 0.000,
        1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, 0.333, 0.000,
        0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 0.000,
        0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, 0.333,
        0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833,
        0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.143, 0.143, 0.143,
        0.286, 0.286, 0.286, 0.429, 0.429, 0.429, 0.571, 0.571, 0.571,
        0.714, 0.714, 0.714, 0.857, 0.857, 0.857, 0.000, 0.447, 0.741,
        0.314, 0.717, 0.741, 0.500, 0.500, 0.000
    ]
).astype(np.float32).reshape(-1, 3)
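# 80 RGB triples in [0, 1], reshaped to (80, 3): one color per COCO class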
COCO_CLASSES = (
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
    "truck", "boat", "traffic light", "fire hydrant", "stop sign",
    "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag",
    "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
    "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
    "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant",
    "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote",
    "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush",
)
class Colors:
    # Ultralytics color palette https://ultralytics.com/
    def __init__(self):
        # hex = matplotlib.colors.TABLEAU_COLORS.values()
        hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
                '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
        self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
        self.n = len(self.palette)
        self.pose_palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], [230, 230, 0], [255, 153, 255],
                                      [153, 204, 255], [255, 102, 255], [255, 51, 255], [102, 178, 255], [51, 153, 255],
                                      [255, 153, 153], [255, 102, 102], [255, 51, 51], [153, 255, 153], [102, 255, 102],
                                      [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], [255, 255, 255]],
                                     dtype=np.uint8)

    def __call__(self, i, bgr=False):
        c = self.palette[int(i) % self.n]
        return (c[2], c[1], c[0]) if bgr else c

    @staticmethod
    def hex2rgb(h):  # rgb order (PIL)
        return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))


colors = Colors()  # create instance for 'from utils.plots import colors'

skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9],
            [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]

limb_color = colors.pose_palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]
kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]
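# Note: colors/skeleton/limb_color/kpt_color are pose (keypoint) drawing
# helpers carried over from the YOLOv5 plotting utilities; the detection
# pipeline below does not use them.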
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    # dw /= 2  # divide padding into 2 sides
    # dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    bottom = int(round(dh + 0.1))
    right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, 0, bottom, 0, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)
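# Unlike the upstream YOLOv5 letterbox, the dw/dh halving above is commented
# out, so all padding goes on the bottom/right edge; rescaling boxes later only
# needs a division by the ratio, with no offset subtraction.
# Worked example (assuming bus.jpg is 1080x810): r = 640/1080 ~ 0.593,
# new_unpad = (480, 640), so the output is 640x640 with 160 px of gray padding
# on the right.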
def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
    return y


def box_iou(box1, box2, eps=1e-7):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
    inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)

    # IoU = inter / (area1 + area2 - inter)
    return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)
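# Quick sanity check (hypothetical values, not part of the pipeline):
#   xywh2xyxy(np.array([[10., 10., 4., 6.]]))  -> [[ 8.,  7., 12., 13.]]
#   box_iou(b, b) for any valid box tensor b   -> ~1.0 on the diagonal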
def non_max_suppression(
        prediction,
        conf_thres=0.25,
        iou_thres=0.45,
        classes=None,
        agnostic=False,
        multi_label=False,
        labels=(),
        max_det=300,
        nm=0,  # number of masks
):
    """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections

    Returns:
         list of detections, one (n, 6 + nm) tensor per image [xyxy, conf, cls]
    """

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
    if isinstance(prediction, (list, tuple)):  # model in validation mode, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output

    device = prediction.device
    mps = 'mps' in device.type  # Apple MPS
    if mps:  # MPS not fully supported yet, convert tensors to CPU before NMS
        prediction = prediction.cpu()
    bs = prediction.shape[0]  # batch size
    nc = prediction.shape[2] - nm - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    max_wh = 7680  # (pixels) maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 0.5 + 0.05 * bs  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    mi = 5 + nc  # mask start index
    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + nm + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box/Mask
        box = xywh2xyxy(x[:, :4])  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
        mask = x[:, mi:]  # zero columns if no masks

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:mi] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1)
        else:  # best class only
            conf, j = x[:, 5:mi].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence and remove excess boxes

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        i = i[:max_det]  # limit detections
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if mps:
            output[xi] = output[xi].to(device)
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output
stride = [8, 16, 32]
anchors = torch.tensor([[[10, 13], [16, 30], [33, 23]], [[30, 61], [62, 45], [59, 119]], [[116, 90], [156, 198], [373, 326]]])


def make_grid(nx=20, ny=20, i=0):
    shape = 1, 3, ny, nx, 2  # grid shape
    y, x = torch.arange(ny), torch.arange(nx)
    # if check_version(torch.__version__, '1.10.0'):  # torch>=1.10.0 meshgrid workaround
    #     yv, xv = torch.meshgrid(y, x, indexing='ij')
    # else:
    yv, xv = torch.meshgrid(y, x)
    grid = torch.stack((xv, yv), 2).expand(shape) - 0.5  # add grid offset, i.e. y = 2.0 * x - 0.5
    anchor_grid = anchors[i].view((1, 3, 1, 1, 2)).expand(shape)
    return grid, anchor_grid
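# YOLOv5 decode convention used in the loop below:
#   xy = (2 * sigmoid(t_xy) - 0.5 + cell_index) * stride
#   wh = (2 * sigmoid(t_wh)) ** 2 * anchor
# make_grid() bakes the -0.5 offset into `grid`, so the loop can compute
# (sigmoid(t_xy) * 2 + grid) * stride directly.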
onnx_model = onnxruntime.InferenceSession("../2_compile/fmodel/ppyolov5s-640x640.onnx")

img_path = '../2_compile/qtset/coco/bus.jpg'
net_size = (640, 640)
N_CLASS = 80

img_raw = cv2.imread(img_path)
img = letterbox(img_raw, new_shape=net_size, stride=32, auto=False)[0]

# cv2.imshow("", img)
# cv2.waitKey()

img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
im = torch.from_numpy(img).float().unsqueeze(0)
im /= 255

inp = im.numpy()

# Run the session once and fetch all three detection heads (the original ran
# the session three times, once per output; this yields the same tensors)
output_names = [o.name for o in onnx_model.get_outputs()]
out1, out2, out3 = onnx_model.run(output_names[:3], {onnx_model.get_inputs()[0].name: inp})

outputs = [torch.from_numpy(out1), torch.from_numpy(out2), torch.from_numpy(out3)]
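# For a 640x640 input, each raw head i is expected (assuming the export keeps
# the standard YOLOv5s head layout) to have shape
# (1, 3*(5+80), 640/stride[i], 640/stride[i]):
# (1, 255, 80, 80), (1, 255, 40, 40), (1, 255, 20, 20).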
# Post-processing: decode each head into absolute xywh boxes plus scores
z = []
grid = [torch.zeros(1)] * 3
anchor_grid = [torch.zeros(1)] * 3

for i in range(3):
    bs, _, ny, nx = outputs[i].shape
    outputs[i] = outputs[i].view(bs, 3, N_CLASS + 5, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

    grid[i], anchor_grid[i] = make_grid(nx, ny, i)
    y = outputs[i].sigmoid()
    y[..., 0:2] = (y[..., 0:2] * 2 + grid[i]) * stride[i]  # xy
    y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anchor_grid[i]  # wh
    z.append(y.view(bs, -1, N_CLASS + 5))

pred = torch.cat(z, 1)  # shape (1, 25200, 85): 25200 = 3 * (80*80 + 40*40 + 20*20)

# NMS
conf_thres = 0.25
iou_thres = 0.45
pred = non_max_suppression(pred, conf_thres, iou_thres, classes=None, agnostic=False, max_det=1000)
# Display results: rescale boxes to the original image and draw them
det = pred[0]  # detections for the single image, shape (n, 6): xyxy, conf, cls
ratio = min(net_size[0] / img_raw.shape[0], net_size[1] / img_raw.shape[1])
det[:, :4] /= ratio  # letterbox pads only bottom/right, so no offset to subtract
result_image = vis(img_raw, boxes=det[:, :4], scores=det[:, 4], cls_ids=det[:, 5], conf=conf_thres, class_names=COCO_CLASSES)
cv2.imshow(" ", result_image)
cv2.waitKey()