# mdz/pytorch/yolov9_pan/1_scripts/yolov9_utils.py

from distutils.command import sdist
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import cv2
from pathlib import Path


# NumPy-only helpers, for use when you do not want to import torch.

class DFL:
    """DFL module implemented with NumPy.

    Turns per-side bin logits into a single value per side: a softmax over
    the ``c1`` bins followed by a weighted sum with weights ``0 .. c1 - 1``
    (the equivalent of a fixed 1x1 convolution).
    """

    def __init__(self, c1=17):
        # c1: number of bins per box side.
        self.c1 = c1
        # Fixed weights 0..c1-1, shaped to broadcast over (b, c1, 4, a).
        self.conv_weights = np.arange(c1, dtype=np.float32).reshape(1, c1, 1, 1)

    def softmax(self, x, axis):
        """Return the softmax of ``x`` along ``axis`` (max-shifted for stability)."""
        shifted = x - np.max(x, axis=axis, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=axis, keepdims=True)

    def conv2d(self, x, weights):
        """Weighted sum over axis 1, i.e. a 1x1 convolution with one output channel."""
        weighted = x * weights
        return np.sum(weighted, axis=1, keepdims=True)

    def forward(self, x):
        """Decode ``x`` of shape (batch, 4 * c1, anchors) into (batch, 4, anchors)."""
        batch, _, anchors = x.shape
        # (b, 4*c1, a) -> (b, 4, c1, a) -> (b, c1, 4, a) so the bins sit on axis 1.
        bins = x.reshape(batch, 4, self.c1, anchors).transpose(0, 2, 1, 3)
        probs = self.softmax(bins, axis=1)
        expected = self.conv2d(probs, self.conv_weights)
        return expected.reshape(batch, 4, anchors)

def make_anchors(feats, strides, grid_cell_offset=0.5):
    """Build anchor points and their strides for a list of feature maps.

    Args:
        feats: sequence of 4-D arrays; only the last two dimensions (h, w)
            of ``feats[i]`` are used.
        strides: one stride value per feature level.
        grid_cell_offset: offset added to every integer grid coordinate.

    Returns:
        (anchor_points, stride_tensor): arrays of shape (N, 2) holding
        (x, y) points in row-major order and (N, 1) holding the stride of
        each point, where N is the sum of h * w over all levels.
    """
    assert feats is not None

    points_per_level = []
    strides_per_level = []
    for level, stride in enumerate(strides):
        _, _, height, width = feats[level].shape
        xs = np.arange(width) + grid_cell_offset
        ys = np.arange(height) + grid_cell_offset
        grid_y, grid_x = np.meshgrid(ys, xs, indexing='ij')
        points = np.stack((grid_x, grid_y), -1).reshape(-1, 2)
        points_per_level.append(points)
        strides_per_level.append(np.full((height * width, 1), stride))

    return np.concatenate(points_per_level), np.concatenate(strides_per_level)

def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
    """Convert (left, top, right, bottom) distances around anchors into boxes.

    Args:
        distance: array whose ``dim`` axis holds the four distances (ltrb).
        anchor_points: anchor (x, y) coordinates, broadcastable against each
            half of ``distance``.
        xywh: return (cx, cy, w, h) when True, otherwise (x1, y1, x2, y2).
        dim: axis along which the distances are split and the box is joined.

    Returns:
        Array of boxes concatenated along ``dim``.
    """
    left_top, right_bottom = np.split(distance, 2, axis=dim)
    top_left = anchor_points - left_top
    bottom_right = anchor_points + right_bottom
    if not xywh:
        return np.concatenate((top_left, bottom_right), axis=dim)  # xyxy bbox

    centre = (top_left + bottom_right) / 2
    size = bottom_right - top_left
    return np.concatenate((centre, size), axis=dim)  # xywh bbox

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def nms(boxes, scores, iou_threshold):
    """
    Implements Non-Maximum Suppression (NMS) to filter the predictions.

    Boxes are visited in descending score order; every remaining box whose
    IoU with the current box exceeds ``iou_threshold`` is discarded.

    Args:
        boxes (numpy.ndarray): Array of shape (n, 4) where n is the number of predicted boxes.
                               Each row represents [x1, y1, x2, y2].
        scores (numpy.ndarray): Array of shape (n,) where n is the number of predicted boxes.
                                Each element represents the confidence score of the corresponding box.
        iou_threshold (float): IOU threshold for NMS.

    Returns:
        List[int]: List of indices of the boxes to keep, highest score first.
    """

    # If no boxes, return an empty list
    if len(boxes) == 0:
        return []

    boxes = np.asarray(boxes)
    scores = np.asarray(scores)

    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)

    # Candidate indices, best score first
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        idx = order[0]  # remaining box with the highest score
        rest = order[1:]
        keep.append(int(idx))

        # Intersection of the current box with every remaining box
        xx1 = np.maximum(x1[idx], x1[rest])
        yy1 = np.maximum(y1[idx], y1[rest])
        xx2 = np.minimum(x2[idx], x2[rest])
        yy2 = np.minimum(y2[idx], y2[rest])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)

        # A pair of zero-area boxes has a zero union; treat its IoU as 0
        # instead of dividing by zero, which would produce NaN and make the
        # comparison below silently drop the box.
        union = areas[idx] + areas[rest] - inter
        iou = np.divide(inter, union, out=np.zeros(inter.shape, dtype=float), where=union > 0)

        # Keep boxes with IoU not above the threshold
        order = rest[iou <= iou_threshold]

    return keep

def compute_iou(boxes1, boxes2):
    """Return the row-wise IoU of two (n, 4) arrays of [x1, y1, x2, y2] boxes.

    Row i of ``boxes1`` is compared with row i of ``boxes2``. The union is
    clamped to at least 1e-6 so the division never hits zero.
    """
    def _area(b):
        return (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    # Corners of the overlapping rectangle
    left = np.maximum(boxes1[:, 0], boxes2[:, 0])
    top = np.maximum(boxes1[:, 1], boxes2[:, 1])
    right = np.minimum(boxes1[:, 2], boxes2[:, 2])
    bottom = np.minimum(boxes1[:, 3], boxes2[:, 3])
    overlap = np.maximum(right - left, 0) * np.maximum(bottom - top, 0)

    total = _area(boxes1) + _area(boxes2) - overlap
    return overlap / np.maximum(total, 1e-6)


def xywh2xyxy(x):
    """Convert nx4 boxes from [cx, cy, w, h] to [x1, y1, x2, y2].

    (x1, y1) is the top-left corner and (x2, y2) the bottom-right corner.
    A copy is returned; columns beyond the first four are carried over
    unchanged.
    """
    out = np.copy(x)
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    out[:, 0] = x[:, 0] - half_w  # top left x
    out[:, 1] = x[:, 1] - half_h  # top left y
    out[:, 2] = x[:, 0] + half_w  # bottom right x
    out[:, 3] = x[:, 1] + half_h  # bottom right y
    return out

def non_max_suppression(
        prediction,
        conf_thres=0.25,
        iou_thres=0.45,
        classes=None,
        multi_label=False,
        labels=(),
        max_det=300,
        nm=0,  # number of masks
):
    """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections.

    Args:
        prediction: array of shape (bs, 4 + nc + nm, n) holding, per candidate,
            an xywh box, nc class scores and nm mask coefficients. A list or
            tuple is accepted, in which case its first element is used.
        conf_thres: minimum class score for a candidate to be considered.
        iou_thres: IoU threshold passed to ``nms``.
        classes: optional iterable of class indices to keep.
        multi_label: emit one detection per (box, class) pair above
            ``conf_thres`` instead of only the best class (needs nc > 1).
        labels: optional per-image apriori labels; rows are [cls, x, y, w, h]
            and are appended to the candidates of that image.
        max_det: maximum number of detections returned per image.
        nm: number of mask coefficients per candidate.

    Returns:
        list of detections, one (n, 6 + nm) array per image
        [x1, y1, x2, y2, conf, cls, mask coefficients...]
    """
    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    if isinstance(prediction, (list, tuple)):  # YOLO model in validation model, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output

    bs = prediction.shape[0]  # batch size
    nc = prediction.shape[1] - nm - 4  # number of classes
    mi = 4 + nc  # mask start index
    xc = prediction[:, 4:mi].max(1) > conf_thres  # candidates

    # Settings
    max_wh = 7680  # (pixels) maximum box width and height
    max_nms = 30000  # maximum number of boxes passed to nms()
    multi_label &= nc > 1  # multiple labels per box

    # One independent empty result per image (not the same array repeated).
    output = [np.zeros((0, 6 + nm)) for _ in range(bs)]
    for xi, x in enumerate(prediction):  # image index, image inference
        x = x.T[xc[xi]]  # (n, 4 + nc + nm) rows that pass the confidence test

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            # Same width as the prediction rows: 4 box values + nc scores + nm masks.
            v = np.zeros((len(lb), nc + nm + 4))
            v[:, :4] = lb[:, 1:5]  # box
            v[np.arange(len(lb)), lb[:, 0].astype(int) + 4] = 1.0  # cls
            x = np.concatenate((x, v), axis=0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Detections matrix nx6 (xyxy, conf, cls)
        box = xywh2xyxy(x[:, :4])  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
        cls = x[:, 4:mi]
        mask = x[:, mi:]

        if multi_label:
            i, j = np.nonzero(cls > conf_thres)
            x = np.concatenate((box[i], x[i, 4 + j, None], j[:, None].astype(float), mask[i]), axis=1)
        else:  # best class only
            conf = np.amax(cls, axis=1, keepdims=True)
            j = np.argmax(cls, axis=1)[:, None].astype(float)
            x = np.concatenate((box, conf, j, mask), axis=1)
            x = x[conf.flatten() > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == np.array(classes)).any(axis=1)]

        # Check shape
        if not x.shape[0]:  # no boxes
            continue
        # Sort by confidence (descending) and cap the number of NMS inputs
        x = x[x[:, 4].argsort()[::-1][:max_nms]]

        # Batched NMS: shift every box by class * max_wh so that boxes of
        # different classes can never overlap.
        c = x[:, 5:6] * max_wh  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = nms(boxes, scores, iou_thres)[:max_det]  # limit detections

        output[xi] = x[i]

    return output

# Names of the 80 COCO object classes.
# NOTE(review): presumably indexed by the detector's contiguous class index
# (0..79) — confirm against the callers, this file does not use it.
COCO_CLASSES = (
    "person",
    "bicycle",
    "car",
    "motorcycle",
    "airplane",
    "bus",
    "train",
    "truck",
    "boat",
    "traffic light",
    "fire hydrant",
    "stop sign",
    "parking meter",
    "bench",
    "bird",
    "cat",
    "dog",
    "horse",
    "sheep",
    "cow",
    "elephant",
    "bear",
    "zebra",
    "giraffe",
    "backpack",
    "umbrella",
    "handbag",
    "tie",
    "suitcase",
    "frisbee",
    "skis",
    "snowboard",
    "sports ball",
    "kite",
    "baseball bat",
    "baseball glove",
    "skateboard",
    "surfboard",
    "tennis racket",
    "bottle",
    "wine glass",
    "cup",
    "fork",
    "knife",
    "spoon",
    "bowl",
    "banana",
    "apple",
    "sandwich",
    "orange",
    "broccoli",
    "carrot",
    "hot dog",
    "pizza",
    "donut",
    "cake",
    "chair",
    "couch",
    "potted plant",
    "bed",
    "dining table",
    "toilet",
    "tv",
    "laptop",
    "mouse",
    "remote",
    "keyboard",
    "cell phone",
    "microwave",
    "oven",
    "toaster",
    "sink",
    "refrigerator",
    "book",
    "clock",
    "vase",
    "scissors",
    "teddy bear",
    "hair drier",
    "toothbrush",
)

def clip_boxes(boxes, shape):
    """Clip xyxy boxes in place to an image of ``shape`` (height, width).

    x coordinates (columns 0 and 2) are limited to [0, width] and
    y coordinates (columns 1 and 3) to [0, height]. Returns None.
    """
    height, width = shape[0], shape[1]
    for columns, upper in (([0, 2], width), ([1, 3], height)):
        boxes[:, columns] = boxes[:, columns].clip(0, upper)

def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
    """Rescale xyxy boxes in place from ``img1_shape`` to ``img0_shape``.

    Args:
        img1_shape: (h, w) of the image the boxes currently refer to.
        boxes: (n, >=4) array of [x1, y1, x2, y2, ...]; modified in place.
        img0_shape: (h, w, ...) of the target image.
        ratio_pad: optional ((gain, ...), (pad_x, pad_y)); when omitted the
            gain and the centred padding are derived from the two shapes.

    Returns:
        The same ``boxes`` array, with padding removed, divided by the gain
        and clipped to ``img0_shape``.
    """
    if ratio_pad is not None:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]
    else:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad_w = (img1_shape[1] - img0_shape[1] * gain) / 2
        pad_h = (img1_shape[0] - img0_shape[0] * gain) / 2
        pad = pad_w, pad_h

    x_cols, y_cols = [0, 2], [1, 3]
    boxes[:, x_cols] -= pad[0]  # x padding
    boxes[:, y_cols] -= pad[1]  # y padding
    boxes[:, :4] /= gain
    clip_boxes(boxes, img0_shape)
    return boxes

def crop_mask(masks, boxes):
    """
    "Crop" predicted masks by zeroing out everything not in the predicted bbox.

    Args:
        - masks: a size [n, h, w] array of masks
        - boxes: a size [n, 4] array of [x1, y1, x2, y2] coords, expressed in
          mask pixels (compared directly against column / row indices)

    Returns:
        - cropped_masks: a size [n, h, w] array; a pixel keeps its value only
          when x1 <= column < x2 and y1 <= row < y2 for its own box
    """

    _, height, width = masks.shape
    x1, y1, x2, y2 = np.split(boxes[:, None, :], 4, axis=2)  # each of shape (n, 1, 1)
    col_idx = np.arange(width)[None, None, :]  # shape (1, 1, w)
    row_idx = np.arange(height)[None, :, None]  # shape (1, h, 1)

    inside_x = (col_idx >= x1) & (col_idx < x2)
    inside_y = (row_idx >= y1) & (row_idx < y2)
    return masks * (inside_x & inside_y)

def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Build binary instance masks from prototypes; crop before upsample.

    protos: [mask_dim, mask_h, mask_w]
    masks_in: [n, mask_dim], n is number of masks after NMS
    bboxes: [n, 4], xyxy boxes in input-image pixels, n is number of masks after NMS
    shape: input_image_size, (h, w)
    upsample: Whether to upsample the masks to the input image size

    Returns:
    masks: uint8 array of 0/1 values.
        [n, mask_h, mask_w] when ``upsample`` is False,
        [h, w, n] when ``upsample`` is True (the mask axis is kept even for n == 1).
    """

    c, mh, mw = protos.shape  # CHW
    ih, iw = shape

    # No detections: return an empty stack in the layout of the requested mode.
    if len(masks_in) == 0:
        empty_shape = (ih, iw, 0) if upsample else (0, mh, mw)
        return np.zeros(empty_shape, dtype=np.uint8)

    # Linear combination of the prototypes, squashed to (0, 1)
    masks = sigmoid(np.dot(masks_in, protos.reshape(c, -1))).reshape(-1, mh, mw)

    # Bring the boxes from input-image pixels down to mask resolution
    downsampled_bboxes = bboxes.copy()
    downsampled_bboxes[:, 0] *= mw / iw
    downsampled_bboxes[:, 2] *= mw / iw
    downsampled_bboxes[:, 3] *= mh / ih
    downsampled_bboxes[:, 1] *= mh / ih

    masks = crop_mask(masks, downsampled_bboxes)  # nHW

    if upsample:
        masks = cv2.resize(masks.transpose(1, 2, 0), (iw, ih), interpolation=cv2.INTER_LINEAR)  # HWn
        # cv2.resize returns a 2-D array for a single-channel input; restore
        # the mask axis so the result is always [h, w, n].
        if masks.ndim == 2:
            masks = masks[:, :, None]
    return (masks > 0.5).astype(np.uint8)  # Convert to binary masks

class Colors:
    """Ultralytics color palette https://ultralytics.com/

    Calling an instance with an index returns one of the 20 palette colors,
    wrapping around when the index exceeds the palette size.
    """

    def __init__(self):
        # hex = matplotlib.colors.TABLEAU_COLORS.values()
        hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
                '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
        self.palette = [self.hex2rgb(f'#{code}') for code in hexs]
        self.n = len(self.palette)

    def __call__(self, i, bgr=False):
        """Return palette color ``int(i) % n`` as (r, g, b), or (b, g, r) if ``bgr``."""
        rgb = self.palette[int(i) % self.n]
        if bgr:
            return rgb[2], rgb[1], rgb[0]
        return rgb

    @staticmethod
    def hex2rgb(h):  # rgb order (PIL)
        """Convert a '#RRGGBB' string into an (r, g, b) tuple of ints."""
        return tuple(int(h[start:start + 2], 16) for start in (1, 3, 5))


colors = Colors()  # create instance for 'from utils.plots import colors'

def box_label(im, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
    """Draw a box outline and an optional text label onto ``im`` with OpenCV.

    Args:
        im: image array drawn on in place.
        box: sequence whose first four items are x1, y1, x2, y2.
        label: text to draw; nothing is written when empty.
        color: color of the outline and of the filled label background.
        txt_color: color of the label text.

    Returns:
        ``np.asarray(im)``.
    """
    top_left = (int(box[0]), int(box[1]))
    bottom_right = (int(box[2]), int(box[3]))
    cv2.rectangle(im, top_left, bottom_right, color, thickness=2, lineType=cv2.LINE_AA)
    if not label:
        return np.asarray(im)

    thickness = 2  # font thickness
    font_scale = 3 / 3
    text_w, text_h = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=thickness)[0]
    x0, y0 = top_left
    # Put the label above the box when there is room, otherwise below its top edge.
    fits_above = y0 - text_h >= 3
    if fits_above:
        background_corner = (x0 + text_w, y0 - text_h - 3)
        text_origin = (x0, y0 - 2)
    else:
        background_corner = (x0 + text_w, y0 + text_h + 3)
        text_origin = (x0, y0 + text_h + 2)

    cv2.rectangle(im, top_left, background_corner, color, -1, cv2.LINE_AA)  # filled
    cv2.putText(im,
                label,
                text_origin,
                0,
                font_scale,
                txt_color,
                thickness=thickness,
                lineType=cv2.LINE_AA)
    return np.asarray(im)

def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
    """Strip the padding from ``masks`` and resize them to the original image size.

    im1_shape: model input shape, [h, w]
    im0_shape: origin pic shape, [h, w, 3]
    masks: [h, w, num] (or [h, w])
    ratio_pad: optional (ratio, (pad_x, pad_y)); only the padding is used.
        When omitted, a centred padding is derived from the two shapes.

    Returns masks of shape [im0_h, im0_w, num]; a 2-D resize result gets a
    trailing axis of length 1.
    """
    if ratio_pad is not None:
        pad = ratio_pad[1]
    else:  # calculate from im0_shape
        gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain = old / new
        pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding

    pad_x, pad_y = pad[0], pad[1]
    top, left = int(pad_y), int(pad_x)
    bottom = int(im1_shape[0] - pad_y)
    right = int(im1_shape[1] - pad_x)

    rank = len(masks.shape)
    if rank < 2:
        raise ValueError(f'"len of masks shape" should be 2 or 3, but got {rank}')

    unpadded = masks[top:bottom, left:right]
    resized = cv2.resize(unpadded, (im0_shape[1], im0_shape[0]))
    if len(resized.shape) == 2:
        resized = resized[:, :, None]
    return resized

def crop_image(im1_shape, masks, im0_shape, ratio_pad=None):
    """Cut the padding border off ``masks`` without resizing them.

    im1_shape: model input shape, [h, w]
    im0_shape: origin pic shape, [h, w, 3]
    masks: [h, w, num] (or [h, w])
    ratio_pad: optional (ratio, (pad_x, pad_y)); only the padding is used.
        When omitted, a centred padding is derived from the two shapes.

    Returns the slice masks[top:bottom, left:right] that excludes the padding.
    """
    if ratio_pad is not None:
        pad = ratio_pad[1]
    else:  # calculate from im0_shape
        gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain = old / new
        pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding

    pad_x, pad_y = pad[0], pad[1]
    top, left = int(pad_y), int(pad_x)
    bottom = int(im1_shape[0] - pad_y)
    right = int(im1_shape[1] - pad_x)

    rank = len(masks.shape)
    if rank < 2:
        raise ValueError(f'"len of masks shape" should be 2 or 3, but got {rank}')
    return masks[top:bottom, left:right]

def draw_masks(im, masks, colors, alpha=0.5):
    """Plot masks at once.

    The masks are rescaled to the image size, each is tinted with its color,
    and the result is alpha-blended into ``im`` in place.

    Args:
        im (array): image of shape [h, w, 3]; modified in place
        masks (array): predicted masks , shape: [h, w, n]
        colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n]
        alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque

    Returns:
        ``np.asarray(im)``; the image is returned untouched when there are no masks.
    """
    masks = np.asarray(masks)
    # The mask count is the LAST axis of an [h, w, n] stack, so test the total
    # size rather than len() (which would be the height), and hand back the
    # image like every other path does.
    if masks.size == 0:
        return np.asarray(im)

    masks = scale_image(masks.shape[:2], masks, im.shape)  # scale masks to im's shape
    masks = np.asarray(masks, dtype=np.float32)
    colors = np.asarray(colors, dtype=np.float32)  # shape(n,3)
    s = masks.sum(2, keepdims=True).clip(0, 1)  # per-pixel coverage of all masks together
    masks = (masks @ colors).clip(0, 255)  # (h,w,n) @ (n,3) = (h,w,3)
    im[:] = masks * alpha + im * (1 - s * alpha)
    return np.asarray(im)


# COCO category ids of the 80 instance ("thing") classes. The ids are not
# contiguous; the position in this list is the contiguous index that
# getMappingId / getMappingIndex translate to and from.
all_instances_ids = [
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    11, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 27, 28,
    31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 46, 47, 48, 49, 50,
    51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
    61, 62, 63, 64, 65, 67, 70,
    72, 73, 74, 75, 76, 77, 78, 79, 80,
    81, 82, 84, 85, 86, 87, 88, 89, 90,
]

# Stuff category ids 92..182, followed by the two special ids "other" (183)
# and "unlabeled" (0).
all_stuff_ids = [
    92, 93, 94, 95, 96, 97, 98, 99, 100,
    101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
    111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
    121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
    131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
    141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
    151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
    161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
    171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
    181, 182,
    # other
    183,
    # unlabeled
    0,
]

# Stuff category ids used for the panoptic task, followed by "unlabeled" (0).
# panoptic id: https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json
panoptic_stuff_ids = [
    92, 93, 95, 100,
    107, 109,
    112, 118, 119,
    122, 125, 128, 130,
    133, 138,
    141, 144, 145, 147, 148, 149,
    151, 154, 155, 156, 159,
    161, 166, 168,
    171, 175, 176, 177, 178, 180,
    181, 184, 185, 186, 187, 188, 189, 190,
    191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
    # unlabeled
    0,
]


def getCocoIds(name = 'semantic'):
    """Return the category-id table for the task called ``name``.

    'instances' -> all_instances_ids
    'stuff'     -> all_stuff_ids
    'panoptic'  -> all_instances_ids followed by panoptic_stuff_ids
    any other value (default 'semantic') -> all_instances_ids followed by all_stuff_ids

    The 'instances' and 'stuff' results are the module-level lists themselves,
    not copies; the other two are freshly concatenated lists.
    """
    if name == 'instances':
        return all_instances_ids
    if name == 'stuff':
        return all_stuff_ids

    stuff_part = panoptic_stuff_ids if name == 'panoptic' else all_stuff_ids
    return all_instances_ids + stuff_part

def getMappingId(index, name = 'semantic'):
    """Return the category id stored at position ``index`` of the ``name`` id table."""
    return getCocoIds(name = name)[index]

def getMappingIndex(id, name = 'semantic'):
    """Return the position of category ``id`` in the ``name`` id table.

    Raises ValueError (from ``list.index``) when the id is not in the table.
    """
    return getCocoIds(name = name).index(id)

def panoptic_merge_show(semask, masks, labels, conf, min_area):
    """Merge a semantic map and instance masks into a 3-channel panoptic map.

    Args:
        semask: 2-D array of semantic class indices. Indices below 80 are
            treated as thing classes; the others are translated to category
            ids with getMappingId(..., 'semantic').
        masks: instance masks, indexed as masks[i] and summed over axes (1, 2);
            pixels equal to 1 belong to instance i.
        labels: per-instance class index, translated with
            getMappingId(..., 'instances').
        conf: per-instance confidence; it is reordered together with the
            masks but not otherwise used here.
        min_area: stuff components with at most this many pixels are
            relabelled as unlabeled.

    Returns:
        int32 array of shape semask.shape + (3,):
            channel 0: category id,
            channel 1: category_id * 1000 + instance number on instance
                       pixels, 0 elsewhere,
            channel 2: category_id * 1000 + instance number on instance
                       pixels, stuff category id elsewhere.
        The value 255 is used internally as "unlabeled" and is replaced by 0
        in every channel before returning.
    """
    panoptic = np.zeros(semask.shape + (3,), dtype=np.int32)
    stuff = np.zeros_like(semask)
    unique_labels = np.unique(semask)
    for _cls in unique_labels:
        if _cls < 80:
            # Thing pixels of the semantic map: unlabeled unless an instance
            # mask paints over them below.
            stuff[semask == _cls] = 255
        else:
            stuff[semask == _cls] = getMappingId(_cls, 'semantic')
    panoptic[:, :, 2] = stuff
    panoptic[:, :, 0] = stuff
    inst_id = 0

    # merge inst
    area = np.sum(masks, axis=(1,2))
    # Paint the largest masks first so that smaller ones, painted later,
    # end up on top where they overlap.
    sorted_indices = np.argsort(area)[::-1]
    masks = masks[sorted_indices]
    labels = labels[sorted_indices]
    conf = conf[sorted_indices]
    for i in range(len(masks)):

        valid_area = (masks[i] == 1)
        panoptic[:, :, 1][valid_area] = getMappingId(int(labels[i]), 'instances') * 1000 + inst_id
        panoptic[:, :, 2][valid_area] = getMappingId(int(labels[i]), 'instances') * 1000 + inst_id
        panoptic[:, :, 0][valid_area] = getMappingId(int(labels[i]), 'instances')
        inst_id += 1

    # Disabled alternative: restart the instance counter for every class.
    # for _cls in np.unique(getMappingId(int(labels[i]), 'instances')):
    #     inst_id = 0
    #     for i in range(len(masks)):
    #         if labels[i] == _cls:
    #             valid_area = (masks[i] == 1)
    #             panoptic[:, :, 1][valid_area] = getMappingId(int(labels[i]), 'instances') * 1000 + inst_id
    #             panoptic[:, :, 2][valid_area] = getMappingId(int(labels[i]), 'instances') * 1000 + inst_id
    #             inst_id += 1

    # merge stuff
    # Pixels not covered by any instance (channel 1 still 0) are stuff.
    stuff_map = panoptic[:, :, 1] == 0
    stuff_cls = np.unique(panoptic[:, :, 2][stuff_map])
    for _cls in stuff_cls:
        stuff_seg = (panoptic[:, :, 2] == _cls).astype(np.uint8)
        num, componets = cv2.connectedComponents(stuff_seg)
        for i in range(num):
            if i > 0:  # component label 0 is skipped
                com_map = componets == i
                # Components that are too small become unlabeled.
                if np.count_nonzero(com_map) <= min_area:
                    panoptic[:, :, 2][com_map] = 255
                    panoptic[:, :, 0][com_map] = 255

    # Convert 255 to Unlabeled
    panoptic[panoptic == 255] = 0
    # Disabled: pack the ids into uint8 channels.
    # panoptic[:, :, 1] = panoptic[:, :, 1] // 256
    # panoptic[:, :, 2] = panoptic[:, :, 2] % 256
    # panoptic = panoptic.astype('uint8')
    return panoptic


def panoptic_merge_coco(semask, masks, labels, min_area):
    """Merge a semantic map and instance masks into a 3-channel panoptic map.

    Unlike panoptic_merge_show, no id translation and no sorting by area is
    done: semantic values and labels are written as they are, and instances
    are painted in the order given (later ones overwrite earlier ones).

    Args:
        semask: 2-D array of semantic labels. Values below 92 are treated as
            thing pixels; the others are copied as stuff ids.
            (assumes the values are already COCO category ids — TODO confirm)
        masks: instance masks, indexed as masks[i]; pixels equal to 1 belong
            to instance i.
        labels: per-instance category id, used directly.
        min_area: stuff components with at most this many pixels are
            relabelled as unlabeled in channel 2.

    Returns:
        int32 array of shape semask.shape + (3,):
            channel 0: category id,
            channel 1: label * 1000 + instance number on instance pixels,
                       0 elsewhere,
            channel 2: label * 1000 + instance number on instance pixels,
                       stuff id elsewhere.
        The value 255 is used internally as "unlabeled" and is replaced by 0
        in every channel before returning.
    """
    panoptic = np.zeros(semask.shape + (3,), dtype=np.int32)
    stuff = np.zeros_like(semask)
    unique_labels = np.unique(semask)
    for _cls in unique_labels:
        if _cls < 92:
            # Thing pixels of the semantic map: unlabeled unless an instance
            # mask paints over them below.
            stuff[semask == _cls] = 255
        else:
            stuff[semask == _cls] = _cls
    panoptic[:, :, 2] = stuff
    panoptic[:, :, 0] = stuff

    # merge inst
    inst_id = 0
    for i in range(len(masks)):
        valid_area = (masks[i] == 1)
        panoptic[:, :, 1][valid_area] = labels[i] * 1000 + inst_id
        panoptic[:, :, 2][valid_area] = labels[i] * 1000 + inst_id
        panoptic[:, :, 0][valid_area] = labels[i]
        inst_id += 1

    # Disabled alternative: restart the instance counter for every class.
    # for _cls in np.unique(labels):
    #     inst_id = 0
    #     imasks = masks[labels == _cls]
    #     for i, inst in enumerate(imasks):
    #         valid_area = (inst == 1)
    #         panoptic[:, :, 1][valid_area] = _cls * 1000 + inst_id
    #         panoptic[:, :, 2][valid_area] = _cls * 1000 + inst_id
    #         inst_id += 1


    # merge stuff
    # Pixels not covered by any instance (channel 1 still 0) are stuff.
    stuff_map = panoptic[:, :, 1] == 0
    stuff_cls = np.unique(panoptic[:, :, 2][stuff_map])
    for _cls in stuff_cls:
        stuff_seg = (panoptic[:, :, 2] == _cls).astype(np.uint8)
        num, componets = cv2.connectedComponents(stuff_seg)
        for i in range(num):
            if i > 0:  # component label 0 is skipped
                com_map = componets == i
                # Components that are too small become unlabeled.
                # NOTE(review): only channel 2 is reset here, whereas
                # panoptic_merge_show also resets channel 0 — confirm this
                # difference is intended.
                if np.count_nonzero(com_map) <= min_area:
                    panoptic[:, :, 2][com_map] = 255

    # Convert 255 to Unlabeled
    panoptic[panoptic == 255] = 0
    # Disabled: pack the ids into uint8 channels.
    # panoptic[:, :, 1] = panoptic[:, :, 1] // 256
    # panoptic[:, :, 2] = panoptic[:, :, 2] % 256
    # panoptic = panoptic.astype('uint8')
    return panoptic