shvtr159

Project file
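"""BEV object detection on KITTI with a Complex-YOLO style network (PyTorch).

Runs tiny-YOLOv3 inference on birds-eye-view maps, draws the rotated boxes,
and streams each detection to an embedded board over UDP.
"""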

import numpy as np
import math
import os
import argparse
import cv2
import time
import torch

import utils.utils as utils
from models import *
import torch.utils.data as torch_data

import utils.kitti_utils as kitti_utils
import utils.kitti_aug_utils as aug_utils
import utils.kitti_bev_utils as bev_utils
from utils.kitti_yolo_dataset import KittiYOLODataset
import utils.config as cnf
#import utils.mayavi_viewer as mview
from socket import socket, AF_INET, SOCK_DGRAM
import struct

def predictions_to_kitti_format(img_detections, calib, img_shape_2d, img_size, RGB_Map=None):
    predictions = np.zeros([50, 7], dtype=np.float32)  # room for at most 50 boxes
    count = 0
    for detections in img_detections:
        if detections is None:
            continue
        # Rescale boxes to original image
        for x, y, w, l, im, re, conf, cls_conf, cls_pred in detections:
            # (im, re) encode the yaw angle as (sin(yaw), cos(yaw)); stored as-is
            predictions[count, :] = cls_pred, x/img_size, y/img_size, w/img_size, l/img_size, im, re
            count += 1

    predictions = bev_utils.inverse_yolo_target(predictions, cnf.boundary)
    if predictions.shape[0]:
        predictions[:, 1:] = aug_utils.lidar_to_camera_box(predictions[:, 1:], calib.V2C, calib.R0, calib.P)

    objects_new = []
    corners3d = []
    for index, l in enumerate(predictions):
        # Map the class id to a KITTI type name (avoid shadowing the built-in str)
        if l[0] == 0:
            obj_type = "Car"
        elif l[0] == 1:
            obj_type = "Pedestrian"
        elif l[0] == 2:
            obj_type = "Cyclist"
        else:
            obj_type = "DontCare"
        line = '%s -1 -1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' % obj_type

        obj = kitti_utils.Object3d(line)
        obj.t = l[1:4]
        obj.h, obj.w, obj.l = l[4:7]
        obj.ry = np.arctan2(math.sin(l[7]), math.cos(l[7]))  # normalize yaw to [-pi, pi]

        _, corners_3d = kitti_utils.compute_box_3d(obj, calib.P)
        corners3d.append(corners_3d)
        objects_new.append(obj)

    if len(corners3d) > 0:
        corners3d = np.array(corners3d)
        img_boxes, _ = calib.corners3d_to_img_boxes(corners3d)

        img_boxes[:, 0] = np.clip(img_boxes[:, 0], 0, img_shape_2d[1] - 1)
        img_boxes[:, 1] = np.clip(img_boxes[:, 1], 0, img_shape_2d[0] - 1)
        img_boxes[:, 2] = np.clip(img_boxes[:, 2], 0, img_shape_2d[1] - 1)
        img_boxes[:, 3] = np.clip(img_boxes[:, 3], 0, img_shape_2d[0] - 1)

        img_boxes_w = img_boxes[:, 2] - img_boxes[:, 0]
        img_boxes_h = img_boxes[:, 3] - img_boxes[:, 1]
        box_valid_mask = np.logical_and(img_boxes_w < img_shape_2d[1] * 0.8, img_boxes_h < img_shape_2d[0] * 0.8)

        for i, obj in enumerate(objects_new):
            x, z, ry = obj.t[0], obj.t[2], obj.ry
            beta = np.arctan2(z, x)
            alpha = -np.sign(beta) * np.pi / 2 + beta + ry

            obj.alpha = alpha
            obj.box2d = img_boxes[i, :]

    if RGB_Map is not None:
        labels, noObjectLabels = kitti_utils.read_labels_for_bevbox(objects_new)
        if not noObjectLabels:
            labels[:, 1:] = aug_utils.camera_to_lidar_box(labels[:, 1:], calib.V2C, calib.R0, calib.P)  # convert rect camera coords to velodyne coords

        target = bev_utils.build_yolo_target(labels)
        utils.draw_box_in_bev(RGB_Map, target)

    return objects_new

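# Note on the angle encoding used above: the network regresses each box's yaw
# as the pair (im, re) = (sin(yaw), cos(yaw)), so np.arctan2(im, re) recovers a
# continuous angle, and np.arctan2(math.sin(a), math.cos(a)) simply
# re-normalizes a to [-pi, pi]; e.g. a = 4.0 maps to 4.0 - 2*pi ~= -2.283.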
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_def", type=str, default="config/complex_tiny_yolov3.cfg", help="path to model definition file")
    parser.add_argument("--weights_path", type=str, default="checkpoints/tiny-yolov3_ckpt_epoch-220.pth", help="path to weights file")
    parser.add_argument("--class_path", type=str, default="data/classes.names", help="path to class label file")
    parser.add_argument("--conf_thres", type=float, default=0.01, help="object confidence threshold")
    parser.add_argument("--nms_thres", type=float, default=0.01, help="IoU threshold for non-maximum suppression")
    parser.add_argument("--img_size", type=int, default=cnf.BEV_WIDTH, help="size of each image dimension")
    parser.add_argument("--split", type=str, default="valid", help="dataset split (text file listing the image IDs)")
    parser.add_argument("--folder", type=str, default="training", help="name of the KITTI data directory")
    opt = parser.parse_args()
    print(opt)

    classes = utils.load_classes(opt.class_path)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set up model
    model = Darknet(opt.model_def, img_size=opt.img_size).to(device)
    # Load checkpoint weights
    model.load_state_dict(torch.load(opt.weights_path, map_location='cpu'))
    # Eval mode
    model.eval()

    dataset = KittiYOLODataset(cnf.root_dir, split=opt.split, mode='TEST', folder=opt.folder, data_aug=False)
    data_loader = torch_data.DataLoader(dataset, 1, shuffle=False)

    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    start_time = time.time()
    detections_list = []
    # UDP socket for streaming detections to the board
    boardSock = socket(AF_INET, SOCK_DGRAM)
    board_add = ('192.168.2.99', 12400)
    frame = 0
    for index, (img_paths, bev_maps) in enumerate(data_loader):

        # Configure bev image
        input_imgs = bev_maps.type(Tensor)  # torch.autograd.Variable is deprecated; a plain tensor works
        #input_imgs = input_imgs * 0.4

        # Get detections
        with torch.no_grad():
            detections = model(input_imgs)
            detections = utils.non_max_suppression_rotated_bbox(detections, opt.conf_thres, opt.nms_thres)

        end_time = time.time()
        print(f"FPS: {(1.0/(end_time-start_time)):0.2f}")  # end-to-end time per loop iteration
        start_time = end_time

        img_detections = []  # Stores detections for each image index
        img_detections.extend(detections)

        bev_maps = torch.squeeze(bev_maps).numpy()

        RGB_Map = np.zeros((cnf.BEV_WIDTH, cnf.BEV_WIDTH, 3))
        RGB_Map[:, :, 2] = bev_maps[0, :, :]  # r_map
        RGB_Map[:, :, 1] = bev_maps[1, :, :]  # g_map
        RGB_Map[:, :, 0] = bev_maps[2, :, :]  # b_map

        RGB_Map *= 255
        RGB_Map = RGB_Map.astype(np.uint8)
        detections_list.clear()
        count = 0
        for detections in img_detections:
            if detections is None:
                continue

            # Rescale boxes to original image
            detections = utils.rescale_boxes(detections, opt.img_size, RGB_Map.shape[:2])
            # Send the detection count first so the receiver knows how many packets follow
            boardSock.sendto(str(len(detections)).encode(), board_add)
            for x, y, w, l, im, re, conf, cls_conf, cls_pred in detections:
                yaw = np.arctan2(im, re)
                # Draw rotated box
                bev_utils.drawRotatedBox(RGB_Map, x, y, w, l, yaw, cnf.colors[int(cls_pred)])
                # Pack one detection; '10s' pads or truncates the ID to exactly 10 bytes
                send_struct = struct.pack('10sffffffff', f'D{frame}-{count}'.encode(), x.item(), y.item(), w.item(), l.item(), yaw.item(), conf.item(), cls_conf.item(), cls_pred.item())
                print(x.item(), y.item(), w.item(), l.item(), yaw.item(), conf.item(), cls_conf.item(), cls_pred.item())
                count += 1
                detections_list.append(send_struct)
                # cls_pred value: 0 (Car), 1 (Pedestrian), 2 (Cyclist)

        # Send this frame's packed detections to the board
        for packed in detections_list:
            boardSock.sendto(packed, board_add)
        frame += 1
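        # Note: UDP gives no delivery or ordering guarantees; the count message
        # sent before each frame's packets lets the receiver notice dropped datagrams.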

        '''
        img2d = cv2.imread(img_paths[0])
        calib = kitti_utils.Calibration(img_paths[0].replace(".png", ".txt").replace("image_2", "calib"))
        objects_pred = predictions_to_kitti_format(img_detections, calib, img2d.shape, opt.img_size)

        #img2d = mview.show_image_with_boxes(img2d, objects_pred, calib, False)

        cv2.imshow("bev img", RGB_Map)
        #cv2.imshow("img2d", img2d)

        if cv2.waitKey(0) & 0xFF == 27:
            break
        '''
    boardSock.close()
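
For reference, a minimal sketch of what the board-side receiver could look like, assuming it simply listens on UDP port 12400 and mirrors the sender's '10sffffffff' struct layout; the per-frame count arrives as a short ASCII message, so it can be told apart from the fixed-size detection packets. This is an illustration, not the actual board firmware:

import socket
import struct

PACKET_FMT = '10sffffffff'                 # must match the sender's layout
PACKET_SIZE = struct.calcsize(PACKET_FMT)  # 44 bytes with native alignment

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(('', 12400))                     # port from board_add above

while True:
    data, _ = sock.recvfrom(1024)
    if len(data) == PACKET_SIZE:
        # One detection: 10-byte ID tag plus eight floats
        tag, x, y, w, l, yaw, conf, cls_conf, cls_pred = struct.unpack(PACKET_FMT, data)
        print(tag.rstrip(b'\x00').decode(), x, y, w, l, yaw, conf, cls_conf, cls_pred)
    else:
        # ASCII count message announcing the next frame's detections
        print('detections in next frame:', int(data.decode()))

Since UDP is lossy, a production receiver would also want a socket timeout and a check that all announced packets for a frame actually arrived.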