Showing
5 changed files
with
181 additions
and
14 deletions
code/yolov3/changes.txt
0 → 100644
1 | +changes from https://github.com/wizyoung/YOLOv3_TensorFlow | ||
2 | + | ||
3 | +by Seongju Kim, kareus1@khu.ac.kr | ||
4 | + | ||
5 | +1] changed TextLineDataset to TFRecordDataset. (also changed data parsing in data utils and eval utils) | ||
6 | +2] fixed restore-does-not-exist problem in train/eval mode | ||
7 | +3] fixed saver to save the parameter only when save-optimizer option is true | ||
8 | +4] changed the string parameter 'mode' to the boolean 'is_training' in the data util functions (the string value 'mode' is passed as a byte string, so if-clauses comparing it to a str do not evaluate as expected, e.g. 'train' != b'train') | ||
9 | +5] wrote TFRecord binary iterator, which runs without tf session (references: https://github.com/pgmmpk/tfrecord ) | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
... | @@ -17,16 +17,16 @@ from model import yolov3 | ... | @@ -17,16 +17,16 @@ from model import yolov3 |
17 | parser = argparse.ArgumentParser(description="YOLO-V3 eval procedure.") | 17 | parser = argparse.ArgumentParser(description="YOLO-V3 eval procedure.") |
18 | 18 | ||
19 | # paths | 19 | # paths |
20 | -parser.add_argument("--eval_file", type=str, default="./data/my_data/val.txt", | 20 | +parser.add_argument("--eval_file", type=str, default="../../data/test.tfrecord", |
21 | help="The path of the validation or test txt file.") | 21 | help="The path of the validation or test txt file.") |
22 | 22 | ||
23 | -parser.add_argument("--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt", | 23 | +parser.add_argument("--restore_path", type=str, default="../../data/darknet_weights/yolov3.ckpt", |
24 | help="The path of the weights to restore.") | 24 | help="The path of the weights to restore.") |
25 | 25 | ||
26 | -parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt", | 26 | +parser.add_argument("--anchor_path", type=str, default="../../data/yolo_anchors.txt", |
27 | help="The path of the anchor txt file.") | 27 | help="The path of the anchor txt file.") |
28 | 28 | ||
29 | -parser.add_argument("--class_name_path", type=str, default="./data/coco.names", | 29 | +parser.add_argument("--class_name_path", type=str, default="../../data/classes.txt", |
30 | help="The path of the class names.") | 30 | help="The path of the class names.") |
31 | 31 | ||
32 | # some numbers | 32 | # some numbers | ... | ... |
code/yolov3/get_kmeans.py
0 → 100644
1 | +from __future__ import division, print_function | ||
2 | + | ||
3 | +import numpy as np | ||
4 | + | ||
5 | +def iou(box, clusters): | ||
6 | + """ | ||
7 | + Calculates the Intersection over Union (IoU) between a box and k clusters. | ||
8 | + param: | ||
9 | + box: tuple or array, shifted to the origin (i.e. width and height) | ||
10 | + clusters: numpy array of shape (k, 2) where k is the number of clusters | ||
11 | + return: | ||
12 | + numpy array of shape (k,) where k is the number of clusters | ||
13 | + """ | ||
14 | + x = np.minimum(clusters[:, 0], box[0]) | ||
15 | + y = np.minimum(clusters[:, 1], box[1]) | ||
16 | + if np.count_nonzero(x == 0) > 0 or np.count_nonzero(y == 0) > 0: | ||
17 | + raise ValueError("Box has no area") | ||
18 | + | ||
19 | + intersection = x * y | ||
20 | + box_area = box[0] * box[1] | ||
21 | + cluster_area = clusters[:, 0] * clusters[:, 1] | ||
22 | + | ||
23 | + iou_ = np.true_divide(intersection, box_area + cluster_area - intersection + 1e-10) | ||
24 | + # iou_ = intersection / (box_area + cluster_area - intersection + 1e-10) | ||
25 | + | ||
26 | + return iou_ | ||
27 | + | ||
28 | + | ||
29 | +def avg_iou(boxes, clusters): | ||
30 | + """ | ||
31 | + Calculates the average Intersection over Union (IoU) between a numpy array of boxes and k clusters. | ||
32 | + param: | ||
33 | + boxes: numpy array of shape (r, 2), where r is the number of rows | ||
34 | + clusters: numpy array of shape (k, 2) where k is the number of clusters | ||
35 | + return: | ||
36 | + average IoU as a single float | ||
37 | + """ | ||
38 | + return np.mean([np.max(iou(boxes[i], clusters)) for i in range(boxes.shape[0])]) | ||
39 | + | ||
40 | + | ||
41 | +def translate_boxes(boxes): | ||
42 | + """ | ||
43 | + Translates all the boxes to the origin. | ||
44 | + param: | ||
45 | + boxes: numpy array of shape (r, 4) | ||
46 | + return: | ||
47 | + numpy array of shape (r, 2) | ||
48 | + """ | ||
49 | + new_boxes = boxes.copy() | ||
50 | + for row in range(new_boxes.shape[0]): | ||
51 | + new_boxes[row][2] = np.abs(new_boxes[row][2] - new_boxes[row][0]) | ||
52 | + new_boxes[row][3] = np.abs(new_boxes[row][3] - new_boxes[row][1]) | ||
53 | + return np.delete(new_boxes, [0, 1], axis=1) | ||
54 | + | ||
55 | + | ||
56 | +def kmeans(boxes, k, dist=np.median): | ||
57 | + """ | ||
58 | + Calculates k-means clustering with the Intersection over Union (IoU) metric. | ||
59 | + param: | ||
60 | + boxes: numpy array of shape (r, 2), where r is the number of rows | ||
61 | + k: number of clusters | ||
62 | + dist: function that reduces the boxes assigned to a cluster to its new center, called with axis=0 (default: np.median) | ||
63 | + return: | ||
64 | + numpy array of shape (k, 2) | ||
65 | + """ | ||
66 | + rows = boxes.shape[0] | ||
67 | + | ||
68 | + distances = np.empty((rows, k)) | ||
69 | + last_clusters = np.zeros((rows,)) | ||
70 | + | ||
71 | + np.random.seed() | ||
72 | + | ||
73 | + # the Forgy method will fail if the whole array contains the same rows | ||
74 | + clusters = boxes[np.random.choice(rows, k, replace=False)] | ||
75 | + | ||
76 | + while True: | ||
77 | + for row in range(rows): | ||
78 | + distances[row] = 1 - iou(boxes[row], clusters) | ||
79 | + | ||
80 | + nearest_clusters = np.argmin(distances, axis=1) | ||
81 | + | ||
82 | + if (last_clusters == nearest_clusters).all(): | ||
83 | + break | ||
84 | + | ||
85 | + for cluster in range(k): | ||
86 | + clusters[cluster] = dist(boxes[nearest_clusters == cluster], axis=0) | ||
87 | + | ||
88 | + last_clusters = nearest_clusters | ||
89 | + | ||
90 | + return clusters | ||
91 | + | ||
92 | + | ||
93 | +def parse_anno(annotation_path, target_size=None): | ||
94 | + anno = open(annotation_path, 'r') | ||
95 | + result = [] | ||
96 | + for line in anno: | ||
97 | + s = line.strip().split(' ') | ||
98 | + img_w = int(s[2]) | ||
99 | + img_h = int(s[3]) | ||
100 | + s = s[4:] | ||
101 | + box_cnt = len(s) // 5 | ||
102 | + for i in range(box_cnt): | ||
103 | + x_min, y_min, x_max, y_max = float(s[i*5+1]), float(s[i*5+2]), float(s[i*5+3]), float(s[i*5+4]) | ||
104 | + width = x_max - x_min | ||
105 | + height = y_max - y_min | ||
106 | + assert width > 0 | ||
107 | + assert height > 0 | ||
108 | + # use letterbox resize, i.e. keep the original aspect ratio | ||
109 | + # get k-means anchors on the resized target image size | ||
110 | + if target_size is not None: | ||
111 | + resize_ratio = min(target_size[0] / img_w, target_size[1] / img_h) | ||
112 | + width *= resize_ratio | ||
113 | + height *= resize_ratio | ||
114 | + result.append([width, height]) | ||
115 | + # get k-means anchors on the original image size | ||
116 | + else: | ||
117 | + result.append([width, height]) | ||
118 | + result = np.asarray(result) | ||
119 | + return result | ||
120 | + | ||
121 | + | ||
122 | +def get_kmeans(anno, cluster_num=9): | ||
123 | + | ||
124 | + anchors = kmeans(anno, cluster_num) | ||
125 | + ave_iou = avg_iou(anno, anchors) | ||
126 | + | ||
127 | + anchors = anchors.astype('int').tolist() | ||
128 | + | ||
129 | + anchors = sorted(anchors, key=lambda x: x[0] * x[1]) | ||
130 | + | ||
131 | + return anchors, ave_iou | ||
132 | + | ||
133 | + | ||
134 | +if __name__ == '__main__': | ||
135 | + # target_size format: [width, height] | ||
136 | + # if target_size is specified, the anchors are on the resized image scale | ||
137 | + # if target_size is set to None, the anchors are on the original image scale | ||
138 | + target_size = [416, 416] | ||
139 | + annotation_path = "../train.txt" | ||
140 | + anno_result = parse_anno(annotation_path, target_size=target_size) | ||
141 | + anchors, ave_iou = get_kmeans(anno_result, 9) | ||
142 | + | ||
143 | + anchor_string = '' | ||
144 | + for anchor in anchors: | ||
145 | + anchor_string += '{},{}, '.format(anchor[0], anchor[1]) | ||
146 | + anchor_string = anchor_string[:-2] | ||
147 | + | ||
148 | + print('### Save the anchors to text file ###') | ||
149 | + print('anchors are:') | ||
150 | + print(anchor_string) | ||
151 | + print('the average iou is:') | ||
152 | + print(ave_iou) |
... | @@ -98,7 +98,6 @@ class yolov3(object): | ... | @@ -98,7 +98,6 @@ class yolov3(object): |
98 | def forward(self, inputs, is_training=False, reuse=False): | 98 | def forward(self, inputs, is_training=False, reuse=False): |
99 | # the input size: [height, width] format | 99 | # the input size: [height, width] format |
100 | self.img_size = tf.shape(inputs)[1:3] | 100 | self.img_size = tf.shape(inputs)[1:3] |
101 | - print("Img size:", self.img_size) | ||
102 | 101 | ||
103 | batch_norm_params = { | 102 | batch_norm_params = { |
104 | 'decay': self.batch_norm_decay, | 103 | 'decay': self.batch_norm_decay, |
... | @@ -148,7 +147,7 @@ class yolov3(object): | ... | @@ -148,7 +147,7 @@ class yolov3(object): |
148 | 147 | ||
149 | return feature_map_1, feature_map_2, feature_map_3 | 148 | return feature_map_1, feature_map_2, feature_map_3 |
150 | 149 | ||
151 | - def reorganize_layer(self, feature_map, anchors): | 150 | + def reorg_layer(self, feature_map, anchors): |
152 | # size : [h, w] format | 151 | # size : [h, w] format |
153 | grid_size = feature_map.get_shape().as_list()[1:3] if self.use_static_shape else tf.shape(feature_map)[1:3] # [13, 13] | 152 | grid_size = feature_map.get_shape().as_list()[1:3] if self.use_static_shape else tf.shape(feature_map)[1:3] # [13, 13] |
154 | ratio = tf.cast(self.img_size / grid_size, tf.float32) | 153 | ratio = tf.cast(self.img_size / grid_size, tf.float32) |
... | @@ -180,6 +179,13 @@ class yolov3(object): | ... | @@ -180,6 +179,13 @@ class yolov3(object): |
180 | 179 | ||
181 | return x_y_offset, boxes, conf_logits, prob_logits | 180 | return x_y_offset, boxes, conf_logits, prob_logits |
182 | 181 | ||
182 | + def predict(self, feature_maps): | ||
183 | + feature_map_1, feature_map_2, feature_map_3 = feature_maps | ||
184 | + | ||
185 | + feature_map_anchors = [(feature_map_1, self.anchors[6:9]), | ||
186 | + (feature_map_2, self.anchors[3:6]), | ||
187 | + (feature_map_3, self.anchors[0:3])] | ||
188 | + reorg_results = [self.reorg_layer(feature_map, anchors) for (feature_map, anchors) in feature_map_anchors] | ||
183 | 189 | ||
184 | def _reshape_logit(result): | 190 | def _reshape_logit(result): |
185 | x_y_offset, boxes, conf_logits, prob_logits = result | 191 | x_y_offset, boxes, conf_logits, prob_logits = result |
... | @@ -189,14 +195,6 @@ class yolov3(object): | ... | @@ -189,14 +195,6 @@ class yolov3(object): |
189 | prob_logits = tf.reshape(prob_logits, [-1, grid_size[0] * grid_size[1] * 3, self.class_num]) | 195 | prob_logits = tf.reshape(prob_logits, [-1, grid_size[0] * grid_size[1] * 3, self.class_num]) |
190 | return boxes, conf_logits, prob_logits | 196 | return boxes, conf_logits, prob_logits |
191 | 197 | ||
192 | - def predict(self, feature_maps): | ||
193 | - feature_map_1, feature_map_2, feature_map_3 = feature_maps | ||
194 | - | ||
195 | - feature_map_anchors = [(feature_map_1, self.anchors[6:9]), | ||
196 | - (feature_map_2, self.anchors[3:6]), | ||
197 | - (feature_map_3, self.anchors[0:3])] | ||
198 | - reorg_results = [self.reorganize_layer(feature_map, anchors) for (feature_map, anchors) in feature_map_anchors] | ||
199 | - | ||
200 | boxes_list, confs_list, probs_list = [], [], [] | 198 | boxes_list, confs_list, probs_list = [], [], [] |
201 | 199 | ||
202 | for result in reorg_results: | 200 | for result in reorg_results: | ... | ... |
... | @@ -14,6 +14,14 @@ from nms_utils import gpu_nms | ... | @@ -14,6 +14,14 @@ from nms_utils import gpu_nms |
14 | 14 | ||
15 | from model import yolov3 | 15 | from model import yolov3 |
16 | 16 | ||
17 | +is_training = tf.placeholder(tf.bool, name="phase_train") | ||
18 | +handle_flag = tf.placeholder(tf.string, [], name='iterator_handle_flag') | ||
19 | + | ||
20 | +pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None]) | ||
21 | +pred_scores_flag = tf.placeholder(tf.float32, [1, None, None]) | ||
22 | +gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, args.class_num, args.nms_topk, args.score_threshold, args.nms_threshold) | ||
23 | + | ||
24 | +### tf.data pipeline | ||
17 | train_dataset = tf.data.TFRecordDataset(filenames=train_file, compression_type='GZIP') | 25 | train_dataset = tf.data.TFRecordDataset(filenames=train_file, compression_type='GZIP') |
18 | train_dataset = train_dataset.shuffle(train_img_cnt) | 26 | train_dataset = train_dataset.shuffle(train_img_cnt) |
19 | train_dataset = train_dataset.batch(batch_size) | 27 | train_dataset = train_dataset.batch(batch_size) | ... | ... |
-
Please register or login to post a comment