Seongju Kim

ipynb version for Google Colab & fixed more errors

...@@ -24,10 +24,10 @@ anchor_path = data_path + 'yolo_anchors.txt' # The path of the anchor txt file. ...@@ -24,10 +24,10 @@ anchor_path = data_path + 'yolo_anchors.txt' # The path of the anchor txt file.
24 class_name_path = data_path + 'classes.txt' # The path of the class names. 24 class_name_path = data_path + 'classes.txt' # The path of the class names.
25 25
26 ### Training related numbers 26 ### Training related numbers
27 -batch_size = 6 27 +batch_size = 10
28 img_size = [416, 416] # Images will be resized to `img_size` and fed to the network, size format: [width, height] 28 img_size = [416, 416] # Images will be resized to `img_size` and fed to the network, size format: [width, height]
29 letterbox_resize = True # Whether to use the letterbox resize, i.e., keep the original aspect ratio in the resized image. 29 letterbox_resize = True # Whether to use the letterbox resize, i.e., keep the original aspect ratio in the resized image.
30 -total_epoches = 50 30 +total_epoches = 20
31 train_evaluation_step = 10 # Evaluate on the training batch after some steps. 31 train_evaluation_step = 10 # Evaluate on the training batch after some steps.
32 val_evaluation_epoch = 2 # Evaluate on the whole validation dataset after some epochs. Set to None to evaluate every epoch. 32 val_evaluation_epoch = 2 # Evaluate on the whole validation dataset after some epochs. Set to None to evaluate every epoch.
33 save_epoch = 5 # Save the model after some epochs. 33 save_epoch = 5 # Save the model after some epochs.
...@@ -73,7 +73,7 @@ use_label_smooth = True # Whether to use class label smoothing strategy. ...@@ -73,7 +73,7 @@ use_label_smooth = True # Whether to use class label smoothing strategy.
73 use_focal_loss = True # Whether to apply focal loss on the conf loss. 73 use_focal_loss = True # Whether to apply focal loss on the conf loss.
74 use_mix_up = True # Whether to use mix up data augmentation strategy. 74 use_mix_up = True # Whether to use mix up data augmentation strategy.
75 use_warm_up = True # whether to use warm up strategy to prevent from gradient exploding. 75 use_warm_up = True # whether to use warm up strategy to prevent from gradient exploding.
76 -warm_up_epoch = 3 # Warm up training epochs. Set to a larger value if gradient explodes. 76 +warm_up_epoch = 2 # Warm up training epochs. Set to a larger value if gradient explodes.
77 77
78 ### some constants in validation 78 ### some constants in validation
79 # nms 79 # nms
......
...@@ -2,8 +2,13 @@ changes from https://github.com/wizyoung/YOLOv3_TensorFlow ...@@ -2,8 +2,13 @@ changes from https://github.com/wizyoung/YOLOv3_TensorFlow
2 2
3 by Seongju Kim, kareus1@khu.ac.kr 3 by Seongju Kim, kareus1@khu.ac.kr
4 4
5 +I have only tested this in the Colab environment so far (2020.05.16),
6 +so let me know if there are any errors/problems in the Python code version.
7 +(##last changed: 2020.05.16)
8 +
5 1] changed TextLineDataset to TFRecordDataset. (also changed data parsing in data utils and eval utils) 9 1] changed TextLineDataset to TFRecordDataset. (also changed data parsing in data utils and eval utils)
6 2] fixed restore-does-not-exist problem in train/eval mode 10 2] fixed restore-does-not-exist problem in train/eval mode
7 3] fixed saver to save the parameter only when save-optimizer option is true 11 3] fixed saver to save the parameter only when save-optimizer option is true
8 4] changed parameter 'mode' to bool value 'is_training' in data util functions (the string value 'mode' is passed as a byte string, so the functions do not evaluate if-clauses as expected, e.g. 'train' != b'train'; see the sketch below) 12 4] changed parameter 'mode' to bool value 'is_training' in data util functions (the string value 'mode' is passed as a byte string, so the functions do not evaluate if-clauses as expected, e.g. 'train' != b'train'; see the sketch below)
9 -5] wrote TFRecord binary iterator, which runs without tf session (references: https://github.com/pgmmpk/tfrecord )
...\ No newline at end of file ...\ No newline at end of file
13 +5] wrote TFRecord binary iterator, which runs without tf session (references: https://github.com/pgmmpk/tfrecord )
14 +6] removed logging/tensorboard summary code. (I will add it later if necessary)
...\ No newline at end of file ...\ No newline at end of file
......
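
A quick illustration of change 4] above: tf.py_func hands its Python-3 callback any string argument as bytes, so an equality check against a str silently fails. A minimal, hypothetical reproduction (TF 1.x, not code from this repo):

import numpy as np
import tensorflow as tf

def check_mode(mode):
    # Under Python 3, `mode` arrives here as b'train', not 'train'.
    return np.array(mode == 'train')

with tf.Session() as sess:
    is_train = tf.py_func(check_mode, inp=[tf.constant('train')], Tout=tf.bool)
    print(sess.run(is_train))  # False: b'train' != 'train', hence the bool 'is_training' flag
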
...@@ -9,22 +9,20 @@ import random ...@@ -9,22 +9,20 @@ import random
9 PY_VERSION = sys.version_info[0] 9 PY_VERSION = sys.version_info[0]
10 iter_cnt = 0 10 iter_cnt = 0
11 11
12 -FEATURE_DESCRIPTION = { 12 +def _parse_tfrecord(data):
13 - 'index': tf.FixedLenFeature([], tf.int64), 13 + example = tf.train.Example()
14 - 'image': tf.FixedLenFeature([], tf.string), 14 + example.ParseFromString(data)
15 - 'width': tf.FixedLenFeature([], tf.int64), 15 + features = example.features.feature
16 - 'height': tf.FixedLenFeature([], tf.int64), 16 + return features
17 - 'boxes': tf.VarLenFeature(tf.int64)
18 -}
19 17
20 def parse_tfrecord(data): 18 def parse_tfrecord(data):
21 # tfrecord parser for TFRecordDataset (raw data) 19 # tfrecord parser for TFRecordDataset (raw data)
22 - features = tf.parse_single_example(data, FEATURE_DESCRIPTION) 20 + features = _parse_tfrecord(data)
23 - index = int(features['index']) 21 + index = features['index'].int64_list.value[0]
24 - encoded_image = np.frombuffer(features['image'], dtype = np.uint8) 22 + encoded_image = np.frombuffer(features['image'].bytes_list.value[0], dtype = np.uint8)
25 - width = int(features['width']) 23 + width = features['width'].int64_list.value[0]
26 - height = int(features['height']) 24 + height = features['height'].int64_list.value[0]
27 - boxes = features['boxes'].eval() 25 + boxes = features['boxes'].int64_list.value
28 26
29 assert len(boxes) % 5 == 0, 'Annotation error occurred in box array.' 27 assert len(boxes) % 5 == 0, 'Annotation error occurred in box array.'
30 box_cnt = len(boxes) // 5 28 box_cnt = len(boxes) // 5
...@@ -33,7 +31,7 @@ def parse_tfrecord(data): ...@@ -33,7 +31,7 @@ def parse_tfrecord(data):
33 labels = [] 31 labels = []
34 32
35 for i in range(box_cnt): 33 for i in range(box_cnt):
36 - label, x_min, y_min, x_max, y_max = int(boxes[i * 5]), float(boxes[i * 5 + 1]), float(boxes[i * 5 + 2]), float(boxes[i * 5 + 3]) ## do we need to change int to float? are there float rectangle samples? 34 + label, x_min, y_min, x_max, y_max = int(boxes[i * 5]), float(boxes[i * 5 + 1]), float(boxes[i * 5 + 2]), float(boxes[i * 5 + 3]), float(boxes[i * 5 + 4]) ## do we need to change int to float? are there float rectangle samples?
37 aligned_boxes.append([x_min, y_min, x_max, y_max]) 35 aligned_boxes.append([x_min, y_min, x_max, y_max])
38 labels.append(label) 36 labels.append(label)
39 37
......
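
For reference, the parser above expects each record to provide 'index', 'width', and 'height' as scalar int64 features, 'image' as a single bytes feature, and 'boxes' as a flat int64 list of [label, x_min, y_min, x_max, y_max] per object. A minimal writer that produces matching records could look like the sketch below (the file names and helper functions are assumptions, not part of this repo):

import tensorflow as tf

def _int64(values):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

def _bytes(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def write_example(writer, index, jpeg_bytes, width, height, boxes):
    # boxes: flat list [label, x_min, y_min, x_max, y_max] * N, matching the parser above
    example = tf.train.Example(features=tf.train.Features(feature={
        'index': _int64([index]),
        'image': _bytes(jpeg_bytes),
        'width': _int64([width]),
        'height': _int64([height]),
        'boxes': _int64(boxes),
    }))
    writer.write(example.SerializeToString())

options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.GZIP)
with tf.python_io.TFRecordWriter('train.tfrecord', options) as writer:  # path is an assumption
    write_example(writer, 0, open('sample.jpg', 'rb').read(), 416, 416,
                  [1, 10, 20, 100, 200])
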
...@@ -99,6 +99,8 @@ with tf.Session() as sess: ...@@ -99,6 +99,8 @@ with tf.Session() as sess:
99 sess.run([tf.global_variables_initializer()]) 99 sess.run([tf.global_variables_initializer()])
100 if os.path.exists(args.restore_path): 100 if os.path.exists(args.restore_path):
101 saver_to_restore.restore(sess, args.restore_path) 101 saver_to_restore.restore(sess, args.restore_path)
102 + else:
103 + raise ValueError('there is no model to evaluate. You should move/create the checkpoint file at the restore path')
102 104
103 print('\nStart evaluation...\n') 105 print('\nStart evaluation...\n')
104 106
......
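
One caveat about the os.path.exists check above: a TF1 checkpoint path is usually a prefix (e.g. model.index plus model.data-* files) rather than a single file, so os.path.exists can report False even when a restorable checkpoint exists. A possible alternative, sketched here under the assumption that args, sess, and saver_to_restore come from the surrounding script, is to resolve the latest checkpoint in the directory first:

import os
import tensorflow as tf

# Resolve the newest checkpoint prefix recorded in the directory's 'checkpoint' file.
ckpt = tf.train.latest_checkpoint(os.path.dirname(args.restore_path))
if ckpt is not None:
    saver_to_restore.restore(sess, ckpt)
else:
    raise ValueError('there is no model to evaluate. You should move/create the checkpoint file at the restore path')
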
...@@ -22,18 +22,18 @@ pred_scores_flag = tf.placeholder(tf.float32, [1, None, None]) ...@@ -22,18 +22,18 @@ pred_scores_flag = tf.placeholder(tf.float32, [1, None, None])
22 gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, args.class_num, args.nms_topk, args.score_threshold, args.nms_threshold) 22 gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, args.class_num, args.nms_topk, args.score_threshold, args.nms_threshold)
23 23
24 ### tf.data pipeline 24 ### tf.data pipeline
25 -train_dataset = tf.data.TFRecordDataset(filenames=train_file, compression_type='GZIP') 25 +train_dataset = tf.data.TFRecordDataset(filenames=args.train_file, compression_type='GZIP')
26 -train_dataset = train_dataset.shuffle(train_img_cnt) 26 +train_dataset = train_dataset.shuffle(args.train_img_cnt)
27 -train_dataset = train_dataset.batch(batch_size) 27 +train_dataset = train_dataset.batch(args.batch_size)
28 train_dataset = train_dataset.map( 28 train_dataset = train_dataset.map(
29 lambda x: tf.py_func(get_batch_data, 29 lambda x: tf.py_func(get_batch_data,
30 inp=[x, args.class_num, args.img_size, args.anchors, True, args.multi_scale_train, args.use_mix_up, args.letterbox_resize], 30 inp=[x, args.class_num, args.img_size, args.anchors, True, args.multi_scale_train, args.use_mix_up, args.letterbox_resize],
31 Tout=[tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]), 31 Tout=[tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),
32 num_parallel_calls=args.num_threads 32 num_parallel_calls=args.num_threads
33 ) 33 )
34 -train_dataset = train_dataset.prefetch(prefetech_buffer) 34 +train_dataset = train_dataset.prefetch(args.prefetech_buffer)
35 35
36 -val_dataset = tf.data.TFRecordDataset(filenames=val_file, compression_type='GZIP') 36 +val_dataset = tf.data.TFRecordDataset(filenames=args.val_file, compression_type='GZIP')
37 val_dataset = val_dataset.batch(1) 37 val_dataset = val_dataset.batch(1)
38 val_dataset = val_dataset.map( 38 val_dataset = val_dataset.map(
39 lambda x: tf.py_func(get_batch_data, 39 lambda x: tf.py_func(get_batch_data,
...@@ -41,7 +41,7 @@ val_dataset = val_dataset.map( ...@@ -41,7 +41,7 @@ val_dataset = val_dataset.map(
41 Tout=[tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]), 41 Tout=[tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),
42 num_parallel_calls=args.num_threads 42 num_parallel_calls=args.num_threads
43 ) 43 )
44 -val_dataset.prefetch(prefetech_buffer) 44 +val_dataset.prefetch(args.prefetech_buffer)
45 45
46 iterator = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes) 46 iterator = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
47 train_init_op = iterator.make_initializer(train_dataset) 47 train_init_op = iterator.make_initializer(train_dataset)
...@@ -71,13 +71,13 @@ saver_to_restore = tf.train.Saver(var_list=tf.contrib.framework.get_variables_to ...@@ -71,13 +71,13 @@ saver_to_restore = tf.train.Saver(var_list=tf.contrib.framework.get_variables_to
71 update_vars = tf.contrib.framework.get_variables_to_restore(include=update_part) 71 update_vars = tf.contrib.framework.get_variables_to_restore(include=update_part)
72 72
73 73
74 -global_step = tf.Variable(float(global_step), trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 74 +global_step = tf.Variable(float(args.global_step), trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
75 if use_warm_up: 75 if use_warm_up:
76 learning_rate = tf.cond(tf.less(global_step, train_batch_num * warm_up_epoch), 76 learning_rate = tf.cond(tf.less(global_step, train_batch_num * warm_up_epoch),
77 lambda: learning_rate_init * global_step / (train_batch_num * warm_up_epoch), 77 lambda: learning_rate_init * global_step / (train_batch_num * warm_up_epoch),
78 - lambda: config_learning_rate(global_step - args.train_batch_num * args.warm_up_epoch)) 78 + lambda: config_learning_rate(args, global_step - args.train_batch_num * args.warm_up_epoch))
79 else: 79 else:
80 - learning_rate = config_learning_rate(global_step) 80 + learning_rate = config_learning_rate(args, global_step)
81 81
82 optimizer = config_optimizer(args.optimizer_name, learning_rate) 82 optimizer = config_optimizer(args.optimizer_name, learning_rate)
83 83
...@@ -105,7 +105,7 @@ with tf.Session() as sess: ...@@ -105,7 +105,7 @@ with tf.Session() as sess:
105 if os.path.exists(args.restore_path): 105 if os.path.exists(args.restore_path):
106 saver_to_restore.restore(sess, args.restore_path) 106 saver_to_restore.restore(sess, args.restore_path)
107 107
108 - print('\nStart training...\n') 108 + print('\nStart training...: Total epochs =', args.total_epoches, '\n')
109 109
110 best_mAP = -np.Inf 110 best_mAP = -np.Inf
111 111
...@@ -163,7 +163,7 @@ with tf.Session() as sess: ...@@ -163,7 +163,7 @@ with tf.Session() as sess:
163 163
164 # calc mAP 164 # calc mAP
165 rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(), AverageMeter() 165 rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(), AverageMeter()
166 - gt_dict = parse_gt_rec(args.val_file, args.img_size, args.letterbox_resize) 166 + gt_dict = parse_gt_rec(args.val_file, 'GZIP', args.img_size, args.letterbox_resize)
167 167
168 info = '======> Epoch: {}, global_step: {}, lr: {:.6g} <======\n'.format(epoch, __global_step, __lr) 168 info = '======> Epoch: {}, global_step: {}, lr: {:.6g} <======\n'.format(epoch, __global_step, __lr)
169 169
......
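
Since both pipelines now read GZIP-compressed TFRecords, the dataset sizes used above (args.train_img_cnt and friends) can be derived from the record files themselves instead of being hard-coded. A sketch using the TFRecordIterator helper defined in the notebook below (the attribute names are assumptions about how the script wires its sizes):

# Count records in the GZIP TFRecords to size the shuffle buffer and batch count.
with TFRecordIterator(args.train_file, 'GZIP') as reader:
    args.train_img_cnt = reader.count()
with TFRecordIterator(args.val_file, 'GZIP') as reader:
    args.val_img_cnt = reader.count()
train_batch_num = int(np.ceil(float(args.train_img_cnt) / args.batch_size))
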
1 +{
2 + "nbformat": 4,
3 + "nbformat_minor": 0,
4 + "metadata": {
5 + "colab": {
6 + "name": "yolov3.ipynb",
7 + "provenance": [],
8 + "collapsed_sections": []
9 + },
10 + "kernelspec": {
11 + "name": "python3",
12 + "display_name": "Python 3"
13 + }
14 + },
15 + "cells": [
16 + {
17 + "cell_type": "code",
18 + "metadata": {
19 + "id": "p0y3wIkfSuIT",
20 + "colab_type": "code",
21 + "outputId": "eeedd664-406a-43ff-aa5e-bd48963494c4",
22 + "colab": {
23 + "base_uri": "https://localhost:8080/",
24 + "height": 53
25 + }
26 + },
27 + "source": [
28 + "%tensorflow_version 1.x\n",
29 + "## Check your google colab/drive settings!!! (libraries, argument paths, ...)\n",
30 + "from google.colab import drive\n",
31 + "drive.mount('/content/gdrive')\n",
32 + "\n",
33 + "## variables for notebook\n",
34 + "training = True\n",
35 + "\n",
36 + "##### changes\n",
37 + "### changed some variable names because of argument conflicts\n",
38 + "### last two parts are train, test mode code. you can switch the mode with above variable, 'training'\n",
39 + "### there are some difficulties for separating train/eval code (making into functions), because of variable dependencies"
40 + ],
41 + "execution_count": 1,
42 + "outputs": [
43 + {
44 + "output_type": "stream",
45 + "text": [
46 + "TensorFlow 1.x selected.\n",
47 + "Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"
48 + ],
49 + "name": "stdout"
50 + }
51 + ]
52 + },
53 + {
54 + "cell_type": "code",
55 + "metadata": {
56 + "id": "Yh3RWBkgAjZx",
57 + "colab_type": "code",
58 + "colab": {}
59 + },
60 + "source": [
61 + "## TFRecord utils here\n",
62 + "import tensorflow as tf\n",
63 + "from itertools import tee\n",
64 + "\n",
65 + "class TFRecordIterator:\n",
66 + " def __init__(self, path, compression=None):\n",
67 + " self._core = tf.python_io.tf_record_iterator(path, tf.python_io.TFRecordOptions(compression))\n",
68 + " self._iterator = iter(self._core)\n",
69 + " self._iterator, self._iterator_temp = tee(self._iterator)\n",
70 + " self._total_cnt = sum(1 for _ in self._iterator_temp)\n",
71 + "\n",
72 + " def _read_value(self, feature):\n",
73 + " if len(feature.int64_list.value) > 0:\n",
74 + " return feature.int64_list.value\n",
75 + "\n",
76 + " if len(feature.bytes_list.value) > 0:\n",
77 + " return feature.bytes_list.value\n",
78 + "\n",
79 + " if len(feature.float_list.value) > 0:\n",
80 + " return feature.float_list.value\n",
81 + "\n",
82 + " return None\n",
83 + "\n",
84 + " def _read_features(self, features):\n",
85 + " d = dict()\n",
86 + " for data in features:\n",
87 + " d[data] = self._read_value(features[data])\n",
88 + " return d\n",
89 + "\n",
90 + " def __enter__(self):\n",
91 + " return self\n",
92 + "\n",
93 + " def __exit__(self, exception_type, exception_value, traceback):\n",
94 + " pass\n",
95 + "\n",
96 + " def __iter__(self):\n",
97 + " return self\n",
98 + "\n",
99 + " def __next__(self):\n",
100 + " record = next(self._iterator)\n",
101 + " example = tf.train.Example()\n",
102 + " example.ParseFromString(record)\n",
103 + " return self._read_features(example.features.feature)\n",
104 + "\n",
105 + " def count(self):\n",
106 + " return self._total_cnt\n"
107 + ],
108 + "execution_count": 0,
109 + "outputs": []
110 + },
111 + {
112 + "cell_type": "code",
113 + "metadata": {
114 + "id": "oCVOPE2XC3qE",
115 + "colab_type": "code",
116 + "colab": {}
117 + },
118 + "source": [
119 + "## plot utils\n",
120 + "from __future__ import division, print_function\n",
121 + "\n",
122 + "import cv2\n",
123 + "import random\n",
124 + "\n",
125 + "def get_color_table(class_num, seed=2):\n",
126 + " random.seed(seed)\n",
127 + " color_table = {}\n",
128 + " for i in range(class_num):\n",
129 + " color_table[i] = [random.randint(0, 255) for _ in range(3)]\n",
130 + " return color_table\n",
131 + "\n",
132 + "\n",
133 + "def plot_one_box(img, coord, label=None, color=None, line_thickness=None):\n",
134 + " tl = line_thickness or int(round(0.002 * max(img.shape[0:2]))) # line thickness\n",
135 + " color = color or [random.randint(0, 255) for _ in range(3)]\n",
136 + " c1, c2 = (int(coord[0]), int(coord[1])), (int(coord[2]), int(coord[3]))\n",
137 + " cv2.rectangle(img, c1, c2, color, thickness=tl)\n",
138 + " if label:\n",
139 + " tf = max(tl - 1, 1) # font thickness\n",
140 + " t_size = cv2.getTextSize(label, 0, fontScale=float(tl) / 3, thickness=tf)[0]\n",
141 + " c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3\n",
142 + " cv2.rectangle(img, c1, c2, color, -1) # filled\n",
143 + " cv2.putText(img, label, (c1[0], c1[1] - 2), 0, float(tl) / 3, [0, 0, 0], thickness=tf, lineType=cv2.LINE_AA)"
144 + ],
145 + "execution_count": 0,
146 + "outputs": []
147 + },
148 + {
149 + "cell_type": "code",
150 + "metadata": {
151 + "id": "SY10K9LoDJOZ",
152 + "colab_type": "code",
153 + "colab": {}
154 + },
155 + "source": [
156 + "## nms utils\n",
157 + "import numpy as np\n",
158 + "\n",
159 + "def gpu_nms(boxes, scores, num_classes, max_boxes=50, score_thresh=0.5, nms_thresh=0.5):\n",
160 + " boxes_list, label_list, score_list = [], [], []\n",
161 + " max_boxes = tf.constant(max_boxes, dtype='int32')\n",
162 + "\n",
163 + " boxes = tf.reshape(boxes, [-1, 4]) # '-1' means we don't konw the exact number of boxes\n",
164 + " score = tf.reshape(scores, [-1, num_classes])\n",
165 + "\n",
166 + " # Step 1: Create a filtering mask based on \"box_class_scores\" by using \"threshold\".\n",
167 + " mask = tf.greater_equal(score, tf.constant(score_thresh))\n",
168 + " # Step 2: Do non_max_suppression for each class\n",
169 + " for i in range(num_classes):\n",
170 + " # Step 3: Apply the mask to scores, boxes and pick them out\n",
171 + " filter_boxes = tf.boolean_mask(boxes, mask[:,i])\n",
172 + " filter_score = tf.boolean_mask(score[:,i], mask[:,i])\n",
173 + " nms_indices = tf.image.non_max_suppression(boxes=filter_boxes,\n",
174 + " scores=filter_score,\n",
175 + " max_output_size=max_boxes,\n",
176 + " iou_threshold=nms_thresh, name='nms_indices')\n",
177 + " label_list.append(tf.ones_like(tf.gather(filter_score, nms_indices), 'int32')*i)\n",
178 + " boxes_list.append(tf.gather(filter_boxes, nms_indices))\n",
179 + " score_list.append(tf.gather(filter_score, nms_indices))\n",
180 + "\n",
181 + " boxes = tf.concat(boxes_list, axis=0)\n",
182 + " score = tf.concat(score_list, axis=0)\n",
183 + " label = tf.concat(label_list, axis=0)\n",
184 + "\n",
185 + " return boxes, score, label\n",
186 + "\n",
187 + "\n",
188 + "def py_nms(boxes, scores, max_boxes=50, iou_thresh=0.5):\n",
189 + " assert boxes.shape[1] == 4 and len(scores.shape) == 1\n",
190 + "\n",
191 + " x1 = boxes[:, 0]\n",
192 + " y1 = boxes[:, 1]\n",
193 + " x2 = boxes[:, 2]\n",
194 + " y2 = boxes[:, 3]\n",
195 + "\n",
196 + " areas = (x2 - x1) * (y2 - y1)\n",
197 + " order = scores.argsort()[::-1]\n",
198 + "\n",
199 + " keep = []\n",
200 + " while order.size > 0:\n",
201 + " i = order[0]\n",
202 + " keep.append(i)\n",
203 + " xx1 = np.maximum(x1[i], x1[order[1:]])\n",
204 + " yy1 = np.maximum(y1[i], y1[order[1:]])\n",
205 + " xx2 = np.minimum(x2[i], x2[order[1:]])\n",
206 + " yy2 = np.minimum(y2[i], y2[order[1:]])\n",
207 + "\n",
208 + " w = np.maximum(0.0, xx2 - xx1 + 1)\n",
209 + " h = np.maximum(0.0, yy2 - yy1 + 1)\n",
210 + " inter = w * h\n",
211 + " ovr = inter / (areas[i] + areas[order[1:]] - inter)\n",
212 + "\n",
213 + " inds = np.where(ovr <= iou_thresh)[0]\n",
214 + " order = order[inds + 1]\n",
215 + "\n",
216 + " return keep[:max_boxes]\n",
217 + "\n",
218 + "\n",
219 + "def cpu_nms(boxes, scores, num_classes, max_boxes=50, score_thresh=0.5, iou_thresh=0.5):\n",
220 + " boxes = boxes.reshape(-1, 4)\n",
221 + " scores = scores.reshape(-1, num_classes)\n",
222 + " picked_boxes, picked_score, picked_label = [], [], []\n",
223 + "\n",
224 + " for i in range(num_classes):\n",
225 + " indices = np.where(scores[:,i] >= score_thresh)\n",
226 + " filter_boxes = boxes[indices]\n",
227 + " filter_scores = scores[:,i][indices]\n",
228 + " if len(filter_boxes) == 0: \n",
229 + " continue\n",
230 + "\n",
231 + " indices = py_nms(filter_boxes, filter_scores,\n",
232 + " max_boxes=max_boxes, iou_thresh=iou_thresh)\n",
233 + " picked_boxes.append(filter_boxes[indices])\n",
234 + " picked_score.append(filter_scores[indices])\n",
235 + " picked_label.append(np.ones(len(indices), dtype='int32')*i)\n",
236 + " if len(picked_boxes) == 0: \n",
237 + " return None, None, None\n",
238 + "\n",
239 + " boxes = np.concatenate(picked_boxes, axis=0)\n",
240 + " score = np.concatenate(picked_score, axis=0)\n",
241 + " label = np.concatenate(picked_label, axis=0)\n",
242 + "\n",
243 + " return boxes, score, label"
244 + ],
245 + "execution_count": 0,
246 + "outputs": []
247 + },
248 + {
249 + "cell_type": "code",
250 + "metadata": {
251 + "id": "Dg-ZKHmRDlPp",
252 + "colab_type": "code",
253 + "colab": {}
254 + },
255 + "source": [
256 + "## misc utils\n",
257 + "class AverageMeter(object):\n",
258 + " def __init__(self):\n",
259 + " self.reset()\n",
260 + "\n",
261 + " def reset(self):\n",
262 + " self.val = 0\n",
263 + " self.average = 0\n",
264 + " self.sum = 0\n",
265 + " self.count = 0\n",
266 + "\n",
267 + " def update(self, val, n=1):\n",
268 + " self.val = val\n",
269 + " self.sum += val * n\n",
270 + " self.count += n\n",
271 + " self.average = self.sum / float(self.count)\n",
272 + "\n",
273 + "\n",
274 + "def parse_anchors(anchor_path):\n",
275 + " anchors = np.reshape(np.asarray(open(anchor_path, 'r').read().split(','), np.float32), [-1, 2])\n",
276 + " return anchors\n",
277 + "\n",
278 + "\n",
279 + "def read_class_names(class_name_path):\n",
280 + " names = {}\n",
281 + " with open(class_name_path, 'r') as data:\n",
282 + " for ID, name in enumerate(data):\n",
283 + " names[ID] = name.strip('\\n')\n",
284 + " return names\n",
285 + "\n",
286 + "\n",
287 + "def shuffle_and_overwrite(file_name):\n",
288 + " content = open(file_name, 'r').readlines()\n",
289 + " random.shuffle(content)\n",
290 + " with open(file_name, 'w') as f:\n",
291 + " for line in content:\n",
292 + " f.write(line)\n",
293 + "\n",
294 + "\n",
295 + "def update_dict(ori_dict, new_dict):\n",
296 + " if not ori_dict:\n",
297 + " return new_dict\n",
298 + " for key in ori_dict:\n",
299 + " ori_dict[key] += new_dict[key]\n",
300 + " return ori_dict\n",
301 + "\n",
302 + "\n",
303 + "def list_add(ori_list, new_list):\n",
304 + " for i in range(len(ori_list)):\n",
305 + " ori_list[i] += new_list[i]\n",
306 + " return ori_list\n",
307 + "\n",
308 + "\n",
309 + "def load_weights(var_list, weights_file):\n",
310 + " with open(weights_file, \"rb\") as fp:\n",
311 + " np.fromfile(fp, dtype=np.int32, count=5)\n",
312 + " weights = np.fromfile(fp, dtype=np.float32)\n",
313 + "\n",
314 + " ptr = 0\n",
315 + " i = 0\n",
316 + " assign_ops = []\n",
317 + " while i < len(var_list) - 1:\n",
318 + " var1 = var_list[i]\n",
319 + " var2 = var_list[i + 1]\n",
320 + " if 'Conv' in var1.name.split('/')[-2]:\n",
321 + " if 'BatchNorm' in var2.name.split('/')[-2]:\n",
322 + " gamma, beta, mean, var = var_list[i + 1:i + 5]\n",
323 + " batch_norm_vars = [beta, gamma, mean, var]\n",
324 + " for var in batch_norm_vars:\n",
325 + " shape = var.shape.as_list()\n",
326 + " num_params = np.prod(shape)\n",
327 + " var_weights = weights[ptr:ptr + num_params].reshape(shape)\n",
328 + " ptr += num_params\n",
329 + " assign_ops.append(tf.assign(var, var_weights, validate_shape=True))\n",
330 + " i += 4\n",
331 + " elif 'Conv' in var2.name.split('/')[-2]:\n",
332 + " # load biases\n",
333 + " bias = var2\n",
334 + " bias_shape = bias.shape.as_list()\n",
335 + " bias_params = np.prod(bias_shape)\n",
336 + " bias_weights = weights[ptr:ptr +\n",
337 + " bias_params].reshape(bias_shape)\n",
338 + " ptr += bias_params\n",
339 + " assign_ops.append(tf.assign(bias, bias_weights, validate_shape=True))\n",
340 + " i += 1\n",
341 + "\n",
342 + " shape = var1.shape.as_list()\n",
343 + " num_params = np.prod(shape)\n",
344 + "\n",
345 + " var_weights = weights[ptr:ptr + num_params].reshape(\n",
346 + " (shape[3], shape[2], shape[0], shape[1]))\n",
347 + "\n",
348 + " var_weights = np.transpose(var_weights, (2, 3, 1, 0))\n",
349 + " ptr += num_params\n",
350 + " assign_ops.append(\n",
351 + " tf.assign(var1, var_weights, validate_shape=True))\n",
352 + " i += 1\n",
353 + "\n",
354 + " return assign_ops\n",
355 + "\n",
356 + "\n",
357 + "def config_learning_rate(global_step):\n",
358 + " ## fixes for removing arg paramter\n",
359 + " global lr_type, learning_rate_init, lr_decay_freq, lr_decay_factor, lr_lower_bound, total_epoches, use_warm_up, warm_up_epoch, train_batch_num, lr_lower_bound, pw_boundaries, pw_values\n",
360 + "\n",
361 + " if lr_type == 'exponential':\n",
362 + " lr_tmp = tf.train.exponential_decay(learning_rate_init, global_step, lr_decay_freq,\n",
363 + " lr_decay_factor, staircase=True, name='exponential_learning_rate')\n",
364 + " return tf.maximum(lr_tmp, lr_lower_bound)\n",
365 + " elif lr_type == 'cosine_decay':\n",
366 + " train_steps = (total_epoches - float(use_warm_up) * warm_up_epoch) * train_batch_num\n",
367 + " return lr_lower_bound + 0.5 * (learning_rate_init - lr_lower_bound) * \\\n",
368 + " (1 + tf.cos(global_step / train_steps * np.pi))\n",
369 + " elif lr_type == 'cosine_decay_restart':\n",
370 + " return tf.train.cosine_decay_restarts(learning_rate_init, global_step, \n",
371 + " lr_decay_freq, t_mul=2.0, m_mul=1.0, \n",
372 + " name='cosine_decay_learning_rate_restart')\n",
373 + " elif lr_type == 'fixed':\n",
374 + " return tf.convert_to_tensor(learning_rate_init, name='fixed_learning_rate')\n",
375 + " elif lr_type == 'piecewise':\n",
376 + " return tf.train.piecewise_constant(global_step, boundaries=pw_boundaries, values=pw_values,\n",
377 + " name='piecewise_learning_rate')\n",
378 + " else:\n",
379 + " raise ValueError('Unsupported learning rate type!')\n",
380 + "\n",
381 + "\n",
382 + "def config_optimizer(optimizer_name, learning_rate, decay=0.9, momentum=0.9):\n",
383 + " if optimizer_name == 'momentum':\n",
384 + " return tf.train.MomentumOptimizer(learning_rate, momentum=momentum)\n",
385 + " elif optimizer_name == 'rmsprop':\n",
386 + " return tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=momentum)\n",
387 + " elif optimizer_name == 'adam':\n",
388 + " return tf.train.AdamOptimizer(learning_rate)\n",
389 + " elif optimizer_name == 'sgd':\n",
390 + " return tf.train.GradientDescentOptimizer(learning_rate)\n",
391 + " else:\n",
392 + " raise ValueError('Unsupported optimizer type!')"
393 + ],
394 + "execution_count": 0,
395 + "outputs": []
396 + },
397 + {
398 + "cell_type": "code",
399 + "metadata": {
400 + "id": "YIlZhFLYD0d8",
401 + "colab_type": "code",
402 + "colab": {}
403 + },
404 + "source": [
405 + "## data utils\n",
406 + "\n",
407 + "import sys\n",
408 + "\n",
409 + "PY_VERSION = sys.version_info[0]\n",
410 + "iter_cnt = 0\n",
411 + "\n",
412 + "def _parse_tfrecord(data):\n",
413 + " example = tf.train.Example()\n",
414 + " example.ParseFromString(data)\n",
415 + " features = example.features.feature\n",
416 + " return features\n",
417 + "\n",
418 + "def parse_tfrecord(data):\n",
419 + " # tfrecord parser for TFRecordDataset (raw data)\n",
420 + " features = _parse_tfrecord(data)\n",
421 + " index = features['index'].int64_list.value[0]\n",
422 + " encoded_image = np.frombuffer(features['image'].bytes_list.value[0], dtype = np.uint8)\n",
423 + " width = features['width'].int64_list.value[0]\n",
424 + " height = features['height'].int64_list.value[0]\n",
425 + " boxes = features['boxes'].int64_list.value\n",
426 + "\n",
427 + " assert len(boxes) % 5 == 0, 'Annotation error occured in box array.'\n",
428 + " box_cnt = len(boxes) // 5\n",
429 + "\n",
430 + " aligned_boxes = []\n",
431 + " labels = []\n",
432 + "\n",
433 + " for i in range(box_cnt):\n",
434 + " label, x_min, y_min, x_max, y_max = int(boxes[i * 5]), float(boxes[i * 5 + 1]), float(boxes[i * 5 + 2]), float(boxes[i * 5 + 3]), float(boxes[i * 5 + 4]) ## do we need to change int to float? is there float rectangle sample?\n",
435 + " aligned_boxes.append([x_min, y_min, x_max, y_max])\n",
436 + " labels.append(label)\n",
437 + "\n",
438 + " aligned_boxes = np.asarray(aligned_boxes, np.float32)\n",
439 + " labels = np.asarray(labels, np.int64)\n",
440 + "\n",
441 + " return index, encoded_image, aligned_boxes, labels, width, height\n",
442 + "\n",
443 + "def parse_record(features):\n",
444 + " # tfrecord parser for TFRecordIterator (primitive data)\n",
445 + "\n",
446 + " index = int(features['index'][0])\n",
447 + " encoded_image = np.frombuffer(features['image'][0], dtype = np.uint8)\n",
448 + " width = int(features['width'][0])\n",
449 + " height = int(features['height'][0])\n",
450 + " boxes = features['boxes']\n",
451 + "\n",
452 + " assert len(boxes) % 5 == 0, 'Annotation error occured in box array.'\n",
453 + " box_cnt = len(boxes) // 5\n",
454 + "\n",
455 + " aligned_boxes = []\n",
456 + " labels = []\n",
457 + "\n",
458 + " for i in range(box_cnt):\n",
459 + " label, x_min, y_min, x_max, y_max = int(boxes[i * 5]), float(boxes[i * 5 + 1]), float(boxes[i * 5 + 2]), float(boxes[i * 5 + 3]), float(boxes[i * 5 + 4])\n",
460 + " aligned_boxes.append([x_min, y_min, x_max, y_max])\n",
461 + " labels.append(label)\n",
462 + "\n",
463 + " aligned_boxes = np.asarray(aligned_boxes, np.float32)\n",
464 + " labels = np.asarray(labels, np.int64)\n",
465 + "\n",
466 + " return index, encoded_image, aligned_boxes, labels, width, height\n",
467 + "\n",
468 + "def bbox_crop(bbox, crop_box=None, allow_outside_center=True):\n",
469 + " bbox = bbox.copy()\n",
470 + " if crop_box is None:\n",
471 + " return bbox\n",
472 + " if not len(crop_box) == 4:\n",
473 + " raise ValueError(\n",
474 + " \"Invalid crop_box parameter, requires length 4, given {}\".format(str(crop_box)))\n",
475 + " if sum([int(c is None) for c in crop_box]) == 4:\n",
476 + " return bbox\n",
477 + "\n",
478 + " l, t, w, h = crop_box\n",
479 + "\n",
480 + " left = l if l else 0\n",
481 + " top = t if t else 0\n",
482 + " right = left + (w if w else np.inf)\n",
483 + " bottom = top + (h if h else np.inf)\n",
484 + " crop_bbox = np.array((left, top, right, bottom))\n",
485 + "\n",
486 + " if allow_outside_center:\n",
487 + " mask = np.ones(bbox.shape[0], dtype=bool)\n",
488 + " else:\n",
489 + " centers = (bbox[:, :2] + bbox[:, 2:4]) / 2\n",
490 + " mask = np.logical_and(crop_bbox[:2] <= centers, centers < crop_bbox[2:]).all(axis=1)\n",
491 + "\n",
492 + " # transform borders\n",
493 + " bbox[:, :2] = np.maximum(bbox[:, :2], crop_bbox[:2])\n",
494 + " bbox[:, 2:4] = np.minimum(bbox[:, 2:4], crop_bbox[2:4])\n",
495 + " bbox[:, :2] -= crop_bbox[:2]\n",
496 + " bbox[:, 2:4] -= crop_bbox[:2]\n",
497 + "\n",
498 + " mask = np.logical_and(mask, (bbox[:, :2] < bbox[:, 2:4]).all(axis=1))\n",
499 + " bbox = bbox[mask]\n",
500 + " return bbox\n",
501 + "\n",
502 + "def bbox_iou(bbox_a, bbox_b, offset=0):\n",
503 + " if bbox_a.shape[1] < 4 or bbox_b.shape[1] < 4:\n",
504 + " raise IndexError(\"Bounding boxes axis 1 must have at least length 4\")\n",
505 + "\n",
506 + " tl = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2])\n",
507 + " br = np.minimum(bbox_a[:, None, 2:4], bbox_b[:, 2:4])\n",
508 + "\n",
509 + " area_i = np.prod(br - tl + offset, axis=2) * (tl < br).all(axis=2)\n",
510 + " area_a = np.prod(bbox_a[:, 2:4] - bbox_a[:, :2] + offset, axis=1)\n",
511 + " area_b = np.prod(bbox_b[:, 2:4] - bbox_b[:, :2] + offset, axis=1)\n",
512 + " return area_i / (area_a[:, None] + area_b - area_i)\n",
513 + "\n",
514 + "\n",
515 + "def random_crop_with_constraints(bbox, size, min_scale=0.3, max_scale=1,\n",
516 + " max_aspect_ratio=2, constraints=None,\n",
517 + " max_trial=50):\n",
518 + " # default params in paper\n",
519 + " if constraints is None:\n",
520 + " constraints = (\n",
521 + " (0.1, None),\n",
522 + " (0.3, None),\n",
523 + " (0.5, None),\n",
524 + " (0.7, None),\n",
525 + " (0.9, None),\n",
526 + " (None, 1),\n",
527 + " )\n",
528 + "\n",
529 + " w, h = size\n",
530 + "\n",
531 + " candidates = [(0, 0, w, h)]\n",
532 + " for min_iou, max_iou in constraints:\n",
533 + " min_iou = -np.inf if min_iou is None else min_iou\n",
534 + " max_iou = np.inf if max_iou is None else max_iou\n",
535 + "\n",
536 + " for _ in range(max_trial):\n",
537 + " scale = random.uniform(min_scale, max_scale)\n",
538 + " aspect_ratio = random.uniform(\n",
539 + " max(1 / max_aspect_ratio, scale * scale),\n",
540 + " min(max_aspect_ratio, 1 / (scale * scale)))\n",
541 + " crop_h = int(h * scale / np.sqrt(aspect_ratio))\n",
542 + " crop_w = int(w * scale * np.sqrt(aspect_ratio))\n",
543 + "\n",
544 + " crop_t = random.randrange(h - crop_h)\n",
545 + " crop_l = random.randrange(w - crop_w)\n",
546 + " crop_bb = np.array((crop_l, crop_t, crop_l + crop_w, crop_t + crop_h))\n",
547 + "\n",
548 + " if len(bbox) == 0:\n",
549 + " top, bottom = crop_t, crop_t + crop_h\n",
550 + " left, right = crop_l, crop_l + crop_w\n",
551 + " return bbox, (left, top, right-left, bottom-top)\n",
552 + "\n",
553 + " iou = bbox_iou(bbox, crop_bb[np.newaxis])\n",
554 + " if min_iou <= iou.min() and iou.max() <= max_iou:\n",
555 + " top, bottom = crop_t, crop_t + crop_h\n",
556 + " left, right = crop_l, crop_l + crop_w\n",
557 + " candidates.append((left, top, right-left, bottom-top))\n",
558 + " break\n",
559 + "\n",
560 + " # random select one\n",
561 + " while candidates:\n",
562 + " crop = candidates.pop(np.random.randint(0, len(candidates)))\n",
563 + " new_bbox = bbox_crop(bbox, crop, allow_outside_center=False)\n",
564 + " if new_bbox.size < 1:\n",
565 + " continue\n",
566 + " new_crop = (crop[0], crop[1], crop[2], crop[3])\n",
567 + " return new_bbox, new_crop\n",
568 + " return bbox, (0, 0, w, h)\n",
569 + "\n",
570 + "\n",
571 + "def random_color_distort(img, brightness_delta=32, hue_vari=18, sat_vari=0.5, val_vari=0.5):\n",
572 + " def random_hue(img_hsv, hue_vari, p=0.5):\n",
573 + " if np.random.uniform(0, 1) > p:\n",
574 + " hue_delta = np.random.randint(-hue_vari, hue_vari)\n",
575 + " img_hsv[:, :, 0] = (img_hsv[:, :, 0] + hue_delta) % 180\n",
576 + " return img_hsv\n",
577 + "\n",
578 + " def random_saturation(img_hsv, sat_vari, p=0.5):\n",
579 + " if np.random.uniform(0, 1) > p:\n",
580 + " sat_mult = 1 + np.random.uniform(-sat_vari, sat_vari)\n",
581 + " img_hsv[:, :, 1] *= sat_mult\n",
582 + " return img_hsv\n",
583 + "\n",
584 + " def random_value(img_hsv, val_vari, p=0.5):\n",
585 + " if np.random.uniform(0, 1) > p:\n",
586 + " val_mult = 1 + np.random.uniform(-val_vari, val_vari)\n",
587 + " img_hsv[:, :, 2] *= val_mult\n",
588 + " return img_hsv\n",
589 + "\n",
590 + " def random_brightness(img, brightness_delta, p=0.5):\n",
591 + " if np.random.uniform(0, 1) > p:\n",
592 + " img = img.astype(np.float32)\n",
593 + " brightness_delta = int(np.random.uniform(-brightness_delta, brightness_delta))\n",
594 + " img = img + brightness_delta\n",
595 + " return np.clip(img, 0, 255)\n",
596 + "\n",
597 + " # brightness\n",
598 + " img = random_brightness(img, brightness_delta)\n",
599 + " img = img.astype(np.uint8)\n",
600 + "\n",
601 + " # color jitter\n",
602 + " img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.float32)\n",
603 + "\n",
604 + " if np.random.randint(0, 2):\n",
605 + " img_hsv = random_value(img_hsv, val_vari)\n",
606 + " img_hsv = random_saturation(img_hsv, sat_vari)\n",
607 + " img_hsv = random_hue(img_hsv, hue_vari)\n",
608 + " else:\n",
609 + " img_hsv = random_saturation(img_hsv, sat_vari)\n",
610 + " img_hsv = random_hue(img_hsv, hue_vari)\n",
611 + " img_hsv = random_value(img_hsv, val_vari)\n",
612 + "\n",
613 + " img_hsv = np.clip(img_hsv, 0, 255)\n",
614 + " img = cv2.cvtColor(img_hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)\n",
615 + "\n",
616 + " return img\n",
617 + "\n",
618 + "\n",
619 + "def letterbox_resize(img, new_width, new_height, interp=0):\n",
620 + " ori_height, ori_width = img.shape[:2]\n",
621 + "\n",
622 + " resize_ratio = min(new_width / ori_width, new_height / ori_height)\n",
623 + "\n",
624 + " resize_w = int(resize_ratio * ori_width)\n",
625 + " resize_h = int(resize_ratio * ori_height)\n",
626 + "\n",
627 + " img = cv2.resize(img, (resize_w, resize_h), interpolation=interp)\n",
628 + " image_padded = np.full((new_height, new_width, 3), 128, np.uint8)\n",
629 + "\n",
630 + " dw = int((new_width - resize_w) / 2)\n",
631 + " dh = int((new_height - resize_h) / 2)\n",
632 + "\n",
633 + " image_padded[dh: resize_h + dh, dw: resize_w + dw, :] = img\n",
634 + "\n",
635 + " return image_padded, resize_ratio, dw, dh\n",
636 + "\n",
637 + "\n",
638 + "def resize_with_bbox(img, bbox, new_width, new_height, interp=0, letterbox=False):\n",
639 + " if letterbox:\n",
640 + " image_padded, resize_ratio, dw, dh = letterbox_resize(img, new_width, new_height, interp)\n",
641 + "\n",
642 + " # xmin, xmax\n",
643 + " bbox[:, [0, 2]] = bbox[:, [0, 2]] * resize_ratio + dw\n",
644 + " # ymin, ymax\n",
645 + " bbox[:, [1, 3]] = bbox[:, [1, 3]] * resize_ratio + dh\n",
646 + "\n",
647 + " return image_padded, bbox\n",
648 + " else:\n",
649 + " ori_height, ori_width = img.shape[:2]\n",
650 + "\n",
651 + " img = cv2.resize(img, (new_width, new_height), interpolation=interp)\n",
652 + "\n",
653 + " # xmin, xmax\n",
654 + " bbox[:, [0, 2]] = bbox[:, [0, 2]] / ori_width * new_width\n",
655 + " # ymin, ymax\n",
656 + " bbox[:, [1, 3]] = bbox[:, [1, 3]] / ori_height * new_height\n",
657 + "\n",
658 + " return img, bbox\n",
659 + "\n",
660 + "\n",
661 + "def random_flip(img, bbox, px=0, py=0):\n",
662 + " height, width = img.shape[:2]\n",
663 + " if np.random.uniform(0, 1) < px:\n",
664 + " img = cv2.flip(img, 1)\n",
665 + " xmax = width - bbox[:, 0]\n",
666 + " xmin = width - bbox[:, 2]\n",
667 + " bbox[:, 0] = xmin\n",
668 + " bbox[:, 2] = xmax\n",
669 + "\n",
670 + " if np.random.uniform(0, 1) < py:\n",
671 + " img = cv2.flip(img, 0)\n",
672 + " ymax = height - bbox[:, 1]\n",
673 + " ymin = height - bbox[:, 3]\n",
674 + " bbox[:, 1] = ymin\n",
675 + " bbox[:, 3] = ymax\n",
676 + " return img, bbox\n",
677 + "\n",
678 + "\n",
679 + "def random_expand(img, bbox, max_ratio=4, fill=0, keep_ratio=True):\n",
680 + " h, w, c = img.shape\n",
681 + " ratio_x = random.uniform(1, max_ratio)\n",
682 + " if keep_ratio:\n",
683 + " ratio_y = ratio_x\n",
684 + " else:\n",
685 + " ratio_y = random.uniform(1, max_ratio)\n",
686 + "\n",
687 + " oh, ow = int(h * ratio_y), int(w * ratio_x)\n",
688 + " off_y = random.randint(0, oh - h)\n",
689 + " off_x = random.randint(0, ow - w)\n",
690 + "\n",
691 + " dst = np.full(shape=(oh, ow, c), fill_value=fill, dtype=img.dtype)\n",
692 + "\n",
693 + " dst[off_y:off_y + h, off_x:off_x + w, :] = img\n",
694 + "\n",
695 + " # correct bbox\n",
696 + " bbox[:, :2] += (off_x, off_y)\n",
697 + " bbox[:, 2:4] += (off_x, off_y)\n",
698 + "\n",
699 + " return dst, bbox\n",
700 + "\n",
701 + "def process_box(boxes, labels, img_size, class_num, anchors):\n",
702 + " anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]\n",
703 + "\n",
704 + " # convert boxes form:\n",
705 + " # shape: [N, 2]\n",
706 + " # (x_center, y_center)\n",
707 + " box_centers = (boxes[:, 0:2] + boxes[:, 2:4]) / 2\n",
708 + " # (width, height)\n",
709 + " box_sizes = boxes[:, 2:4] - boxes[:, 0:2]\n",
710 + "\n",
711 + " # [13, 13, 3, 5+num_class+1] `5` means coords and labels. `1` means mix up weight. \n",
712 + " y_true_13 = np.zeros((img_size[1] // 32, img_size[0] // 32, 3, 6 + class_num), np.float32)\n",
713 + " y_true_26 = np.zeros((img_size[1] // 16, img_size[0] // 16, 3, 6 + class_num), np.float32)\n",
714 + " y_true_52 = np.zeros((img_size[1] // 8, img_size[0] // 8, 3, 6 + class_num), np.float32)\n",
715 + "\n",
716 + " # mix up weight default to 1.\n",
717 + " y_true_13[..., -1] = 1.\n",
718 + " y_true_26[..., -1] = 1.\n",
719 + " y_true_52[..., -1] = 1.\n",
720 + "\n",
721 + " y_true = [y_true_13, y_true_26, y_true_52]\n",
722 + "\n",
723 + " # [N, 1, 2]\n",
724 + " box_sizes = np.expand_dims(box_sizes, 1)\n",
725 + " # broadcast tricks\n",
726 + " # [N, 1, 2] & [9, 2] ==> [N, 9, 2]\n",
727 + " mins = np.maximum(- box_sizes / 2, - anchors / 2)\n",
728 + " maxs = np.minimum(box_sizes / 2, anchors / 2)\n",
729 + " # [N, 9, 2]\n",
730 + " whs = maxs - mins\n",
731 + "\n",
732 + " # [N, 9]\n",
733 + " iou = (whs[:, :, 0] * whs[:, :, 1]) / (\n",
734 + " box_sizes[:, :, 0] * box_sizes[:, :, 1] + anchors[:, 0] * anchors[:, 1] - whs[:, :, 0] * whs[:, :,\n",
735 + " 1] + 1e-10)\n",
736 + " # [N]\n",
737 + " best_match_idx = np.argmax(iou, axis=1)\n",
738 + "\n",
739 + " ratio_dict = {1.: 8., 2.: 16., 3.: 32.}\n",
740 + " for i, idx in enumerate(best_match_idx):\n",
741 + " # idx: 0,1,2 ==> 2; 3,4,5 ==> 1; 6,7,8 ==> 0\n",
742 + " feature_map_group = 2 - idx // 3\n",
743 + " # scale ratio: 0,1,2 ==> 8; 3,4,5 ==> 16; 6,7,8 ==> 32\n",
744 + " ratio = ratio_dict[np.ceil((idx + 1) / 3.)]\n",
745 + " x = int(np.floor(box_centers[i, 0] / ratio))\n",
746 + " y = int(np.floor(box_centers[i, 1] / ratio))\n",
747 + " k = anchors_mask[feature_map_group].index(idx)\n",
748 + " c = labels[i]\n",
749 + " # print(feature_map_group, '|', y,x,k,c)\n",
750 + "\n",
751 + " y_true[feature_map_group][y, x, k, :2] = box_centers[i]\n",
752 + " y_true[feature_map_group][y, x, k, 2:4] = box_sizes[i]\n",
753 + " y_true[feature_map_group][y, x, k, 4] = 1.\n",
754 + " y_true[feature_map_group][y, x, k, 5 + c] = 1.\n",
755 + " y_true[feature_map_group][y, x, k, -1] = boxes[i, -1]\n",
756 + "\n",
757 + " return y_true_13, y_true_26, y_true_52\n",
758 + "\n",
759 + "\n",
760 + "def parse_data(data, class_num, img_size, anchors, is_training, letterbox_resize):\n",
761 + " \n",
762 + " img_idx, encoded_img, boxes, labels, _, _ = parse_tfrecord(data)\n",
763 + " img = cv2.imdecode(encoded_img, cv2.IMREAD_COLOR)\n",
764 + " boxes = np.concatenate((boxes, np.full(shape=(boxes.shape[0], 1), fill_value=1., dtype=np.float32)), axis=-1)\n",
765 + "\n",
766 + " ## I erased mix-up method here\n",
767 + "\n",
768 + " if is_training:\n",
769 + " # random color distortion\n",
770 + " img = random_color_distort(img)\n",
771 + "\n",
772 + " # random expansion with prob 0.5\n",
773 + " if np.random.uniform(0, 1) > 0.5:\n",
774 + " img, boxes = random_expand(img, boxes, 4)\n",
775 + "\n",
776 + " # random cropping\n",
777 + " h, w, _ = img.shape\n",
778 + " boxes, crop = random_crop_with_constraints(boxes, (w, h))\n",
779 + " x0, y0, w, h = crop\n",
780 + " img = img[y0: y0+h, x0: x0+w]\n",
781 + "\n",
782 + " # resize with random interpolation\n",
783 + " h, w, _ = img.shape\n",
784 + " interp = np.random.randint(0, 5)\n",
785 + " img, boxes = resize_with_bbox(img, boxes, img_size[0], img_size[1], interp=interp, letterbox=letterbox_resize)\n",
786 + "\n",
787 + " # random horizontal flip\n",
788 + " h, w, _ = img.shape\n",
789 + " img, boxes = random_flip(img, boxes, px=0.5)\n",
790 + " else:\n",
791 + " img, boxes = resize_with_bbox(img, boxes, img_size[0], img_size[1], interp=1, letterbox=letterbox_resize)\n",
792 + "\n",
793 + " img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)\n",
794 + "\n",
795 + " # the input of yolo_v3 should be in range 0~1\n",
796 + " img = img / 255.\n",
797 + "\n",
798 + " y_true_13, y_true_26, y_true_52 = process_box(boxes, labels, img_size, class_num, anchors)\n",
799 + "\n",
800 + " return img_idx, img, y_true_13, y_true_26, y_true_52\n",
801 + "\n",
802 + "\n",
803 + "def get_batch_data(records, class_num, img_size, anchors, is_training, multi_scale=False, mix_up=False, letterbox_resize=True, interval=10):\n",
804 + " global iter_cnt\n",
805 + "\n",
806 + " # multi_scale training\n",
807 + " if multi_scale and is_training:\n",
808 + " random.seed(iter_cnt // interval)\n",
809 + " random_img_size = [[x * 32, x * 32] for x in range(10, 20)]\n",
810 + " img_size = random.sample(random_img_size, 1)[0]\n",
811 + " iter_cnt += 1\n",
812 + "\n",
813 + " img_idx_batch, img_batch, y_true_13_batch, y_true_26_batch, y_true_52_batch = [], [], [], [], []\n",
814 + "\n",
815 + " # deleted mix up strategy\n",
816 + " \n",
817 + " for data in records:\n",
818 + " img_idx, img, y_true_13, y_true_26, y_true_52 = parse_data(data, class_num, img_size, anchors, is_training, letterbox_resize)\n",
819 + "\n",
820 + " img_idx_batch.append(img_idx)\n",
821 + " img_batch.append(img)\n",
822 + " y_true_13_batch.append(y_true_13)\n",
823 + " y_true_26_batch.append(y_true_26)\n",
824 + " y_true_52_batch.append(y_true_52)\n",
825 + "\n",
826 + " img_idx_batch, img_batch, y_true_13_batch, y_true_26_batch, y_true_52_batch = np.asarray(img_idx_batch, np.int64), np.asarray(img_batch), np.asarray(y_true_13_batch), np.asarray(y_true_26_batch), np.asarray(y_true_52_batch)\n",
827 + "\n",
828 + " return img_idx_batch, img_batch, y_true_13_batch, y_true_26_batch, y_true_52_batch"
829 + ],
830 + "execution_count": 0,
831 + "outputs": []
832 + },
833 + {
834 + "cell_type": "code",
835 + "metadata": {
836 + "id": "sd9Pk3XgDqxt",
837 + "colab_type": "code",
838 + "colab": {}
839 + },
840 + "source": [
841 + "## evaluation utils\n",
842 + "\n",
843 + "from collections import Counter\n",
844 + "\n",
845 + "def calc_iou(pred_boxes, true_boxes):\n",
846 + " pred_boxes = np.expand_dims(pred_boxes, -2)\n",
847 + " true_boxes = np.expand_dims(true_boxes, 0)\n",
848 + "\n",
849 + " intersect_mins = np.maximum(pred_boxes[..., :2], true_boxes[..., :2])\n",
850 + " intersect_maxs = np.minimum(pred_boxes[..., 2:], true_boxes[..., 2:])\n",
851 + " intersect_wh = np.maximum(intersect_maxs - intersect_mins, 0.)\n",
852 + "\n",
853 + " intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]\n",
854 + " pred_box_wh = pred_boxes[..., 2:] - pred_boxes[..., :2]\n",
855 + " pred_box_area = pred_box_wh[..., 0] * pred_box_wh[..., 1]\n",
856 + " true_boxes_wh = true_boxes[..., 2:] - true_boxes[..., :2]\n",
857 + " true_boxes_area = true_boxes_wh[..., 0] * true_boxes_wh[..., 1]\n",
858 + "\n",
859 + " iou = intersect_area / (pred_box_area + true_boxes_area - intersect_area + 1e-10)\n",
860 + "\n",
861 + " return iou\n",
862 + "\n",
863 + "\n",
864 + "def evaluate_on_cpu(y_pred, y_true, num_classes, calc_now=True, max_boxes=50, score_thresh=0.5, iou_thresh=0.5):\n",
865 + " num_images = y_true[0].shape[0]\n",
866 + " true_labels_dict = {i: 0 for i in range(num_classes)}\n",
867 + " pred_labels_dict = {i: 0 for i in range(num_classes)}\n",
868 + " true_positive_dict = {i: 0 for i in range(num_classes)}\n",
869 + "\n",
870 + " for i in range(num_images):\n",
871 + " true_labels_list, true_boxes_list = [], []\n",
872 + " for j in range(3):\n",
873 + " true_probs_temp = y_true[j][i][..., 5:-1]\n",
874 + " true_boxes_temp = y_true[j][i][..., 0:4]\n",
875 + "\n",
876 + " object_mask = true_probs_temp.sum(axis=-1) > 0\n",
877 + "\n",
878 + " true_probs_temp = true_probs_temp[object_mask]\n",
879 + " true_boxes_temp = true_boxes_temp[object_mask]\n",
880 + "\n",
881 + " true_labels_list += np.argmax(true_probs_temp, axis=-1).tolist()\n",
882 + " true_boxes_list += true_boxes_temp.tolist()\n",
883 + "\n",
884 + " if len(true_labels_list) != 0:\n",
885 + " for cls, count in Counter(true_labels_list).items():\n",
886 + " true_labels_dict[cls] += count\n",
887 + "\n",
888 + " true_boxes = np.array(true_boxes_list)\n",
889 + " box_centers, box_sizes = true_boxes[:, 0:2], true_boxes[:, 2:4]\n",
890 + " true_boxes[:, 0:2] = box_centers - box_sizes / 2.\n",
891 + " true_boxes[:, 2:4] = true_boxes[:, 0:2] + box_sizes\n",
892 + "\n",
893 + " pred_boxes = y_pred[0][i:i + 1]\n",
894 + " pred_confs = y_pred[1][i:i + 1]\n",
895 + " pred_probs = y_pred[2][i:i + 1]\n",
896 + "\n",
897 + " pred_boxes, pred_confs, pred_labels = cpu_nms(pred_boxes, pred_confs * pred_probs, num_classes, max_boxes=max_boxes, score_thresh=score_thresh, iou_thresh=iou_thresh)\n",
898 + "\n",
899 + " pred_labels_list = [] if pred_labels is None else pred_labels.tolist()\n",
900 + " if pred_labels_list == []:\n",
901 + " continue\n",
902 + "\n",
903 + " # calc iou\n",
904 + " iou_matrix = calc_iou(pred_boxes, true_boxes)\n",
905 + " max_iou_idx = np.argmax(iou_matrix, axis=-1)\n",
906 + "\n",
907 + " correct_idx = []\n",
908 + " correct_conf = []\n",
909 + "\n",
910 + " for k in range(max_iou_idx.shape[0]):\n",
911 + " pred_labels_dict[pred_labels_list[k]] += 1\n",
912 + " match_idx = max_iou_idx[k] # V level\n",
913 + " if iou_matrix[k, match_idx] > iou_thresh and true_labels_list[match_idx] == pred_labels_list[k]:\n",
914 + " if match_idx not in correct_idx:\n",
915 + " correct_idx.append(match_idx)\n",
916 + " correct_conf.append(pred_confs[k])\n",
917 + " else:\n",
918 + " same_idx = correct_idx.index(match_idx)\n",
919 + " if pred_confs[k] > correct_conf[same_idx]:\n",
920 + " correct_idx.pop(same_idx)\n",
921 + " correct_conf.pop(same_idx)\n",
922 + " correct_idx.append(match_idx)\n",
923 + " correct_conf.append(pred_confs[k])\n",
924 + "\n",
925 + " for t in correct_idx:\n",
926 + " true_positive_dict[true_labels_list[t]] += 1\n",
927 + "\n",
928 + " if calc_now:\n",
929 + " # avoid divided by 0\n",
930 + " recall = sum(true_positive_dict.values()) / (sum(true_labels_dict.values()) + 1e-6)\n",
931 + " precision = sum(true_positive_dict.values()) / (sum(pred_labels_dict.values()) + 1e-6)\n",
932 + "\n",
933 + " return recall, precision\n",
934 + " else:\n",
935 + " return true_positive_dict, true_labels_dict, pred_labels_dict\n",
936 + "\n",
937 + "\n",
938 + "def evaluate_on_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, y_pred, y_true, num_classes, iou_thresh=0.5, calc_now=True):\n",
939 + " num_images = y_true[0].shape[0]\n",
940 + " true_labels_dict = {i: 0 for i in range(num_classes)}\n",
941 + " pred_labels_dict = {i: 0 for i in range(num_classes)}\n",
942 + " true_positive_dict = {i: 0 for i in range(num_classes)}\n",
943 + "\n",
944 + " for i in range(num_images):\n",
945 + " true_labels_list, true_boxes_list = [], []\n",
946 + " for j in range(3):\n",
947 + " true_probs_temp = y_true[j][i][..., 5:-1]\n",
948 + " true_boxes_temp = y_true[j][i][..., 0:4]\n",
949 + "\n",
950 + " object_mask = true_probs_temp.sum(axis=-1) > 0\n",
951 + "\n",
952 + " true_probs_temp = true_probs_temp[object_mask]\n",
953 + " true_boxes_temp = true_boxes_temp[object_mask]\n",
954 + "\n",
955 + " true_labels_list += np.argmax(true_probs_temp, axis=-1).tolist()\n",
956 + " true_boxes_list += true_boxes_temp.tolist()\n",
957 + "\n",
958 + " if len(true_labels_list) != 0:\n",
959 + " for cls, count in Counter(true_labels_list).items():\n",
960 + " true_labels_dict[cls] += count\n",
961 + "\n",
962 + " true_boxes = np.array(true_boxes_list)\n",
963 + " box_centers, box_sizes = true_boxes[:, 0:2], true_boxes[:, 2:4]\n",
964 + " true_boxes[:, 0:2] = box_centers - box_sizes / 2.\n",
965 + " true_boxes[:, 2:4] = true_boxes[:, 0:2] + box_sizes\n",
966 + "\n",
967 + " pred_boxes = y_pred[0][i:i + 1]\n",
968 + " pred_confs = y_pred[1][i:i + 1]\n",
969 + " pred_probs = y_pred[2][i:i + 1]\n",
970 + "\n",
971 + " pred_boxes, pred_confs, pred_labels = sess.run(gpu_nms_op, feed_dict={pred_boxes_flag: pred_boxes, pred_scores_flag: pred_confs * pred_probs})\n",
972 + "\n",
973 + " pred_labels_list = [] if pred_labels is None else pred_labels.tolist()\n",
974 + " if pred_labels_list == []:\n",
975 + " continue\n",
976 + "\n",
977 + " # calc iou\n",
978 + " iou_matrix = calc_iou(pred_boxes, true_boxes)\n",
979 + " max_iou_idx = np.argmax(iou_matrix, axis=-1)\n",
980 + "\n",
981 + " correct_idx = []\n",
982 + " correct_conf = []\n",
983 + " for k in range(max_iou_idx.shape[0]):\n",
984 + " pred_labels_dict[pred_labels_list[k]] += 1\n",
985 + " match_idx = max_iou_idx[k] # V level\n",
986 + " if iou_matrix[k, match_idx] > iou_thresh and true_labels_list[match_idx] == pred_labels_list[k]:\n",
987 + " if match_idx not in correct_idx:\n",
988 + " correct_idx.append(match_idx)\n",
989 + " correct_conf.append(pred_confs[k])\n",
990 + " else:\n",
991 + " same_idx = correct_idx.index(match_idx)\n",
992 + " if pred_confs[k] > correct_conf[same_idx]:\n",
993 + " correct_idx.pop(same_idx)\n",
994 + " correct_conf.pop(same_idx)\n",
995 + " correct_idx.append(match_idx)\n",
996 + " correct_conf.append(pred_confs[k])\n",
997 + "\n",
998 + " for t in correct_idx:\n",
999 + " true_positive_dict[true_labels_list[t]] += 1\n",
1000 + "\n",
1001 + " if calc_now:\n",
1002 + " # avoid divided by 0\n",
1003 + " recall = sum(true_positive_dict.values()) / (sum(true_labels_dict.values()) + 1e-6)\n",
1004 + " precision = sum(true_positive_dict.values()) / (sum(pred_labels_dict.values()) + 1e-6)\n",
1005 + "\n",
1006 + " return recall, precision\n",
1007 + " else:\n",
1008 + " return true_positive_dict, true_labels_dict, pred_labels_dict\n",
1009 + "\n",
1010 + "\n",
1011 + "def get_preds_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, image_ids, y_pred):\n",
1012 + " image_id = image_ids[0]\n",
1013 + "\n",
1014 + " pred_boxes = y_pred[0][0:1]\n",
1015 + " pred_confs = y_pred[1][0:1]\n",
1016 + " pred_probs = y_pred[2][0:1]\n",
1017 + "\n",
1018 + " boxes, scores, labels = sess.run(gpu_nms_op, feed_dict={pred_boxes_flag: pred_boxes, pred_scores_flag: pred_confs * pred_probs})\n",
1019 + "\n",
1020 + " pred_content = []\n",
1021 + " for i in range(len(labels)):\n",
1022 + " x_min, y_min, x_max, y_max = boxes[i]\n",
1023 + " score = scores[i]\n",
1024 + " label = labels[i]\n",
1025 + " pred_content.append([image_id, x_min, y_min, x_max, y_max, score, label])\n",
1026 + "\n",
1027 + " return pred_content\n",
1028 + "\n",
1029 + "gt_dict = {} # key: img_id, value: gt object list\n",
1030 + "def parse_gt_rec(gt_filename, compression_type, target_img_size, letterbox_resize=True):\n",
1031 + " global gt_dict\n",
1032 + "\n",
1033 + " if not gt_dict:\n",
1034 + " new_width, new_height = target_img_size\n",
1035 + "\n",
1036 + " with TFRecordIterator(gt_filename, compression_type) as reader:\n",
1037 + " for data in reader:\n",
1038 + " img_id, image, boxes, labels, ori_width, ori_height = parse_record(data)\n",
1039 + "\n",
1040 + " objects = []\n",
1041 + " for i in range(len(labels)):\n",
1042 + " x_min, y_min, x_max, y_max = boxes[i]\n",
1043 + " label = labels[i]\n",
1044 + "\n",
1045 + " if letterbox_resize:\n",
1046 + " resize_ratio = min(new_width / ori_width, new_height / ori_height)\n",
1047 + "\n",
1048 + " resize_w = int(resize_ratio * ori_width)\n",
1049 + " resize_h = int(resize_ratio * ori_height)\n",
1050 + "\n",
1051 + " dw = int((new_width - resize_w) / 2)\n",
1052 + " dh = int((new_height - resize_h) / 2)\n",
1053 + "\n",
1054 + " objects.append([x_min * resize_ratio + dw,\n",
1055 + " y_min * resize_ratio + dh,\n",
1056 + " x_max * resize_ratio + dw,\n",
1057 + " y_max * resize_ratio + dh,\n",
1058 + " label])\n",
1059 + " else:\n",
1060 + " objects.append([x_min * new_width / ori_width,\n",
1061 + " y_min * new_height / ori_height,\n",
1062 + " x_max * new_width / ori_width,\n",
1063 + " y_max * new_height / ori_height,\n",
1064 + " label])\n",
1065 + " gt_dict[img_id] = objects\n",
1066 + " return gt_dict\n",
1067 + "\n",
1068 + "\n",
1069 + "# The following two functions are modified from FAIR's Detectron repo to calculate mAP:\n",
1070 + "# https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/voc_eval.py\n",
1071 + "def voc_ap(rec, prec, use_07_metric=False):\n",
1072 + " if use_07_metric:\n",
1073 + " ap = 0.\n",
1074 + " for t in np.arange(0., 1.1, 0.1):\n",
1075 + " if np.sum(rec >= t) == 0:\n",
1076 + " p = 0\n",
1077 + " else:\n",
1078 + " p = np.max(prec[rec >= t])\n",
1079 + " ap = ap + p / 11.\n",
1080 + " else:\n",
1081 + " mrec = np.concatenate(([0.], rec, [1.]))\n",
1082 + " mpre = np.concatenate(([0.], prec, [0.]))\n",
1083 + "\n",
1084 + " for i in range(mpre.size - 1, 0, -1):\n",
1085 + " mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])\n",
1086 + "\n",
1087 + " i = np.where(mrec[1:] != mrec[:-1])[0]\n",
1088 + "\n",
1089 + " ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])\n",
1090 + " return ap\n",
1091 + "\n",
1092 + "\n",
1093 + "def voc_eval(gt_dict, val_preds, classidx, iou_thres=0.5, use_07_metric=False):\n",
1094 + "    # 1. obtain gt: extract all gt objects for this class\n",
1095 + " class_recs = {}\n",
1096 + " npos = 0\n",
1097 + " for img_id in gt_dict:\n",
1098 + " R = [obj for obj in gt_dict[img_id] if obj[-1] == classidx]\n",
1099 + " bbox = np.array([x[:4] for x in R])\n",
1100 + " det = [False] * len(R)\n",
1101 + " npos += len(R)\n",
1102 + " class_recs[img_id] = {'bbox': bbox, 'det': det}\n",
1103 + "\n",
1104 + " # 2. obtain pred results\n",
1105 + " pred = [x for x in val_preds if x[-1] == classidx]\n",
1106 + " img_ids = [x[0] for x in pred]\n",
1107 + " confidence = np.array([x[-2] for x in pred])\n",
1108 + " BB = np.array([[x[1], x[2], x[3], x[4]] for x in pred])\n",
1109 + "\n",
1110 + " # 3. sort by confidence\n",
1111 + " sorted_ind = np.argsort(-confidence)\n",
1112 + " try:\n",
1113 + " BB = BB[sorted_ind, :]\n",
1114 + "    except IndexError:  # BB is empty when there is no prediction for this class\n",
1115 + " print('no box, ignore')\n",
1116 + " return 1e-6, 1e-6, 0, 0, 0\n",
1117 + " img_ids = [img_ids[x] for x in sorted_ind]\n",
1118 + "\n",
1119 + " # 4. mark TPs and FPs\n",
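+ "    # Greedy matching: detections are visited in descending confidence order and\n",
+ "    # each gt box can be matched only once (tracked in R['det']), so duplicate\n",
+ "    # detections of the same object count as false positives.\n",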
1120 + " nd = len(img_ids)\n",
1121 + " tp = np.zeros(nd)\n",
1122 + " fp = np.zeros(nd)\n",
1123 + "\n",
1124 + " for d in range(nd):\n",
1125 + " R = class_recs[img_ids[d]]\n",
1126 + " bb = BB[d, :]\n",
1127 + " ovmax = -np.Inf\n",
1128 + " BBGT = R['bbox']\n",
1129 + "\n",
1130 + " if BBGT.size > 0:\n",
1131 + " ixmin = np.maximum(BBGT[:, 0], bb[0])\n",
1132 + " iymin = np.maximum(BBGT[:, 1], bb[1])\n",
1133 + " ixmax = np.minimum(BBGT[:, 2], bb[2])\n",
1134 + " iymax = np.minimum(BBGT[:, 3], bb[3])\n",
1135 + " iw = np.maximum(ixmax - ixmin + 1., 0.)\n",
1136 + " ih = np.maximum(iymax - iymin + 1., 0.)\n",
1137 + " inters = iw * ih\n",
1138 + "\n",
1139 + " uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + (BBGT[:, 2] - BBGT[:, 0] + 1.) * (\n",
1140 + " BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)\n",
1141 + "\n",
1142 + " overlaps = inters / uni\n",
1143 + " ovmax = np.max(overlaps)\n",
1144 + " jmax = np.argmax(overlaps)\n",
1145 + "\n",
1146 + " if ovmax > iou_thres:\n",
1147 + " # gt not matched yet\n",
1148 + " if not R['det'][jmax]:\n",
1149 + " tp[d] = 1.\n",
1150 + " R['det'][jmax] = 1\n",
1151 + " else:\n",
1152 + " fp[d] = 1.\n",
1153 + " else:\n",
1154 + " fp[d] = 1.\n",
1155 + "\n",
1156 + " fp = np.cumsum(fp)\n",
1157 + " tp = np.cumsum(tp)\n",
1158 + "    rec = tp / np.maximum(float(npos), np.finfo(np.float64).eps)\n",
1159 + "    # avoid divide by zero in case the first detection matches a difficult ground truth\n",
1160 + " prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)\n",
1161 + " ap = voc_ap(rec, prec, use_07_metric)\n",
1162 + "\n",
1163 + " # return rec, prec, ap\n",
1164 + "    return npos, nd, tp[-1] / max(float(npos), 1e-6), tp[-1] / max(float(nd), 1e-6), ap"
1165 + ],
1166 + "execution_count": 0,
1167 + "outputs": []
1168 + },
1169 + {
1170 + "cell_type": "code",
1171 + "metadata": {
1172 + "id": "X4uQxNl0FRli",
1173 + "colab_type": "code",
1174 + "outputId": "c2b22c73-6195-4b80-d1b4-5ada76ef3da8",
1175 + "colab": {
1176 + "base_uri": "https://localhost:8080/",
1177 + "height": 161
1178 + }
1179 + },
1180 + "source": [
1181 + "## model\n",
1182 + "\n",
1183 + "slim = tf.contrib.slim\n",
1184 + "\n",
1185 + "def conv2d(inputs, filters, kernel_size, strides=1):\n",
1186 + " def _fixed_padding(inputs, kernel_size):\n",
1187 + " pad_total = kernel_size - 1\n",
1188 + " pad_beg = pad_total // 2\n",
1189 + " pad_end = pad_total - pad_beg\n",
1190 + "\n",
1191 + " padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end],\n",
1192 + " [pad_beg, pad_end], [0, 0]], mode='CONSTANT')\n",
1193 + " return padded_inputs\n",
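+ "    # With stride > 1, pad explicitly and convolve with VALID padding so that\n",
+ "    # the amount of padding does not depend on the input size, matching the\n",
+ "    # Darknet implementation.\n",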
1194 + " if strides > 1: \n",
1195 + " inputs = _fixed_padding(inputs, kernel_size)\n",
1196 + " inputs = slim.conv2d(inputs, filters, kernel_size, stride=strides,\n",
1197 + " padding=('SAME' if strides == 1 else 'VALID'))\n",
1198 + " return inputs\n",
1199 + "\n",
1200 + "def darknet53_body(inputs):\n",
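+ "    # Darknet-53 backbone: 1-2-8-8-4 residual blocks separated by stride-2 convs.\n",
+ "    # Returns three feature maps (route_1, route_2, route_3) at strides 8, 16 and\n",
+ "    # 32, one per YOLO detection scale.\n",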
1201 + " def res_block(inputs, filters):\n",
1202 + " shortcut = inputs\n",
1203 + " net = conv2d(inputs, filters * 1, 1)\n",
1204 + " net = conv2d(net, filters * 2, 3)\n",
1205 + "\n",
1206 + " net = net + shortcut\n",
1207 + "\n",
1208 + " return net\n",
1209 + " \n",
1210 + " # first two conv2d layers\n",
1211 + " net = conv2d(inputs, 32, 3, strides=1)\n",
1212 + " net = conv2d(net, 64, 3, strides=2)\n",
1213 + "\n",
1214 + " # res_block * 1\n",
1215 + " net = res_block(net, 32)\n",
1216 + "\n",
1217 + " net = conv2d(net, 128, 3, strides=2)\n",
1218 + "\n",
1219 + " # res_block * 2\n",
1220 + " for i in range(2):\n",
1221 + " net = res_block(net, 64)\n",
1222 + "\n",
1223 + " net = conv2d(net, 256, 3, strides=2)\n",
1224 + "\n",
1225 + " # res_block * 8\n",
1226 + " for i in range(8):\n",
1227 + " net = res_block(net, 128)\n",
1228 + "\n",
1229 + " route_1 = net\n",
1230 + " net = conv2d(net, 512, 3, strides=2)\n",
1231 + "\n",
1232 + " # res_block * 8\n",
1233 + " for i in range(8):\n",
1234 + " net = res_block(net, 256)\n",
1235 + "\n",
1236 + " route_2 = net\n",
1237 + " net = conv2d(net, 1024, 3, strides=2)\n",
1238 + "\n",
1239 + " # res_block * 4\n",
1240 + " for i in range(4):\n",
1241 + " net = res_block(net, 512)\n",
1242 + " route_3 = net\n",
1243 + "\n",
1244 + " return route_1, route_2, route_3\n",
1245 + "\n",
1246 + "\n",
1247 + "def yolo_block(inputs, filters):\n",
1248 + " net = conv2d(inputs, filters * 1, 1)\n",
1249 + " net = conv2d(net, filters * 2, 3)\n",
1250 + " net = conv2d(net, filters * 1, 1)\n",
1251 + " net = conv2d(net, filters * 2, 3)\n",
1252 + " net = conv2d(net, filters * 1, 1)\n",
1253 + " route = net\n",
1254 + " net = conv2d(net, filters * 2, 3)\n",
1255 + " return route, net\n",
1256 + "\n",
1257 + "\n",
1258 + "def upsample_layer(inputs, out_shape):\n",
1259 + " new_height, new_width = out_shape[1], out_shape[2]\n",
1260 + " # NOTE: here height is the first\n",
1261 + " inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width), name='upsampled')\n",
1262 + " return inputs\n",
1263 + "\n",
1264 + "class yolov3(object):\n",
1265 + "\n",
1266 + " def __init__(self, class_num, anchors, use_label_smooth=False, use_focal_loss=False, batch_norm_decay=0.999, weight_decay=5e-4, use_static_shape=True):\n",
1267 + " self.class_num = class_num\n",
1268 + " self.anchors = anchors\n",
1269 + " self.batch_norm_decay = batch_norm_decay\n",
1270 + " self.use_label_smooth = use_label_smooth\n",
1271 + " self.use_focal_loss = use_focal_loss\n",
1272 + " self.weight_decay = weight_decay\n",
1273 + " self.use_static_shape = use_static_shape\n",
1274 + "\n",
1275 + " def forward(self, inputs, is_training=False, reuse=False):\n",
1276 + "        # the input size: [height, width] format\n",
1277 + " self.img_size = tf.shape(inputs)[1:3]\n",
1278 + " print(\"Img size:\", self.img_size)\n",
1279 + "\n",
1280 + " batch_norm_params = {\n",
1281 + " 'decay': self.batch_norm_decay,\n",
1282 + " 'epsilon': 1e-05,\n",
1283 + " 'scale': True,\n",
1284 + " 'is_training': is_training,\n",
1285 + " 'fused': None,\n",
1286 + " }\n",
1287 + "\n",
1288 + " with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):\n",
1289 + " with slim.arg_scope([slim.conv2d], \n",
1290 + " normalizer_fn=slim.batch_norm,\n",
1291 + " normalizer_params=batch_norm_params,\n",
1292 + " biases_initializer=None,\n",
1293 + " activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1),\n",
1294 + " weights_regularizer=slim.l2_regularizer(self.weight_decay)):\n",
1295 + "\n",
1296 + " with tf.variable_scope('darknet53_body'):\n",
1297 + " route_1, route_2, route_3 = darknet53_body(inputs)\n",
1298 + "\n",
1299 + " with tf.variable_scope('yolov3_head'):\n",
1300 + " inter1, net = yolo_block(route_3, 512)\n",
1301 + " feature_map_1 = slim.conv2d(net, 3 * (5 + self.class_num), 1,\n",
1302 + " stride=1, normalizer_fn=None,\n",
1303 + " activation_fn=None, biases_initializer=tf.zeros_initializer())\n",
1304 + " feature_map_1 = tf.identity(feature_map_1, name='feature_map_1')\n",
1305 + "\n",
1306 + " inter1 = conv2d(inter1, 256, 1)\n",
1307 + " inter1 = upsample_layer(inter1, route_2.get_shape().as_list() if self.use_static_shape else tf.shape(route_2))\n",
1308 + " concat1 = tf.concat([inter1, route_2], axis=3)\n",
1309 + "\n",
1310 + " inter2, net = yolo_block(concat1, 256)\n",
1311 + " feature_map_2 = slim.conv2d(net, 3 * (5 + self.class_num), 1,\n",
1312 + " stride=1, normalizer_fn=None,\n",
1313 + " activation_fn=None, biases_initializer=tf.zeros_initializer())\n",
1314 + " feature_map_2 = tf.identity(feature_map_2, name='feature_map_2')\n",
1315 + "\n",
1316 + " inter2 = conv2d(inter2, 128, 1)\n",
1317 + " inter2 = upsample_layer(inter2, route_1.get_shape().as_list() if self.use_static_shape else tf.shape(route_1))\n",
1318 + " concat2 = tf.concat([inter2, route_1], axis=3)\n",
1319 + "\n",
1320 + " _, feature_map_3 = yolo_block(concat2, 128)\n",
1321 + " feature_map_3 = slim.conv2d(feature_map_3, 3 * (5 + self.class_num), 1,\n",
1322 + " stride=1, normalizer_fn=None,\n",
1323 + " activation_fn=None, biases_initializer=tf.zeros_initializer())\n",
1324 + " feature_map_3 = tf.identity(feature_map_3, name='feature_map_3')\n",
1325 + "\n",
1326 + " return feature_map_1, feature_map_2, feature_map_3\n",
1327 + "\n",
1328 + "    def reorg_layer(self, feature_map, anchors):\n",
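+ "        # Decode the raw feature map into absolute boxes on the input image,\n",
+ "        # following the YOLOv3 paper: b_xy = (sigmoid(t_xy) + grid_offset) * stride\n",
+ "        # and b_wh = anchor_wh * exp(t_wh).\n",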
1329 + " # size : [h, w] format\n",
1330 + " grid_size = feature_map.get_shape().as_list()[1:3] if self.use_static_shape else tf.shape(feature_map)[1:3] # [13, 13]\n",
1331 + " ratio = tf.cast(self.img_size / grid_size, tf.float32)\n",
1332 + "\n",
1333 + " # anchor : [w, h] format\n",
1334 + " rescaled_anchors = [(anchor[0] / ratio[1], anchor[1] / ratio[0]) for anchor in anchors]\n",
1335 + "\n",
1336 + " feature_map = tf.reshape(feature_map, [-1, grid_size[0], grid_size[1], 3, 5 + self.class_num])\n",
1337 + "\n",
1338 + " box_centers, box_sizes, conf_logits, prob_logits = tf.split(feature_map, [2, 2, 1, self.class_num], axis=-1)\n",
1339 + " box_centers = tf.nn.sigmoid(box_centers)\n",
1340 + "\n",
1341 + " grid_x = tf.range(grid_size[1], dtype=tf.int32)\n",
1342 + " grid_y = tf.range(grid_size[0], dtype=tf.int32)\n",
1343 + " grid_x, grid_y = tf.meshgrid(grid_x, grid_y)\n",
1344 + " x_offset = tf.reshape(grid_x, (-1, 1))\n",
1345 + " y_offset = tf.reshape(grid_y, (-1, 1))\n",
1346 + " x_y_offset = tf.concat([x_offset, y_offset], axis=-1)\n",
1347 + "\n",
1348 + " x_y_offset = tf.cast(tf.reshape(x_y_offset, [grid_size[0], grid_size[1], 1, 2]), tf.float32)\n",
1349 + "\n",
1350 + " box_centers = box_centers + x_y_offset\n",
1351 + " box_centers = box_centers * ratio[::-1]\n",
1352 + "\n",
1353 + " box_sizes = tf.exp(box_sizes) * rescaled_anchors\n",
1354 + " box_sizes = box_sizes * ratio[::-1]\n",
1355 + "\n",
1356 + " boxes = tf.concat([box_centers, box_sizes], axis=-1)\n",
1357 + "\n",
1358 + " return x_y_offset, boxes, conf_logits, prob_logits\n",
1359 + " \n",
1360 + " def predict(self, feature_maps):\n",
1361 + " feature_map_1, feature_map_2, feature_map_3 = feature_maps\n",
1362 + "\n",
1363 + " feature_map_anchors = [(feature_map_1, self.anchors[6:9]),\n",
1364 + " (feature_map_2, self.anchors[3:6]),\n",
1365 + " (feature_map_3, self.anchors[0:3])]\n",
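+ "        # the stride-32 map pairs with the largest anchors (6:9), the stride-8\n",
+ "        # map with the smallest (0:3)\n",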
1366 + " reorg_results = [self.reorg_layer(feature_map, anchors) for (feature_map, anchors) in feature_map_anchors]\n",
1367 + "\n",
1368 + " def _reshape_logit(result):\n",
1369 + " x_y_offset, boxes, conf_logits, prob_logits = result\n",
1370 + " grid_size = x_y_offset.get_shape().as_list()[:2] if self.use_static_shape else tf.shape(x_y_offset)[:2]\n",
1371 + " boxes = tf.reshape(boxes, [-1, grid_size[0] * grid_size[1] * 3, 4])\n",
1372 + " conf_logits = tf.reshape(conf_logits, [-1, grid_size[0] * grid_size[1] * 3, 1])\n",
1373 + " prob_logits = tf.reshape(prob_logits, [-1, grid_size[0] * grid_size[1] * 3, self.class_num])\n",
1374 + " return boxes, conf_logits, prob_logits\n",
1375 + "\n",
1376 + " boxes_list, confs_list, probs_list = [], [], []\n",
1377 + "\n",
1378 + " for result in reorg_results:\n",
1379 + " boxes, conf_logits, prob_logits = _reshape_logit(result)\n",
1380 + " confs = tf.sigmoid(conf_logits)\n",
1381 + " probs = tf.sigmoid(prob_logits)\n",
1382 + " boxes_list.append(boxes)\n",
1383 + " confs_list.append(confs)\n",
1384 + " probs_list.append(probs)\n",
1385 + " \n",
1386 + " boxes = tf.concat(boxes_list, axis=1)\n",
1387 + " confs = tf.concat(confs_list, axis=1)\n",
1388 + " probs = tf.concat(probs_list, axis=1)\n",
1389 + "\n",
1390 + " center_x, center_y, width, height = tf.split(boxes, [1, 1, 1, 1], axis=-1)\n",
1391 + " x_min = center_x - width / 2\n",
1392 + " y_min = center_y - height / 2\n",
1393 + " x_max = center_x + width / 2\n",
1394 + " y_max = center_y + height / 2\n",
1395 + "\n",
1396 + " boxes = tf.concat([x_min, y_min, x_max, y_max], axis=-1)\n",
1397 + "\n",
1398 + " return boxes, confs, probs\n",
1399 + " \n",
1400 + " def loss_layer(self, feature_map_i, y_true, anchors):\n",
1401 + " grid_size = tf.shape(feature_map_i)[1:3]\n",
1402 + " ratio = tf.cast(self.img_size / grid_size, tf.float32)\n",
1403 + " # N: batch_size\n",
1404 + " N = tf.cast(tf.shape(feature_map_i)[0], tf.float32)\n",
1405 + "\n",
1406 + " x_y_offset, pred_boxes, pred_conf_logits, pred_prob_logits = self.reorg_layer(feature_map_i, anchors)\n",
1407 + "\n",
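+ "        # object_mask marks the anchor cells assigned to a gt box; the ignore\n",
+ "        # mask (built below) drops the no-object confidence loss for predictions\n",
+ "        # whose best IoU with any gt box is >= 0.5, so near-misses are not punished\n",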
1408 + "        ### mask\n",
1409 + "        object_mask = y_true[..., 4:5]\n",
1410 + "        ignore_mask = tf.TensorArray(tf.float32, size=0, dynamic_size=True)\n",
1411 + "\n",
1412 + "        def loop_cond(idx, ignore_mask):\n",
1413 + "            return tf.less(idx, tf.cast(N, tf.int32))\n",
1414 + "\n",
1415 + "        def loop_body(idx, ignore_mask):\n",
1416 + "            valid_true_boxes = tf.boolean_mask(y_true[idx, ..., 0:4], tf.cast(object_mask[idx, ..., 0], 'bool'))\n",
1417 + "\n",
1418 + "            iou = self.box_iou(pred_boxes[idx], valid_true_boxes)\n",
1419 + "            best_iou = tf.reduce_max(iou, axis=-1)\n",
1420 + "\n",
1421 + "            ignore_mask_tmp = tf.cast(best_iou < 0.5, tf.float32)\n",
1422 + "\n",
1423 + "            ignore_mask = ignore_mask.write(idx, ignore_mask_tmp)\n",
1424 + "            return idx + 1, ignore_mask\n",
1425 + "\n",
1426 + " _, ignore_mask = tf.while_loop(cond=loop_cond, body=loop_body, loop_vars=[0, ignore_mask])\n",
1427 + " ignore_mask = ignore_mask.stack()\n",
1428 + " ignore_mask = tf.expand_dims(ignore_mask, -1)\n",
1429 + "\n",
1430 + " pred_box_xy = pred_boxes[..., 0:2]\n",
1431 + " pred_box_wh = pred_boxes[..., 2:4]\n",
1432 + "\n",
1433 + " true_xy = y_true[..., 0:2] / ratio[::-1] - x_y_offset\n",
1434 + " pred_xy = pred_box_xy / ratio[::-1] - x_y_offset\n",
1435 + "\n",
1436 + " true_tw_th = y_true[..., 2:4] / anchors\n",
1437 + " pred_tw_th = pred_box_wh / anchors\n",
1438 + "\n",
1439 + " true_tw_th = tf.where(condition=tf.equal(true_tw_th, 0),\n",
1440 + " x=tf.ones_like(true_tw_th), y=true_tw_th)\n",
1441 + " pred_tw_th = tf.where(condition=tf.equal(pred_tw_th, 0),\n",
1442 + " x=tf.ones_like(pred_tw_th), y=pred_tw_th)\n",
1443 + " true_tw_th = tf.log(tf.clip_by_value(true_tw_th, 1e-9, 1e9))\n",
1444 + " pred_tw_th = tf.log(tf.clip_by_value(pred_tw_th, 1e-9, 1e9))\n",
1445 + "\n",
1446 + " box_loss_scale = 2. - (y_true[..., 2:3] / tf.cast(self.img_size[1], tf.float32)) * (y_true[..., 3:4] / tf.cast(self.img_size[0], tf.float32))\n",
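+ "        # box_loss_scale lies in [1, 2] and up-weights the coordinate loss for\n",
+ "        # small boxes (2 minus the normalized box area)\n",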
1447 + "\n",
1448 + " ### loss\n",
1449 + "\n",
1450 + "        mix_w = y_true[..., -1:]\n",
1451 + "\n",
1452 + " xy_loss = tf.reduce_sum(tf.square(true_xy - pred_xy) * object_mask * box_loss_scale * mix_w) / N\n",
1453 + " wh_loss = tf.reduce_sum(tf.square(true_tw_th - pred_tw_th) * object_mask * box_loss_scale * mix_w) / N\n",
1454 + "\n",
1455 + " conf_pos_mask = object_mask\n",
1456 + " conf_neg_mask = (1 - object_mask) * ignore_mask\n",
1457 + " conf_loss_pos = conf_pos_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=object_mask, logits=pred_conf_logits)\n",
1458 + " conf_loss_neg = conf_neg_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=object_mask, logits=pred_conf_logits)\n",
1459 + "\n",
1460 + " conf_loss = conf_loss_pos + conf_loss_neg\n",
1461 + "\n",
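+ "        # optional focal modulation (Lin et al. 2017): down-weight easy examples\n",
+ "        # by alpha * |target - sigmoid(logit)| ** gamma\n",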
1462 + " if self.use_focal_loss:\n",
1463 + " alpha = 1.0\n",
1464 + " gamma = 2.0\n",
1465 + " focal_mask = alpha * tf.pow(tf.abs(object_mask - tf.sigmoid(pred_conf_logits)), gamma)\n",
1466 + " conf_loss *= focal_mask\n",
1467 + " conf_loss = tf.reduce_sum(conf_loss * mix_w) / N\n",
1468 + "\n",
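+ "        # optional label smoothing: move the one-hot class targets slightly\n",
+ "        # toward the uniform distribution to reduce over-confidence\n",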
1469 + " if self.use_label_smooth:\n",
1470 + " delta = 0.01\n",
1471 + " label_target = (1 - delta) * y_true[..., 5:-1] + delta * 1. / self.class_num\n",
1472 + " else:\n",
1473 + " label_target = y_true[..., 5:-1]\n",
1474 + "\n",
1475 + " class_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_target, logits=pred_prob_logits) * mix_w\n",
1476 + " class_loss = tf.reduce_sum(class_loss) / N\n",
1477 + "\n",
1478 + " return xy_loss, wh_loss, conf_loss, class_loss\n",
1479 + " \n",
1480 + "\n",
1481 + " def box_iou(self, pred_boxes, valid_true_boxes):\n",
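+ "        # Pairwise IoU by broadcasting: pred boxes expand to [grid, grid, 3, 1, 2]\n",
+ "        # against V gt boxes [V, 2], giving an IoU tensor of shape [grid, grid, 3, V].\n",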
1482 + " pred_box_xy = pred_boxes[..., 0:2]\n",
1483 + " pred_box_wh = pred_boxes[..., 2:4]\n",
1484 + "\n",
1485 + " pred_box_xy = tf.expand_dims(pred_box_xy, -2)\n",
1486 + " pred_box_wh = tf.expand_dims(pred_box_wh, -2)\n",
1487 + "\n",
1488 + " true_box_xy = valid_true_boxes[:, 0:2]\n",
1489 + " true_box_wh = valid_true_boxes[:, 2:4]\n",
1490 + "\n",
1491 + " intersect_mins = tf.maximum(pred_box_xy - pred_box_wh / 2.,\n",
1492 + " true_box_xy - true_box_wh / 2.)\n",
1493 + " intersect_maxs = tf.minimum(pred_box_xy + pred_box_wh / 2.,\n",
1494 + " true_box_xy + true_box_wh / 2.)\n",
1495 + " intersect_wh = tf.maximum(intersect_maxs - intersect_mins, 0.)\n",
1496 + "\n",
1497 + " intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]\n",
1498 + " pred_box_area = pred_box_wh[..., 0] * pred_box_wh[..., 1]\n",
1499 + " true_box_area = true_box_wh[..., 0] * true_box_wh[..., 1]\n",
1500 + " true_box_area = tf.expand_dims(true_box_area, axis=0)\n",
1501 + "\n",
1502 + " iou = intersect_area / (pred_box_area + true_box_area - intersect_area + 1e-10)\n",
1503 + "\n",
1504 + " return iou\n",
1505 + "\n",
1506 + " \n",
1507 + " def compute_loss(self, y_pred, y_true):\n",
1508 + " loss_xy, loss_wh, loss_conf, loss_class = 0., 0., 0., 0.\n",
1509 + " anchor_group = [self.anchors[6:9], self.anchors[3:6], self.anchors[0:3]]\n",
1510 + "\n",
1511 + " for i in range(len(y_pred)):\n",
1512 + " result = self.loss_layer(y_pred[i], y_true[i], anchor_group[i])\n",
1513 + " loss_xy += result[0]\n",
1514 + " loss_wh += result[1]\n",
1515 + " loss_conf += result[2]\n",
1516 + " loss_class += result[3]\n",
1517 + " total_loss = loss_xy + loss_wh + loss_conf + loss_class\n",
1518 + " return [total_loss, loss_xy, loss_wh, loss_conf, loss_class]"
1519 + ],
1520 + "execution_count": 8,
1521 + "outputs": [
1522 + {
1523 + "output_type": "stream",
1524 + "text": [
1525 + "WARNING:tensorflow:\n",
1526 + "The TensorFlow contrib module will not be included in TensorFlow 2.0.\n",
1527 + "For more information, please see:\n",
1528 + " * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n",
1529 + " * https://github.com/tensorflow/addons\n",
1530 + " * https://github.com/tensorflow/io (for I/O related ops)\n",
1531 + "If you depend on functionality not listed there, please file an issue.\n",
1532 + "\n"
1533 + ],
1534 + "name": "stdout"
1535 + }
1536 + ]
1537 + },
1538 + {
1539 + "cell_type": "code",
1540 + "metadata": {
1541 + "id": "Nlddq-K7AJin",
1542 + "colab_type": "code",
1543 + "outputId": "c5baed55-0d4e-4c65-fa7d-340b27baf8f9",
1544 + "colab": {
1545 + "base_uri": "https://localhost:8080/",
1546 + "height": 89
1547 + }
1548 + },
1549 + "source": [
1550 + "## arguments\n",
1551 + "\n",
1552 + "import math\n",
1553 + "\n",
1554 + "\n",
1555 + "### Some paths\n",
1556 + "\n",
1557 + "data_path = '/content/gdrive/My Drive/yolo/data/'\n",
1558 + "train_file = data_path + 'train.tfrecord' # The path of the training TFRecord file.\n",
1559 + "val_file = data_path + 'val.tfrecord' # The path of the validation TFRecord file.\n",
1560 + "restore_path = data_path + 'darknet_weights/yolov3.ckpt' # The path of the weights to restore.\n",
1561 + "save_dir = '/content/gdrive/My Drive/yolo/checkpoint/' # The directory of the weights to save.\n",
1562 + "\n",
1563 + "### we are not using tensorboard logs in this code\n",
1564 + "\n",
1565 + "log_dir = data_path + 'logs/' # The directory to store the tensorboard log files.\n",
1566 + "progress_log_path = data_path + 'progress.log' # The path to record the training progress.\n",
1567 + "\n",
1568 + "anchor_path = data_path + 'yolo_anchors.txt' # The path of the anchor txt file.\n",
1569 + "class_name_path = data_path + 'classes.txt' # The path of the class names.\n",
1570 + "\n",
1571 + "### Training related numbers\n",
1572 + "batch_size = 4\n",
1573 + "img_size = [416, 416] # Images will be resized to `img_size` and fed to the network, size format: [width, height]\n",
1574 + "letterbox_resize = True # Whether to use the letterbox resize, i.e., keep the original aspect ratio in the resized image.\n",
1575 + "total_epoches = 10\n",
1576 + "train_evaluation_step = 10 # Evaluate on the training batch after some steps.\n",
1577 + "val_evaluation_epoch = 2 # Evaluate on the whole validation dataset after some epochs. Set to None to evaluate every epoch.\n",
1578 + "save_epoch = 5 # Save the model after some epochs.\n",
1579 + "batch_norm_decay = 0.99 # decay in bn ops\n",
1580 + "weight_decay = 5e-4 # l2 weight decay\n",
1581 + "current_global_step = 0 # used when resuming training\n",
1582 + "\n",
1583 + "### tf.data parameters\n",
1584 + "num_threads = 10 # Number of threads for image processing used in tf.data pipeline.\n",
1585 + "prefetech_buffer = 5 # Prefetech_buffer used in tf.data pipeline.\n",
1586 + "\n",
1587 + "### Learning rate and optimizer\n",
1588 + "optimizer_name = 'momentum' # Chosen from [sgd, momentum, adam, rmsprop]\n",
1589 + "save_optimizer = True # Whether to save the optimizer parameters into the checkpoint file.\n",
1590 + "learning_rate_init = 1e-4\n",
1591 + "lr_type = 'piecewise' # Chosen from [fixed, exponential, cosine_decay, cosine_decay_restart, piecewise]\n",
1592 + "lr_decay_epoch = 5 # Epochs after which the learning rate decays. Int or float. Used when lr_type is `exponential` or `cosine_decay_restart`.\n",
1593 + "lr_decay_factor = 0.96 # The learning rate decay factor. Used when lr_type is `exponential`.\n",
1594 + "lr_lower_bound = 1e-6 # The minimum learning rate.\n",
1595 + "# only used in piecewise lr type\n",
1596 + "pw_boundaries = [30, 50] # epoch based boundaries\n",
1597 + "pw_values = [learning_rate_init, 3e-5, 1e-5]\n",
1598 + "\n",
1599 + "### Load and finetune\n",
1600 + "# Choose the parts you want to restore the weights. List form.\n",
1601 + "# restore_include: None, restore_exclude: None => restore the whole model\n",
1602 + "# restore_include: None, restore_exclude: scope => restore the whole model except `scope`\n",
1603 + "# restore_include: scope1, restore_exclude: scope2 => if scope1 contains scope2, restore scope1 and not restore scope2 (scope1 - scope2)\n",
1604 + "# choice 1: only restore the darknet body\n",
1605 + "# restore_include = ['yolov3/darknet53_body']\n",
1606 + "# restore_exclude = None\n",
1607 + "# choice 2: restore all layers except the last 3 conv2d layers of the 3 scales\n",
1608 + "restore_include = None\n",
1609 + "restore_exclude = ['yolov3/yolov3_head/Conv_14', 'yolov3/yolov3_head/Conv_6', 'yolov3/yolov3_head/Conv_22']\n",
1610 + "# Choose the parts you want to finetune. List form.\n",
1611 + "# Set to None to train the whole model.\n",
1612 + "\n",
1613 + "update_part = ['yolov3/yolov3_head']\n",
1614 + "\n",
1615 + "### other training strategies\n",
1616 + "multi_scale_train = True # Whether to apply multi-scale training strategy. Image size varies from [320, 320] to [640, 640] by default.\n",
1617 + "use_label_smooth = True # Whether to use class label smoothing strategy.\n",
1618 + "use_focal_loss = True # Whether to apply focal loss on the conf loss.\n",
1619 + "use_mix_up = True # Whether to use mix up data augmentation strategy. \n",
1620 + "use_warm_up = True # Whether to use the warm-up strategy to prevent gradient explosion.\n",
1621 + "warm_up_epoch = 2 # Warm-up training epochs. Set to a larger value if the gradient explodes.\n",
1622 + "\n",
1623 + "### some constants in validation\n",
1624 + "# nms\n",
1625 + "nms_threshold = 0.45 # iou threshold in nms operation\n",
1626 + "score_threshold = 0.01 # threshold of the probability of the classes in nms operation, i.e. score = pred_confs * pred_probs. set lower for higher recall.\n",
1627 + "nms_topk = 150 # keep at most nms_topk outputs after nms\n",
1628 + "# mAP eval\n",
1629 + "eval_threshold = 0.5 # the iou threshold applied in mAP evaluation\n",
1630 + "use_voc_07_metric = False # whether to use voc 2007 evaluation metric, i.e. the 11-point metric\n",
1631 + "\n",
1632 + "### parse some params\n",
1633 + "anchors = parse_anchors(anchor_path)\n",
1634 + "classes = read_class_names(class_name_path)\n",
1635 + "class_num = len(classes)\n",
1636 + "train_img_cnt = TFRecordIterator(train_file, 'GZIP').count()\n",
1637 + "val_img_cnt = TFRecordIterator(val_file, 'GZIP').count()\n",
1638 + "train_batch_num = int(math.ceil(float(train_img_cnt) / batch_size))\n",
1639 + "\n",
1640 + "lr_decay_freq = int(train_batch_num * lr_decay_epoch)\n",
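+ "# convert the piecewise-lr boundaries from epochs to global steps (one epoch =\n",
+ "# train_batch_num steps), shifted by the global step we resume from\n",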
1641 + "pw_boundaries = [float(i) * train_batch_num + current_global_step for i in pw_boundaries]\n"
1642 + ],
1643 + "execution_count": 9,
1644 + "outputs": [
1645 + {
1646 + "output_type": "stream",
1647 + "text": [
1648 + "WARNING:tensorflow:From <ipython-input-2-ea7f0591b13c>:7: tf_record_iterator (from tensorflow.python.lib.io.tf_record) is deprecated and will be removed in a future version.\n",
1649 + "Instructions for updating:\n",
1650 + "Use eager execution and: \n",
1651 + "`tf.data.TFRecordDataset(path)`\n"
1652 + ],
1653 + "name": "stdout"
1654 + }
1655 + ]
1656 + },
1657 + {
1658 + "cell_type": "code",
1659 + "metadata": {
1660 + "id": "NagT2oNZFf0q",
1661 + "colab_type": "code",
1662 + "colab": {}
1663 + },
1664 + "source": [
1665 + "## train\n",
1666 + "\n",
1667 + "import os\n",
1668 + "from tqdm import trange\n",
1669 + "\n",
1670 + "if training:\n",
1671 + " is_training = tf.placeholder(tf.bool, name=\"phase_train\")\n",
1672 + " handle_flag = tf.placeholder(tf.string, [], name='iterator_handle_flag')\n",
1673 + "\n",
1674 + " pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None])\n",
1675 + " pred_scores_flag = tf.placeholder(tf.float32, [1, None, None])\n",
1676 + " gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, class_num, nms_topk, score_threshold, nms_threshold)\n",
1677 + "\n",
1678 + " ### tf.data pipeline\n",
1679 + " train_dataset = tf.data.TFRecordDataset(filenames=train_file, compression_type='GZIP')\n",
1680 + " train_dataset = train_dataset.shuffle(train_img_cnt)\n",
1681 + " train_dataset = train_dataset.batch(batch_size)\n",
1682 + " train_dataset = train_dataset.map(\n",
1683 + " lambda x: tf.py_func(get_batch_data,\n",
1684 + "                             inp=[x, class_num, img_size, anchors, True, multi_scale_train, use_mix_up, letterbox_resize],\n",
1685 + " Tout=[tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),\n",
1686 + " num_parallel_calls=num_threads\n",
1687 + " )\n",
1688 + " train_dataset = train_dataset.prefetch(prefetech_buffer)\n",
1689 + "\n",
1690 + " val_dataset = tf.data.TFRecordDataset(filenames=val_file, compression_type='GZIP')\n",
1691 + " val_dataset = val_dataset.batch(1)\n",
1692 + " val_dataset = val_dataset.map(\n",
1693 + " lambda x: tf.py_func(get_batch_data,\n",
1694 + "                             inp=[x, class_num, img_size, anchors, False, False, False, letterbox_resize],\n",
1695 + " Tout=[tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),\n",
1696 + " num_parallel_calls=num_threads\n",
1697 + " )\n",
1698 + "    val_dataset = val_dataset.prefetch(prefetech_buffer)\n",
1699 + "\n",
1700 + " iterator = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)\n",
1701 + " train_init_op = iterator.make_initializer(train_dataset)\n",
1702 + " val_init_op = iterator.make_initializer(val_dataset)\n",
1703 + "\n",
1704 + " image_ids, image, y_true_13, y_true_26, y_true_52 = iterator.get_next()\n",
1705 + " y_true = [y_true_13, y_true_26, y_true_52]\n",
1706 + "\n",
1707 + " image_ids.set_shape([None])\n",
1708 + " image.set_shape([None, None, None, 3])\n",
1709 + " for y in y_true:\n",
1710 + " y.set_shape([None, None, None, None, None])\n",
1711 + "\n",
1712 + "\n",
1713 + " ### Model definition\n",
1714 + " yolo_model = yolov3(class_num, anchors, use_label_smooth, use_focal_loss, batch_norm_decay, weight_decay, use_static_shape=False)\n",
1715 + "\n",
1716 + " with tf.variable_scope('yolov3'):\n",
1717 + " pred_feature_maps = yolo_model.forward(image, is_training=is_training)\n",
1718 + "\n",
1719 + " loss = yolo_model.compute_loss(pred_feature_maps, y_true)\n",
1720 + " y_pred = yolo_model.predict(pred_feature_maps)\n",
1721 + "\n",
1722 + " l2_loss = tf.losses.get_regularization_loss()\n",
1723 + "\n",
1724 + " saver_to_restore = tf.train.Saver(var_list=tf.contrib.framework.get_variables_to_restore(include=restore_include, exclude=restore_exclude))\n",
1725 + " update_vars = tf.contrib.framework.get_variables_to_restore(include=update_part)\n",
1726 + "\n",
1727 + "\n",
1728 + " global_step = tf.Variable(float(current_global_step), trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])\n",
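+ "    # warm-up ramps the learning rate linearly from 0 to learning_rate_init over\n",
+ "    # the first warm_up_epoch epochs, then hands over to the configured schedule\n",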
1729 + " if use_warm_up:\n",
1730 + " learning_rate = tf.cond(tf.less(global_step, train_batch_num * warm_up_epoch), \n",
1731 + " lambda: learning_rate_init * global_step / (train_batch_num * warm_up_epoch),\n",
1732 + " lambda: config_learning_rate(global_step - train_batch_num * warm_up_epoch))\n",
1733 + " else:\n",
1734 + " learning_rate = config_learning_rate(global_step)\n",
1735 + "\n",
1736 + " optimizer = config_optimizer(optimizer_name, learning_rate)\n",
1737 + "\n",
1738 + " update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n",
1739 + "\n",
1740 + " with tf.control_dependencies(update_ops):\n",
1741 + " gvs = optimizer.compute_gradients(loss[0] + l2_loss, var_list=update_vars)\n",
1742 + " clip_grad_var = [gv if gv[0] is None else [\n",
1743 + " tf.clip_by_norm(gv[0], 100.), gv[1]] for gv in gvs]\n",
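+ "        # clipping each gradient tensor's norm to 100 guards against the gradient\n",
+ "        # explosion that the warm-up option is meant to prevent\n",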
1744 + " train_op = optimizer.apply_gradients(clip_grad_var, global_step=global_step)\n",
1745 + "\n",
1746 + " if save_optimizer:\n",
1747 + " print('Saving optimizer parameters: ON')\n",
1748 + " saver_to_save = tf.train.Saver()\n",
1749 + " saver_best = tf.train.Saver()\n",
1750 + " else:\n",
1751 + " print('Saving optimizer parameters: OFF')\n",
1752 + "\n",
1753 + "\n",
1754 + " with tf.Session() as sess:\n",
1755 + " sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])\n",
1756 + "\n",
1757 + "        if tf.train.checkpoint_exists(restore_path):  # ckpt files are sharded on disk, so do not test with os.path.exists\n",
1758 + " saver_to_restore.restore(sess, restore_path)\n",
1759 + "\n",
1760 + "        print('\\nStart training...: Total epochs =', total_epoches, '\\n')\n",
1761 + "\n",
1762 + " best_mAP = -np.Inf\n",
1763 + "\n",
1764 + " for epoch in range(total_epoches):\n",
1765 + " sess.run(train_init_op)\n",
1766 + " loss_total, loss_xy, loss_wh, loss_conf, loss_class = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()\n",
1767 + "\n",
1768 + " ### train part\n",
1769 + " for i in trange(train_batch_num):\n",
1770 + " _, __y_pred, __y_true, __loss, __global_step, __lr = sess.run(\n",
1771 + " [train_op, y_pred, y_true, loss, global_step, learning_rate],\n",
1772 + " feed_dict={is_training: True})\n",
1773 + "\n",
1774 + " loss_total.update(__loss[0], len(__y_pred[0]))\n",
1775 + " loss_xy.update(__loss[1], len(__y_pred[0]))\n",
1776 + " loss_wh.update(__loss[2], len(__y_pred[0]))\n",
1777 + " loss_conf.update(__loss[3], len(__y_pred[0]))\n",
1778 + " loss_class.update(__loss[4], len(__y_pred[0]))\n",
1779 + "\n",
1780 + " if __global_step % train_evaluation_step == 0 and __global_step > 0:\n",
1781 + " recall, precision = evaluate_on_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, __y_pred, __y_true, class_num, nms_threshold)\n",
1782 + "\n",
1783 + " info = \"Epoch: {}, global_step: {} | loss: total: {:.2f}, xy: {:.2f}, wh: {:.2f}, conf: {:.2f}, class: {:.2f} | \".format(\n",
1784 + " epoch, int(__global_step), loss_total.average, loss_xy.average, loss_wh.average, loss_conf.average, loss_class.average)\n",
1785 + " info += 'Last batch: rec: {:.3f}, prec: {:.3f} | lr: {:.5g}'.format(recall, precision, __lr)\n",
1786 + " print(info)\n",
1787 + " \n",
1788 + " if np.isnan(loss_total.average):\n",
1789 + " print('****' * 10)\n",
1790 + " raise ArithmeticError('Gradient exploded!')\n",
1791 + "\n",
1792 + " ## train end (saving parameters)\n",
1793 + " if save_optimizer and epoch % save_epoch == 0 and epoch > 0:\n",
1794 + " if loss_total.average <= 2.:\n",
1795 + " saver_to_save.save(sess, save_dir + 'model-epoch_{}_step_{}_loss_{:.4f}_lr_{:.5g}'.format(epoch, int(__global_step), loss_total.average, __lr))\n",
1796 + "\n",
1797 + " ### validation part\n",
1798 + "            if (val_evaluation_epoch is None or epoch % val_evaluation_epoch == 0) and epoch >= warm_up_epoch:\n",
1799 + " sess.run(val_init_op)\n",
1800 + "\n",
1801 + " val_loss_total, val_loss_xy, val_loss_wh, val_loss_conf, val_loss_class = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()\n",
1802 + "\n",
1803 + " val_preds = []\n",
1804 + "\n",
1805 + " for j in trange(val_img_cnt):\n",
1806 + " __image_ids, __y_pred, __loss = sess.run([image_ids, y_pred, loss],\n",
1807 + " feed_dict={is_training: False})\n",
1808 + " pred_content = get_preds_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, __image_ids, __y_pred)\n",
1809 + " val_preds.extend(pred_content)\n",
1810 + " val_loss_total.update(__loss[0])\n",
1811 + " val_loss_xy.update(__loss[1])\n",
1812 + " val_loss_wh.update(__loss[2])\n",
1813 + " val_loss_conf.update(__loss[3])\n",
1814 + " val_loss_class.update(__loss[4])\n",
1815 + "\n",
1816 + " # calc mAP\n",
1817 + " rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(), AverageMeter()\n",
1818 + " gt_dict = parse_gt_rec(val_file, 'GZIP', img_size, letterbox_resize)\n",
1819 + "\n",
1820 + "                info = '======> Epoch: {}, global_step: {}, lr: {:.6g} <======\\n'.format(epoch, int(__global_step), __lr)\n",
1821 + "\n",
1822 + " for ii in range(class_num):\n",
1823 + " npos, nd, rec, prec, ap = voc_eval(gt_dict, val_preds, ii, iou_thres=eval_threshold, use_07_metric=use_voc_07_metric)\n",
1824 + " info += 'EVAL: Class {}: Recall: {:.4f}, Precision: {:.4f}, AP: {:.4f}\\n'.format(ii, rec, prec, ap)\n",
1825 + " rec_total.update(rec, npos)\n",
1826 + " prec_total.update(prec, nd)\n",
1827 + " ap_total.update(ap, 1)\n",
1828 + "\n",
1829 + " mAP = ap_total.average\n",
1830 + " info += 'EVAL: Recall: {:.4f}, Precison: {:.4f}, mAP: {:.4f}\\n'.format(rec_total.average, prec_total.average, mAP)\n",
1831 + " info += 'EVAL: loss: total: {:.2f}, xy: {:.2f}, wh: {:.2f}, conf: {:.2f}, class: {:.2f}\\n'.format(\n",
1832 + " val_loss_total.average, val_loss_xy.average, val_loss_wh.average, val_loss_conf.average, val_loss_class.average)\n",
1833 + " print(info)\n",
1834 + "\n",
1835 + " if save_optimizer and mAP > best_mAP:\n",
1836 + " best_mAP = mAP\n",
1837 + " saver_best.save(sess, save_dir + 'best_model_Epoch_{}_step_{}_mAP_{:.4f}_loss_{:.4f}_lr_{:.7g}'.format(\n",
1838 + " epoch, int(__global_step), best_mAP, val_loss_total.average, __lr))"
1839 + ],
1840 + "execution_count": 0,
1841 + "outputs": []
1842 + },
1843 + {
1844 + "cell_type": "code",
1845 + "metadata": {
1846 + "id": "HmoSmKIuOpyC",
1847 + "colab_type": "code",
1848 + "colab": {}
1849 + },
1850 + "source": [
1851 + "## evaluation (test)\n",
1852 + "\n",
1853 + "import argparse\n",
1854 + "\n",
1855 + "if not training:\n",
1856 + "\n",
1857 + " ### ArgumentParser\n",
1858 + " parser = argparse.ArgumentParser(description=\"YOLO-V3 eval procedure.\")\n",
1859 + "\n",
1860 + " # paths\n",
1861 + " parser.add_argument(\"--eval_file\", type=str, default=\"/content/gdrive/My Drive/yolo/data/test.tfrecord\",\n",
1862 + "                        help=\"The path of the validation or test TFRecord file.\")\n",
1863 + "\n",
1864 + " parser.add_argument(\"--restore_path\", type=str, default=\"/content/gdrive/My Drive/yolo/data/darknet_weights/yolov3.ckpt\",\n",
1865 + " help=\"The path of the weights to restore.\")\n",
1866 + "\n",
1867 + "    parser.add_argument(\"--anchor_path\", type=str, default=\"/content/gdrive/My Drive/yolo/data/yolo_anchors.txt\",\n",
1868 + " help=\"The path of the anchor txt file.\")\n",
1869 + "\n",
1870 + " parser.add_argument(\"--class_name_path\", type=str, default=\"/content/gdrive/My Drive/yolo/data/classes.txt\",\n",
1871 + " help=\"The path of the class names.\")\n",
1872 + "\n",
1873 + " # some numbers\n",
1874 + " parser.add_argument(\"--img_size\", nargs='*', type=int, default=[416, 416],\n",
1875 + " help=\"Resize the input image to `img_size`, size format: [width, height]\")\n",
1876 + "\n",
1877 + " parser.add_argument(\"--letterbox_resize\", type=lambda x: (str(x).lower() == 'true'), default=False,\n",
1878 + " help=\"Whether to use the letterbox resize, i.e., keep the original image aspect ratio.\")\n",
1879 + "\n",
1880 + " parser.add_argument(\"--num_threads\", type=int, default=10,\n",
1881 + " help=\"Number of threads for image processing used in tf.data pipeline.\")\n",
1882 + "\n",
1883 + " parser.add_argument(\"--prefetech_buffer\", type=int, default=5,\n",
1884 + " help=\"Prefetech_buffer used in tf.data pipeline.\")\n",
1885 + "\n",
1886 + " parser.add_argument(\"--nms_threshold\", type=float, default=0.45,\n",
1887 + " help=\"IOU threshold in nms operation.\")\n",
1888 + "\n",
1889 + " parser.add_argument(\"--score_threshold\", type=float, default=0.01,\n",
1890 + " help=\"Threshold of the probability of the classes in nms operation.\")\n",
1891 + "\n",
1892 + " parser.add_argument(\"--nms_topk\", type=int, default=400,\n",
1893 + " help=\"Keep at most nms_topk outputs after nms.\")\n",
1894 + "\n",
1895 + " parser.add_argument(\"--use_voc_07_metric\", type=lambda x: (str(x).lower() == 'true'), default=False,\n",
1896 + " help=\"Whether to use the voc 2007 mAP metrics.\")\n",
1897 + "\n",
1898 + "    args = parser.parse_args(args=[])  # parse an empty list: the notebook kernel injects its own argv\n",
1899 + "\n",
1900 + " # args params\n",
1901 + " args.anchors = parse_anchors(args.anchor_path)\n",
1902 + " args.classes = read_class_names(args.class_name_path)\n",
1903 + " args.class_num = len(args.classes)\n",
1904 + "    args.img_cnt = TFRecordIterator(args.eval_file, 'GZIP').count()  # eval_file is a GZIP TFRecord, not a txt file\n",
1905 + "\n",
1906 + " # setting placeholders\n",
1907 + " is_training = tf.placeholder(dtype=tf.bool, name=\"phase_train\")\n",
1908 + " handle_flag = tf.placeholder(tf.string, [], name='iterator_handle_flag')\n",
1909 + " pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None])\n",
1910 + " pred_scores_flag = tf.placeholder(tf.float32, [1, None, None])\n",
1911 + " gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, args.class_num, args.nms_topk, args.score_threshold, args.nms_threshold)\n",
1912 + "\n",
1913 + " ### tf.data pipeline\n",
1914 + " val_dataset = tf.data.TFRecordDataset(filenames=args.eval_file, compression_type='GZIP')\n",
1915 + " val_dataset = val_dataset.batch(1)\n",
1916 + " val_dataset = val_dataset.map(\n",
1917 + " lambda x: tf.py_func(get_batch_data, [x, args.class_num, args.img_size, args.anchors, False, False, False, args.letterbox_resize], [tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),\n",
1918 + " num_parallel_calls=args.num_threads\n",
1919 + " )\n",
1920 + "    val_dataset = val_dataset.prefetch(args.prefetech_buffer)\n",
1921 + " iterator = val_dataset.make_one_shot_iterator()\n",
1922 + "\n",
1923 + " image_ids, image, y_true_13, y_true_26, y_true_52 = iterator.get_next()\n",
1924 + " image_ids.set_shape([None])\n",
1925 + " y_true = [y_true_13, y_true_26, y_true_52]\n",
1926 + " image.set_shape([None, args.img_size[1], args.img_size[0], 3])\n",
1927 + " for y in y_true:\n",
1928 + " y.set_shape([None, None, None, None, None])\n",
1929 + "\n",
1930 + " ### Model definition\n",
1931 + " yolo_model = yolov3(args.class_num, args.anchors)\n",
1932 + " with tf.variable_scope('yolov3'):\n",
1933 + " pred_feature_maps = yolo_model.forward(image, is_training=is_training)\n",
1934 + " loss = yolo_model.compute_loss(pred_feature_maps, y_true)\n",
1935 + " y_pred = yolo_model.predict(pred_feature_maps)\n",
1936 + "\n",
1937 + " saver_to_restore = tf.train.Saver()\n",
1938 + "\n",
1939 + "\n",
1940 + " with tf.Session() as sess:\n",
1941 + " sess.run([tf.global_variables_initializer()])\n",
1942 + "        if tf.train.checkpoint_exists(args.restore_path):  # ckpt files are sharded on disk, so do not test with os.path.exists\n",
1943 + "            saver_to_restore.restore(sess, args.restore_path)\n",
1944 + "        else:\n",
1945 + "            raise ValueError('There is no model to evaluate. Move or create the checkpoint files at the restore path.')\n",
1946 + "\n",
1947 + " print('\\nStart evaluation...\\n')\n",
1948 + "\n",
1949 + " val_loss_total, val_loss_xy, val_loss_wh, val_loss_conf, val_loss_class = \\\n",
1950 + " AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()\n",
1951 + " val_preds = []\n",
1952 + "\n",
1953 + " for j in trange(args.img_cnt):\n",
1954 + " __image_ids, __y_pred, __loss = sess.run([image_ids, y_pred, loss], feed_dict={is_training: False})\n",
1955 + " pred_content = get_preds_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, __image_ids, __y_pred)\n",
1956 + "\n",
1957 + " val_preds.extend(pred_content)\n",
1958 + " val_loss_total.update(__loss[0])\n",
1959 + " val_loss_xy.update(__loss[1])\n",
1960 + " val_loss_wh.update(__loss[2])\n",
1961 + " val_loss_conf.update(__loss[3])\n",
1962 + " val_loss_class.update(__loss[4])\n",
1963 + "\n",
1964 + " rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(), AverageMeter()\n",
1965 + " gt_dict = parse_gt_rec(args.eval_file, 'GZIP', args.img_size, args.letterbox_resize)\n",
1966 + " print('mAP eval:')\n",
1967 + " for ii in range(args.class_num):\n",
1968 + " npos, nd, rec, prec, ap = voc_eval(gt_dict, val_preds, ii, iou_thres=0.5, use_07_metric=args.use_voc_07_metric)\n",
1969 + " rec_total.update(rec, npos)\n",
1970 + " prec_total.update(prec, nd)\n",
1971 + " ap_total.update(ap, 1)\n",
1972 + " print('Class {}: Recall: {:.4f}, Precision: {:.4f}, AP: {:.4f}'.format(ii, rec, prec, ap))\n",
1973 + "\n",
1974 + " mAP = ap_total.average\n",
1975 + " print('final mAP: {:.4f}'.format(mAP))\n",
1976 + " print(\"recall: {:.3f}, precision: {:.3f}\".format(rec_total.average, prec_total.average))\n",
1977 + " print(\"total_loss: {:.3f}, loss_xy: {:.3f}, loss_wh: {:.3f}, loss_conf: {:.3f}, loss_class: {:.3f}\".format(\n",
1978 + " val_loss_total.average, val_loss_xy.average, val_loss_wh.average, val_loss_conf.average, val_loss_class.average\n",
1979 + " ))"
1980 + ],
1981 + "execution_count": 0,
1982 + "outputs": []
1983 + }
1984 + ]
1985 +}
\ No newline at end of file