Showing 25 changed files with 950 additions and 0 deletions
source_code/config/cafe.data
0 → 100644
source_code/config/cafe_distance.data
0 → 100644
source_code/config/testdata.data
0 → 100644
source_code/config/tiny1.cfg
0 → 100644
+[net]
+# Testing
+# batch=1
+# subdivisions=1
+# Training
+batch=8
+subdivisions=2
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=1000
+max_batches = 500200
+policy=steps
+steps=400000,450000
+scales=.1,.1
+
+# 0
+[convolutional]
+batch_normalize=1
+filters=16
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 1
+[maxpool]
+size=2
+stride=2
+
+# 2
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 3
+[maxpool]
+size=2
+stride=2
+
+# 4
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 5
+[maxpool]
+size=2
+stride=2
+
+# 6
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 7
+[maxpool]
+size=2
+stride=2
+
+# 8
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 9
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 10
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=42
+activation=linear
+
+# 11
+[yolo]
+mask = 0, 1, 2
+anchors = 37,58, 81,82, 135,169
+classes=9
+num=3
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
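Note: in each of these configs, the filters value of the convolutional layer feeding a [yolo] layer must equal (classes + 5) * number-of-masked-anchors. A quick sanity check (illustrative Python, not part of this PR):

    # (4 box coords + 1 objectness + classes) per anchor selected by mask
    def yolo_head_filters(classes, anchors_per_head):
        return (classes + 5) * anchors_per_head

    assert yolo_head_filters(9, 3) == 42  # tiny1.cfg and tiny2.cfg, filters=42
    assert yolo_head_filters(5, 3) == 30  # yolov3-tiny.cfg below, filters=30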
source_code/config/tiny2.cfg
0 → 100644
+[net]
+# Testing
+# batch=1
+# subdivisions=1
+# Training
+batch=8
+subdivisions=2
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=1000
+max_batches = 500200
+policy=steps
+steps=400000,450000
+scales=.1,.1
+
+# 0
+[convolutional]
+batch_normalize=1
+filters=16
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 1
+[maxpool]
+size=2
+stride=2
+
+# 2
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 3
+[maxpool]
+size=2
+stride=2
+
+# 4
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 5
+[maxpool]
+size=2
+stride=2
+
+# 6
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 7
+[maxpool]
+size=2
+stride=2
+
+# 8
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 9
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=42
+activation=linear
+
+# 10
+[yolo]
+mask = 0, 1, 2
+anchors = 59,119, 81,82, 135,169
+classes=9
+num=3
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
source_code/config/yolov3-tiny.cfg
0 → 100644
+[net]
+# Testing
+# batch=1
+# subdivisions=1
+# Training
+batch=8
+subdivisions=2
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=1000
+max_batches = 500200
+policy=steps
+steps=400000,450000
+scales=.1,.1
+
+# 0
+[convolutional]
+batch_normalize=1
+filters=16
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 1
+[maxpool]
+size=2
+stride=2
+
+# 2
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 3
+[maxpool]
+size=2
+stride=2
+
+# 4
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 5
+[maxpool]
+size=2
+stride=2
+
+# 6
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 7
+[maxpool]
+size=2
+stride=2
+
+# 8
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 9
+[maxpool]
+size=2
+stride=2
+
+# 10
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 11
+[maxpool]
+size=2
+stride=1
+
+# 12
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+###########
+
+# 13
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+# 14
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 15
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=30
+activation=linear
+
+# 16
+[yolo]
+mask = 3,4,5
+anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
+classes=5
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
+# 17
+[route]
+layers = -4
+
+# 18
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+# 19
+[upsample]
+stride=2
+
+# 20
+[route]
+layers = -1, 8
+
+# 21
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 22
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=30
+activation=linear
+
+# 23
+[yolo]
+mask = 0,1,2
+anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
+classes=5
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
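Note: both [yolo] heads in yolov3-tiny.cfg share the same six anchor pairs; mask selects which three each head predicts with (3,4,5 on the coarse 13x13 grid, 0,1,2 on the 26x26 grid produced after the upsample). A minimal sketch of that selection, using the anchor list above:

    anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
    coarse_head = [anchors[i] for i in (3, 4, 5)]  # large objects, 13x13 grid
    fine_head   = [anchors[i] for i in (0, 1, 2)]  # small objects, 26x26 grid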
source_code/models.py
0 → 100644
This diff is collapsed (not shown).
source_code/roipool.py
0 → 100644
+from __future__ import division
+
+import torch
+import torch.nn as nn
+
+from utils.utils import *
+
+
+class ROIPool(nn.Module):
+    """Pools a fixed-size feature window per box and regresses a distance from it."""
+
+    def __init__(self, output_size):
+        super(ROIPool, self).__init__()
+        self.maxpool = nn.AdaptiveMaxPool2d(output_size)
+        self.size = output_size
+        self.fc1 = nn.Linear(2304, 1024)
+        self.fc2 = nn.Linear(1024, 512)
+        self.fc3 = nn.Linear(512, 1)
+        self.softplus = nn.Softplus()
+        self.smoothl1 = nn.SmoothL1Loss()
+        self.mse = nn.MSELoss()
+
+    def target_detection_iou(self, box1, box2):
+        """IoU between two boxes given as (x1, y1, x2, y2)."""
+        # Cast to float64 so mixed-precision inputs compare cleanly
+        b1_x1, b1_y1, b1_x2, b1_y2 = [c.type(torch.float64) for c in box1[:4]]
+        b2_x1, b2_y1, b2_x2, b2_y2 = [c.type(torch.float64) for c in box2[:4]]
+
+        # Coordinates of the intersection rectangle
+        inter_rect_x1 = torch.max(b1_x1, b2_x1)
+        inter_rect_y1 = torch.max(b1_y1, b2_y1)
+        inter_rect_x2 = torch.min(b1_x2, b2_x2)
+        inter_rect_y2 = torch.min(b1_y2, b2_y2)
+        # Intersection area
+        inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
+            inter_rect_y2 - inter_rect_y1 + 1, min=0
+        )
+        # Union area
+        b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
+        b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
+
+        iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
+
+        return iou
+
+    def similar_bbox(self, detections, targets):
+        """Match each detection to a target's distance label; assumes 640x480 source images."""
+        rescaled_boxes = rescale_boxes(detections, 416, (480, 640))
+        similar_box = list(range(len(rescaled_boxes)))
+        for i in range(len(rescaled_boxes)):
+            for j in range(len(targets)):
+                # Convert the normalized (cx, cy, w, h) target to pixel (x1, y1, x2, y2)
+                target_xyxy = [(targets[j][0] - (targets[j][2] / 2)) * 640,
+                               (targets[j][1] - (targets[j][3] / 2)) * 480,
+                               (targets[j][0] + (targets[j][2] / 2)) * 640,
+                               (targets[j][1] + (targets[j][3] / 2)) * 480]
+                target_xyxy = torch.tensor(target_xyxy)
+                iou = self.target_detection_iou(rescaled_boxes[i][:4], target_xyxy)
+                if iou > 0.01:
+                    similar_box[i] = targets[j][-1]
+                    break
+                else:
+                    similar_box[i] = -1
+        return similar_box
+
+    def cal_scale(self, x, detections, targets):
+        """Crop and pool one feature-map window per ground-truth box (training path)."""
+        targets_distance = targets[:, :4]
+        square_targets = []
+
+        for target_distance in targets_distance:
+            # Map normalized (cx, cy, w, h) targets into 416x416 padded-image coordinates
+            x1 = (target_distance[0] - (target_distance[2] / 2)) * 416
+            y1 = ((target_distance[1] - (target_distance[3] / 2)) * 480 + 80) * 13 / 15
+            x2 = (target_distance[0] + (target_distance[2] / 2)) * 416
+            y2 = ((target_distance[1] + (target_distance[3] / 2)) * 480 + 80) * 13 / 15
+
+            square_targets.append([x1, y1, x2, y2])
+        square_targets = torch.tensor(square_targets)
+
+        scale = get_scale(square_targets)
+
+        roi_results = []
+        for x1_scale, y1_scale, x2_scale, y2_scale in scale:
+            # NCHW layout: dim 2 indexes rows (y), dim 3 indexes columns (x)
+            output = x[:, :, y1_scale:y2_scale + 1, x1_scale:x2_scale + 1]
+            output = self.maxpool(output)
+            output = output.view(1, -1)
+            roi_results.append(output)
+        return roi_results
+
+    def cal_scale_eval(self, x, detections):
+        """Crop and pool one feature-map window per detection (inference path)."""
+        detections = detections[:, :4]
+        scale = get_scale(detections)
+        roi_results = []
+        for x1_scale, y1_scale, x2_scale, y2_scale in scale:
+            output = x[:, :, y1_scale:y2_scale + 1, x1_scale:x2_scale + 1]
+            output = self.maxpool(output)
+            output = output.view(1, -1)
+            roi_results.append(output)
+        return roi_results
+
+    def forward(self, x, detections, targets=None):
+        if targets is not None:
+            # Ground-truth distances are stored scaled down by a factor of 10
+            distances = targets[:, 4] * 10
+            roi_results = self.cal_scale(x, detections, targets)
+
+            output = torch.cat(roi_results, 0)
+            output = self.fc1(output)
+            output = self.fc2(output)
+            output = self.fc3(output)
+            output = self.softplus(output)  # distances are non-negative
+
+            distances = distances.cuda()
+            # Match shapes to (N, 1) so the loss is computed element-wise, not broadcast
+            loss = self.smoothl1(output, distances.float().view(-1, 1))
+            return loss, output
+
+        else:
+            roi_results = self.cal_scale_eval(x, detections)
+            output = torch.cat(roi_results, 0)
+            output = self.fc1(output)
+            output = self.fc2(output)
+            output = self.fc3(output)
+            output = self.softplus(output)
+            return output
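A minimal usage sketch for ROIPool's inference path. It assumes get_scale (defined in utils/utils.py, whose diff is collapsed below) returns integer (x1, y1, x2, y2) slice bounds on the feature map, and that the feature map has 256 channels so a 3x3 pooled window matches fc1's expected 2304 = 3*3*256 inputs; all shapes here are illustrative:

    pool = ROIPool(output_size=3)
    features = torch.randn(1, 256, 13, 13)             # backbone feature map (NCHW)
    detections = torch.tensor([[2.0, 2.0, 8.0, 8.0]])  # one (x1, y1, x2, y2) box
    predicted_distance = pool(features, detections)    # no targets: returns (N, 1)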
source_code/utils/__init__.py
0 → 100644
File mode changed
No preview for this file type (11 binary files; file names not shown).
source_code/utils/augmentations.py
0 → 100644
source_code/utils/datasets.py
0 → 100644
+import glob
+import random
+import os
+import numpy as np
+from PIL import Image
+import torch
+import torch.nn.functional as F
+
+from utils.augmentations import horisontal_flip
+from torch.utils.data import Dataset
+import torchvision.transforms as transforms
+
+
+def pad_to_square(img, pad_value):
+    c, h, w = img.shape
+    dim_diff = np.abs(h - w)
+    # (upper / left) padding and (lower / right) padding
+    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
+    # Determine padding
+    pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
+    # Add padding
+    img = F.pad(img, pad, "constant", value=pad_value)
+
+    return img, pad
+
+
+def resize(image, size):
+    image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
+    return image
+
+
+def random_resize(images, min_size=288, max_size=448):
+    new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0]
+    images = F.interpolate(images, size=new_size, mode="nearest")
+    return images
+
+
+class ImageFolder(Dataset):
+    def __init__(self, folder_path, img_size=416):
+        self.files = sorted(glob.glob("%s/*.*" % folder_path))
+        self.img_size = img_size
+
+    def __getitem__(self, index):
+        img_path = self.files[index % len(self.files)]
+        # Extract image as PyTorch tensor
+        img = transforms.ToTensor()(Image.open(img_path))
+        # Pad to square resolution
+        img, _ = pad_to_square(img, 0)
+        # Resize
+        img = resize(img, self.img_size)
+
+        return img_path, img
+
+    def __len__(self):
+        return len(self.files)
+
+
+class ListDataset(Dataset):
+    def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True):
+        with open(list_path, "r") as file:
+            self.img_files = file.readlines()
+
+        self.label_files = [
+            path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt")
+            for path in self.img_files
+        ]
+        self.img_size = img_size
+        self.max_objects = 100
+        self.augment = augment
+        self.multiscale = multiscale
+        self.normalized_labels = normalized_labels
+        self.min_size = self.img_size - 3 * 32
+        self.max_size = self.img_size + 3 * 32
+        self.batch_count = 0
+
+    def __getitem__(self, index):
+
+        # ---------
+        #  Image
+        # ---------
+
+        img_path = self.img_files[index % len(self.img_files)].rstrip()
+        # Extract image as PyTorch tensor
+        img = transforms.ToTensor()(Image.open(img_path, 'r').convert('RGB'))
+
+        # Handle images with fewer than three channels
+        if len(img.shape) != 3:
+            img = img.unsqueeze(0)
+            img = img.expand(3, *img.shape[1:])
+
+        _, h, w = img.shape
+        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
+        # Pad to square resolution
+        img, pad = pad_to_square(img, 0)
+        _, padded_h, padded_w = img.shape
+
+        # ---------
+        #  Label
+        # ---------
+
+        label_path = self.label_files[index % len(self.img_files)].rstrip()
+
+        targets = None
+        targets_distance = None
+        if os.path.exists(label_path):
+            # Each label row is (class, cx, cy, w, h, distance); load the file once
+            labels = torch.from_numpy(np.loadtxt(label_path))
+            if labels.ndim == 2:
+                boxes = labels[:, :-1].reshape(-1, 5)
+            else:
+                boxes = labels[:-1].reshape(-1, 5)
+            # Extract coordinates for unpadded + unscaled image
+            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
+            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
+            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
+            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
+            # Adjust for added padding
+            x1 += pad[0]
+            y1 += pad[2]
+            x2 += pad[1]
+            y2 += pad[3]
+            # Returns (x, y, w, h)
+            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
+            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
+            boxes[:, 3] *= w_factor / padded_w
+            boxes[:, 4] *= h_factor / padded_h
+
+            targets = torch.zeros((len(boxes), 6))
+            targets[:, 1:] = boxes
+
+            # (cx, cy, w, h, distance) rows for the distance head
+            if labels.ndim == 2:
+                targets_distance = labels[:, 1:].reshape(-1, 5)
+            else:
+                targets_distance = labels[1:].reshape(-1, 5)
+
+        # Apply augmentations
+        # if self.augment:
+        #     if np.random.random() < 0.5:
+        #         img, targets = horisontal_flip(img, targets)
+
+        return img_path, img, targets, targets_distance
+
+    def collate_fn(self, batch):
+        paths, imgs, targets, targets_distance = list(zip(*batch))
+        # Remove empty placeholder targets
+        targets = [boxes for boxes in targets if boxes is not None]
+        # Add sample index to targets
+        for i, boxes in enumerate(targets):
+            boxes[:, 0] = i
+        targets = torch.cat(targets, 0)
+        # Select a new image size every tenth batch
+        if self.multiscale and self.batch_count % 10 == 0:
+            self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32))
+        # Resize images to input shape
+        imgs = torch.stack([resize(img, self.img_size) for img in imgs])
+        self.batch_count += 1
+        return paths, imgs, targets, targets_distance
+
+    def __len__(self):
+        return len(self.img_files)
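A minimal usage sketch for ListDataset with a DataLoader; the list-file path is a placeholder, and collate_fn must be passed explicitly so per-batch multiscale resizing and sample indexing actually run:

    from torch.utils.data import DataLoader

    dataset = ListDataset("data/train.txt", img_size=416)
    loader = DataLoader(dataset, batch_size=8, shuffle=True,
                        collate_fn=dataset.collate_fn)
    for paths, imgs, targets, targets_distance in loader:
        # imgs: (B, 3, S, S); targets: (N, 6) rows of (sample_idx, class, cx, cy, w, h)
        break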
source_code/utils/logger.py
0 → 100644
+import tensorflow as tf
+
+
+class Logger(object):
+    def __init__(self, log_dir):
+        """Create a summary writer logging to log_dir."""
+        self.writer = tf.summary.create_file_writer(log_dir)
+
+    def scalar_summary(self, tag, value, step):
+        """Log a scalar variable."""
+        with self.writer.as_default():
+            tf.summary.scalar(tag, value, step=step)
+        self.writer.flush()
+
+    def list_of_scalars_summary(self, tag_value_pairs, step):
+        """Log a list of (tag, value) scalar pairs."""
+        with self.writer.as_default():
+            for tag, value in tag_value_pairs:
+                tf.summary.scalar(tag, value, step=step)
+        self.writer.flush()
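A minimal usage sketch (TensorFlow 2 summary API; the log directory is a placeholder):

    logger = Logger("logs")
    logger.scalar_summary("loss", 0.42, step=100)
    logger.list_of_scalars_summary([("loss", 0.42), ("recall", 0.9)], step=100)

The logged scalars can then be inspected with tensorboard --logdir logs.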
source_code/utils/parse_config.py
0 → 100644
+def parse_model_config(path):
+    """Parses the YOLOv3 layer configuration file and returns module definitions."""
+    with open(path, 'r') as file:
+        lines = file.read().split('\n')
+    lines = [x.strip() for x in lines]  # get rid of fringe whitespace
+    lines = [x for x in lines if x and not x.startswith('#')]
+    module_defs = []
+    for line in lines:
+        if line.startswith('['):  # This marks the start of a new block
+            module_defs.append({})
+            module_defs[-1]['type'] = line[1:-1].rstrip()
+            if module_defs[-1]['type'] == 'convolutional':
+                module_defs[-1]['batch_normalize'] = 0
+        else:
+            key, value = line.split("=")
+            module_defs[-1][key.rstrip()] = value.strip()
+
+    return module_defs
+
+
+def parse_data_config(path):
+    """Parses the data configuration file."""
+    options = dict()
+    options['gpus'] = '0,1,2,3'
+    options['num_workers'] = '10'
+    with open(path, 'r') as fp:
+        lines = fp.readlines()
+    for line in lines:
+        line = line.strip()
+        if line == '' or line.startswith('#'):
+            continue
+        key, value = line.split('=')
+        options[key.strip()] = value.strip()
+    return options
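A minimal usage sketch, reading tiny1.cfg from this PR; note every parsed value is a string, so callers must cast:

    module_defs = parse_model_config("source_code/config/tiny1.cfg")
    print(module_defs[0]["type"])     # 'net'
    print(module_defs[0]["batch"])    # '8'
    print(module_defs[1]["filters"])  # '16' (first convolutional block)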
source_code/utils/utils.py
0 → 100644
This diff is collapsed (not shown).