Showing
41 changed files
with
3193 additions
and
0 deletions
2015101094_전형진_최종보고서.pdf
0 → 100644
No preview for this file type
source/.vscode/settings.json
0 → 100644
source/LICENSE
0 → 100644
1 | +MIT License | ||
2 | + | ||
3 | +Copyright (c) 2017 liukuang | ||
4 | + | ||
5 | +Permission is hereby granted, free of charge, to any person obtaining a copy | ||
6 | +of this software and associated documentation files (the "Software"), to deal | ||
7 | +in the Software without restriction, including without limitation the rights | ||
8 | +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
9 | +copies of the Software, and to permit persons to whom the Software is | ||
10 | +furnished to do so, subject to the following conditions: | ||
11 | + | ||
12 | +The above copyright notice and this permission notice shall be included in all | ||
13 | +copies or substantial portions of the Software. | ||
14 | + | ||
15 | +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
18 | +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
19 | +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
20 | +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
21 | +SOFTWARE. |
source/README.md
0 → 100644
1 | +# Train CIFAR10 with PyTorch | ||
2 | + | ||
3 | +I'm playing with [PyTorch](http://pytorch.org/) on the CIFAR10 dataset. | ||
4 | + | ||
5 | +## Prerequisites | ||
6 | +- Python 3.6+ | ||
7 | +- PyTorch 1.0+ | ||
8 | + | ||
9 | +## Training | ||
10 | +``` | ||
11 | +# Start training with: | ||
12 | +python main.py | ||
13 | + | ||
14 | +# You can manually resume the training with: | ||
15 | +python main.py --resume --lr=0.01 | ||
16 | +``` | ||
17 | + | ||
18 | +## Accuracy | ||
19 | +| Model | Acc. | | ||
20 | +| ----------------- | ----------- | | ||
21 | +| [VGG16](https://arxiv.org/abs/1409.1556) | 92.64% | | ||
22 | +| [ResNet18](https://arxiv.org/abs/1512.03385) | 93.02% | | ||
23 | +| [ResNet50](https://arxiv.org/abs/1512.03385) | 93.62% | | ||
24 | +| [ResNet101](https://arxiv.org/abs/1512.03385) | 93.75% | | ||
25 | +| [RegNetX_200MF](https://arxiv.org/abs/2003.13678) | 94.24% | | ||
26 | +| [RegNetY_400MF](https://arxiv.org/abs/2003.13678) | 94.29% | | ||
27 | +| [MobileNetV2](https://arxiv.org/abs/1801.04381) | 94.43% | | ||
28 | +| [ResNeXt29(32x4d)](https://arxiv.org/abs/1611.05431) | 94.73% | | ||
29 | +| [ResNeXt29(2x64d)](https://arxiv.org/abs/1611.05431) | 94.82% | | ||
30 | +| [SimpleDLA](https://arxiv.org/abs/1707.06484) | 94.89% | | ||
31 | +| [DenseNet121](https://arxiv.org/abs/1608.06993) | 95.04% | | ||
32 | +| [PreActResNet18](https://arxiv.org/abs/1603.05027) | 95.11% | | ||
33 | +| [DPN92](https://arxiv.org/abs/1707.01629) | 95.16% | | ||
34 | +| [DLA](https://arxiv.org/pdf/1707.06484.pdf) | 95.47% | | ||
35 | + |
source/__pycache__/lsq_int.cpython-36.pyc
0 → 100644
No preview for this file type
source/__pycache__/lsq_sq.cpython-36.pyc
0 → 100644
No preview for this file type
source/__pycache__/replace.cpython-36.pyc
0 → 100644
No preview for this file type
No preview for this file type
source/__pycache__/utils.cpython-36.pyc
0 → 100644
No preview for this file type
source/lsq_int.py
0 → 100644
1 | +import torch | ||
2 | +import torch.nn as nn | ||
3 | +import torch.nn.functional as F | ||
4 | +import math | ||
5 | +from decimal import Decimal | ||
6 | +import numpy as np | ||
7 | + | ||
8 | +# Parent Class for Quantization Module | ||
class LSQModule:
    """Mixin carrying the quantization settings shared by the layers below.

    ``__init__`` calls ``self.register_buffer``, so the mixin has to be
    combined with ``torch.nn.Module`` and initialised after it.
    """

    def __init__(self, abit=None, wbit=None, ibit=None, dequantize=True, scale=None):
        """Store the bit widths, the dequantize flag and the step size.

        A one-element zero buffer named ``init_state`` is registered as well.
        """
        self.abit, self.wbit, self.ibit = abit, wbit, ibit
        self.dequantize = dequantize
        self.register_buffer('init_state', torch.zeros(1))
        self.scale = scale

    # Plain setters for the configuration attributes.
    def set_abit(self, v):
        """Replace the activation bit width."""
        self.abit = v

    def set_wbit(self, v):
        """Replace the weight bit width."""
        self.wbit = v

    def set_ibit(self, v):
        """Replace the ``ibit`` setting."""
        self.ibit = v

    def set_dequantize(self, v):
        """Replace the dequantize flag."""
        self.dequantize = v
27 | + | ||
class QAvgPool2d(nn.AdaptiveAvgPool2d, LSQModule):
    """Adaptive average pooling that re-quantizes its result to signed ``abit`` integers."""

    def __init__(self, abit, dequantize=True, output_size=(1,1)):
        super(QAvgPool2d, self).__init__(output_size)
        # NOTE(review): ``scale`` is allocated uninitialised and never set in
        # this class; presumably it is loaded from a checkpoint - confirm.
        LSQModule.__init__(self, abit=abit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))

    def __repr__(self):
        return self.__class__.__name__ + '(' \
            + 'output_size=' + str(self.output_size) \
            + ', abit=' + str(self.abit) \
            + ')'

    def forward(self, x):
        """Pool ``x[0]`` and rescale it from the scale ``x[1]`` to ``self.scale``.

        Args:
            x: indexable pair ``(values, former_scale)``.

        Returns:
            ``(quantized, self.scale)`` where ``quantized`` is a float32 tensor
            of rounded values clamped to the signed ``abit`` range. It lives
            on the same device as ``x[0]`` and is detached from autograd.
        """
        former = x[1]
        pooled = super().forward(x[0])
        Qn = - (2 ** (self.abit - 1))
        Qp = 2 ** (self.abit - 1) - 1

        act_scale = self.scale
        # The ratio is formed in float32 first, then the division is carried
        # out in float64 before casting back to float32. Everything stays on
        # the input's device instead of going through numpy and a forced
        # ``.cuda()``, so CPU-only runs work as well.
        down_scale = (act_scale / former).detach().double()
        rescaled = (pooled.detach().double() / down_scale).float()
        quantized = torch.round(rescaled).clamp(Qn, Qp)

        return quantized, act_scale
59 | + | ||
class QMaxPool2d(nn.MaxPool2d, LSQModule):
    """Max pooling layer that also carries the ``LSQModule`` attributes."""

    def __init__(self, kernel_size=3, stride=2, padding=1):
        super().__init__(kernel_size=kernel_size, stride=stride, padding=padding)
        LSQModule.__init__(self)

    def __repr__(self):
        return (f"{type(self).__name__}(kernel_size={self.kernel_size}"
                f", stride={self.stride}, padding={self.padding})")

    def forward(self, x, act_scale=None):
        """Return the max-pooled ``x``; ``act_scale`` is accepted but not used."""
        return super().forward(x)
75 | + | ||
class QReLU(nn.Module, LSQModule):
    """ReLU followed by quantization to the unsigned range ``[0, 2**abit - 1]``."""

    def __init__(self, abit, dequantize=True, inplace=False):
        super().__init__()
        LSQModule.__init__(self, abit=abit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))
        self.inplace = inplace

    def __repr__(self):
        return (f"{type(self).__name__}(abit={self.abit}"
                f", dequantize={self.dequantize}, inplace={self.inplace}"
                f", init_state={self.init_state!s})")

    def forward(self, x):
        """Rectify ``x``, quantize it and return ``(result, act_scale)``.

        On the first training call (``init_state == 0``) the step size is set
        from the mean absolute activation and ``init_state`` is set to 1.
        The result is multiplied back by the step size when ``dequantize``
        is true.
        """
        rectified = F.relu(x)
        lower = 0.
        upper = (2 ** self.abit) - 1
        if self.training and self.init_state == 0:
            self.scale.data.copy_(2 * rectified.abs().mean() / math.sqrt(upper))
            self.init_state.fill_(1)

        grad_factor = 1.0 / math.sqrt(rectified.numel() * upper)
        act_scale = grad_scale(self.scale, grad_factor)
        quantized = round_pass((rectified / act_scale).clamp(lower, upper))
        if self.dequantize:
            quantized = quantized * act_scale
        return quantized, act_scale
105 | + | ||
class QLeakyReLU(nn.Module, LSQModule):
    """Leaky ReLU on already-quantized inputs, re-quantized to signed ``abit`` integers."""

    def __init__(self, abit, negative_slope=0.1, dequantize=True, inplace=False):
        super(QLeakyReLU, self).__init__()
        # NOTE(review): ``scale`` is allocated uninitialised and never set in
        # this class; presumably it is loaded from a checkpoint - confirm.
        LSQModule.__init__(self, abit=abit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))
        self.inplace = inplace
        self.negative_slope = negative_slope

    def __repr__(self):
        return self.__class__.__name__ + '(' \
            + 'abit=' + str(self.abit) \
            + ', negative_slope=' + str(self.negative_slope) \
            + ', inplace=' + str(self.inplace) \
            + ')'

    def forward(self, input):
        """Apply the leaky ReLU and move the values onto ``self.scale``.

        Args:
            input: indexable pair ``(values, deq_scale)``.

        Returns:
            ``(quantized, self.scale)`` where ``quantized`` is a float32
            tensor, rounded and clamped to the signed ``abit`` range, on the
            same device as ``input[0]`` and detached from autograd.
        """
        deq_scale = input[1]
        values = input[0].detach().double()

        Qn = - (2 ** (self.abit - 1))
        Qp = 2 ** (self.abit - 1) - 1

        # Both factors are formed in float32 and only then widened, so the
        # arithmetic below runs in float64 on the input's device. This
        # replaces the numpy round trip and the hard-coded ``.cuda()``.
        down_scale = deq_scale / self.scale
        slope_scale = self.negative_slope * down_scale
        down = down_scale.detach().double()
        slope = slope_scale.detach().double()

        output = torch.where(values < 0, values * slope, values * down).float()

        x = torch.round(output).clamp(Qn, Qp)
        return x, self.scale
141 | + | ||
class QHswish(nn.Hardswish, LSQModule):
    """Piecewise hard-swish on already-quantized inputs, re-quantized to signed ``abit`` integers."""

    def __init__(self, abit, dequantize=True, inplace=False):
        super(QHswish, self).__init__(inplace=inplace)
        # NOTE(review): ``scale`` is allocated uninitialised and never set in
        # this class; presumably it is loaded from a checkpoint - confirm.
        LSQModule.__init__(self, abit=abit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))
        self.inplace = inplace

    def __repr__(self):
        return self.__class__.__name__ + '(' \
            + 'abit=' + str(self.abit) \
            + ', inplace=' + str(self.inplace) \
            + ')'

    def forward(self, input):
        """Evaluate the activation piecewise and move the result onto ``self.scale``.

        Args:
            input: indexable pair ``(values, deq_scale)``; ``deq_scale`` must
                be a one-element tensor because it is converted with ``int``.

        Returns:
            ``(quantized, self.scale)`` where ``quantized`` is a float32
            tensor, rounded and clamped to the signed ``abit`` range, on the
            same device as ``input[0]`` and detached from autograd.
        """
        deq_scale = input[1]
        values = input[0].detach().double()

        Qn = - (2 ** (self.abit - 1))
        Qp = 2 ** (self.abit - 1) - 1

        q_scale = self.scale
        down_scale = deq_scale / q_scale

        # Threshold 3 expressed in units of the incoming scale.
        flag = int(torch.round(3 / deq_scale))
        # Coefficients are formed in float32 and then widened, so the
        # piecewise arithmetic runs in float64 on the input's device instead
        # of going through numpy and a hard-coded ``.cuda()``.
        c1 = (down_scale * deq_scale / 6).detach().double()
        c2 = (down_scale / 2).detach().double()
        down = down_scale.detach().double()

        # values <= -flag -> 0; values >= flag -> rescale only;
        # otherwise the quadratic branch values * (c1 * values + c2).
        values = torch.where(values <= -flag, values * 0, values)
        values = torch.where(values >= flag, down * values,
                             values * (c1 * values + c2)).float()

        x = torch.round(values).clamp(Qn, Qp)

        return x, self.scale
188 | + | ||
class QConv2d(nn.Conv2d, LSQModule):
    """Conv2d whose weight (and bias) are quantized with a learned step size."""

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=False, wbit=32, dequantize=True):
        super(QConv2d, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias)
        LSQModule.__init__(self, wbit=wbit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))

    def __repr__(self):  # show the detailed attributes on print(model)
        return self.__class__.__name__ + '(' \
            + 'in_channels=' + str(self.in_channels) \
            + ', out_channels=' + str(self.out_channels) \
            + ', bias=' + str(self.bias is not None) \
            + ', kernel_size=' + str(self.kernel_size) \
            + ', stride=' + str(self.stride) \
            + ', groups=' + str(self.groups) \
            + ', padding=' + str(self.padding) \
            + ', wbit=' + str(self.wbit) \
            + ')'

    def forward(self, x, act_scale=None):
        """Convolve ``x`` with quantized copies of the weight and bias.

        The stored parameters are left untouched: quantization is applied to
        temporaries on every call. Writing the quantized values back into
        ``weight.data`` would compound across calls when ``dequantize`` is
        false and would cut the gradient path to ``scale``.

        Args:
            x: input tensor.
            act_scale: scale of the incoming activations; required when the
                layer has a bias, because the bias step is ``scale * act_scale``.

        Returns:
            The convolution output tensor.
        """
        Qn = - (2 ** (self.wbit - 1))
        Qp = 2 ** (self.wbit - 1) - 1
        if self.training and self.init_state == 0:
            # First training call: derive the step size from the weights.
            self.scale.data.copy_(2 * self.weight.abs().mean() / math.sqrt(Qp))
            self.init_state.fill_(1)

        g = 1.0 / math.sqrt(x.numel() * Qp)
        scale = grad_scale(self.scale, g)

        q_weight = round_pass((self.weight / scale).clamp(Qn, Qp))
        if self.dequantize:
            q_weight = q_weight * scale

        q_bias = self.bias
        if q_bias is not None:
            bias_scale = scale * act_scale
            q_bias = round_pass((q_bias / bias_scale).clamp(Qn, Qp))
            if self.dequantize:
                q_bias = q_bias * bias_scale

        return F.conv2d(x, q_weight, q_bias, self.stride, self.padding,
                        self.dilation, self.groups)
230 | + | ||
class QLinear(nn.Linear, LSQModule):
    """Linear layer that runs on integer-valued weight and bias copies."""

    def __init__(self, in_features, out_features, bias=True, wbit=32, dequantize=True):
        super(QLinear, self).__init__(in_features, out_features, bias)
        # NOTE(review): ``scale`` is allocated uninitialised and never set in
        # this class; presumably it is loaded from a checkpoint - confirm.
        LSQModule.__init__(self, wbit=wbit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))

    def __repr__(self):
        return self.__class__.__name__ + '(' \
            + 'in_features=' + str(self.in_features) \
            + ', out_features=' + str(self.out_features) \
            + ', bias=' + str(self.bias is not None) \
            + ', wbit=' + str(self.wbit) \
            + ')'

    def forward(self, input, act_scale=None):
        """Apply the layer with weights rounded onto the ``wbit`` grid.

        Args:
            input: input tensor.
            act_scale: scale of the incoming activations; required when
                ``wbit < 32`` and the layer has a bias, because the bias is
                divided by ``scale * act_scale``.

        Returns:
            The output of ``F.linear``. When ``wbit >= 32`` the stored weight
            and bias are used unchanged.
        """
        # Start from the stored parameters so that both names are always
        # bound, including for ``bias=None`` and for ``wbit >= 32``.
        cur_weight = self.weight
        cur_bias = self.bias

        if self.wbit < 32:
            Qn = - (2 ** (self.wbit - 1))
            Qp = 2 ** (self.wbit - 1) - 1

            scale = self.scale

            cur_weight = torch.round((self.weight.data / scale).clamp(Qn, Qp))

            if self.bias is not None:
                bias_scale = scale * act_scale
                cur_bias = torch.round(self.bias.data / bias_scale)

        output = F.linear(input, cur_weight, cur_bias)
        return output
263 | + | ||
class Input_Quantizer(nn.Module, LSQModule):
    """Maps the network input onto signed ``abit`` integers using ``self.scale``."""

    def __init__(self, abit=8, dequantize=True):
        super().__init__()
        LSQModule.__init__(self, abit=abit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))

    def __repr__(self):
        return (f"{type(self).__name__}(abit={self.abit}"
                f", dequantize={self.dequantize}"
                f", init_state={self.init_state!s})")

    def forward(self, x):
        """Return ``(round(clamp(x / scale)), scale)``; the result is not dequantized."""
        half_range = 2 ** (self.abit - 1)
        lower, upper = -half_range, half_range - 1

        quantized = torch.round((x / self.scale).clamp(lower, upper))

        return quantized, self.scale
284 | + | ||
class FuseConv2dQ(QConv2d):
    """Convolution with a folded BatchNorm, evaluated on integer-valued weights."""

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True, wbit=32, dequantize=True):
        super(FuseConv2dQ, self).__init__(
            in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
            stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias,
            wbit=wbit, dequantize=dequantize)

        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Run the fused convolution on a ``(values, act_scale)`` pair.

        Returns:
            ``(output, bias_scale)`` where ``bias_scale = scale * act_scale``;
            ``output`` is left in the quantized domain (not dequantized).
        """
        act_scale = x[1]
        x = x[0]

        # Simulate folding the BatchNorm into the convolution.
        f_weight, f_bias = self.fusing()
        Qn = - (2 ** (self.wbit - 1))
        Qp = 2 ** (self.wbit - 1) - 1

        scale = self.scale
        q_weight = torch.round((f_weight.data / scale).clamp(Qn, Qp))
        bias_scale = scale * act_scale
        q_bias = torch.round(f_bias / bias_scale)

        output = F.conv2d(x, q_weight, q_bias, self.stride, self.padding, self.dilation, self.groups)

        return output, bias_scale

    def replace_bn(self, bn_module):
        """Use ``bn_module`` as the folded BatchNorm and turn off its stat tracking flag."""
        self.bn = bn_module
        self.bn.track_running_stats = False

    def fusing(self):
        """Return the conv weight and bias with the BatchNorm statistics folded in."""
        std = torch.sqrt(self.bn.running_var + self.bn.eps)
        gamma = self.bn.weight / std
        f_weight = self.weight * gamma.reshape([len(self.bn.weight), 1, 1, 1])
        if self.bias is not None:
            # ``running_mean`` was misspelled here, which raised AttributeError
            # for every layer created with a bias.
            f_bias = self.bn.bias + (self.bias - self.bn.running_mean) * gamma
        else:
            f_bias = self.bn.bias - self.bn.running_mean * gamma
        return f_weight, f_bias
326 | + | ||
def grad_scale(x, scale):
    """Return a tensor equal to ``x`` whose gradient w.r.t. ``x`` is multiplied by ``scale``."""
    scaled = x * scale
    # The detached difference restores the forward value; only ``scaled``
    # takes part in back-propagation.
    return (x - scaled).detach() + scaled
333 | + | ||
def round_pass(x):
    """Round ``x`` in the forward pass while passing the gradient through unchanged."""
    # The rounding offset is detached, so autograd only sees the ``+ x`` term.
    return (torch.round(x) - x).detach() + x
340 | + |
source/lsq_sq.py
0 → 100644
1 | +import torch | ||
2 | +import torch.nn as nn | ||
3 | +import torch.nn.functional as F | ||
4 | +import math | ||
5 | + | ||
6 | +# Parent Class for Quantization Module | ||
class LSQModule:
    """Mixin with the quantization settings used by the quantized layers.

    Because ``__init__`` calls ``self.register_buffer``, the mixin must be
    combined with ``torch.nn.Module`` and initialised after it.
    """

    def __init__(self, abit=None, wbit=None, ibit=None, dequantize=True, scale=None):
        """Record bit widths, dequantize flag and step size; register ``init_state``.

        ``init_state`` is a one-element zero buffer.
        """
        self.abit, self.wbit, self.ibit = abit, wbit, ibit
        self.dequantize = dequantize
        self.register_buffer('init_state', torch.zeros(1))
        self.scale = scale

    # Plain setters for the configuration attributes.
    def set_abit(self, v):
        """Replace the activation bit width."""
        self.abit = v

    def set_wbit(self, v):
        """Replace the weight bit width."""
        self.wbit = v

    def set_ibit(self, v):
        """Replace the ``ibit`` setting."""
        self.ibit = v

    def set_dequantize(self, v):
        """Replace the dequantize flag."""
        self.dequantize = v
25 | + | ||
26 | + | ||
class QAvgPool2d(nn.AdaptiveAvgPool2d, LSQModule):
    """Adaptive average pooling followed by quantization to ``[0, 2**abit - 1]``."""

    def __init__(self, abit, dequantize=True, output_size=(1,1)):
        super().__init__(output_size)
        LSQModule.__init__(self, abit=abit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))

    def __repr__(self):
        return f"{type(self).__name__}(output_size={self.output_size}, abit={self.abit})"

    def forward(self, x):
        """Pool ``x[0]``, quantize it and return ``(result, act_scale)``.

        On the first training call (``init_state == 0``) the step size is set
        from the mean absolute pooled value. The result is multiplied back by
        the step size when ``dequantize`` is true.
        """
        pooled = super().forward(x[0])
        lower = 0.
        upper = (2 ** self.abit) - 1
        if self.training and self.init_state == 0:
            self.scale.data.copy_(2 * pooled.abs().mean() / math.sqrt(upper))
            self.init_state.fill_(1)

        grad_factor = 1.0 / math.sqrt(pooled.numel() * upper)
        act_scale = grad_scale(self.scale, grad_factor)
        quantized = round_pass((pooled / act_scale).clamp(lower, upper))
        if self.dequantize:
            quantized = quantized * act_scale
        return quantized, act_scale
54 | + | ||
55 | + | ||
class QMaxPool2d(nn.MaxPool2d, LSQModule):
    """Max pooling layer that additionally exposes the ``LSQModule`` attributes."""

    def __init__(self, kernel_size=3, stride=2, padding=1):
        super().__init__(kernel_size=kernel_size, stride=stride, padding=padding)
        LSQModule.__init__(self)

    def __repr__(self):
        return (f"{type(self).__name__}(kernel_size={self.kernel_size}"
                f", stride={self.stride}, padding={self.padding})")

    def forward(self, x, act_scale=None):
        """Return the max-pooled ``x``; ``act_scale`` is accepted but not used."""
        return super().forward(x)
71 | + | ||
72 | + | ||
class QReLU(nn.Module, LSQModule):
    """ReLU followed by quantization to the unsigned range ``[0, 2**abit - 1]``."""

    def __init__(self, abit, dequantize=True, inplace=False):
        super().__init__()
        LSQModule.__init__(self, abit=abit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))
        self.inplace = inplace

    def __repr__(self):
        return (f"{type(self).__name__}(abit={self.abit}"
                f", dequantize={self.dequantize}, inplace={self.inplace}"
                f", init_state={self.init_state!s})")

    def forward(self, x):
        """Rectify ``x``, quantize it and return ``(result, act_scale)``.

        On the first training call (``init_state == 0``) the step size is set
        from the mean absolute activation and ``init_state`` is set to 1.
        The result is multiplied back by the step size when ``dequantize``
        is true.
        """
        rectified = F.relu(x)
        lower = 0.
        upper = (2 ** self.abit) - 1
        if self.training and self.init_state == 0:
            self.scale.data.copy_(2 * rectified.abs().mean() / math.sqrt(upper))
            self.init_state.fill_(1)

        grad_factor = 1.0 / math.sqrt(rectified.numel() * upper)
        act_scale = grad_scale(self.scale, grad_factor)
        quantized = round_pass((rectified / act_scale).clamp(lower, upper))
        if self.dequantize:
            quantized = quantized * act_scale
        return quantized, act_scale
102 | + | ||
class QLeakyReLU(nn.Module, LSQModule):
    """Leaky ReLU followed by quantization to the signed ``abit`` range."""

    def __init__(self, abit, negative_slope=0.1, dequantize=True, inplace=False):
        super().__init__()
        LSQModule.__init__(self, abit=abit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))
        self.inplace = inplace
        self.negative_slope = negative_slope

    def __repr__(self):
        return (f"{type(self).__name__}(abit={self.abit}"
                f", negative_slope={self.negative_slope}"
                f", inplace={self.inplace})")

    def forward(self, input):
        """Apply the leaky ReLU, quantize and return ``(result, act_scale)``.

        On the first training call (``init_state == 0``) the step size is set
        from the mean absolute activation. The result is multiplied back by
        the step size when ``dequantize`` is true.
        """
        activated = F.leaky_relu(input=input, negative_slope=self.negative_slope)
        half_range = 2 ** (self.abit - 1)
        lower, upper = -half_range, half_range - 1
        if self.training and self.init_state == 0:
            self.scale.data.copy_(2 * activated.abs().mean() / math.sqrt(upper))
            self.init_state.fill_(1)

        grad_factor = 1.0 / math.sqrt(activated.numel() * upper)
        act_scale = grad_scale(self.scale, grad_factor)
        quantized = round_pass((activated / act_scale).clamp(lower, upper))
        if self.dequantize:
            quantized = quantized * act_scale

        return quantized, act_scale
135 | + | ||
class QHswish(nn.Hardswish, LSQModule):
    """Hardswish followed by quantization to the signed ``abit`` range."""

    def __init__(self, abit, dequantize=True, inplace=False):
        super().__init__(inplace=inplace)
        LSQModule.__init__(self, abit=abit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))
        self.inplace = inplace

    def __repr__(self):
        return f"{type(self).__name__}(abit={self.abit}, inplace={self.inplace})"

    def forward(self, input):
        """Apply Hardswish, quantize and return ``(result, act_scale)``.

        On the first training call (``init_state == 0``) the step size is set
        from the mean absolute activation. The result is multiplied back by
        the step size when ``dequantize`` is true.
        """
        activated = super().forward(input)
        half_range = 2 ** (self.abit - 1)
        lower, upper = -half_range, half_range - 1
        if self.training and self.init_state == 0:
            self.scale.data.copy_(2 * activated.abs().mean() / math.sqrt(upper))
            self.init_state.fill_(1)

        grad_factor = 1.0 / math.sqrt(activated.numel() * upper)
        act_scale = grad_scale(self.scale, grad_factor)
        quantized = round_pass((activated / act_scale).clamp(lower, upper))
        if self.dequantize:
            quantized = quantized * act_scale
        return quantized, act_scale
165 | + | ||
class QHsigmoid(nn.Hardsigmoid, LSQModule):
    """Hardsigmoid followed by quantization to the unsigned range ``[0, 2**abit - 1]``."""

    def __init__(self, abit, dequantize=True, inplace=False):
        super().__init__(inplace=inplace)
        LSQModule.__init__(self, abit=abit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))
        self.inplace = inplace

    def __repr__(self):
        return f"{type(self).__name__}(abit={self.abit}, inplace={self.inplace})"

    def forward(self, input):
        """Apply Hardsigmoid, quantize and return ``(result, act_scale)``.

        On the first training call (``init_state == 0``) the step size is set
        from the mean absolute activation. The result is multiplied back by
        the step size when ``dequantize`` is true.
        """
        activated = super().forward(input)
        lower = 0.
        upper = (2 ** self.abit) - 1
        if self.training and self.init_state == 0:
            self.scale.data.copy_(2 * activated.abs().mean() / math.sqrt(upper))
            self.init_state.fill_(1)

        grad_factor = 1.0 / math.sqrt(activated.numel() * upper)
        act_scale = grad_scale(self.scale, grad_factor)
        quantized = round_pass((activated / act_scale).clamp(lower, upper))
        if self.dequantize:
            quantized = quantized * act_scale
        return quantized, act_scale
195 | + | ||
class QConv2d(nn.Conv2d, LSQModule):
    """Conv2d whose weight (and bias) are quantized with a learned step size."""

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=False, wbit=32, dequantize=True):
        super(QConv2d, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias)
        LSQModule.__init__(self, wbit=wbit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))

    def __repr__(self):  # show the detailed attributes on print(model)
        return self.__class__.__name__ + '(' \
            + 'in_channels=' + str(self.in_channels) \
            + ', out_channels=' + str(self.out_channels) \
            + ', bias=' + str(self.bias is not None) \
            + ', kernel_size=' + str(self.kernel_size) \
            + ', stride=' + str(self.stride) \
            + ', groups=' + str(self.groups) \
            + ', padding=' + str(self.padding) \
            + ', wbit=' + str(self.wbit) \
            + ')'

    def forward(self, x, act_scale=None):
        """Convolve ``x`` with quantized copies of the weight and bias.

        The stored parameters are left untouched: quantization is applied to
        temporaries on every call. Writing the quantized values back into
        ``weight.data`` would compound across calls when ``dequantize`` is
        false and would cut the gradient path to ``scale``.

        Args:
            x: input tensor.
            act_scale: scale of the incoming activations; required when the
                layer has a bias, because the bias step is ``scale * act_scale``.

        Returns:
            The convolution output tensor.
        """
        Qn = - (2 ** (self.wbit - 1))
        Qp = 2 ** (self.wbit - 1) - 1
        if self.training and self.init_state == 0:
            # First training call: derive the step size from the weights.
            self.scale.data.copy_(2 * self.weight.abs().mean() / math.sqrt(Qp))
            self.init_state.fill_(1)

        g = 1.0 / math.sqrt(x.numel() * Qp)
        scale = grad_scale(self.scale, g)

        q_weight = round_pass((self.weight / scale).clamp(Qn, Qp))
        if self.dequantize:
            q_weight = q_weight * scale

        q_bias = self.bias
        if q_bias is not None:
            bias_scale = scale * act_scale
            q_bias = round_pass((q_bias / bias_scale).clamp(Qn, Qp))
            if self.dequantize:
                q_bias = q_bias * bias_scale

        return F.conv2d(x, q_weight, q_bias, self.stride, self.padding,
                        self.dilation, self.groups)
237 | + | ||
class QLinear(nn.Linear, LSQModule):
    """Linear layer whose weight (and bias) are quantized with a learned step size."""

    def __init__(self, in_features, out_features, bias=True, wbit=32, dequantize=True):
        super(QLinear, self).__init__(in_features, out_features, bias)
        LSQModule.__init__(self, wbit=wbit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))

    def __repr__(self):
        return self.__class__.__name__ + '(' \
            + 'in_features=' + str(self.in_features) \
            + ', out_features=' + str(self.out_features) \
            + ', bias=' + str(self.bias is not None) \
            + ', wbit=' + str(self.wbit) \
            + ')'

    def forward(self, input, act_scale=None):
        """Apply the layer with quantized copies of the weight and bias.

        The stored parameters are left untouched: quantization is applied to
        temporaries on every call. Writing the quantized values back into
        ``weight.data`` would compound across calls when ``dequantize`` is
        false and would cut the gradient path to ``scale``.

        Args:
            input: input tensor.
            act_scale: scale of the incoming activations; required when
                ``wbit < 32`` and the layer has a bias, because the bias step
                is ``scale * act_scale``.

        Returns:
            The output of ``F.linear``. When ``wbit >= 32`` the stored weight
            and bias are used unchanged.
        """
        weight, bias = self.weight, self.bias

        if self.wbit < 32:
            Qn = - (2 ** (self.wbit - 1))
            Qp = 2 ** (self.wbit - 1) - 1
            if self.training and self.init_state == 0:
                # First training call: derive the step size from the weights.
                self.scale.data.copy_(2 * self.weight.abs().mean() / math.sqrt(Qp))
                self.init_state.fill_(1)

            g = 1.0 / math.sqrt(input.numel() * Qp)
            scale = grad_scale(self.scale, g)

            weight = round_pass((self.weight / scale).clamp(Qn, Qp))
            if self.dequantize:
                weight = weight * scale

            if bias is not None:
                bias_scale = scale * act_scale
                # The bias is rounded but, unlike the weight, not clamped.
                bias = round_pass(self.bias / bias_scale)
                if self.dequantize:
                    bias = bias * bias_scale

        return F.linear(input, weight, bias)
276 | + | ||
277 | + | ||
class Input_Quantizer(nn.Module, LSQModule):
    """Quantizes the network input to the signed ``abit`` range."""

    def __init__(self, abit=8, dequantize=True):
        super().__init__()
        LSQModule.__init__(self, abit=abit, dequantize=dequantize,
                           scale=nn.Parameter(torch.Tensor(1)))

    def __repr__(self):
        return (f"{type(self).__name__}(abit={self.abit}"
                f", dequantize={self.dequantize}"
                f", init_state={self.init_state!s})")

    def forward(self, x):
        """Quantize ``x`` and return ``(result, act_scale)``.

        On the first training call (``init_state == 0``) the step size is set
        from the mean absolute input. The result is multiplied back by the
        step size when ``dequantize`` is true.
        """
        half_range = 2 ** (self.abit - 1)
        lower, upper = -half_range, half_range - 1
        if self.training and self.init_state == 0:
            self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(upper))
            self.init_state.fill_(1)

        grad_factor = 1.0 / math.sqrt(x.numel() * upper)
        act_scale = grad_scale(self.scale, grad_factor)
        quantized = round_pass((x / act_scale).clamp(lower, upper))

        if self.dequantize:
            quantized = quantized * act_scale
        return quantized, act_scale
305 | + | ||
306 | + | ||
class FuseConv2dQ(QConv2d):
    """Convolution with a folded BatchNorm whose fused weight and bias are quantized."""

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True, wbit=32, dequantize=True):
        super(FuseConv2dQ, self).__init__(
            in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
            stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias,
            wbit=wbit, dequantize=dequantize)

        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Run the fused, quantized convolution on a ``(values, act_scale)`` pair.

        Returns:
            The convolution output tensor only; no scale is returned.
        """
        act_scale = x[1]
        x = x[0]
        # Pass the unquantized convolution output through the BatchNorm and
        # discard the result; presumably this is done to refresh the
        # BatchNorm running statistics - confirm.
        temp = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
        _ = self.bn(temp)

        # Simulate folding the BatchNorm into the convolution.
        f_weight, f_bias = self.fusing()
        Qn = - (2 ** (self.wbit - 1))
        Qp = 2 ** (self.wbit - 1) - 1
        if self.training and self.init_state == 0:
            # First training call: derive the step size from the fused weights.
            self.scale.data.copy_(2 * f_weight.abs().mean() / math.sqrt(Qp))
            self.init_state.fill_(1)

        g = 1.0 / math.sqrt(x.numel() * Qp)
        scale = grad_scale(self.scale, g)
        # NOTE(review): ``.data`` detaches the fused weight, so no gradient
        # reaches ``self.weight`` through this path - confirm this is intended.
        q_weight = round_pass((f_weight.data / scale).clamp(Qn, Qp))

        if self.dequantize:
            q_weight = q_weight * scale

        bias_scale = scale * act_scale
        q_bias = round_pass(f_bias / bias_scale)
        if self.dequantize:
            q_bias = q_bias * bias_scale

        output = F.conv2d(x, q_weight, q_bias, self.stride, self.padding, self.dilation, self.groups)
        return output

    def replace_bn(self, bn_module):
        """Use ``bn_module`` as the folded BatchNorm and turn off its stat tracking flag."""
        self.bn = bn_module
        self.bn.track_running_stats = False

    def fusing(self):
        """Return the conv weight and bias with the BatchNorm statistics folded in."""
        std = torch.sqrt(self.bn.running_var + self.bn.eps)
        gamma = self.bn.weight / std
        f_weight = self.weight * gamma.reshape([len(self.bn.weight), 1, 1, 1])
        if self.bias is not None:
            # ``running_mean`` was misspelled here, which raised AttributeError
            # for every layer created with a bias.
            f_bias = self.bn.bias + (self.bias - self.bn.running_mean) * gamma
        else:
            f_bias = self.bn.bias - self.bn.running_mean * gamma
        return f_weight, f_bias
359 | + | ||
360 | + | ||
def grad_scale(x, scale):
    """Return a tensor equal to ``x`` in value whose gradient w.r.t. ``x`` is multiplied by ``scale``."""
    scaled = x * scale
    # The detached residual restores the forward value; only ``scaled``
    # stays attached to the autograd graph.
    residual = (x - scaled).detach()
    return residual + scaled
367 | + | ||
def round_pass(x):
    """Round ``x`` to the nearest integer in the forward pass while passing the gradient through unchanged."""
    # Only the detached rounding offset carries the rounding; ``x`` itself
    # remains the differentiable term.
    offset = (torch.round(x) - x).detach()
    return offset + x
374 | + |
source/main.py
0 → 100644
1 | +'''Train CIFAR10 with PyTorch.''' | ||
2 | +import torch | ||
3 | +import torch.nn as nn | ||
4 | +import torch.optim as optim | ||
5 | +import torch.nn.functional as F | ||
6 | +import torch.backends.cudnn as cudnn | ||
7 | + | ||
8 | +import torchvision | ||
9 | +import torchvision.transforms as transforms | ||
10 | + | ||
11 | +import os | ||
12 | +import argparse | ||
13 | + | ||
14 | +from models.mobilenet import MobileNet1 | ||
15 | +from utils import progress_bar | ||
16 | +from replace import replace_sq | ||
17 | +from collections import OrderedDict | ||
18 | +# from lsq_int import Input_Quantizer | ||
19 | +from lsq_sq import Input_Quantizer | ||
20 | +from replace_int import replace_int | ||
21 | + | ||
22 | + | ||
# Command-line interface: each entry is (flag names, add_argument options).
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
_CLI_OPTIONS = (
    (('--lr',), dict(default=0.1, type=float, help='learning rate')),
    (('--resume', '-r'), dict(default=None, type=str, help='resume from checkpoint')),
    (('--dir',), dict(default='default', type=str, help='save dir name')),
    (('--test',), dict(default=False, action='store_true', help='test version or not')),
    (('--qat',), dict(default=False, action='store_true', help='qat version or not')),
)
for _flags, _options in _CLI_OPTIONS:
    parser.add_argument(*_flags, **_options)
args = parser.parse_args()
34 | + | ||
35 | + | ||
36 | +# Training | ||
def train(epoch):
    """Run one training pass over ``trainloader`` and show running loss/accuracy.

    Relies on the module-level ``net``, ``trainloader``, ``device``,
    ``optimizer`` and ``criterion`` set up by the script entry point.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    loss_sum, n_correct, n_seen = 0, 0, 0
    for step, (images, labels) in enumerate(trainloader):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = net(images)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Accumulate statistics for the progress display.
        loss_sum += batch_loss.item()
        guesses = logits.max(1)[1]
        n_seen += labels.size(0)
        n_correct += guesses.eq(labels).sum().item()
        summary = 'Loss: %.3f | Acc: %.3f%% (%d/%d)' % (
            loss_sum/(step+1), 100.*n_correct/n_seen, n_correct, n_seen)
        progress_bar(step, len(trainloader), summary)
57 | + | ||
58 | + | ||
def test(epoch):
    """Evaluate ``net`` on ``testloader`` and checkpoint it when accuracy improves.

    Relies on the module-level ``net``, ``testloader``, ``device``,
    ``criterion``, ``args`` and ``best_acc``. When the measured accuracy beats
    ``best_acc`` the model state, accuracy and epoch are saved to
    ``<args.dir>/ckpt.pth`` and ``best_acc`` is updated.

    Args:
        epoch: epoch index stored in the checkpoint.
    """
    global best_acc
    dir_name = args.dir
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

    # Save checkpoint.
    acc = 100.*correct/total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        # makedirs handles nested --dir values; joining on dir_name keeps the
        # save path identical to the directory that was just created (the old
        # './{dir}/ckpt.pth' form pointed elsewhere for absolute paths).
        os.makedirs(dir_name, exist_ok=True)
        torch.save(state, os.path.join(dir_name, 'ckpt.pth'))
        best_acc = acc
    print('*** best Test Accuracy: ', best_acc)
94 | + | ||
if __name__ == '__main__':
    # Script entry point: build data loaders and model, optionally restore a
    # checkpoint, then train/evaluate for up to 200 epochs.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    best_acc = 0  # best test accuracy
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    # Data
    print('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    trainset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=256, shuffle=True, num_workers=4)

    testset = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=100, shuffle=False, num_workers=4)

    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    # Model
    print('==> Building model..')
    net = MobileNet1(3, 10)
    net = net.to(device)

    if args.qat:
        # Swap layers for their quantization-aware versions and quantize the input.
        net = replace_sq(model=net)
        net = nn.Sequential(Input_Quantizer(abit=8, dequantize=True),
                            net)

    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        # Explicit raise instead of assert: asserts are stripped under -O, and
        # --resume is a checkpoint file path, not a directory.
        if not os.path.isfile(args.resume):
            raise FileNotFoundError(
                f'Error: no checkpoint file found at {args.resume!r}')
        # map_location lets a checkpoint saved on GPU load on a CPU-only host.
        checkpoint = torch.load(args.resume, map_location=device)
        new_state_dict = OrderedDict()
        for k, v in checkpoint['net'].items():
            # Drop the "module." prefix added by DataParallel when saving.
            k = k.replace("module.", "")
            new_state_dict[k] = v
        net.load_state_dict(new_state_dict)
        # The stored accuracy/epoch are deliberately not restored.
        best_acc = 0.0
        start_epoch = 0

    print(net)
    # replace_int(net)

    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
        # Modules added after the first .to(device) (QAT wrappers) are
        # presumably still on CPU, so move the whole model again.
        net.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=0.9, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

    for epoch in range(start_epoch, start_epoch+200):
        if args.test:
            # Evaluation-only mode: one pass over the test set, then stop.
            test(epoch)
            break
        train(epoch)
        test(epoch)
        scheduler.step()
No preview for this file type
source/models/densenet.py
0 → 100644
1 | +'''DenseNet in PyTorch.''' | ||
2 | +import math | ||
3 | + | ||
4 | +import torch | ||
5 | +import torch.nn as nn | ||
6 | +import torch.nn.functional as F | ||
7 | + | ||
8 | + | ||
class Bottleneck(nn.Module):
    """DenseNet bottleneck: BN-ReLU-1x1 conv followed by BN-ReLU-3x3 conv.

    The ``growth_rate`` new feature maps are concatenated in front of the
    block input along the channel axis.
    """

    def __init__(self, in_planes, growth_rate):
        super().__init__()
        inner_planes = 4 * growth_rate
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, inner_planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(inner_planes)
        self.conv2 = nn.Conv2d(inner_planes, growth_rate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        reduced = self.conv1(F.relu(self.bn1(x)))
        new_features = self.conv2(F.relu(self.bn2(reduced)))
        return torch.cat([new_features, x], 1)
22 | + | ||
23 | + | ||
class Transition(nn.Module):
    """Transition layer: BN-ReLU, a 1x1 conv to ``out_planes``, then 2x2 average pooling."""

    def __init__(self, in_planes, out_planes):
        super().__init__()
        self.bn = nn.BatchNorm2d(in_planes)
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)

    def forward(self, x):
        activated = F.relu(self.bn(x))
        projected = self.conv(activated)
        return F.avg_pool2d(projected, 2)
34 | + | ||
35 | + | ||
class DenseNet(nn.Module):
    """DenseNet with four dense stages separated by three transition layers.

    Args:
        block: class used for each dense layer, called as
            ``block(in_planes, growth_rate)``.
        nblocks: number of dense layers in each of the four stages.
        growth_rate: channels added by every dense layer.
        reduction: channel compression factor applied by each transition.
        num_classes: size of the final linear layer output.
    """

    def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
        super().__init__()
        self.growth_rate = growth_rate

        width = 2 * growth_rate
        self.conv1 = nn.Conv2d(3, width, kernel_size=3, padding=1, bias=False)

        # Stages 1-3: dense stage followed by a compressing transition.
        for stage in (1, 2, 3):
            depth = nblocks[stage - 1]
            setattr(self, 'dense%d' % stage, self._make_dense_layers(block, width, depth))
            width += depth * growth_rate
            compressed = int(math.floor(width * reduction))
            setattr(self, 'trans%d' % stage, Transition(width, compressed))
            width = compressed

        # The last stage has no transition after it.
        self.dense4 = self._make_dense_layers(block, width, nblocks[3])
        width += nblocks[3] * growth_rate

        self.bn = nn.BatchNorm2d(width)
        self.linear = nn.Linear(width, num_classes)

    def _make_dense_layers(self, block, in_planes, nblock):
        """Stack ``nblock`` dense layers, each seeing ``growth_rate`` more input channels than the last."""
        stack = [block(in_planes + idx * self.growth_rate, self.growth_rate)
                 for idx in range(nblock)]
        return nn.Sequential(*stack)

    def forward(self, x):
        feats = self.conv1(x)
        feats = self.trans1(self.dense1(feats))
        feats = self.trans2(self.dense2(feats))
        feats = self.trans3(self.dense3(feats))
        feats = self.dense4(feats)
        pooled = F.avg_pool2d(F.relu(self.bn(feats)), 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
85 | + | ||
def DenseNet121():
    """Build a DenseNet of Bottleneck blocks with stage depths 6/12/24/16 and growth rate 32."""
    stage_depths = [6, 12, 24, 16]
    return DenseNet(Bottleneck, stage_depths, growth_rate=32)
88 | + | ||
def DenseNet169():
    """Build a DenseNet of Bottleneck blocks with stage depths 6/12/32/32 and growth rate 32."""
    stage_depths = [6, 12, 32, 32]
    return DenseNet(Bottleneck, stage_depths, growth_rate=32)
91 | + | ||
def DenseNet201():
    """Build a DenseNet of Bottleneck blocks with stage depths 6/12/48/32 and growth rate 32."""
    stage_depths = [6, 12, 48, 32]
    return DenseNet(Bottleneck, stage_depths, growth_rate=32)
94 | + | ||
def DenseNet161():
    """Build a DenseNet of Bottleneck blocks with stage depths 6/12/36/24 and growth rate 48."""
    stage_depths = [6, 12, 36, 24]
    return DenseNet(Bottleneck, stage_depths, growth_rate=48)
97 | + | ||
def densenet_cifar():
    """Build a DenseNet of Bottleneck blocks with stage depths 6/12/24/16 and growth rate 12."""
    stage_depths = [6, 12, 24, 16]
    return DenseNet(Bottleneck, stage_depths, growth_rate=12)
100 | + | ||
def test():
    """Smoke test: run one random 3x32x32 input through ``densenet_cifar`` and print the output."""
    model = densenet_cifar()
    sample = torch.randn(1, 3, 32, 32)
    print(model(sample))
106 | + | ||
107 | +# test() |
source/models/dla.py
0 → 100644
1 | +'''DLA in PyTorch. | ||
2 | + | ||
3 | +Reference: | ||
4 | + Deep Layer Aggregation. https://arxiv.org/abs/1707.06484 | ||
5 | +''' | ||
6 | +import torch | ||
7 | +import torch.nn as nn | ||
8 | +import torch.nn.functional as F | ||
9 | + | ||
10 | + | ||
class BasicBlock(nn.Module):
    """Residual block of two 3x3 conv + BN layers with an additive shortcut.

    The shortcut is the identity unless the stride or channel count changes,
    in which case it is a strided 1x1 conv followed by BN.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        out_planes = self.expansion * planes
        if stride == 1 and in_planes == out_planes:
            # Shapes already match: an empty Sequential acts as the identity.
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        residual = F.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))
        residual += self.shortcut(x)
        return F.relu(residual)
37 | + | ||
38 | + | ||
class Root(nn.Module):
    """Aggregation node: concatenate feature maps on the channel axis, then conv-BN-ReLU."""

    def __init__(self, in_channels, out_channels, kernel_size=1):
        super().__init__()
        same_padding = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(
            in_channels, out_channels, kernel_size,
            stride=1, padding=same_padding, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, xs):
        stacked = torch.cat(xs, 1)
        return F.relu(self.bn(self.conv(stacked)))
51 | + | ||
52 | + | ||
class Tree(nn.Module):
    """Aggregation tree whose node outputs are all merged by a single ``Root``.

    At ``level == 1`` the tree is two blocks in sequence. At other levels it
    also holds ``prev_root`` and one sub-tree per lower level
    (``level_<i>``), and the root consumes ``level + 2`` feature maps.
    """

    def __init__(self, block, in_channels, out_channels, level=1, stride=1):
        super().__init__()
        self.level = level
        if level != 1:
            self.root = Root((level + 2) * out_channels, out_channels)
            for depth in range(level - 1, 0, -1):
                child = Tree(block, in_channels, out_channels,
                             level=depth, stride=stride)
                setattr(self, 'level_%d' % depth, child)
            self.prev_root = block(in_channels, out_channels, stride=stride)
            self.left_node = block(out_channels, out_channels, stride=1)
            self.right_node = block(out_channels, out_channels, stride=1)
        else:
            self.root = Root(2 * out_channels, out_channels)
            self.left_node = block(in_channels, out_channels, stride=stride)
            self.right_node = block(out_channels, out_channels, stride=1)

    def forward(self, x):
        collected = []
        if self.level > 1:
            collected.append(self.prev_root(x))
        # Sub-trees run from the highest level down, each feeding the next.
        for depth in range(self.level - 1, 0, -1):
            x = getattr(self, 'level_%d' % depth)(x)
            collected.append(x)
        for node in (self.left_node, self.right_node):
            x = node(x)
            collected.append(x)
        return self.root(collected)
83 | + | ||
84 | + | ||
class DLA(nn.Module):
    """DLA classifier: three conv-BN-ReLU stems, four aggregation trees, then a linear head."""

    def __init__(self, block=BasicBlock, num_classes=10):
        super().__init__()

        def conv_bn_relu(cin, cout):
            # 3x3 conv that keeps spatial size, followed by BN and in-place ReLU.
            return nn.Sequential(
                nn.Conv2d(cin, cout, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(cout),
                nn.ReLU(True),
            )

        self.base = conv_bn_relu(3, 16)
        self.layer1 = conv_bn_relu(16, 16)
        self.layer2 = conv_bn_relu(16, 32)

        self.layer3 = Tree(block, 32, 64, level=1, stride=1)
        self.layer4 = Tree(block, 64, 128, level=2, stride=2)
        self.layer5 = Tree(block, 128, 256, level=2, stride=2)
        self.layer6 = Tree(block, 256, 512, level=1, stride=2)
        self.linear = nn.Linear(512, num_classes)

    def forward(self, x):
        feats = x
        stages = (self.base, self.layer1, self.layer2, self.layer3,
                  self.layer4, self.layer5, self.layer6)
        for stage in stages:
            feats = stage(feats)
        pooled = F.avg_pool2d(feats, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
124 | + | ||
125 | + | ||
def test():
    """Smoke test: print the DLA model and the output size for one random 3x32x32 input."""
    model = DLA()
    print(model)
    sample = torch.randn(1, 3, 32, 32)
    result = model(sample)
    print(result.size())


if __name__ == '__main__':
    test()
source/models/dla_simple.py
0 → 100644
1 | +'''Simplified version of DLA in PyTorch. | ||
2 | + | ||
3 | +Note this implementation is not identical to the original paper version. | ||
4 | +But it seems works fine. | ||
5 | + | ||
6 | +See dla.py for the original paper version. | ||
7 | + | ||
8 | +Reference: | ||
9 | + Deep Layer Aggregation. https://arxiv.org/abs/1707.06484 | ||
10 | +''' | ||
11 | +import torch | ||
12 | +import torch.nn as nn | ||
13 | +import torch.nn.functional as F | ||
14 | + | ||
15 | + | ||
class BasicBlock(nn.Module):
    """Two 3x3 conv + BN layers with an additive shortcut and a final ReLU.

    When the stride is not 1 or the channel count changes, the shortcut is a
    strided 1x1 conv + BN; otherwise it is an empty ``nn.Sequential``.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        target_planes = self.expansion * planes
        needs_projection = stride != 1 or in_planes != target_planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, target_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(target_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        branch = F.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))
        branch += self.shortcut(x)
        return F.relu(branch)
42 | + | ||
43 | + | ||
class Root(nn.Module):
    """Merge a list of feature maps: channel-wise concat, then conv, BN and ReLU."""

    def __init__(self, in_channels, out_channels, kernel_size=1):
        super().__init__()
        pad = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(
            in_channels, out_channels, kernel_size,
            stride=1, padding=pad, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, xs):
        merged = torch.cat(xs, 1)
        merged = self.bn(self.conv(merged))
        return F.relu(merged)
56 | + | ||
57 | + | ||
class Tree(nn.Module):
    """Binary aggregation tree: the left and right outputs are merged by a ``Root``.

    At ``level == 1`` both children are plain blocks; otherwise each child is
    a ``Tree`` one level lower. Only the left child applies ``stride``.
    """

    def __init__(self, block, in_channels, out_channels, level=1, stride=1):
        super().__init__()
        self.root = Root(2 * out_channels, out_channels)
        if level != 1:
            self.left_tree = Tree(block, in_channels, out_channels,
                                  level=level - 1, stride=stride)
            self.right_tree = Tree(block, out_channels, out_channels,
                                   level=level - 1, stride=1)
        else:
            self.left_tree = block(in_channels, out_channels, stride=stride)
            self.right_tree = block(out_channels, out_channels, stride=1)

    def forward(self, x):
        left = self.left_tree(x)
        # The right child consumes the left child's output, not the raw input.
        right = self.right_tree(left)
        return self.root([left, right])
76 | + | ||
77 | + | ||
class SimpleDLA(nn.Module):
    """Simplified DLA classifier: three conv-BN-ReLU stems, four trees and a linear head."""

    def __init__(self, block=BasicBlock, num_classes=10):
        super().__init__()

        def stem(cin, cout):
            # 3x3 conv that keeps spatial size, followed by BN and in-place ReLU.
            return nn.Sequential(
                nn.Conv2d(cin, cout, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(cout),
                nn.ReLU(True),
            )

        self.base = stem(3, 16)
        self.layer1 = stem(16, 16)
        self.layer2 = stem(16, 32)

        self.layer3 = Tree(block, 32, 64, level=1, stride=1)
        self.layer4 = Tree(block, 64, 128, level=2, stride=2)
        self.layer5 = Tree(block, 128, 256, level=2, stride=2)
        self.layer6 = Tree(block, 256, 512, level=1, stride=2)
        self.linear = nn.Linear(512, num_classes)

    def forward(self, x):
        feats = x
        for stage in (self.base, self.layer1, self.layer2, self.layer3,
                      self.layer4, self.layer5, self.layer6):
            feats = stage(feats)
        pooled = F.avg_pool2d(feats, 4)
        return self.linear(pooled.view(pooled.size(0), -1))
117 | + | ||
118 | + | ||
def test():
    """Smoke test: print the SimpleDLA model and the output size for one random 3x32x32 input."""
    model = SimpleDLA()
    print(model)
    sample = torch.randn(1, 3, 32, 32)
    result = model(sample)
    print(result.size())


if __name__ == '__main__':
    test()
source/models/dpn.py
0 → 100644
1 | +'''Dual Path Networks in PyTorch.''' | ||
2 | +import torch | ||
3 | +import torch.nn as nn | ||
4 | +import torch.nn.functional as F | ||
5 | + | ||
6 | + | ||
class Bottleneck(nn.Module):
    """Dual-path bottleneck: 1x1 conv, grouped 3x3 conv (32 groups), 1x1 conv.

    The first ``out_planes`` channels of the branch are added to the shortcut
    (residual path); the remaining channels of both tensors are concatenated
    (dense path). ``first_layer`` selects a projecting 1x1 conv + BN shortcut.
    """

    def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer):
        super().__init__()
        self.out_planes = out_planes
        self.dense_depth = dense_depth
        branch_out = out_planes + dense_depth

        self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride,
                               padding=1, groups=32, bias=False)
        self.bn2 = nn.BatchNorm2d(in_planes)
        self.conv3 = nn.Conv2d(in_planes, branch_out, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(branch_out)

        if first_layer:
            self.shortcut = nn.Sequential(
                nn.Conv2d(last_planes, branch_out, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(branch_out),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        branch = F.relu(self.bn1(self.conv1(x)))
        branch = F.relu(self.bn2(self.conv2(branch)))
        branch = self.bn3(self.conv3(branch))
        skip = self.shortcut(x)
        d = self.out_planes
        residual_part = skip[:, :d, :, :] + branch[:, :d, :, :]
        merged = torch.cat([residual_part, skip[:, d:, :, :], branch[:, d:, :, :]], 1)
        return F.relu(merged)
36 | + | ||
37 | + | ||
class DPN(nn.Module):
    """Dual Path Network: a conv stem, four stages of ``Bottleneck`` blocks and a 10-way linear head.

    Args:
        cfg: dict with 4-element sequences under ``'in_planes'``,
            ``'out_planes'``, ``'num_blocks'`` and ``'dense_depth'``, one
            entry per stage.
    """

    def __init__(self, cfg):
        super().__init__()
        in_planes = cfg['in_planes']
        out_planes = cfg['out_planes']
        num_blocks = cfg['num_blocks']
        dense_depth = cfg['dense_depth']

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.last_planes = 64
        # Stage 1 keeps the resolution; stages 2-4 start with a stride-2 block.
        for idx, stage_stride in enumerate((1, 2, 2, 2)):
            stage = self._make_layer(in_planes[idx], out_planes[idx], num_blocks[idx],
                                     dense_depth[idx], stride=stage_stride)
            setattr(self, 'layer%d' % (idx + 1), stage)
        head_width = out_planes[3] + (num_blocks[3] + 1) * dense_depth[3]
        self.linear = nn.Linear(head_width, 10)

    def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride):
        """Build one stage; only the first block uses ``stride`` and a projecting shortcut."""
        per_block_strides = [stride] + [1]*(num_blocks-1)
        blocks = []
        for idx, block_stride in enumerate(per_block_strides):
            blocks.append(Bottleneck(self.last_planes, in_planes, out_planes,
                                     dense_depth, block_stride, idx == 0))
            # Each block adds ``dense_depth`` channels to the dense path.
            self.last_planes = out_planes + (idx+2) * dense_depth
        return nn.Sequential(*blocks)

    def forward(self, x):
        feats = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        pooled = F.avg_pool2d(feats, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
71 | + | ||
72 | + | ||
def DPN26():
    """Build a DPN with two blocks in each of the four stages."""
    return DPN(dict(
        in_planes=(96,192,384,768),
        out_planes=(256,512,1024,2048),
        num_blocks=(2,2,2,2),
        dense_depth=(16,32,24,128),
    ))
81 | + | ||
def DPN92():
    """Build a DPN with 3/4/20/3 blocks in its four stages."""
    return DPN(dict(
        in_planes=(96,192,384,768),
        out_planes=(256,512,1024,2048),
        num_blocks=(3,4,20,3),
        dense_depth=(16,32,24,128),
    ))
90 | + | ||
91 | + | ||
def test():
    """Smoke test: run one random 3x32x32 input through ``DPN92`` and print the output."""
    model = DPN92()
    sample = torch.randn(1, 3, 32, 32)
    print(model(sample))
97 | + | ||
98 | +# test() |
source/models/efficientnet.py
0 → 100644
1 | +'''EfficientNet in PyTorch. | ||
2 | + | ||
3 | +Paper: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks". | ||
4 | + | ||
5 | +Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py | ||
6 | +''' | ||
7 | +import torch | ||
8 | +import torch.nn as nn | ||
9 | +import torch.nn.functional as F | ||
10 | + | ||
11 | + | ||
def swish(x):
    """Swish activation: ``x`` multiplied element-wise by ``sigmoid(x)``."""
    gate = x.sigmoid()
    return x * gate
14 | + | ||
15 | + | ||
def drop_connect(x, drop_ratio):
    """Randomly zero whole samples of ``x`` in place and rescale the survivors.

    One Bernoulli(keep) draw is made per entry along the first axis; ``x`` is
    divided by the keep probability and multiplied by the mask in place, and
    the same tensor object is returned.
    """
    keep_ratio = 1.0 - drop_ratio
    # One mask value per sample, broadcast over the remaining three axes.
    mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
    mask.bernoulli_(keep_ratio)
    return x.div_(keep_ratio).mul_(mask)
23 | + | ||
24 | + | ||
class SE(nn.Module):
    '''Squeeze-and-Excitation block with Swish.

    The input is globally average-pooled to 1x1, passed through two 1x1 convs
    (Swish, then sigmoid) and the result rescales the input channel-wise.
    '''

    def __init__(self, in_channels, se_channels):
        super().__init__()
        self.se1 = nn.Conv2d(in_channels, se_channels, kernel_size=1, bias=True)
        self.se2 = nn.Conv2d(se_channels, in_channels, kernel_size=1, bias=True)

    def forward(self, x):
        squeezed = F.adaptive_avg_pool2d(x, (1, 1))
        excited = swish(self.se1(squeezed))
        gate = self.se2(excited).sigmoid()
        return x * gate
41 | + | ||
42 | + | ||
class Block(nn.Module):
    '''expansion + depthwise + pointwise + squeeze-excitation

    The 1x1 expansion is skipped in ``forward`` when ``expand_ratio`` is 1.
    A residual connection is used only when the stride is 1 and the input and
    output channel counts match; drop-connect is applied to the branch in
    training mode when ``drop_rate`` is positive.
    '''

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 expand_ratio=1,
                 se_ratio=0.,
                 drop_rate=0.):
        super().__init__()
        self.stride = stride
        self.drop_rate = drop_rate
        self.expand_ratio = expand_ratio

        hidden = expand_ratio * in_channels

        # Expansion
        self.conv1 = nn.Conv2d(in_channels, hidden, kernel_size=1,
                               stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(hidden)

        # Depthwise conv
        depthwise_padding = 1 if kernel_size == 3 else 2
        self.conv2 = nn.Conv2d(hidden, hidden, kernel_size=kernel_size,
                               stride=stride, padding=depthwise_padding,
                               groups=hidden, bias=False)
        self.bn2 = nn.BatchNorm2d(hidden)

        # SE layers
        self.se = SE(hidden, int(in_channels * se_ratio))

        # Output
        self.conv3 = nn.Conv2d(hidden, out_channels, kernel_size=1,
                               stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        # Skip connection if in and out shapes are the same (MV-V2 style)
        self.has_skip = (stride == 1) and (in_channels == out_channels)

    def forward(self, x):
        if self.expand_ratio == 1:
            branch = x
        else:
            branch = swish(self.bn1(self.conv1(x)))
        branch = swish(self.bn2(self.conv2(branch)))
        branch = self.se(branch)
        branch = self.bn3(self.conv3(branch))
        if not self.has_skip:
            return branch
        if self.training and self.drop_rate > 0:
            branch = drop_connect(branch, self.drop_rate)
        return branch + x
105 | + | ||
106 | + | ||
class EfficientNet(nn.Module):
    """EfficientNet backbone with a linear classification head.

    Args:
        cfg: dict with per-stage lists under ``'expansion'``,
            ``'out_channels'``, ``'num_blocks'``, ``'kernel_size'`` and
            ``'stride'``, plus scalar ``'dropout_rate'`` and
            ``'drop_connect_rate'``.
        num_classes: size of the final linear layer output.
    """

    def __init__(self, cfg, num_classes=10):
        super(EfficientNet, self).__init__()
        self.cfg = cfg
        self.conv1 = nn.Conv2d(3,
                               32,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_channels=32)
        self.linear = nn.Linear(cfg['out_channels'][-1], num_classes)

    def _make_layers(self, in_channels):
        """Build every ``Block``, with a drop-connect rate that grows linearly with block index."""
        layers = []
        cfg = [self.cfg[k] for k in ['expansion', 'out_channels', 'num_blocks', 'kernel_size',
                                     'stride']]
        b = 0  # index of the block being created, across all stages
        blocks = sum(self.cfg['num_blocks'])
        for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg):
            # Only the first block of a stage uses the configured stride.
            strides = [stride] + [1] * (num_blocks - 1)
            for stride in strides:
                drop_rate = self.cfg['drop_connect_rate'] * b / blocks
                layers.append(
                    Block(in_channels,
                          out_channels,
                          kernel_size,
                          stride,
                          expansion,
                          se_ratio=0.25,
                          drop_rate=drop_rate))
                in_channels = out_channels
                # Fixed: the counter was never advanced, so every block got
                # drop_rate 0 and drop-connect was never applied.
                b += 1
        return nn.Sequential(*layers)

    def forward(self, x):
        out = swish(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        dropout_rate = self.cfg['dropout_rate']
        # Dropout on the pooled features is applied in training mode only.
        if self.training and dropout_rate > 0:
            out = F.dropout(out, p=dropout_rate)
        out = self.linear(out)
        return out
152 | + | ||
153 | + | ||
def EfficientNetB0():
    """Build an ``EfficientNet`` from the seven-stage B0 configuration below."""
    b0_config = dict(
        num_blocks=[1, 2, 2, 3, 3, 4, 1],
        expansion=[1, 6, 6, 6, 6, 6, 6],
        out_channels=[16, 24, 40, 80, 112, 192, 320],
        kernel_size=[3, 3, 5, 3, 5, 5, 3],
        stride=[1, 2, 2, 2, 1, 2, 1],
        dropout_rate=0.2,
        drop_connect_rate=0.2,
    )
    return EfficientNet(b0_config)
165 | + | ||
166 | + | ||
def test():
    '''Smoke test: push a random 2x3x32x32 batch through EfficientNetB0.'''
    model = EfficientNetB0()
    batch = torch.randn(2, 3, 32, 32)
    print(model(batch).shape)
172 | + | ||
173 | + | ||
# Run the smoke test only when this file is executed directly as a script.
if __name__ == '__main__':
    test()
source/models/googlenet.py
0 → 100644
1 | +'''GoogLeNet with PyTorch.''' | ||
2 | +import torch | ||
3 | +import torch.nn as nn | ||
4 | +import torch.nn.functional as F | ||
5 | + | ||
6 | + | ||
class Inception(nn.Module):
    '''Inception module: four parallel branches concatenated along channels.

    Args:
        in_planes: input channel count shared by all branches.
        n1x1: output channels of the 1x1 branch.
        n3x3red, n3x3: reduction and output channels of the 3x3 branch.
        n5x5red, n5x5: reduction and output channels of the "5x5" branch,
            which is implemented as two stacked 3x3 convolutions.
        pool_planes: output channels of the pooling branch.

    The output has ``n1x1 + n3x3 + n5x5 + pool_planes`` channels.
    '''

    def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
        super(Inception, self).__init__()

        def unit(c_in, c_out, kernel_size, padding=0):
            # conv -> batch norm -> in-place ReLU, returned as a flat list so
            # the enclosing Sequential keeps its original child indices.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=kernel_size, padding=padding),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ]

        # 1x1 conv branch
        self.b1 = nn.Sequential(*unit(in_planes, n1x1, 1))

        # 1x1 conv -> 3x3 conv branch
        self.b2 = nn.Sequential(
            *unit(in_planes, n3x3red, 1),
            *unit(n3x3red, n3x3, 3, padding=1),
        )

        # 1x1 conv -> two 3x3 convs (stand-in for a single 5x5 conv)
        self.b3 = nn.Sequential(
            *unit(in_planes, n5x5red, 1),
            *unit(n5x5red, n5x5, 3, padding=1),
            *unit(n5x5, n5x5, 3, padding=1),
        )

        # 3x3 max-pool -> 1x1 conv branch
        self.b4 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            *unit(in_planes, pool_planes, 1),
        )

    def forward(self, x):
        branches = (self.b1, self.b2, self.b3, self.b4)
        return torch.cat([branch(x) for branch in branches], 1)
54 | + | ||
55 | + | ||
class GoogLeNet(nn.Module):
    '''GoogLeNet-style classifier assembled from Inception modules.

    Args:
        num_classes: number of output logits. Defaults to 10, the value that
            was previously hard-coded, so existing ``GoogLeNet()`` callers are
            unaffected.

    The fixed 8x8 average pool and the 1024-feature linear layer assume the
    feature map is 8x8 after the two stride-2 max-pools, i.e. 32x32 inputs.
    '''

    def __init__(self, num_classes=10):
        super(GoogLeNet, self).__init__()
        self.pre_layers = nn.Sequential(
            nn.Conv2d(3, 192, kernel_size=3, padding=1),
            nn.BatchNorm2d(192),
            nn.ReLU(True),
        )

        # Each Inception's in_planes equals the summed branch widths of the
        # module before it (e.g. a3 emits 64 + 128 + 32 + 32 = 256).
        self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
        self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)

        # Shared, parameter-free down-sampling used after stage 3 and stage 4.
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)

        self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
        self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
        self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
        self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)

        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)

        self.avgpool = nn.AvgPool2d(8, stride=1)
        # b5 emits 384 + 384 + 128 + 128 = 1024 channels.
        self.linear = nn.Linear(1024, num_classes)

    def forward(self, x):
        '''Return class logits of shape (batch, num_classes).'''
        out = self.pre_layers(x)
        out = self.a3(out)
        out = self.b3(out)
        out = self.maxpool(out)
        out = self.a4(out)
        out = self.b4(out)
        out = self.c4(out)
        out = self.d4(out)
        out = self.e4(out)
        out = self.maxpool(out)
        out = self.a5(out)
        out = self.b5(out)
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        return self.linear(out)
99 | + | ||
100 | + | ||
def test():
    '''Smoke test: print the output size for one random 3x32x32 image.'''
    model = GoogLeNet()
    sample = torch.randn(1, 3, 32, 32)
    print(model(sample).size())
106 | + | ||
107 | +# test() |
source/models/lenet.py
0 → 100644
1 | +'''LeNet in PyTorch.''' | ||
2 | +import torch.nn as nn | ||
3 | +import torch.nn.functional as F | ||
4 | + | ||
class LeNet(nn.Module):
    '''LeNet-style CNN: two 5x5 conv + max-pool stages, then three linear layers.

    Args:
        num_classes: number of output logits. Defaults to 10, the value that
            was previously hard-coded, so ``LeNet()`` behaves as before.

    ``fc1`` expects 16*5*5 features, which is what 3x32x32 inputs produce
    (32 -> 28 -> 14 -> 10 -> 5 through the conv/pool stages).
    '''

    def __init__(self, num_classes=10):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        '''Return class logits of shape (batch, num_classes).'''
        out = F.max_pool2d(F.relu(self.conv1(x)), 2)
        out = F.max_pool2d(F.relu(self.conv2(out)), 2)
        out = out.view(out.size(0), -1)  # flatten per sample
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        return self.fc3(out)
source/models/mobilenet.py
0 → 100644
1 | +import torch | ||
2 | +import torch.nn as nn | ||
3 | +import torch.nn.functional as F | ||
4 | + | ||
class MobileNet1(nn.Module):
    '''MobileNet (v1 layout) built from depthwise-separable conv stages.

    Args:
        inchannel: number of input image channels.
        num_classes: number of output logits.

    Every conv is followed by BatchNorm and Hardswish. The commented-out
    alternatives this replaced were ReLU, LeakyReLU(0.1) and Hardsigmoid.
    '''

    def __init__(self, inchannel=3, num_classes=10):
        super(MobileNet1, self).__init__()
        self.num_classes = num_classes

        def conv_bn(inp, oup, stride):
            # Standard 3x3 conv -> BN -> activation.
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.Hardswish(),
            )

        def conv_dw(inp, oup, stride):
            # 3x3 depthwise conv (groups == inp) followed by a 1x1 pointwise
            # conv, each with its own BN and activation.
            return nn.Sequential(
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.Hardswish(),

                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                nn.Hardswish(),
            )

        self.model = nn.Sequential(
            conv_bn(inchannel, 32, 1),
            conv_dw(32, 64, 1),
            conv_dw(64, 128, 2),
            conv_dw(128, 128, 1),
            conv_dw(128, 256, 2),
            conv_dw(256, 256, 1),
            conv_dw(256, 512, 2),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 1024, 2),
            conv_dw(1024, 1024, 1),
            nn.AdaptiveAvgPool2d(1),
        )
        self.fc = nn.Linear(1024, self.num_classes)

    def forward(self, x):
        '''Return class logits of shape (batch, num_classes).'''
        # nn.Sequential returns a single tensor and nn.Linear takes a single
        # input. The previous ``x, act_scale = self.model(x)`` followed by
        # ``self.fc(x, act_scale)`` could not run with these modules.
        x = self.model(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)
source/models/mobilenetv2.py
0 → 100644
1 | +'''MobileNetV2 in PyTorch. | ||
2 | + | ||
3 | +See the paper "Inverted Residuals and Linear Bottlenecks: | ||
4 | +Mobile Networks for Classification, Detection and Segmentation" for more details. | ||
5 | +''' | ||
6 | +import torch | ||
7 | +import torch.nn as nn | ||
8 | +import torch.nn.functional as F | ||
9 | + | ||
10 | + | ||
class Block(nn.Module):
    '''Inverted-residual unit: 1x1 expand -> 3x3 depthwise -> 1x1 project.

    A residual connection is added only when ``stride == 1``; if the channel
    counts differ in that case, the shortcut is a 1x1 conv + batch norm.
    '''

    def __init__(self, in_planes, out_planes, expansion, stride):
        super(Block, self).__init__()
        self.stride = stride
        hidden = expansion * in_planes

        # expand
        self.conv1 = nn.Conv2d(in_planes, hidden, kernel_size=1,
                               stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(hidden)
        # depthwise (one group per channel)
        self.conv2 = nn.Conv2d(hidden, hidden, kernel_size=3, stride=stride,
                               padding=1, groups=hidden, bias=False)
        self.bn2 = nn.BatchNorm2d(hidden)
        # linear projection (no activation afterwards)
        self.conv3 = nn.Conv2d(hidden, out_planes, kernel_size=1,
                               stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride == 1 and in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1,
                          stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            # Empty Sequential acts as identity.
            self.shortcut = nn.Sequential()

    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = F.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        if self.stride == 1:
            y = y + self.shortcut(x)
        return y
38 | + | ||
39 | + | ||
class MobileNetV2(nn.Module):
    '''MobileNetV2 with strides and pooling adjusted for CIFAR10-sized inputs.'''

    # One row per stage: (expansion, out_planes, num_blocks, stride)
    cfg = [(1,  16, 1, 1),
           (6,  24, 2, 1),  # NOTE: change stride 2 -> 1 for CIFAR10
           (6,  32, 3, 2),
           (6,  64, 4, 2),
           (6,  96, 3, 1),
           (6, 160, 3, 2),
           (6, 320, 1, 1)]

    def __init__(self, num_classes=10):
        super(MobileNetV2, self).__init__()
        # NOTE: change conv1 stride 2 -> 1 for CIFAR10
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1,
                               stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, num_classes)

    def _make_layers(self, in_planes):
        '''Instantiate every Block listed in ``cfg`` as one nn.Sequential.'''
        blocks = []
        for expansion, out_planes, num_blocks, stage_stride in self.cfg:
            # Only the first block of a stage uses the stage stride.
            for block_stride in [stage_stride] + [1]*(num_blocks-1):
                blocks.append(
                    Block(in_planes, out_planes, expansion, block_stride))
                in_planes = out_planes
        return nn.Sequential(*blocks)

    def forward(self, x):
        feats = F.relu(self.bn1(self.conv1(x)))
        feats = self.layers(feats)
        feats = F.relu(self.bn2(self.conv2(feats)))
        # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10
        pooled = F.avg_pool2d(feats, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
78 | + | ||
79 | + | ||
def test():
    '''Smoke test: print the output size for a random 2x3x32x32 batch.'''
    model = MobileNetV2()
    batch = torch.randn(2, 3, 32, 32)
    print(model(batch).size())
85 | + | ||
86 | +# test() |
source/models/pnasnet.py
0 → 100644
1 | +'''PNASNet in PyTorch. | ||
2 | + | ||
3 | +Paper: Progressive Neural Architecture Search | ||
4 | +''' | ||
5 | +import torch | ||
6 | +import torch.nn as nn | ||
7 | +import torch.nn.functional as F | ||
8 | + | ||
9 | + | ||
class SepConv(nn.Module):
    '''Grouped convolution (groups == in_planes) followed by batch norm.

    Padding is ``(kernel_size - 1) // 2``, so an odd kernel with stride 1
    preserves the spatial size.
    '''

    def __init__(self, in_planes, out_planes, kernel_size, stride):
        super(SepConv, self).__init__()
        same_padding = (kernel_size - 1) // 2
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size, stride,
                               padding=same_padding, bias=False,
                               groups=in_planes)
        self.bn1 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        filtered = self.conv1(x)
        return self.bn1(filtered)
22 | + | ||
23 | + | ||
class CellA(nn.Module):
    '''Two-branch cell: 7x7 SepConv plus 3x3 max-pool, summed then ReLU.

    When ``stride == 2`` the pooled branch gets a 1x1 conv + batch norm so its
    channel count matches the conv branch.
    '''

    def __init__(self, in_planes, out_planes, stride=1):
        super(CellA, self).__init__()
        self.stride = stride
        self.sep_conv1 = SepConv(in_planes, out_planes,
                                 kernel_size=7, stride=stride)
        if stride == 2:
            self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1,
                                   stride=1, padding=0, bias=False)
            self.bn1 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        conv_branch = self.sep_conv1(x)
        pool_branch = F.max_pool2d(x, kernel_size=3,
                                   stride=self.stride, padding=1)
        if self.stride == 2:
            pool_branch = self.bn1(self.conv1(pool_branch))
        return F.relu(conv_branch + pool_branch)
39 | + | ||
class CellB(nn.Module):
    '''Four-path cell whose two summed pairs are concatenated and reduced.

    Left pair: 7x7 SepConv + 3x3 SepConv. Right pair: 3x3 max-pool (with a
    1x1 conv + batch norm when ``stride == 2``) + 5x5 SepConv. The two ReLU'd
    sums are concatenated and reduced back to ``out_planes`` by a 1x1 conv.
    '''

    def __init__(self, in_planes, out_planes, stride=1):
        super(CellB, self).__init__()
        self.stride = stride
        # Left pair
        self.sep_conv1 = SepConv(in_planes, out_planes,
                                 kernel_size=7, stride=stride)
        self.sep_conv2 = SepConv(in_planes, out_planes,
                                 kernel_size=3, stride=stride)
        # Right pair
        self.sep_conv3 = SepConv(in_planes, out_planes,
                                 kernel_size=5, stride=stride)
        if stride == 2:
            self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1,
                                   stride=1, padding=0, bias=False)
            self.bn1 = nn.BatchNorm2d(out_planes)
        # Channel reduction after concatenation
        self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1,
                               stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        # Left pair
        left_a = self.sep_conv1(x)
        left_b = self.sep_conv2(x)
        # Right pair
        pooled = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
        if self.stride == 2:
            pooled = self.bn1(self.conv1(pooled))
        right_conv = self.sep_conv3(x)
        # Concatenate the two activated sums, then reduce channels.
        merged = torch.cat(
            [F.relu(left_a + left_b), F.relu(pooled + right_conv)], 1)
        return F.relu(self.bn2(self.conv2(merged)))
70 | + | ||
class PNASNet(nn.Module):
    '''Three stages of stacked cells separated by stride-2 down-sampling cells.

    Args:
        cell_type: callable ``cell_type(in_planes, out_planes, stride=...)``
            returning the module used for every cell.
        num_cells: number of stride-1 cells in each of the three stages.
            This argument used to be ignored (6 was hard-coded); it is now
            honoured. The factories in this file pass 6, so they are unchanged.
        num_planes: channel width of the first stage; doubled at each
            down-sampling step.
        num_classes: number of output logits (default 10, the previously
            hard-coded value).

    The final ``avg_pool2d(out, 8)`` assumes an 8x8 feature map, which is what
    32x32 inputs give after the two stride-2 cells.
    '''

    def __init__(self, cell_type, num_cells, num_planes, num_classes=10):
        super(PNASNet, self).__init__()
        self.in_planes = num_planes
        self.cell_type = cell_type

        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(num_planes)

        self.layer1 = self._make_layer(num_planes, num_cells=num_cells)
        self.layer2 = self._downsample(num_planes*2)
        self.layer3 = self._make_layer(num_planes*2, num_cells=num_cells)
        self.layer4 = self._downsample(num_planes*4)
        self.layer5 = self._make_layer(num_planes*4, num_cells=num_cells)

        self.linear = nn.Linear(num_planes*4, num_classes)

    def _make_layer(self, planes, num_cells):
        '''Stack ``num_cells`` stride-1 cells and track the running width.'''
        layers = []
        for _ in range(num_cells):
            layers.append(self.cell_type(self.in_planes, planes, stride=1))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def _downsample(self, planes):
        '''Return one stride-2 cell that changes the width to ``planes``.'''
        layer = self.cell_type(self.in_planes, planes, stride=2)
        self.in_planes = planes
        return layer

    def forward(self, x):
        '''Return class logits of shape (batch, num_classes).'''
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = F.avg_pool2d(out, 8)
        return self.linear(out.view(out.size(0), -1))
110 | + | ||
111 | + | ||
def PNASNetA():
    '''Build a PNASNet from CellA cells with a base width of 44 channels.'''
    model = PNASNet(CellA, num_cells=6, num_planes=44)
    return model
114 | + | ||
def PNASNetB():
    '''Build a PNASNet from CellB cells with a base width of 32 channels.'''
    model = PNASNet(CellB, num_cells=6, num_planes=32)
    return model
117 | + | ||
118 | + | ||
def test():
    '''Smoke test: print PNASNetB's logits for one random 3x32x32 image.'''
    model = PNASNetB()
    sample = torch.randn(1, 3, 32, 32)
    print(model(sample))
124 | + | ||
125 | +# test() |
source/models/preact_resnet.py
0 → 100644
1 | +'''Pre-activation ResNet in PyTorch. | ||
2 | + | ||
3 | +Reference: | ||
4 | +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun | ||
5 | + Identity Mappings in Deep Residual Networks. arXiv:1603.05027 | ||
6 | +''' | ||
7 | +import torch | ||
8 | +import torch.nn as nn | ||
9 | +import torch.nn.functional as F | ||
10 | + | ||
11 | + | ||
class PreActBlock(nn.Module):
    '''Basic residual block with BN + ReLU applied before each convolution.

    A 1x1 conv ``shortcut`` attribute exists only when the stride or channel
    count changes; otherwise the raw input is added back unchanged.
    '''
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        out_planes = self.expansion * planes

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)

        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1,
                          stride=stride, bias=False)
            )

    def forward(self, x):
        pre = F.relu(self.bn1(x))
        # The projection (when present) consumes the pre-activated tensor;
        # the identity path uses the untouched input.
        if hasattr(self, 'shortcut'):
            residual = self.shortcut(pre)
        else:
            residual = x
        out = self.conv1(pre)
        out = self.conv2(F.relu(self.bn2(out)))
        out += residual
        return out
35 | + | ||
36 | + | ||
class PreActBottleneck(nn.Module):
    '''Bottleneck residual block (1x1 -> 3x3 -> 1x1) with pre-activation.

    The block outputs ``expansion * planes`` channels. A 1x1 conv ``shortcut``
    attribute exists only when the stride or channel count changes.
    '''
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBottleneck, self).__init__()
        out_planes = self.expansion * planes

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)

        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1,
                          stride=stride, bias=False)
            )

    def forward(self, x):
        pre = F.relu(self.bn1(x))
        # Projection path uses the pre-activated tensor; identity uses x.
        if hasattr(self, 'shortcut'):
            residual = self.shortcut(pre)
        else:
            residual = x
        out = self.conv1(pre)
        out = self.conv2(F.relu(self.bn2(out)))
        out = self.conv3(F.relu(self.bn3(out)))
        out += residual
        return out
63 | + | ||
64 | + | ||
class PreActResNet(nn.Module):
    '''Four-stage residual network assembled from pre-activation blocks.

    Args:
        block: block factory ``block(in_planes, planes, stride)`` exposing an
            ``expansion`` attribute.
        num_blocks: number of blocks in each of the four stages.
        num_classes: number of output logits.
    '''

    def __init__(self, block, num_blocks, num_classes=10):
        super(PreActResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        depth1, depth2, depth3, depth4 = (
            num_blocks[0], num_blocks[1], num_blocks[2], num_blocks[3])
        self.layer1 = self._make_layer(block, 64, depth1, stride=1)
        self.layer2 = self._make_layer(block, 128, depth2, stride=2)
        self.layer3 = self._make_layer(block, 256, depth3, stride=2)
        self.layer4 = self._make_layer(block, 512, depth4, stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        '''Build one stage; only its first block uses ``stride``.'''
        stage = []
        for block_stride in [stride] + [1]*(num_blocks-1):
            stage.append(block(self.in_planes, planes, block_stride))
            # Later blocks consume the expanded output of the previous one.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        feats = self.conv1(x)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        pooled = F.avg_pool2d(feats, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
95 | + | ||
96 | + | ||
def PreActResNet18():
    '''Build a PreActResNet of PreActBlock units with stage depths 2-2-2-2.'''
    stage_depths = [2, 2, 2, 2]
    return PreActResNet(PreActBlock, stage_depths)
99 | + | ||
def PreActResNet34():
    '''Build a PreActResNet of PreActBlock units with stage depths 3-4-6-3.'''
    stage_depths = [3, 4, 6, 3]
    return PreActResNet(PreActBlock, stage_depths)
102 | + | ||
def PreActResNet50():
    '''Build a PreActResNet of PreActBottleneck units, stage depths 3-4-6-3.'''
    stage_depths = [3, 4, 6, 3]
    return PreActResNet(PreActBottleneck, stage_depths)
105 | + | ||
def PreActResNet101():
    '''Build a PreActResNet of PreActBottleneck units, stage depths 3-4-23-3.'''
    stage_depths = [3, 4, 23, 3]
    return PreActResNet(PreActBottleneck, stage_depths)
108 | + | ||
def PreActResNet152():
    '''Build a PreActResNet of PreActBottleneck units, stage depths 3-8-36-3.'''
    stage_depths = [3, 8, 36, 3]
    return PreActResNet(PreActBottleneck, stage_depths)
111 | + | ||
112 | + | ||
def test():
    '''Smoke test: print PreActResNet18's output size for one random image.'''
    model = PreActResNet18()
    sample = torch.randn(1, 3, 32, 32)
    logits = model(sample)
    print(logits.size())
117 | + | ||
118 | +# test() |
source/models/regnet.py
0 → 100644
1 | +'''RegNet in PyTorch. | ||
2 | + | ||
3 | +Paper: "Designing Network Design Spaces". | ||
4 | + | ||
5 | +Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py | ||
6 | +''' | ||
7 | +import torch | ||
8 | +import torch.nn as nn | ||
9 | +import torch.nn.functional as F | ||
10 | + | ||
11 | + | ||
class SE(nn.Module):
    '''Squeeze-and-Excitation: rescale channels by a learned sigmoid gate.

    The gate is computed from the globally average-pooled input through two
    1x1 convolutions (``in_planes -> se_planes -> in_planes``).
    '''

    def __init__(self, in_planes, se_planes):
        super(SE, self).__init__()
        self.se1 = nn.Conv2d(in_planes, se_planes, kernel_size=1, bias=True)
        self.se2 = nn.Conv2d(se_planes, in_planes, kernel_size=1, bias=True)

    def forward(self, x):
        squeezed = F.adaptive_avg_pool2d(x, (1, 1))
        hidden = F.relu(self.se1(squeezed))
        gate = torch.sigmoid(self.se2(hidden))
        # The (N, C, 1, 1) gate broadcasts over the spatial dimensions.
        return x * gate
26 | + | ||
27 | + | ||
class Block(nn.Module):
    '''Residual bottleneck: 1x1 conv -> grouped 3x3 conv -> optional SE -> 1x1 conv.

    Args:
        w_in, w_out: input and output channel counts.
        stride: stride of the 3x3 conv (and of the projection shortcut).
        group_width: channels per group in the 3x3 conv.
        bottleneck_ratio: bottleneck width as a fraction of ``w_out``.
        se_ratio: SE hidden width as a fraction of ``w_in``; SE is skipped
            when this is not greater than 0.
    '''

    def __init__(self, w_in, w_out, stride, group_width, bottleneck_ratio, se_ratio):
        super(Block, self).__init__()
        w_b = int(round(w_out * bottleneck_ratio))
        groups = w_b // group_width

        # 1x1
        self.conv1 = nn.Conv2d(w_in, w_b, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(w_b)
        # grouped 3x3
        self.conv2 = nn.Conv2d(w_b, w_b, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(w_b)
        # optional squeeze-and-excitation
        self.with_se = se_ratio > 0
        if self.with_se:
            self.se = SE(w_b, int(round(w_in * se_ratio)))
        # 1x1
        self.conv3 = nn.Conv2d(w_b, w_out, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(w_out)

        if stride != 1 or w_in != w_out:
            # Projection shortcut to match shape.
            self.shortcut = nn.Sequential(
                nn.Conv2d(w_in, w_out,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(w_out)
            )
        else:
            # Empty Sequential acts as identity.
            self.shortcut = nn.Sequential()

    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = F.relu(self.bn2(self.conv2(y)))
        if self.with_se:
            y = self.se(y)
        y = self.bn3(self.conv3(y))
        y += self.shortcut(x)
        return F.relu(y)
66 | + | ||
67 | + | ||
class RegNet(nn.Module):
    '''Four-stage network of ``Block`` modules configured by a ``cfg`` dict.

    ``cfg`` supplies per-stage lists 'depths', 'widths' and 'strides', plus
    the scalars 'group_width', 'bottleneck_ratio' and 'se_ratio'.
    '''

    def __init__(self, cfg, num_classes=10):
        super(RegNet, self).__init__()
        self.cfg = cfg
        self.in_planes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(0)
        self.layer2 = self._make_layer(1)
        self.layer3 = self._make_layer(2)
        self.layer4 = self._make_layer(3)
        self.linear = nn.Linear(self.cfg['widths'][-1], num_classes)

    def _make_layer(self, idx):
        '''Build stage ``idx``; only its first block uses the stage stride.'''
        cfg = self.cfg
        depth = cfg['depths'][idx]
        width = cfg['widths'][idx]
        stage_stride = cfg['strides'][idx]
        shared = (cfg['group_width'], cfg['bottleneck_ratio'], cfg['se_ratio'])

        blocks = []
        for position in range(depth):
            block_stride = 1 if position else stage_stride
            blocks.append(
                Block(self.in_planes, width, block_stride, *shared))
            self.in_planes = width
        return nn.Sequential(*blocks)

    def forward(self, x):
        feats = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        pooled = F.adaptive_avg_pool2d(feats, (1, 1))
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
108 | + | ||
109 | + | ||
def RegNetX_200MF():
    '''Return a RegNet using the X-200MF settings below (no SE, se_ratio 0).'''
    settings = dict(
        depths=[1, 1, 4, 7],
        widths=[24, 56, 152, 368],
        strides=[1, 1, 2, 2],
        group_width=8,
        bottleneck_ratio=1,
        se_ratio=0,
    )
    return RegNet(settings)
120 | + | ||
121 | + | ||
def RegNetX_400MF():
    '''Return a RegNet using the X-400MF settings below (no SE, se_ratio 0).'''
    settings = dict(
        depths=[1, 2, 7, 12],
        widths=[32, 64, 160, 384],
        strides=[1, 1, 2, 2],
        group_width=16,
        bottleneck_ratio=1,
        se_ratio=0,
    )
    return RegNet(settings)
132 | + | ||
133 | + | ||
def RegNetY_400MF():
    '''Return a RegNet using the Y-400MF settings below (SE with ratio 0.25).'''
    settings = dict(
        depths=[1, 2, 7, 12],
        widths=[32, 64, 160, 384],
        strides=[1, 1, 2, 2],
        group_width=16,
        bottleneck_ratio=1,
        se_ratio=0.25,
    )
    return RegNet(settings)
144 | + | ||
145 | + | ||
def test():
    '''Smoke test: print RegNetX_200MF and its output shape for a random batch.'''
    model = RegNetX_200MF()
    print(model)
    batch = torch.randn(2, 3, 32, 32)
    logits = model(batch)
    print(logits.shape)
152 | + | ||
153 | + | ||
# Run the smoke test only when this file is executed directly as a script.
if __name__ == '__main__':
    test()
source/models/resnet.py
0 → 100644
1 | +'''ResNet in PyTorch. | ||
2 | + | ||
3 | +For Pre-activation ResNet, see 'preact_resnet.py'. | ||
4 | + | ||
5 | +Reference: | ||
6 | +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun | ||
7 | + Deep Residual Learning for Image Recognition. arXiv:1512.03385 | ||
8 | +''' | ||
9 | +import torch | ||
10 | +import torch.nn as nn | ||
11 | +import torch.nn.functional as F | ||
12 | + | ||
13 | + | ||
class BasicBlock(nn.Module):
    '''Two 3x3 conv + BN layers with a residual connection and final ReLU.

    The shortcut is the identity unless the stride or channel count changes,
    in which case it is a 1x1 conv + batch norm projection.
    '''
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes)
            )
        else:
            # Empty Sequential acts as identity.
            self.shortcut = nn.Sequential()

    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        y += self.shortcut(x)
        return F.relu(y)
40 | + | ||
41 | + | ||
class Bottleneck(nn.Module):
    '''1x1 -> 3x3 -> 1x1 residual block that outputs ``expansion * planes`` channels.

    The shortcut is the identity unless the stride or channel count changes,
    in which case it is a 1x1 conv + batch norm projection.
    '''
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes)
            )
        else:
            # Empty Sequential acts as identity.
            self.shortcut = nn.Sequential()

    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = F.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        y += self.shortcut(x)
        return F.relu(y)
71 | + | ||
72 | + | ||
class ResNet(nn.Module):
    '''Four-stage residual network for 3-channel images.

    Args:
        block: block factory ``block(in_planes, planes, stride)`` exposing an
            ``expansion`` attribute.
        num_blocks: number of blocks in each of the four stages.
        num_classes: number of output logits.
    '''

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        depth1, depth2, depth3, depth4 = (
            num_blocks[0], num_blocks[1], num_blocks[2], num_blocks[3])
        self.layer1 = self._make_layer(block, 64, depth1, stride=1)
        self.layer2 = self._make_layer(block, 128, depth2, stride=2)
        self.layer3 = self._make_layer(block, 256, depth3, stride=2)
        self.layer4 = self._make_layer(block, 512, depth4, stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        '''Build one stage; only its first block uses ``stride``.'''
        stage = []
        for block_stride in [stride] + [1]*(num_blocks-1):
            stage.append(block(self.in_planes, planes, block_stride))
            # Later blocks consume the expanded output of the previous one.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        feats = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        pooled = F.avg_pool2d(feats, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
105 | + | ||
106 | + | ||
def ResNet18():
    '''Build a ResNet of BasicBlock units with stage depths 2-2-2-2.'''
    stage_depths = [2, 2, 2, 2]
    return ResNet(BasicBlock, stage_depths)
109 | + | ||
110 | + | ||
def ResNet34():
    '''Build a ResNet of BasicBlock units with stage depths 3-4-6-3.'''
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths)
113 | + | ||
114 | + | ||
def ResNet50():
    '''Build a ResNet of Bottleneck units with stage depths 3-4-6-3.'''
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths)
117 | + | ||
118 | + | ||
def ResNet101():
    '''Build a ResNet of Bottleneck units with stage depths 3-4-23-3.'''
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths)
121 | + | ||
122 | + | ||
def ResNet152():
    '''Build a ResNet of Bottleneck units with stage depths 3-8-36-3.'''
    stage_depths = [3, 8, 36, 3]
    return ResNet(Bottleneck, stage_depths)
125 | + | ||
126 | + | ||
def test():
    '''Smoke test: print ResNet18's output size for one random 3x32x32 image.'''
    model = ResNet18()
    sample = torch.randn(1, 3, 32, 32)
    logits = model(sample)
    print(logits.size())
131 | + | ||
132 | +# test() |
source/models/resnext.py
0 → 100644
1 | +'''ResNeXt in PyTorch. | ||
2 | + | ||
3 | +See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details. | ||
4 | +''' | ||
5 | +import torch | ||
6 | +import torch.nn as nn | ||
7 | +import torch.nn.functional as F | ||
8 | + | ||
9 | + | ||
class Block(nn.Module):
    '''Residual block whose 3x3 conv is split into ``cardinality`` groups.

    The inner width is ``cardinality * bottleneck_width`` and the block
    outputs ``expansion`` times that many channels. The shortcut is the
    identity unless the stride or channel count changes, in which case it is
    a 1x1 conv + batch norm projection.
    '''
    expansion = 2

    def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
        super(Block, self).__init__()
        inner = cardinality * bottleneck_width
        outer = self.expansion * inner

        self.conv1 = nn.Conv2d(in_planes, inner, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(inner)
        self.conv2 = nn.Conv2d(inner, inner, kernel_size=3, stride=stride,
                               padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(inner)
        self.conv3 = nn.Conv2d(inner, outer, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(outer)

        if stride != 1 or in_planes != outer:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, outer, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(outer)
            )
        else:
            # Empty Sequential acts as identity.
            self.shortcut = nn.Sequential()

    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = F.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        y += self.shortcut(x)
        return F.relu(y)
38 | + | ||
39 | + | ||
class ResNeXt(nn.Module):
    '''Three-stage ResNeXt classifier assembled from ``Block``.

    ``num_blocks`` gives the number of blocks per stage (indices 0..2 are
    read). ``bottleneck_width`` is doubled after every stage, so the final
    feature width is ``cardinality * bottleneck_width * 8``.
    '''

    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
        super().__init__()
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(num_blocks[0], 1)
        self.layer2 = self._make_layer(num_blocks[1], 2)
        self.layer3 = self._make_layer(num_blocks[2], 2)
        # Uses the constructor argument, not self.bottleneck_width, which
        # _make_layer has already doubled three times by this point.
        head_features = cardinality * bottleneck_width * 8
        self.linear = nn.Linear(head_features, num_classes)

    def _make_layer(self, num_blocks, stride):
        '''Build one stage; only its first block uses ``stride``, the rest use 1.'''
        blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            blocks.append(Block(self.in_planes, self.cardinality,
                                self.bottleneck_width, block_stride))
            self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
        # Increase bottleneck_width by 2 after each stage.
        self.bottleneck_width *= 2
        return nn.Sequential(*blocks)

    def forward(self, x):
        '''Stem -> layer1..layer3 -> 8x8 average pool -> flatten -> linear.'''
        feats = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            feats = stage(feats)
        feats = F.avg_pool2d(feats, 8)
        feats = feats.view(feats.size(0), -1)
        return self.linear(feats)
75 | + | ||
76 | + | ||
def ResNeXt29_2x64d():
    '''Return a ResNeXt with 3 blocks per stage, cardinality 2, bottleneck width 64.'''
    stages = [3, 3, 3]
    return ResNeXt(num_blocks=stages, cardinality=2, bottleneck_width=64)

def ResNeXt29_4x64d():
    '''Return a ResNeXt with 3 blocks per stage, cardinality 4, bottleneck width 64.'''
    stages = [3, 3, 3]
    return ResNeXt(num_blocks=stages, cardinality=4, bottleneck_width=64)

def ResNeXt29_8x64d():
    '''Return a ResNeXt with 3 blocks per stage, cardinality 8, bottleneck width 64.'''
    stages = [3, 3, 3]
    return ResNeXt(num_blocks=stages, cardinality=8, bottleneck_width=64)

def ResNeXt29_32x4d():
    '''Return a ResNeXt with 3 blocks per stage, cardinality 32, bottleneck width 4.'''
    stages = [3, 3, 3]
    return ResNeXt(num_blocks=stages, cardinality=32, bottleneck_width=4)
88 | + | ||
def test_resnext():
    '''Smoke test: run ResNeXt29_2x64d on one random 3x32x32 image and print the output size.'''
    model = ResNeXt29_2x64d()
    sample = torch.randn(1, 3, 32, 32)
    logits = model(sample)
    print(logits.size())
94 | + | ||
95 | +# test_resnext() |
source/models/senet.py
0 → 100644
1 | +'''SENet in PyTorch. | ||
2 | + | ||
3 | +SENet is the winner of ImageNet-2017. The paper is not released yet. | ||
4 | +''' | ||
5 | +import torch | ||
6 | +import torch.nn as nn | ||
7 | +import torch.nn.functional as F | ||
8 | + | ||
9 | + | ||
class BasicBlock(nn.Module):
    '''Post-activation residual block with a squeeze-and-excitation gate.

    Two 3x3 conv/bn stages produce ``planes`` channels; a per-channel gate in
    (0, 1) rescales them before the shortcut is added and the final ReLU is
    applied.

    Args:
        in_planes: number of input channels.
        planes: number of output channels. The gate bottleneck uses
            ``planes // 16`` channels.
        stride: stride of the first 3x3 conv (and of the projection shortcut).
    '''

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Identity shortcut unless the resolution or channel count changes.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

        # SE layers
        self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)  # Use nn.Conv2d instead of nn.Linear
        self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)

    def forward(self, x):
        '''Return ``relu(se_gate(conv_path(x)) + shortcut(x))``.'''
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Squeeze: global average over both spatial dims -> [N, C, 1, 1].
        # Adaptive pooling (rather than a square window of size H) keeps this
        # correct for non-square feature maps.
        w = F.adaptive_avg_pool2d(out, 1)
        # Excitation: bottleneck of two 1x1 convs ending in a sigmoid gate.
        w = F.relu(self.fc1(w))
        w = torch.sigmoid(self.fc2(w))  # torch.sigmoid: F.sigmoid is deprecated on older releases
        # Scale: the gate broadcasts over H and W.
        out = out * w

        out += self.shortcut(x)
        out = F.relu(out)
        return out
43 | + | ||
44 | + | ||
class PreActBlock(nn.Module):
    '''Pre-activation residual block with a squeeze-and-excitation gate.

    BatchNorm + ReLU precede each 3x3 conv. A projection shortcut (a bare
    1x1 conv) is only created when ``stride != 1`` or the channel count
    changes; it consumes the pre-activated input. Otherwise the raw input is
    added back unchanged. No activation is applied after the addition.

    Args:
        in_planes: number of input channels.
        planes: number of output channels. The gate bottleneck uses
            ``planes // 16`` channels.
        stride: stride of the first 3x3 conv (and of the projection shortcut).
    '''

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

        # Deliberately left undefined for the identity case: forward() checks
        # for the attribute to decide which tensor feeds the residual.
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False)
            )

        # SE layers
        self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)
        self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)

    def forward(self, x):
        '''Return ``se_gate(conv_path(x)) + shortcut``.'''
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))

        # Squeeze: global average over both spatial dims -> [N, C, 1, 1].
        # Adaptive pooling keeps this correct for non-square feature maps.
        w = F.adaptive_avg_pool2d(out, 1)
        # Excitation: bottleneck of two 1x1 convs ending in a sigmoid gate.
        w = F.relu(self.fc1(w))
        w = torch.sigmoid(self.fc2(w))  # torch.sigmoid: F.sigmoid is deprecated on older releases
        # Scale: the gate broadcasts over H and W.
        out = out * w

        out += shortcut
        return out
77 | + | ||
78 | + | ||
class SENet(nn.Module):
    '''Four-stage classifier whose stages are built from ``block``.

    ``block`` is called as ``block(in_planes, planes, stride)``.
    ``num_blocks[i]`` is the number of blocks in stage ``i``. Stage widths are
    64/128/256/512 with strides 1/2/2/2.
    '''

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        # (planes, stride) for layer1..layer4, registered in that order.
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for idx, (planes, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, planes, num_blocks[idx], stride=stride)
            setattr(self, 'layer%d' % (idx + 1), stage)
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        '''Build one stage; only its first block uses ``stride``, the rest use 1.'''
        stage = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes
        return nn.Sequential(*stage)

    def forward(self, x):
        '''Stem -> layer1..layer4 -> 4x4 average pool -> flatten -> linear.'''
        feats = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        feats = F.avg_pool2d(feats, 4)
        feats = feats.view(feats.size(0), -1)
        return self.linear(feats)
110 | + | ||
111 | + | ||
def SENet18():
    '''Return an SENet built from PreActBlock with stage depths [2, 2, 2, 2].'''
    depths = [2, 2, 2, 2]
    return SENet(PreActBlock, depths)
114 | + | ||
115 | + | ||
def test():
    '''Smoke test: run SENet18 on one random 3x32x32 image and print the output size.'''
    model = SENet18()
    sample = torch.randn(1, 3, 32, 32)
    logits = model(sample)
    print(logits.size())
120 | + | ||
121 | +# test() |
source/models/shufflenet.py
0 → 100644
1 | +'''ShuffleNet in PyTorch. | ||
2 | + | ||
3 | +See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details. | ||
4 | +''' | ||
5 | +import torch | ||
6 | +import torch.nn as nn | ||
7 | +import torch.nn.functional as F | ||
8 | + | ||
9 | + | ||
class ShuffleBlock(nn.Module):
    '''Channel shuffle: interleave the channels of ``groups`` equal groups.'''

    def __init__(self, groups):
        super(ShuffleBlock, self).__init__()
        self.groups = groups

    def forward(self, x):
        '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]'''
        N,C,H,W = x.size()
        g = self.groups
        return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).reshape(N,C,H,W)


class Bottleneck(nn.Module):
    '''ShuffleNet unit: grouped 1x1 conv, channel shuffle, depthwise 3x3, grouped 1x1 conv.

    With ``stride == 2`` the shortcut is a 3x3 average pool and is
    concatenated to the conv branch, so the unit emits
    ``out_planes + in_planes`` channels. Otherwise the input is added to the
    conv branch and the unit emits ``out_planes`` channels.

    Args:
        in_planes: number of input channels.
        out_planes: channels produced by the conv branch. The bottleneck
            width is ``out_planes // 4``.
        stride: stride of the depthwise conv; 2 selects the concat shortcut.
        groups: group count of the 1x1 convs. The first 1x1 conv falls back
            to a single group when ``in_planes == 24``.
    '''

    def __init__(self, in_planes, out_planes, stride, groups):
        super(Bottleneck, self).__init__()
        self.stride = stride

        # Integer division: channel counts must be ints, and `/` yields a
        # float on Python 3, which nn.Conv2d rejects.
        mid_planes = out_planes // 4
        g = 1 if in_planes==24 else groups
        self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_planes)
        self.shuffle1 = ShuffleBlock(groups=g)
        self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_planes)
        self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.shortcut = nn.Sequential()
        if stride == 2:
            self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))

    def forward(self, x):
        '''Return ReLU of the conv branch concatenated with (stride 2) or added to the shortcut.'''
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.shuffle1(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        res = self.shortcut(x)
        out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res)
        return out
49 | + | ||
50 | + | ||
class ShuffleNet(nn.Module):
    '''ShuffleNet classifier with three stages of ``Bottleneck`` units.

    Args:
        cfg: dict with keys ``'out_planes'`` (three stage widths),
            ``'num_blocks'`` (three stage depths) and ``'groups'`` (group
            count of the 1x1 convs).
        num_classes: size of the final linear layer. Defaults to 10, the
            value that was previously hard-coded.
    '''

    def __init__(self, cfg, num_classes=10):
        super(ShuffleNet, self).__init__()
        out_planes = cfg['out_planes']
        num_blocks = cfg['num_blocks']
        groups = cfg['groups']

        self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(24)
        self.in_planes = 24
        self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
        self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
        self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
        self.linear = nn.Linear(out_planes[2], num_classes)

    def _make_layer(self, out_planes, num_blocks, groups):
        '''Build one stage of ``num_blocks`` units ending at ``out_planes`` channels.'''
        layers = []
        for i in range(num_blocks):
            stride = 2 if i == 0 else 1
            # The first (stride-2) unit concatenates its input onto its conv
            # branch, so that branch only has to supply the remaining channels.
            cat_planes = self.in_planes if i == 0 else 0
            layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
            self.in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        '''Stem -> layer1..layer3 -> 4x4 average pool -> flatten -> linear.'''
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
84 | + | ||
85 | + | ||
def ShuffleNetG2():
    '''Return a ShuffleNet with 2 groups, widths 200/400/800 and depths 4/8/4.'''
    return ShuffleNet({
        'out_planes': [200,400,800],
        'num_blocks': [4,8,4],
        'groups': 2,
    })

def ShuffleNetG3():
    '''Return a ShuffleNet with 3 groups, widths 240/480/960 and depths 4/8/4.'''
    return ShuffleNet({
        'out_planes': [240,480,960],
        'num_blocks': [4,8,4],
        'groups': 3,
    })
101 | + | ||
102 | + | ||
def test():
    '''Smoke test: run ShuffleNetG2 on one random 3x32x32 image and print the logits.'''
    model = ShuffleNetG2()
    sample = torch.randn(1, 3, 32, 32)
    logits = model(sample)
    print(logits)
108 | + | ||
109 | +# test() |
source/models/shufflenetv2.py
0 → 100644
1 | +'''ShuffleNetV2 in PyTorch. | ||
2 | + | ||
3 | +See the paper "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" for more details. | ||
4 | +''' | ||
5 | +import torch | ||
6 | +import torch.nn as nn | ||
7 | +import torch.nn.functional as F | ||
8 | + | ||
9 | + | ||
class ShuffleBlock(nn.Module):
    '''Channel shuffle: interleave the channels of ``groups`` equal groups.'''

    def __init__(self, groups=2):
        super().__init__()
        self.groups = groups

    def forward(self, x):
        '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]'''
        batch, channels, height, width = x.size()
        per_group = channels // self.groups
        grouped = x.view(batch, self.groups, per_group, height, width)
        # Swapping the group and within-group axes is what interleaves them.
        interleaved = grouped.transpose(1, 2)
        return interleaved.reshape(batch, channels, height, width)
20 | + | ||
21 | + | ||
class SplitBlock(nn.Module):
    '''Split a tensor along the channel axis at ``int(C * ratio)``.'''

    def __init__(self, ratio):
        super().__init__()
        self.ratio = ratio

    def forward(self, x):
        '''Return ``(first, rest)``: the leading ``int(C * ratio)`` channels and the remainder.'''
        cut = int(x.size(1) * self.ratio)
        first = x[:, :cut, :, :]
        rest = x[:, cut:, :, :]
        return first, rest
30 | + | ||
31 | + | ||
class BasicBlock(nn.Module):
    '''Stride-1 ShuffleNetV2 unit.

    The input is split along channels. The first part passes through
    untouched; the second goes through 1x1 conv -> depthwise 3x3 -> 1x1 conv.
    Both parts are concatenated and channel-shuffled.
    '''

    def __init__(self, in_channels, split_ratio=0.5):
        super().__init__()
        self.split = SplitBlock(split_ratio)
        # Width of the processed branch.
        branch = int(in_channels * split_ratio)
        self.conv1 = nn.Conv2d(branch, branch, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(branch)
        self.conv2 = nn.Conv2d(branch, branch, kernel_size=3, stride=1,
                               padding=1, groups=branch, bias=False)
        self.bn2 = nn.BatchNorm2d(branch)
        self.conv3 = nn.Conv2d(branch, branch, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(branch)
        self.shuffle = ShuffleBlock()

    def forward(self, x):
        '''Process the second split, concatenate it after the first, then shuffle.'''
        passthrough, branch = self.split(x)
        branch = F.relu(self.bn1(self.conv1(branch)))
        branch = self.bn2(self.conv2(branch))  # no ReLU after the depthwise conv
        branch = F.relu(self.bn3(self.conv3(branch)))
        merged = torch.cat([passthrough, branch], 1)
        return self.shuffle(merged)
56 | + | ||
57 | + | ||
class DownBlock(nn.Module):
    '''Stride-2 ShuffleNetV2 unit.

    Both branches see the full input and each emits ``out_channels // 2``
    channels at half resolution; the results are concatenated and shuffled.
    '''

    def __init__(self, in_channels, out_channels):
        super().__init__()
        half = out_channels // 2
        # left: depthwise 3x3 (stride 2) -> 1x1
        self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2,
                               padding=1, groups=in_channels, bias=False)
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv2 = nn.Conv2d(in_channels, half, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(half)
        # right: 1x1 -> depthwise 3x3 (stride 2) -> 1x1
        self.conv3 = nn.Conv2d(in_channels, half, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(half)
        self.conv4 = nn.Conv2d(half, half, kernel_size=3, stride=2,
                               padding=1, groups=half, bias=False)
        self.bn4 = nn.BatchNorm2d(half)
        self.conv5 = nn.Conv2d(half, half, kernel_size=1, bias=False)
        self.bn5 = nn.BatchNorm2d(half)

        self.shuffle = ShuffleBlock()

    def forward(self, x):
        '''Run both branches on ``x``, concatenate left then right, and shuffle.'''
        left = self.bn1(self.conv1(x))
        left = F.relu(self.bn2(self.conv2(left)))

        right = F.relu(self.bn3(self.conv3(x)))
        right = self.bn4(self.conv4(right))
        right = F.relu(self.bn5(self.conv5(right)))

        merged = torch.cat([left, right], 1)
        return self.shuffle(merged)
94 | + | ||
95 | + | ||
class ShuffleNetV2(nn.Module):
    '''ShuffleNetV2 classifier.

    Args:
        net_size: key into the module-level ``configs`` dict selecting the
            stage widths and depths.
        num_classes: size of the final linear layer. Defaults to 10, the
            value that was previously hard-coded.
    '''

    def __init__(self, net_size, num_classes=10):
        super(ShuffleNetV2, self).__init__()
        out_channels = configs[net_size]['out_channels']
        num_blocks = configs[net_size]['num_blocks']

        self.conv1 = nn.Conv2d(3, 24, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(24)
        self.in_channels = 24
        self.layer1 = self._make_layer(out_channels[0], num_blocks[0])
        self.layer2 = self._make_layer(out_channels[1], num_blocks[1])
        self.layer3 = self._make_layer(out_channels[2], num_blocks[2])
        self.conv2 = nn.Conv2d(out_channels[2], out_channels[3],
                               kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels[3])
        self.linear = nn.Linear(out_channels[3], num_classes)

    def _make_layer(self, out_channels, num_blocks):
        '''Build one stage: a DownBlock followed by ``num_blocks`` BasicBlocks.'''
        layers = [DownBlock(self.in_channels, out_channels)]
        for i in range(num_blocks):
            layers.append(BasicBlock(out_channels))
            self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        '''Stem -> layer1..layer3 -> 1x1 conv/bn/ReLU -> 4x4 average pool -> flatten -> linear.'''
        out = F.relu(self.bn1(self.conv1(x)))
        # out = F.max_pool2d(out, 3, stride=2, padding=1)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
132 | + | ||
133 | + | ||
# Per-size settings read by ShuffleNetV2.__init__, keyed by net_size.
#   'out_channels': widths of the three stages plus the final 1x1 conv.
#   'num_blocks':   number of BasicBlocks in each of the three stages.
configs = {
    0.5: {
        'out_channels': (48, 96, 192, 1024),
        'num_blocks': (3, 7, 3)
    },

    1: {
        'out_channels': (116, 232, 464, 1024),
        'num_blocks': (3, 7, 3)
    },
    1.5: {
        'out_channels': (176, 352, 704, 1024),
        'num_blocks': (3, 7, 3)
    },
    2: {
        # NOTE(review): the ShuffleNet V2 paper appears to list 244 (not 224)
        # for the first stage of the 2x model -- confirm before changing, as
        # it alters parameter shapes and breaks existing checkpoints.
        'out_channels': (224, 488, 976, 2048),
        'num_blocks': (3, 7, 3)
    }
}
153 | + | ||
154 | + | ||
def test():
    '''Smoke test: run the 0.5x ShuffleNetV2 on three random 3x32x32 images and print the output shape.'''
    model = ShuffleNetV2(net_size=0.5)
    batch = torch.randn(3, 3, 32, 32)
    logits = model(batch)
    print(logits.shape)
160 | + | ||
161 | + | ||
162 | +# test() |
source/models/vgg.py
0 → 100644
1 | +'''VGG11/13/16/19 in Pytorch.''' | ||
2 | +import torch | ||
3 | +import torch.nn as nn | ||
4 | + | ||
5 | + | ||
# Layer plans per variant: an int is the output width of a 3x3 conv
# (followed by BatchNorm + ReLU), 'M' is a 2x2 max-pool with stride 2.
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    '''VGG feature extractor followed by a single linear classifier.

    Args:
        vgg_name: key into the module-level ``cfg`` dict
            ('VGG11', 'VGG13', 'VGG16' or 'VGG19'). An unknown name raises
            KeyError.
        num_classes: size of the linear classifier. Defaults to 10, the
            value that was previously hard-coded.
    '''

    def __init__(self, vgg_name, num_classes=10):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        # 512 input features: every plan ends at 512 channels, and the five
        # 'M' pools reduce a 32x32 input to 1x1.
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        '''Apply the conv features, flatten per sample and classify.'''
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        '''Translate a layer plan (ints and 'M') into an nn.Sequential.'''
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        # A 1x1 / stride-1 average pool leaves the tensor unchanged; it is
        # kept so the layer layout stays the same.
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)
39 | + | ||
40 | + | ||
def test():
    '''Smoke test: run VGG11 on two random 3x32x32 images and print the output size.'''
    model = VGG('VGG11')
    batch = torch.randn(2, 3, 32, 32)
    logits = model(batch)
    print(logits.size())
46 | + | ||
47 | +# test() |
source/replace.py
0 → 100644
1 | +import torch | ||
2 | +import torch.nn as nn | ||
3 | +import torch.nn.functional as F | ||
4 | +import math | ||
5 | +from lsq_sq import * | ||
6 | +from models.mobilenet import * | ||
7 | + | ||
# Module-wide counters, incremented by replace_sq for every Conv2d /
# activation it swaps (they persist across calls).
conv_idx = -1
act_idx = -1
# Kept for backward compatibility only: replace_sq tracks the preceding
# conv in a local variable of the same name.
former_conv = None

def replace_sq(model, bit_width=8):
    '''Swap the direct children of ``model`` for their quantized counterparts, in place.

    Conv2d becomes FuseConv2dQ (re-using the original weight/bias),
    BatchNorm2d is handed to the preceding conv's ``replace_bn`` and replaced
    by ``nn.Identity``, the supported activations become their Q* versions,
    Linear becomes QLinear and AdaptiveAvgPool2d becomes QAvgPool2d.
    ``nn.Sequential`` children are processed recursively; every other module
    type (including custom block classes) is left untouched.

    Args:
        model: container whose children are rewritten in place.
        bit_width: bit width passed to every quantized module.

    Returns:
        The same ``model`` object.
    '''
    global conv_idx, act_idx

    children = model._modules
    # Name of the most recent Conv2d among this container's children. It is
    # local to each call, so it must start out defined: a BatchNorm2d that
    # comes before any conv previously hit an UnboundLocalError.
    former_conv = None

    for name, module in model.named_children():
        if isinstance(module, (nn.Sequential)): #- conventional
            replace_sq(children[name], bit_width)

        elif isinstance(module, nn.Conv2d):
            former_conv = name
            conv_idx += 1
            bias = module.bias is not None

            children[name] = FuseConv2dQ(module.in_channels, module.out_channels,
                                         module.kernel_size, stride=module.stride,
                                         padding=module.padding, dilation=module.dilation,
                                         groups=module.groups, bias=bias, wbit=bit_width)
            children[name].weight = module.weight
            if bias:
                children[name].bias = module.bias

        elif isinstance(module, nn.BatchNorm2d):
            if former_conv is None:
                # No conv in this container to hand the BatchNorm to:
                # keep the BatchNorm as it is.
                continue
            children[former_conv].replace_bn(module)
            children[name] = nn.Identity()

        elif isinstance(module, nn.ReLU):
            act_idx += 1
            children[name] = QReLU(abit=bit_width, inplace=False, dequantize=True)

        elif isinstance(module, nn.Hardswish):
            act_idx += 1
            children[name] = QHswish(abit=bit_width, inplace=False, dequantize=True)

        elif isinstance(module, nn.Hardsigmoid):
            act_idx += 1
            children[name] = QHsigmoid(abit=bit_width, inplace=False, dequantize=True)

        elif isinstance(module, nn.LeakyReLU):
            act_idx += 1
            children[name] = QLeakyReLU(abit=bit_width, inplace=False, dequantize=True)

        elif isinstance(module, nn.Linear):
            bias = module.bias is not None
            children[name] = QLinear(module.in_features, module.out_features, bias, wbit=bit_width)
            children[name].weight = module.weight
            if bias:
                children[name].bias = module.bias

        elif isinstance(module, nn.AdaptiveAvgPool2d):
            children[name] = QAvgPool2d(abit=bit_width, dequantize=True, output_size=module.output_size)

        # Any other module type stays as it is.

    return model
source/replace_int.py
0 → 100644
1 | +import torch | ||
2 | +import torch.nn as nn | ||
3 | +import torch.nn.functional as F | ||
4 | +import math | ||
5 | +from lsq_int import * | ||
6 | +from models.mobilenet import * | ||
7 | + | ||
# Module-wide counters, incremented by replace_int for every Conv2d /
# activation it swaps (they persist across calls).
conv_idx = -1
act_idx = -1
# Kept for backward compatibility only: replace_int tracks the preceding
# conv in a local variable of the same name.
former_conv = None

def replace_int(model, bit_width=8):
    '''Swap the direct children of ``model`` for their quantized counterparts, in place.

    Conv2d becomes FuseConv2dQ (re-using the original weight/bias),
    BatchNorm2d is handed to the preceding conv's ``replace_bn`` and replaced
    by ``nn.Identity``, ReLU / Hardswish / LeakyReLU become their Q* versions,
    Linear becomes QLinear and AdaptiveAvgPool2d becomes QAvgPool2d.
    ``nn.Sequential`` children are processed recursively; every other module
    type (including custom block classes) is left untouched.

    Args:
        model: container whose children are rewritten in place.
        bit_width: bit width passed to every quantized module.

    Returns:
        The same ``model`` object.
    '''
    global conv_idx, act_idx

    children = model._modules
    # Name of the most recent Conv2d among this container's children. It is
    # local to each call, so it must start out defined: a BatchNorm2d that
    # comes before any conv previously hit an UnboundLocalError.
    former_conv = None

    for name, module in model.named_children():
        if isinstance(module, (nn.Sequential)): #- conventional
            replace_int(children[name], bit_width)

        elif isinstance(module, nn.Conv2d):
            former_conv = name
            conv_idx += 1
            bias = module.bias is not None

            children[name] = FuseConv2dQ(module.in_channels, module.out_channels,
                                         module.kernel_size, stride=module.stride,
                                         padding=module.padding, dilation=module.dilation,
                                         groups=module.groups, bias=bias, wbit=bit_width)
            children[name].weight = module.weight
            if bias:
                children[name].bias = module.bias

        elif isinstance(module, nn.BatchNorm2d):
            if former_conv is None:
                # No conv in this container to hand the BatchNorm to:
                # keep the BatchNorm as it is.
                continue
            children[former_conv].replace_bn(module)
            children[name] = nn.Identity()

        elif isinstance(module, nn.ReLU):
            act_idx += 1
            children[name] = QReLU(abit=bit_width, inplace=False, dequantize=True)

        elif isinstance(module, nn.Hardswish):
            act_idx += 1
            children[name] = QHswish(abit=bit_width, inplace=False, dequantize=True)

        elif isinstance(module, nn.LeakyReLU):
            act_idx += 1
            children[name] = QLeakyReLU(abit=bit_width, inplace=False, dequantize=True)

        elif isinstance(module, nn.Linear):
            bias = module.bias is not None
            children[name] = QLinear(module.in_features, module.out_features, bias, wbit=bit_width)
            children[name].weight = module.weight
            if bias:
                children[name].bias = module.bias

        elif isinstance(module, nn.AdaptiveAvgPool2d):
            children[name] = QAvgPool2d(abit=bit_width, dequantize=True, output_size=module.output_size)

        # Any other module type stays as it is.

    return model
source/utils.py
0 → 100644
1 | +'''Some helper functions for PyTorch, including: | ||
2 | + - get_mean_and_std: calculate the mean and std value of dataset. | ||
3 | + - msr_init: net parameter initialization. | ||
4 | + - progress_bar: progress bar mimic xlua.progress. | ||
5 | +''' | ||
6 | +import os | ||
7 | +import sys | ||
8 | +import time | ||
9 | +import math | ||
10 | + | ||
11 | +import torch | ||
12 | +import torch.nn as nn | ||
13 | +import torch.nn.init as init | ||
14 | + | ||
15 | + | ||
def get_mean_and_std(dataset):
    '''Compute the mean and std value of dataset.

    Samples are loaded one at a time; the per-sample mean and std of each of
    the first three channels are summed and divided by ``len(dataset)``.
    Returns ``(mean, std)`` as two tensors of length 3.
    '''
    loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)
    mean = torch.zeros(3)
    std = torch.zeros(3)
    print('==> Computing mean and std..')
    for inputs, _ in loader:
        for channel in range(3):
            plane = inputs[:, channel, :, :]
            mean[channel] += plane.mean()
            std[channel] += plane.std()
    sample_count = len(dataset)
    mean.div_(sample_count)
    std.div_(sample_count)
    return mean, std
29 | + | ||
def init_params(net):
    '''Init layer parameters.

    Walks every sub-module of ``net`` and re-initialises, in place:
      - Conv2d: Kaiming-normal weights (``mode='fan_out'``), zero bias.
      - BatchNorm2d: weight 1, bias 0.
      - Linear: normal weights with std 1e-3, zero bias.
    Parameters that are absent (``bias=False``, ``affine=False``) are skipped.
    '''
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight, mode='fan_out')
            # `is not None`, not truthiness: bool() of a multi-element
            # tensor raises RuntimeError.
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            # weight/bias are None when the layer was built with affine=False.
            if m.weight is not None:
                init.constant_(m.weight, 1)
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, std=1e-3)
            if m.bias is not None:
                init.constant_(m.bias, 0)
44 | + | ||
45 | + | ||
# Width (in columns) of the attached terminal, used to pad the progress bar.
# Shelling out to `stty size` and unpacking its output raised ValueError at
# import time whenever no terminal was attached (cron, CI, redirected I/O,
# notebooks). Query the standard descriptors directly instead -- stdin first,
# which is what `stty` inspected -- and fall back to 80 columns.
term_width = 80
for _fd in (0, 1, 2):
    try:
        term_width = os.get_terminal_size(_fd).columns or term_width
        break
    except (OSError, ValueError):
        continue

TOTAL_BAR_LENGTH = 65.  # number of character cells in the bar itself
last_time = time.time()  # timestamp of the previous progress_bar() call
begin_time = last_time  # timestamp at which the current bar started
def progress_bar(current, total, msg=None):
    '''Draw a one-line text progress bar on stdout.

    ``current`` is the zero-based step index and ``total`` the number of
    steps. ``msg`` is optional text appended after the step/total timings.
    The line ends with a carriage return until the last step, which ends it
    with a newline. Calling with ``current == 0`` restarts the total timer.
    '''
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # Reset for new bar.

    cur_len = int(TOTAL_BAR_LENGTH*current/total)
    rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1

    out = sys.stdout
    # Multiplying a string by a non-positive count yields '', matching an
    # empty range() loop.
    out.write(' [')
    out.write('=' * cur_len)
    out.write('>')
    out.write('.' * rest_len)
    out.write(']')

    now = time.time()
    step_time = now - last_time
    last_time = now
    tot_time = now - begin_time

    parts = [
        ' Step: %s' % format_time(step_time),
        ' | Tot: %s' % format_time(tot_time),
    ]
    if msg:
        parts.append(' | ' + msg)

    msg = ''.join(parts)
    out.write(msg)
    out.write(' ' * (term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3))

    # Go back to the center of the bar.
    out.write('\b' * (term_width-int(TOTAL_BAR_LENGTH/2)+2))
    out.write(' %d/%d ' % (current+1, total))

    out.write('\r' if current < total-1 else '\n')
    out.flush()
94 | + | ||
def format_time(seconds):
    '''Format a duration in seconds as a compact string.

    The duration is broken into days (D), hours (h), minutes (m), seconds (s)
    and milliseconds (ms). Only the first two non-zero units are shown, e.g.
    ``'1h1m'``. Returns ``'0ms'`` when every unit is zero.
    '''
    remaining = seconds
    days = int(remaining / 3600/24)
    remaining = remaining - days*3600*24
    hours = int(remaining / 3600)
    remaining = remaining - hours*3600
    minutes = int(remaining / 60)
    remaining = remaining - minutes*60
    whole_seconds = int(remaining)
    remaining = remaining - whole_seconds
    millis = int(remaining*1000)

    units = (
        (days, 'D'),
        (hours, 'h'),
        (minutes, 'm'),
        (whole_seconds, 's'),
        (millis, 'ms'),
    )
    shown = []
    for amount, suffix in units:
        # Keep at most the two most significant non-zero units.
        if amount > 0 and len(shown) < 2:
            shown.append(str(amount) + suffix)
    return ''.join(shown) or '0ms'
주간보고서/210322.docx
0 → 100644
No preview for this file type
주간보고서/210329.docx
0 → 100644
No preview for this file type
주간보고서/210405.docx
0 → 100644
No preview for this file type
주간보고서/210412.docx
0 → 100644
No preview for this file type
주간보고서/210419.docx
0 → 100644
No preview for this file type
주간보고서/210426.docx
0 → 100644
No preview for this file type
주간보고서/210503.docx
0 → 100644
No preview for this file type
-
Please register or login to post a comment