1 +{
2 + "python.pythonPath": "/home/chunjin1212/anaconda3/envs/torch/bin/python"
3 +}
\ No newline at end of file
1 +MIT License
2 +
3 +Copyright (c) 2017 liukuang
4 +
5 +Permission is hereby granted, free of charge, to any person obtaining a copy
6 +of this software and associated documentation files (the "Software"), to deal
7 +in the Software without restriction, including without limitation the rights
8 +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 +copies of the Software, and to permit persons to whom the Software is
10 +furnished to do so, subject to the following conditions:
11 +
12 +The above copyright notice and this permission notice shall be included in all
13 +copies or substantial portions of the Software.
14 +
15 +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 +SOFTWARE.
1 +# Train CIFAR10 with PyTorch
2 +
3 +I'm playing with [PyTorch](http://pytorch.org/) on the CIFAR10 dataset.
4 +
5 +## Prerequisites
6 +- Python 3.6+
7 +- PyTorch 1.0+
8 +
9 +## Training
10 +```
11 +# Start training with:
12 +python main.py
13 +
14 +# You can manually resume the training with (checkpoints are saved under --dir):
15 +python main.py --resume ./default/ckpt.pth --lr=0.01
16 +```
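
This fork also wires LSQ quantization-aware training into `main.py`. The flags below are the ones `main.py` defines (`--qat`, `--dir`, `--resume`, `--test`); the paths are illustrative:

```
# Quantization-aware training, saving checkpoints under ./qat_run/:
python main.py --qat --dir qat_run

# Evaluate a saved checkpoint without further training:
python main.py --qat --resume ./qat_run/ckpt.pth --test
```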
17 +
18 +## Accuracy
19 +| Model | Acc. |
20 +| ----------------- | ----------- |
21 +| [VGG16](https://arxiv.org/abs/1409.1556) | 92.64% |
22 +| [ResNet18](https://arxiv.org/abs/1512.03385) | 93.02% |
23 +| [ResNet50](https://arxiv.org/abs/1512.03385) | 93.62% |
24 +| [ResNet101](https://arxiv.org/abs/1512.03385) | 93.75% |
25 +| [RegNetX_200MF](https://arxiv.org/abs/2003.13678) | 94.24% |
26 +| [RegNetY_400MF](https://arxiv.org/abs/2003.13678) | 94.29% |
27 +| [MobileNetV2](https://arxiv.org/abs/1801.04381) | 94.43% |
28 +| [ResNeXt29(32x4d)](https://arxiv.org/abs/1611.05431) | 94.73% |
29 +| [ResNeXt29(2x64d)](https://arxiv.org/abs/1611.05431) | 94.82% |
30 +| [SimpleDLA](https://arxiv.org/abs/1707.06484) | 94.89% |
31 +| [DenseNet121](https://arxiv.org/abs/1608.06993) | 95.04% |
32 +| [PreActResNet18](https://arxiv.org/abs/1603.05027) | 95.11% |
33 +| [DPN92](https://arxiv.org/abs/1707.01629) | 95.16% |
34 +| [DLA](https://arxiv.org/abs/1707.06484) | 95.47% |
35 +
1 +import torch
2 +import torch.nn as nn
3 +import torch.nn.functional as F
4 +import math
5 +from decimal import Decimal
6 +import numpy as np
7 +
8 +# Parent Class for Quantization Module
9 +class LSQModule:
10 + def __init__(self, abit=None, wbit=None, ibit=None, dequantize=True, scale=None):
11 + self.abit = abit
12 + self.wbit = wbit
13 + self.ibit = ibit
14 + self.dequantize = dequantize
15 + self.register_buffer('init_state', torch.zeros(1))
16 + self.scale = scale
17 +
18 + # member variable setter
19 + def set_abit(self, v):
20 + self.abit = v
21 + def set_wbit(self, v):
22 + self.wbit = v
23 + def set_ibit(self, v):
24 + self.ibit = v
25 + def set_dequantize(self, v):
26 + self.dequantize = v
27 +
28 +class QAvgPool2d(nn.AdaptiveAvgPool2d, LSQModule):
29 + def __init__(self, abit, dequantize=True, output_size=(1,1)):
30 + super(QAvgPool2d, self).__init__(output_size)
31 + LSQModule.__init__(self, abit=abit, dequantize=dequantize,
32 + scale=nn.Parameter(torch.Tensor(1)))
33 + def __repr__(self):
34 + return self.__class__.__name__ + '(' \
35 + + 'output_size=' + str(self.output_size) \
36 + + ', abit=' + str(self.abit) \
37 + + ')'
38 + def forward(self, x):
39 + former = x[1]
40 + x = x[0]
41 + x = super().forward(x)
42 + Qn = - (2 ** (self.abit - 1))
43 + Qp = 2 ** (self.abit - 1) - 1
44 + # Qn = 0.
45 + # Qp = (2 ** self.abit) - 1
46 +
47 + act_scale = self.scale
48 + down_scale = act_scale / former
49 + # down_scale = down_scale.numpy().astype()
50 + # x = x.cpu().numpy().astype(Decimal)
51 +
52 + x = x.cpu().detach().numpy().astype(Decimal)
53 + down_scale = down_scale.cpu().detach().numpy().astype(Decimal)
54 + output = x / down_scale
55 + output = torch.from_numpy(output.astype(np.float32)).cuda()
56 + x = torch.round(output).clamp(Qn, Qp)
57 +
58 + return x, act_scale
59 +
60 +class QMaxPool2d(nn.MaxPool2d, LSQModule):
61 + def __init__(self, kernel_size=3, stride=2, padding=1):
62 + super(QMaxPool2d, self).__init__(kernel_size=kernel_size, stride=stride, padding=padding)
63 + LSQModule.__init__(self)
64 +
65 + def __repr__(self):
66 + return self.__class__.__name__ + '(' \
67 + + 'kernel_size=' + str(self.kernel_size) \
68 + + ', stride=' + str(self.stride) \
69 + + ', padding=' + str(self.padding) \
70 + + ')'
71 +
72 + def forward(self, x, act_scale=None):
73 + result = super().forward(x)
74 + return result
75 +
76 +class QReLU(nn.Module, LSQModule):
77 + def __init__(self, abit, dequantize=True, inplace=False):
78 + super(QReLU, self).__init__()
79 + LSQModule.__init__(self, abit=abit, dequantize=dequantize,
80 + scale=nn.Parameter(torch.Tensor(1)))
81 + self.inplace = inplace
82 +
83 + def __repr__(self):
84 + return self.__class__.__name__ + '(' \
85 + + 'abit=' + str(self.abit) \
86 + + ', dequantize=' + str(self.dequantize) \
87 + + ', inplace=' + str(self.inplace) \
88 + + ', init_state=' + str(self.init_state) \
89 + + ')'
90 +
91 + def forward(self, x):
92 + x = F.relu(x)
93 + Qn = 0.
94 + Qp = (2 ** self.abit) - 1
95 + if self.training and self.init_state == 0:
96 + self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(Qp))
97 + self.init_state.fill_(1)
98 +
99 + g = 1.0 / math.sqrt(x.numel() * Qp)
100 + act_scale = grad_scale(self.scale, g)
101 + x = round_pass((x / act_scale).clamp(Qn, Qp))
102 + if self.dequantize:
103 + x = x * act_scale
104 + return x, act_scale
105 +
106 +class QLeakyReLU(nn.Module, LSQModule):
107 + def __init__(self, abit, negative_slope=0.1, dequantize=True, inplace=False):
108 + super(QLeakyReLU, self).__init__()
109 + LSQModule.__init__(self, abit=abit, dequantize=dequantize,
110 + scale=nn.Parameter(torch.Tensor(1)))
111 + self.inplace = inplace
112 + self.negative_slope=negative_slope
113 +
114 + def __repr__(self):
115 + return self.__class__.__name__ + '(' \
116 + + 'abit=' + str(self.abit) \
117 + + ', negative_slope=' + str(self.negative_slope) \
118 + + ', inplace=' + str(self.inplace) \
119 + + ')'
120 +
121 + def forward(self, input):
122 + deq_scale = input[1]
123 + input = input[0]
124 +
125 + Qn = - (2 ** (self.abit - 1))
126 + Qp = 2 ** (self.abit - 1) - 1
127 +
128 +
129 + input = input.cpu().detach().numpy().astype(Decimal)
130 + # input = torch.from_numpy(input)
131 + down_scale = deq_scale / self.scale
132 + slope_scale = self.negative_slope * down_scale
133 + down_scale = down_scale.cpu().detach().numpy().astype(Decimal)
134 + slope_scale = slope_scale.cpu().detach().numpy().astype(Decimal)
135 +
136 + output = np.where(input<0, input*slope_scale, input*down_scale).astype(np.float32)
137 + output = torch.from_numpy(output).cuda()
138 +
139 + x = torch.round(output).clamp(Qn, Qp)
140 + return x, self.scale
141 +
142 +class QHswish(nn.Hardswish, LSQModule):
143 + def __init__(self, abit, dequantize=True, inplace=False):
144 + super(QHswish, self).__init__(inplace=inplace)
145 + LSQModule.__init__(self, abit=abit, dequantize=dequantize,
146 + scale=nn.Parameter(torch.Tensor(1)))
147 + self.inplace = inplace
148 +
149 + def __repr__(self):
150 + return self.__class__.__name__ + '(' \
151 + + 'abit=' + str(self.abit) \
152 + + ', inplace=' + str(self.inplace) \
153 + + ')'
154 +
155 + def forward(self, input):
156 + deq_scale = input[1]
157 + x = input[0]
158 + # input = input * deq_scale
159 +
160 + # x = super().forward(input)
161 +
162 + Qn = - (2 ** (self.abit - 1))
163 + Qp = 2 ** (self.abit - 1) - 1
164 +
165 + q_scale = self.scale
166 + down_scale = deq_scale / q_scale
167 +
168 + flag = int(torch.round(3/deq_scale))
169 + c1 = (down_scale * deq_scale / 6).cpu().detach().numpy().astype(Decimal)
170 + c2 = (down_scale / 2).cpu().detach().numpy().astype(Decimal)
171 + down_scale = down_scale.cpu().detach().numpy().astype(Decimal)
172 +
173 + x = x.cpu().detach().numpy().astype(Decimal)
174 +
175 + x = np.where(x<=-flag, x*0, x)
176 + x = np.where(x>=flag, down_scale*x, x*(c1*x+c2)).astype(np.float32)
177 + x = torch.from_numpy(x).cuda()
178 + # x = torch.where(x <= -flag, x*0, x)
179 + # x = torch.where(x >= flag,
180 + # down_scale*x, x*x*c1+x*c2)
181 +
182 + # act_scale = self.scale
183 + # down_scale = former_scale / self.scale
184 + # x = x * former_scal
185 + x = torch.round(x).clamp(Qn, Qp)
186 +
187 + return x, self.scale
188 +
189 +class QConv2d(nn.Conv2d, LSQModule):
190 + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=False, wbit=32, dequantize=True):
191 + super(QConv2d, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias)
192 + LSQModule.__init__(self, wbit=wbit, dequantize=dequantize,
193 + scale=nn.Parameter(torch.Tensor(1)))
194 +
195 +    def __repr__(self): # show detailed attributes when printing the model
196 + return self.__class__.__name__ + '(' \
197 + + 'in_channels=' + str(self.in_channels) \
198 + + ', out_channels=' + str(self.out_channels) \
199 + + ', bias=' + str(self.bias is not None) \
200 + + ', kernel_size=' + str(self.kernel_size) \
201 + + ', stride=' + str(self.stride) \
202 + + ', groups=' + str(self.groups) \
203 + + ', padding=' + str(self.padding) \
204 + + ', wbit=' + str(self.wbit) \
205 + + ')'
206 +
207 + def forward(self, x, act_scale=None):
208 + Qn = - (2 ** (self.wbit - 1))
209 + Qp = 2 ** (self.wbit - 1) - 1
210 + if self.training and self.init_state == 0:
211 + self.scale.data.copy_(2 * self.weight.abs().mean() / math.sqrt(Qp))
212 + self.init_state.fill_(1)
213 +
214 + g = 1.0 / math.sqrt(x.numel() * Qp)
215 + scale = grad_scale(self.scale, g)
216 +
217 + self.weight.data = round_pass((self.weight.data / scale).clamp(Qn, Qp))
218 +
219 + if self.dequantize:
220 + self.weight.data = self.weight.data * scale
221 +
222 + if self.bias is not None:
223 + bias_scale = scale*act_scale
224 + self.bias.data = round_pass((self.bias.data / bias_scale).clamp(Qn, Qp))
225 + if self.dequantize:
226 + self.bias.data = self.bias.data * bias_scale
227 +
228 + output = super().forward(x)
229 + return output
230 +
231 +class QLinear(nn.Linear, LSQModule):
232 + def __init__(self, in_features, out_features, bias=True, wbit=32, dequantize=True):
233 + super(QLinear, self).__init__(in_features, out_features, bias)
234 + LSQModule.__init__(self, wbit=wbit, dequantize=dequantize,
235 + scale=nn.Parameter(torch.Tensor(1)))
236 +
237 + def __repr__(self):
238 + return self.__class__.__name__ + '(' \
239 + + 'in_features=' + str(self.in_features) \
240 + + ', out_features=' + str(self.out_features) \
241 + + ', bias=' + str(self.bias is not None) \
242 + + ', wbit=' + str(self.wbit) \
243 + + ')'
244 +
245 + def forward(self, input, act_scale=None):
246 +
247 + if self.wbit < 32:
248 + Qn = - (2 ** (self.wbit - 1))
249 + Qp = 2 ** (self.wbit - 1) - 1
250 +
251 + scale = self.scale
252 +
253 + cur_weight = torch.round((self.weight.data / scale).clamp(Qn, Qp))
254 +
255 +            cur_bias = None  # stays None when the layer has no bias
256 +            if self.bias is not None:
257 +                bias_scale = scale*act_scale
258 +                cur_bias = torch.round((self.bias.data / bias_scale))
259 +        else:
260 +            cur_weight, cur_bias = self.weight, self.bias
261 +        output = F.linear(input, cur_weight, cur_bias)
262 +        return output
263 +
264 +class Input_Quantizer(nn.Module, LSQModule):
265 + def __init__(self, abit=8, dequantize=True):
266 + super(Input_Quantizer, self).__init__()
267 + LSQModule.__init__(self, abit=abit, dequantize=dequantize,
268 + scale=nn.Parameter(torch.Tensor(1)))
269 +
270 + def __repr__(self):
271 + return self.__class__.__name__ + '(' \
272 + + 'abit=' + str(self.abit) \
273 + + ', dequantize=' + str(self.dequantize) \
274 + + ', init_state=' + str(self.init_state) \
275 + + ')'
276 +
277 + def forward(self, x):
278 + Qn = - (2 ** (self.abit - 1))
279 + Qp = (2 ** (self.abit - 1)) - 1
280 +
281 + x = torch.round((x / self.scale).clamp(Qn, Qp))
282 +
283 + return x, self.scale
284 +
285 +class FuseConv2dQ(QConv2d):
286 + def __init__(self, in_channels, out_channels, kernel_size, stride=1,
287 + padding=0, dilation=1, groups=1, bias=True, wbit=32, dequantize=True):
288 + super(FuseConv2dQ, self).__init__(
289 + in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
290 + stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias,
291 + wbit=wbit, dequantize=dequantize)
292 +
293 + self.bn = nn.BatchNorm2d(out_channels)
294 +
295 + def forward(self, x):
296 + act_scale = x[1]
297 + x = x[0]
298 +
299 + # simulate bn folding to Conv
300 + f_weight, f_bias = self.fusing()
301 + Qn = - (2 ** (self.wbit - 1))
302 + Qp = 2 ** (self.wbit - 1) - 1
303 +
304 + scale = self.scale
305 + q_weight = torch.round((f_weight.data / scale).clamp(Qn, Qp))
306 + bias_scale = scale*act_scale
307 + q_bias = torch.round((f_bias / bias_scale))
308 +
309 + output = F.conv2d(x, q_weight, q_bias, self.stride, self.padding, self.dilation, self.groups)
310 + # output *= bias_scale # dequantize
311 +
312 + return output, bias_scale
313 +
314 + def replace_bn(self, bn_module):
315 + self.bn = bn_module
316 + self.bn.track_running_stats = False
317 +
318 + def fusing(self):
319 + std = torch.sqrt(self.bn.running_var + self.bn.eps)
320 + f_weight = self.weight * (self.bn.weight / std).reshape([len(self.bn.weight), 1,1,1])
321 + if self.bias is not None:
322 +            f_bias = self.bn.bias + (self.bias - self.bn.running_mean) * (self.bn.weight / std)
323 + else:
324 + f_bias = self.bn.bias - self.bn.running_mean * (self.bn.weight / std)
325 + return f_weight, f_bias
326 +
327 +def grad_scale(x, scale):  # identity in forward; scales the gradient in backward
328 + y = x
329 + y_grad = x * scale
330 + output = (y - y_grad).detach() + y_grad
331 +
332 + return output
333 +
334 +def round_pass(x):  # rounds in forward; straight-through (identity) gradient in backward
335 + y = torch.round(x)
336 + y_grad = x
337 + output = (y - y_grad).detach() + y_grad
338 +
339 + return output
340 +
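Both helper functions above use the straight-through-estimator trick: compute the non-differentiable value in the forward pass, then detach the difference so the backward pass sees an identity (or scaled) gradient. A minimal standalone check, not part of the repo:

```
import torch

x = torch.tensor([0.4, 1.6], requires_grad=True)
# same construction as round_pass: rounds in forward, identity gradient in backward
y = ((torch.round(x) - x).detach() + x).sum()
y.backward()
print(x.grad)  # tensor([1., 1.]) -- a true gradient of round() would be zero
```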
1 +import torch
2 +import torch.nn as nn
3 +import torch.nn.functional as F
4 +import math
5 +
6 +# Parent Class for Quantization Module
7 +class LSQModule:
8 + def __init__(self, abit=None, wbit=None, ibit=None, dequantize=True, scale=None):
9 + self.abit = abit
10 + self.wbit = wbit
11 + self.ibit = ibit
12 + self.dequantize = dequantize
13 + self.register_buffer('init_state', torch.zeros(1))
14 + self.scale = scale
15 +
16 + # member variable setter
17 + def set_abit(self, v):
18 + self.abit = v
19 + def set_wbit(self, v):
20 + self.wbit = v
21 + def set_ibit(self, v):
22 + self.ibit = v
23 + def set_dequantize(self, v):
24 + self.dequantize = v
25 +
26 +
27 +class QAvgPool2d(nn.AdaptiveAvgPool2d, LSQModule):
28 + def __init__(self, abit, dequantize=True, output_size=(1,1)):
29 + super(QAvgPool2d, self).__init__(output_size)
30 + LSQModule.__init__(self, abit=abit, dequantize=dequantize,
31 + scale=nn.Parameter(torch.Tensor(1)))
32 + def __repr__(self):
33 + return self.__class__.__name__ + '(' \
34 + + 'output_size=' + str(self.output_size) \
35 + + ', abit=' + str(self.abit) \
36 + + ')'
37 + def forward(self, x):
38 + x = x[0]
39 + x = super().forward(x)
40 + # Qn = - (2 ** (self.abit - 1))
41 + # Qp = 2 ** (self.abit - 1) - 1
42 + Qn = 0.
43 + Qp = (2 ** self.abit) - 1
44 + if self.training and self.init_state == 0:
45 + self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(Qp))
46 + self.init_state.fill_(1)
47 +
48 + g = 1.0 / math.sqrt(x.numel() * Qp)
49 + act_scale = grad_scale(self.scale, g)
50 + x = round_pass((x / act_scale).clamp(Qn, Qp))
51 + if self.dequantize:
52 + x = x * act_scale
53 + return x, act_scale
54 +
55 +
56 +class QMaxPool2d(nn.MaxPool2d, LSQModule):
57 + def __init__(self, kernel_size=3, stride=2, padding=1):
58 + super(QMaxPool2d, self).__init__(kernel_size=kernel_size, stride=stride, padding=padding)
59 + LSQModule.__init__(self)
60 +
61 + def __repr__(self):
62 + return self.__class__.__name__ + '(' \
63 + + 'kernel_size=' + str(self.kernel_size) \
64 + + ', stride=' + str(self.stride) \
65 + + ', padding=' + str(self.padding) \
66 + + ')'
67 +
68 + def forward(self, x, act_scale=None):
69 + result = super().forward(x)
70 + return result
71 +
72 +
73 +class QReLU(nn.Module, LSQModule):
74 + def __init__(self, abit, dequantize=True, inplace=False):
75 + super(QReLU, self).__init__()
76 + LSQModule.__init__(self, abit=abit, dequantize=dequantize,
77 + scale=nn.Parameter(torch.Tensor(1)))
78 + self.inplace = inplace
79 +
80 + def __repr__(self):
81 + return self.__class__.__name__ + '(' \
82 + + 'abit=' + str(self.abit) \
83 + + ', dequantize=' + str(self.dequantize) \
84 + + ', inplace=' + str(self.inplace) \
85 + + ', init_state=' + str(self.init_state) \
86 + + ')'
87 +
88 + def forward(self, x):
89 + x = F.relu(x)
90 + Qn = 0.
91 + Qp = (2 ** self.abit) - 1
92 + if self.training and self.init_state == 0:
93 + self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(Qp))
94 + self.init_state.fill_(1)
95 +
96 + g = 1.0 / math.sqrt(x.numel() * Qp)
97 + act_scale = grad_scale(self.scale, g)
98 + x = round_pass((x / act_scale).clamp(Qn, Qp))
99 + if self.dequantize:
100 + x = x * act_scale
101 + return x, act_scale
102 +
103 +class QLeakyReLU(nn.Module, LSQModule):
104 + def __init__(self, abit, negative_slope=0.1, dequantize=True, inplace=False):
105 + super(QLeakyReLU, self).__init__()
106 + LSQModule.__init__(self, abit=abit, dequantize=dequantize,
107 + scale=nn.Parameter(torch.Tensor(1)))
108 + self.inplace = inplace
109 + self.negative_slope=negative_slope
110 +
111 + def __repr__(self):
112 + return self.__class__.__name__ + '(' \
113 + + 'abit=' + str(self.abit) \
114 + + ', negative_slope=' + str(self.negative_slope) \
115 + + ', inplace=' + str(self.inplace) \
116 + + ')'
117 +
118 + def forward(self, input):
119 + x = F.leaky_relu(input=input, negative_slope=self.negative_slope)
120 + Qn = - (2 ** (self.abit - 1))
121 + Qp = 2 ** (self.abit - 1) - 1
122 + # Qn = 0.
123 + # Qp = (2 ** self.abit) - 1
124 + if self.training and self.init_state == 0:
125 + self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(Qp))
126 + self.init_state.fill_(1)
127 +
128 + g = 1.0 / math.sqrt(x.numel() * Qp)
129 + act_scale = grad_scale(self.scale, g)
130 + x = round_pass((x / act_scale).clamp(Qn, Qp))
131 + if self.dequantize:
132 + x = x * act_scale
133 +
134 + return x, act_scale
135 +
136 +class QHswish(nn.Hardswish, LSQModule):
137 + def __init__(self, abit, dequantize=True, inplace=False):
138 + super(QHswish, self).__init__(inplace=inplace)
139 + LSQModule.__init__(self, abit=abit, dequantize=dequantize,
140 + scale=nn.Parameter(torch.Tensor(1)))
141 + self.inplace = inplace
142 +
143 + def __repr__(self):
144 + return self.__class__.__name__ + '(' \
145 + + 'abit=' + str(self.abit) \
146 + + ', inplace=' + str(self.inplace) \
147 + + ')'
148 +
149 + def forward(self, input):
150 + x = super().forward(input)
151 + Qn = - (2 ** (self.abit - 1))
152 + Qp = 2 ** (self.abit - 1) - 1
153 + # Qn = 0.
154 + # Qp = (2 ** self.abit) - 1
155 + if self.training and self.init_state == 0:
156 + self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(Qp))
157 + self.init_state.fill_(1)
158 +
159 + g = 1.0 / math.sqrt(x.numel() * Qp)
160 + act_scale = grad_scale(self.scale, g)
161 + x = round_pass((x / act_scale).clamp(Qn, Qp))
162 + if self.dequantize:
163 + x = x * act_scale
164 + return x, act_scale
165 +
166 +class QHsigmoid(nn.Hardsigmoid, LSQModule):
167 + def __init__(self, abit, dequantize=True, inplace=False):
168 + super(QHsigmoid, self).__init__(inplace=inplace)
169 + LSQModule.__init__(self, abit=abit, dequantize=dequantize,
170 + scale=nn.Parameter(torch.Tensor(1)))
171 + self.inplace = inplace
172 +
173 + def __repr__(self):
174 + return self.__class__.__name__ + '(' \
175 + + 'abit=' + str(self.abit) \
176 + + ', inplace=' + str(self.inplace) \
177 + + ')'
178 +
179 + def forward(self, input):
180 + x = super().forward(input)
181 + # Qn = - (2 ** (self.abit - 1))
182 + # Qp = 2 ** (self.abit - 1) - 1
183 + Qn = 0.
184 + Qp = (2 ** self.abit) - 1
185 + if self.training and self.init_state == 0:
186 + self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(Qp))
187 + self.init_state.fill_(1)
188 +
189 + g = 1.0 / math.sqrt(x.numel() * Qp)
190 + act_scale = grad_scale(self.scale, g)
191 + x = round_pass((x / act_scale).clamp(Qn, Qp))
192 + if self.dequantize:
193 + x = x * act_scale
194 + return x, act_scale
195 +
196 +class QConv2d(nn.Conv2d, LSQModule):
197 + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=False, wbit=32, dequantize=True):
198 + super(QConv2d, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias)
199 + LSQModule.__init__(self, wbit=wbit, dequantize=dequantize,
200 + scale=nn.Parameter(torch.Tensor(1)))
201 +
202 +    def __repr__(self): # show detailed attributes when printing the model
203 + return self.__class__.__name__ + '(' \
204 + + 'in_channels=' + str(self.in_channels) \
205 + + ', out_channels=' + str(self.out_channels) \
206 + + ', bias=' + str(self.bias is not None) \
207 + + ', kernel_size=' + str(self.kernel_size) \
208 + + ', stride=' + str(self.stride) \
209 + + ', groups=' + str(self.groups) \
210 + + ', padding=' + str(self.padding) \
211 + + ', wbit=' + str(self.wbit) \
212 + + ')'
213 +
214 + def forward(self, x, act_scale=None):
215 + Qn = - (2 ** (self.wbit - 1))
216 + Qp = 2 ** (self.wbit - 1) - 1
217 + if self.training and self.init_state == 0:
218 + self.scale.data.copy_(2 * self.weight.abs().mean() / math.sqrt(Qp))
219 + self.init_state.fill_(1)
220 +
221 + g = 1.0 / math.sqrt(x.numel() * Qp)
222 + scale = grad_scale(self.scale, g)
223 +
224 + self.weight.data = round_pass((self.weight.data / scale).clamp(Qn, Qp))
225 +
226 + if self.dequantize:
227 + self.weight.data = self.weight.data * scale
228 +
229 + if self.bias is not None:
230 + bias_scale = scale*act_scale
231 + self.bias.data = round_pass((self.bias.data / bias_scale).clamp(Qn, Qp))
232 + if self.dequantize:
233 + self.bias.data = self.bias.data * bias_scale
234 +
235 + output = super().forward(x)
236 + return output
237 +
238 +class QLinear(nn.Linear, LSQModule):
239 + def __init__(self, in_features, out_features, bias=True, wbit=32, dequantize=True):
240 + super(QLinear, self).__init__(in_features, out_features, bias)
241 + LSQModule.__init__(self, wbit=wbit, dequantize=dequantize,
242 + scale=nn.Parameter(torch.Tensor(1)))
243 +
244 + def __repr__(self):
245 + return self.__class__.__name__ + '(' \
246 + + 'in_features=' + str(self.in_features) \
247 + + ', out_features=' + str(self.out_features) \
248 + + ', bias=' + str(self.bias is not None) \
249 + + ', wbit=' + str(self.wbit) \
250 + + ')'
251 +
252 + def forward(self, input, act_scale=None):
253 + if self.wbit < 32:
254 + Qn = - (2 ** (self.wbit - 1))
255 + Qp = 2 ** (self.wbit - 1) - 1
256 + if self.training and self.init_state == 0:
257 + self.scale.data.copy_(2 * self.weight.abs().mean() / math.sqrt(Qp))
258 + self.init_state.fill_(1)
259 +
260 + g = 1.0 / math.sqrt(input.numel() * Qp)
261 + scale = grad_scale(self.scale, g)
262 +
263 + self.weight.data = round_pass((self.weight.data / scale).clamp(Qn, Qp))
264 + if self.dequantize:
265 + self.weight.data = self.weight.data * scale
266 +
267 + # with torch.no_grad():
268 + if self.bias is not None:
269 + bias_scale = scale*act_scale
270 + self.bias.data = round_pass((self.bias.data / bias_scale))
271 + if self.dequantize:
272 + self.bias.data = self.bias.data * bias_scale
273 +
274 + output = super().forward(input)
275 + return output
276 +
277 +
278 +class Input_Quantizer(nn.Module, LSQModule):
279 + def __init__(self, abit=8, dequantize=True):
280 + super(Input_Quantizer, self).__init__()
281 + LSQModule.__init__(self, abit=abit, dequantize=dequantize,
282 + scale=nn.Parameter(torch.Tensor(1)))
283 +
284 + def __repr__(self):
285 + return self.__class__.__name__ + '(' \
286 + + 'abit=' + str(self.abit) \
287 + + ', dequantize=' + str(self.dequantize) \
288 + + ', init_state=' + str(self.init_state) \
289 + + ')'
290 +
291 + def forward(self, x):
292 + Qn = - (2 ** (self.abit - 1))
293 + Qp = (2 ** (self.abit - 1)) - 1
294 + if self.training and self.init_state == 0:
295 + self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(Qp))
296 + self.init_state.fill_(1)
297 +
298 + g = 1.0 / math.sqrt(x.numel() * Qp)
299 + act_scale = grad_scale(self.scale, g)
300 + x = round_pass((x / act_scale).clamp(Qn, Qp))
301 +
302 + if self.dequantize:
303 + x = x * act_scale
304 + return x, act_scale
305 +
306 +
307 +class FuseConv2dQ(QConv2d):
308 + def __init__(self, in_channels, out_channels, kernel_size, stride=1,
309 + padding=0, dilation=1, groups=1, bias=True, wbit=32, dequantize=True):
310 + super(FuseConv2dQ, self).__init__(
311 + in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
312 + stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias,
313 + wbit=wbit, dequantize=dequantize)
314 +
315 + self.bn = nn.BatchNorm2d(out_channels)
316 +
317 + def forward(self, x):
318 + act_scale = x[1]
319 + x = x[0]
320 +        temp = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
321 +        _ = self.bn(temp)  # update the BN running statistics before folding
322 +
323 + # simulate bn folding to Conv
324 + f_weight, f_bias = self.fusing()
325 + Qn = - (2 ** (self.wbit - 1))
326 + Qp = 2 ** (self.wbit - 1) - 1
327 + if self.training and self.init_state == 0:
328 + self.scale.data.copy_(2 * f_weight.abs().mean() / math.sqrt(Qp))
329 + self.init_state.fill_(1)
330 +
331 + g = 1.0 / math.sqrt(x.numel() * Qp)
332 + scale = grad_scale(self.scale, g)
333 + q_weight = round_pass((f_weight.data / scale).clamp(Qn, Qp))
334 +
335 + if self.dequantize:
336 + q_weight = q_weight * scale
337 +
338 + # with torch.no_grad():
339 + bias_scale = scale*act_scale
340 + q_bias = round_pass((f_bias / bias_scale))
341 + if self.dequantize:
342 + q_bias = q_bias * bias_scale
343 +
344 + output = F.conv2d(x, q_weight, q_bias, self.stride, self.padding, self.dilation, self.groups)
345 + return output
346 +
347 + def replace_bn(self, bn_module):
348 + self.bn = bn_module
349 + self.bn.track_running_stats = False
350 +
351 + def fusing(self):
352 + std = torch.sqrt(self.bn.running_var + self.bn.eps)
353 + f_weight = self.weight * (self.bn.weight / std).reshape([len(self.bn.weight), 1,1,1])
354 + if self.bias is not None:
355 +            f_bias = self.bn.bias + (self.bias - self.bn.running_mean) * (self.bn.weight / std)
356 + else:
357 + f_bias = self.bn.bias - self.bn.running_mean * (self.bn.weight / std)
358 + return f_weight, f_bias
359 +
360 +
361 +def grad_scale(x, scale):  # identity in forward; scales the gradient in backward
362 + y = x
363 + y_grad = x * scale
364 + output = (y - y_grad).detach() + y_grad
365 +
366 + return output
367 +
368 +def round_pass(x):  # rounds in forward; straight-through (identity) gradient in backward
369 + y = torch.round(x)
370 + y_grad = x
371 + output = (y - y_grad).detach() + y_grad
372 +
373 + return output
374 +
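A minimal sketch of how these modules chain during QAT: `Input_Quantizer` emits a `(tensor, scale)` pair, `FuseConv2dQ` consumes it and returns a plain tensor, and the activation quantizers re-emit a pair. This assumes the classes above live in `lsq_sq` (the module `main.py` imports `Input_Quantizer` from); shapes and bit-widths are illustrative:

```
import torch
from lsq_sq import Input_Quantizer, FuseConv2dQ, QReLU

quantizer = Input_Quantizer(abit=8, dequantize=True)
conv = FuseConv2dQ(3, 16, kernel_size=3, padding=1, wbit=8)
relu = QReLU(abit=8)
quantizer.train(); conv.train(); relu.train()  # lets init_state seed the scales

x = torch.randn(4, 3, 32, 32)
q = quantizer(x)            # -> (quantized input, act_scale)
out = conv(q)               # folds BN into the conv, quantizes weights and bias
out, act_scale = relu(out)  # -> (quantized activations, new act_scale)
```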
1 +'''Train CIFAR10 with PyTorch.'''
2 +import torch
3 +import torch.nn as nn
4 +import torch.optim as optim
5 +import torch.nn.functional as F
6 +import torch.backends.cudnn as cudnn
7 +
8 +import torchvision
9 +import torchvision.transforms as transforms
10 +
11 +import os
12 +import argparse
13 +
14 +from models.mobilenet import MobileNet1
15 +from utils import progress_bar
16 +from replace import replace_sq
17 +from collections import OrderedDict
18 +# from lsq_int import Input_Quantizer
19 +from lsq_sq import Input_Quantizer
20 +from replace_int import replace_int
21 +
22 +
23 +parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
24 +parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
25 +parser.add_argument('--resume', '-r', default=None, type=str,
26 + help='resume from checkpoint')
27 +parser.add_argument('--dir', default='default', type=str,
28 + help='save dir name')
29 +parser.add_argument('--test', default=False, action='store_true',
30 +                    help='evaluate only, without training')
31 +parser.add_argument('--qat', default=False, action='store_true',
32 +                    help='enable quantization-aware training')
33 +args = parser.parse_args()
34 +
35 +
36 +# Training
37 +def train(epoch):
38 + print('\nEpoch: %d' % epoch)
39 + net.train()
40 + train_loss = 0
41 + correct = 0
42 + total = 0
43 + for batch_idx, (inputs, targets) in enumerate(trainloader):
44 + inputs, targets = inputs.to(device), targets.to(device)
45 + optimizer.zero_grad()
46 + outputs = net(inputs)
47 + loss = criterion(outputs, targets)
48 + loss.backward()
49 + optimizer.step()
50 +
51 + train_loss += loss.item()
52 + _, predicted = outputs.max(1)
53 + total += targets.size(0)
54 + correct += predicted.eq(targets).sum().item()
55 + progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
56 + % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
57 +
58 +
59 +def test(epoch):
60 + global best_acc
61 + dir_name = args.dir
62 + net.eval()
63 + test_loss = 0
64 + correct = 0
65 + total = 0
66 + with torch.no_grad():
67 + for batch_idx, (inputs, targets) in enumerate(testloader):
68 +
69 + inputs, targets = inputs.to(device), targets.to(device)
70 + outputs = net(inputs)
71 + loss = criterion(outputs, targets)
72 +
73 + test_loss += loss.item()
74 + _, predicted = outputs.max(1)
75 + total += targets.size(0)
76 + correct += predicted.eq(targets).sum().item()
77 + progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
78 + % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
79 +
80 + # Save checkpoint.
81 + acc = 100.*correct/total
82 + if acc > best_acc:
83 + print('Saving..')
84 + state = {
85 + 'net': net.state_dict(),
86 + 'acc': acc,
87 + 'epoch': epoch,
88 + }
89 + if not os.path.isdir(dir_name):
90 + os.mkdir(dir_name)
91 + torch.save(state, f'./{dir_name}/ckpt.pth')
92 + best_acc = acc
93 + print('*** best Test Accuracy: ', best_acc)
94 +
95 +if __name__ == '__main__':
96 + device = 'cuda' if torch.cuda.is_available() else 'cpu'
97 + best_acc = 0 # best test accuracy
98 + start_epoch = 0 # start from epoch 0 or last checkpoint epoch
99 +
100 + # Data
101 + print('==> Preparing data..')
102 + transform_train = transforms.Compose([
103 + transforms.RandomCrop(32, padding=4),
104 + transforms.RandomHorizontalFlip(),
105 + transforms.ToTensor(),
106 + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
107 + ])
108 +
109 + transform_test = transforms.Compose([
110 + transforms.ToTensor(),
111 + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
112 + ])
113 +
114 + trainset = torchvision.datasets.CIFAR10(
115 + root='./data', train=True, download=True, transform=transform_train)
116 + trainloader = torch.utils.data.DataLoader(
117 + trainset, batch_size=256, shuffle=True, num_workers=4)
118 +
119 + testset = torchvision.datasets.CIFAR10(
120 + root='./data', train=False, download=True, transform=transform_test)
121 + testloader = torch.utils.data.DataLoader(
122 + testset, batch_size=100, shuffle=False, num_workers=4)
123 +
124 + classes = ('plane', 'car', 'bird', 'cat', 'deer',
125 + 'dog', 'frog', 'horse', 'ship', 'truck')
126 +
127 + # Model
128 + print('==> Building model..')
129 + net = MobileNet1(3, 10)
130 + net = net.to(device)
131 +
132 + if args.qat:
133 + net = replace_sq(model=net)
134 + net = nn.Sequential(Input_Quantizer(abit=8, dequantize=True),
135 + net)
136 +
137 + if args.resume:
138 + # Load checkpoint.
139 + print('==> Resuming from checkpoint..')
140 +        assert os.path.isfile(args.resume), 'Error: no checkpoint file found!'
141 + checkpoint = torch.load(args.resume)
142 + new_state_dict = OrderedDict()
143 + for k, v in checkpoint['net'].items():
144 + k = k.replace("module.", "")
145 + new_state_dict[k] = v
146 + net.load_state_dict(new_state_dict)
147 + best_acc = 0.0
148 + start_epoch = 0
149 +
150 + print(net)
151 + # replace_int(net)
152 +
153 + if device == 'cuda':
154 + net = torch.nn.DataParallel(net)
155 + cudnn.benchmark = True
156 + net.cuda()
157 +
158 + criterion = nn.CrossEntropyLoss()
159 + optimizer = optim.SGD(net.parameters(), lr=args.lr,
160 + momentum=0.9, weight_decay=5e-4)
161 + scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
162 +
163 +
164 + for epoch in range(start_epoch, start_epoch+200):
165 + if args.test:
166 + test(epoch)
167 + break
168 + else:
169 + train(epoch)
170 + test(epoch)
171 + scheduler.step()
1 +'''DenseNet in PyTorch.'''
2 +import math
3 +
4 +import torch
5 +import torch.nn as nn
6 +import torch.nn.functional as F
7 +
8 +
9 +class Bottleneck(nn.Module):
10 + def __init__(self, in_planes, growth_rate):
11 + super(Bottleneck, self).__init__()
12 + self.bn1 = nn.BatchNorm2d(in_planes)
13 + self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False)
14 + self.bn2 = nn.BatchNorm2d(4*growth_rate)
15 + self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)
16 +
17 + def forward(self, x):
18 + out = self.conv1(F.relu(self.bn1(x)))
19 + out = self.conv2(F.relu(self.bn2(out)))
20 + out = torch.cat([out,x], 1)
21 + return out
22 +
23 +
24 +class Transition(nn.Module):
25 + def __init__(self, in_planes, out_planes):
26 + super(Transition, self).__init__()
27 + self.bn = nn.BatchNorm2d(in_planes)
28 + self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)
29 +
30 + def forward(self, x):
31 + out = self.conv(F.relu(self.bn(x)))
32 + out = F.avg_pool2d(out, 2)
33 + return out
34 +
35 +
36 +class DenseNet(nn.Module):
37 + def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
38 + super(DenseNet, self).__init__()
39 + self.growth_rate = growth_rate
40 +
41 + num_planes = 2*growth_rate
42 + self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)
43 +
44 + self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
45 + num_planes += nblocks[0]*growth_rate
46 + out_planes = int(math.floor(num_planes*reduction))
47 + self.trans1 = Transition(num_planes, out_planes)
48 + num_planes = out_planes
49 +
50 + self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
51 + num_planes += nblocks[1]*growth_rate
52 + out_planes = int(math.floor(num_planes*reduction))
53 + self.trans2 = Transition(num_planes, out_planes)
54 + num_planes = out_planes
55 +
56 + self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
57 + num_planes += nblocks[2]*growth_rate
58 + out_planes = int(math.floor(num_planes*reduction))
59 + self.trans3 = Transition(num_planes, out_planes)
60 + num_planes = out_planes
61 +
62 + self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
63 + num_planes += nblocks[3]*growth_rate
64 +
65 + self.bn = nn.BatchNorm2d(num_planes)
66 + self.linear = nn.Linear(num_planes, num_classes)
67 +
68 + def _make_dense_layers(self, block, in_planes, nblock):
69 + layers = []
70 + for i in range(nblock):
71 + layers.append(block(in_planes, self.growth_rate))
72 + in_planes += self.growth_rate
73 + return nn.Sequential(*layers)
74 +
75 + def forward(self, x):
76 + out = self.conv1(x)
77 + out = self.trans1(self.dense1(out))
78 + out = self.trans2(self.dense2(out))
79 + out = self.trans3(self.dense3(out))
80 + out = self.dense4(out)
81 + out = F.avg_pool2d(F.relu(self.bn(out)), 4)
82 + out = out.view(out.size(0), -1)
83 + out = self.linear(out)
84 + return out
85 +
86 +def DenseNet121():
87 + return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)
88 +
89 +def DenseNet169():
90 + return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)
91 +
92 +def DenseNet201():
93 + return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)
94 +
95 +def DenseNet161():
96 + return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)
97 +
98 +def densenet_cifar():
99 + return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12)
100 +
101 +def test():
102 + net = densenet_cifar()
103 + x = torch.randn(1,3,32,32)
104 + y = net(x)
105 + print(y)
106 +
107 +# test()
1 +'''DLA in PyTorch.
2 +
3 +Reference:
4 + Deep Layer Aggregation. https://arxiv.org/abs/1707.06484
5 +'''
6 +import torch
7 +import torch.nn as nn
8 +import torch.nn.functional as F
9 +
10 +
11 +class BasicBlock(nn.Module):
12 + expansion = 1
13 +
14 + def __init__(self, in_planes, planes, stride=1):
15 + super(BasicBlock, self).__init__()
16 + self.conv1 = nn.Conv2d(
17 + in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
18 + self.bn1 = nn.BatchNorm2d(planes)
19 + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
20 + stride=1, padding=1, bias=False)
21 + self.bn2 = nn.BatchNorm2d(planes)
22 +
23 + self.shortcut = nn.Sequential()
24 + if stride != 1 or in_planes != self.expansion*planes:
25 + self.shortcut = nn.Sequential(
26 + nn.Conv2d(in_planes, self.expansion*planes,
27 + kernel_size=1, stride=stride, bias=False),
28 + nn.BatchNorm2d(self.expansion*planes)
29 + )
30 +
31 + def forward(self, x):
32 + out = F.relu(self.bn1(self.conv1(x)))
33 + out = self.bn2(self.conv2(out))
34 + out += self.shortcut(x)
35 + out = F.relu(out)
36 + return out
37 +
38 +
39 +class Root(nn.Module):
40 + def __init__(self, in_channels, out_channels, kernel_size=1):
41 + super(Root, self).__init__()
42 + self.conv = nn.Conv2d(
43 + in_channels, out_channels, kernel_size,
44 + stride=1, padding=(kernel_size - 1) // 2, bias=False)
45 + self.bn = nn.BatchNorm2d(out_channels)
46 +
47 + def forward(self, xs):
48 + x = torch.cat(xs, 1)
49 + out = F.relu(self.bn(self.conv(x)))
50 + return out
51 +
52 +
53 +class Tree(nn.Module):
54 + def __init__(self, block, in_channels, out_channels, level=1, stride=1):
55 + super(Tree, self).__init__()
56 + self.level = level
57 + if level == 1:
58 + self.root = Root(2*out_channels, out_channels)
59 + self.left_node = block(in_channels, out_channels, stride=stride)
60 + self.right_node = block(out_channels, out_channels, stride=1)
61 + else:
62 + self.root = Root((level+2)*out_channels, out_channels)
63 + for i in reversed(range(1, level)):
64 + subtree = Tree(block, in_channels, out_channels,
65 + level=i, stride=stride)
66 + self.__setattr__('level_%d' % i, subtree)
67 + self.prev_root = block(in_channels, out_channels, stride=stride)
68 + self.left_node = block(out_channels, out_channels, stride=1)
69 + self.right_node = block(out_channels, out_channels, stride=1)
70 +
71 + def forward(self, x):
72 + xs = [self.prev_root(x)] if self.level > 1 else []
73 + for i in reversed(range(1, self.level)):
74 + level_i = self.__getattr__('level_%d' % i)
75 + x = level_i(x)
76 + xs.append(x)
77 + x = self.left_node(x)
78 + xs.append(x)
79 + x = self.right_node(x)
80 + xs.append(x)
81 + out = self.root(xs)
82 + return out
83 +
84 +
85 +class DLA(nn.Module):
86 + def __init__(self, block=BasicBlock, num_classes=10):
87 + super(DLA, self).__init__()
88 + self.base = nn.Sequential(
89 + nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
90 + nn.BatchNorm2d(16),
91 + nn.ReLU(True)
92 + )
93 +
94 + self.layer1 = nn.Sequential(
95 + nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
96 + nn.BatchNorm2d(16),
97 + nn.ReLU(True)
98 + )
99 +
100 + self.layer2 = nn.Sequential(
101 + nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
102 + nn.BatchNorm2d(32),
103 + nn.ReLU(True)
104 + )
105 +
106 + self.layer3 = Tree(block, 32, 64, level=1, stride=1)
107 + self.layer4 = Tree(block, 64, 128, level=2, stride=2)
108 + self.layer5 = Tree(block, 128, 256, level=2, stride=2)
109 + self.layer6 = Tree(block, 256, 512, level=1, stride=2)
110 + self.linear = nn.Linear(512, num_classes)
111 +
112 + def forward(self, x):
113 + out = self.base(x)
114 + out = self.layer1(out)
115 + out = self.layer2(out)
116 + out = self.layer3(out)
117 + out = self.layer4(out)
118 + out = self.layer5(out)
119 + out = self.layer6(out)
120 + out = F.avg_pool2d(out, 4)
121 + out = out.view(out.size(0), -1)
122 + out = self.linear(out)
123 + return out
124 +
125 +
126 +def test():
127 + net = DLA()
128 + print(net)
129 + x = torch.randn(1, 3, 32, 32)
130 + y = net(x)
131 + print(y.size())
132 +
133 +
134 +if __name__ == '__main__':
135 + test()
1 +'''Simplified version of DLA in PyTorch.
2 +
3 +Note this implementation is not identical to the original paper version.
4 +But it seems to work fine.
5 +
6 +See dla.py for the original paper version.
7 +
8 +Reference:
9 + Deep Layer Aggregation. https://arxiv.org/abs/1707.06484
10 +'''
11 +import torch
12 +import torch.nn as nn
13 +import torch.nn.functional as F
14 +
15 +
16 +class BasicBlock(nn.Module):
17 + expansion = 1
18 +
19 + def __init__(self, in_planes, planes, stride=1):
20 + super(BasicBlock, self).__init__()
21 + self.conv1 = nn.Conv2d(
22 + in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
23 + self.bn1 = nn.BatchNorm2d(planes)
24 + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
25 + stride=1, padding=1, bias=False)
26 + self.bn2 = nn.BatchNorm2d(planes)
27 +
28 + self.shortcut = nn.Sequential()
29 + if stride != 1 or in_planes != self.expansion*planes:
30 + self.shortcut = nn.Sequential(
31 + nn.Conv2d(in_planes, self.expansion*planes,
32 + kernel_size=1, stride=stride, bias=False),
33 + nn.BatchNorm2d(self.expansion*planes)
34 + )
35 +
36 + def forward(self, x):
37 + out = F.relu(self.bn1(self.conv1(x)))
38 + out = self.bn2(self.conv2(out))
39 + out += self.shortcut(x)
40 + out = F.relu(out)
41 + return out
42 +
43 +
44 +class Root(nn.Module):
45 + def __init__(self, in_channels, out_channels, kernel_size=1):
46 + super(Root, self).__init__()
47 + self.conv = nn.Conv2d(
48 + in_channels, out_channels, kernel_size,
49 + stride=1, padding=(kernel_size - 1) // 2, bias=False)
50 + self.bn = nn.BatchNorm2d(out_channels)
51 +
52 + def forward(self, xs):
53 + x = torch.cat(xs, 1)
54 + out = F.relu(self.bn(self.conv(x)))
55 + return out
56 +
57 +
58 +class Tree(nn.Module):
59 + def __init__(self, block, in_channels, out_channels, level=1, stride=1):
60 + super(Tree, self).__init__()
61 + self.root = Root(2*out_channels, out_channels)
62 + if level == 1:
63 + self.left_tree = block(in_channels, out_channels, stride=stride)
64 + self.right_tree = block(out_channels, out_channels, stride=1)
65 + else:
66 + self.left_tree = Tree(block, in_channels,
67 + out_channels, level=level-1, stride=stride)
68 + self.right_tree = Tree(block, out_channels,
69 + out_channels, level=level-1, stride=1)
70 +
71 + def forward(self, x):
72 + out1 = self.left_tree(x)
73 + out2 = self.right_tree(out1)
74 + out = self.root([out1, out2])
75 + return out
76 +
77 +
78 +class SimpleDLA(nn.Module):
79 + def __init__(self, block=BasicBlock, num_classes=10):
80 + super(SimpleDLA, self).__init__()
81 + self.base = nn.Sequential(
82 + nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
83 + nn.BatchNorm2d(16),
84 + nn.ReLU(True)
85 + )
86 +
87 + self.layer1 = nn.Sequential(
88 + nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
89 + nn.BatchNorm2d(16),
90 + nn.ReLU(True)
91 + )
92 +
93 + self.layer2 = nn.Sequential(
94 + nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
95 + nn.BatchNorm2d(32),
96 + nn.ReLU(True)
97 + )
98 +
99 + self.layer3 = Tree(block, 32, 64, level=1, stride=1)
100 + self.layer4 = Tree(block, 64, 128, level=2, stride=2)
101 + self.layer5 = Tree(block, 128, 256, level=2, stride=2)
102 + self.layer6 = Tree(block, 256, 512, level=1, stride=2)
103 + self.linear = nn.Linear(512, num_classes)
104 +
105 + def forward(self, x):
106 + out = self.base(x)
107 + out = self.layer1(out)
108 + out = self.layer2(out)
109 + out = self.layer3(out)
110 + out = self.layer4(out)
111 + out = self.layer5(out)
112 + out = self.layer6(out)
113 + out = F.avg_pool2d(out, 4)
114 + out = out.view(out.size(0), -1)
115 + out = self.linear(out)
116 + return out
117 +
118 +
119 +def test():
120 + net = SimpleDLA()
121 + print(net)
122 + x = torch.randn(1, 3, 32, 32)
123 + y = net(x)
124 + print(y.size())
125 +
126 +
127 +if __name__ == '__main__':
128 + test()
1 +'''Dual Path Networks in PyTorch.'''
2 +import torch
3 +import torch.nn as nn
4 +import torch.nn.functional as F
5 +
6 +
7 +class Bottleneck(nn.Module):
8 + def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer):
9 + super(Bottleneck, self).__init__()
10 + self.out_planes = out_planes
11 + self.dense_depth = dense_depth
12 +
13 + self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False)
14 + self.bn1 = nn.BatchNorm2d(in_planes)
15 + self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False)
16 + self.bn2 = nn.BatchNorm2d(in_planes)
17 + self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False)
18 + self.bn3 = nn.BatchNorm2d(out_planes+dense_depth)
19 +
20 + self.shortcut = nn.Sequential()
21 + if first_layer:
22 + self.shortcut = nn.Sequential(
23 + nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False),
24 + nn.BatchNorm2d(out_planes+dense_depth)
25 + )
26 +
27 + def forward(self, x):
28 + out = F.relu(self.bn1(self.conv1(x)))
29 + out = F.relu(self.bn2(self.conv2(out)))
30 + out = self.bn3(self.conv3(out))
31 + x = self.shortcut(x)
32 + d = self.out_planes
33 + out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1)
34 + out = F.relu(out)
35 + return out
36 +
37 +
38 +class DPN(nn.Module):
39 + def __init__(self, cfg):
40 + super(DPN, self).__init__()
41 + in_planes, out_planes = cfg['in_planes'], cfg['out_planes']
42 + num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth']
43 +
44 + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
45 + self.bn1 = nn.BatchNorm2d(64)
46 + self.last_planes = 64
47 + self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
48 + self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
49 + self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
50 + self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
51 + self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10)
52 +
53 + def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride):
54 + strides = [stride] + [1]*(num_blocks-1)
55 + layers = []
56 + for i,stride in enumerate(strides):
57 + layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0))
58 + self.last_planes = out_planes + (i+2) * dense_depth
59 + return nn.Sequential(*layers)
60 +
61 + def forward(self, x):
62 + out = F.relu(self.bn1(self.conv1(x)))
63 + out = self.layer1(out)
64 + out = self.layer2(out)
65 + out = self.layer3(out)
66 + out = self.layer4(out)
67 + out = F.avg_pool2d(out, 4)
68 + out = out.view(out.size(0), -1)
69 + out = self.linear(out)
70 + return out
71 +
72 +
73 +def DPN26():
74 + cfg = {
75 + 'in_planes': (96,192,384,768),
76 + 'out_planes': (256,512,1024,2048),
77 + 'num_blocks': (2,2,2,2),
78 + 'dense_depth': (16,32,24,128)
79 + }
80 + return DPN(cfg)
81 +
82 +def DPN92():
83 + cfg = {
84 + 'in_planes': (96,192,384,768),
85 + 'out_planes': (256,512,1024,2048),
86 + 'num_blocks': (3,4,20,3),
87 + 'dense_depth': (16,32,24,128)
88 + }
89 + return DPN(cfg)
90 +
91 +
92 +def test():
93 + net = DPN92()
94 + x = torch.randn(1,3,32,32)
95 + y = net(x)
96 + print(y)
97 +
98 +# test()
1 +'''EfficientNet in PyTorch.
2 +
3 +Paper: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks".
4 +
5 +Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
6 +'''
7 +import torch
8 +import torch.nn as nn
9 +import torch.nn.functional as F
10 +
11 +
12 +def swish(x):
13 + return x * x.sigmoid()
14 +
15 +
16 +def drop_connect(x, drop_ratio):
17 + keep_ratio = 1.0 - drop_ratio
18 + mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
19 + mask.bernoulli_(keep_ratio)
20 + x.div_(keep_ratio)
21 + x.mul_(mask)
22 + return x
23 +
24 +
25 +class SE(nn.Module):
26 + '''Squeeze-and-Excitation block with Swish.'''
27 +
28 + def __init__(self, in_channels, se_channels):
29 + super(SE, self).__init__()
30 + self.se1 = nn.Conv2d(in_channels, se_channels,
31 + kernel_size=1, bias=True)
32 + self.se2 = nn.Conv2d(se_channels, in_channels,
33 + kernel_size=1, bias=True)
34 +
35 + def forward(self, x):
36 + out = F.adaptive_avg_pool2d(x, (1, 1))
37 + out = swish(self.se1(out))
38 + out = self.se2(out).sigmoid()
39 + out = x * out
40 + return out
41 +
42 +
43 +class Block(nn.Module):
44 + '''expansion + depthwise + pointwise + squeeze-excitation'''
45 +
46 + def __init__(self,
47 + in_channels,
48 + out_channels,
49 + kernel_size,
50 + stride,
51 + expand_ratio=1,
52 + se_ratio=0.,
53 + drop_rate=0.):
54 + super(Block, self).__init__()
55 + self.stride = stride
56 + self.drop_rate = drop_rate
57 + self.expand_ratio = expand_ratio
58 +
59 + # Expansion
60 + channels = expand_ratio * in_channels
61 + self.conv1 = nn.Conv2d(in_channels,
62 + channels,
63 + kernel_size=1,
64 + stride=1,
65 + padding=0,
66 + bias=False)
67 + self.bn1 = nn.BatchNorm2d(channels)
68 +
69 + # Depthwise conv
70 + self.conv2 = nn.Conv2d(channels,
71 + channels,
72 + kernel_size=kernel_size,
73 + stride=stride,
74 + padding=(1 if kernel_size == 3 else 2),
75 + groups=channels,
76 + bias=False)
77 + self.bn2 = nn.BatchNorm2d(channels)
78 +
79 + # SE layers
80 + se_channels = int(in_channels * se_ratio)
81 + self.se = SE(channels, se_channels)
82 +
83 + # Output
84 + self.conv3 = nn.Conv2d(channels,
85 + out_channels,
86 + kernel_size=1,
87 + stride=1,
88 + padding=0,
89 + bias=False)
90 + self.bn3 = nn.BatchNorm2d(out_channels)
91 +
92 + # Skip connection if in and out shapes are the same (MV-V2 style)
93 + self.has_skip = (stride == 1) and (in_channels == out_channels)
94 +
95 + def forward(self, x):
96 + out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x)))
97 + out = swish(self.bn2(self.conv2(out)))
98 + out = self.se(out)
99 + out = self.bn3(self.conv3(out))
100 + if self.has_skip:
101 + if self.training and self.drop_rate > 0:
102 + out = drop_connect(out, self.drop_rate)
103 + out = out + x
104 + return out
105 +
106 +
107 +class EfficientNet(nn.Module):
108 + def __init__(self, cfg, num_classes=10):
109 + super(EfficientNet, self).__init__()
110 + self.cfg = cfg
111 + self.conv1 = nn.Conv2d(3,
112 + 32,
113 + kernel_size=3,
114 + stride=1,
115 + padding=1,
116 + bias=False)
117 + self.bn1 = nn.BatchNorm2d(32)
118 + self.layers = self._make_layers(in_channels=32)
119 + self.linear = nn.Linear(cfg['out_channels'][-1], num_classes)
120 +
121 + def _make_layers(self, in_channels):
122 + layers = []
123 + cfg = [self.cfg[k] for k in ['expansion', 'out_channels', 'num_blocks', 'kernel_size',
124 + 'stride']]
125 + b = 0
126 + blocks = sum(self.cfg['num_blocks'])
127 + for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg):
128 + strides = [stride] + [1] * (num_blocks - 1)
129 + for stride in strides:
130 + drop_rate = self.cfg['drop_connect_rate'] * b / blocks
131 + layers.append(
132 + Block(in_channels,
133 + out_channels,
134 + kernel_size,
135 + stride,
136 + expansion,
137 + se_ratio=0.25,
138 + drop_rate=drop_rate))
139 + in_channels = out_channels
140 + return nn.Sequential(*layers)
141 +
142 + def forward(self, x):
143 + out = swish(self.bn1(self.conv1(x)))
144 + out = self.layers(out)
145 + out = F.adaptive_avg_pool2d(out, 1)
146 + out = out.view(out.size(0), -1)
147 + dropout_rate = self.cfg['dropout_rate']
148 + if self.training and dropout_rate > 0:
149 + out = F.dropout(out, p=dropout_rate)
150 + out = self.linear(out)
151 + return out
152 +
153 +
154 +def EfficientNetB0():
155 + cfg = {
156 + 'num_blocks': [1, 2, 2, 3, 3, 4, 1],
157 + 'expansion': [1, 6, 6, 6, 6, 6, 6],
158 + 'out_channels': [16, 24, 40, 80, 112, 192, 320],
159 + 'kernel_size': [3, 3, 5, 3, 5, 5, 3],
160 + 'stride': [1, 2, 2, 2, 1, 2, 1],
161 + 'dropout_rate': 0.2,
162 + 'drop_connect_rate': 0.2,
163 + }
164 + return EfficientNet(cfg)
165 +
166 +
167 +def test():
168 + net = EfficientNetB0()
169 + x = torch.randn(2, 3, 32, 32)
170 + y = net(x)
171 + print(y.shape)
172 +
173 +
174 +if __name__ == '__main__':
175 + test()
1 +'''GoogLeNet with PyTorch.'''
2 +import torch
3 +import torch.nn as nn
4 +import torch.nn.functional as F
5 +
6 +
7 +class Inception(nn.Module):
8 + def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
9 + super(Inception, self).__init__()
10 + # 1x1 conv branch
11 + self.b1 = nn.Sequential(
12 + nn.Conv2d(in_planes, n1x1, kernel_size=1),
13 + nn.BatchNorm2d(n1x1),
14 + nn.ReLU(True),
15 + )
16 +
17 + # 1x1 conv -> 3x3 conv branch
18 + self.b2 = nn.Sequential(
19 + nn.Conv2d(in_planes, n3x3red, kernel_size=1),
20 + nn.BatchNorm2d(n3x3red),
21 + nn.ReLU(True),
22 + nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
23 + nn.BatchNorm2d(n3x3),
24 + nn.ReLU(True),
25 + )
26 +
27 + # 1x1 conv -> 5x5 conv branch
28 + self.b3 = nn.Sequential(
29 + nn.Conv2d(in_planes, n5x5red, kernel_size=1),
30 + nn.BatchNorm2d(n5x5red),
31 + nn.ReLU(True),
32 + nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
33 + nn.BatchNorm2d(n5x5),
34 + nn.ReLU(True),
35 + nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
36 + nn.BatchNorm2d(n5x5),
37 + nn.ReLU(True),
38 + )
39 +
40 + # 3x3 pool -> 1x1 conv branch
41 + self.b4 = nn.Sequential(
42 + nn.MaxPool2d(3, stride=1, padding=1),
43 + nn.Conv2d(in_planes, pool_planes, kernel_size=1),
44 + nn.BatchNorm2d(pool_planes),
45 + nn.ReLU(True),
46 + )
47 +
48 + def forward(self, x):
49 + y1 = self.b1(x)
50 + y2 = self.b2(x)
51 + y3 = self.b3(x)
52 + y4 = self.b4(x)
53 + return torch.cat([y1,y2,y3,y4], 1)
54 +
55 +
56 +class GoogLeNet(nn.Module):
57 + def __init__(self):
58 + super(GoogLeNet, self).__init__()
59 + self.pre_layers = nn.Sequential(
60 + nn.Conv2d(3, 192, kernel_size=3, padding=1),
61 + nn.BatchNorm2d(192),
62 + nn.ReLU(True),
63 + )
64 +
65 + self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
66 + self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
67 +
68 + self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
69 +
70 + self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
71 + self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
72 + self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
73 + self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
74 + self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
75 +
76 + self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
77 + self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
78 +
79 + self.avgpool = nn.AvgPool2d(8, stride=1)
80 + self.linear = nn.Linear(1024, 10)
81 +
82 + def forward(self, x):
83 + out = self.pre_layers(x)
84 + out = self.a3(out)
85 + out = self.b3(out)
86 + out = self.maxpool(out)
87 + out = self.a4(out)
88 + out = self.b4(out)
89 + out = self.c4(out)
90 + out = self.d4(out)
91 + out = self.e4(out)
92 + out = self.maxpool(out)
93 + out = self.a5(out)
94 + out = self.b5(out)
95 + out = self.avgpool(out)
96 + out = out.view(out.size(0), -1)
97 + out = self.linear(out)
98 + return out
99 +
100 +
101 +def test():
102 + net = GoogLeNet()
103 + x = torch.randn(1,3,32,32)
104 + y = net(x)
105 + print(y.size())
106 +
107 +# test()
1 +'''LeNet in PyTorch.'''
2 +import torch.nn as nn
3 +import torch.nn.functional as F
4 +
5 +class LeNet(nn.Module):
6 + def __init__(self):
7 + super(LeNet, self).__init__()
8 + self.conv1 = nn.Conv2d(3, 6, 5)
9 + self.conv2 = nn.Conv2d(6, 16, 5)
10 + self.fc1 = nn.Linear(16*5*5, 120)
11 + self.fc2 = nn.Linear(120, 84)
12 + self.fc3 = nn.Linear(84, 10)
13 +
14 + def forward(self, x):
15 + out = F.relu(self.conv1(x))
16 + out = F.max_pool2d(out, 2)
17 + out = F.relu(self.conv2(out))
18 + out = F.max_pool2d(out, 2)
19 + out = out.view(out.size(0), -1)
20 + out = F.relu(self.fc1(out))
21 + out = F.relu(self.fc2(out))
22 + out = self.fc3(out)
23 + return out
1 +import torch
2 +import torch.nn as nn
3 +import torch.nn.functional as F
4 +
5 +class MobileNet1(nn.Module):
6 + def __init__(self, inchannel=3, num_classes=10):
7 + super(MobileNet1, self).__init__()
8 + self.num_classes = num_classes
9 +
10 + def conv_bn(inp, oup, stride):
11 + return nn.Sequential(
12 + nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
13 + nn.BatchNorm2d(oup),
14 + nn.Hardswish()
15 + #nn.Hardsigmoid(inplace=True)
16 + # nn.LeakyReLU(negative_slope=0.1, inplace=True)
17 + # nn.ReLU(inplace=True)
18 + )
19 +
20 + def conv_dw(inp, oup, stride):
21 + return nn.Sequential(
22 + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
23 + nn.BatchNorm2d(inp),
24 + nn.Hardswish(),
25 + #nn.Hardsigmoid(inplace=True),
26 + # nn.LeakyReLU(negative_slope=0.1, inplace=True),
27 + # nn.ReLU(inplace=True),
28 +
29 + nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
30 + nn.BatchNorm2d(oup),
31 + nn.Hardswish()
32 + #nn.Hardsigmoid(inplace=True)
33 + # nn.LeakyReLU(negative_slope=0.1, inplace=True)
34 + # nn.ReLU(inplace=True),
35 + )
36 +
37 + self.model = nn.Sequential(
38 + conv_bn(inchannel, 32, 1),
39 + conv_dw( 32, 64, 1),
40 + conv_dw( 64, 128, 2),
41 + conv_dw(128, 128, 1),
42 + conv_dw(128, 256, 2),
43 + conv_dw(256, 256, 1),
44 + conv_dw(256, 512, 2),
45 + conv_dw(512, 512, 1),
46 + conv_dw(512, 512, 1),
47 + conv_dw(512, 512, 1),
48 + conv_dw(512, 512, 1),
49 + conv_dw(512, 512, 1),
50 + conv_dw(512, 1024, 2),
51 + conv_dw(1024, 1024, 1),
52 + nn.AdaptiveAvgPool2d(1)
53 + )
54 + self.fc = nn.Linear(1024, self.num_classes)
55 +
56 +
57 +    def forward(self, x):
58 +        x = self.model(x)               # the float nn.Sequential returns a single tensor
59 +        # quantized path (after module replacement, the final pooling returns a scale):
60 +        # x, act_scale = self.model(x); ...; x = self.fc(x, act_scale)
61 +        x = x.view(x.size(0), -1)
62 +        x = self.fc(x)
63 +        return x
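`conv_dw` factorizes a standard 3x3 convolution into a depthwise 3x3 plus a pointwise 1x1, which is where MobileNet's parameter savings come from. A rough weight count for the 256->512 stage (BatchNorm parameters ignored):

```
standard  = 256 * 512 * 3 * 3        # full 3x3 conv: 1,179,648 weights
separable = 256 * 3 * 3 + 256 * 512  # depthwise + pointwise: 133,376 weights
print(standard / separable)          # ~8.8x fewer weights
```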
1 +'''MobileNetV2 in PyTorch.
2 +
3 +See the paper "Inverted Residuals and Linear Bottlenecks:
4 +Mobile Networks for Classification, Detection and Segmentation" for more details.
5 +'''
6 +import torch
7 +import torch.nn as nn
8 +import torch.nn.functional as F
9 +
10 +
11 +class Block(nn.Module):
12 + '''expand + depthwise + pointwise'''
13 + def __init__(self, in_planes, out_planes, expansion, stride):
14 + super(Block, self).__init__()
15 + self.stride = stride
16 +
17 + planes = expansion * in_planes
18 + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False)
19 + self.bn1 = nn.BatchNorm2d(planes)
20 + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False)
21 + self.bn2 = nn.BatchNorm2d(planes)
22 + self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
23 + self.bn3 = nn.BatchNorm2d(out_planes)
24 +
25 + self.shortcut = nn.Sequential()
26 + if stride == 1 and in_planes != out_planes:
27 + self.shortcut = nn.Sequential(
28 + nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
29 + nn.BatchNorm2d(out_planes),
30 + )
31 +
32 + def forward(self, x):
33 + out = F.relu(self.bn1(self.conv1(x)))
34 + out = F.relu(self.bn2(self.conv2(out)))
35 + out = self.bn3(self.conv3(out))
36 + out = out + self.shortcut(x) if self.stride==1 else out
37 + return out
38 +
39 +
40 +class MobileNetV2(nn.Module):
41 + # (expansion, out_planes, num_blocks, stride)
42 + cfg = [(1, 16, 1, 1),
43 + (6, 24, 2, 1), # NOTE: change stride 2 -> 1 for CIFAR10
44 + (6, 32, 3, 2),
45 + (6, 64, 4, 2),
46 + (6, 96, 3, 1),
47 + (6, 160, 3, 2),
48 + (6, 320, 1, 1)]
49 +
50 + def __init__(self, num_classes=10):
51 + super(MobileNetV2, self).__init__()
52 + # NOTE: change conv1 stride 2 -> 1 for CIFAR10
53 + self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
54 + self.bn1 = nn.BatchNorm2d(32)
55 + self.layers = self._make_layers(in_planes=32)
56 + self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
57 + self.bn2 = nn.BatchNorm2d(1280)
58 + self.linear = nn.Linear(1280, num_classes)
59 +
60 + def _make_layers(self, in_planes):
61 + layers = []
62 + for expansion, out_planes, num_blocks, stride in self.cfg:
63 + strides = [stride] + [1]*(num_blocks-1)
64 + for stride in strides:
65 + layers.append(Block(in_planes, out_planes, expansion, stride))
66 + in_planes = out_planes
67 + return nn.Sequential(*layers)
68 +
69 + def forward(self, x):
70 + out = F.relu(self.bn1(self.conv1(x)))
71 + out = self.layers(out)
72 + out = F.relu(self.bn2(self.conv2(out)))
73 + # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10
74 + out = F.avg_pool2d(out, 4)
75 + out = out.view(out.size(0), -1)
76 + out = self.linear(out)
77 + return out
78 +
79 +
80 +def test():
81 + net = MobileNetV2()
82 + x = torch.randn(2,3,32,32)
83 + y = net(x)
84 + print(y.size())
85 +
86 +# test()
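In `Block`, the residual connection is only used at stride 1, with a 1x1 projection when the channel counts differ. A minimal shape check, assuming the `Block` class above is in scope:

```
import torch

# Second cfg row, first block: expand 16 -> 6*16 = 96 channels, project to 24.
blk = Block(16, 24, expansion=6, stride=1)
x = torch.randn(1, 16, 32, 32)
assert blk(x).shape == (1, 24, 32, 32)  # stride 1, so the projected shortcut is added
```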
1 +'''PNASNet in PyTorch.
2 +
3 +Paper: Progressive Neural Architecture Search
4 +'''
5 +import torch
6 +import torch.nn as nn
7 +import torch.nn.functional as F
8 +
9 +
10 +class SepConv(nn.Module):
11 + '''Separable Convolution.'''
12 + def __init__(self, in_planes, out_planes, kernel_size, stride):
13 + super(SepConv, self).__init__()
14 + self.conv1 = nn.Conv2d(in_planes, out_planes,
15 + kernel_size, stride,
16 + padding=(kernel_size-1)//2,
17 + bias=False, groups=in_planes)
18 + self.bn1 = nn.BatchNorm2d(out_planes)
19 +
20 + def forward(self, x):
21 + return self.bn1(self.conv1(x))
22 +
23 +
24 +class CellA(nn.Module):
25 + def __init__(self, in_planes, out_planes, stride=1):
26 + super(CellA, self).__init__()
27 + self.stride = stride
28 + self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
29 + if stride==2:
30 + self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
31 + self.bn1 = nn.BatchNorm2d(out_planes)
32 +
33 + def forward(self, x):
34 + y1 = self.sep_conv1(x)
35 + y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
36 + if self.stride==2:
37 + y2 = self.bn1(self.conv1(y2))
38 + return F.relu(y1+y2)
39 +
40 +class CellB(nn.Module):
41 + def __init__(self, in_planes, out_planes, stride=1):
42 + super(CellB, self).__init__()
43 + self.stride = stride
44 + # Left branch
45 + self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
46 + self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride)
47 + # Right branch
48 + self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride)
49 + if stride==2:
50 + self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
51 + self.bn1 = nn.BatchNorm2d(out_planes)
52 + # Reduce channels
53 + self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
54 + self.bn2 = nn.BatchNorm2d(out_planes)
55 +
56 + def forward(self, x):
57 + # Left branch
58 + y1 = self.sep_conv1(x)
59 + y2 = self.sep_conv2(x)
60 + # Right branch
61 + y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
62 + if self.stride==2:
63 + y3 = self.bn1(self.conv1(y3))
64 + y4 = self.sep_conv3(x)
65 + # Concat & reduce channels
66 + b1 = F.relu(y1+y2)
67 + b2 = F.relu(y3+y4)
68 + y = torch.cat([b1,b2], 1)
69 + return F.relu(self.bn2(self.conv2(y)))
70 +
71 +class PNASNet(nn.Module):
72 + def __init__(self, cell_type, num_cells, num_planes):
73 + super(PNASNet, self).__init__()
74 + self.in_planes = num_planes
75 + self.cell_type = cell_type
76 +
77 + self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False)
78 + self.bn1 = nn.BatchNorm2d(num_planes)
79 +
80 +        self.layer1 = self._make_layer(num_planes, num_cells=num_cells)
81 +        self.layer2 = self._downsample(num_planes*2)
82 +        self.layer3 = self._make_layer(num_planes*2, num_cells=num_cells)
83 +        self.layer4 = self._downsample(num_planes*4)
84 +        self.layer5 = self._make_layer(num_planes*4, num_cells=num_cells)
85 +
86 + self.linear = nn.Linear(num_planes*4, 10)
87 +
88 + def _make_layer(self, planes, num_cells):
89 + layers = []
90 + for _ in range(num_cells):
91 + layers.append(self.cell_type(self.in_planes, planes, stride=1))
92 + self.in_planes = planes
93 + return nn.Sequential(*layers)
94 +
95 + def _downsample(self, planes):
96 + layer = self.cell_type(self.in_planes, planes, stride=2)
97 + self.in_planes = planes
98 + return layer
99 +
100 + def forward(self, x):
101 + out = F.relu(self.bn1(self.conv1(x)))
102 + out = self.layer1(out)
103 + out = self.layer2(out)
104 + out = self.layer3(out)
105 + out = self.layer4(out)
106 + out = self.layer5(out)
107 + out = F.avg_pool2d(out, 8)
108 + out = self.linear(out.view(out.size(0), -1))
109 + return out
110 +
111 +
112 +def PNASNetA():
113 + return PNASNet(CellA, num_cells=6, num_planes=44)
114 +
115 +def PNASNetB():
116 + return PNASNet(CellB, num_cells=6, num_planes=32)
117 +
118 +
119 +def test():
120 + net = PNASNetB()
121 + x = torch.randn(1,3,32,32)
122 + y = net(x)
123 + print(y)
124 +
125 +# test()
1 +'''Pre-activation ResNet in PyTorch.
2 +
3 +Reference:
4 +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
5 + Identity Mappings in Deep Residual Networks. arXiv:1603.05027
6 +'''
7 +import torch
8 +import torch.nn as nn
9 +import torch.nn.functional as F
10 +
11 +
12 +class PreActBlock(nn.Module):
13 + '''Pre-activation version of the BasicBlock.'''
14 + expansion = 1
15 +
16 + def __init__(self, in_planes, planes, stride=1):
17 + super(PreActBlock, self).__init__()
18 + self.bn1 = nn.BatchNorm2d(in_planes)
19 + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
20 + self.bn2 = nn.BatchNorm2d(planes)
21 + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
22 +
23 + if stride != 1 or in_planes != self.expansion*planes:
24 + self.shortcut = nn.Sequential(
25 + nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
26 + )
27 +
28 + def forward(self, x):
29 + out = F.relu(self.bn1(x))
30 + shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
31 + out = self.conv1(out)
32 + out = self.conv2(F.relu(self.bn2(out)))
33 + out += shortcut
34 + return out
35 +
36 +
37 +class PreActBottleneck(nn.Module):
38 + '''Pre-activation version of the original Bottleneck module.'''
39 + expansion = 4
40 +
41 + def __init__(self, in_planes, planes, stride=1):
42 + super(PreActBottleneck, self).__init__()
43 + self.bn1 = nn.BatchNorm2d(in_planes)
44 + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
45 + self.bn2 = nn.BatchNorm2d(planes)
46 + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
47 + self.bn3 = nn.BatchNorm2d(planes)
48 + self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
49 +
50 + if stride != 1 or in_planes != self.expansion*planes:
51 + self.shortcut = nn.Sequential(
52 + nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
53 + )
54 +
55 + def forward(self, x):
56 + out = F.relu(self.bn1(x))
57 + shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
58 + out = self.conv1(out)
59 + out = self.conv2(F.relu(self.bn2(out)))
60 + out = self.conv3(F.relu(self.bn3(out)))
61 + out += shortcut
62 + return out
63 +
64 +
65 +class PreActResNet(nn.Module):
66 + def __init__(self, block, num_blocks, num_classes=10):
67 + super(PreActResNet, self).__init__()
68 + self.in_planes = 64
69 +
70 + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
71 + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
72 + self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
73 + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
74 + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
75 + self.linear = nn.Linear(512*block.expansion, num_classes)
76 +
77 + def _make_layer(self, block, planes, num_blocks, stride):
78 + strides = [stride] + [1]*(num_blocks-1)
79 + layers = []
80 + for stride in strides:
81 + layers.append(block(self.in_planes, planes, stride))
82 + self.in_planes = planes * block.expansion
83 + return nn.Sequential(*layers)
84 +
85 + def forward(self, x):
86 + out = self.conv1(x)
87 + out = self.layer1(out)
88 + out = self.layer2(out)
89 + out = self.layer3(out)
90 + out = self.layer4(out)
91 + out = F.avg_pool2d(out, 4)
92 + out = out.view(out.size(0), -1)
93 + out = self.linear(out)
94 + return out
95 +
96 +
97 +def PreActResNet18():
98 + return PreActResNet(PreActBlock, [2,2,2,2])
99 +
100 +def PreActResNet34():
101 + return PreActResNet(PreActBlock, [3,4,6,3])
102 +
103 +def PreActResNet50():
104 + return PreActResNet(PreActBottleneck, [3,4,6,3])
105 +
106 +def PreActResNet101():
107 + return PreActResNet(PreActBottleneck, [3,4,23,3])
108 +
109 +def PreActResNet152():
110 + return PreActResNet(PreActBottleneck, [3,8,36,3])
111 +
112 +
113 +def test():
114 + net = PreActResNet18()
115 + y = net((torch.randn(1,3,32,32)))
116 + print(y.size())
117 +
118 +# test()
1 +'''RegNet in PyTorch.
2 +
3 +Paper: "Designing Network Design Spaces".
4 +
5 +Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
6 +'''
7 +import torch
8 +import torch.nn as nn
9 +import torch.nn.functional as F
10 +
11 +
12 +class SE(nn.Module):
13 + '''Squeeze-and-Excitation block.'''
14 +
15 + def __init__(self, in_planes, se_planes):
16 + super(SE, self).__init__()
17 + self.se1 = nn.Conv2d(in_planes, se_planes, kernel_size=1, bias=True)
18 + self.se2 = nn.Conv2d(se_planes, in_planes, kernel_size=1, bias=True)
19 +
20 + def forward(self, x):
21 + out = F.adaptive_avg_pool2d(x, (1, 1))
22 + out = F.relu(self.se1(out))
23 + out = self.se2(out).sigmoid()
24 + out = x * out
25 + return out
26 +
27 +
28 +class Block(nn.Module):
29 + def __init__(self, w_in, w_out, stride, group_width, bottleneck_ratio, se_ratio):
30 + super(Block, self).__init__()
31 + # 1x1
32 + w_b = int(round(w_out * bottleneck_ratio))
33 + self.conv1 = nn.Conv2d(w_in, w_b, kernel_size=1, bias=False)
34 + self.bn1 = nn.BatchNorm2d(w_b)
35 + # 3x3
36 + num_groups = w_b // group_width
37 + self.conv2 = nn.Conv2d(w_b, w_b, kernel_size=3,
38 + stride=stride, padding=1, groups=num_groups, bias=False)
39 + self.bn2 = nn.BatchNorm2d(w_b)
40 + # se
41 + self.with_se = se_ratio > 0
42 + if self.with_se:
43 + w_se = int(round(w_in * se_ratio))
44 + self.se = SE(w_b, w_se)
45 + # 1x1
46 + self.conv3 = nn.Conv2d(w_b, w_out, kernel_size=1, bias=False)
47 + self.bn3 = nn.BatchNorm2d(w_out)
48 +
49 + self.shortcut = nn.Sequential()
50 + if stride != 1 or w_in != w_out:
51 + self.shortcut = nn.Sequential(
52 + nn.Conv2d(w_in, w_out,
53 + kernel_size=1, stride=stride, bias=False),
54 + nn.BatchNorm2d(w_out)
55 + )
56 +
57 + def forward(self, x):
58 + out = F.relu(self.bn1(self.conv1(x)))
59 + out = F.relu(self.bn2(self.conv2(out)))
60 + if self.with_se:
61 + out = self.se(out)
62 + out = self.bn3(self.conv3(out))
63 + out += self.shortcut(x)
64 + out = F.relu(out)
65 + return out
66 +
67 +
68 +class RegNet(nn.Module):
69 + def __init__(self, cfg, num_classes=10):
70 + super(RegNet, self).__init__()
71 + self.cfg = cfg
72 + self.in_planes = 64
73 + self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
74 + stride=1, padding=1, bias=False)
75 + self.bn1 = nn.BatchNorm2d(64)
76 + self.layer1 = self._make_layer(0)
77 + self.layer2 = self._make_layer(1)
78 + self.layer3 = self._make_layer(2)
79 + self.layer4 = self._make_layer(3)
80 + self.linear = nn.Linear(self.cfg['widths'][-1], num_classes)
81 +
82 + def _make_layer(self, idx):
83 + depth = self.cfg['depths'][idx]
84 + width = self.cfg['widths'][idx]
85 + stride = self.cfg['strides'][idx]
86 + group_width = self.cfg['group_width']
87 + bottleneck_ratio = self.cfg['bottleneck_ratio']
88 + se_ratio = self.cfg['se_ratio']
89 +
90 + layers = []
91 + for i in range(depth):
92 + s = stride if i == 0 else 1
93 + layers.append(Block(self.in_planes, width,
94 + s, group_width, bottleneck_ratio, se_ratio))
95 + self.in_planes = width
96 + return nn.Sequential(*layers)
97 +
98 + def forward(self, x):
99 + out = F.relu(self.bn1(self.conv1(x)))
100 + out = self.layer1(out)
101 + out = self.layer2(out)
102 + out = self.layer3(out)
103 + out = self.layer4(out)
104 + out = F.adaptive_avg_pool2d(out, (1, 1))
105 + out = out.view(out.size(0), -1)
106 + out = self.linear(out)
107 + return out
108 +
109 +
110 +def RegNetX_200MF():
111 + cfg = {
112 + 'depths': [1, 1, 4, 7],
113 + 'widths': [24, 56, 152, 368],
114 + 'strides': [1, 1, 2, 2],
115 + 'group_width': 8,
116 + 'bottleneck_ratio': 1,
117 + 'se_ratio': 0,
118 + }
119 + return RegNet(cfg)
120 +
121 +
122 +def RegNetX_400MF():
123 + cfg = {
124 + 'depths': [1, 2, 7, 12],
125 + 'widths': [32, 64, 160, 384],
126 + 'strides': [1, 1, 2, 2],
127 + 'group_width': 16,
128 + 'bottleneck_ratio': 1,
129 + 'se_ratio': 0,
130 + }
131 + return RegNet(cfg)
132 +
133 +
134 +def RegNetY_400MF():
135 + cfg = {
136 + 'depths': [1, 2, 7, 12],
137 + 'widths': [32, 64, 160, 384],
138 + 'strides': [1, 1, 2, 2],
139 + 'group_width': 16,
140 + 'bottleneck_ratio': 1,
141 + 'se_ratio': 0.25,
142 + }
143 + return RegNet(cfg)
144 +
145 +
146 +def test():
147 + net = RegNetX_200MF()
148 + print(net)
149 + x = torch.randn(2, 3, 32, 32)
150 + y = net(x)
151 + print(y.shape)
152 +
153 +
154 +if __name__ == '__main__':
155 + test()
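The grouped 3x3 convolution in `Block` uses `num_groups = w_b // group_width`, so every stage width must be divisible by the group width. A quick check for the RegNetX_200MF configuration above (bottleneck_ratio is 1, so `w_b` equals the stage width):

```
for w in [24, 56, 152, 368]:
    assert w % 8 == 0                                 # divisible by group_width=8
    print(w, '->', w // 8, 'groups in the 3x3 conv')  # 3, 7, 19, 46
```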
1 +'''ResNet in PyTorch.
2 +
3 +For Pre-activation ResNet, see 'preact_resnet.py'.
4 +
5 +Reference:
6 +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
7 + Deep Residual Learning for Image Recognition. arXiv:1512.03385
8 +'''
9 +import torch
10 +import torch.nn as nn
11 +import torch.nn.functional as F
12 +
13 +
14 +class BasicBlock(nn.Module):
15 + expansion = 1
16 +
17 + def __init__(self, in_planes, planes, stride=1):
18 + super(BasicBlock, self).__init__()
19 + self.conv1 = nn.Conv2d(
20 + in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
21 + self.bn1 = nn.BatchNorm2d(planes)
22 + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
23 + stride=1, padding=1, bias=False)
24 + self.bn2 = nn.BatchNorm2d(planes)
25 +
26 + self.shortcut = nn.Sequential()
27 + if stride != 1 or in_planes != self.expansion*planes:
28 + self.shortcut = nn.Sequential(
29 + nn.Conv2d(in_planes, self.expansion*planes,
30 + kernel_size=1, stride=stride, bias=False),
31 + nn.BatchNorm2d(self.expansion*planes)
32 + )
33 +
34 + def forward(self, x):
35 + out = F.relu(self.bn1(self.conv1(x)))
36 + out = self.bn2(self.conv2(out))
37 + out += self.shortcut(x)
38 + out = F.relu(out)
39 + return out
40 +
41 +
42 +class Bottleneck(nn.Module):
43 + expansion = 4
44 +
45 + def __init__(self, in_planes, planes, stride=1):
46 + super(Bottleneck, self).__init__()
47 + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
48 + self.bn1 = nn.BatchNorm2d(planes)
49 + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
50 + stride=stride, padding=1, bias=False)
51 + self.bn2 = nn.BatchNorm2d(planes)
52 + self.conv3 = nn.Conv2d(planes, self.expansion *
53 + planes, kernel_size=1, bias=False)
54 + self.bn3 = nn.BatchNorm2d(self.expansion*planes)
55 +
56 + self.shortcut = nn.Sequential()
57 + if stride != 1 or in_planes != self.expansion*planes:
58 + self.shortcut = nn.Sequential(
59 + nn.Conv2d(in_planes, self.expansion*planes,
60 + kernel_size=1, stride=stride, bias=False),
61 + nn.BatchNorm2d(self.expansion*planes)
62 + )
63 +
64 + def forward(self, x):
65 + out = F.relu(self.bn1(self.conv1(x)))
66 + out = F.relu(self.bn2(self.conv2(out)))
67 + out = self.bn3(self.conv3(out))
68 + out += self.shortcut(x)
69 + out = F.relu(out)
70 + return out
71 +
72 +
73 +class ResNet(nn.Module):
74 + def __init__(self, block, num_blocks, num_classes=10):
75 + super(ResNet, self).__init__()
76 + self.in_planes = 64
77 +
78 + self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
79 + stride=1, padding=1, bias=False)
80 + self.bn1 = nn.BatchNorm2d(64)
81 + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
82 + self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
83 + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
84 + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
85 + self.linear = nn.Linear(512*block.expansion, num_classes)
86 +
87 + def _make_layer(self, block, planes, num_blocks, stride):
88 + strides = [stride] + [1]*(num_blocks-1)
89 + layers = []
90 + for stride in strides:
91 + layers.append(block(self.in_planes, planes, stride))
92 + self.in_planes = planes * block.expansion
93 + return nn.Sequential(*layers)
94 +
95 + def forward(self, x):
96 + out = F.relu(self.bn1(self.conv1(x)))
97 + out = self.layer1(out)
98 + out = self.layer2(out)
99 + out = self.layer3(out)
100 + out = self.layer4(out)
101 + out = F.avg_pool2d(out, 4)
102 + out = out.view(out.size(0), -1)
103 + out = self.linear(out)
104 + return out
105 +
106 +
107 +def ResNet18():
108 + return ResNet(BasicBlock, [2, 2, 2, 2])
109 +
110 +
111 +def ResNet34():
112 + return ResNet(BasicBlock, [3, 4, 6, 3])
113 +
114 +
115 +def ResNet50():
116 + return ResNet(Bottleneck, [3, 4, 6, 3])
117 +
118 +
119 +def ResNet101():
120 + return ResNet(Bottleneck, [3, 4, 23, 3])
121 +
122 +
123 +def ResNet152():
124 + return ResNet(Bottleneck, [3, 8, 36, 3])
125 +
126 +
127 +def test():
128 + net = ResNet18()
129 + y = net(torch.randn(1, 3, 32, 32))
130 + print(y.size())
131 +
132 +# test()
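The "18" in ResNet18 counts the weighted layers: the stem conv, two 3x3 convs per `BasicBlock`, and the final linear layer. A quick tally from the block counts above:

```
depth18 = 1 + 2 * sum([2, 2, 2, 2]) + 1  # BasicBlock: 2 convs each -> 18
depth50 = 1 + 3 * sum([3, 4, 6, 3]) + 1  # Bottleneck: 3 convs each -> 50
assert (depth18, depth50) == (18, 50)
```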
1 +'''ResNeXt in PyTorch.
2 +
3 +See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details.
4 +'''
5 +import torch
6 +import torch.nn as nn
7 +import torch.nn.functional as F
8 +
9 +
10 +class Block(nn.Module):
11 + '''Grouped convolution block.'''
12 + expansion = 2
13 +
14 + def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
15 + super(Block, self).__init__()
16 + group_width = cardinality * bottleneck_width
17 + self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
18 + self.bn1 = nn.BatchNorm2d(group_width)
19 + self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
20 + self.bn2 = nn.BatchNorm2d(group_width)
21 + self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False)
22 + self.bn3 = nn.BatchNorm2d(self.expansion*group_width)
23 +
24 + self.shortcut = nn.Sequential()
25 + if stride != 1 or in_planes != self.expansion*group_width:
26 + self.shortcut = nn.Sequential(
27 + nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False),
28 + nn.BatchNorm2d(self.expansion*group_width)
29 + )
30 +
31 + def forward(self, x):
32 + out = F.relu(self.bn1(self.conv1(x)))
33 + out = F.relu(self.bn2(self.conv2(out)))
34 + out = self.bn3(self.conv3(out))
35 + out += self.shortcut(x)
36 + out = F.relu(out)
37 + return out
38 +
39 +
40 +class ResNeXt(nn.Module):
41 + def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
42 + super(ResNeXt, self).__init__()
43 + self.cardinality = cardinality
44 + self.bottleneck_width = bottleneck_width
45 + self.in_planes = 64
46 +
47 + self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
48 + self.bn1 = nn.BatchNorm2d(64)
49 + self.layer1 = self._make_layer(num_blocks[0], 1)
50 + self.layer2 = self._make_layer(num_blocks[1], 2)
51 + self.layer3 = self._make_layer(num_blocks[2], 2)
52 + # self.layer4 = self._make_layer(num_blocks[3], 2)
53 + self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes)
54 +
55 + def _make_layer(self, num_blocks, stride):
56 + strides = [stride] + [1]*(num_blocks-1)
57 + layers = []
58 + for stride in strides:
59 + layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride))
60 + self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
61 + # Increase bottleneck_width by 2 after each stage.
62 + self.bottleneck_width *= 2
63 + return nn.Sequential(*layers)
64 +
65 + def forward(self, x):
66 + out = F.relu(self.bn1(self.conv1(x)))
67 + out = self.layer1(out)
68 + out = self.layer2(out)
69 + out = self.layer3(out)
70 + # out = self.layer4(out)
71 + out = F.avg_pool2d(out, 8)
72 + out = out.view(out.size(0), -1)
73 + out = self.linear(out)
74 + return out
75 +
76 +
77 +def ResNeXt29_2x64d():
78 + return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64)
79 +
80 +def ResNeXt29_4x64d():
81 + return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64)
82 +
83 +def ResNeXt29_8x64d():
84 + return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64)
85 +
86 +def ResNeXt29_32x4d():
87 + return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4)
88 +
89 +def test_resnext():
90 + net = ResNeXt29_2x64d()
91 + x = torch.randn(1,3,32,32)
92 + y = net(x)
93 + print(y.size())
94 +
95 +# test_resnext()
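The `nn.Linear(cardinality*bottleneck_width*8, num_classes)` input size follows from the width schedule: each stage outputs `Block.expansion * cardinality * bottleneck_width` channels, and `bottleneck_width` doubles after every stage. For ResNeXt29_32x4d:

```
cardinality, width, expansion = 32, 4, 2
outs = []
for _ in range(3):                    # three stages
    outs.append(expansion * cardinality * width)
    width *= 2
print(outs)                           # [256, 512, 1024]; 1024 == 32*4*8
```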
1 +'''SENet in PyTorch.
2 +
3 +SENet won the ImageNet-2017 classification challenge. See the paper "Squeeze-and-Excitation Networks" (arXiv:1709.01507) for details.
4 +'''
5 +import torch
6 +import torch.nn as nn
7 +import torch.nn.functional as F
8 +
9 +
10 +class BasicBlock(nn.Module):
11 + def __init__(self, in_planes, planes, stride=1):
12 + super(BasicBlock, self).__init__()
13 + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
14 + self.bn1 = nn.BatchNorm2d(planes)
15 + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
16 + self.bn2 = nn.BatchNorm2d(planes)
17 +
18 + self.shortcut = nn.Sequential()
19 + if stride != 1 or in_planes != planes:
20 + self.shortcut = nn.Sequential(
21 + nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
22 + nn.BatchNorm2d(planes)
23 + )
24 +
25 + # SE layers
26 + self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1) # Use nn.Conv2d instead of nn.Linear
27 + self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)
28 +
29 + def forward(self, x):
30 + out = F.relu(self.bn1(self.conv1(x)))
31 + out = self.bn2(self.conv2(out))
32 +
33 + # Squeeze
34 + w = F.avg_pool2d(out, out.size(2))
35 + w = F.relu(self.fc1(w))
36 +        w = torch.sigmoid(self.fc2(w))  # torch.sigmoid; F.sigmoid is deprecated
37 + # Excitation
38 + out = out * w # New broadcasting feature from v0.2!
39 +
40 + out += self.shortcut(x)
41 + out = F.relu(out)
42 + return out
43 +
44 +
45 +class PreActBlock(nn.Module):
46 + def __init__(self, in_planes, planes, stride=1):
47 + super(PreActBlock, self).__init__()
48 + self.bn1 = nn.BatchNorm2d(in_planes)
49 + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
50 + self.bn2 = nn.BatchNorm2d(planes)
51 + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
52 +
53 + if stride != 1 or in_planes != planes:
54 + self.shortcut = nn.Sequential(
55 + nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False)
56 + )
57 +
58 + # SE layers
59 + self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)
60 + self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)
61 +
62 + def forward(self, x):
63 + out = F.relu(self.bn1(x))
64 + shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
65 + out = self.conv1(out)
66 + out = self.conv2(F.relu(self.bn2(out)))
67 +
68 + # Squeeze
69 + w = F.avg_pool2d(out, out.size(2))
70 + w = F.relu(self.fc1(w))
71 +        w = torch.sigmoid(self.fc2(w))  # torch.sigmoid; F.sigmoid is deprecated
72 + # Excitation
73 + out = out * w
74 +
75 + out += shortcut
76 + return out
77 +
78 +
79 +class SENet(nn.Module):
80 + def __init__(self, block, num_blocks, num_classes=10):
81 + super(SENet, self).__init__()
82 + self.in_planes = 64
83 +
84 + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
85 + self.bn1 = nn.BatchNorm2d(64)
86 + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
87 + self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
88 + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
89 + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
90 + self.linear = nn.Linear(512, num_classes)
91 +
92 + def _make_layer(self, block, planes, num_blocks, stride):
93 + strides = [stride] + [1]*(num_blocks-1)
94 + layers = []
95 + for stride in strides:
96 + layers.append(block(self.in_planes, planes, stride))
97 + self.in_planes = planes
98 + return nn.Sequential(*layers)
99 +
100 + def forward(self, x):
101 + out = F.relu(self.bn1(self.conv1(x)))
102 + out = self.layer1(out)
103 + out = self.layer2(out)
104 + out = self.layer3(out)
105 + out = self.layer4(out)
106 + out = F.avg_pool2d(out, 4)
107 + out = out.view(out.size(0), -1)
108 + out = self.linear(out)
109 + return out
110 +
111 +
112 +def SENet18():
113 + return SENet(PreActBlock, [2,2,2,2])
114 +
115 +
116 +def test():
117 + net = SENet18()
118 + y = net(torch.randn(1,3,32,32))
119 + print(y.size())
120 +
121 +# test()
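The squeeze step pools each channel down to a single value and the excitation step rescales the channels by broadcasting. A tiny standalone sketch of that rescaling, mirroring the `forward` above:

```
import torch
import torch.nn.functional as F

out = torch.randn(1, 8, 4, 4)
w = F.avg_pool2d(out, out.size(2))  # squeeze: (1, 8, 1, 1), one value per channel
w = torch.sigmoid(w)                # gate each channel into (0, 1)
scaled = out * w                    # excitation: broadcasts over H and W
assert scaled.shape == out.shape
```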
1 +'''ShuffleNet in PyTorch.
2 +
3 +See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.
4 +'''
5 +import torch
6 +import torch.nn as nn
7 +import torch.nn.functional as F
8 +
9 +
10 +class ShuffleBlock(nn.Module):
11 + def __init__(self, groups):
12 + super(ShuffleBlock, self).__init__()
13 + self.groups = groups
14 +
15 + def forward(self, x):
16 +        '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]'''
17 + N,C,H,W = x.size()
18 + g = self.groups
19 + return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).reshape(N,C,H,W)
20 +
21 +
22 +class Bottleneck(nn.Module):
23 + def __init__(self, in_planes, out_planes, stride, groups):
24 + super(Bottleneck, self).__init__()
25 + self.stride = stride
26 +
27 +        mid_planes = out_planes // 4  # integer division: nn.Conv2d channel counts must be ints
28 + g = 1 if in_planes==24 else groups
29 + self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
30 + self.bn1 = nn.BatchNorm2d(mid_planes)
31 + self.shuffle1 = ShuffleBlock(groups=g)
32 + self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
33 + self.bn2 = nn.BatchNorm2d(mid_planes)
34 + self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
35 + self.bn3 = nn.BatchNorm2d(out_planes)
36 +
37 + self.shortcut = nn.Sequential()
38 + if stride == 2:
39 + self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))
40 +
41 + def forward(self, x):
42 + out = F.relu(self.bn1(self.conv1(x)))
43 + out = self.shuffle1(out)
44 + out = F.relu(self.bn2(self.conv2(out)))
45 + out = self.bn3(self.conv3(out))
46 + res = self.shortcut(x)
47 + out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res)
48 + return out
49 +
50 +
51 +class ShuffleNet(nn.Module):
52 + def __init__(self, cfg):
53 + super(ShuffleNet, self).__init__()
54 + out_planes = cfg['out_planes']
55 + num_blocks = cfg['num_blocks']
56 + groups = cfg['groups']
57 +
58 + self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
59 + self.bn1 = nn.BatchNorm2d(24)
60 + self.in_planes = 24
61 + self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
62 + self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
63 + self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
64 + self.linear = nn.Linear(out_planes[2], 10)
65 +
66 + def _make_layer(self, out_planes, num_blocks, groups):
67 + layers = []
68 + for i in range(num_blocks):
69 + stride = 2 if i == 0 else 1
70 + cat_planes = self.in_planes if i == 0 else 0
71 + layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
72 + self.in_planes = out_planes
73 + return nn.Sequential(*layers)
74 +
75 + def forward(self, x):
76 + out = F.relu(self.bn1(self.conv1(x)))
77 + out = self.layer1(out)
78 + out = self.layer2(out)
79 + out = self.layer3(out)
80 + out = F.avg_pool2d(out, 4)
81 + out = out.view(out.size(0), -1)
82 + out = self.linear(out)
83 + return out
84 +
85 +
86 +def ShuffleNetG2():
87 + cfg = {
88 + 'out_planes': [200,400,800],
89 + 'num_blocks': [4,8,4],
90 + 'groups': 2
91 + }
92 + return ShuffleNet(cfg)
93 +
94 +def ShuffleNetG3():
95 + cfg = {
96 + 'out_planes': [240,480,960],
97 + 'num_blocks': [4,8,4],
98 + 'groups': 3
99 + }
100 + return ShuffleNet(cfg)
101 +
102 +
103 +def test():
104 + net = ShuffleNetG2()
105 + x = torch.randn(1,3,32,32)
106 + y = net(x)
107 + print(y)
108 +
109 +# test()
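Channel shuffle interleaves channels across groups so information can cross between grouped convolutions. A small concrete run of the view/permute/reshape trick used in `ShuffleBlock`:

```
import torch

x = torch.arange(6).view(1, 6, 1, 1)  # channels [0..5]; groups [0,1,2] and [3,4,5]
g = 2
y = x.view(1, g, 6 // g, 1, 1).permute(0, 2, 1, 3, 4).reshape(1, 6, 1, 1)
print(y.flatten().tolist())           # [0, 3, 1, 4, 2, 5] -- groups interleaved
```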
1 +'''ShuffleNetV2 in PyTorch.
2 +
3 +See the paper "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" for more details.
4 +'''
5 +import torch
6 +import torch.nn as nn
7 +import torch.nn.functional as F
8 +
9 +
10 +class ShuffleBlock(nn.Module):
11 + def __init__(self, groups=2):
12 + super(ShuffleBlock, self).__init__()
13 + self.groups = groups
14 +
15 + def forward(self, x):
16 +        '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]'''
17 + N, C, H, W = x.size()
18 + g = self.groups
19 + return x.view(N, g, C//g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W)
20 +
21 +
22 +class SplitBlock(nn.Module):
23 + def __init__(self, ratio):
24 + super(SplitBlock, self).__init__()
25 + self.ratio = ratio
26 +
27 + def forward(self, x):
28 + c = int(x.size(1) * self.ratio)
29 + return x[:, :c, :, :], x[:, c:, :, :]
30 +
31 +
32 +class BasicBlock(nn.Module):
33 + def __init__(self, in_channels, split_ratio=0.5):
34 + super(BasicBlock, self).__init__()
35 + self.split = SplitBlock(split_ratio)
36 + in_channels = int(in_channels * split_ratio)
37 + self.conv1 = nn.Conv2d(in_channels, in_channels,
38 + kernel_size=1, bias=False)
39 + self.bn1 = nn.BatchNorm2d(in_channels)
40 + self.conv2 = nn.Conv2d(in_channels, in_channels,
41 + kernel_size=3, stride=1, padding=1, groups=in_channels, bias=False)
42 + self.bn2 = nn.BatchNorm2d(in_channels)
43 + self.conv3 = nn.Conv2d(in_channels, in_channels,
44 + kernel_size=1, bias=False)
45 + self.bn3 = nn.BatchNorm2d(in_channels)
46 + self.shuffle = ShuffleBlock()
47 +
48 + def forward(self, x):
49 + x1, x2 = self.split(x)
50 + out = F.relu(self.bn1(self.conv1(x2)))
51 + out = self.bn2(self.conv2(out))
52 + out = F.relu(self.bn3(self.conv3(out)))
53 + out = torch.cat([x1, out], 1)
54 + out = self.shuffle(out)
55 + return out
56 +
57 +
58 +class DownBlock(nn.Module):
59 + def __init__(self, in_channels, out_channels):
60 + super(DownBlock, self).__init__()
61 + mid_channels = out_channels // 2
62 + # left
63 + self.conv1 = nn.Conv2d(in_channels, in_channels,
64 + kernel_size=3, stride=2, padding=1, groups=in_channels, bias=False)
65 + self.bn1 = nn.BatchNorm2d(in_channels)
66 + self.conv2 = nn.Conv2d(in_channels, mid_channels,
67 + kernel_size=1, bias=False)
68 + self.bn2 = nn.BatchNorm2d(mid_channels)
69 + # right
70 + self.conv3 = nn.Conv2d(in_channels, mid_channels,
71 + kernel_size=1, bias=False)
72 + self.bn3 = nn.BatchNorm2d(mid_channels)
73 + self.conv4 = nn.Conv2d(mid_channels, mid_channels,
74 + kernel_size=3, stride=2, padding=1, groups=mid_channels, bias=False)
75 + self.bn4 = nn.BatchNorm2d(mid_channels)
76 + self.conv5 = nn.Conv2d(mid_channels, mid_channels,
77 + kernel_size=1, bias=False)
78 + self.bn5 = nn.BatchNorm2d(mid_channels)
79 +
80 + self.shuffle = ShuffleBlock()
81 +
82 + def forward(self, x):
83 + # left
84 + out1 = self.bn1(self.conv1(x))
85 + out1 = F.relu(self.bn2(self.conv2(out1)))
86 + # right
87 + out2 = F.relu(self.bn3(self.conv3(x)))
88 + out2 = self.bn4(self.conv4(out2))
89 + out2 = F.relu(self.bn5(self.conv5(out2)))
90 + # concat
91 + out = torch.cat([out1, out2], 1)
92 + out = self.shuffle(out)
93 + return out
94 +
95 +
96 +class ShuffleNetV2(nn.Module):
97 + def __init__(self, net_size):
98 + super(ShuffleNetV2, self).__init__()
99 + out_channels = configs[net_size]['out_channels']
100 + num_blocks = configs[net_size]['num_blocks']
101 +
102 + self.conv1 = nn.Conv2d(3, 24, kernel_size=3,
103 + stride=1, padding=1, bias=False)
104 + self.bn1 = nn.BatchNorm2d(24)
105 + self.in_channels = 24
106 + self.layer1 = self._make_layer(out_channels[0], num_blocks[0])
107 + self.layer2 = self._make_layer(out_channels[1], num_blocks[1])
108 + self.layer3 = self._make_layer(out_channels[2], num_blocks[2])
109 + self.conv2 = nn.Conv2d(out_channels[2], out_channels[3],
110 + kernel_size=1, stride=1, padding=0, bias=False)
111 + self.bn2 = nn.BatchNorm2d(out_channels[3])
112 + self.linear = nn.Linear(out_channels[3], 10)
113 +
114 + def _make_layer(self, out_channels, num_blocks):
115 + layers = [DownBlock(self.in_channels, out_channels)]
116 + for i in range(num_blocks):
117 + layers.append(BasicBlock(out_channels))
118 + self.in_channels = out_channels
119 + return nn.Sequential(*layers)
120 +
121 + def forward(self, x):
122 + out = F.relu(self.bn1(self.conv1(x)))
123 + # out = F.max_pool2d(out, 3, stride=2, padding=1)
124 + out = self.layer1(out)
125 + out = self.layer2(out)
126 + out = self.layer3(out)
127 + out = F.relu(self.bn2(self.conv2(out)))
128 + out = F.avg_pool2d(out, 4)
129 + out = out.view(out.size(0), -1)
130 + out = self.linear(out)
131 + return out
132 +
133 +
134 +configs = {
135 + 0.5: {
136 + 'out_channels': (48, 96, 192, 1024),
137 + 'num_blocks': (3, 7, 3)
138 + },
139 +
140 + 1: {
141 + 'out_channels': (116, 232, 464, 1024),
142 + 'num_blocks': (3, 7, 3)
143 + },
144 + 1.5: {
145 + 'out_channels': (176, 352, 704, 1024),
146 + 'num_blocks': (3, 7, 3)
147 + },
148 + 2: {
149 + 'out_channels': (224, 488, 976, 2048),
150 + 'num_blocks': (3, 7, 3)
151 + }
152 +}
153 +
154 +
155 +def test():
156 + net = ShuffleNetV2(net_size=0.5)
157 + x = torch.randn(3, 3, 32, 32)
158 + y = net(x)
159 + print(y.shape)
160 +
161 +
162 +# test()
1 +'''VGG11/13/16/19 in Pytorch.'''
2 +import torch
3 +import torch.nn as nn
4 +
5 +
6 +cfg = {
7 + 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
8 + 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
9 + 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
10 + 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
11 +}
12 +
13 +
14 +class VGG(nn.Module):
15 + def __init__(self, vgg_name):
16 + super(VGG, self).__init__()
17 + self.features = self._make_layers(cfg[vgg_name])
18 + self.classifier = nn.Linear(512, 10)
19 +
20 + def forward(self, x):
21 + out = self.features(x)
22 + out = out.view(out.size(0), -1)
23 + out = self.classifier(out)
24 + return out
25 +
26 + def _make_layers(self, cfg):
27 + layers = []
28 + in_channels = 3
29 + for x in cfg:
30 + if x == 'M':
31 + layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
32 + else:
33 + layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
34 + nn.BatchNorm2d(x),
35 + nn.ReLU(inplace=True)]
36 + in_channels = x
37 + layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
38 + return nn.Sequential(*layers)
39 +
40 +
41 +def test():
42 + net = VGG('VGG11')
43 + x = torch.randn(2,3,32,32)
44 + y = net(x)
45 + print(y.size())
46 +
47 +# test()
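Every cfg variant contains five 'M' entries; each halves the spatial size while the padded 3x3 convs preserve it, so a 32x32 input reaches the classifier at 1x1, which is why the classifier is `nn.Linear(512, 10)`. A quick check, assuming the `cfg` dict above:

```
size = 32
for x in cfg['VGG16']:
    if x == 'M':
        size //= 2  # pools halve the size; padded 3x3 convs keep it
print(size)         # 1 -> the flattened feature vector is 512*1*1 = 512
```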
1 +import torch
2 +import torch.nn as nn
3 +import torch.nn.functional as F
4 +import math
5 +from lsq_sq import *
6 +from models.mobilenet import *
7 +
8 +conv_idx = -1
9 +act_idx = -1
10 +former_conv = None
11 +
12 +def replace_sq(model, bit_width=8):
13 + global conv_idx, act_idx
14 +
15 + for name, module in model.named_children():
16 +        if isinstance(module, (nn.Sequential)):  # plain container: recurse into its children
17 + replace_sq(model.__dict__['_modules'][name], bit_width)
18 +
19 + elif isinstance(module, nn.Conv2d):
20 + former_conv = name
21 + conv_idx += 1
22 +            bias = module.bias is not None
23 +
24 + model.__dict__['_modules'][name] = FuseConv2dQ(module.in_channels, module.out_channels,
25 + module.kernel_size, stride=module.stride,
26 + padding=module.padding, dilation=module.dilation,
27 + groups=module.groups, bias=bias, wbit=bit_width)
28 + model.__dict__['_modules'][name].weight = module.weight
29 + if bias:
30 + model.__dict__['_modules'][name].bias = module.bias
31 +
32 + elif isinstance(module, nn.BatchNorm2d):
33 +            model.__dict__['_modules'][former_conv].replace_bn(module)  # fold this BN into the conv replaced just above
34 +            model.__dict__['_modules'][name] = nn.Identity()            # the folded BN becomes a no-op
35 +
36 + elif isinstance(module, nn.ReLU):
37 + act_idx += 1
38 + model.__dict__['_modules'][name] = QReLU(abit=bit_width, inplace=False, dequantize=True)
39 +
40 + elif isinstance(module, nn.Hardswish):
41 + act_idx += 1
42 + model.__dict__['_modules'][name] = QHswish(abit=bit_width, inplace=False, dequantize=True)
43 +
44 + elif isinstance(module, nn.Hardsigmoid):
45 + act_idx += 1
46 + model.__dict__['_modules'][name] = QHsigmoid(abit=bit_width, inplace=False, dequantize=True)
47 +
48 + elif isinstance(module, nn.LeakyReLU):
49 + act_idx += 1
50 + model.__dict__['_modules'][name] = QLeakyReLU(abit=bit_width, inplace=False, dequantize=True)
51 +
52 + elif isinstance(module, nn.Linear):
53 +            bias = module.bias is not None
54 + model.__dict__['_modules'][name] = QLinear(module.in_features, module.out_features, bias, wbit=bit_width)
55 + model.__dict__['_modules'][name].weight = module.weight
56 + if bias:
57 + model.__dict__['_modules'][name].bias = module.bias
58 +
59 + elif isinstance(module, nn.AdaptiveAvgPool2d):
60 + model.__dict__['_modules'][name] = QAvgPool2d(abit=bit_width, dequantize=True, output_size=module.output_size)
61 +
62 + # elif isinstance(module, BasicBlock) or isinstance(module, Bottleneck): #- ResNet support
63 + # replace_sq(model.__dict__['_modules'][name], bit_width)
64 +
65 + # elif isinstance(module, InvertedResidual): #mv2
66 + # replace_sq(model.__dict__['_modules'][name], bit_width)
67 +
68 + else:
69 + model.__dict__['_modules'][name] = module
70 +
71 + return model
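A minimal usage sketch, assuming `lsq_sq` provides `FuseConv2dQ`, `QReLU`, `QHswish`, `QHsigmoid`, `QLeakyReLU`, `QLinear`, and `QAvgPool2d` with the signatures used above (the quantized modules may still need calibration before the outputs are meaningful):

```
import torch
from models.mobilenet import MobileNet1  # float model defined earlier in this repo

model = MobileNet1()
model = replace_sq(model, bit_width=8)   # swap float modules for quantized ones in place
y = model(torch.randn(1, 3, 32, 32))     # forward now runs through the Q* modules
```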
1 +import torch
2 +import torch.nn as nn
3 +import torch.nn.functional as F
4 +import math
5 +from lsq_int import *
6 +from models.mobilenet import *
7 +
8 +conv_idx = -1
9 +act_idx = -1
10 +former_conv = None
11 +
12 +def replace_int(model, bit_width=8):
13 + global conv_idx, act_idx
14 +
15 + for name, module in model.named_children():
16 +        if isinstance(module, (nn.Sequential)):  # plain container: recurse into its children
17 + replace_int(model.__dict__['_modules'][name], bit_width)
18 +
19 + elif isinstance(module, nn.Conv2d):
20 + former_conv = name
21 + conv_idx += 1
22 +            bias = module.bias is not None
23 +
24 + model.__dict__['_modules'][name] = FuseConv2dQ(module.in_channels, module.out_channels,
25 + module.kernel_size, stride=module.stride,
26 + padding=module.padding, dilation=module.dilation,
27 + groups=module.groups, bias=bias, wbit=bit_width)
28 + model.__dict__['_modules'][name].weight = module.weight
29 + if bias:
30 + model.__dict__['_modules'][name].bias = module.bias
31 +
32 + elif isinstance(module, nn.BatchNorm2d):
33 +            model.__dict__['_modules'][former_conv].replace_bn(module)  # fold this BN into the conv replaced just above
34 +            model.__dict__['_modules'][name] = nn.Identity()            # the folded BN becomes a no-op
35 +
36 + elif isinstance(module, nn.ReLU):
37 + act_idx += 1
38 + model.__dict__['_modules'][name] = QReLU(abit=bit_width, inplace=False, dequantize=True)
39 +
40 + elif isinstance(module, nn.Hardswish):
41 + act_idx += 1
42 + model.__dict__['_modules'][name] = QHswish(abit=bit_width, inplace=False, dequantize=True)
43 +
44 + elif isinstance(module, nn.LeakyReLU):
45 + act_idx += 1
46 + model.__dict__['_modules'][name] = QLeakyReLU(abit=bit_width, inplace=False, dequantize=True)
47 +
48 + elif isinstance(module, nn.Linear):
49 +            bias = module.bias is not None
50 + model.__dict__['_modules'][name] = QLinear(module.in_features, module.out_features, bias, wbit=bit_width)
51 + model.__dict__['_modules'][name].weight = module.weight
52 + if bias:
53 + model.__dict__['_modules'][name].bias = module.bias
54 +
55 + elif isinstance(module, nn.AdaptiveAvgPool2d):
56 + model.__dict__['_modules'][name] = QAvgPool2d(abit=bit_width, dequantize=True, output_size=module.output_size)
57 +
58 + # elif isinstance(module, BasicBlock) or isinstance(module, Bottleneck): #- ResNet support
59 + # replace_sq(model.__dict__['_modules'][name], bit_width)
60 +
61 + # elif isinstance(module, InvertedResidual): #mv2
62 + # replace_sq(model.__dict__['_modules'][name], bit_width)
63 +
64 + else:
65 + model.__dict__['_modules'][name] = module
66 +
67 + return model
1 +'''Some helper functions for PyTorch, including:
2 + - get_mean_and_std: calculate the mean and std value of dataset.
3 + - msr_init: net parameter initialization.
4 + - progress_bar: progress bar mimic xlua.progress.
5 +'''
6 +import os
7 +import sys
8 +import time
9 +import math
10 +
11 +import torch
12 +import torch.nn as nn
13 +import torch.nn.init as init
14 +
15 +
16 +def get_mean_and_std(dataset):
17 + '''Compute the mean and std value of dataset.'''
18 + dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)
19 + mean = torch.zeros(3)
20 + std = torch.zeros(3)
21 + print('==> Computing mean and std..')
22 + for inputs, targets in dataloader:
23 + for i in range(3):
24 + mean[i] += inputs[:,i,:,:].mean()
25 + std[i] += inputs[:,i,:,:].std()
26 + mean.div_(len(dataset))
27 + std.div_(len(dataset))
28 + return mean, std
29 +
30 +def init_params(net):
31 +    '''Init layer parameters.'''
32 +    for m in net.modules():
33 +        if isinstance(m, nn.Conv2d):
34 +            init.kaiming_normal_(m.weight, mode='fan_out')
35 +            if m.bias is not None:
36 +                init.constant_(m.bias, 0)
37 +        elif isinstance(m, nn.BatchNorm2d):
38 +            init.constant_(m.weight, 1)
39 +            init.constant_(m.bias, 0)
40 +        elif isinstance(m, nn.Linear):
41 +            init.normal_(m.weight, std=1e-3)
42 +            if m.bias is not None:
43 +                init.constant_(m.bias, 0)
44 +
45 +
46 +_, term_width = os.popen('stty size', 'r').read().split()  # NOTE: requires a tty; fails when output is piped or redirected
47 +term_width = int(term_width)
48 +
49 +TOTAL_BAR_LENGTH = 65.
50 +last_time = time.time()
51 +begin_time = last_time
52 +def progress_bar(current, total, msg=None):
53 + global last_time, begin_time
54 + if current == 0:
55 + begin_time = time.time() # Reset for new bar.
56 +
57 + cur_len = int(TOTAL_BAR_LENGTH*current/total)
58 + rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1
59 +
60 + sys.stdout.write(' [')
61 + for i in range(cur_len):
62 + sys.stdout.write('=')
63 + sys.stdout.write('>')
64 + for i in range(rest_len):
65 + sys.stdout.write('.')
66 + sys.stdout.write(']')
67 +
68 + cur_time = time.time()
69 + step_time = cur_time - last_time
70 + last_time = cur_time
71 + tot_time = cur_time - begin_time
72 +
73 + L = []
74 + L.append(' Step: %s' % format_time(step_time))
75 + L.append(' | Tot: %s' % format_time(tot_time))
76 + if msg:
77 + L.append(' | ' + msg)
78 +
79 + msg = ''.join(L)
80 + sys.stdout.write(msg)
81 + for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):
82 + sys.stdout.write(' ')
83 +
84 + # Go back to the center of the bar.
85 + for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):
86 + sys.stdout.write('\b')
87 + sys.stdout.write(' %d/%d ' % (current+1, total))
88 +
89 + if current < total-1:
90 + sys.stdout.write('\r')
91 + else:
92 + sys.stdout.write('\n')
93 + sys.stdout.flush()
94 +
95 +def format_time(seconds):
96 + days = int(seconds / 3600/24)
97 + seconds = seconds - days*3600*24
98 + hours = int(seconds / 3600)
99 + seconds = seconds - hours*3600
100 + minutes = int(seconds / 60)
101 + seconds = seconds - minutes*60
102 + secondsf = int(seconds)
103 + seconds = seconds - secondsf
104 + millis = int(seconds*1000)
105 +
106 + f = ''
107 + i = 1
108 + if days > 0:
109 + f += str(days) + 'D'
110 + i += 1
111 + if hours > 0 and i <= 2:
112 + f += str(hours) + 'h'
113 + i += 1
114 + if minutes > 0 and i <= 2:
115 + f += str(minutes) + 'm'
116 + i += 1
117 + if secondsf > 0 and i <= 2:
118 + f += str(secondsf) + 's'
119 + i += 1
120 + if millis > 0 and i <= 2:
121 + f += str(millis) + 'ms'
122 + i += 1
123 + if f == '':
124 + f = '0ms'
125 + return f
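`format_time` prints at most the two most significant nonzero units. A few sample calls, assuming the function above:

```
print(format_time(0.1234))  # '123ms'
print(format_time(3661.5))  # '1h1m'
print(format_time(90061))   # '1D1h'
```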