hyungjin

final

No preview for this file type
{
"python.pythonPath": "/home/chunjin1212/anaconda3/envs/torch/bin/python"
}
\ No newline at end of file
MIT License
Copyright (c) 2017 liukuang
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# Train CIFAR10 with PyTorch
I'm playing with [PyTorch](http://pytorch.org/) on the CIFAR10 dataset.
## Prerequisites
- Python 3.6+
- PyTorch 1.0+
## Training
```
# Start training with:
python main.py
# You can manually resume the training with:
python main.py --resume --lr=0.01
```
## Accuracy
| Model | Acc. |
| ----------------- | ----------- |
| [VGG16](https://arxiv.org/abs/1409.1556) | 92.64% |
| [ResNet18](https://arxiv.org/abs/1512.03385) | 93.02% |
| [ResNet50](https://arxiv.org/abs/1512.03385) | 93.62% |
| [ResNet101](https://arxiv.org/abs/1512.03385) | 93.75% |
| [RegNetX_200MF](https://arxiv.org/abs/2003.13678) | 94.24% |
| [RegNetY_400MF](https://arxiv.org/abs/2003.13678) | 94.29% |
| [MobileNetV2](https://arxiv.org/abs/1801.04381) | 94.43% |
| [ResNeXt29(32x4d)](https://arxiv.org/abs/1611.05431) | 94.73% |
| [ResNeXt29(2x64d)](https://arxiv.org/abs/1611.05431) | 94.82% |
| [SimpleDLA](https://arxiv.org/abs/1707.064) | 94.89% |
| [DenseNet121](https://arxiv.org/abs/1608.06993) | 95.04% |
| [PreActResNet18](https://arxiv.org/abs/1603.05027) | 95.11% |
| [DPN92](https://arxiv.org/abs/1707.01629) | 95.16% |
| [DLA](https://arxiv.org/pdf/1707.06484.pdf) | 95.47% |
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from decimal import Decimal
import numpy as np
# Parent Class for Quantization Module
class LSQModule:
def __init__(self, abit=None, wbit=None, ibit=None, dequantize=True, scale=None):
self.abit = abit
self.wbit = wbit
self.ibit = ibit
self.dequantize = dequantize
self.register_buffer('init_state', torch.zeros(1))
self.scale = scale
# member variable setter
def set_abit(self, v):
self.abit = v
def set_wbit(self, v):
self.wbit = v
def set_ibit(self, v):
self.ibit = v
def set_dequantize(self, v):
self.dequantize = v
class QAvgPool2d(nn.AdaptiveAvgPool2d, LSQModule):
def __init__(self, abit, dequantize=True, output_size=(1,1)):
super(QAvgPool2d, self).__init__(output_size)
LSQModule.__init__(self, abit=abit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'output_size=' + str(self.output_size) \
+ ', abit=' + str(self.abit) \
+ ')'
def forward(self, x):
former = x[1]
x = x[0]
x = super().forward(x)
Qn = - (2 ** (self.abit - 1))
Qp = 2 ** (self.abit - 1) - 1
# Qn = 0.
# Qp = (2 ** self.abit) - 1
act_scale = self.scale
down_scale = act_scale / former
# down_scale = down_scale.numpy().astype()
# x = x.cpu().numpy().astype(Decimal)
x = x.cpu().detach().numpy().astype(Decimal)
down_scale = down_scale.cpu().detach().numpy().astype(Decimal)
output = x / down_scale
output = torch.from_numpy(output.astype(np.float32)).cuda()
x = torch.round(output).clamp(Qn, Qp)
return x, act_scale
class QMaxPool2d(nn.MaxPool2d, LSQModule):
def __init__(self, kernel_size=3, stride=2, padding=1):
super(QMaxPool2d, self).__init__(kernel_size=kernel_size, stride=stride, padding=padding)
LSQModule.__init__(self)
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'kernel_size=' + str(self.kernel_size) \
+ ', stride=' + str(self.stride) \
+ ', padding=' + str(self.padding) \
+ ')'
def forward(self, x, act_scale=None):
result = super().forward(x)
return result
class QReLU(nn.Module, LSQModule):
def __init__(self, abit, dequantize=True, inplace=False):
super(QReLU, self).__init__()
LSQModule.__init__(self, abit=abit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
self.inplace = inplace
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'abit=' + str(self.abit) \
+ ', dequantize=' + str(self.dequantize) \
+ ', inplace=' + str(self.inplace) \
+ ', init_state=' + str(self.init_state) \
+ ')'
def forward(self, x):
x = F.relu(x)
Qn = 0.
Qp = (2 ** self.abit) - 1
if self.training and self.init_state == 0:
self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(Qp))
self.init_state.fill_(1)
g = 1.0 / math.sqrt(x.numel() * Qp)
act_scale = grad_scale(self.scale, g)
x = round_pass((x / act_scale).clamp(Qn, Qp))
if self.dequantize:
x = x * act_scale
return x, act_scale
class QLeakyReLU(nn.Module, LSQModule):
def __init__(self, abit, negative_slope=0.1, dequantize=True, inplace=False):
super(QLeakyReLU, self).__init__()
LSQModule.__init__(self, abit=abit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
self.inplace = inplace
self.negative_slope=negative_slope
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'abit=' + str(self.abit) \
+ ', negative_slope=' + str(self.negative_slope) \
+ ', inplace=' + str(self.inplace) \
+ ')'
def forward(self, input):
deq_scale = input[1]
input = input[0]
Qn = - (2 ** (self.abit - 1))
Qp = 2 ** (self.abit - 1) - 1
input = input.cpu().detach().numpy().astype(Decimal)
# input = torch.from_numpy(input)
down_scale = deq_scale / self.scale
slope_scale = self.negative_slope * down_scale
down_scale = down_scale.cpu().detach().numpy().astype(Decimal)
slope_scale = slope_scale.cpu().detach().numpy().astype(Decimal)
output = np.where(input<0, input*slope_scale, input*down_scale).astype(np.float32)
output = torch.from_numpy(output).cuda()
x = torch.round(output).clamp(Qn, Qp)
return x, self.scale
class QHswish(nn.Hardswish, LSQModule):
def __init__(self, abit, dequantize=True, inplace=False):
super(QHswish, self).__init__(inplace=inplace)
LSQModule.__init__(self, abit=abit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
self.inplace = inplace
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'abit=' + str(self.abit) \
+ ', inplace=' + str(self.inplace) \
+ ')'
def forward(self, input):
deq_scale = input[1]
x = input[0]
# input = input * deq_scale
# x = super().forward(input)
Qn = - (2 ** (self.abit - 1))
Qp = 2 ** (self.abit - 1) - 1
q_scale = self.scale
down_scale = deq_scale / q_scale
flag = int(torch.round(3/deq_scale))
c1 = (down_scale * deq_scale / 6).cpu().detach().numpy().astype(Decimal)
c2 = (down_scale / 2).cpu().detach().numpy().astype(Decimal)
down_scale = down_scale.cpu().detach().numpy().astype(Decimal)
x = x.cpu().detach().numpy().astype(Decimal)
x = np.where(x<=-flag, x*0, x)
x = np.where(x>=flag, down_scale*x, x*(c1*x+c2)).astype(np.float32)
x = torch.from_numpy(x).cuda()
# x = torch.where(x <= -flag, x*0, x)
# x = torch.where(x >= flag,
# down_scale*x, x*x*c1+x*c2)
# act_scale = self.scale
# down_scale = former_scale / self.scale
# x = x * former_scal
x = torch.round(x).clamp(Qn, Qp)
return x, self.scale
class QConv2d(nn.Conv2d, LSQModule):
def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=False, wbit=32, dequantize=True):
super(QConv2d, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias)
LSQModule.__init__(self, wbit=wbit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
def __repr__(self): #- for show detail arttribute on print(model)
return self.__class__.__name__ + '(' \
+ 'in_channels=' + str(self.in_channels) \
+ ', out_channels=' + str(self.out_channels) \
+ ', bias=' + str(self.bias is not None) \
+ ', kernel_size=' + str(self.kernel_size) \
+ ', stride=' + str(self.stride) \
+ ', groups=' + str(self.groups) \
+ ', padding=' + str(self.padding) \
+ ', wbit=' + str(self.wbit) \
+ ')'
def forward(self, x, act_scale=None):
Qn = - (2 ** (self.wbit - 1))
Qp = 2 ** (self.wbit - 1) - 1
if self.training and self.init_state == 0:
self.scale.data.copy_(2 * self.weight.abs().mean() / math.sqrt(Qp))
self.init_state.fill_(1)
g = 1.0 / math.sqrt(x.numel() * Qp)
scale = grad_scale(self.scale, g)
self.weight.data = round_pass((self.weight.data / scale).clamp(Qn, Qp))
if self.dequantize:
self.weight.data = self.weight.data * scale
if self.bias is not None:
bias_scale = scale*act_scale
self.bias.data = round_pass((self.bias.data / bias_scale).clamp(Qn, Qp))
if self.dequantize:
self.bias.data = self.bias.data * bias_scale
output = super().forward(x)
return output
class QLinear(nn.Linear, LSQModule):
def __init__(self, in_features, out_features, bias=True, wbit=32, dequantize=True):
super(QLinear, self).__init__(in_features, out_features, bias)
LSQModule.__init__(self, wbit=wbit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'in_features=' + str(self.in_features) \
+ ', out_features=' + str(self.out_features) \
+ ', bias=' + str(self.bias is not None) \
+ ', wbit=' + str(self.wbit) \
+ ')'
def forward(self, input, act_scale=None):
if self.wbit < 32:
Qn = - (2 ** (self.wbit - 1))
Qp = 2 ** (self.wbit - 1) - 1
scale = self.scale
cur_weight = torch.round((self.weight.data / scale).clamp(Qn, Qp))
# with torch.no_grad():
if self.bias is not None:
bias_scale = scale*act_scale
cur_bias = torch.round((self.bias.data / bias_scale))
output = F.linear(input, cur_weight, cur_bias)
return output
class Input_Quantizer(nn.Module, LSQModule):
def __init__(self, abit=8, dequantize=True):
super(Input_Quantizer, self).__init__()
LSQModule.__init__(self, abit=abit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'abit=' + str(self.abit) \
+ ', dequantize=' + str(self.dequantize) \
+ ', init_state=' + str(self.init_state) \
+ ')'
def forward(self, x):
Qn = - (2 ** (self.abit - 1))
Qp = (2 ** (self.abit - 1)) - 1
x = torch.round((x / self.scale).clamp(Qn, Qp))
return x, self.scale
class FuseConv2dQ(QConv2d):
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
padding=0, dilation=1, groups=1, bias=True, wbit=32, dequantize=True):
super(FuseConv2dQ, self).__init__(
in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias,
wbit=wbit, dequantize=dequantize)
self.bn = nn.BatchNorm2d(out_channels)
def forward(self, x):
act_scale = x[1]
x = x[0]
# simulate bn folding to Conv
f_weight, f_bias = self.fusing()
Qn = - (2 ** (self.wbit - 1))
Qp = 2 ** (self.wbit - 1) - 1
scale = self.scale
q_weight = torch.round((f_weight.data / scale).clamp(Qn, Qp))
bias_scale = scale*act_scale
q_bias = torch.round((f_bias / bias_scale))
output = F.conv2d(x, q_weight, q_bias, self.stride, self.padding, self.dilation, self.groups)
# output *= bias_scale # dequantize
return output, bias_scale
def replace_bn(self, bn_module):
self.bn = bn_module
self.bn.track_running_stats = False
def fusing(self):
std = torch.sqrt(self.bn.running_var + self.bn.eps)
f_weight = self.weight * (self.bn.weight / std).reshape([len(self.bn.weight), 1,1,1])
if self.bias is not None:
f_bias = self.bn.bias + (self.bias - self.bn.runnning_mean) * (self.bn.weight / std)
else:
f_bias = self.bn.bias - self.bn.running_mean * (self.bn.weight / std)
return f_weight, f_bias
def grad_scale(x, scale):
y = x
y_grad = x * scale
output = (y - y_grad).detach() + y_grad
return output
def round_pass(x):
y = torch.round(x)
y_grad = x
output = (y - y_grad).detach() + y_grad
return output
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
# Parent Class for Quantization Module
class LSQModule:
def __init__(self, abit=None, wbit=None, ibit=None, dequantize=True, scale=None):
self.abit = abit
self.wbit = wbit
self.ibit = ibit
self.dequantize = dequantize
self.register_buffer('init_state', torch.zeros(1))
self.scale = scale
# member variable setter
def set_abit(self, v):
self.abit = v
def set_wbit(self, v):
self.wbit = v
def set_ibit(self, v):
self.ibit = v
def set_dequantize(self, v):
self.dequantize = v
class QAvgPool2d(nn.AdaptiveAvgPool2d, LSQModule):
def __init__(self, abit, dequantize=True, output_size=(1,1)):
super(QAvgPool2d, self).__init__(output_size)
LSQModule.__init__(self, abit=abit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'output_size=' + str(self.output_size) \
+ ', abit=' + str(self.abit) \
+ ')'
def forward(self, x):
x = x[0]
x = super().forward(x)
# Qn = - (2 ** (self.abit - 1))
# Qp = 2 ** (self.abit - 1) - 1
Qn = 0.
Qp = (2 ** self.abit) - 1
if self.training and self.init_state == 0:
self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(Qp))
self.init_state.fill_(1)
g = 1.0 / math.sqrt(x.numel() * Qp)
act_scale = grad_scale(self.scale, g)
x = round_pass((x / act_scale).clamp(Qn, Qp))
if self.dequantize:
x = x * act_scale
return x, act_scale
class QMaxPool2d(nn.MaxPool2d, LSQModule):
def __init__(self, kernel_size=3, stride=2, padding=1):
super(QMaxPool2d, self).__init__(kernel_size=kernel_size, stride=stride, padding=padding)
LSQModule.__init__(self)
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'kernel_size=' + str(self.kernel_size) \
+ ', stride=' + str(self.stride) \
+ ', padding=' + str(self.padding) \
+ ')'
def forward(self, x, act_scale=None):
result = super().forward(x)
return result
class QReLU(nn.Module, LSQModule):
def __init__(self, abit, dequantize=True, inplace=False):
super(QReLU, self).__init__()
LSQModule.__init__(self, abit=abit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
self.inplace = inplace
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'abit=' + str(self.abit) \
+ ', dequantize=' + str(self.dequantize) \
+ ', inplace=' + str(self.inplace) \
+ ', init_state=' + str(self.init_state) \
+ ')'
def forward(self, x):
x = F.relu(x)
Qn = 0.
Qp = (2 ** self.abit) - 1
if self.training and self.init_state == 0:
self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(Qp))
self.init_state.fill_(1)
g = 1.0 / math.sqrt(x.numel() * Qp)
act_scale = grad_scale(self.scale, g)
x = round_pass((x / act_scale).clamp(Qn, Qp))
if self.dequantize:
x = x * act_scale
return x, act_scale
class QLeakyReLU(nn.Module, LSQModule):
def __init__(self, abit, negative_slope=0.1, dequantize=True, inplace=False):
super(QLeakyReLU, self).__init__()
LSQModule.__init__(self, abit=abit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
self.inplace = inplace
self.negative_slope=negative_slope
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'abit=' + str(self.abit) \
+ ', negative_slope=' + str(self.negative_slope) \
+ ', inplace=' + str(self.inplace) \
+ ')'
def forward(self, input):
x = F.leaky_relu(input=input, negative_slope=self.negative_slope)
Qn = - (2 ** (self.abit - 1))
Qp = 2 ** (self.abit - 1) - 1
# Qn = 0.
# Qp = (2 ** self.abit) - 1
if self.training and self.init_state == 0:
self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(Qp))
self.init_state.fill_(1)
g = 1.0 / math.sqrt(x.numel() * Qp)
act_scale = grad_scale(self.scale, g)
x = round_pass((x / act_scale).clamp(Qn, Qp))
if self.dequantize:
x = x * act_scale
return x, act_scale
class QHswish(nn.Hardswish, LSQModule):
def __init__(self, abit, dequantize=True, inplace=False):
super(QHswish, self).__init__(inplace=inplace)
LSQModule.__init__(self, abit=abit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
self.inplace = inplace
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'abit=' + str(self.abit) \
+ ', inplace=' + str(self.inplace) \
+ ')'
def forward(self, input):
x = super().forward(input)
Qn = - (2 ** (self.abit - 1))
Qp = 2 ** (self.abit - 1) - 1
# Qn = 0.
# Qp = (2 ** self.abit) - 1
if self.training and self.init_state == 0:
self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(Qp))
self.init_state.fill_(1)
g = 1.0 / math.sqrt(x.numel() * Qp)
act_scale = grad_scale(self.scale, g)
x = round_pass((x / act_scale).clamp(Qn, Qp))
if self.dequantize:
x = x * act_scale
return x, act_scale
class QHsigmoid(nn.Hardsigmoid, LSQModule):
def __init__(self, abit, dequantize=True, inplace=False):
super(QHsigmoid, self).__init__(inplace=inplace)
LSQModule.__init__(self, abit=abit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
self.inplace = inplace
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'abit=' + str(self.abit) \
+ ', inplace=' + str(self.inplace) \
+ ')'
def forward(self, input):
x = super().forward(input)
# Qn = - (2 ** (self.abit - 1))
# Qp = 2 ** (self.abit - 1) - 1
Qn = 0.
Qp = (2 ** self.abit) - 1
if self.training and self.init_state == 0:
self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(Qp))
self.init_state.fill_(1)
g = 1.0 / math.sqrt(x.numel() * Qp)
act_scale = grad_scale(self.scale, g)
x = round_pass((x / act_scale).clamp(Qn, Qp))
if self.dequantize:
x = x * act_scale
return x, act_scale
class QConv2d(nn.Conv2d, LSQModule):
def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=False, wbit=32, dequantize=True):
super(QConv2d, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias)
LSQModule.__init__(self, wbit=wbit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
def __repr__(self): #- for show detail arttribute on print(model)
return self.__class__.__name__ + '(' \
+ 'in_channels=' + str(self.in_channels) \
+ ', out_channels=' + str(self.out_channels) \
+ ', bias=' + str(self.bias is not None) \
+ ', kernel_size=' + str(self.kernel_size) \
+ ', stride=' + str(self.stride) \
+ ', groups=' + str(self.groups) \
+ ', padding=' + str(self.padding) \
+ ', wbit=' + str(self.wbit) \
+ ')'
def forward(self, x, act_scale=None):
Qn = - (2 ** (self.wbit - 1))
Qp = 2 ** (self.wbit - 1) - 1
if self.training and self.init_state == 0:
self.scale.data.copy_(2 * self.weight.abs().mean() / math.sqrt(Qp))
self.init_state.fill_(1)
g = 1.0 / math.sqrt(x.numel() * Qp)
scale = grad_scale(self.scale, g)
self.weight.data = round_pass((self.weight.data / scale).clamp(Qn, Qp))
if self.dequantize:
self.weight.data = self.weight.data * scale
if self.bias is not None:
bias_scale = scale*act_scale
self.bias.data = round_pass((self.bias.data / bias_scale).clamp(Qn, Qp))
if self.dequantize:
self.bias.data = self.bias.data * bias_scale
output = super().forward(x)
return output
class QLinear(nn.Linear, LSQModule):
def __init__(self, in_features, out_features, bias=True, wbit=32, dequantize=True):
super(QLinear, self).__init__(in_features, out_features, bias)
LSQModule.__init__(self, wbit=wbit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'in_features=' + str(self.in_features) \
+ ', out_features=' + str(self.out_features) \
+ ', bias=' + str(self.bias is not None) \
+ ', wbit=' + str(self.wbit) \
+ ')'
def forward(self, input, act_scale=None):
if self.wbit < 32:
Qn = - (2 ** (self.wbit - 1))
Qp = 2 ** (self.wbit - 1) - 1
if self.training and self.init_state == 0:
self.scale.data.copy_(2 * self.weight.abs().mean() / math.sqrt(Qp))
self.init_state.fill_(1)
g = 1.0 / math.sqrt(input.numel() * Qp)
scale = grad_scale(self.scale, g)
self.weight.data = round_pass((self.weight.data / scale).clamp(Qn, Qp))
if self.dequantize:
self.weight.data = self.weight.data * scale
# with torch.no_grad():
if self.bias is not None:
bias_scale = scale*act_scale
self.bias.data = round_pass((self.bias.data / bias_scale))
if self.dequantize:
self.bias.data = self.bias.data * bias_scale
output = super().forward(input)
return output
class Input_Quantizer(nn.Module, LSQModule):
def __init__(self, abit=8, dequantize=True):
super(Input_Quantizer, self).__init__()
LSQModule.__init__(self, abit=abit, dequantize=dequantize,
scale=nn.Parameter(torch.Tensor(1)))
def __repr__(self):
return self.__class__.__name__ + '(' \
+ 'abit=' + str(self.abit) \
+ ', dequantize=' + str(self.dequantize) \
+ ', init_state=' + str(self.init_state) \
+ ')'
def forward(self, x):
Qn = - (2 ** (self.abit - 1))
Qp = (2 ** (self.abit - 1)) - 1
if self.training and self.init_state == 0:
self.scale.data.copy_(2 * x.abs().mean() / math.sqrt(Qp))
self.init_state.fill_(1)
g = 1.0 / math.sqrt(x.numel() * Qp)
act_scale = grad_scale(self.scale, g)
x = round_pass((x / act_scale).clamp(Qn, Qp))
if self.dequantize:
x = x * act_scale
return x, act_scale
class FuseConv2dQ(QConv2d):
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
padding=0, dilation=1, groups=1, bias=True, wbit=32, dequantize=True):
super(FuseConv2dQ, self).__init__(
in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias,
wbit=wbit, dequantize=dequantize)
self.bn = nn.BatchNorm2d(out_channels)
def forward(self, x):
act_scale = x[1]
x = x[0]
temp = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
_ = self.bn(temp)
# simulate bn folding to Conv
f_weight, f_bias = self.fusing()
Qn = - (2 ** (self.wbit - 1))
Qp = 2 ** (self.wbit - 1) - 1
if self.training and self.init_state == 0:
self.scale.data.copy_(2 * f_weight.abs().mean() / math.sqrt(Qp))
self.init_state.fill_(1)
g = 1.0 / math.sqrt(x.numel() * Qp)
scale = grad_scale(self.scale, g)
q_weight = round_pass((f_weight.data / scale).clamp(Qn, Qp))
if self.dequantize:
q_weight = q_weight * scale
# with torch.no_grad():
bias_scale = scale*act_scale
q_bias = round_pass((f_bias / bias_scale))
if self.dequantize:
q_bias = q_bias * bias_scale
output = F.conv2d(x, q_weight, q_bias, self.stride, self.padding, self.dilation, self.groups)
return output
def replace_bn(self, bn_module):
self.bn = bn_module
self.bn.track_running_stats = False
def fusing(self):
std = torch.sqrt(self.bn.running_var + self.bn.eps)
f_weight = self.weight * (self.bn.weight / std).reshape([len(self.bn.weight), 1,1,1])
if self.bias is not None:
f_bias = self.bn.bias + (self.bias - self.bn.runnning_mean) * (self.bn.weight / std)
else:
f_bias = self.bn.bias - self.bn.running_mean * (self.bn.weight / std)
return f_weight, f_bias
def grad_scale(x, scale):
y = x
y_grad = x * scale
output = (y - y_grad).detach() + y_grad
return output
def round_pass(x):
y = torch.round(x)
y_grad = x
output = (y - y_grad).detach() + y_grad
return output
'''Train CIFAR10 with PyTorch.'''
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
import os
import argparse
from models.mobilenet import MobileNet1
from utils import progress_bar
from replace import replace_sq
from collections import OrderedDict
# from lsq_int import Input_Quantizer
from lsq_sq import Input_Quantizer
from replace_int import replace_int
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
parser.add_argument('--resume', '-r', default=None, type=str,
help='resume from checkpoint')
parser.add_argument('--dir', default='default', type=str,
help='save dir name')
parser.add_argument('--test', default=False, action='store_true',
help='test version or not')
parser.add_argument('--qat', default=False, action='store_true',
help='qat version or not')
args = parser.parse_args()
# Training
def train(epoch):
print('\nEpoch: %d' % epoch)
net.train()
train_loss = 0
correct = 0
total = 0
for batch_idx, (inputs, targets) in enumerate(trainloader):
inputs, targets = inputs.to(device), targets.to(device)
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()
train_loss += loss.item()
_, predicted = outputs.max(1)
total += targets.size(0)
correct += predicted.eq(targets).sum().item()
progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
% (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
def test(epoch):
global best_acc
dir_name = args.dir
net.eval()
test_loss = 0
correct = 0
total = 0
with torch.no_grad():
for batch_idx, (inputs, targets) in enumerate(testloader):
inputs, targets = inputs.to(device), targets.to(device)
outputs = net(inputs)
loss = criterion(outputs, targets)
test_loss += loss.item()
_, predicted = outputs.max(1)
total += targets.size(0)
correct += predicted.eq(targets).sum().item()
progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
% (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
# Save checkpoint.
acc = 100.*correct/total
if acc > best_acc:
print('Saving..')
state = {
'net': net.state_dict(),
'acc': acc,
'epoch': epoch,
}
if not os.path.isdir(dir_name):
os.mkdir(dir_name)
torch.save(state, f'./{dir_name}/ckpt.pth')
best_acc = acc
print('*** best Test Accuracy: ', best_acc)
if __name__ == '__main__':
device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0 # best test accuracy
start_epoch = 0 # start from epoch 0 or last checkpoint epoch
# Data
print('==> Preparing data..')
transform_train = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
transform_test = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
trainset = torchvision.datasets.CIFAR10(
root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
trainset, batch_size=256, shuffle=True, num_workers=4)
testset = torchvision.datasets.CIFAR10(
root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
testset, batch_size=100, shuffle=False, num_workers=4)
classes = ('plane', 'car', 'bird', 'cat', 'deer',
'dog', 'frog', 'horse', 'ship', 'truck')
# Model
print('==> Building model..')
net = MobileNet1(3, 10)
net = net.to(device)
if args.qat:
net = replace_sq(model=net)
net = nn.Sequential(Input_Quantizer(abit=8, dequantize=True),
net)
if args.resume:
# Load checkpoint.
print('==> Resuming from checkpoint..')
assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
checkpoint = torch.load(args.resume)
new_state_dict = OrderedDict()
for k, v in checkpoint['net'].items():
k = k.replace("module.", "")
new_state_dict[k] = v
net.load_state_dict(new_state_dict)
best_acc = 0.0
start_epoch = 0
print(net)
# replace_int(net)
if device == 'cuda':
net = torch.nn.DataParallel(net)
cudnn.benchmark = True
net.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr,
momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
for epoch in range(start_epoch, start_epoch+200):
if args.test:
test(epoch)
break
else:
train(epoch)
test(epoch)
scheduler.step()
'''DenseNet in PyTorch.'''
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
class Bottleneck(nn.Module):
def __init__(self, in_planes, growth_rate):
super(Bottleneck, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False)
self.bn2 = nn.BatchNorm2d(4*growth_rate)
self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)
def forward(self, x):
out = self.conv1(F.relu(self.bn1(x)))
out = self.conv2(F.relu(self.bn2(out)))
out = torch.cat([out,x], 1)
return out
class Transition(nn.Module):
def __init__(self, in_planes, out_planes):
super(Transition, self).__init__()
self.bn = nn.BatchNorm2d(in_planes)
self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)
def forward(self, x):
out = self.conv(F.relu(self.bn(x)))
out = F.avg_pool2d(out, 2)
return out
class DenseNet(nn.Module):
def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
super(DenseNet, self).__init__()
self.growth_rate = growth_rate
num_planes = 2*growth_rate
self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)
self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
num_planes += nblocks[0]*growth_rate
out_planes = int(math.floor(num_planes*reduction))
self.trans1 = Transition(num_planes, out_planes)
num_planes = out_planes
self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
num_planes += nblocks[1]*growth_rate
out_planes = int(math.floor(num_planes*reduction))
self.trans2 = Transition(num_planes, out_planes)
num_planes = out_planes
self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
num_planes += nblocks[2]*growth_rate
out_planes = int(math.floor(num_planes*reduction))
self.trans3 = Transition(num_planes, out_planes)
num_planes = out_planes
self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
num_planes += nblocks[3]*growth_rate
self.bn = nn.BatchNorm2d(num_planes)
self.linear = nn.Linear(num_planes, num_classes)
def _make_dense_layers(self, block, in_planes, nblock):
layers = []
for i in range(nblock):
layers.append(block(in_planes, self.growth_rate))
in_planes += self.growth_rate
return nn.Sequential(*layers)
def forward(self, x):
out = self.conv1(x)
out = self.trans1(self.dense1(out))
out = self.trans2(self.dense2(out))
out = self.trans3(self.dense3(out))
out = self.dense4(out)
out = F.avg_pool2d(F.relu(self.bn(out)), 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def DenseNet121():
return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)
def DenseNet169():
return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)
def DenseNet201():
return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)
def DenseNet161():
return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)
def densenet_cifar():
return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12)
def test():
net = densenet_cifar()
x = torch.randn(1,3,32,32)
y = net(x)
print(y)
# test()
'''DLA in PyTorch.
Reference:
Deep Layer Aggregation. https://arxiv.org/abs/1707.06484
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(
in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*planes)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class Root(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size=1):
super(Root, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, kernel_size,
stride=1, padding=(kernel_size - 1) // 2, bias=False)
self.bn = nn.BatchNorm2d(out_channels)
def forward(self, xs):
x = torch.cat(xs, 1)
out = F.relu(self.bn(self.conv(x)))
return out
class Tree(nn.Module):
def __init__(self, block, in_channels, out_channels, level=1, stride=1):
super(Tree, self).__init__()
self.level = level
if level == 1:
self.root = Root(2*out_channels, out_channels)
self.left_node = block(in_channels, out_channels, stride=stride)
self.right_node = block(out_channels, out_channels, stride=1)
else:
self.root = Root((level+2)*out_channels, out_channels)
for i in reversed(range(1, level)):
subtree = Tree(block, in_channels, out_channels,
level=i, stride=stride)
self.__setattr__('level_%d' % i, subtree)
self.prev_root = block(in_channels, out_channels, stride=stride)
self.left_node = block(out_channels, out_channels, stride=1)
self.right_node = block(out_channels, out_channels, stride=1)
def forward(self, x):
xs = [self.prev_root(x)] if self.level > 1 else []
for i in reversed(range(1, self.level)):
level_i = self.__getattr__('level_%d' % i)
x = level_i(x)
xs.append(x)
x = self.left_node(x)
xs.append(x)
x = self.right_node(x)
xs.append(x)
out = self.root(xs)
return out
class DLA(nn.Module):
def __init__(self, block=BasicBlock, num_classes=10):
super(DLA, self).__init__()
self.base = nn.Sequential(
nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(16),
nn.ReLU(True)
)
self.layer1 = nn.Sequential(
nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(16),
nn.ReLU(True)
)
self.layer2 = nn.Sequential(
nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(32),
nn.ReLU(True)
)
self.layer3 = Tree(block, 32, 64, level=1, stride=1)
self.layer4 = Tree(block, 64, 128, level=2, stride=2)
self.layer5 = Tree(block, 128, 256, level=2, stride=2)
self.layer6 = Tree(block, 256, 512, level=1, stride=2)
self.linear = nn.Linear(512, num_classes)
def forward(self, x):
out = self.base(x)
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = self.layer5(out)
out = self.layer6(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def test():
net = DLA()
print(net)
x = torch.randn(1, 3, 32, 32)
y = net(x)
print(y.size())
if __name__ == '__main__':
test()
'''Simplified version of DLA in PyTorch.
Note this implementation is not identical to the original paper version.
But it seems works fine.
See dla.py for the original paper version.
Reference:
Deep Layer Aggregation. https://arxiv.org/abs/1707.06484
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(
in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*planes)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class Root(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size=1):
super(Root, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, kernel_size,
stride=1, padding=(kernel_size - 1) // 2, bias=False)
self.bn = nn.BatchNorm2d(out_channels)
def forward(self, xs):
x = torch.cat(xs, 1)
out = F.relu(self.bn(self.conv(x)))
return out
class Tree(nn.Module):
def __init__(self, block, in_channels, out_channels, level=1, stride=1):
super(Tree, self).__init__()
self.root = Root(2*out_channels, out_channels)
if level == 1:
self.left_tree = block(in_channels, out_channels, stride=stride)
self.right_tree = block(out_channels, out_channels, stride=1)
else:
self.left_tree = Tree(block, in_channels,
out_channels, level=level-1, stride=stride)
self.right_tree = Tree(block, out_channels,
out_channels, level=level-1, stride=1)
def forward(self, x):
out1 = self.left_tree(x)
out2 = self.right_tree(out1)
out = self.root([out1, out2])
return out
class SimpleDLA(nn.Module):
def __init__(self, block=BasicBlock, num_classes=10):
super(SimpleDLA, self).__init__()
self.base = nn.Sequential(
nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(16),
nn.ReLU(True)
)
self.layer1 = nn.Sequential(
nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(16),
nn.ReLU(True)
)
self.layer2 = nn.Sequential(
nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(32),
nn.ReLU(True)
)
self.layer3 = Tree(block, 32, 64, level=1, stride=1)
self.layer4 = Tree(block, 64, 128, level=2, stride=2)
self.layer5 = Tree(block, 128, 256, level=2, stride=2)
self.layer6 = Tree(block, 256, 512, level=1, stride=2)
self.linear = nn.Linear(512, num_classes)
def forward(self, x):
out = self.base(x)
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = self.layer5(out)
out = self.layer6(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def test():
net = SimpleDLA()
print(net)
x = torch.randn(1, 3, 32, 32)
y = net(x)
print(y.size())
if __name__ == '__main__':
test()
'''Dual Path Networks in PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class Bottleneck(nn.Module):
def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer):
super(Bottleneck, self).__init__()
self.out_planes = out_planes
self.dense_depth = dense_depth
self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False)
self.bn2 = nn.BatchNorm2d(in_planes)
self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(out_planes+dense_depth)
self.shortcut = nn.Sequential()
if first_layer:
self.shortcut = nn.Sequential(
nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_planes+dense_depth)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
x = self.shortcut(x)
d = self.out_planes
out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1)
out = F.relu(out)
return out
class DPN(nn.Module):
def __init__(self, cfg):
super(DPN, self).__init__()
in_planes, out_planes = cfg['in_planes'], cfg['out_planes']
num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth']
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.last_planes = 64
self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10)
def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for i,stride in enumerate(strides):
layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0))
self.last_planes = out_planes + (i+2) * dense_depth
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def DPN26():
cfg = {
'in_planes': (96,192,384,768),
'out_planes': (256,512,1024,2048),
'num_blocks': (2,2,2,2),
'dense_depth': (16,32,24,128)
}
return DPN(cfg)
def DPN92():
cfg = {
'in_planes': (96,192,384,768),
'out_planes': (256,512,1024,2048),
'num_blocks': (3,4,20,3),
'dense_depth': (16,32,24,128)
}
return DPN(cfg)
def test():
net = DPN92()
x = torch.randn(1,3,32,32)
y = net(x)
print(y)
# test()
'''EfficientNet in PyTorch.
Paper: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks".
Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
def swish(x):
return x * x.sigmoid()
def drop_connect(x, drop_ratio):
keep_ratio = 1.0 - drop_ratio
mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
mask.bernoulli_(keep_ratio)
x.div_(keep_ratio)
x.mul_(mask)
return x
class SE(nn.Module):
'''Squeeze-and-Excitation block with Swish.'''
def __init__(self, in_channels, se_channels):
super(SE, self).__init__()
self.se1 = nn.Conv2d(in_channels, se_channels,
kernel_size=1, bias=True)
self.se2 = nn.Conv2d(se_channels, in_channels,
kernel_size=1, bias=True)
def forward(self, x):
out = F.adaptive_avg_pool2d(x, (1, 1))
out = swish(self.se1(out))
out = self.se2(out).sigmoid()
out = x * out
return out
class Block(nn.Module):
'''expansion + depthwise + pointwise + squeeze-excitation'''
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
expand_ratio=1,
se_ratio=0.,
drop_rate=0.):
super(Block, self).__init__()
self.stride = stride
self.drop_rate = drop_rate
self.expand_ratio = expand_ratio
# Expansion
channels = expand_ratio * in_channels
self.conv1 = nn.Conv2d(in_channels,
channels,
kernel_size=1,
stride=1,
padding=0,
bias=False)
self.bn1 = nn.BatchNorm2d(channels)
# Depthwise conv
self.conv2 = nn.Conv2d(channels,
channels,
kernel_size=kernel_size,
stride=stride,
padding=(1 if kernel_size == 3 else 2),
groups=channels,
bias=False)
self.bn2 = nn.BatchNorm2d(channels)
# SE layers
se_channels = int(in_channels * se_ratio)
self.se = SE(channels, se_channels)
# Output
self.conv3 = nn.Conv2d(channels,
out_channels,
kernel_size=1,
stride=1,
padding=0,
bias=False)
self.bn3 = nn.BatchNorm2d(out_channels)
# Skip connection if in and out shapes are the same (MV-V2 style)
self.has_skip = (stride == 1) and (in_channels == out_channels)
def forward(self, x):
out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x)))
out = swish(self.bn2(self.conv2(out)))
out = self.se(out)
out = self.bn3(self.conv3(out))
if self.has_skip:
if self.training and self.drop_rate > 0:
out = drop_connect(out, self.drop_rate)
out = out + x
return out
class EfficientNet(nn.Module):
def __init__(self, cfg, num_classes=10):
super(EfficientNet, self).__init__()
self.cfg = cfg
self.conv1 = nn.Conv2d(3,
32,
kernel_size=3,
stride=1,
padding=1,
bias=False)
self.bn1 = nn.BatchNorm2d(32)
self.layers = self._make_layers(in_channels=32)
self.linear = nn.Linear(cfg['out_channels'][-1], num_classes)
def _make_layers(self, in_channels):
layers = []
cfg = [self.cfg[k] for k in ['expansion', 'out_channels', 'num_blocks', 'kernel_size',
'stride']]
b = 0
blocks = sum(self.cfg['num_blocks'])
for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg):
strides = [stride] + [1] * (num_blocks - 1)
for stride in strides:
drop_rate = self.cfg['drop_connect_rate'] * b / blocks
layers.append(
Block(in_channels,
out_channels,
kernel_size,
stride,
expansion,
se_ratio=0.25,
drop_rate=drop_rate))
in_channels = out_channels
return nn.Sequential(*layers)
def forward(self, x):
out = swish(self.bn1(self.conv1(x)))
out = self.layers(out)
out = F.adaptive_avg_pool2d(out, 1)
out = out.view(out.size(0), -1)
dropout_rate = self.cfg['dropout_rate']
if self.training and dropout_rate > 0:
out = F.dropout(out, p=dropout_rate)
out = self.linear(out)
return out
def EfficientNetB0():
cfg = {
'num_blocks': [1, 2, 2, 3, 3, 4, 1],
'expansion': [1, 6, 6, 6, 6, 6, 6],
'out_channels': [16, 24, 40, 80, 112, 192, 320],
'kernel_size': [3, 3, 5, 3, 5, 5, 3],
'stride': [1, 2, 2, 2, 1, 2, 1],
'dropout_rate': 0.2,
'drop_connect_rate': 0.2,
}
return EfficientNet(cfg)
def test():
net = EfficientNetB0()
x = torch.randn(2, 3, 32, 32)
y = net(x)
print(y.shape)
if __name__ == '__main__':
test()
'''GoogLeNet with PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class Inception(nn.Module):
def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
super(Inception, self).__init__()
# 1x1 conv branch
self.b1 = nn.Sequential(
nn.Conv2d(in_planes, n1x1, kernel_size=1),
nn.BatchNorm2d(n1x1),
nn.ReLU(True),
)
# 1x1 conv -> 3x3 conv branch
self.b2 = nn.Sequential(
nn.Conv2d(in_planes, n3x3red, kernel_size=1),
nn.BatchNorm2d(n3x3red),
nn.ReLU(True),
nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
nn.BatchNorm2d(n3x3),
nn.ReLU(True),
)
# 1x1 conv -> 5x5 conv branch
self.b3 = nn.Sequential(
nn.Conv2d(in_planes, n5x5red, kernel_size=1),
nn.BatchNorm2d(n5x5red),
nn.ReLU(True),
nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
nn.BatchNorm2d(n5x5),
nn.ReLU(True),
nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
nn.BatchNorm2d(n5x5),
nn.ReLU(True),
)
# 3x3 pool -> 1x1 conv branch
self.b4 = nn.Sequential(
nn.MaxPool2d(3, stride=1, padding=1),
nn.Conv2d(in_planes, pool_planes, kernel_size=1),
nn.BatchNorm2d(pool_planes),
nn.ReLU(True),
)
def forward(self, x):
y1 = self.b1(x)
y2 = self.b2(x)
y3 = self.b3(x)
y4 = self.b4(x)
return torch.cat([y1,y2,y3,y4], 1)
class GoogLeNet(nn.Module):
def __init__(self):
super(GoogLeNet, self).__init__()
self.pre_layers = nn.Sequential(
nn.Conv2d(3, 192, kernel_size=3, padding=1),
nn.BatchNorm2d(192),
nn.ReLU(True),
)
self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
self.avgpool = nn.AvgPool2d(8, stride=1)
self.linear = nn.Linear(1024, 10)
def forward(self, x):
out = self.pre_layers(x)
out = self.a3(out)
out = self.b3(out)
out = self.maxpool(out)
out = self.a4(out)
out = self.b4(out)
out = self.c4(out)
out = self.d4(out)
out = self.e4(out)
out = self.maxpool(out)
out = self.a5(out)
out = self.b5(out)
out = self.avgpool(out)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def test():
net = GoogLeNet()
x = torch.randn(1,3,32,32)
y = net(x)
print(y.size())
# test()
'''LeNet in PyTorch.'''
import torch.nn as nn
import torch.nn.functional as F
class LeNet(nn.Module):
def __init__(self):
super(LeNet, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16*5*5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
def forward(self, x):
out = F.relu(self.conv1(x))
out = F.max_pool2d(out, 2)
out = F.relu(self.conv2(out))
out = F.max_pool2d(out, 2)
out = out.view(out.size(0), -1)
out = F.relu(self.fc1(out))
out = F.relu(self.fc2(out))
out = self.fc3(out)
return out
import torch
import torch.nn as nn
import torch.nn.functional as F
class MobileNet1(nn.Module):
def __init__(self, inchannel=3, num_classes=10):
super(MobileNet1, self).__init__()
self.num_classes = num_classes
def conv_bn(inp, oup, stride):
return nn.Sequential(
nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
nn.BatchNorm2d(oup),
nn.Hardswish()
#nn.Hardsigmoid(inplace=True)
# nn.LeakyReLU(negative_slope=0.1, inplace=True)
# nn.ReLU(inplace=True)
)
def conv_dw(inp, oup, stride):
return nn.Sequential(
nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
nn.BatchNorm2d(inp),
nn.Hardswish(),
#nn.Hardsigmoid(inplace=True),
# nn.LeakyReLU(negative_slope=0.1, inplace=True),
# nn.ReLU(inplace=True),
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
nn.Hardswish()
#nn.Hardsigmoid(inplace=True)
# nn.LeakyReLU(negative_slope=0.1, inplace=True)
# nn.ReLU(inplace=True),
)
self.model = nn.Sequential(
conv_bn(inchannel, 32, 1),
conv_dw( 32, 64, 1),
conv_dw( 64, 128, 2),
conv_dw(128, 128, 1),
conv_dw(128, 256, 2),
conv_dw(256, 256, 1),
conv_dw(256, 512, 2),
conv_dw(512, 512, 1),
conv_dw(512, 512, 1),
conv_dw(512, 512, 1),
conv_dw(512, 512, 1),
conv_dw(512, 512, 1),
conv_dw(512, 1024, 2),
conv_dw(1024, 1024, 1),
nn.AdaptiveAvgPool2d(1)
)
self.fc = nn.Linear(1024, self.num_classes)
def forward(self, x):
x, act_scale = self.model(x)
# x = self.model(x)
x = x.view(x.size(0), -1)
# x = self.fc(x)
x = self.fc(x, act_scale)
return x
'''MobileNetV2 in PyTorch.
See the paper "Inverted Residuals and Linear Bottlenecks:
Mobile Networks for Classification, Detection and Segmentation" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class Block(nn.Module):
'''expand + depthwise + pointwise'''
def __init__(self, in_planes, out_planes, expansion, stride):
super(Block, self).__init__()
self.stride = stride
planes = expansion * in_planes
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn3 = nn.BatchNorm2d(out_planes)
self.shortcut = nn.Sequential()
if stride == 1 and in_planes != out_planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(out_planes),
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
out = out + self.shortcut(x) if self.stride==1 else out
return out
class MobileNetV2(nn.Module):
# (expansion, out_planes, num_blocks, stride)
cfg = [(1, 16, 1, 1),
(6, 24, 2, 1), # NOTE: change stride 2 -> 1 for CIFAR10
(6, 32, 3, 2),
(6, 64, 4, 2),
(6, 96, 3, 1),
(6, 160, 3, 2),
(6, 320, 1, 1)]
def __init__(self, num_classes=10):
super(MobileNetV2, self).__init__()
# NOTE: change conv1 stride 2 -> 1 for CIFAR10
self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(32)
self.layers = self._make_layers(in_planes=32)
self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(1280)
self.linear = nn.Linear(1280, num_classes)
def _make_layers(self, in_planes):
layers = []
for expansion, out_planes, num_blocks, stride in self.cfg:
strides = [stride] + [1]*(num_blocks-1)
for stride in strides:
layers.append(Block(in_planes, out_planes, expansion, stride))
in_planes = out_planes
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layers(out)
out = F.relu(self.bn2(self.conv2(out)))
# NOTE: change pooling kernel_size 7 -> 4 for CIFAR10
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def test():
net = MobileNetV2()
x = torch.randn(2,3,32,32)
y = net(x)
print(y.size())
# test()
'''PNASNet in PyTorch.
Paper: Progressive Neural Architecture Search
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class SepConv(nn.Module):
'''Separable Convolution.'''
def __init__(self, in_planes, out_planes, kernel_size, stride):
super(SepConv, self).__init__()
self.conv1 = nn.Conv2d(in_planes, out_planes,
kernel_size, stride,
padding=(kernel_size-1)//2,
bias=False, groups=in_planes)
self.bn1 = nn.BatchNorm2d(out_planes)
def forward(self, x):
return self.bn1(self.conv1(x))
class CellA(nn.Module):
def __init__(self, in_planes, out_planes, stride=1):
super(CellA, self).__init__()
self.stride = stride
self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
if stride==2:
self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(out_planes)
def forward(self, x):
y1 = self.sep_conv1(x)
y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
if self.stride==2:
y2 = self.bn1(self.conv1(y2))
return F.relu(y1+y2)
class CellB(nn.Module):
def __init__(self, in_planes, out_planes, stride=1):
super(CellB, self).__init__()
self.stride = stride
# Left branch
self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride)
# Right branch
self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride)
if stride==2:
self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(out_planes)
# Reduce channels
self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(out_planes)
def forward(self, x):
# Left branch
y1 = self.sep_conv1(x)
y2 = self.sep_conv2(x)
# Right branch
y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
if self.stride==2:
y3 = self.bn1(self.conv1(y3))
y4 = self.sep_conv3(x)
# Concat & reduce channels
b1 = F.relu(y1+y2)
b2 = F.relu(y3+y4)
y = torch.cat([b1,b2], 1)
return F.relu(self.bn2(self.conv2(y)))
class PNASNet(nn.Module):
def __init__(self, cell_type, num_cells, num_planes):
super(PNASNet, self).__init__()
self.in_planes = num_planes
self.cell_type = cell_type
self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(num_planes)
self.layer1 = self._make_layer(num_planes, num_cells=6)
self.layer2 = self._downsample(num_planes*2)
self.layer3 = self._make_layer(num_planes*2, num_cells=6)
self.layer4 = self._downsample(num_planes*4)
self.layer5 = self._make_layer(num_planes*4, num_cells=6)
self.linear = nn.Linear(num_planes*4, 10)
def _make_layer(self, planes, num_cells):
layers = []
for _ in range(num_cells):
layers.append(self.cell_type(self.in_planes, planes, stride=1))
self.in_planes = planes
return nn.Sequential(*layers)
def _downsample(self, planes):
layer = self.cell_type(self.in_planes, planes, stride=2)
self.in_planes = planes
return layer
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = self.layer5(out)
out = F.avg_pool2d(out, 8)
out = self.linear(out.view(out.size(0), -1))
return out
def PNASNetA():
return PNASNet(CellA, num_cells=6, num_planes=44)
def PNASNetB():
return PNASNet(CellB, num_cells=6, num_planes=32)
def test():
net = PNASNetB()
x = torch.randn(1,3,32,32)
y = net(x)
print(y)
# test()
'''Pre-activation ResNet in PyTorch.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Identity Mappings in Deep Residual Networks. arXiv:1603.05027
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class PreActBlock(nn.Module):
'''Pre-activation version of the BasicBlock.'''
expansion = 1
def __init__(self, in_planes, planes, stride=1):
super(PreActBlock, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
)
def forward(self, x):
out = F.relu(self.bn1(x))
shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
out = self.conv1(out)
out = self.conv2(F.relu(self.bn2(out)))
out += shortcut
return out
class PreActBottleneck(nn.Module):
'''Pre-activation version of the original Bottleneck module.'''
expansion = 4
def __init__(self, in_planes, planes, stride=1):
super(PreActBottleneck, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
)
def forward(self, x):
out = F.relu(self.bn1(x))
shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
out = self.conv1(out)
out = self.conv2(F.relu(self.bn2(out)))
out = self.conv3(F.relu(self.bn3(out)))
out += shortcut
return out
class PreActResNet(nn.Module):
def __init__(self, block, num_blocks, num_classes=10):
super(PreActResNet, self).__init__()
self.in_planes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
self.linear = nn.Linear(512*block.expansion, num_classes)
def _make_layer(self, block, planes, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, stride))
self.in_planes = planes * block.expansion
return nn.Sequential(*layers)
def forward(self, x):
out = self.conv1(x)
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def PreActResNet18():
return PreActResNet(PreActBlock, [2,2,2,2])
def PreActResNet34():
return PreActResNet(PreActBlock, [3,4,6,3])
def PreActResNet50():
return PreActResNet(PreActBottleneck, [3,4,6,3])
def PreActResNet101():
return PreActResNet(PreActBottleneck, [3,4,23,3])
def PreActResNet152():
return PreActResNet(PreActBottleneck, [3,8,36,3])
def test():
net = PreActResNet18()
y = net((torch.randn(1,3,32,32)))
print(y.size())
# test()
'''RegNet in PyTorch.
Paper: "Designing Network Design Spaces".
Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class SE(nn.Module):
'''Squeeze-and-Excitation block.'''
def __init__(self, in_planes, se_planes):
super(SE, self).__init__()
self.se1 = nn.Conv2d(in_planes, se_planes, kernel_size=1, bias=True)
self.se2 = nn.Conv2d(se_planes, in_planes, kernel_size=1, bias=True)
def forward(self, x):
out = F.adaptive_avg_pool2d(x, (1, 1))
out = F.relu(self.se1(out))
out = self.se2(out).sigmoid()
out = x * out
return out
class Block(nn.Module):
def __init__(self, w_in, w_out, stride, group_width, bottleneck_ratio, se_ratio):
super(Block, self).__init__()
# 1x1
w_b = int(round(w_out * bottleneck_ratio))
self.conv1 = nn.Conv2d(w_in, w_b, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(w_b)
# 3x3
num_groups = w_b // group_width
self.conv2 = nn.Conv2d(w_b, w_b, kernel_size=3,
stride=stride, padding=1, groups=num_groups, bias=False)
self.bn2 = nn.BatchNorm2d(w_b)
# se
self.with_se = se_ratio > 0
if self.with_se:
w_se = int(round(w_in * se_ratio))
self.se = SE(w_b, w_se)
# 1x1
self.conv3 = nn.Conv2d(w_b, w_out, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(w_out)
self.shortcut = nn.Sequential()
if stride != 1 or w_in != w_out:
self.shortcut = nn.Sequential(
nn.Conv2d(w_in, w_out,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(w_out)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
if self.with_se:
out = self.se(out)
out = self.bn3(self.conv3(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class RegNet(nn.Module):
def __init__(self, cfg, num_classes=10):
super(RegNet, self).__init__()
self.cfg = cfg
self.in_planes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(0)
self.layer2 = self._make_layer(1)
self.layer3 = self._make_layer(2)
self.layer4 = self._make_layer(3)
self.linear = nn.Linear(self.cfg['widths'][-1], num_classes)
def _make_layer(self, idx):
depth = self.cfg['depths'][idx]
width = self.cfg['widths'][idx]
stride = self.cfg['strides'][idx]
group_width = self.cfg['group_width']
bottleneck_ratio = self.cfg['bottleneck_ratio']
se_ratio = self.cfg['se_ratio']
layers = []
for i in range(depth):
s = stride if i == 0 else 1
layers.append(Block(self.in_planes, width,
s, group_width, bottleneck_ratio, se_ratio))
self.in_planes = width
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.adaptive_avg_pool2d(out, (1, 1))
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def RegNetX_200MF():
cfg = {
'depths': [1, 1, 4, 7],
'widths': [24, 56, 152, 368],
'strides': [1, 1, 2, 2],
'group_width': 8,
'bottleneck_ratio': 1,
'se_ratio': 0,
}
return RegNet(cfg)
def RegNetX_400MF():
cfg = {
'depths': [1, 2, 7, 12],
'widths': [32, 64, 160, 384],
'strides': [1, 1, 2, 2],
'group_width': 16,
'bottleneck_ratio': 1,
'se_ratio': 0,
}
return RegNet(cfg)
def RegNetY_400MF():
cfg = {
'depths': [1, 2, 7, 12],
'widths': [32, 64, 160, 384],
'strides': [1, 1, 2, 2],
'group_width': 16,
'bottleneck_ratio': 1,
'se_ratio': 0.25,
}
return RegNet(cfg)
def test():
net = RegNetX_200MF()
print(net)
x = torch.randn(2, 3, 32, 32)
y = net(x)
print(y.shape)
if __name__ == '__main__':
test()
'''ResNet in PyTorch.
For Pre-activation ResNet, see 'preact_resnet.py'.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(
in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*planes)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, in_planes, planes, stride=1):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, self.expansion *
planes, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(self.expansion*planes)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*planes)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, num_blocks, num_classes=10):
super(ResNet, self).__init__()
self.in_planes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
self.linear = nn.Linear(512*block.expansion, num_classes)
def _make_layer(self, block, planes, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, stride))
self.in_planes = planes * block.expansion
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def ResNet18():
return ResNet(BasicBlock, [2, 2, 2, 2])
def ResNet34():
return ResNet(BasicBlock, [3, 4, 6, 3])
def ResNet50():
return ResNet(Bottleneck, [3, 4, 6, 3])
def ResNet101():
return ResNet(Bottleneck, [3, 4, 23, 3])
def ResNet152():
return ResNet(Bottleneck, [3, 8, 36, 3])
def test():
net = ResNet18()
y = net(torch.randn(1, 3, 32, 32))
print(y.size())
# test()
'''ResNeXt in PyTorch.
See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class Block(nn.Module):
'''Grouped convolution block.'''
expansion = 2
def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
super(Block, self).__init__()
group_width = cardinality * bottleneck_width
self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(group_width)
self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
self.bn2 = nn.BatchNorm2d(group_width)
self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(self.expansion*group_width)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*group_width:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*group_width)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class ResNeXt(nn.Module):
def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
super(ResNeXt, self).__init__()
self.cardinality = cardinality
self.bottleneck_width = bottleneck_width
self.in_planes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(num_blocks[0], 1)
self.layer2 = self._make_layer(num_blocks[1], 2)
self.layer3 = self._make_layer(num_blocks[2], 2)
# self.layer4 = self._make_layer(num_blocks[3], 2)
self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes)
def _make_layer(self, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride))
self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
# Increase bottleneck_width by 2 after each stage.
self.bottleneck_width *= 2
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
# out = self.layer4(out)
out = F.avg_pool2d(out, 8)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def ResNeXt29_2x64d():
return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64)
def ResNeXt29_4x64d():
return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64)
def ResNeXt29_8x64d():
return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64)
def ResNeXt29_32x4d():
return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4)
def test_resnext():
net = ResNeXt29_2x64d()
x = torch.randn(1,3,32,32)
y = net(x)
print(y.size())
# test_resnext()
'''SENet in PyTorch.
SENet is the winner of ImageNet-2017. The paper is not released yet.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes)
)
# SE layers
self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1) # Use nn.Conv2d instead of nn.Linear
self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
# Squeeze
w = F.avg_pool2d(out, out.size(2))
w = F.relu(self.fc1(w))
w = F.sigmoid(self.fc2(w))
# Excitation
out = out * w # New broadcasting feature from v0.2!
out += self.shortcut(x)
out = F.relu(out)
return out
class PreActBlock(nn.Module):
def __init__(self, in_planes, planes, stride=1):
super(PreActBlock, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
if stride != 1 or in_planes != planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False)
)
# SE layers
self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)
self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)
def forward(self, x):
out = F.relu(self.bn1(x))
shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
out = self.conv1(out)
out = self.conv2(F.relu(self.bn2(out)))
# Squeeze
w = F.avg_pool2d(out, out.size(2))
w = F.relu(self.fc1(w))
w = F.sigmoid(self.fc2(w))
# Excitation
out = out * w
out += shortcut
return out
class SENet(nn.Module):
def __init__(self, block, num_blocks, num_classes=10):
super(SENet, self).__init__()
self.in_planes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
self.linear = nn.Linear(512, num_classes)
def _make_layer(self, block, planes, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, stride))
self.in_planes = planes
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def SENet18():
return SENet(PreActBlock, [2,2,2,2])
def test():
net = SENet18()
y = net(torch.randn(1,3,32,32))
print(y.size())
# test()
'''ShuffleNet in PyTorch.
See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class ShuffleBlock(nn.Module):
def __init__(self, groups):
super(ShuffleBlock, self).__init__()
self.groups = groups
def forward(self, x):
'''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]'''
N,C,H,W = x.size()
g = self.groups
return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).reshape(N,C,H,W)
class Bottleneck(nn.Module):
def __init__(self, in_planes, out_planes, stride, groups):
super(Bottleneck, self).__init__()
self.stride = stride
mid_planes = out_planes/4
g = 1 if in_planes==24 else groups
self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
self.bn1 = nn.BatchNorm2d(mid_planes)
self.shuffle1 = ShuffleBlock(groups=g)
self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
self.bn2 = nn.BatchNorm2d(mid_planes)
self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
self.bn3 = nn.BatchNorm2d(out_planes)
self.shortcut = nn.Sequential()
if stride == 2:
self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.shuffle1(out)
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
res = self.shortcut(x)
out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res)
return out
class ShuffleNet(nn.Module):
def __init__(self, cfg):
super(ShuffleNet, self).__init__()
out_planes = cfg['out_planes']
num_blocks = cfg['num_blocks']
groups = cfg['groups']
self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(24)
self.in_planes = 24
self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
self.linear = nn.Linear(out_planes[2], 10)
def _make_layer(self, out_planes, num_blocks, groups):
layers = []
for i in range(num_blocks):
stride = 2 if i == 0 else 1
cat_planes = self.in_planes if i == 0 else 0
layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
self.in_planes = out_planes
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def ShuffleNetG2():
cfg = {
'out_planes': [200,400,800],
'num_blocks': [4,8,4],
'groups': 2
}
return ShuffleNet(cfg)
def ShuffleNetG3():
cfg = {
'out_planes': [240,480,960],
'num_blocks': [4,8,4],
'groups': 3
}
return ShuffleNet(cfg)
def test():
net = ShuffleNetG2()
x = torch.randn(1,3,32,32)
y = net(x)
print(y)
# test()
'''ShuffleNetV2 in PyTorch.
See the paper "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class ShuffleBlock(nn.Module):
def __init__(self, groups=2):
super(ShuffleBlock, self).__init__()
self.groups = groups
def forward(self, x):
'''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]'''
N, C, H, W = x.size()
g = self.groups
return x.view(N, g, C//g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W)
class SplitBlock(nn.Module):
def __init__(self, ratio):
super(SplitBlock, self).__init__()
self.ratio = ratio
def forward(self, x):
c = int(x.size(1) * self.ratio)
return x[:, :c, :, :], x[:, c:, :, :]
class BasicBlock(nn.Module):
def __init__(self, in_channels, split_ratio=0.5):
super(BasicBlock, self).__init__()
self.split = SplitBlock(split_ratio)
in_channels = int(in_channels * split_ratio)
self.conv1 = nn.Conv2d(in_channels, in_channels,
kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(in_channels)
self.conv2 = nn.Conv2d(in_channels, in_channels,
kernel_size=3, stride=1, padding=1, groups=in_channels, bias=False)
self.bn2 = nn.BatchNorm2d(in_channels)
self.conv3 = nn.Conv2d(in_channels, in_channels,
kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(in_channels)
self.shuffle = ShuffleBlock()
def forward(self, x):
x1, x2 = self.split(x)
out = F.relu(self.bn1(self.conv1(x2)))
out = self.bn2(self.conv2(out))
out = F.relu(self.bn3(self.conv3(out)))
out = torch.cat([x1, out], 1)
out = self.shuffle(out)
return out
class DownBlock(nn.Module):
def __init__(self, in_channels, out_channels):
super(DownBlock, self).__init__()
mid_channels = out_channels // 2
# left
self.conv1 = nn.Conv2d(in_channels, in_channels,
kernel_size=3, stride=2, padding=1, groups=in_channels, bias=False)
self.bn1 = nn.BatchNorm2d(in_channels)
self.conv2 = nn.Conv2d(in_channels, mid_channels,
kernel_size=1, bias=False)
self.bn2 = nn.BatchNorm2d(mid_channels)
# right
self.conv3 = nn.Conv2d(in_channels, mid_channels,
kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(mid_channels)
self.conv4 = nn.Conv2d(mid_channels, mid_channels,
kernel_size=3, stride=2, padding=1, groups=mid_channels, bias=False)
self.bn4 = nn.BatchNorm2d(mid_channels)
self.conv5 = nn.Conv2d(mid_channels, mid_channels,
kernel_size=1, bias=False)
self.bn5 = nn.BatchNorm2d(mid_channels)
self.shuffle = ShuffleBlock()
def forward(self, x):
# left
out1 = self.bn1(self.conv1(x))
out1 = F.relu(self.bn2(self.conv2(out1)))
# right
out2 = F.relu(self.bn3(self.conv3(x)))
out2 = self.bn4(self.conv4(out2))
out2 = F.relu(self.bn5(self.conv5(out2)))
# concat
out = torch.cat([out1, out2], 1)
out = self.shuffle(out)
return out
class ShuffleNetV2(nn.Module):
def __init__(self, net_size):
super(ShuffleNetV2, self).__init__()
out_channels = configs[net_size]['out_channels']
num_blocks = configs[net_size]['num_blocks']
self.conv1 = nn.Conv2d(3, 24, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(24)
self.in_channels = 24
self.layer1 = self._make_layer(out_channels[0], num_blocks[0])
self.layer2 = self._make_layer(out_channels[1], num_blocks[1])
self.layer3 = self._make_layer(out_channels[2], num_blocks[2])
self.conv2 = nn.Conv2d(out_channels[2], out_channels[3],
kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(out_channels[3])
self.linear = nn.Linear(out_channels[3], 10)
def _make_layer(self, out_channels, num_blocks):
layers = [DownBlock(self.in_channels, out_channels)]
for i in range(num_blocks):
layers.append(BasicBlock(out_channels))
self.in_channels = out_channels
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
# out = F.max_pool2d(out, 3, stride=2, padding=1)
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = F.relu(self.bn2(self.conv2(out)))
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
configs = {
0.5: {
'out_channels': (48, 96, 192, 1024),
'num_blocks': (3, 7, 3)
},
1: {
'out_channels': (116, 232, 464, 1024),
'num_blocks': (3, 7, 3)
},
1.5: {
'out_channels': (176, 352, 704, 1024),
'num_blocks': (3, 7, 3)
},
2: {
'out_channels': (224, 488, 976, 2048),
'num_blocks': (3, 7, 3)
}
}
def test():
net = ShuffleNetV2(net_size=0.5)
x = torch.randn(3, 3, 32, 32)
y = net(x)
print(y.shape)
# test()
'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn
cfg = {
'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
class VGG(nn.Module):
def __init__(self, vgg_name):
super(VGG, self).__init__()
self.features = self._make_layers(cfg[vgg_name])
self.classifier = nn.Linear(512, 10)
def forward(self, x):
out = self.features(x)
out = out.view(out.size(0), -1)
out = self.classifier(out)
return out
def _make_layers(self, cfg):
layers = []
in_channels = 3
for x in cfg:
if x == 'M':
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
else:
layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
nn.BatchNorm2d(x),
nn.ReLU(inplace=True)]
in_channels = x
layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
return nn.Sequential(*layers)
def test():
net = VGG('VGG11')
x = torch.randn(2,3,32,32)
y = net(x)
print(y.size())
# test()
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from lsq_sq import *
from models.mobilenet import *
conv_idx = -1
act_idx = -1
former_conv = None
def replace_sq(model, bit_width=8):
global conv_idx, act_idx
for name, module in model.named_children():
if isinstance(module, (nn.Sequential)): #- conventional
replace_sq(model.__dict__['_modules'][name], bit_width)
elif isinstance(module, nn.Conv2d):
former_conv = name
conv_idx += 1
bias = False if module.bias is None else True
model.__dict__['_modules'][name] = FuseConv2dQ(module.in_channels, module.out_channels,
module.kernel_size, stride=module.stride,
padding=module.padding, dilation=module.dilation,
groups=module.groups, bias=bias, wbit=bit_width)
model.__dict__['_modules'][name].weight = module.weight
if bias:
model.__dict__['_modules'][name].bias = module.bias
elif isinstance(module, nn.BatchNorm2d):
model.__dict__['_modules'][former_conv].replace_bn(module)
model.__dict__['_modules'][name] = nn.Identity()
elif isinstance(module, nn.ReLU):
act_idx += 1
model.__dict__['_modules'][name] = QReLU(abit=bit_width, inplace=False, dequantize=True)
elif isinstance(module, nn.Hardswish):
act_idx += 1
model.__dict__['_modules'][name] = QHswish(abit=bit_width, inplace=False, dequantize=True)
elif isinstance(module, nn.Hardsigmoid):
act_idx += 1
model.__dict__['_modules'][name] = QHsigmoid(abit=bit_width, inplace=False, dequantize=True)
elif isinstance(module, nn.LeakyReLU):
act_idx += 1
model.__dict__['_modules'][name] = QLeakyReLU(abit=bit_width, inplace=False, dequantize=True)
elif isinstance(module, nn.Linear):
bias = False if module.bias is None else True
model.__dict__['_modules'][name] = QLinear(module.in_features, module.out_features, bias, wbit=bit_width)
model.__dict__['_modules'][name].weight = module.weight
if bias:
model.__dict__['_modules'][name].bias = module.bias
elif isinstance(module, nn.AdaptiveAvgPool2d):
model.__dict__['_modules'][name] = QAvgPool2d(abit=bit_width, dequantize=True, output_size=module.output_size)
# elif isinstance(module, BasicBlock) or isinstance(module, Bottleneck): #- ResNet support
# replace_sq(model.__dict__['_modules'][name], bit_width)
# elif isinstance(module, InvertedResidual): #mv2
# replace_sq(model.__dict__['_modules'][name], bit_width)
else:
model.__dict__['_modules'][name] = module
return model
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from lsq_int import *
from models.mobilenet import *
conv_idx = -1
act_idx = -1
former_conv = None
def replace_int(model, bit_width=8):
global conv_idx, act_idx
for name, module in model.named_children():
if isinstance(module, (nn.Sequential)): #- conventional
replace_int(model.__dict__['_modules'][name], bit_width)
elif isinstance(module, nn.Conv2d):
former_conv = name
conv_idx += 1
bias = False if module.bias is None else True
model.__dict__['_modules'][name] = FuseConv2dQ(module.in_channels, module.out_channels,
module.kernel_size, stride=module.stride,
padding=module.padding, dilation=module.dilation,
groups=module.groups, bias=bias, wbit=bit_width)
model.__dict__['_modules'][name].weight = module.weight
if bias:
model.__dict__['_modules'][name].bias = module.bias
elif isinstance(module, nn.BatchNorm2d):
model.__dict__['_modules'][former_conv].replace_bn(module)
model.__dict__['_modules'][name] = nn.Identity()
elif isinstance(module, nn.ReLU):
act_idx += 1
model.__dict__['_modules'][name] = QReLU(abit=bit_width, inplace=False, dequantize=True)
elif isinstance(module, nn.Hardswish):
act_idx += 1
model.__dict__['_modules'][name] = QHswish(abit=bit_width, inplace=False, dequantize=True)
elif isinstance(module, nn.LeakyReLU):
act_idx += 1
model.__dict__['_modules'][name] = QLeakyReLU(abit=bit_width, inplace=False, dequantize=True)
elif isinstance(module, nn.Linear):
bias = False if module.bias is None else True
model.__dict__['_modules'][name] = QLinear(module.in_features, module.out_features, bias, wbit=bit_width)
model.__dict__['_modules'][name].weight = module.weight
if bias:
model.__dict__['_modules'][name].bias = module.bias
elif isinstance(module, nn.AdaptiveAvgPool2d):
model.__dict__['_modules'][name] = QAvgPool2d(abit=bit_width, dequantize=True, output_size=module.output_size)
# elif isinstance(module, BasicBlock) or isinstance(module, Bottleneck): #- ResNet support
# replace_sq(model.__dict__['_modules'][name], bit_width)
# elif isinstance(module, InvertedResidual): #mv2
# replace_sq(model.__dict__['_modules'][name], bit_width)
else:
model.__dict__['_modules'][name] = module
return model
'''Some helper functions for PyTorch, including:
- get_mean_and_std: calculate the mean and std value of dataset.
- msr_init: net parameter initialization.
- progress_bar: progress bar mimic xlua.progress.
'''
import os
import sys
import time
import math
import torch
import torch.nn as nn
import torch.nn.init as init
def get_mean_and_std(dataset):
'''Compute the mean and std value of dataset.'''
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)
mean = torch.zeros(3)
std = torch.zeros(3)
print('==> Computing mean and std..')
for inputs, targets in dataloader:
for i in range(3):
mean[i] += inputs[:,i,:,:].mean()
std[i] += inputs[:,i,:,:].std()
mean.div_(len(dataset))
std.div_(len(dataset))
return mean, std
def init_params(net):
'''Init layer parameters.'''
for m in net.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal(m.weight, mode='fan_out')
if m.bias:
init.constant(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant(m.weight, 1)
init.constant(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal(m.weight, std=1e-3)
if m.bias:
init.constant(m.bias, 0)
_, term_width = os.popen('stty size', 'r').read().split()
term_width = int(term_width)
TOTAL_BAR_LENGTH = 65.
last_time = time.time()
begin_time = last_time
def progress_bar(current, total, msg=None):
global last_time, begin_time
if current == 0:
begin_time = time.time() # Reset for new bar.
cur_len = int(TOTAL_BAR_LENGTH*current/total)
rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1
sys.stdout.write(' [')
for i in range(cur_len):
sys.stdout.write('=')
sys.stdout.write('>')
for i in range(rest_len):
sys.stdout.write('.')
sys.stdout.write(']')
cur_time = time.time()
step_time = cur_time - last_time
last_time = cur_time
tot_time = cur_time - begin_time
L = []
L.append(' Step: %s' % format_time(step_time))
L.append(' | Tot: %s' % format_time(tot_time))
if msg:
L.append(' | ' + msg)
msg = ''.join(L)
sys.stdout.write(msg)
for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):
sys.stdout.write(' ')
# Go back to the center of the bar.
for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):
sys.stdout.write('\b')
sys.stdout.write(' %d/%d ' % (current+1, total))
if current < total-1:
sys.stdout.write('\r')
else:
sys.stdout.write('\n')
sys.stdout.flush()
def format_time(seconds):
days = int(seconds / 3600/24)
seconds = seconds - days*3600*24
hours = int(seconds / 3600)
seconds = seconds - hours*3600
minutes = int(seconds / 60)
seconds = seconds - minutes*60
secondsf = int(seconds)
seconds = seconds - secondsf
millis = int(seconds*1000)
f = ''
i = 1
if days > 0:
f += str(days) + 'D'
i += 1
if hours > 0 and i <= 2:
f += str(hours) + 'h'
i += 1
if minutes > 0 and i <= 2:
f += str(minutes) + 'm'
i += 1
if secondsf > 0 and i <= 2:
f += str(secondsf) + 's'
i += 1
if millis > 0 and i <= 2:
f += str(millis) + 'ms'
i += 1
if f == '':
f = '0ms'
return f
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type