# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/liyunsheng13/micronet/blob/main/backbone/micronet.py
https://github.com/liyunsheng13/micronet/blob/main/backbone/activation.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn

from ppocr.modeling.backbones.det_mobilenet_v3 import make_divisible

M0_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 8, 3, 2, 2, 0, 4, 8, 2, 2, 2, 0, 1, 1],
    [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 2, 1, 1],
    [2, 1, 16, 5, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 4, 4, 4, 32, 4, 4, 2, 2, 1, 1],
    [2, 1, 64, 5, 1, 4, 8, 8, 64, 8, 8, 2, 2, 1, 1],
    [1, 1, 96, 3, 1, 4, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 384, 3, 1, 4, 12, 12, 0, 0, 0, 2, 2, 1, 2],
]
M1_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 8, 3, 2, 2, 0, 6, 8, 2, 2, 2, 0, 1, 1],
    [2, 1, 16, 3, 2, 2, 0, 8, 16, 4, 4, 2, 2, 1, 1],
    [2, 1, 16, 5, 2, 2, 0, 16, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 6, 4, 4, 32, 4, 4, 2, 2, 1, 1],
    [2, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 1],
    [1, 1, 96, 3, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 576, 3, 1, 6, 12, 12, 0, 0, 0, 2, 2, 1, 2],
]
M2_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 0, 1, 1],
    [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 2, 2, 1, 1],
    [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 2, 2, 1, 2],
    [1, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 2],
    [2, 1, 96, 5, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 128, 3, 1, 6, 12, 12, 128, 8, 8, 2, 2, 1, 2],
    [1, 1, 768, 3, 1, 6, 16, 16, 0, 0, 0, 2, 2, 1, 2],
]
M3_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 0, 2, 0, 1],
    [2, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 0, 2, 0, 1],
    [1, 1, 24, 3, 2, 2, 0, 24, 24, 4, 4, 0, 2, 0, 1],
    [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 0, 2, 0, 1],
    [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 0, 2, 0, 2],
    [1, 1, 64, 5, 1, 6, 8, 8, 48, 8, 8, 0, 2, 0, 2],
    [1, 1, 80, 5, 1, 6, 8, 8, 80, 8, 8, 0, 2, 0, 2],
    [1, 1, 80, 5, 1, 6, 10, 10, 80, 8, 8, 0, 2, 0, 2],
    [1, 1, 120, 5, 1, 6, 10, 10, 120, 10, 10, 0, 2, 0, 2],
    [1, 1, 120, 5, 1, 6, 12, 12, 120, 10, 10, 0, 2, 0, 2],
    [1, 1, 144, 3, 1, 6, 12, 12, 144, 12, 12, 0, 2, 0, 2],
    [1, 1, 432, 3, 1, 3, 12, 12, 0, 0, 0, 0, 2, 0, 2],
]


def get_micronet_config(mode):
    # explicit lookup instead of eval(mode + '_cfgs'): same result, but it
    # cannot execute arbitrary input and fails with a clear KeyError
    return {
        'M0': M0_cfgs,
        'M1': M1_cfgs,
        'M2': M2_cfgs,
        'M3': M3_cfgs,
    }[mode]
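

# A sketch of how one config row decodes (an illustrative note following the
# header comment above, not behavior): each row gives the stride s, block
# count n, output channels c, kernel size ks, channel expansion (c1, c2),
# pointwise groups (g1, g2), output-group spec (c3, g3, g4), the DYShiftMax
# switches (y1, y2, y3) and the reduction ratio r. For example:
#
#     s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r = \
#         get_micronet_config('M3')[0]
#     # -> s == 2, c == 16, ks == 3, ...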


class MaxGroupPooling(nn.Layer):
    def __init__(self, channel_per_group=2):
        super(MaxGroupPooling, self).__init__()
        self.channel_per_group = channel_per_group

    def forward(self, x):
        if self.channel_per_group == 1:
            return x
        b, c, h, w = x.shape
        # split channels into groups of `channel_per_group` and keep the
        # element-wise maximum of each group
        y = paddle.reshape(x, [b, c // self.channel_per_group, -1, h, w])
        out = paddle.max(y, axis=2)
        return out


class SpatialSepConvSF(nn.Layer):
    def __init__(self, inp, oups, kernel_size, stride):
        super(SpatialSepConvSF, self).__init__()
        oup1, oup2 = oups
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp,
                oup1, (kernel_size, 1), (stride, 1), (kernel_size // 2, 0),
                bias_attr=False,
                groups=1),
            nn.BatchNorm2D(oup1),
            nn.Conv2D(
                oup1,
                oup1 * oup2, (1, kernel_size), (1, stride),
                (0, kernel_size // 2),
                bias_attr=False,
                groups=oup1),
            nn.BatchNorm2D(oup1 * oup2),
            ChannelShuffle(oup1), )

    def forward(self, x):
        out = self.conv(x)
        return out


class ChannelShuffle(nn.Layer):
    def __init__(self, groups):
        super(ChannelShuffle, self).__init__()
        self.groups = groups

    def forward(self, x):
        b, c, h, w = x.shape
        channels_per_group = c // self.groups
        # reshape to (b, groups, channels_per_group, h, w), swap the two
        # channel axes, then flatten back
        x = paddle.reshape(x, [b, self.groups, channels_per_group, h, w])
        x = paddle.transpose(x, (0, 2, 1, 3, 4))
        out = paddle.reshape(x, [b, -1, h, w])
        return out
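

# A worked example of the shuffle above (illustrative only): with groups=2 and
# six channels [0, 1, 2, 3, 4, 5], the reshape/transpose/reshape sequence
# interleaves the two groups, yielding channel order [0, 3, 1, 4, 2, 5].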


class StemLayer(nn.Layer):
    def __init__(self, inp, oup, stride, groups=(4, 4)):
        super(StemLayer, self).__init__()
        g1, g2 = groups
        self.stem = nn.Sequential(
            SpatialSepConvSF(inp, groups, 3, stride),
            MaxGroupPooling(2) if g1 * g2 == 2 * oup else nn.ReLU6())

    def forward(self, x):
        out = self.stem(x)
        return out


class DepthSpatialSepConv(nn.Layer):
    def __init__(self, inp, expand, kernel_size, stride):
        super(DepthSpatialSepConv, self).__init__()
        exp1, exp2 = expand
        hidden_dim = inp * exp1
        oup = inp * exp1 * exp2
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp,
                hidden_dim, (kernel_size, 1), (stride, 1),
                (kernel_size // 2, 0),
                bias_attr=False,
                groups=inp),
            nn.BatchNorm2D(hidden_dim),
            nn.Conv2D(
                hidden_dim,
                oup, (1, kernel_size),
                1, (0, kernel_size // 2),
                bias_attr=False,
                groups=hidden_dim),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        x = self.conv(x)
        return x
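

# A note on the factorization above (an explanatory aside, not original code):
# DepthSpatialSepConv replaces a single k x k depthwise convolution with a
# (k, 1) pass followed by a (1, k) pass, both depthwise, so the per-channel
# kernel cost grows linearly in k rather than quadratically. This is one of
# the places where MicroNet's MAdds savings come from.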


class GroupConv(nn.Layer):
    def __init__(self, inp, oup, groups=(2, 2)):
        super(GroupConv, self).__init__()
        self.inp = inp
        self.oup = oup
        # `groups` is a pair (g1, g2); only g1 drives the conv's group count
        self.groups = groups
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp, oup, 1, 1, 0, bias_attr=False, groups=self.groups[0]),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        x = self.conv(x)
        return x


class DepthConv(nn.Layer):
    def __init__(self, inp, oup, kernel_size, stride):
        super(DepthConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp,
                oup,
                kernel_size,
                stride,
                kernel_size // 2,
                bias_attr=False,
                groups=inp),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        out = self.conv(x)
        return out


class DYShiftMax(nn.Layer):
    def __init__(self,
                 inp,
                 oup,
                 reduction=4,
                 act_max=1.0,
                 act_relu=True,
                 init_a=[0.0, 0.0],
                 init_b=[0.0, 0.0],
                 relu_before_pool=False,
                 g=None,
                 expansion=False):
        super(DYShiftMax, self).__init__()
        self.oup = oup
        self.act_max = act_max * 2
        self.act_relu = act_relu
        self.avg_pool = nn.Sequential(nn.ReLU() if relu_before_pool else
                                      nn.Sequential(), nn.AdaptiveAvgPool2D(1))
        # four coefficients (a1, b1, a2, b2) are predicted when the max branch
        # is active, two (a1, b1) otherwise
        self.exp = 4 if act_relu else 2
        self.init_a = init_a
        self.init_b = init_b

        # determine the squeeze width of the coefficient-predicting MLP
        squeeze = make_divisible(inp // reduction, 4)
        if squeeze < 4:
            squeeze = 4
        self.fc = nn.Sequential(
            nn.Linear(inp, squeeze),
            nn.ReLU(), nn.Linear(squeeze, oup * self.exp), nn.Hardsigmoid())

        if g is None:
            g = (1, 1)
        self.g = g[1]
        if self.g != 1 and expansion:
            self.g = inp // self.g
        self.gc = inp // self.g
        # precompute a channel permutation that rotates the groups by one and
        # the channels within each group by one; indexing with it yields the
        # "shifted" copy of the input used in forward
        index = paddle.reshape(paddle.arange(inp), [1, inp, 1, 1])
        index = paddle.reshape(index, [1, self.g, self.gc, 1, 1])
        indexgs = paddle.split(index, [1, self.g - 1], axis=1)
        indexgs = paddle.concat((indexgs[1], indexgs[0]), axis=1)
        indexs = paddle.split(indexgs, [1, self.gc - 1], axis=2)
        indexs = paddle.concat((indexs[1], indexs[0]), axis=2)
        self.index = paddle.reshape(indexs, [inp])
        self.expansion = expansion

    def forward(self, x):
        x_in = x
        x_out = x
        b, c, _, _ = x_in.shape

        # predict per-channel coefficients from globally pooled context and
        # map them from [0, 1] to [-act_max, act_max]
        # (self.act_max already holds 2 * act_max)
        y = self.avg_pool(x_in)
        y = paddle.reshape(y, [b, c])
        y = self.fc(y)
        y = paddle.reshape(y, [b, self.oup * self.exp, 1, 1])
        y = (y - 0.5) * self.act_max

        # group-shifted copy of the input, gathered on-device
        x2 = paddle.index_select(x_out, self.index, axis=1)

        if self.exp == 4:
            a1, b1, a2, b2 = paddle.split(y, 4, axis=1)
            a1 = a1 + self.init_a[0]
            a2 = a2 + self.init_a[1]
            b1 = b1 + self.init_b[0]
            b2 = b2 + self.init_b[1]
            z1 = x_out * a1 + x2 * b1
            z2 = x_out * a2 + x2 * b2
            out = paddle.maximum(z1, z2)
        elif self.exp == 2:
            a1, b1 = paddle.split(y, 2, axis=1)
            a1 = a1 + self.init_a[0]
            b1 = b1 + self.init_b[0]
            out = x_out * a1 + x2 * b1
        return out
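

# In effect, DYShiftMax computes a dynamic shift-max (a summary sketch of the
# math above, not extra behavior): with x' the group-shifted copy of x and the
# coefficients (a_i, b_i) predicted per channel from pooled context,
#
#     out = max(a1 * x + b1 * x', a2 * x + b2 * x')   when exp == 4
#     out = a1 * x + b1 * x'                          when exp == 2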


class DYMicroBlock(nn.Layer):
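    """
    Micro-block built from grouped pointwise and spatially separable depthwise
    convolutions with DYShiftMax activations. Depending on `ch_per_group` and
    `groups_1x1`, the block is assembled as depthwise-then-pointwise,
    pointwise-only, or the full pointwise -> depthwise -> pointwise sequence,
    with optional channel shuffles in between. A residual connection is added
    when stride is 1 and the channel count is unchanged.
    """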

    def __init__(self,
                 inp,
                 oup,
                 kernel_size=3,
                 stride=1,
                 ch_exp=(2, 2),
                 ch_per_group=4,
                 groups_1x1=(1, 1),
                 depthsep=True,
                 shuffle=False,
                 activation_cfg=None):
        super(DYMicroBlock, self).__init__()

        self.identity = stride == 1 and inp == oup

        y1, y2, y3 = activation_cfg['dy']
        act_reduction = 8 * activation_cfg['ratio']
        init_a = activation_cfg['init_a']
        init_b = activation_cfg['init_b']

        t1 = ch_exp
        gs1 = ch_per_group
        hidden_fft, g1, g2 = groups_1x1
        hidden_dim2 = inp * t1[0] * t1[1]

        if gs1[0] == 0:
            self.layers = nn.Sequential(
                DepthSpatialSepConv(inp, t1, kernel_size, stride),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y2 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=False) if y2 > 0 else nn.ReLU6(),
                ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
                ChannelShuffle(hidden_dim2 // 2)
                if shuffle and y2 != 0 else nn.Sequential(),
                GroupConv(hidden_dim2, oup, (g1, g2)),
                DYShiftMax(
                    oup,
                    oup,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction // 2,
                    init_b=[0.0, 0.0],
                    g=(g1, g2),
                    expansion=False) if y3 > 0 else nn.Sequential(),
                ChannelShuffle(g2) if shuffle else nn.Sequential(),
                ChannelShuffle(oup // 2)
                if shuffle and oup % 2 == 0 and y3 != 0 else nn.Sequential(), )
        elif g2 == 0:
            self.layers = nn.Sequential(
                GroupConv(inp, hidden_dim2, gs1),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction,
                    init_b=[0.0, 0.0],
                    g=gs1,
                    expansion=False) if y3 > 0 else nn.Sequential(), )
        else:
            self.layers = nn.Sequential(
                GroupConv(inp, hidden_dim2, gs1),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y1 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=False) if y1 > 0 else nn.ReLU6(),
                ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
                DepthSpatialSepConv(hidden_dim2, (1, 1), kernel_size, stride)
                if depthsep else
                DepthConv(hidden_dim2, hidden_dim2, kernel_size, stride),
                nn.Sequential(),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y2 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=True) if y2 > 0 else nn.ReLU6(),
                ChannelShuffle(hidden_dim2 // 4)
                if shuffle and y1 != 0 and y2 != 0 else
                (nn.Sequential() if y1 == 0 and y2 == 0 else
                 ChannelShuffle(hidden_dim2 // 2)),
                GroupConv(hidden_dim2, oup, (g1, g2)),
                DYShiftMax(
                    oup,
                    oup,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction // 2
                    if oup < hidden_dim2 else act_reduction,
                    init_b=[0.0, 0.0],
                    g=(g1, g2),
                    expansion=False) if y3 > 0 else nn.Sequential(),
                ChannelShuffle(g2) if shuffle else nn.Sequential(),
                ChannelShuffle(oup // 2)
                if shuffle and y3 != 0 else nn.Sequential(), )

    def forward(self, x):
        identity = x
        out = self.layers(x)
        if self.identity:
            out = out + identity
        return out


class MicroNet(nn.Layer):
    """
    The MicroNet backbone network for the recognition module.
    Args:
        mode (str): one of {'M0', 'M1', 'M2', 'M3'}. The four variants target
            four computational budgets (4M, 6M, 12M and 21M MAdds).
            Default: 'M3'.
    """

    def __init__(self, mode='M3', **kwargs):
        super(MicroNet, self).__init__()
        self.cfgs = get_micronet_config(mode)

        activation_cfg = {}
        if mode == 'M0':
            input_channel = 4
            stem_groups = 2, 2
            out_ch = 384
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M1':
            input_channel = 6
            stem_groups = 3, 2
            out_ch = 576
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M2':
            input_channel = 8
            stem_groups = 4, 2
            out_ch = 768
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M3':
            input_channel = 12
            stem_groups = 4, 3
            out_ch = 432
            activation_cfg['init_a'] = 1.0, 0.5
            activation_cfg['init_b'] = 0.0, 0.5
        else:
            raise NotImplementedError("mode[" + mode +
                                      "] is not implemented!")

        layers = [StemLayer(3, input_channel, stride=2, groups=stem_groups)]
        for idx, val in enumerate(self.cfgs):
            s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r = val
            t1 = (c1, c2)
            gs1 = (g1, g2)
            gs2 = (c3, g3, g4)
            activation_cfg['dy'] = [y1, y2, y3]
            activation_cfg['ratio'] = r
            output_channel = c
            layers.append(
                DYMicroBlock(
                    input_channel,
                    output_channel,
                    kernel_size=ks,
                    stride=s,
                    ch_exp=t1,
                    ch_per_group=gs1,
                    groups_1x1=gs2,
                    depthsep=True,
                    shuffle=True,
                    activation_cfg=activation_cfg, ))
            input_channel = output_channel
            for i in range(1, n):
                layers.append(
                    DYMicroBlock(
                        input_channel,
                        output_channel,
                        kernel_size=ks,
                        stride=1,
                        ch_exp=t1,
                        ch_per_group=gs1,
                        groups_1x1=gs2,
                        depthsep=True,
                        shuffle=True,
                        activation_cfg=activation_cfg, ))
                input_channel = output_channel

        self.features = nn.Sequential(*layers)
        self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
        self.out_channels = make_divisible(out_ch)

    def forward(self, x):
        x = self.features(x)
        x = self.pool(x)
        return x
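

if __name__ == "__main__":
    # A minimal smoke-test sketch (illustrative only; it assumes a working
    # Paddle install with ppocr importable). The 1 x 3 x 32 x 320 input is a
    # typical recognition-sized image chosen purely as an example.
    model = MicroNet(mode='M3')
    dummy = paddle.rand([1, 3, 32, 320])
    feats = model(dummy)
    print(feats.shape, model.out_channels)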