# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is referred from:
https://github.com/liyunsheng13/micronet/blob/main/backbone/micronet.py
https://github.com/liyunsheng13/micronet/blob/main/backbone/activation.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn

from ppocr.modeling.backbones.det_mobilenet_v3 import make_divisible

M0_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 8, 3, 2, 2, 0, 4, 8, 2, 2, 2, 0, 1, 1],
    [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 2, 1, 1],
    [2, 1, 16, 5, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 4, 4, 4, 32, 4, 4, 2, 2, 1, 1],
    [2, 1, 64, 5, 1, 4, 8, 8, 64, 8, 8, 2, 2, 1, 1],
    [1, 1, 96, 3, 1, 4, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 384, 3, 1, 4, 12, 12, 0, 0, 0, 2, 2, 1, 2],
]
M1_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 8, 3, 2, 2, 0, 6, 8, 2, 2, 2, 0, 1, 1],
    [2, 1, 16, 3, 2, 2, 0, 8, 16, 4, 4, 2, 2, 1, 1],
    [2, 1, 16, 5, 2, 2, 0, 16, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 6, 4, 4, 32, 4, 4, 2, 2, 1, 1],
    [2, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 1],
    [1, 1, 96, 3, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 576, 3, 1, 6, 12, 12, 0, 0, 0, 2, 2, 1, 2],
]
M2_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 0, 1, 1],
    [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 2, 2, 1, 1],
    [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 2, 2, 1, 2],
    [1, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 2],
    [2, 1, 96, 5, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 128, 3, 1, 6, 12, 12, 128, 8, 8, 2, 2, 1, 2],
    [1, 1, 768, 3, 1, 6, 16, 16, 0, 0, 0, 2, 2, 1, 2],
]
M3_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 0, 2, 0, 1],
    [2, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 0, 2, 0, 1],
    [1, 1, 24, 3, 2, 2, 0, 24, 24, 4, 4, 0, 2, 0, 1],
    [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 0, 2, 0, 1],
    [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 0, 2, 0, 2],
    [1, 1, 64, 5, 1, 6, 8, 8, 48, 8, 8, 0, 2, 0, 2],
    [1, 1, 80, 5, 1, 6, 8, 8, 80, 8, 8, 0, 2, 0, 2],
    [1, 1, 80, 5, 1, 6, 10, 10, 80, 8, 8, 0, 2, 0, 2],
    [1, 1, 120, 5, 1, 6, 10, 10, 120, 10, 10, 0, 2, 0, 2],
    [1, 1, 120, 5, 1, 6, 12, 12, 120, 10, 10, 0, 2, 0, 2],
    [1, 1, 144, 3, 1, 6, 12, 12, 144, 12, 12, 0, 2, 0, 2],
    [1, 1, 432, 3, 1, 3, 12, 12, 0, 0, 0, 0, 2, 0, 2],
]


def get_micronet_config(mode):
    return eval(mode + '_cfgs')


class MaxGroupPooling(nn.Layer):
    def __init__(self, channel_per_group=2):
        super(MaxGroupPooling, self).__init__()
        self.channel_per_group = channel_per_group

    def forward(self, x):
        if self.channel_per_group == 1:
            return x
        # max over each group of channel_per_group consecutive channels
        b, c, h, w = x.shape
        y = paddle.reshape(x, [b, c // self.channel_per_group, -1, h, w])
        out = paddle.max(y, axis=2)
        return out


class SpatialSepConvSF(nn.Layer):
    def __init__(self, inp, oups, kernel_size, stride):
        super(SpatialSepConvSF, self).__init__()

        oup1, oup2 = oups
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp,
                oup1, (kernel_size, 1), (stride, 1), (kernel_size // 2, 0),
                bias_attr=False,
                groups=1),
            nn.BatchNorm2D(oup1),
            nn.Conv2D(
                oup1,
                oup1 * oup2, (1, kernel_size), (1, stride),
                (0, kernel_size // 2),
                bias_attr=False,
                groups=oup1),
            nn.BatchNorm2D(oup1 * oup2),
            ChannelShuffle(oup1), )

    def forward(self, x):
        out = self.conv(x)
        return out

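
# ChannelShuffle (defined below; it is referenced above in SpatialSepConvSF,
# which is safe because the name is resolved at call time) interleaves
# channels across groups so information can flow between group-convolution
# branches. For example, with c = 6 and groups = 2 the channel order
# [0 1 2 3 4 5] becomes [0 3 1 4 2 5]: reshape to (b, g, c/g, h, w),
# swap the group and per-group axes, then flatten back to (b, c, h, w).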
class ChannelShuffle(nn.Layer):
    def __init__(self, groups):
        super(ChannelShuffle, self).__init__()
        self.groups = groups

    def forward(self, x):
        b, c, h, w = x.shape
        channels_per_group = c // self.groups
        # reshape, swap the group and per-group axes, then flatten back
        x = paddle.reshape(x, [b, self.groups, channels_per_group, h, w])
        x = paddle.transpose(x, (0, 2, 1, 3, 4))
        out = paddle.reshape(x, [b, -1, h, w])
        return out


class StemLayer(nn.Layer):
    def __init__(self, inp, oup, stride, groups=(4, 4)):
        super(StemLayer, self).__init__()

        g1, g2 = groups
        self.stem = nn.Sequential(
            SpatialSepConvSF(inp, groups, 3, stride),
            MaxGroupPooling(2) if g1 * g2 == 2 * oup else nn.ReLU6())

    def forward(self, x):
        out = self.stem(x)
        return out


class DepthSpatialSepConv(nn.Layer):
    def __init__(self, inp, expand, kernel_size, stride):
        super(DepthSpatialSepConv, self).__init__()

        exp1, exp2 = expand
        hidden_dim = inp * exp1
        oup = inp * exp1 * exp2

        self.conv = nn.Sequential(
            nn.Conv2D(
                inp,
                inp * exp1, (kernel_size, 1), (stride, 1),
                (kernel_size // 2, 0),
                bias_attr=False,
                groups=inp),
            nn.BatchNorm2D(inp * exp1),
            nn.Conv2D(
                hidden_dim,
                oup, (1, kernel_size),
                1, (0, kernel_size // 2),
                bias_attr=False,
                groups=hidden_dim),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        x = self.conv(x)
        return x


class GroupConv(nn.Layer):
    def __init__(self, inp, oup, groups=2):
        super(GroupConv, self).__init__()
        self.inp = inp
        self.oup = oup
        self.groups = groups
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp, oup, 1, 1, 0, bias_attr=False, groups=self.groups[0]),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        x = self.conv(x)
        return x


class DepthConv(nn.Layer):
    def __init__(self, inp, oup, kernel_size, stride):
        super(DepthConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp,
                oup,
                kernel_size,
                stride,
                kernel_size // 2,
                bias_attr=False,
                groups=inp),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        out = self.conv(x)
        return out

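
# DYShiftMax is MicroNet's dynamic activation. A squeeze-and-excitation style
# branch (global average pool -> FC -> ReLU -> FC -> hard sigmoid) predicts
# per-channel coefficients a_k, b_k from the input. The output fuses the
# feature map x with a group-shifted copy x_s:
#     out = max_k(a_k * x + b_k * x_s)
# over k in {1, 2} when act_relu is set, and a single affine branch otherwise.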
class DYShiftMax(nn.Layer):
    def __init__(self,
                 inp,
                 oup,
                 reduction=4,
                 act_max=1.0,
                 act_relu=True,
                 init_a=[0.0, 0.0],
                 init_b=[0.0, 0.0],
                 relu_before_pool=False,
                 g=None,
                 expansion=False):
        super(DYShiftMax, self).__init__()
        self.oup = oup
        self.act_max = act_max * 2
        self.act_relu = act_relu
        self.avg_pool = nn.Sequential(nn.ReLU() if relu_before_pool else
                                      nn.Sequential(), nn.AdaptiveAvgPool2D(1))
        # four coefficients (a1, b1, a2, b2) for the max of two branches,
        # two (a1, b1) for the single affine branch
        self.exp = 4 if act_relu else 2
        self.init_a = init_a
        self.init_b = init_b

        # determine squeeze
        squeeze = make_divisible(inp // reduction, 4)
        if squeeze < 4:
            squeeze = 4

        self.fc = nn.Sequential(
            nn.Linear(inp, squeeze),
            nn.ReLU(), nn.Linear(squeeze, oup * self.exp), nn.Hardsigmoid())

        if g is None:
            g = (1, 1)  # fall back to a single group; g is indexed below
        self.g = g[1]
        if self.g != 1 and expansion:
            self.g = inp // self.g

        self.gc = inp // self.g
        # precompute the channel permutation that rotates the groups (and the
        # channels within each group) by one position; the "shifted" copy of
        # the feature map is then a single gather along the channel axis
        index = paddle.arange(inp)
        index = paddle.reshape(index, [1, self.g, self.gc, 1, 1])
        indexgs = paddle.split(index, [1, self.g - 1], axis=1)
        indexgs = paddle.concat((indexgs[1], indexgs[0]), axis=1)
        indexs = paddle.split(indexgs, [1, self.gc - 1], axis=2)
        indexs = paddle.concat((indexs[1], indexs[0]), axis=2)
        self.index = paddle.reshape(indexs, [inp])
        self.expansion = expansion

    def forward(self, x):
        x_in = x
        x_out = x

        b, c, _, _ = x_in.shape
        y = self.avg_pool(x_in)
        y = paddle.reshape(y, [b, c])
        y = self.fc(y)
        y = paddle.reshape(y, [b, self.oup * self.exp, 1, 1])
        y = (y - 0.5) * self.act_max

        # gather the group-shifted copy of the feature map; index_select keeps
        # the op in-graph (the original numpy round-trip detached gradients)
        x2 = paddle.index_select(x_out, self.index, axis=1)

        if self.exp == 4:
            a1, b1, a2, b2 = paddle.split(
                y, y.shape[1] // self.oup, axis=1)

            a1 = a1 + self.init_a[0]
            a2 = a2 + self.init_a[1]
            b1 = b1 + self.init_b[0]
            b2 = b2 + self.init_b[1]

            z1 = x_out * a1 + x2 * b1
            z2 = x_out * a2 + x2 * b2
            out = paddle.maximum(z1, z2)
        elif self.exp == 2:
            a1, b1 = paddle.split(y, y.shape[1] // self.oup, axis=1)
            a1 = a1 + self.init_a[0]
            b1 = b1 + self.init_b[0]
            out = x_out * a1 + x2 * b1

        return out


class DYMicroBlock(nn.Layer):
    def __init__(self,
                 inp,
                 oup,
                 kernel_size=3,
                 stride=1,
                 ch_exp=(2, 2),
                 ch_per_group=4,
                 groups_1x1=(1, 1),
                 depthsep=True,
                 shuffle=False,
                 activation_cfg=None):
        super(DYMicroBlock, self).__init__()

        self.identity = stride == 1 and inp == oup

        y1, y2, y3 = activation_cfg['dy']
        act_reduction = 8 * activation_cfg['ratio']
        init_a = activation_cfg['init_a']
        init_b = activation_cfg['init_b']

        t1 = ch_exp
        gs1 = ch_per_group
        hidden_fft, g1, g2 = groups_1x1
        hidden_dim2 = inp * t1[0] * t1[1]

        if gs1[0] == 0:
            self.layers = nn.Sequential(
                DepthSpatialSepConv(inp, t1, kernel_size, stride),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y2 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=False) if y2 > 0 else nn.ReLU6(),
                ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
                ChannelShuffle(hidden_dim2 // 2)
                if shuffle and y2 != 0 else nn.Sequential(),
                GroupConv(hidden_dim2, oup, (g1, g2)),
                DYShiftMax(
                    oup,
                    oup,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction // 2,
                    init_b=[0.0, 0.0],
                    g=(g1, g2),
                    expansion=False) if y3 > 0 else nn.Sequential(),
                ChannelShuffle(g2) if shuffle else nn.Sequential(),
                ChannelShuffle(oup // 2)
                if shuffle and oup % 2 == 0 and y3 != 0 else nn.Sequential(), )
        elif g2 == 0:
            self.layers = nn.Sequential(
                GroupConv(inp, hidden_dim2, gs1),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction,
                    init_b=[0.0, 0.0],
                    g=gs1,
                    expansion=False) if y3 > 0 else nn.Sequential(), )
        else:
            self.layers = nn.Sequential(
                GroupConv(inp, hidden_dim2, gs1),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y1 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=False) if y1 > 0 else nn.ReLU6(),
                ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
                DepthSpatialSepConv(hidden_dim2, (1, 1), kernel_size, stride)
                if depthsep else
                DepthConv(hidden_dim2, hidden_dim2, kernel_size, stride),
                nn.Sequential(),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y2 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=True) if y2 > 0 else nn.ReLU6(),
                ChannelShuffle(hidden_dim2 // 4)
                if shuffle and y1 != 0 and y2 != 0 else nn.Sequential()
                if y1 == 0 and y2 == 0 else ChannelShuffle(hidden_dim2 // 2),
                GroupConv(hidden_dim2, oup, (g1, g2)),
                DYShiftMax(
                    oup,
                    oup,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction // 2
                    if oup < hidden_dim2 else act_reduction,
                    init_b=[0.0, 0.0],
                    g=(g1, g2),
                    expansion=False) if y3 > 0 else nn.Sequential(),
                ChannelShuffle(g2) if shuffle else nn.Sequential(),
                ChannelShuffle(oup // 2)
                if shuffle and y3 != 0 else nn.Sequential(), )

    def forward(self, x):
        identity = x
        out = self.layers(x)
        if self.identity:
            out = out + identity
        return out

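
# Each cfg row above unpacks as
#     s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
# where s is the stride, n the number of blocks, c the output channels,
# ks the kernel size, (c1, c2) the channel expansion factors, (g1, g2) the
# group sizes, (c3, g3, g4) the 1x1 group-conv channels/groups, (y1, y2, y3)
# the dynamic-activation switches, and r the reduction ratio used for the
# DYShiftMax squeeze.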
class MicroNet(nn.Layer):
    """
    The MicroNet backbone network for the recognition module.
    Args:
        mode(str): {'M0', 'M1', 'M2', 'M3'}
            Four models are proposed based on four different computational
            costs (4M, 6M, 12M, 21M MAdds). Default: 'M3'.
    """

    def __init__(self, mode='M3', **kwargs):
        super(MicroNet, self).__init__()

        self.cfgs = get_micronet_config(mode)

        activation_cfg = {}
        if mode == 'M0':
            input_channel = 4
            stem_groups = 2, 2
            out_ch = 384
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M1':
            input_channel = 6
            stem_groups = 3, 2
            out_ch = 576
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M2':
            input_channel = 8
            stem_groups = 4, 2
            out_ch = 768
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M3':
            input_channel = 12
            stem_groups = 4, 3
            out_ch = 432
            activation_cfg['init_a'] = 1.0, 0.5
            activation_cfg['init_b'] = 0.0, 0.5
        else:
            raise NotImplementedError("mode[" + mode +
                                      "_model] is not implemented!")

        layers = [StemLayer(3, input_channel, stride=2, groups=stem_groups)]

        for val in self.cfgs:
            s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r = val

            t1 = (c1, c2)
            gs1 = (g1, g2)
            gs2 = (c3, g3, g4)
            activation_cfg['dy'] = [y1, y2, y3]
            activation_cfg['ratio'] = r

            output_channel = c
            layers.append(
                DYMicroBlock(
                    input_channel,
                    output_channel,
                    kernel_size=ks,
                    stride=s,
                    ch_exp=t1,
                    ch_per_group=gs1,
                    groups_1x1=gs2,
                    depthsep=True,
                    shuffle=True,
                    activation_cfg=activation_cfg, ))
            input_channel = output_channel
            for _ in range(1, n):
                layers.append(
                    DYMicroBlock(
                        input_channel,
                        output_channel,
                        kernel_size=ks,
                        stride=1,
                        ch_exp=t1,
                        ch_per_group=gs1,
                        groups_1x1=gs2,
                        depthsep=True,
                        shuffle=True,
                        activation_cfg=activation_cfg, ))
                input_channel = output_channel

        self.features = nn.Sequential(*layers)

        self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
        self.out_channels = make_divisible(out_ch)

    def forward(self, x):
        x = self.features(x)
        x = self.pool(x)
        return x
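

if __name__ == "__main__":
    # Minimal smoke test: a sketch, not part of the original file. It builds
    # each variant and pushes a dummy recognition-sized input (1, 3, 32, 100)
    # through the backbone to check that the forward pass runs and to print
    # the output shape; the input size is an assumption for illustration.
    for mode in ["M0", "M1", "M2", "M3"]:
        model = MicroNet(mode=mode)
        x = paddle.randn([1, 3, 32, 100])
        y = model(x)
        print(mode, y.shape, "out_channels =", model.out_channels)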