From 7ef32740a0d6b03749a08797d7097c4d3d768511 Mon Sep 17 00:00:00 2001 From: gengdongjie Date: Wed, 21 Apr 2021 02:13:00 +0800 Subject: [PATCH] update rcnn weight initialization for maskrcnn --- model_zoo/official/cv/maskrcnn/eval.py | 1 + .../cv/maskrcnn/src/maskrcnn/rcnn_cls.py | 12 ++++-- .../cv/maskrcnn/src/maskrcnn/rcnn_mask.py | 41 ++++++++++++------- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/model_zoo/official/cv/maskrcnn/eval.py b/model_zoo/official/cv/maskrcnn/eval.py index 9044227597..c7076485bf 100644 --- a/model_zoo/official/cv/maskrcnn/eval.py +++ b/model_zoo/official/cv/maskrcnn/eval.py @@ -130,3 +130,4 @@ if __name__ == '__main__': print("Start Eval!") maskrcnn_eval(mindrecord_file, args_opt.checkpoint_path, args_opt.ann_file) + print("ckpt_path=", args_opt.checkpoint_path) diff --git a/model_zoo/official/cv/maskrcnn/src/maskrcnn/rcnn_cls.py b/model_zoo/official/cv/maskrcnn/src/maskrcnn/rcnn_cls.py index 606a7d054d..36255425e5 100644 --- a/model_zoo/official/cv/maskrcnn/src/maskrcnn/rcnn_cls.py +++ b/model_zoo/official/cv/maskrcnn/src/maskrcnn/rcnn_cls.py @@ -22,8 +22,10 @@ from mindspore.common.tensor import Tensor from mindspore.common.initializer import initializer from mindspore.common.parameter import Parameter + class DenseNoTranpose(nn.Cell): """Dense method""" + def __init__(self, input_channels, output_channels, weight_init): super(DenseNoTranpose, self).__init__() self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float32)) @@ -35,8 +37,10 @@ class DenseNoTranpose(nn.Cell): output = self.bias_add(self.matmul(x, self.weight), self.bias) return output + class FpnCls(nn.Cell): """dense layer of classification and box head""" + def __init__(self, input_channels, output_channels, num_classes, pool_size): super(FpnCls, self).__init__() representation_size = input_channels * pool_size * pool_size @@ -71,6 +75,7 @@ class FpnCls(nn.Cell): return cls_scores, reg_scores + class RcnnCls(nn.Cell): """ Rcnn for classification and box regression subnet. @@ -89,6 +94,7 @@ class RcnnCls(nn.Cell): RcnnCls(config=config, representation_size = 1024, batch_size=2, num_classes = 81, \ target_means=(0., 0., 0., 0.), target_stds=(0.1, 0.1, 0.2, 0.2)) """ + def __init__(self, config, batch_size, @@ -161,17 +167,17 @@ class RcnnCls(nn.Cell): loss_cls, _ = self.loss_cls(cls_score, labels) weights = self.cast(weights, mstype.float16) loss_cls = loss_cls * weights - loss_cls = self.sum_loss(loss_cls, (0,)) / self.sum_loss(weights, (0,)) + loss_cls = self.sum_loss(loss_cls, (0,)) / (self.sum_loss(weights, (0,)) + 1e-5) # loss_reg bbox_weights = self.cast(self.onehot(bbox_weights, self.num_classes, self.on_value, self.off_value), mstype.float16) - bbox_weights = bbox_weights * self.rmv_first_tensor # * self.rmv_first_tensor exclude background + bbox_weights = bbox_weights * self.rmv_first_tensor # * self.rmv_first_tensor exclude background pos_bbox_pred = self.reshape(bbox_pred, (self.num_bboxes, -1, 4)) loss_reg = self.loss_bbox(pos_bbox_pred, bbox_targets) loss_reg = self.sum_loss(loss_reg, (2,)) loss_reg = loss_reg * bbox_weights - loss_reg = loss_reg / self.sum_loss(weights, (0,)) + loss_reg = loss_reg / (self.sum_loss(weights, (0,)) + 1e-5) loss_reg = self.sum_loss(loss_reg, (0, 1)) return loss_cls, loss_reg diff --git a/model_zoo/official/cv/maskrcnn/src/maskrcnn/rcnn_mask.py b/model_zoo/official/cv/maskrcnn/src/maskrcnn/rcnn_mask.py index 08e4f9c3e6..b5d5d28721 100644 --- a/model_zoo/official/cv/maskrcnn/src/maskrcnn/rcnn_mask.py +++ b/model_zoo/official/cv/maskrcnn/src/maskrcnn/rcnn_mask.py @@ -19,52 +19,64 @@ import mindspore.common.dtype as mstype import mindspore.nn as nn from mindspore.ops import operations as P from mindspore.common.tensor import Tensor -from mindspore.common.initializer import initializer -def _conv(in_channels, out_channels, kernel_size=1, stride=1, padding=0, pad_mode='pad'): + +def _conv(in_channels, out_channels, kernel_size=1, stride=1, padding=0, pad_mode='pad', gain=1): """Conv2D wrapper.""" shape = (out_channels, in_channels, kernel_size, kernel_size) - weights = initializer("XavierUniform", shape=shape, dtype=mstype.float32) + # xavier_normal + fan_in = in_channels * kernel_size * kernel_size + fan_out = out_channels * kernel_size * kernel_size + std = gain * (2 / (fan_in + fan_out)) ** 0.5 + weights = Tensor(np.random.normal(loc=0.0, scale=std, size=shape).astype(np.float32)) shape_bias = (out_channels,) bias = Tensor(np.array(np.zeros(shape_bias)).astype(np.float32)) return nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=weights, has_bias=True, bias_init=bias) -def _convTanspose(in_channels, out_channels, kernel_size=1, stride=1, padding=0, pad_mode='pad'): + +def _convTanspose(in_channels, out_channels, kernel_size=1, stride=1, padding=0, pad_mode='pad', + gain=1): """ConvTranspose wrapper.""" shape = (out_channels, in_channels, kernel_size, kernel_size) - weights = initializer("XavierUniform", shape=shape, dtype=mstype.float32) + # xavier_normal + fan_in = in_channels * kernel_size * kernel_size + fan_out = out_channels * kernel_size * kernel_size + std = gain * (2 / (fan_in + fan_out)) ** 0.5 + weights = Tensor(np.random.normal(loc=0.0, scale=std, size=shape).astype(np.float32)) shape_bias = (out_channels,) bias = Tensor(np.array(np.zeros(shape_bias)).astype(np.float32)) return nn.Conv2dTranspose(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=weights, has_bias=True, bias_init=bias) + class FpnMask(nn.Cell): """conv layers of mask head""" + def __init__(self, input_channels, output_channels, num_classes): super(FpnMask, self).__init__() - self.mask_conv1 = _conv(input_channels, output_channels, kernel_size=3, + self.mask_conv1 = _conv(input_channels, output_channels, kernel_size=3, gain=2 ** 0.5, pad_mode="same").to_float(mstype.float16) self.mask_relu1 = P.ReLU() - self.mask_conv2 = _conv(output_channels, output_channels, kernel_size=3, + self.mask_conv2 = _conv(output_channels, output_channels, kernel_size=3, gain=2 ** 0.5, pad_mode="same").to_float(mstype.float16) self.mask_relu2 = P.ReLU() - self.mask_conv3 = _conv(output_channels, output_channels, kernel_size=3, + self.mask_conv3 = _conv(output_channels, output_channels, kernel_size=3, gain=2 ** 0.5, pad_mode="same").to_float(mstype.float16) self.mask_relu3 = P.ReLU() - self.mask_conv4 = _conv(output_channels, output_channels, kernel_size=3, + self.mask_conv4 = _conv(output_channels, output_channels, kernel_size=3, gain=2 ** 0.5, pad_mode="same").to_float(mstype.float16) self.mask_relu4 = P.ReLU() - self.mask_deconv5 = _convTanspose(output_channels, output_channels, kernel_size=2, + self.mask_deconv5 = _convTanspose(output_channels, output_channels, kernel_size=2, gain=2 ** 0.5, stride=2, pad_mode="valid").to_float(mstype.float16) self.mask_relu5 = P.ReLU() - self.mask_conv6 = _conv(output_channels, num_classes, kernel_size=1, stride=1, + self.mask_conv6 = _conv(output_channels, num_classes, kernel_size=1, stride=1, gain=2, pad_mode="valid").to_float(mstype.float16) def construct(self, x): @@ -87,6 +99,7 @@ class FpnMask(nn.Cell): return x + class RcnnMask(nn.Cell): """ Rcnn for mask subnet. @@ -105,6 +118,7 @@ class RcnnMask(nn.Cell): RcnnMask(config=config, representation_size = 1024, batch_size=2, num_classes = 81, \ target_means=(0., 0., 0., 0.), target_stds=(0.1, 0.1, 0.2, 0.2)) """ + def __init__(self, config, batch_size, @@ -155,20 +169,19 @@ class RcnnMask(nn.Cell): return out - def loss(self, masks_fb_pred, bbox_weights, weights, masks_fb_targets): """Loss method.""" weights = self.cast(weights, mstype.float16) bbox_weights = self.cast(self.onehot(bbox_weights, self.num_classes, self.on_value, self.off_value), mstype.float16) - bbox_weights = bbox_weights * self.rmv_first_tensor # * self.rmv_first_tensor exclude background + bbox_weights = bbox_weights * self.rmv_first_tensor # * self.rmv_first_tensor exclude background # loss_mask_fb masks_fb_targets = self.cast(masks_fb_targets, mstype.float16) loss_mask_fb = self.loss_mask(masks_fb_pred, masks_fb_targets) loss_mask_fb = self.mean_loss(loss_mask_fb, (2, 3)) loss_mask_fb = loss_mask_fb * bbox_weights - loss_mask_fb = loss_mask_fb / self.sum_loss(weights, (0,)) + loss_mask_fb = loss_mask_fb / (self.sum_loss(weights, (0,)) + 1e-5) loss_mask_fb = self.sum_loss(loss_mask_fb, (0, 1)) return loss_mask_fb