From: @zhouneng2
Reviewed-by: @linqingke, @oacjiewen
Signed-off-by: @linqingke
Tags: v1.2.0-rc1
@@ -23,7 +23,7 @@ import mindspore.common.dtype as mstype

 class BboxAssignSample(nn.Cell):
     """
-    Bbox assigner and sampler defination.
+    Bbox assigner and sampler definition.

     Args:
         config (dict): Config.

@@ -47,10 +47,10 @@ class BboxAssignSample(nn.Cell):
         cfg = config
         self.batch_size = batch_size

-        self.neg_iou_thr = Tensor(cfg.neg_iou_thr, mstype.float16)
-        self.pos_iou_thr = Tensor(cfg.pos_iou_thr, mstype.float16)
-        self.min_pos_iou = Tensor(cfg.min_pos_iou, mstype.float16)
-        self.zero_thr = Tensor(0.0, mstype.float16)
+        self.neg_iou_thr = Tensor(cfg.neg_iou_thr, mstype.float32)
+        self.pos_iou_thr = Tensor(cfg.pos_iou_thr, mstype.float32)
+        self.min_pos_iou = Tensor(cfg.min_pos_iou, mstype.float32)
+        self.zero_thr = Tensor(0.0, mstype.float32)

         self.num_bboxes = num_bboxes
         self.num_gts = cfg.num_gts
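For context on what these thresholds drive: the assigner follows the usual max-IoU rule, where anchors whose best overlap falls below `neg_iou_thr` become background, anchors above `pos_iou_thr` become foreground, and every ground-truth box claims its single best anchor when that overlap clears `min_pos_iou`. A minimal NumPy sketch of that rule (illustrative only, not the Cell's fused graph implementation):

```python
import numpy as np

def assign_by_iou(ious, neg_iou_thr=0.3, pos_iou_thr=0.7, min_pos_iou=0.3):
    """Toy max-IoU assigner; ious has shape (num_gts, num_anchors).
    Returns -1 (ignore), 0 (negative) or gt_index + 1 (positive) per anchor."""
    max_iou = ious.max(axis=0)         # best gt overlap per anchor
    argmax_iou = ious.argmax(axis=0)   # which gt produced it
    assigned = -1 * np.ones(ious.shape[1], dtype=np.int32)
    assigned[max_iou < neg_iou_thr] = 0               # confident background
    pos = max_iou >= pos_iou_thr
    assigned[pos] = argmax_iou[pos] + 1               # confident foreground
    for g in range(ious.shape[0]):                    # each gt keeps its best anchor
        best = ious[g].argmax()
        if ious[g, best] >= min_pos_iou:
            assigned[best] = g + 1
    return assigned

ious = np.array([[0.1, 0.8, 0.4],
                 [0.2, 0.1, 0.5]], dtype=np.float32)
print(assign_by_iou(ious))   # [0 1 2]
```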
@@ -92,9 +92,9 @@ class BboxAssignSample(nn.Cell):
         self.assigned_pos_ones = Tensor(np.array(np.ones(self.num_expected_pos), dtype=np.int32))
         self.check_neg_mask = Tensor(np.array(np.ones(self.num_expected_neg - self.num_expected_pos), dtype=np.bool))
-        self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(np.float16))
-        self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float16))
-        self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float16))
+        self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(np.float32))
+        self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float32))
+        self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float32))

     def construct(self, gt_bboxes_i, gt_labels_i, valid_mask, bboxes, gt_valids):
         gt_bboxes_i = self.select(self.cast(self.tile(self.reshape(self.cast(gt_valids, mstype.int32), \

@@ -130,7 +130,7 @@ class BboxAssignSample(nn.Cell):
         pos_index, valid_pos_index = self.random_choice_with_mask_pos(self.greater(assigned_gt_inds5, 0))

-        pos_check_valid = self.cast(self.greater(assigned_gt_inds5, 0), mstype.float16)
+        pos_check_valid = self.cast(self.greater(assigned_gt_inds5, 0), mstype.float32)
         pos_check_valid = self.sum_inds(pos_check_valid, -1)
         valid_pos_index = self.less(self.range_pos_size, pos_check_valid)
         pos_index = pos_index * self.reshape(self.cast(valid_pos_index, mstype.int32), (self.num_expected_pos, 1))

@@ -141,7 +141,7 @@ class BboxAssignSample(nn.Cell):
         neg_index, valid_neg_index = self.random_choice_with_mask_neg(self.equal(assigned_gt_inds5, 0))

-        num_pos = self.cast(self.logicalnot(valid_pos_index), mstype.float16)
+        num_pos = self.cast(self.logicalnot(valid_pos_index), mstype.float32)
         num_pos = self.sum_inds(num_pos, -1)
         unvalid_pos_index = self.less(self.range_pos_size, num_pos)
         valid_neg_index = self.logicaland(self.concat((self.check_neg_mask, unvalid_pos_index)), valid_neg_index)
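The sampling above relies on MindSpore's RandomChoiceWithMask-style ops, which return a fixed number of indices plus a validity mask so every shape stays static in graph mode. A rough NumPy sketch of that contract (the padding behaviour shown is an assumption for illustration):

```python
import numpy as np

def random_choice_with_mask(condition, count, seed=0):
    """Pick up to `count` True positions; pad with zeros and report
    which slots are real so the output shape never changes."""
    rng = np.random.default_rng(seed)
    cand = np.flatnonzero(condition)
    rng.shuffle(cand)
    picked = cand[:count]
    index = np.zeros(count, dtype=np.int32)
    valid = np.zeros(count, dtype=bool)
    index[:picked.size] = picked
    valid[:picked.size] = True
    return index, valid

idx, valid = random_choice_with_mask(np.array([0, 1, 1, 0, 1], dtype=bool), count=4)
print(idx, valid)   # three real indices, one zero-padded slot flagged invalid
```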
@@ -87,8 +87,8 @@ class BboxAssignSampleForRcnn(nn.Cell):
         self.tile = P.Tile()

         # Check
-        self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float16))
-        self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float16))
+        self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float32))
+        self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float32))

         # Init tensor
         self.assigned_gt_inds = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32))

@@ -99,8 +99,8 @@ class BboxAssignSampleForRcnn(nn.Cell):
         self.assigned_pos_ones = Tensor(np.array(np.ones(self.num_expected_pos), dtype=np.int32))
         self.gt_ignores = Tensor(np.array(-1 * np.ones(self.num_gts), dtype=np.int32))
-        self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(np.float16))
-        self.range_amb_size = Tensor(np.arange(self.num_expected_amb).astype(np.float16))
+        self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(np.float32))
+        self.range_amb_size = Tensor(np.arange(self.num_expected_amb).astype(np.float32))
         self.check_neg_mask = Tensor(np.array(np.ones(self.num_expected_neg - self.num_expected_pos), dtype=np.bool))
         if self.use_ambigous_sample:
             self.check_neg_mask = Tensor(

@@ -108,9 +108,9 @@ class BboxAssignSampleForRcnn(nn.Cell):
         check_neg_mask_ignore_end = np.array(np.ones(self.num_expected_neg), dtype=np.bool)
         check_neg_mask_ignore_end[-1] = False
         self.check_neg_mask_ignore_end = Tensor(check_neg_mask_ignore_end)
-        self.bboxs_neg_mask = Tensor(np.zeros((self.num_expected_neg, 4), dtype=np.float16))
-        self.bboxs_amb_mask = Tensor(np.zeros((self.num_expected_amb, 4), dtype=np.float16))
+        self.bboxs_neg_mask = Tensor(np.zeros((self.num_expected_neg, 4), dtype=np.float32))
+        self.bboxs_amb_mask = Tensor(np.zeros((self.num_expected_amb, 4), dtype=np.float32))
         self.labels_neg_mask = Tensor(np.array(np.zeros(self.num_expected_neg), dtype=np.uint8))
         self.labels_amb_mask = Tensor(np.array(np.zeros(self.num_expected_amb) + 2, dtype=np.uint8))

@@ -118,10 +118,10 @@ class BboxAssignSampleForRcnn(nn.Cell):
         self.reshape_shape_amb = (self.num_expected_amb, 1)
         self.reshape_shape_neg = (self.num_expected_neg, 1)

-        self.scalar_zero = Tensor(0.0, dtype=mstype.float16)
-        self.scalar_neg_iou_thr = Tensor(self.neg_iou_thr, dtype=mstype.float16)
-        self.scalar_pos_iou_thr = Tensor(self.pos_iou_thr, dtype=mstype.float16)
-        self.scalar_min_pos_iou = Tensor(self.min_pos_iou, dtype=mstype.float16)
+        self.scalar_zero = Tensor(0.0, dtype=mstype.float32)
+        self.scalar_neg_iou_thr = Tensor(self.neg_iou_thr, dtype=mstype.float32)
+        self.scalar_pos_iou_thr = Tensor(self.pos_iou_thr, dtype=mstype.float32)
+        self.scalar_min_pos_iou = Tensor(self.min_pos_iou, dtype=mstype.float32)

     def construct(self, gt_bboxes_i, gt_labels_i, valid_mask, bboxes, gt_valids):
         gt_bboxes_i = self.select(self.cast(self.tile(self.reshape(self.cast(gt_valids, mstype.int32), \

@@ -173,12 +173,12 @@ class BboxAssignSampleForRcnn(nn.Cell):
         # Get pos index
         pos_index, valid_pos_index = self.random_choice_with_mask_pos(self.greater(assigned_gt_inds5, 0))

-        pos_check_valid = self.cast(self.greater(assigned_gt_inds5, 0), mstype.float16)
+        pos_check_valid = self.cast(self.greater(assigned_gt_inds5, 0), mstype.float32)
         pos_check_valid = self.sum_inds(pos_check_valid, -1)
         valid_pos_index = self.less(self.range_pos_size, pos_check_valid)
         pos_index = pos_index * self.reshape(self.cast(valid_pos_index, mstype.int32), (self.num_expected_pos, 1))

-        num_pos = self.sum_inds(self.cast(self.logicalnot(valid_pos_index), mstype.float16), -1)
+        num_pos = self.sum_inds(self.cast(self.logicalnot(valid_pos_index), mstype.float32), -1)
         valid_pos_index = self.cast(valid_pos_index, mstype.int32)
         pos_index = self.reshape(pos_index, self.reshape_shape_pos)
         valid_pos_index = self.reshape(valid_pos_index, self.reshape_shape_pos)
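The `less(range_pos_size, count)` idiom above rebuilds a fixed-length validity mask from a scalar count, which is why `range_pos_size` is a precomputed arange tensor. In NumPy terms (sizes chosen for illustration):

```python
import numpy as np

num_expected_pos = 8
range_pos_size = np.arange(num_expected_pos, dtype=np.float32)

pos_check_valid = 3.0                       # number of real positives found
valid_pos_index = range_pos_size < pos_check_valid
print(valid_pos_index.astype(np.int32))     # [1 1 1 0 0 0 0 0]
```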
@@ -197,12 +197,12 @@ class BboxAssignSampleForRcnn(nn.Cell):
         if self.use_ambigous_sample:
             amb_index, valid_amb_index = self.random_choice_with_mask_amb(self.equal(assigned_gt_inds5, -3))

-            amb_check_valid = self.cast(self.equal(assigned_gt_inds5, -3), mstype.float16)
+            amb_check_valid = self.cast(self.equal(assigned_gt_inds5, -3), mstype.float32)
             amb_check_valid = self.sum_inds(amb_check_valid, -1)
             valid_amb_index = self.less(self.range_amb_size, amb_check_valid)
             amb_index = amb_index * self.reshape(self.cast(valid_amb_index, mstype.int32), (self.num_expected_amb, 1))

-            num_amb = self.sum_inds(self.cast(self.logicalnot(valid_amb_index), mstype.float16), -1)
+            num_amb = self.sum_inds(self.cast(self.logicalnot(valid_amb_index), mstype.float32), -1)
             valid_amb_index = self.cast(valid_amb_index, mstype.int32)
             amb_index = self.reshape(amb_index, self.reshape_shape_amb)
             valid_amb_index = self.reshape(valid_amb_index, self.reshape_shape_amb)

@@ -35,8 +35,8 @@ def _conv(in_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mod
     shp_weight_conv = (out_channels, in_channels, kernel_size, kernel_size)
     shp_bias_conv = (out_channels,)

-    weights = initializer('Normal', shape=shp_weight_conv, dtype=mstype.float16).to_tensor()
-    bias_conv = initializer(0, shape=shp_bias_conv, dtype=mstype.float16).to_tensor()
+    weights = initializer('Normal', shape=shp_weight_conv, dtype=mstype.float32).to_tensor()
+    bias_conv = initializer(0, shape=shp_bias_conv, dtype=mstype.float32).to_tensor()

     layers = []
     layers += [nn.Conv2d(in_channels, out_channels,
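The `nn.Conv2d` call above is truncated in this view; presumably the explicitly built tensors are passed as the layer's `weight_init`/`bias_init`. A self-contained sketch under that assumption:

```python
import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore.common.initializer import initializer

out_channels, in_channels, kernel_size = 8, 3, 3
weights = initializer('Normal', shape=(out_channels, in_channels, kernel_size, kernel_size),
                      dtype=mstype.float32).to_tensor()
bias_conv = initializer(0, shape=(out_channels,), dtype=mstype.float32).to_tensor()

# the prebuilt tensors become the layer's initial values (assumed completion
# of the truncated call)
conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                 has_bias=True, weight_init=weights, bias_init=bias_conv)
```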
@@ -147,7 +147,7 @@ class Deeptext_VGG16(nn.Cell):
         self.rpn_max_num = config.rpn_max_num

-        self.zeros_for_nms = Tensor(np.zeros((self.rpn_max_num, 3)).astype(np.float16))
+        self.zeros_for_nms = Tensor(np.zeros((self.rpn_max_num, 3)).astype(np.float32))
         self.ones_mask = np.ones((self.rpn_max_num, 1)).astype(np.bool)
         self.zeros_mask = np.zeros((self.rpn_max_num, 1)).astype(np.bool)
         self.bbox_mask = Tensor(np.concatenate((self.ones_mask, self.zeros_mask,

@@ -155,10 +155,10 @@ class Deeptext_VGG16(nn.Cell):
         self.nms_pad_mask = Tensor(np.concatenate((self.ones_mask, self.ones_mask,
                                                    self.ones_mask, self.ones_mask, self.zeros_mask), axis=1))

-        self.test_score_thresh = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float16) * config.test_score_thr)
-        self.test_score_zeros = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float16) * 0)
-        self.test_box_zeros = Tensor(np.ones((self.rpn_max_num, 4)).astype(np.float16) * -1)
-        self.test_iou_thr = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float16) * config.test_iou_thr)
+        self.test_score_thresh = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float32) * config.test_score_thr)
+        self.test_score_zeros = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float32) * 0)
+        self.test_box_zeros = Tensor(np.ones((self.rpn_max_num, 4)).astype(np.float32) * -1)
+        self.test_iou_thr = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float32) * config.test_iou_thr)
         self.test_max_per_img = config.test_max_per_img
         self.nms_test = P.NMSWithMask(config.test_iou_thr)
         self.softmax = P.Softmax(axis=1)
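These constant tensors exist because graph mode favours static shapes at test time: instead of filtering proposals, low scores are zeroed with a `Select` against `test_score_thresh`, so the tensor keeps its `(rpn_max_num, 1)` shape. In NumPy terms (0.1 stands in for `config.test_score_thr`, an assumed example value):

```python
import numpy as np

rpn_max_num = 5
scores = np.array([[0.9], [0.05], [0.6], [0.02], [0.4]], dtype=np.float32)
test_score_thresh = np.ones((rpn_max_num, 1), dtype=np.float32) * 0.1
test_score_zeros = np.zeros((rpn_max_num, 1), dtype=np.float32)

# equivalent of Select(scores > thresh, scores, zeros): weak proposals
# are zeroed, not removed, so shapes stay fixed
kept = np.where(scores > test_score_thresh, scores, test_score_zeros)
print(kept.ravel())   # [0.9 0.  0.6 0.  0.4]
```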
@@ -174,14 +174,14 @@ class Deeptext_VGG16(nn.Cell):
         # Init tensor
         self.use_ambigous_sample = config.use_ambigous_sample
         roi_align_index = [np.array(np.ones((config.num_expected_pos_stage2 + config.num_expected_neg_stage2, 1)) * i,
-                                    dtype=np.float16) for i in range(self.train_batch_size)]
+                                    dtype=np.float32) for i in range(self.train_batch_size)]
         if self.use_ambigous_sample:
             roi_align_index = [np.array(np.ones((
                 config.num_expected_pos_stage2 + config.num_expected_amb_stage2 + config.num_expected_neg_stage2,
                 1)) * i,
-                                        dtype=np.float16) for i in range(self.train_batch_size)]
+                                        dtype=np.float32) for i in range(self.train_batch_size)]

-        roi_align_index_test = [np.array(np.ones((config.rpn_max_num, 1)) * i, dtype=np.float16) \
+        roi_align_index_test = [np.array(np.ones((config.rpn_max_num, 1)) * i, dtype=np.float32) \
                                 for i in range(self.test_batch_size)]

         self.roi_align_index_tensor = Tensor(np.concatenate(roi_align_index))
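The `roi_align_index` lists build the constant batch-index column that RoIAlign-style ops consume as the first entry of each `[batch_idx, x1, y1, x2, y2]` row; concatenating it with the sampled boxes yields the `(N, 5)` RoI tensor. A small sketch of the construction (sizes are illustrative):

```python
import numpy as np

train_batch_size = 2
rois_per_image = 3   # stands in for num_expected_pos_stage2 + num_expected_neg_stage2

# one constant column of batch indices per image, then concatenated
roi_align_index = [np.ones((rois_per_image, 1), dtype=np.float32) * i
                   for i in range(train_batch_size)]
index_column = np.concatenate(roi_align_index)
print(index_column.ravel())          # [0. 0. 0. 1. 1. 1.]

boxes = np.zeros((train_batch_size * rois_per_image, 4), dtype=np.float32)
rois = np.concatenate((index_column, boxes), axis=1)
print(rois.shape)                    # (6, 5): batch index + box corners
```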
@@ -197,8 +197,8 @@ class Deeptext_VGG16(nn.Cell):
     def construct(self, img_data, img_metas, gt_bboxes, gt_labels, gt_valids):
         # f1, f2, f3, f4, f5 = self.vgg16_feature_extractor(img_data)
         _, _, _, f4, f5 = self.vgg16_feature_extractor(img_data)
-        f4 = self.cast(f4, mstype.float16)
-        f5 = self.cast(f5, mstype.float16)
+        f4 = self.cast(f4, mstype.float32)
+        f5 = self.cast(f5, mstype.float32)
         x = (f4, f5)

         rpn_loss, cls_score, bbox_pred, rpn_cls_loss, rpn_reg_loss, _ = self.rpn_with_loss(x,

@@ -274,10 +274,10 @@ class Deeptext_VGG16(nn.Cell):
         roi_feats = self.cast(roi_feats, mstype.float32)
         roi_feats = self.concat1((roi_feats, roi_align4_out))

-        roi_feats = self.cast(roi_feats, mstype.float16)
+        roi_feats = self.cast(roi_feats, mstype.float32)
         roi_feats = self.roi_align_fuse(roi_feats)
-        roi_feats = self.cast(roi_feats, mstype.float16)
+        roi_feats = self.cast(roi_feats, mstype.float32)

         rcnn_masks = self.concat(mask_tuple)
         rcnn_masks = F.stop_gradient(rcnn_masks)

@@ -427,6 +427,6 @@ class Deeptext_VGG16(nn.Cell):
         for i in range(num_levels):
             anchors = self.anchor_generators[i].grid_anchors(
                 featmap_sizes[i], self.anchor_strides[i])
-            multi_level_anchors += (Tensor(anchors.astype(np.float16)),)
+            multi_level_anchors += (Tensor(anchors.astype(np.float32)),)

         return multi_level_anchors

@@ -106,7 +106,7 @@ class Proposal(nn.Cell):
         self.tile = P.Tile()
         self.set_train_local(config, training=True)

-        self.multi_10 = Tensor(10.0, mstype.float16)
+        self.multi_10 = Tensor(10.0, mstype.float32)

     def set_train_local(self, config, training=True):
         """Set training flag."""

@@ -133,7 +133,7 @@ class Proposal(nn.Cell):
         self.topKv2 = P.TopK(sorted=True)
         self.topK_shape_stage2 = (self.max_num, 1)
         self.min_float_num = -65536.0
-        self.topK_mask = Tensor(self.min_float_num * np.ones(total_max_topk_input, np.float16))
+        self.topK_mask = Tensor(self.min_float_num * np.ones(total_max_topk_input, np.float32))

     def construct(self, rpn_cls_score_total, rpn_bbox_pred_total, anchor_list):
         proposals_tuple = ()
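`topK_mask` fills the slots of masked-out proposals with `min_float_num` so they can never win the TopK selection performed later in `construct`. A NumPy rendering of the trick:

```python
import numpy as np

min_float_num = -65536.0
scores = np.array([0.7, 0.2, 0.9, 0.4], dtype=np.float32)
masks = np.array([True, True, False, True])

# equivalent of select(masks, scores, topk_mask): invalid slots sink
# far below any real score
scores_using = np.where(masks, scores, min_float_num)
topk_inds = np.argsort(scores_using)[::-1][:2]
print(topk_inds)   # [0 3] -- index 2 is masked out despite the highest raw score
```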
@@ -164,16 +164,16 @@ class Proposal(nn.Cell):
             rpn_cls_score = self.reshape(rpn_cls_score, self.reshape_shape)
             rpn_cls_score = self.activation(rpn_cls_score)

-            rpn_cls_score_process = self.cast(self.squeeze(rpn_cls_score[::, 0::]), mstype.float16)
+            rpn_cls_score_process = self.cast(self.squeeze(rpn_cls_score[::, 0::]), mstype.float32)

-            rpn_bbox_pred_process = self.cast(self.reshape(rpn_bbox_pred, (-1, 4)), mstype.float16)
+            rpn_bbox_pred_process = self.cast(self.reshape(rpn_bbox_pred, (-1, 4)), mstype.float32)

             scores_sorted, topk_inds = self.topKv2(rpn_cls_score_process, self.topK_stage1[idx])

             topk_inds = self.reshape(topk_inds, self.topK_shape[idx])

             bboxes_sorted = self.gatherND(rpn_bbox_pred_process, topk_inds)
-            anchors_sorted = self.cast(self.gatherND(anchors, topk_inds), mstype.float16)
+            anchors_sorted = self.cast(self.gatherND(anchors, topk_inds), mstype.float32)

             proposals_decode = self.decode(anchors_sorted, bboxes_sorted)

@@ -188,7 +188,7 @@ class Proposal(nn.Cell):
         _, _, _, _, scores = self.split(proposals)
         scores = self.squeeze(scores)
-        topk_mask = self.cast(self.topK_mask, mstype.float16)
+        topk_mask = self.cast(self.topK_mask, mstype.float32)
         scores_using = self.select(masks, scores, topk_mask)

         _, topk_inds = self.topKv2(scores_using, self.max_num)

@@ -29,15 +29,18 @@ class DenseNoTranpose(nn.Cell):
     def __init__(self, input_channels, output_channels, weight_init):
         super(DenseNoTranpose, self).__init__()

-        self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16),
+        self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float32),
                                 name="weight")
-        self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor(), name="bias")
+        self.bias = Parameter(initializer("zeros", [output_channels], mstype.float32).to_tensor(), name="bias")

         self.matmul = P.MatMul(transpose_b=False)
         self.bias_add = P.BiasAdd()
+        self.cast = P.Cast()

     def construct(self, x):
-        output = self.bias_add(self.matmul(x, self.weight), self.bias)
+        x = self.cast(x, mstype.float16)
+        weight = self.cast(self.weight, mstype.float16)
+        output = self.bias_add(self.cast(self.matmul(x, weight), mstype.float32), self.bias)
         return output
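This hunk is the one place the PR deliberately keeps float16: master weights and the `BiasAdd` stay in float32, while the `MatMul` itself runs in float16, the fast path on Ascend. A quick NumPy check of the rounding error such a mixed-precision matmul trades for speed (shapes chosen for illustration):

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((4, 1024)).astype(np.float32)
w = rng.standard_normal((1024, 256)).astype(np.float32)

full = x @ w                                               # float32 reference
# cast inputs down, multiply in float16, cast the product back up,
# mirroring the rewritten construct
mixed = (x.astype(np.float16) @ w.astype(np.float16)).astype(np.float32)
print(np.max(np.abs(full - mixed)))   # small but nonzero rounding error
```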
@@ -71,8 +74,8 @@ class Rcnn(nn.Cell):
                  ):
         super(Rcnn, self).__init__()
         cfg = config
-        self.rcnn_loss_cls_weight = Tensor(np.array(cfg.rcnn_loss_cls_weight).astype(np.float16))
-        self.rcnn_loss_reg_weight = Tensor(np.array(cfg.rcnn_loss_reg_weight).astype(np.float16))
+        self.rcnn_loss_cls_weight = Tensor(np.array(cfg.rcnn_loss_cls_weight).astype(np.float32))
+        self.rcnn_loss_reg_weight = Tensor(np.array(cfg.rcnn_loss_reg_weight).astype(np.float32))
         self.rcnn_fc_out_channels = cfg.rcnn_fc_out_channels
         self.target_means = target_means
         self.target_stds = target_stds

@@ -83,16 +86,16 @@ class Rcnn(nn.Cell):
         self.use_ambigous_sample = cfg.use_ambigous_sample

         shape_0 = (self.rcnn_fc_out_channels, representation_size)
-        weights_0 = initializer("XavierUniform", shape=shape_0[::-1], dtype=mstype.float16).to_tensor()
+        weights_0 = initializer("XavierUniform", shape=shape_0[::-1], dtype=mstype.float32).to_tensor()
         shape_1 = (self.rcnn_fc_out_channels, self.rcnn_fc_out_channels)
-        weights_1 = initializer("XavierUniform", shape=shape_1[::-1], dtype=mstype.float16).to_tensor()
+        weights_1 = initializer("XavierUniform", shape=shape_1[::-1], dtype=mstype.float32).to_tensor()
         self.shared_fc_0 = DenseNoTranpose(representation_size, self.rcnn_fc_out_channels, weights_0)
         self.shared_fc_1 = DenseNoTranpose(self.rcnn_fc_out_channels, self.rcnn_fc_out_channels, weights_1)

         cls_weight = initializer('Normal', shape=[num_classes, self.rcnn_fc_out_channels][::-1],
-                                 dtype=mstype.float16).to_tensor()
+                                 dtype=mstype.float32).to_tensor()
         reg_weight = initializer('Normal', shape=[num_classes * 4, self.rcnn_fc_out_channels][::-1],
-                                 dtype=mstype.float16).to_tensor()
+                                 dtype=mstype.float32).to_tensor()
         self.cls_scores = DenseNoTranpose(self.rcnn_fc_out_channels, num_classes, cls_weight)
         self.reg_scores = DenseNoTranpose(self.rcnn_fc_out_channels, num_classes * 4, reg_weight)

@@ -115,7 +118,7 @@ class Rcnn(nn.Cell):
         self.on_value = Tensor(1.0, mstype.float32)
         self.off_value = Tensor(0.0, mstype.float32)
-        self.value = Tensor(1.0, mstype.float16)
+        self.value = Tensor(1.0, mstype.float32)

         self.num_bboxes = (cfg.num_expected_pos_stage2 + cfg.num_expected_neg_stage2) * batch_size
         if self.use_ambigous_sample:

@@ -124,7 +127,7 @@ class Rcnn(nn.Cell):
         rmv_first = np.ones((self.num_bboxes, self.num_classes))
         rmv_first[:, 0] = np.zeros((self.num_bboxes,))
-        self.rmv_first_tensor = Tensor(rmv_first.astype(np.float16))
+        self.rmv_first_tensor = Tensor(rmv_first.astype(np.float32))

         self.num_bboxes_test = cfg.rpn_max_num * cfg.test_batch_size

@@ -145,7 +148,7 @@ class Rcnn(nn.Cell):
         bbox_weights = self.cast(self.logicaland(self.greater(labels, 0), mask), mstype.int32) * labels
         if self.use_ambigous_sample:
             bbox_weights = self.cast(self.logicaland(self.equal(labels, 1), mask), mstype.int32) * labels
-        labels = self.cast(self.onehot(labels, self.num_classes, self.on_value, self.off_value), mstype.float16)
+        labels = self.cast(self.onehot(labels, self.num_classes, self.on_value, self.off_value), mstype.float32)
         bbox_targets = self.tile(self.expandims(bbox_targets, 1), (1, self.num_classes, 1))

         loss, loss_cls, loss_reg, loss_print = self.loss(x_cls, x_reg, bbox_targets, bbox_weights, labels, mask)

@@ -160,12 +163,12 @@ class Rcnn(nn.Cell):
         loss_print = ()
         loss_cls, _ = self.loss_cls(cls_score, labels)

-        weights = self.cast(weights, mstype.float16)
+        weights = self.cast(weights, mstype.float32)
         loss_cls = loss_cls * weights
         loss_cls = self.sum_loss(loss_cls, (0,)) / self.sum_loss(weights, (0,))

         bbox_weights = self.cast(self.onehot(bbox_weights, self.num_classes, self.on_value, self.off_value),
-                                 mstype.float16)
+                                 mstype.float32)
         if not self.use_ambigous_sample:
             bbox_weights = bbox_weights * self.rmv_first_tensor
         pos_bbox_pred = self.reshape(bbox_pred, (self.num_bboxes, -1, 4))
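The `sum(loss * weights) / sum(weights)` pattern above averages the classification loss over valid samples only, so zero-weight padding rows cannot dilute it. Worked example with made-up numbers:

```python
import numpy as np

loss_cls = np.array([0.5, 1.5, 2.0, 0.7], dtype=np.float32)
weights = np.array([1.0, 1.0, 1.0, 0.0], dtype=np.float32)  # last row is padding

masked = loss_cls * weights
print(masked.sum() / weights.sum())   # 4.0 / 3.0 = 1.333..., padding excluded
```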
@@ -25,7 +25,7 @@ from mindspore.common.tensor import Tensor

 class ROIAlign(nn.Cell):
     """
-    Extract RoI features from mulitple feature map.
+    Extract RoI features from multiple feature map.

     Args:
         out_size_h (int) - RoI height.

@@ -61,7 +61,7 @@ class SingleRoIExtractor(nn.Cell):
     """
     Extract RoI features from a single level feature map.

-    If there are mulitple input feature levels, each RoI is mapped to a level
+    If there are multiple input feature levels, each RoI is mapped to a level
     according to its scale.

     Args:

@@ -101,8 +101,8 @@ class SingleRoIExtractor(nn.Cell):
         self.select = P.Select()

         _mode_16 = False
-        self.dtype = np.float16 if _mode_16 else np.float32
-        self.ms_dtype = mstype.float16 if _mode_16 else mstype.float32
+        self.dtype = np.float32 if _mode_16 else np.float32
+        self.ms_dtype = mstype.float32 if _mode_16 else mstype.float32
         self.set_train_local(cfg, training=True)

     def set_train_local(self, config, training=True):

@@ -28,8 +28,8 @@ def _conv(in_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mod
     shp_weight_conv = (out_channels, in_channels, kernel_size, kernel_size)
     shp_bias_conv = (out_channels,)

-    weights = initializer('Normal', shape=shp_weight_conv, dtype=mstype.float16).to_tensor()
-    bias_conv = initializer(0, shape=shp_bias_conv, dtype=mstype.float16).to_tensor()
+    weights = initializer('Normal', shape=shp_weight_conv, dtype=mstype.float32).to_tensor()
+    bias_conv = initializer(0, shape=shp_bias_conv, dtype=mstype.float32).to_tensor()

     layers = []
     layers += [nn.Conv2d(in_channels, out_channels,
@@ -141,7 +141,7 @@ class RPN(nn.Cell):
         self.batch_size = batch_size
         self.test_batch_size = cfg_rpn.test_batch_size
         self.num_layers = 1
-        self.real_ratio = Tensor(np.ones((1, 1)).astype(np.float16))
+        self.real_ratio = Tensor(np.ones((1, 1)).astype(np.float32))

         self.rpn_convs_list = nn.layer.CellList(self._make_rpn_layer(self.num_layers, in_channels, feat_channels,
                                                                      num_anchors, cls_out_channels))

@@ -150,15 +150,15 @@ class RPN(nn.Cell):
         self.reshape = P.Reshape()
         self.concat = P.Concat(axis=0)
         self.fill = P.Fill()
-        self.placeh1 = Tensor(np.ones((1,)).astype(np.float16))
+        self.placeh1 = Tensor(np.ones((1,)).astype(np.float32))

         self.trans_shape = (0, 2, 3, 1)

         self.reshape_shape_reg = (-1, 4)
         self.reshape_shape_cls = (-1,)
-        self.rpn_loss_reg_weight = Tensor(np.array(cfg_rpn.rpn_loss_reg_weight).astype(np.float16))
-        self.rpn_loss_cls_weight = Tensor(np.array(cfg_rpn.rpn_loss_cls_weight).astype(np.float16))
-        self.num_expected_total = Tensor(np.array(cfg_rpn.num_expected_neg * self.batch_size).astype(np.float16))
+        self.rpn_loss_reg_weight = Tensor(np.array(cfg_rpn.rpn_loss_reg_weight).astype(np.float32))
+        self.rpn_loss_cls_weight = Tensor(np.array(cfg_rpn.rpn_loss_cls_weight).astype(np.float32))
+        self.num_expected_total = Tensor(np.array(cfg_rpn.num_expected_neg * self.batch_size).astype(np.float32))
         self.num_bboxes = cfg_rpn.num_bboxes
         self.get_targets = BboxAssignSample(cfg_rpn, self.batch_size, self.num_bboxes, False)
         self.CheckValid = P.CheckValid()

@@ -169,9 +169,9 @@ class RPN(nn.Cell):
         self.cast = P.Cast()
         self.tile = P.Tile()
         self.zeros_like = P.ZerosLike()
-        self.loss = Tensor(np.zeros((1,)).astype(np.float16))
-        self.clsloss = Tensor(np.zeros((1,)).astype(np.float16))
-        self.regloss = Tensor(np.zeros((1,)).astype(np.float16))
+        self.loss = Tensor(np.zeros((1,)).astype(np.float32))
+        self.clsloss = Tensor(np.zeros((1,)).astype(np.float32))
+        self.regloss = Tensor(np.zeros((1,)).astype(np.float32))

     def _make_rpn_layer(self, num_layers, in_channels, feat_channels, num_anchors, cls_out_channels):
         """

@@ -191,18 +191,18 @@ class RPN(nn.Cell):
         shp_weight_conv = (feat_channels, in_channels, 3, 3)
         shp_bias_conv = (feat_channels,)
-        weight_conv = initializer('Normal', shape=shp_weight_conv, dtype=mstype.float16).to_tensor()
-        bias_conv = initializer(0, shape=shp_bias_conv, dtype=mstype.float16).to_tensor()
+        weight_conv = initializer('Normal', shape=shp_weight_conv, dtype=mstype.float32).to_tensor()
+        bias_conv = initializer(0, shape=shp_bias_conv, dtype=mstype.float32).to_tensor()

         shp_weight_cls = (num_anchors * cls_out_channels, feat_channels, 1, 1)
         shp_bias_cls = (num_anchors * cls_out_channels,)
-        weight_cls = initializer('Normal', shape=shp_weight_cls, dtype=mstype.float16).to_tensor()
-        bias_cls = initializer(0, shape=shp_bias_cls, dtype=mstype.float16).to_tensor()
+        weight_cls = initializer('Normal', shape=shp_weight_cls, dtype=mstype.float32).to_tensor()
+        bias_cls = initializer(0, shape=shp_bias_cls, dtype=mstype.float32).to_tensor()

         shp_weight_reg = (num_anchors * 4, feat_channels, 1, 1)
         shp_bias_reg = (num_anchors * 4,)
-        weight_reg = initializer('Normal', shape=shp_weight_reg, dtype=mstype.float16).to_tensor()
-        bias_reg = initializer(0, shape=shp_bias_reg, dtype=mstype.float16).to_tensor()
+        weight_reg = initializer('Normal', shape=shp_weight_reg, dtype=mstype.float32).to_tensor()
+        bias_reg = initializer(0, shape=shp_bias_reg, dtype=mstype.float32).to_tensor()

         rpn_layer.append(RpnRegClsBlock(in_channels, feat_channels, num_anchors, cls_out_channels, \
                                         weight_conv, bias_conv, weight_cls, \

@@ -271,9 +271,9 @@ class RPN(nn.Cell):
                                                                                       mstype.bool_),
                                                                             anchor_using_list, gt_valids_i)

-            bbox_weight = self.cast(bbox_weight, mstype.float16)
-            label = self.cast(label, mstype.float16)
-            label_weight = self.cast(label_weight, mstype.float16)
+            bbox_weight = self.cast(bbox_weight, mstype.float32)
+            label = self.cast(label, mstype.float32)
+            label_weight = self.cast(label_weight, mstype.float32)

             begin = self.slice_index[0]
             end = self.slice_index[0 + 1]
@@ -247,9 +247,9 @@ def image_bgr_rgb(img, img_shape, gt_bboxes, gt_label, gt_num):
 def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     """transpose operation for image"""
     img_data = img.transpose(2, 0, 1).copy()
-    img_data = img_data.astype(np.float16)
-    img_shape = img_shape.astype(np.float16)
-    gt_bboxes = gt_bboxes.astype(np.float16)
+    img_data = img_data.astype(np.float32)
+    img_shape = img_shape.astype(np.float32)
+    gt_bboxes = gt_bboxes.astype(np.float32)
     gt_label = gt_label.astype(np.int32)
     gt_num = gt_num.astype(np.bool)
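The `transpose(2, 0, 1)` converts the decoded HWC image into the CHW layout that MindSpore convolutions expect:

```python
import numpy as np

img = np.zeros((600, 800, 3), dtype=np.uint8)   # HWC, as image decoders produce
img_data = img.transpose(2, 0, 1).copy()        # CHW, as nn.Conv2d consumes
print(img_data.shape)                           # (3, 600, 800)
```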
@@ -327,6 +327,52 @@ def preprocess_fn(image, box, is_training):
     return _data_aug(image, box, is_training)


+def get_imgs_and_annos(img_dir, txt_dir, image_files, image_anno_dict):
+    img_basenames = []
+    for file in os.listdir(img_dir):
+        # Filter gif file.
+        if 'gif' not in file:
+            img_basenames.append(os.path.basename(file))
+
+    img_names = []
+    for item in img_basenames:
+        temp1, _ = os.path.splitext(item)
+        img_names.append((temp1, item))
+
+    for img, img_basename in img_names:
+        image_path = img_dir + '/' + img_basename
+
+        annos = []
+        # Parse annotation of dataset in paper.
+        if len(img) == 6 and '_' not in img_basename:
+            gt = open(txt_dir + '/' + img + '.txt').read().splitlines()
+            if img.isdigit() and int(img) > 1200:
+                continue
+            for img_each_label in gt:
+                spt = img_each_label.replace(',', '').split(' ')
+                if ' ' not in img_each_label:
+                    spt = img_each_label.split(',')
+                annos.append(
+                    [spt[0], spt[1], str(int(spt[0]) + int(spt[2])), str(int(spt[1]) + int(spt[3]))] + [1] + [
+                        int(0)])
+        else:
+            anno_file = txt_dir + '/gt_img_' + img.split('_')[-1] + '.txt'
+            if not os.path.exists(anno_file):
+                anno_file = txt_dir + '/gt_' + img.split('_')[-1] + '.txt'
+            if not os.path.exists(anno_file):
+                anno_file = txt_dir + '/img_' + img.split('_')[-1] + '.txt'
+            gt = open(anno_file).read().splitlines()
+            for img_each_label in gt:
+                spt = img_each_label.replace(',', '').split(' ')
+                if ' ' not in img_each_label:
+                    spt = img_each_label.split(',')
+                annos.append([spt[0], spt[1], spt[2], spt[3]] + [1] + [int(0)])
+
+        image_files.append(image_path)
+        if annos:
+            image_anno_dict[image_path] = np.array(annos)
+        else:
+            image_anno_dict[image_path] = np.array([0, 0, 0, 0, 0, 1])
+
+
 def create_label(is_training):
     """Create image label."""
     image_files = []
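In the extracted helper, the first branch reads annotation lines in `x,y,width,height` form and converts them to corner coordinates, appending a class label `1` and a trailing `0` flag; the second branch keeps corner coordinates as-is. A standalone trace of the conversion (the sample line is made up):

```python
img_each_label = "10,20,30,40"   # x, y, width, height
spt = img_each_label.replace(',', '').split(' ')
if ' ' not in img_each_label:    # no spaces, so fall back to comma splitting
    spt = img_each_label.split(',')
anno = [spt[0], spt[1], str(int(spt[0]) + int(spt[2])), str(int(spt[1]) + int(spt[3]))] + [1] + [int(0)]
print(anno)   # ['10', '20', '40', '60', 1, 0] -> x1, y1, x2, y2, label, flag
```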
@@ -340,50 +386,7 @@ def create_label(is_training):
         txt_dirs = config.test_txts.split(',')

     for img_dir, txt_dir in zip(img_dirs, txt_dirs):
-        img_basenames = []
-        for file in os.listdir(img_dir):
-            # Filter gif file.
-            if 'gif' not in file:
-                img_basenames.append(os.path.basename(file))
-
-        img_names = []
-        for item in img_basenames:
-            temp1, _ = os.path.splitext(item)
-            img_names.append((temp1, item))
-
-        for img, img_basename in img_names:
-            image_path = img_dir + '/' + img_basename
-
-            annos = []
-            # Parse annotation of dataset in paper.
-            if len(img) == 6 and '_' not in img_basename:
-                gt = open(txt_dir + '/' + img + '.txt').read().splitlines()
-                if img.isdigit() and int(img) > 1200:
-                    continue
-                for img_each_label in gt:
-                    spt = img_each_label.replace(',', '').split(' ')
-                    if ' ' not in img_each_label:
-                        spt = img_each_label.split(',')
-                    annos.append(
-                        [spt[0], spt[1], str(int(spt[0]) + int(spt[2])), str(int(spt[1]) + int(spt[3]))] + [1] + [
-                            int(0)])
-            else:
-                anno_file = txt_dir + '/gt_img_' + img.split('_')[-1] + '.txt'
-                if not os.path.exists(anno_file):
-                    anno_file = txt_dir + '/gt_' + img.split('_')[-1] + '.txt'
-                if not os.path.exists(anno_file):
-                    anno_file = txt_dir + '/img_' + img.split('_')[-1] + '.txt'
-                gt = open(anno_file).read().splitlines()
-                for img_each_label in gt:
-                    spt = img_each_label.replace(',', '').split(' ')
-                    if ' ' not in img_each_label:
-                        spt = img_each_label.split(',')
-                    annos.append([spt[0], spt[1], spt[2], spt[3]] + [1] + [int(0)])
-
-            image_files.append(image_path)
-            if annos:
-                image_anno_dict[image_path] = np.array(annos)
-            else:
-                image_anno_dict[image_path] = np.array([0, 0, 0, 0, 0, 1])
+        get_imgs_and_annos(img_dir, txt_dir, image_files, image_anno_dict)

     if is_training and config.use_coco:
         coco_root = config.coco_root

@@ -460,7 +463,7 @@ def create_deeptext_dataset(mindrecord_file, batch_size=2, repeat_num=12, device
     normalize_op = C.Normalize((123.675, 116.28, 103.53), (58.395, 57.12, 57.375))
     horizontally_op = C.RandomHorizontalFlip(1)
     type_cast0 = CC.TypeCast(mstype.float32)
-    type_cast1 = CC.TypeCast(mstype.float16)
+    type_cast1 = CC.TypeCast(mstype.float32)
     type_cast2 = CC.TypeCast(mstype.int32)
     type_cast3 = CC.TypeCast(mstype.bool_)
@@ -174,7 +174,7 @@ class TrainOneStepCell(nn.Cell):
         self.optimizer = optimizer
         self.grad = C.GradOperation(get_by_list=True,
                                     sens_param=True)
-        self.sens = Tensor((np.ones((1,)) * sens).astype(np.float16))
+        self.sens = Tensor((np.ones((1,)) * sens).astype(np.float32))
        self.reduce_flag = reduce_flag
         if reduce_flag:
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
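`sens` is the seed gradient handed to `GradOperation(sens_param=True)`; in float16 training it typically doubles as a loss scale so small gradients survive the narrow dynamic range, a concern this float32 migration largely removes. A toy illustration of the underflow it guards against (values chosen for illustration):

```python
import numpy as np

grad = np.float16(1e-8)            # below float16's subnormal range
print(grad)                        # 0.0 -- the gradient vanishes
sens = 1024.0
scaled = np.float16(1e-8 * sens)   # scale first, so the value is representable
print(np.float32(scaled) / sens)   # unscale in float32: ~1e-8 survives
```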